coach-scraper/app/__main__.py

72 lines
2.2 KiB
Python

import argparse
import asyncio
import csv
import json
import aiohttp
from app.chesscom import Exporter as ChesscomExporter
from app.chesscom import Scraper as ChesscomScraper
from app.lichess import Exporter as LichessExporter
from app.lichess import Scraper as LichessScraper
from app.repo import Site
def _build_parser():
    """Build the command-line parser for the coach scraper.

    Returns:
        An ``argparse.ArgumentParser`` that requires a user agent
        (``-u``/``--user-agent``) and at least one site (``-s``/``--site``,
        repeatable, restricted to the known ``Site`` values).
    """
    parser = argparse.ArgumentParser(
        prog="coach-scraper",
        description="Scraping/exporting of chess coaches.",
    )
    parser.add_argument("-u", "--user-agent", required=True)
    parser.add_argument(
        "-s",
        "--site",
        required=True,
        action="append",
        choices=[
            Site.CHESSCOM.value,
            Site.LICHESS.value,
        ],
    )
    return parser


def _export_row(export):
    """Flatten one exporter result into a CSV row.

    Args:
        export: Mapping produced by an exporter's ``export()`` call. The
            ``"site"`` and ``"username"`` keys are mandatory; the remaining
            keys are optional and default to an empty string.

    Returns:
        A list of column values.

    Raises:
        KeyError: If ``"site"`` or ``"username"`` is missing.
    """
    # This should match the order data is loaded in the
    # sql/export.sql script.
    return [
        export["site"],
        export["username"],
        export.get("name", ""),
        export.get("image_url", ""),
        export.get("rapid", ""),
        export.get("blitz", ""),
        export.get("bullet", ""),
    ]


async def run():
    """Scrape every requested site and write the results to ``data/export.csv``.

    Command-line arguments are read from ``sys.argv``. For each distinct site
    given on the command line, the matching scraper is awaited and every item
    it yields is passed to the matching exporter; each export becomes one CSV
    row.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid or missing arguments.
        OSError: If ``data/export.csv`` cannot be opened for writing.
    """
    args = _build_parser().parse_args()

    # Dispatch table instead of an if/elif chain: an unknown site now fails
    # loudly with a KeyError rather than calling a method on ``None``.
    pipelines = {
        Site.CHESSCOM.value: (ChesscomScraper, ChesscomExporter),
        Site.LICHESS.value: (LichessScraper, LichessExporter),
    }

    async with aiohttp.ClientSession(
        headers={"User-Agent": f"BoardWise coach-scraper ({args.user_agent})"}
    ) as session:
        # newline="" is required by the csv module so that it controls line
        # endings itself (otherwise rows end in "\r\r\n" on Windows). The
        # explicit encoding keeps non-ASCII names from depending on the locale.
        with open("data/export.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            # De-duplicate repeated --site flags; sort so the row order of the
            # export does not vary between runs with string hash randomisation.
            for site in sorted(set(args.site)):
                scraper_cls, exporter_cls = pipelines[site]
                usernames = await scraper_cls(session).scrape()
                for username in usernames:
                    export = exporter_cls(username).export()
                    writer.writerow(_export_row(export))
def main():
    """Synchronous entry point: run the ``run`` coroutine to completion."""
    coroutine = run()
    asyncio.run(coroutine)
# Start the scraper only when this module is executed directly (for example
# via ``python -m app``), never as a side effect of importing it.
if __name__ == "__main__":
    main()