coach-scraper/app/__main__.py

72 lines
2.2 KiB
Python
Raw Normal View History

import argparse
import asyncio
import csv
2023-12-01 00:12:16 +00:00
import json
2023-12-01 23:36:53 +00:00
import aiohttp
from app.chesscom import Exporter as ChesscomExporter
from app.chesscom import Scraper as ChesscomScraper
from app.lichess import Exporter as LichessExporter
from app.lichess import Scraper as LichessScraper
2023-12-01 00:12:16 +00:00
from app.repo import Site
async def run():
    """Scrape coaches from the requested sites and export them to a CSV file.

    Command-line arguments:
        -u / --user-agent: required; embedded in the ``User-Agent`` header
            sent with every request made through the shared session.
        -s / --site: required and repeatable; each value must be one of the
            ``Site`` enum values listed in ``choices``.

    Each requested site is scraped once (duplicate ``--site`` flags are
    collapsed, keeping first-seen order), and one row per scraped username is
    written to ``data/export.csv``.
    """
    parser = argparse.ArgumentParser(
        prog="coach-scraper",
        description="Scraping/exporting of chess coaches.",
    )
    parser.add_argument("-u", "--user-agent", required=True)
    parser.add_argument(
        "-s",
        "--site",
        required=True,
        action="append",
        choices=[
            Site.CHESSCOM.value,
            Site.LICHESS.value,
        ],
    )
    args = parser.parse_args()

    # Dispatch table: site identifier -> (scraper class, exporter class).
    handlers = {
        Site.CHESSCOM.value: (ChesscomScraper, ChesscomExporter),
        Site.LICHESS.value: (LichessScraper, LichessExporter),
    }

    async with aiohttp.ClientSession(
        headers={"User-Agent": f"BoardWise coach-scraper ({args.user_agent})"}
    ) as session:
        # newline="" is required by the csv module so the writer controls line
        # endings itself (otherwise "\r\r\n" appears on platforms that
        # translate "\n"). An explicit UTF-8 encoding keeps non-ASCII names
        # from depending on the platform's locale encoding.
        with open("data/export.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            # dict.fromkeys de-duplicates while preserving the order given on
            # the command line, so the output row order is deterministic.
            for site in dict.fromkeys(args.site):
                scraper_cls, exporter_cls = handlers[site]
                usernames = await scraper_cls(session).scrape()
                for username in usernames:
                    export = exporter_cls(username).export()
                    writer.writerow(
                        [
                            # This should match the order data is loaded in the
                            # sql/export.sql script.
                            export["site"],
                            export["username"],
                            export.get("name", ""),
                            export.get("image_url", ""),
                            export.get("rapid", ""),
                            export.get("blitz", ""),
                            export.get("bullet", ""),
                        ]
                    )
def main():
    """Synchronous entry point: run the async ``run`` coroutine to completion."""
    entry_coroutine = run()
    asyncio.run(entry_coroutine)
2023-11-28 12:28:21 +00:00
# Only start the scraper when this module is executed directly
# (e.g. ``python -m app``), not when it is imported.
if __name__ == "__main__":
    main()