diff --git a/README.md b/README.md
index 35ae80d..3feeb18 100644
--- a/README.md
+++ b/README.md
@@ -33,12 +33,12 @@ data
 If you have nix available, run:
 ```bash
-$ nix run . -- --user-agent <user-agent> -s chesscom
+$ nix run . -- --user-agent <user-agent> -s <site>
 ```
 
 If not, ensure you have [poetry](https://python-poetry.org/) on your machine
 and instead run the following:
 ```bash
-$ poetry run python3 -m app -u <user-agent> -s chesscom
+$ poetry run python3 -m app -u <user-agent> -s <site>
 ```
 
 ## Database
@@ -83,8 +83,7 @@ necessary to scrape coach data from our chess website and dump the results
 into the database in one fell swoop. Assuming our database is open with a
 socket connection available at `@scraper`:
 ```bash
-nix run . -- --user-agent <user-agent> -s chesscom
-nix run . -- --user-agent <user-agent> -s lichess
+nix run . -- --user-agent <user-agent> -s chesscom -s lichess
 cat data/{chesscom,lichess}/export.json > data/export.json
 psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
 ```
diff --git a/app/__main__.py b/app/__main__.py
index ed86bd6..7b7bdba 100644
--- a/app/__main__.py
+++ b/app/__main__.py
@@ -24,6 +24,7 @@ async def run():
         "-s",
         "--site",
         required=True,
+        action="append",
         choices=[
             Site.CHESSCOM.value,
             Site.LICHESS.value,
@@ -34,22 +35,25 @@ async def run():
     async with aiohttp.ClientSession(
         headers={"User-Agent": f"BoardWise coach-scraper ({args.user_agent})"}
     ) as session:
-        if args.site == Site.CHESSCOM.value:
-            scraper = ChesscomScraper(session)
-            exporter_cls = ChesscomExporter
-        elif args.site == Site.LICHESS.value:
-            scraper = LichessScraper(session)
-            exporter_cls = LichessExporter
+        for site in set(args.site):
+            scraper, exporter_cls = None, None
 
-        # Write out each coach data into NDJSON file.
-        dump = []
-        usernames = await scraper.scrape()
-        for username in usernames:
-            export = exporter_cls(username).export()
-            dump.append(f"{json.dumps(export)}\n")
+            if site == Site.CHESSCOM.value:
+                scraper = ChesscomScraper(session)
+                exporter_cls = ChesscomExporter
+            elif site == Site.LICHESS.value:
+                scraper = LichessScraper(session)
+                exporter_cls = LichessExporter
 
-        with open(scraper.path_site_file("export.json"), "w") as f:
-            f.writelines(dump)
+            # Write out each coach data into NDJSON file.
+            dump = []
+            usernames = await scraper.scrape()
+            for username in usernames:
+                export = exporter_cls(username).export()
+                dump.append(f"{json.dumps(export)}\n")
+
+            with open(scraper.path_site_file("export.json"), "w") as f:
+                f.writelines(dump)
 
 
 def main():
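
Note on the behavioral change above: with `action="append"`, argparse collects every `-s`/`--site` occurrence into a list, so `args.site` becomes a `list[str]` rather than a `str`, and the `set(args.site)` loop dedupes repeated flags before scraping. A minimal standalone sketch of that parsing behavior (the `Site` enum here is a stand-in for the app's, reduced to the two values the diff shows):

```python
import argparse
from enum import Enum


# Stand-in for the app's Site enum, reduced to the values shown in the diff.
class Site(Enum):
    CHESSCOM = "chesscom"
    LICHESS = "lichess"


parser = argparse.ArgumentParser()
parser.add_argument(
    "-s",
    "--site",
    required=True,
    action="append",  # each -s flag appends; choices= validates every value
    choices=[Site.CHESSCOM.value, Site.LICHESS.value],
)

# `-s chesscom -s lichess -s chesscom` parses into a list with duplicates;
# set() mirrors the dedup in app/__main__.py's `for site in set(args.site)`.
args = parser.parse_args(["-s", "chesscom", "-s", "lichess", "-s", "chesscom"])
assert args.site == ["chesscom", "lichess", "chesscom"]
assert set(args.site) == {"chesscom", "lichess"}
```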