Allow specifying multiple sites on the command line.
parent
0c4e008b45
commit
a4b1647e53
|
@ -33,12 +33,12 @@ data
|
||||||
|
|
||||||
If you have nix available, run:
|
If you have nix available, run:
|
||||||
```bash
|
```bash
|
||||||
$ nix run . -- --user-agent <your-email> -s chesscom
|
$ nix run . -- --user-agent <your-email> -s <site>
|
||||||
```
|
```
|
||||||
If not, ensure you have [poetry](https://python-poetry.org/) on your machine and
|
If not, ensure you have [poetry](https://python-poetry.org/) on your machine and
|
||||||
instead run the following:
|
instead run the following:
|
||||||
```bash
|
```bash
|
||||||
$ poetry run python3 -m app -u <your-email> -s chesscom
|
$ poetry run python3 -m app -u <your-email> -s <site>
|
||||||
```
|
```
|
||||||
|
|
||||||
## Database
|
## Database
|
||||||
|
@ -83,8 +83,7 @@ necessary to scrape coach data from our chess website and dump the results into
|
||||||
the database in one fell swoop. Assuming our database is open with a socket
|
the database in one fell swoop. Assuming our database is open with a socket
|
||||||
connection available at `@scraper`:
|
connection available at `@scraper`:
|
||||||
```bash
|
```bash
|
||||||
nix run . -- --user-agent <your-email> -s chesscom
|
nix run . -- --user-agent <your-email> -s chesscom -s lichess
|
||||||
nix run . -- --user-agent <your-email> -s lichess
|
|
||||||
cat data/{chesscom,lichess}/export.json > data/export.json
|
cat data/{chesscom,lichess}/export.json > data/export.json
|
||||||
psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
|
psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
|
||||||
```
|
```
|
||||||
|
|
|
@ -24,6 +24,7 @@ async def run():
|
||||||
"-s",
|
"-s",
|
||||||
"--site",
|
"--site",
|
||||||
required=True,
|
required=True,
|
||||||
|
action="append",
|
||||||
choices=[
|
choices=[
|
||||||
Site.CHESSCOM.value,
|
Site.CHESSCOM.value,
|
||||||
Site.LICHESS.value,
|
Site.LICHESS.value,
|
||||||
|
@ -34,22 +35,25 @@ async def run():
|
||||||
async with aiohttp.ClientSession(
|
async with aiohttp.ClientSession(
|
||||||
headers={"User-Agent": f"BoardWise coach-scraper ({args.user_agent})"}
|
headers={"User-Agent": f"BoardWise coach-scraper ({args.user_agent})"}
|
||||||
) as session:
|
) as session:
|
||||||
if args.site == Site.CHESSCOM.value:
|
for site in set(args.site):
|
||||||
scraper = ChesscomScraper(session)
|
scraper, exporter_cls = None, None
|
||||||
exporter_cls = ChesscomExporter
|
|
||||||
elif args.site == Site.LICHESS.value:
|
|
||||||
scraper = LichessScraper(session)
|
|
||||||
exporter_cls = LichessExporter
|
|
||||||
|
|
||||||
# Write out each coach data into NDJSON file.
|
if site == Site.CHESSCOM.value:
|
||||||
dump = []
|
scraper = ChesscomScraper(session)
|
||||||
usernames = await scraper.scrape()
|
exporter_cls = ChesscomExporter
|
||||||
for username in usernames:
|
elif site == Site.LICHESS.value:
|
||||||
export = exporter_cls(username).export()
|
scraper = LichessScraper(session)
|
||||||
dump.append(f"{json.dumps(export)}\n")
|
exporter_cls = LichessExporter
|
||||||
|
|
||||||
with open(scraper.path_site_file("export.json"), "w") as f:
|
# Write out each coach data into NDJSON file.
|
||||||
f.writelines(dump)
|
dump = []
|
||||||
|
usernames = await scraper.scrape()
|
||||||
|
for username in usernames:
|
||||||
|
export = exporter_cls(username).export()
|
||||||
|
dump.append(f"{json.dumps(export)}\n")
|
||||||
|
|
||||||
|
with open(scraper.path_site_file("export.json"), "w") as f:
|
||||||
|
f.writelines(dump)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
Loading…
Reference in New Issue