parent
27577aa5b0
commit
fe2e504de9
36
README.md
36
README.md
|
@ -8,14 +8,10 @@ adjustments to this script appropriately rate-limit.
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
This is a simple web scraper for [chess.com](https://www.chess.com/coaches)
|
This is a simple web scraper for [chess.com](https://www.chess.com/coaches)
|
||||||
coaches. Running:
|
coaches. The program searches for all listed coaches as well as specific
|
||||||
```bash
|
information about each of them (their profile, recent activity, and stats). The
|
||||||
$> python3 main.py --user-agent <your-email>
|
result will be found in a newly created `data` directory with the following
|
||||||
```
|
structure:
|
||||||
will query [chess.com](https://www.chess.com) for all listed coach usernames as
|
|
||||||
well as specific information about each corresponding coach (their profile,
|
|
||||||
recent activity, and stats). The result will be found in a newly created `data`
|
|
||||||
directory with the following structure:
|
|
||||||
```
|
```
|
||||||
data
|
data
|
||||||
├── coach
|
├── coach
|
||||||
|
@ -29,12 +25,28 @@ data
|
||||||
├── ...
|
├── ...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
If you have nix available, run:
|
||||||
|
```bash
|
||||||
|
$> nix build
|
||||||
|
$> result/bin/app --user-agent <your-email>
|
||||||
|
```
|
||||||
|
If not, ensure you have [poetry](https://python-poetry.org/) on your machine and
|
||||||
|
instead run the following:
|
||||||
|
```bash
|
||||||
|
$> poetry install
|
||||||
|
$> source $(poetry env info --path)/bin/activate
|
||||||
|
$> python3 -m app
|
||||||
|
```
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
This script was written using Python (version 3.11.6). Packaging and dependency
|
[nix](https://nixos.org/) is used for development. The included `flake.nix`
|
||||||
management relies on [poetry](https://python-poetry.org/) (version 1.7.0).
|
file automatically loads in Python (version 3.11.6) with packaging and
|
||||||
[direnv](https://direnv.net/) can be used to launch a dev shell upon entering
|
dependency management handled by poetry (version 1.7.0). [direnv](https://direnv.net/)
|
||||||
this directory (refer to `.envrc`). Otherwise run via:
|
can be used to launch a dev shell upon entering this directory (refer to
|
||||||
|
`.envrc`). Otherwise run via:
|
||||||
```bash
|
```bash
|
||||||
$> nix develop
|
$> nix develop
|
||||||
```
|
```
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
from app import scraper
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
scraper.run()
|
|
@ -171,7 +171,7 @@ async def _download_coach_data(session: aiohttp.ClientSession, username: str):
|
||||||
print(f"Skipping {ANSI_COLOR(username)}")
|
print(f"Skipping {ANSI_COLOR(username)}")
|
||||||
|
|
||||||
|
|
||||||
async def scrape():
|
async def _scrape():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="chesscom-scraper",
|
prog="chesscom-scraper",
|
||||||
description="HTML scraping of chess.com coaches.",
|
description="HTML scraping of chess.com coaches.",
|
||||||
|
@ -193,5 +193,5 @@ async def scrape():
|
||||||
await _download_coach_data(session, username)
|
await _download_coach_data(session, username)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def run():
|
||||||
asyncio.run(scrape())
|
asyncio.run(_scrape())
|
|
@ -68,6 +68,10 @@
|
||||||
projectDir = ./.;
|
projectDir = ./.;
|
||||||
overrides = poetry2nix-overrides;
|
overrides = poetry2nix-overrides;
|
||||||
preferWheels = true;
|
preferWheels = true;
|
||||||
|
} // {
|
||||||
|
# These attributes are passed to `buildPythonApplication`.
|
||||||
|
pname = "app";
|
||||||
|
version = "0.1.0";
|
||||||
};
|
};
|
||||||
|
|
||||||
default = self.packages.${system}.app;
|
default = self.packages.${system}.app;
|
||||||
|
|
|
@ -16,3 +16,6 @@ types-beautifulsoup4 = "^4.12.0.7"
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
[tool.poetry.scripts]
|
||||||
|
app = "app.scraper:run"
|
||||||
|
|
Loading…
Reference in New Issue