From 97cdb3b9cd644e889cb54b8874daa176081b5b51 Mon Sep 17 00:00:00 2001
From: Joshua Potter
Date: Mon, 27 Nov 2023 14:13:56 -0700
Subject: [PATCH] Add User-Agent to requests.

---
 main.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/main.py b/main.py
index 7be0185..18d50dd 100644
--- a/main.py
+++ b/main.py
@@ -13,14 +13,27 @@ DATA_COACH_DIR = "data/coach/{member_name}"
 DATA_COACH_FILE = "data/coach/{member_name}/{filename}"
 
 
+async def chesscom_request(url):
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            if response.status != 200:
+                print(f"Encountered {response.status} when retrieving {url}.")
+                return
+            return await response.text()
+
+
 async def scrape_coach_links(page_no):
     """Scrape a single coach page listing."""
     links = []
-    href = f"https://www.chess.com/coaches?sortBy=alphabetical&page={page_no}"
-    async with aiohttp.ClientSession() as session:
-        async with session.get(href) as response:
+    url = f"https://www.chess.com/coaches?sortBy=alphabetical&page={page_no}"
+    async with aiohttp.ClientSession(
+        headers={
+            "User-Agent": "BoardWise (https://github.com/BoardWiseGG/chesscom-scraper)",
+        }
+    ) as session:
+        async with session.get(url) as response:
             if response.status != 200:
-                print(f"Encountered {response.status} when retrieving {href}.")
+                print(f"Encountered {response.status} when retrieving {url}.")
                 return
             html = await response.text()
             soup = BeautifulSoup(html, "html.parser")
@@ -51,7 +64,7 @@ async def scrape_all_coach_links(max_pages=64):
     return links
 
 
-async def download_member_info(member_name, filename, href):
+async def download_member_info(member_name, filename, url):
     """Download member-specific content.
 
     @return: True if we downloaded content. False if the download already
@@ -61,9 +74,9 @@
     if os.path.isfile(filepath):
         return False
     async with aiohttp.ClientSession() as session:
-        async with session.get(href) as response:
+        async with session.get(url) as response:
             if response.status != 200:
-                print(f"Encountered {response.status} when retrieving {href}")
+                print(f"Encountered {response.status} when retrieving {url}")
                 return
             with open(filepath, "w") as f:
                 f.write(await response.text())
@@ -72,14 +85,14 @@
 
 async def main():
     links = await scrape_all_coach_links()
-    for href in [link.strip() for link in links]:
-        member_name = href[len("https://www.chess.com/member/") :]
+    for url in [link.strip() for link in links]:
+        member_name = url[len("https://www.chess.com/member/") :]
         os.makedirs(DATA_COACH_DIR.format(member_name=member_name), exist_ok=True)
         downloaded = await asyncio.gather(
             download_member_info(
                 member_name,
                 f"{member_name}.html",
-                href,
+                url,
             ),
             download_member_info(
                 member_name,
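
Note: as written, the new chesscom_request helper still opens its session without the
User-Agent header; only scrape_coach_links attaches it. Below is a minimal sketch of how
the header could be centralized in the helper instead, assuming chesscom_request is meant
to become the single entry point for chess.com requests. The USER_AGENT constant is
illustrative, not part of this patch:

    import aiohttp

    USER_AGENT = "BoardWise (https://github.com/BoardWiseGG/chesscom-scraper)"

    async def chesscom_request(url):
        # Attach the identifying User-Agent to every request made through this
        # helper, rather than configuring it at each call site.
        async with aiohttp.ClientSession(headers={"User-Agent": USER_AGENT}) as session:
            async with session.get(url) as response:
                if response.status != 200:
                    print(f"Encountered {response.status} when retrieving {url}.")
                    return None
                return await response.text()

Call sites such as scrape_coach_links could then reduce to
html = await chesscom_request(url) and drop their per-function sessions.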