From 932d97bf3b18c0e107400d3532fe842a6d1ee363 Mon Sep 17 00:00:00 2001
From: Joshua Potter <jrpotter2112@gmail.com>
Date: Mon, 27 Nov 2023 13:53:51 -0700
Subject: [PATCH] Create intermediate directories and use named format fields.

---
 main.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/main.py b/main.py
index 7e924cc..7be0185 100644
--- a/main.py
+++ b/main.py
@@ -8,8 +8,9 @@ from bs4 import BeautifulSoup
 
 
 # References to paths we use to save any scraped content.
-DATA_COACH_LIST = "data/pages/{}.txt"
-DATA_COACH_DIR = "data/coach/{}/{}"
+DATA_COACH_LIST = "data/pages/{page_no}.txt"
+DATA_COACH_DIR = "data/coach/{member_name}"
+DATA_COACH_FILE = "data/coach/{member_name}/{filename}"
 
 
 async def scrape_coach_links(page_no):
@@ -34,7 +35,7 @@ async def scrape_all_coach_links(max_pages=64):
     """Scan through https://www.chess.com/coaches for all member links."""
     links = []
     for i in range(1, max_pages + 1):
-        filepath = DATA_COACH_LIST.format(i)
+        filepath = DATA_COACH_LIST.format(page_no=i)
         if os.path.isfile(filepath):
             with open(filepath, "r") as f:
                 links.extend(f.readlines())
@@ -56,15 +57,15 @@ async def download_member_info(member_name, filename, href):
     @return: True if we downloaded content. False if the download already
     exists locally.
     """
-    target = DATA_COACH_DIR.format(member_name, filename)
-    if os.path.isfile(target):
+    filepath = DATA_COACH_FILE.format(member_name=member_name, filename=filename)
+    if os.path.isfile(filepath):
         return False
     async with aiohttp.ClientSession() as session:
         async with session.get(href) as response:
             if response.status != 200:
                 print(f"Encountered {response.status} when retrieving {href}")
                 return
-            with open(target, "w") as f:
+            with open(filepath, "w") as f:
                 f.write(await response.text())
     return True
 
@@ -73,6 +74,7 @@ async def main():
     links = await scrape_all_coach_links()
     for href in [link.strip() for link in links]:
         member_name = href[len("https://www.chess.com/member/") :]
+        os.makedirs(DATA_COACH_DIR.format(member_name=member_name), exist_ok=True)
         downloaded = await asyncio.gather(
             download_member_info(
                 member_name,