# coach-scraper/app/database.py
#
# 163 lines
# 4.3 KiB
# Python

import sys
from datetime import datetime
from typing import List, Literal
from typing_extensions import TypedDict
from app.types import Site, code_to_lang
# Database schema that qualifies every table name used in this module.
SCHEMA_NAME = "coach_scraper"

# Table holding one row per coach; `upsert_row` writes to it.
MAIN_TABLE_NAME = "export"

# Table holding the known languages; `load_languages` writes to it.
LANG_TABLE_NAME = "languages"
# The set of keys a `Row` may carry, one per column of the export table.
#
# Spelled as a single multi-valued `Literal` rather than a `|`-union of
# one-valued literals: type checkers treat the two forms as equivalent, but
# this one does not rely on runtime `|` support for typing objects.
RowKey = Literal[
    "site",
    "username",
    "name",
    "image_url",
    "languages",
    "rapid",
    "blitz",
    "bullet",
]
class Row(TypedDict, total=False):
    """A single coach record of the export table.

    Declared with `total=False`, so every key is optional as far as the type
    is concerned. A coach is uniquely identified by the (site, username)
    pair.
    """

    # Site the coach was sourced from.
    site: Site

    # The coach's username on the source site.
    username: str

    # The coach's real name.
    name: str

    # Profile image the coach uses on the source site.
    image_url: str

    # Languages the coach is fluent in.
    languages: List[str]

    # Rapid rating, relative to the source site.
    rapid: int

    # Blitz rating, relative to the source site.
    blitz: int

    # Bullet rating, relative to the source site.
    bullet: int
def load_languages(conn):
    """Load all known languages into the languages table.

    Upserts one row per entry of `code_to_lang` through `conn`, storing the
    entry's iteration index as `pos`. If a row with the same `code` already
    exists, only its `name` is refreshed. The rows are committed together
    once every statement has succeeded; the cursor is closed either way.
    """
    # NOTE(review): `pos` is not updated on conflict, so reordering
    # `code_to_lang` leaves the positions of existing rows unchanged.
    # Confirm this is intended.
    #
    # The statement is identical for every language, so build it only once.
    statement = f"""
INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
(code, name, pos)
VALUES
(%s, %s, %s)
ON CONFLICT
(code)
DO UPDATE SET
name = EXCLUDED.name;
"""
    cursor = conn.cursor()
    try:
        for pos, (code, name) in enumerate(code_to_lang.items()):
            cursor.execute(statement, [code, name, pos])
        conn.commit()
    finally:
        cursor.close()
def backup_database(conn):
    """Create timestamped backups of the export and languages tables.

    Each table is copied, as it exists at time of invocation, into another
    table with a `_%t` suffix, where %t denotes the number of seconds since
    the Unix epoch. Both copies share the same suffix and are committed
    together once both have been created.

    If either source table does not exist, a message is printed to stderr and
    the process exits with status 1 without committing anything.
    """
    # `datetime.now()` is naive local time. `timestamp()` interprets it as
    # such and yields true epoch seconds, whereas subtracting a naive
    # `datetime(1970, 1, 1)` would be off by the machine's UTC offset.
    timestamp = int(datetime.now().timestamp())

    cursor = conn.cursor()
    try:
        for table_name in [MAIN_TABLE_NAME, LANG_TABLE_NAME]:
            # Values are bound as parameters instead of being spliced into
            # the SQL text.
            cursor.execute(
                """
SELECT 1
FROM information_schema.tables
WHERE table_schema = %s
AND table_name = %s;
""",
                [SCHEMA_NAME, table_name],
            )
            if cursor.fetchone() is None:
                print(f"Missing `{SCHEMA_NAME}.{table_name}` table.", file=sys.stderr)
                sys.exit(1)

            # Identifiers cannot be bound as parameters, so the table names
            # are interpolated; they come from module constants only.
            cursor.execute(
                f"""
CREATE TABLE {SCHEMA_NAME}.{table_name}_{timestamp}
AS TABLE {SCHEMA_NAME}.{table_name}
"""
            )
        # Persist the backups here rather than relying on a later commit
        # issued by some other function.
        conn.commit()
    finally:
        cursor.close()
def upsert_row(conn, row: Row):
    """Insert `row` into the export table, or update the matching record.

    Records are matched on the (site, username) pair; on a match every other
    column is overwritten with the incoming value. `site` and `username` must
    be present in `row`. Any other missing key is passed as `None`, except
    `languages`, which falls back to an empty list. The change is committed
    before returning, and the cursor is closed whether or not it succeeds.
    """
    statement = f"""
INSERT INTO {SCHEMA_NAME}.{MAIN_TABLE_NAME}
( site
, username
, name
, image_url
, languages
, rapid
, blitz
, bullet
)
VALUES
( %s
, %s
, %s
, %s
, %s
, %s
, %s
, %s
)
ON CONFLICT
(site, username)
DO UPDATE SET
name = EXCLUDED.name,
image_url = EXCLUDED.image_url,
languages = EXCLUDED.languages,
rapid = EXCLUDED.rapid,
blitz = EXCLUDED.blitz,
bullet = EXCLUDED.bullet;
"""
    cursor = conn.cursor()
    try:
        # One value per placeholder, in the column order of the statement.
        values = [
            row["site"].value,
            row["username"],
            row.get("name"),
            row.get("image_url"),
            row.get("languages", []),
            row.get("rapid"),
            row.get("blitz"),
            row.get("bullet"),
        ]
        cursor.execute(statement, values)
        conn.commit()
    finally:
        if cursor:
            cursor.close()