2023-12-05 18:43:13 +00:00
|
|
|
import sys
|
|
|
|
from datetime import datetime
|
2023-12-05 21:20:46 +00:00
|
|
|
from typing import List, Literal
|
2023-12-05 18:43:13 +00:00
|
|
|
|
|
|
|
from typing_extensions import TypedDict
|
|
|
|
|
2023-12-07 03:53:54 +00:00
|
|
|
from app.locale import Locale, locale_to_str, native_to_locale
|
|
|
|
from app.types import Site, Title
|
2023-12-05 19:54:12 +00:00
|
|
|
|
2023-12-05 18:43:13 +00:00
|
|
|
SCHEMA_NAME = "coach_scraper"
|
2023-12-05 22:15:42 +00:00
|
|
|
MAIN_TABLE_NAME = "export"
|
|
|
|
LANG_TABLE_NAME = "languages"
|
2023-12-05 18:43:13 +00:00
|
|
|
|
|
|
|
|
2023-12-05 19:54:12 +00:00
|
|
|
RowKey = (
|
|
|
|
Literal["site"]
|
|
|
|
| Literal["username"]
|
|
|
|
| Literal["name"]
|
|
|
|
| Literal["image_url"]
|
2023-12-07 02:52:40 +00:00
|
|
|
| Literal["title"]
|
2023-12-05 21:20:46 +00:00
|
|
|
| Literal["languages"]
|
2023-12-05 19:54:12 +00:00
|
|
|
| Literal["rapid"]
|
|
|
|
| Literal["blitz"]
|
|
|
|
| Literal["bullet"]
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2023-12-05 18:43:13 +00:00
|
|
|
class Row(TypedDict, total=False):
|
|
|
|
"""Representation of a row of the export table.
|
|
|
|
|
|
|
|
The (site, username) make up a unique key for each coach.
|
|
|
|
"""
|
|
|
|
|
|
|
|
# Website the given coach was sourced from.
|
2023-12-05 19:54:12 +00:00
|
|
|
site: Site
|
2023-12-05 18:43:13 +00:00
|
|
|
# Username used on the source site.
|
|
|
|
username: str
|
|
|
|
# Real name.
|
|
|
|
name: str
|
|
|
|
# Profile image used on the source site.
|
|
|
|
image_url: str
|
2023-12-07 02:52:40 +00:00
|
|
|
# The FIDE title assigned to the coach on the source siste.
|
|
|
|
title: Title
|
2023-12-05 21:20:46 +00:00
|
|
|
# The list of languages the coach is fluent in.
|
2023-12-07 03:53:54 +00:00
|
|
|
languages: List[Locale]
|
2023-12-05 18:43:13 +00:00
|
|
|
# Rapid rating relative to the site they were sourced from.
|
|
|
|
rapid: int
|
|
|
|
# Blitz rating relative to the site they were sourced from.
|
|
|
|
blitz: int
|
|
|
|
# Bullet rating relative to the site they were sourced from.
|
|
|
|
bullet: int
|
|
|
|
|
|
|
|
|
2023-12-05 22:15:42 +00:00
|
|
|
def load_languages(conn):
|
|
|
|
"""Load all known languages into the languages table."""
|
|
|
|
cursor = None
|
|
|
|
try:
|
|
|
|
cursor = conn.cursor()
|
2023-12-07 03:53:54 +00:00
|
|
|
for pos, (name, loc) in enumerate(list(native_to_locale.items())):
|
2023-12-05 22:15:42 +00:00
|
|
|
cursor.execute(
|
|
|
|
f"""
|
|
|
|
INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
|
2023-12-05 23:06:04 +00:00
|
|
|
(code, name, pos)
|
2023-12-05 22:15:42 +00:00
|
|
|
VALUES
|
2023-12-05 23:06:04 +00:00
|
|
|
(%s, %s, %s)
|
2023-12-05 22:15:42 +00:00
|
|
|
ON CONFLICT
|
|
|
|
(code)
|
|
|
|
DO UPDATE SET
|
|
|
|
name = EXCLUDED.name;
|
|
|
|
""",
|
2023-12-07 03:53:54 +00:00
|
|
|
[locale_to_str(loc), name, pos],
|
2023-12-05 22:15:42 +00:00
|
|
|
)
|
|
|
|
conn.commit()
|
|
|
|
finally:
|
|
|
|
if cursor:
|
|
|
|
cursor.close()
|
|
|
|
|
|
|
|
|
2023-12-05 18:43:13 +00:00
|
|
|
def backup_database(conn):
|
|
|
|
"""Creates a backup of the export table.
|
|
|
|
|
|
|
|
Simply copies the table at time of invocation into another table with a
|
|
|
|
`_%t` suffix, where %t denotes the number of seconds since the Unix epoch.
|
|
|
|
"""
|
|
|
|
cursor = None
|
|
|
|
try:
|
|
|
|
cursor = conn.cursor()
|
|
|
|
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
|
2023-12-05 22:15:42 +00:00
|
|
|
for table_name in [MAIN_TABLE_NAME, LANG_TABLE_NAME]:
|
|
|
|
cursor.execute(
|
|
|
|
f"""
|
|
|
|
SELECT 1
|
|
|
|
FROM information_schema.tables
|
|
|
|
WHERE table_schema = '{SCHEMA_NAME}'
|
|
|
|
AND table_name = '{table_name}';
|
|
|
|
"""
|
|
|
|
)
|
|
|
|
|
|
|
|
result = cursor.fetchone()
|
|
|
|
if result is None:
|
|
|
|
print(f"Missing `{SCHEMA_NAME}.{table_name}` table.", file=sys.stderr)
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
cursor.execute(
|
|
|
|
f"""
|
|
|
|
CREATE TABLE {SCHEMA_NAME}.{table_name}_{timestamp}
|
|
|
|
AS TABLE {SCHEMA_NAME}.{table_name}
|
|
|
|
"""
|
|
|
|
)
|
2023-12-05 18:43:13 +00:00
|
|
|
finally:
|
|
|
|
if cursor:
|
|
|
|
cursor.close()
|
|
|
|
|
|
|
|
|
|
|
|
def upsert_row(conn, row: Row):
|
|
|
|
"""Upsert the specified `Row` into the database table."""
|
|
|
|
cursor = None
|
|
|
|
try:
|
|
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute(
|
|
|
|
f"""
|
2023-12-05 22:15:42 +00:00
|
|
|
INSERT INTO {SCHEMA_NAME}.{MAIN_TABLE_NAME}
|
2023-12-05 18:43:13 +00:00
|
|
|
( site
|
|
|
|
, username
|
|
|
|
, name
|
|
|
|
, image_url
|
2023-12-07 02:52:40 +00:00
|
|
|
, title
|
2023-12-05 21:20:46 +00:00
|
|
|
, languages
|
2023-12-05 18:43:13 +00:00
|
|
|
, rapid
|
|
|
|
, blitz
|
|
|
|
, bullet
|
|
|
|
)
|
|
|
|
VALUES
|
|
|
|
( %s
|
|
|
|
, %s
|
|
|
|
, %s
|
|
|
|
, %s
|
|
|
|
, %s
|
|
|
|
, %s
|
|
|
|
, %s
|
2023-12-05 21:20:46 +00:00
|
|
|
, %s
|
2023-12-07 02:52:40 +00:00
|
|
|
, %s
|
2023-12-05 18:43:13 +00:00
|
|
|
)
|
|
|
|
ON CONFLICT
|
|
|
|
(site, username)
|
|
|
|
DO UPDATE SET
|
|
|
|
name = EXCLUDED.name,
|
|
|
|
image_url = EXCLUDED.image_url,
|
2023-12-07 02:52:40 +00:00
|
|
|
title = EXCLUDED.title,
|
2023-12-05 21:20:46 +00:00
|
|
|
languages = EXCLUDED.languages,
|
2023-12-05 18:43:13 +00:00
|
|
|
rapid = EXCLUDED.rapid,
|
|
|
|
blitz = EXCLUDED.blitz,
|
|
|
|
bullet = EXCLUDED.bullet;
|
|
|
|
""",
|
|
|
|
[
|
|
|
|
row["site"].value,
|
|
|
|
row["username"],
|
|
|
|
row.get("name"),
|
|
|
|
row.get("image_url"),
|
2023-12-07 02:52:40 +00:00
|
|
|
row["title"].value if "title" in row else None,
|
2023-12-07 03:53:54 +00:00
|
|
|
list(map(locale_to_str, row.get("languages", []))),
|
2023-12-05 18:43:13 +00:00
|
|
|
row.get("rapid"),
|
|
|
|
row.get("blitz"),
|
|
|
|
row.get("bullet"),
|
|
|
|
],
|
|
|
|
)
|
|
|
|
conn.commit()
|
|
|
|
finally:
|
2023-12-05 19:54:12 +00:00
|
|
|
if cursor:
|
|
|
|
cursor.close()
|