Load languages into the database.

main
Joshua Potter 2023-12-05 15:15:42 -07:00
parent ef5d296097
commit f20fc76081
3 changed files with 67 additions and 24 deletions

View File

@ -6,7 +6,7 @@ import aiohttp
import psycopg2 import psycopg2
from app.chesscom import Pipeline as ChesscomPipeline from app.chesscom import Pipeline as ChesscomPipeline
from app.database import backup_database from app.database import backup_database, load_languages
from app.lichess import Pipeline as LichessPipeline from app.lichess import Pipeline as LichessPipeline
from app.types import Site from app.types import Site
@ -68,6 +68,7 @@ def main():
port=args.port, port=args.port,
) )
backup_database(conn) backup_database(conn)
load_languages(conn)
asyncio.run( asyncio.run(
_entrypoint( _entrypoint(
conn=conn, conn=conn,

View File

@ -4,10 +4,11 @@ from typing import List, Literal
from typing_extensions import TypedDict from typing_extensions import TypedDict
from app.types import Site from app.types import Site, code_to_lang
SCHEMA_NAME = "coach_scraper" SCHEMA_NAME = "coach_scraper"
TABLE_NAME = "export" MAIN_TABLE_NAME = "export"
LANG_TABLE_NAME = "languages"
RowKey = ( RowKey = (
@ -46,6 +47,31 @@ class Row(TypedDict, total=False):
bullet: int bullet: int
def load_languages(conn):
    """Upsert every known language into the languages table.

    Each ``(code, name)`` pair from ``code_to_lang`` is inserted into
    ``{SCHEMA_NAME}.{LANG_TABLE_NAME}``; when a row with the same code is
    already present, its name is overwritten with the new value. The work is
    committed once all pairs have been executed, and the cursor is closed
    whether or not an error occurred.

    Args:
        conn: An open database connection exposing ``cursor()`` and
            ``commit()``.
    """
    # The statement text does not depend on the row, so build it one time.
    upsert_sql = f"""
        INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
        (code, name)
        VALUES
        (%s, %s)
        ON CONFLICT
        (code)
        DO UPDATE SET
        name = EXCLUDED.name;
        """

    cur = None
    try:
        cur = conn.cursor()
        for pair in code_to_lang.items():
            # Values are bound as parameters, never interpolated into the SQL.
            cur.execute(upsert_sql, list(pair))
        conn.commit()
    finally:
        # `cur` is still None if conn.cursor() itself raised.
        if cur:
            cur.close()
def backup_database(conn): def backup_database(conn):
"""Creates a backup of the export table. """Creates a backup of the export table.
@ -55,27 +81,28 @@ def backup_database(conn):
cursor = None cursor = None
try: try:
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute(
f"""
SELECT 1
FROM information_schema.tables
WHERE table_schema = '{SCHEMA_NAME}'
AND table_name = '{TABLE_NAME}';
"""
)
result = cursor.fetchone()
if result is None:
print(f"Missing `{SCHEMA_NAME}.{TABLE_NAME}` table.", file=sys.stderr)
sys.exit(1)
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds()) timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
cursor.execute( for table_name in [MAIN_TABLE_NAME, LANG_TABLE_NAME]:
f""" cursor.execute(
CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME}_{timestamp} f"""
AS TABLE {SCHEMA_NAME}.{TABLE_NAME} SELECT 1
""" FROM information_schema.tables
) WHERE table_schema = '{SCHEMA_NAME}'
AND table_name = '{table_name}';
"""
)
result = cursor.fetchone()
if result is None:
print(f"Missing `{SCHEMA_NAME}.{table_name}` table.", file=sys.stderr)
sys.exit(1)
cursor.execute(
f"""
CREATE TABLE {SCHEMA_NAME}.{table_name}_{timestamp}
AS TABLE {SCHEMA_NAME}.{table_name}
"""
)
finally: finally:
if cursor: if cursor:
cursor.close() cursor.close()
@ -88,7 +115,7 @@ def upsert_row(conn, row: Row):
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute( cursor.execute(
f""" f"""
INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} INSERT INTO {SCHEMA_NAME}.{MAIN_TABLE_NAME}
( site ( site
, username , username
, name , name

View File

@ -20,3 +20,18 @@ ON
coach_scraper.export coach_scraper.export
USING USING
BTREE (site, username); BTREE (site, username);
-- Recreate the languages lookup table from scratch: any existing
-- coach_scraper.languages table (and the rows in it) is dropped first.
DROP TABLE IF EXISTS coach_scraper.languages;
-- One row per language: a surrogate integer key, a short language code
-- (at most 8 characters) and a display name (at most 128 characters).
CREATE TABLE coach_scraper.languages
( id SERIAL PRIMARY KEY
, code VARCHAR(8) NOT NULL
, name VARCHAR(128) NOT NULL
);
-- Enforce one row per language code. An `ON CONFLICT (code)` upsert against
-- this table needs a unique index or constraint on `code` to target.
CREATE UNIQUE INDEX IF NOT EXISTS
code_unique
ON
coach_scraper.languages
USING
BTREE (code);