Load languages into the database.

main
Joshua Potter 2023-12-05 15:15:42 -07:00
parent ef5d296097
commit f20fc76081
3 changed files with 67 additions and 24 deletions

View File

@ -6,7 +6,7 @@ import aiohttp
import psycopg2
from app.chesscom import Pipeline as ChesscomPipeline
from app.database import backup_database
from app.database import backup_database, load_languages
from app.lichess import Pipeline as LichessPipeline
from app.types import Site
@ -68,6 +68,7 @@ def main():
port=args.port,
)
backup_database(conn)
load_languages(conn)
asyncio.run(
_entrypoint(
conn=conn,

View File

@ -4,10 +4,11 @@ from typing import List, Literal
from typing_extensions import TypedDict
from app.types import Site
from app.types import Site, code_to_lang
SCHEMA_NAME = "coach_scraper"
TABLE_NAME = "export"
MAIN_TABLE_NAME = "export"
LANG_TABLE_NAME = "languages"
RowKey = (
@ -46,6 +47,31 @@ class Row(TypedDict, total=False):
bullet: int
def load_languages(conn):
    """Upsert every known language into the languages table.

    Walks the `code_to_lang` mapping and writes one (code, name) row per
    entry into the table named by SCHEMA_NAME and LANG_TABLE_NAME. When a
    row with the same code already exists, its name is overwritten with the
    new value. A single commit is issued after all rows have been written,
    and the cursor is closed on the way out whether or not an error occurred.
    """
    # The statement text does not vary between rows, so build it once.
    upsert_sql = f"""
        INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
          (code, name)
        VALUES
          (%s, %s)
        ON CONFLICT
          (code)
        DO UPDATE SET
          name = EXCLUDED.name;
    """
    # NOTE(review): assumes `conn` is a psycopg2 connection, whose cursors
    # close themselves when the `with` block exits.
    with conn.cursor() as cur:
        for lang_code, lang_name in code_to_lang.items():
            cur.execute(upsert_sql, [lang_code, lang_name])
        conn.commit()
def backup_database(conn):
"""Creates a backup of the export table.
@ -55,25 +81,26 @@ def backup_database(conn):
cursor = None
try:
cursor = conn.cursor()
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
for table_name in [MAIN_TABLE_NAME, LANG_TABLE_NAME]:
cursor.execute(
f"""
SELECT 1
FROM information_schema.tables
WHERE table_schema = '{SCHEMA_NAME}'
AND table_name = '{TABLE_NAME}';
AND table_name = '{table_name}';
"""
)
result = cursor.fetchone()
if result is None:
print(f"Missing `{SCHEMA_NAME}.{TABLE_NAME}` table.", file=sys.stderr)
print(f"Missing `{SCHEMA_NAME}.{table_name}` table.", file=sys.stderr)
sys.exit(1)
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
cursor.execute(
f"""
CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME}_{timestamp}
AS TABLE {SCHEMA_NAME}.{TABLE_NAME}
CREATE TABLE {SCHEMA_NAME}.{table_name}_{timestamp}
AS TABLE {SCHEMA_NAME}.{table_name}
"""
)
finally:
@ -88,7 +115,7 @@ def upsert_row(conn, row: Row):
cursor = conn.cursor()
cursor.execute(
f"""
INSERT INTO {SCHEMA_NAME}.{TABLE_NAME}
INSERT INTO {SCHEMA_NAME}.{MAIN_TABLE_NAME}
( site
, username
, name

View File

@ -20,3 +20,18 @@ ON
coach_scraper.export
USING
BTREE (site, username);
-- Recreate the languages table from scratch: any existing table (and the
-- rows it holds) is dropped before the new definition is created.
DROP TABLE IF EXISTS coach_scraper.languages;
-- One row per language: a surrogate integer key, a short language code,
-- and a display name. Both text columns are mandatory.
CREATE TABLE coach_scraper.languages
( id SERIAL PRIMARY KEY
, code VARCHAR(8) NOT NULL
, name VARCHAR(128) NOT NULL
);
-- Enforce one row per language code. An INSERT ... ON CONFLICT (code)
-- upsert needs a unique index or constraint on this column to target.
CREATE UNIQUE INDEX IF NOT EXISTS
code_unique
ON
coach_scraper.languages
USING
BTREE (code);