Load languages into the database.
parent
ef5d296097
commit
f20fc76081
|
@ -6,7 +6,7 @@ import aiohttp
|
|||
import psycopg2
|
||||
|
||||
from app.chesscom import Pipeline as ChesscomPipeline
|
||||
from app.database import backup_database
|
||||
from app.database import backup_database, load_languages
|
||||
from app.lichess import Pipeline as LichessPipeline
|
||||
from app.types import Site
|
||||
|
||||
|
@ -68,6 +68,7 @@ def main():
|
|||
port=args.port,
|
||||
)
|
||||
backup_database(conn)
|
||||
load_languages(conn)
|
||||
asyncio.run(
|
||||
_entrypoint(
|
||||
conn=conn,
|
||||
|
|
|
@ -4,10 +4,11 @@ from typing import List, Literal
|
|||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from app.types import Site
|
||||
from app.types import Site, code_to_lang
|
||||
|
||||
SCHEMA_NAME = "coach_scraper"
|
||||
TABLE_NAME = "export"
|
||||
MAIN_TABLE_NAME = "export"
|
||||
LANG_TABLE_NAME = "languages"
|
||||
|
||||
|
||||
RowKey = (
|
||||
|
@ -46,6 +47,31 @@ class Row(TypedDict, total=False):
|
|||
bullet: int
|
||||
|
||||
|
||||
def load_languages(conn):
    """Upsert every known language into the languages table.

    Walks the ``code_to_lang`` mapping and writes each (code, name) pair
    into the table named by ``SCHEMA_NAME`` and ``LANG_TABLE_NAME``. A row
    whose code already exists has its name overwritten. The connection is
    committed once, after all rows have been written, and the cursor is
    closed whether or not the statements succeed.
    """
    # The table identifiers come from module constants, so they are
    # interpolated into the statement text; the row values are passed
    # separately through the driver's placeholders.
    upsert_sql = f"""
        INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
            (code, name)
        VALUES
            (%s, %s)
        ON CONFLICT
            (code)
        DO UPDATE SET
            name = EXCLUDED.name;
        """

    cursor = conn.cursor()
    try:
        for lang_code, lang_name in code_to_lang.items():
            cursor.execute(upsert_sql, [lang_code, lang_name])
        conn.commit()
    finally:
        cursor.close()
|
||||
|
||||
|
||||
def backup_database(conn):
|
||||
"""Creates a backup of the export table.
|
||||
|
||||
|
@ -55,25 +81,26 @@ def backup_database(conn):
|
|||
cursor = None
|
||||
try:
|
||||
cursor = conn.cursor()
|
||||
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
|
||||
for table_name in [MAIN_TABLE_NAME, LANG_TABLE_NAME]:
|
||||
cursor.execute(
|
||||
f"""
|
||||
SELECT 1
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = '{SCHEMA_NAME}'
|
||||
AND table_name = '{TABLE_NAME}';
|
||||
AND table_name = '{table_name}';
|
||||
"""
|
||||
)
|
||||
|
||||
result = cursor.fetchone()
|
||||
if result is None:
|
||||
print(f"Missing `{SCHEMA_NAME}.{TABLE_NAME}` table.", file=sys.stderr)
|
||||
print(f"Missing `{SCHEMA_NAME}.{table_name}` table.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
|
||||
cursor.execute(
|
||||
f"""
|
||||
CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME}_{timestamp}
|
||||
AS TABLE {SCHEMA_NAME}.{TABLE_NAME}
|
||||
CREATE TABLE {SCHEMA_NAME}.{table_name}_{timestamp}
|
||||
AS TABLE {SCHEMA_NAME}.{table_name}
|
||||
"""
|
||||
)
|
||||
finally:
|
||||
|
@ -88,7 +115,7 @@ def upsert_row(conn, row: Row):
|
|||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
f"""
|
||||
INSERT INTO {SCHEMA_NAME}.{TABLE_NAME}
|
||||
INSERT INTO {SCHEMA_NAME}.{MAIN_TABLE_NAME}
|
||||
( site
|
||||
, username
|
||||
, name
|
||||
|
|
15
sql/init.sql
15
sql/init.sql
|
@ -20,3 +20,18 @@ ON
|
|||
coach_scraper.export
|
||||
USING
|
||||
BTREE (site, username);
|
||||
|
||||
-- Languages table: dropped (if present) and recreated by this script.
DROP TABLE IF EXISTS coach_scraper.languages;

CREATE TABLE coach_scraper.languages (
    id SERIAL PRIMARY KEY,
    code VARCHAR(8) NOT NULL,
    name VARCHAR(128) NOT NULL
);

-- Enforce at most one row per language code.
CREATE UNIQUE INDEX IF NOT EXISTS code_unique
    ON coach_scraper.languages
    USING BTREE (code);
|
||||
|
|
Loading…
Reference in New Issue