Load languages into the database.
parent
ef5d296097
commit
f20fc76081
|
@ -6,7 +6,7 @@ import aiohttp
|
||||||
import psycopg2
|
import psycopg2
|
||||||
|
|
||||||
from app.chesscom import Pipeline as ChesscomPipeline
|
from app.chesscom import Pipeline as ChesscomPipeline
|
||||||
from app.database import backup_database
|
from app.database import backup_database, load_languages
|
||||||
from app.lichess import Pipeline as LichessPipeline
|
from app.lichess import Pipeline as LichessPipeline
|
||||||
from app.types import Site
|
from app.types import Site
|
||||||
|
|
||||||
|
@ -68,6 +68,7 @@ def main():
|
||||||
port=args.port,
|
port=args.port,
|
||||||
)
|
)
|
||||||
backup_database(conn)
|
backup_database(conn)
|
||||||
|
load_languages(conn)
|
||||||
asyncio.run(
|
asyncio.run(
|
||||||
_entrypoint(
|
_entrypoint(
|
||||||
conn=conn,
|
conn=conn,
|
||||||
|
|
|
@ -4,10 +4,11 @@ from typing import List, Literal
|
||||||
|
|
||||||
from typing_extensions import TypedDict
|
from typing_extensions import TypedDict
|
||||||
|
|
||||||
from app.types import Site
|
from app.types import Site, code_to_lang
|
||||||
|
|
||||||
SCHEMA_NAME = "coach_scraper"
|
SCHEMA_NAME = "coach_scraper"
|
||||||
TABLE_NAME = "export"
|
MAIN_TABLE_NAME = "export"
|
||||||
|
LANG_TABLE_NAME = "languages"
|
||||||
|
|
||||||
|
|
||||||
RowKey = (
|
RowKey = (
|
||||||
|
@ -46,6 +47,31 @@ class Row(TypedDict, total=False):
|
||||||
bullet: int
|
bullet: int
|
||||||
|
|
||||||
|
|
||||||
|
def load_languages(conn):
    """Upsert every known language into the languages table.

    Each ``(code, name)`` pair from ``code_to_lang`` is inserted into
    ``{SCHEMA_NAME}.{LANG_TABLE_NAME}``. If a row with the same ``code``
    already exists, its ``name`` is overwritten. All rows are written in a
    single transaction that is committed at the end.

    Args:
        conn: An open DB-API connection to the PostgreSQL database
            (presumably the psycopg2 connection created by the entry
            point -- confirm against the caller).

    Raises:
        Exception: Any database error is re-raised after the transaction
            has been rolled back.
    """
    # The statement is identical for every row, so build it once instead of
    # re-formatting the f-string on each iteration.
    query = f"""
        INSERT INTO {SCHEMA_NAME}.{LANG_TABLE_NAME}
        (code, name)
        VALUES
        (%s, %s)
        ON CONFLICT
        (code)
        DO UPDATE SET
        name = EXCLUDED.name;
    """

    cursor = None
    try:
        cursor = conn.cursor()
        cursor.executemany(query, code_to_lang.items())
        conn.commit()
    except Exception:
        # The connection is owned by the caller and reused afterwards; do not
        # hand it back stuck in an aborted transaction.
        conn.rollback()
        raise
    finally:
        if cursor is not None:
            cursor.close()
|
||||||
|
|
||||||
|
|
||||||
def backup_database(conn):
|
def backup_database(conn):
|
||||||
"""Creates a backup of the export table.
|
"""Creates a backup of the export table.
|
||||||
|
|
||||||
|
@ -55,27 +81,28 @@ def backup_database(conn):
|
||||||
cursor = None
|
cursor = None
|
||||||
try:
|
try:
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
cursor.execute(
|
|
||||||
f"""
|
|
||||||
SELECT 1
|
|
||||||
FROM information_schema.tables
|
|
||||||
WHERE table_schema = '{SCHEMA_NAME}'
|
|
||||||
AND table_name = '{TABLE_NAME}';
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
result = cursor.fetchone()
|
|
||||||
if result is None:
|
|
||||||
print(f"Missing `{SCHEMA_NAME}.{TABLE_NAME}` table.", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
|
timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
|
||||||
cursor.execute(
|
for table_name in [MAIN_TABLE_NAME, LANG_TABLE_NAME]:
|
||||||
f"""
|
cursor.execute(
|
||||||
CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME}_{timestamp}
|
f"""
|
||||||
AS TABLE {SCHEMA_NAME}.{TABLE_NAME}
|
SELECT 1
|
||||||
"""
|
FROM information_schema.tables
|
||||||
)
|
WHERE table_schema = '{SCHEMA_NAME}'
|
||||||
|
AND table_name = '{table_name}';
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
result = cursor.fetchone()
|
||||||
|
if result is None:
|
||||||
|
print(f"Missing `{SCHEMA_NAME}.{table_name}` table.", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
cursor.execute(
|
||||||
|
f"""
|
||||||
|
CREATE TABLE {SCHEMA_NAME}.{table_name}_{timestamp}
|
||||||
|
AS TABLE {SCHEMA_NAME}.{table_name}
|
||||||
|
"""
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
if cursor:
|
if cursor:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
@ -88,7 +115,7 @@ def upsert_row(conn, row: Row):
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
f"""
|
f"""
|
||||||
INSERT INTO {SCHEMA_NAME}.{TABLE_NAME}
|
INSERT INTO {SCHEMA_NAME}.{MAIN_TABLE_NAME}
|
||||||
( site
|
( site
|
||||||
, username
|
, username
|
||||||
, name
|
, name
|
||||||
|
|
15
sql/init.sql
15
sql/init.sql
|
@ -20,3 +20,18 @@ ON
|
||||||
coach_scraper.export
|
coach_scraper.export
|
||||||
USING
|
USING
|
||||||
BTREE (site, username);
|
BTREE (site, username);
|
||||||
|
|
||||||
|
-- Language lookup table. Any existing copy is dropped first, so running this
-- script recreates the table empty.
DROP TABLE IF EXISTS coach_scraper.languages;

CREATE TABLE coach_scraper.languages
  ( id SERIAL PRIMARY KEY
  , code VARCHAR(8) NOT NULL
  , name VARCHAR(128) NOT NULL
  );

-- Enforce one row per language code; a unique index on this column is also
-- what allows an INSERT ... ON CONFLICT (code) upsert to target it.
CREATE UNIQUE INDEX IF NOT EXISTS code_unique
  ON coach_scraper.languages
  USING BTREE (code);
|
||||||
|
|
Loading…
Reference in New Issue