From 1c0dc05b42dd6103a4014b6f1ad97fa1571f86b3 Mon Sep 17 00:00:00 2001 From: Joshua Potter Date: Mon, 4 Dec 2023 08:14:33 -0700 Subject: [PATCH] Separate initialization from loading. Prefer upserts. --- README.md | 19 ++++++++------- sql/{load_export.sql => export.sql} | 27 +++++++++++----------- sql/init.sql | 36 +++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 21 deletions(-) rename sql/{load_export.sql => export.sql} (58%) create mode 100644 sql/init.sql diff --git a/README.md b/README.md index 4ca693f..ee73333 100644 --- a/README.md +++ b/README.md @@ -65,16 +65,18 @@ $ pg_ctl -D db stop ### Loading Data -To load all exported coach data into a local postgres instance, use the provided -`sql/load_export.sql` file. First concatenate all exported content: +To load all exported coach data into a local Postgres instance, use the provided +`sql/*.sql` files. First initialize the export schema/table: +```bash +$ psql -h @scraper -f sql/init.sql +``` +Next, concatenate all exported content and dump into the newly created table: ```bash $ cat data/{chesscom,lichess}/export.json > data/export.json +$ psql -h @scraper -f sql/export.sql -v export="'$PWD/data/export.json'" ``` -Then (assuming your database cluster has been initialized at `@scraper`), you -can run: -```bash -$ psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'" -``` +Re-running will automatically create backups and replace the coach data found +in `coach_scraper.export`. ### E2E @@ -85,7 +87,8 @@ connection available at `@scraper`: ```bash nix run . -- --user-agent -s chesscom -s lichess cat data/{chesscom,lichess}/export.json > data/export.json -psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'" +psql -h @scraper -f sql/init.sql +psql -h @scraper -f sql/export.sql -v export="'$PWD/data/export.json'" ``` ## Development diff --git a/sql/load_export.sql b/sql/export.sql similarity index 58% rename from sql/load_export.sql rename to sql/export.sql index 1394df5..b5079e0 100644 --- a/sql/load_export.sql +++ b/sql/export.sql @@ -1,23 +1,12 @@ -CREATE SCHEMA IF NOT EXISTS coach_scraper; - DO $$ BEGIN EXECUTE format( - 'ALTER TABLE IF EXISTS coach_scraper.export ' - 'RENAME TO export_%s;', + 'CREATE TABLE coach_scraper.export_%s AS TABLE coach_scraper.export', TRUNC(EXTRACT(EPOCH FROM CURRENT_TIMESTAMP), 0) ); END; $$ LANGUAGE plpgsql; -CREATE TABLE coach_scraper.export - ( username VARCHAR(255) NOT NULL - , site VARCHAR(16) NOT NULL - , rapid INT - , blitz INT - , bullet INT - ); - CREATE TEMPORARY TABLE pg_temp.coach_scraper_export (data JSONB); SELECT format( @@ -26,6 +15,12 @@ SELECT format( ) \gexec INSERT INTO coach_scraper.export + ( username + , site + , rapid + , blitz + , bullet + ) SELECT data->>'username', data->>'site', @@ -33,4 +28,10 @@ SELECT (data->>'blitz')::INT, (data->>'bullet')::INT FROM - pg_temp.coach_scraper_export; + pg_temp.coach_scraper_export +ON CONFLICT + (site, username) +DO UPDATE SET + rapid = EXCLUDED.rapid, + blitz = EXCLUDED.blitz, + bullet = EXCLUDED.bullet; diff --git a/sql/init.sql b/sql/init.sql new file mode 100644 index 0000000..595e9b4 --- /dev/null +++ b/sql/init.sql @@ -0,0 +1,36 @@ +CREATE SCHEMA IF NOT EXISTS coach_scraper; + +CREATE TABLE IF NOT EXISTS coach_scraper.export + ( id SERIAL PRIMARY KEY + , username VARCHAR(255) NOT NULL + , site VARCHAR(16) NOT NULL + , rapid INT + , blitz INT + , bullet INT + ); + +CREATE UNIQUE INDEX IF NOT EXISTS + site_username_unique +ON + coach_scraper.export +USING + BTREE (site, username); + +DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM information_schema.constraint_column_usage + WHERE table_schema = 'coach_scraper' + AND table_name = 'export' + AND constraint_name = 'site_username_unique' + ) THEN + EXECUTE 'ALTER TABLE + coach_scraper.export + ADD CONSTRAINT + site_username_unique + UNIQUE USING INDEX + site_username_unique'; + END IF; + END; +$$ LANGUAGE plpgsql;