Separate initialization from loading. Prefer upserts.
parent
9a5274f0dd
commit
1c0dc05b42
19
README.md
19
README.md
|
@ -65,16 +65,18 @@ $ pg_ctl -D db stop
|
|||
|
||||
### Loading Data
|
||||
|
||||
To load all exported coach data into a local postgres instance, use the provided
|
||||
`sql/load_export.sql` file. First concatenate all exported content:
|
||||
To load all exported coach data into a local Postgres instance, use the provided
|
||||
`sql/*.sql` files. First initialize the export schema/table:
|
||||
```bash
|
||||
$ psql -h @scraper -f sql/init.sql
|
||||
```
|
||||
Next, concatenate all exported content and dump into the newly created table:
|
||||
```bash
|
||||
$ cat data/{chesscom,lichess}/export.json > data/export.json
|
||||
$ psql -h @scraper -f sql/export.sql -v export="'$PWD/data/export.json'"
|
||||
```
|
||||
Then (assuming your database cluster has been initialized at `@scraper`), you
|
||||
can run:
|
||||
```bash
|
||||
$ psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
|
||||
```
|
||||
Re-running will automatically create backups and replace the coach data found
|
||||
in `coach_scraper.export`.
|
||||
|
||||
### E2E
|
||||
|
||||
|
@ -85,7 +87,8 @@ connection available at `@scraper`:
|
|||
```bash
|
||||
nix run . -- --user-agent <your-email> -s chesscom -s lichess
|
||||
cat data/{chesscom,lichess}/export.json > data/export.json
|
||||
psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
|
||||
psql -h @scraper -f sql/init.sql
|
||||
psql -h @scraper -f sql/export.sql -v export="'$PWD/data/export.json'"
|
||||
```
|
||||
|
||||
## Development
|
||||
|
|
|
@ -1,23 +1,12 @@
|
|||
CREATE SCHEMA IF NOT EXISTS coach_scraper;
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
EXECUTE format(
|
||||
'ALTER TABLE IF EXISTS coach_scraper.export '
|
||||
'RENAME TO export_%s;',
|
||||
'CREATE TABLE coach_scraper.export_%s AS TABLE coach_scraper.export',
|
||||
TRUNC(EXTRACT(EPOCH FROM CURRENT_TIMESTAMP), 0)
|
||||
);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE TABLE coach_scraper.export
|
||||
( username VARCHAR(255) NOT NULL
|
||||
, site VARCHAR(16) NOT NULL
|
||||
, rapid INT
|
||||
, blitz INT
|
||||
, bullet INT
|
||||
);
|
||||
|
||||
CREATE TEMPORARY TABLE pg_temp.coach_scraper_export (data JSONB);
|
||||
|
||||
SELECT format(
|
||||
|
@ -26,6 +15,12 @@ SELECT format(
|
|||
) \gexec
|
||||
|
||||
INSERT INTO coach_scraper.export
|
||||
( username
|
||||
, site
|
||||
, rapid
|
||||
, blitz
|
||||
, bullet
|
||||
)
|
||||
SELECT
|
||||
data->>'username',
|
||||
data->>'site',
|
||||
|
@ -33,4 +28,10 @@ SELECT
|
|||
(data->>'blitz')::INT,
|
||||
(data->>'bullet')::INT
|
||||
FROM
|
||||
pg_temp.coach_scraper_export;
|
||||
pg_temp.coach_scraper_export
|
||||
ON CONFLICT
|
||||
(site, username)
|
||||
DO UPDATE SET
|
||||
rapid = EXCLUDED.rapid,
|
||||
blitz = EXCLUDED.blitz,
|
||||
bullet = EXCLUDED.bullet;
|
|
@ -0,0 +1,36 @@
|
|||
CREATE SCHEMA IF NOT EXISTS coach_scraper;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS coach_scraper.export
|
||||
( id SERIAL PRIMARY KEY
|
||||
, username VARCHAR(255) NOT NULL
|
||||
, site VARCHAR(16) NOT NULL
|
||||
, rapid INT
|
||||
, blitz INT
|
||||
, bullet INT
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS
|
||||
site_username_unique
|
||||
ON
|
||||
coach_scraper.export
|
||||
USING
|
||||
BTREE (site, username);
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM information_schema.constraint_column_usage
|
||||
WHERE table_schema = 'coach_scraper'
|
||||
AND table_name = 'export'
|
||||
AND constraint_name = 'site_username_unique'
|
||||
) THEN
|
||||
EXECUTE 'ALTER TABLE
|
||||
coach_scraper.export
|
||||
ADD CONSTRAINT
|
||||
site_username_unique
|
||||
UNIQUE USING INDEX
|
||||
site_username_unique';
|
||||
END IF;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
Loading…
Reference in New Issue