Separate initialization from loading. Prefer upserts.
parent
9a5274f0dd
commit
1c0dc05b42
19
README.md
19
README.md
|
@ -65,16 +65,18 @@ $ pg_ctl -D db stop
|
||||||
|
|
||||||
### Loading Data
|
### Loading Data
|
||||||
|
|
||||||
To load all exported coach data into a local postgres instance, use the provided
|
To load all exported coach data into a local Postgres instance, use the provided
|
||||||
`sql/load_export.sql` file. First concatenate all exported content:
|
`sql/*.sql` files. First initialize the export schema/table:
|
||||||
|
```bash
|
||||||
|
$ psql -h @scraper -f sql/init.sql
|
||||||
|
```
|
||||||
|
Next, concatenate all exported content and dump into the newly created table:
|
||||||
```bash
|
```bash
|
||||||
$ cat data/{chesscom,lichess}/export.json > data/export.json
|
$ cat data/{chesscom,lichess}/export.json > data/export.json
|
||||||
|
$ psql -h @scraper -f sql/export.sql -v export="'$PWD/data/export.json'"
|
||||||
```
|
```
|
||||||
Then (assuming your database cluster has been initialized at `@scraper`), you
|
Re-running will automatically create backups and replace the coach data found
|
||||||
can run:
|
in `coach_scraper.export`.
|
||||||
```bash
|
|
||||||
$ psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
|
|
||||||
```
|
|
||||||
|
|
||||||
### E2E
|
### E2E
|
||||||
|
|
||||||
|
@ -85,7 +87,8 @@ connection available at `@scraper`:
|
||||||
```bash
|
```bash
|
||||||
nix run . -- --user-agent <your-email> -s chesscom -s lichess
|
nix run . -- --user-agent <your-email> -s chesscom -s lichess
|
||||||
cat data/{chesscom,lichess}/export.json > data/export.json
|
cat data/{chesscom,lichess}/export.json > data/export.json
|
||||||
psql -h @scraper -f sql/load_export.sql -v export="'$PWD/data/export.json'"
|
psql -h @scraper -f sql/init.sql
|
||||||
|
psql -h @scraper -f sql/export.sql -v export="'$PWD/data/export.json'"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
|
@ -1,23 +1,12 @@
|
||||||
CREATE SCHEMA IF NOT EXISTS coach_scraper;
|
|
||||||
|
|
||||||
DO $$
|
DO $$
|
||||||
BEGIN
|
BEGIN
|
||||||
EXECUTE format(
|
EXECUTE format(
|
||||||
'ALTER TABLE IF EXISTS coach_scraper.export '
|
'CREATE TABLE coach_scraper.export_%s AS TABLE coach_scraper.export',
|
||||||
'RENAME TO export_%s;',
|
|
||||||
TRUNC(EXTRACT(EPOCH FROM CURRENT_TIMESTAMP), 0)
|
TRUNC(EXTRACT(EPOCH FROM CURRENT_TIMESTAMP), 0)
|
||||||
);
|
);
|
||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql;
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
CREATE TABLE coach_scraper.export
|
|
||||||
( username VARCHAR(255) NOT NULL
|
|
||||||
, site VARCHAR(16) NOT NULL
|
|
||||||
, rapid INT
|
|
||||||
, blitz INT
|
|
||||||
, bullet INT
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TEMPORARY TABLE pg_temp.coach_scraper_export (data JSONB);
|
CREATE TEMPORARY TABLE pg_temp.coach_scraper_export (data JSONB);
|
||||||
|
|
||||||
SELECT format(
|
SELECT format(
|
||||||
|
@ -26,6 +15,12 @@ SELECT format(
|
||||||
) \gexec
|
) \gexec
|
||||||
|
|
||||||
INSERT INTO coach_scraper.export
|
INSERT INTO coach_scraper.export
|
||||||
|
( username
|
||||||
|
, site
|
||||||
|
, rapid
|
||||||
|
, blitz
|
||||||
|
, bullet
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
data->>'username',
|
data->>'username',
|
||||||
data->>'site',
|
data->>'site',
|
||||||
|
@ -33,4 +28,10 @@ SELECT
|
||||||
(data->>'blitz')::INT,
|
(data->>'blitz')::INT,
|
||||||
(data->>'bullet')::INT
|
(data->>'bullet')::INT
|
||||||
FROM
|
FROM
|
||||||
pg_temp.coach_scraper_export;
|
pg_temp.coach_scraper_export
|
||||||
|
ON CONFLICT
|
||||||
|
(site, username)
|
||||||
|
DO UPDATE SET
|
||||||
|
rapid = EXCLUDED.rapid,
|
||||||
|
blitz = EXCLUDED.blitz,
|
||||||
|
bullet = EXCLUDED.bullet;
|
|
@ -0,0 +1,36 @@
|
||||||
|
CREATE SCHEMA IF NOT EXISTS coach_scraper;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS coach_scraper.export
|
||||||
|
( id SERIAL PRIMARY KEY
|
||||||
|
, username VARCHAR(255) NOT NULL
|
||||||
|
, site VARCHAR(16) NOT NULL
|
||||||
|
, rapid INT
|
||||||
|
, blitz INT
|
||||||
|
, bullet INT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE UNIQUE INDEX IF NOT EXISTS
|
||||||
|
site_username_unique
|
||||||
|
ON
|
||||||
|
coach_scraper.export
|
||||||
|
USING
|
||||||
|
BTREE (site, username);
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM information_schema.constraint_column_usage
|
||||||
|
WHERE table_schema = 'coach_scraper'
|
||||||
|
AND table_name = 'export'
|
||||||
|
AND constraint_name = 'site_username_unique'
|
||||||
|
) THEN
|
||||||
|
EXECUTE 'ALTER TABLE
|
||||||
|
coach_scraper.export
|
||||||
|
ADD CONSTRAINT
|
||||||
|
site_username_unique
|
||||||
|
UNIQUE USING INDEX
|
||||||
|
site_username_unique';
|
||||||
|
END IF;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
Loading…
Reference in New Issue