summaryrefslogtreecommitdiff
path: root/src/postprocess
diff options
context:
space:
mode:
Diffstat (limited to 'src/postprocess')
-rw-r--r--src/postprocess/postgres.sql110
-rw-r--r--src/postprocess/postgres.txt76
2 files changed, 110 insertions, 76 deletions
diff --git a/src/postprocess/postgres.sql b/src/postprocess/postgres.sql
new file mode 100644
index 0000000..2d89d1c
--- /dev/null
+++ b/src/postprocess/postgres.sql
@@ -0,0 +1,110 @@
1\timing on
2
3CREATE TABLE telefonbuch (
4 id SERIAL PRIMARY KEY,
5 presence_flag bigint,
6 reverse_flag bigint,
7 biz_flag bigint,
8 zip character varying(64)[] DEFAULT '{}',
9 nachname character varying(256)[] DEFAULT '{}',
10 vorname character varying(192)[] DEFAULT '{}',
11 zusaetze character varying(512)[] DEFAULT '{}',
12 strasse character varying(64)[] DEFAULT '{}',
13 hausnummer character varying(32)[] DEFAULT '{}',
14 verweise text[] DEFAULT '{}',
15 ort character varying(64)[] DEFAULT '{}',
16 vorwahl character varying(10)[] DEFAULT '{}',
17 rufnummer character varying(32)[] DEFAULT '{}',
18 web character varying(256)[] DEFAULT '{}',
19 email character varying(256)[] DEFAULT '{}',
20 coords character varying(32)[] DEFAULT '{}'
21);
22
23COPY Telefonbuch ( presence_flag, reverse_flag, biz_flag, zip, nachname, vorname, zusaetze, strasse, hausnummer, verweise, ort, vorwahl, rufnummer, web, email, coords ) FROM '/Users/erdgeist/Coding/Telefonbuch/work/telefonbuch.txt' WITH NULL AS '';
24
25-- # Clean up the old ones
26DROP TABLE IF EXISTS table_nachname, table_vorname, table_zusaetze, table_strasse, table_hausnummer, table_zip, table_ort, table_verweise, table_vorwahl, table_rufnummer, table_web, table_email;
27
28-- # tables we use for indexing
29CREATE TABLE table_nachname ( telefonbuch_id integer, offs integer, value text);
30CREATE TABLE table_vorname ( telefonbuch_id integer, offs integer, value text);
31CREATE TABLE table_zusaetze ( telefonbuch_id integer, offs integer, value text);
32CREATE TABLE table_strasse ( telefonbuch_id integer, offs integer, value text);
33CREATE TABLE table_hausnummer ( telefonbuch_id integer, offs integer, value text);
34CREATE TABLE table_zip ( telefonbuch_id integer, offs integer, value text);
35CREATE TABLE table_ort ( telefonbuch_id integer, offs integer, value text);
36CREATE TABLE table_verweise ( telefonbuch_id integer, offs integer, value text);
37CREATE TABLE table_vorwahl ( telefonbuch_id integer, offs integer, value text);
38CREATE TABLE table_rufnummer ( telefonbuch_id integer, offs integer, value text);
39CREATE TABLE table_web ( telefonbuch_id integer, offs integer, value text);
40CREATE TABLE table_email ( telefonbuch_id integer, offs integer, value text);
41
42-- # Take copies of all content rows for indexing
43INSERT INTO table_nachname ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(nachname ) WITH ORDINALITY u(value, offs);
44INSERT INTO table_vorname ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(vorname ) WITH ORDINALITY u(value, offs);
45INSERT INTO table_zusaetze ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(zusaetze ) WITH ORDINALITY u(value, offs);
46INSERT INTO table_strasse ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(strasse ) WITH ORDINALITY u(value, offs);
47INSERT INTO table_hausnummer ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(hausnummer ) WITH ORDINALITY u(value, offs);
48INSERT INTO table_zip ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(zip ) WITH ORDINALITY u(value, offs);
49INSERT INTO table_ort ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(ort ) WITH ORDINALITY u(value, offs);
50INSERT INTO table_verweise ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(verweise ) WITH ORDINALITY u(value, offs);
51INSERT INTO table_vorwahl ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(vorwahl ) WITH ORDINALITY u(value, offs);
52INSERT INTO table_rufnummer ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(rufnummer ) WITH ORDINALITY u(value, offs);
53INSERT INTO table_web ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(web ) WITH ORDINALITY u(value, offs);
54INSERT INTO table_email ( telefonbuch_id, offs, value ) SELECT id, offs, value FROM Telefonbuch CROSS JOIN unnest(email ) WITH ORDINALITY u(value, offs);
55
56ALTER TABLE table_nachname ADD PRIMARY KEY (telefonbuch_id, offs);
57ALTER TABLE table_vorname ADD PRIMARY KEY (telefonbuch_id, offs);
58ALTER TABLE table_zusaetze ADD PRIMARY KEY (telefonbuch_id, offs);
59ALTER TABLE table_strasse ADD PRIMARY KEY (telefonbuch_id, offs);
60ALTER TABLE table_hausnummer ADD PRIMARY KEY (telefonbuch_id, offs);
61ALTER TABLE table_zip ADD PRIMARY KEY (telefonbuch_id, offs);
62ALTER TABLE table_ort ADD PRIMARY KEY (telefonbuch_id, offs);
63ALTER TABLE table_verweise ADD PRIMARY KEY (telefonbuch_id, offs);
64ALTER TABLE table_vorwahl ADD PRIMARY KEY (telefonbuch_id, offs);
65ALTER TABLE table_rufnummer ADD PRIMARY KEY (telefonbuch_id, offs);
66ALTER TABLE table_web ADD PRIMARY KEY (telefonbuch_id, offs);
67ALTER TABLE table_email ADD PRIMARY KEY (telefonbuch_id, offs);
68
69-- # equality indexes
70CREATE INDEX idx_nachname ON table_nachname USING btree (value, telefonbuch_id, offs);
71CREATE INDEX idx_vorname ON table_vorname USING btree (value, telefonbuch_id, offs);
72CREATE INDEX idx_zusaetze ON table_zusaetze USING btree (value, telefonbuch_id, offs);
73CREATE INDEX idx_strasse ON table_strasse USING btree (value, telefonbuch_id, offs);
74CREATE INDEX idx_hausnummer ON table_hausnummer USING btree (value, telefonbuch_id, offs);
75CREATE INDEX idx_zip ON table_zip USING btree (value, telefonbuch_id, offs);
76CREATE INDEX idx_ort ON table_ort USING btree (value, telefonbuch_id, offs);
77CREATE INDEX idx_verweise ON table_verweise USING btree (value, telefonbuch_id, offs);
78CREATE INDEX idx_vorwahl ON table_vorwahl USING btree (value, telefonbuch_id, offs);
79CREATE INDEX idx_rufnummer ON table_rufnummer USING btree (value, telefonbuch_id, offs);
80CREATE INDEX idx_web ON table_web USING btree (value, telefonbuch_id, offs);
81CREATE INDEX idx_email ON table_email USING btree (value, telefonbuch_id, offs);
82
83-- # full text indexes
84CREATE INDEX trgm_idx_nachname ON table_nachname USING gin (value gin_trgm_ops, telefonbuch_id, offs);
85CREATE INDEX trgm_idx_vorname ON table_vorname USING gin (value gin_trgm_ops, telefonbuch_id, offs);
86CREATE INDEX trgm_idx_zusaetze ON table_zusaetze USING gin (value gin_trgm_ops, telefonbuch_id, offs);
87CREATE INDEX trgm_idx_strasse ON table_strasse USING gin (value gin_trgm_ops, telefonbuch_id, offs);
88CREATE INDEX trgm_idx_hausnummer ON table_hausnummer USING gin (value gin_trgm_ops, telefonbuch_id, offs);
89CREATE INDEX trgm_idx_zip ON table_zip USING gin (value gin_trgm_ops, telefonbuch_id, offs);
90CREATE INDEX trgm_idx_ort ON table_ort USING gin (value gin_trgm_ops, telefonbuch_id, offs);
91CREATE INDEX trgm_idx_verweise ON table_verweise USING gin (value gin_trgm_ops, telefonbuch_id, offs);
92CREATE INDEX trgm_idx_vorwahl ON table_vorwahl USING gin (value gin_trgm_ops, telefonbuch_id, offs);
93CREATE INDEX trgm_idx_rufnummer ON table_rufnummer USING gin (value gin_trgm_ops, telefonbuch_id, offs);
94CREATE INDEX trgm_idx_web ON table_web USING gin (value gin_trgm_ops, telefonbuch_id, offs);
95CREATE INDEX trgm_idx_email ON table_email USING gin (value gin_trgm_ops, telefonbuch_id, offs);
96
97-- # slow way:
98-- # SELECT * FROM Telefonbuch WHERE nachname = 'CCC';
99
100/*# using the index:
101# SELECT * FROM Telefonbuch
102# INNER JOIN indexed_view_nachname ON indexed_view_nachname.id = Telefonbuch.id
103# INNER JOIN indexed_view_zip ON indexed_view_zip.id = Telefonbuch.id
104#WHERE indexed_view_nachname.value = 'CCC'
105#AND indexed_view_zip.value = '10117';
106
107# Source: https://stackoverflow.com/questions/39480580/how-to-index-a-string-array-column-for-pg-trgm-term-any-array-column-que/39584236
108# Tricks: SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'table_zusaetze'::regclass;
109
110*/
diff --git a/src/postprocess/postgres.txt b/src/postprocess/postgres.txt
deleted file mode 100644
index e47ca00..0000000
--- a/src/postprocess/postgres.txt
+++ /dev/null
@@ -1,76 +0,0 @@
1CREATE TABLE telefonbuch (
2 id SERIAL PRIMARY KEY,
3 presence_flag bigint,
4 reverse_flag bigint,
5 biz_flag bigint,
6 zip character varying(64)[] DEFAULT '{}',
7 nachname character varying(256)[] DEFAULT '{}',
8 vorname character varying(192)[] DEFAULT '{}',
9 zusaetze character varying(512)[] DEFAULT '{}',
10 strasse character varying(64)[] DEFAULT '{}',
11 hausnummer character varying(32)[] DEFAULT '{}',
12 verweise text[] DEFAULT '{}',
13 ort character varying(64)[] DEFAULT '{}',
14 vorwahl character varying(10)[] DEFAULT '{}',
15 rufnummer character varying(32)[] DEFAULT '{}',
16 web character varying(256)[] DEFAULT '{}',
17 email character varying(256)[] DEFAULT '{}',
18 coords character varying(32)[] DEFAULT '{}'
19);
20
21COPY Telefonbuch ( presence_flag, reverse_flag, biz_flag, zip, nachname, vorname, zusaetze, strasse, hausnummer, verweise, ort, vorwahl, rufnummer, web, email, coords ) FROM 'telefonbuch.txt' WITH NULL AS '';
22
23CREATE MATERIALIZED VIEW indexed_view_nachname AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(nachname) WITH ORDINALITY u(value, row) WITH data;
24CREATE MATERIALIZED VIEW indexed_view_vorname AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(vorname) WITH ORDINALITY u(value, row) WITH data;
25CREATE MATERIALIZED VIEW indexed_view_vorwahl AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(vorwahl) WITH ORDINALITY u(value, row) WITH data;
26CREATE MATERIALIZED VIEW indexed_view_rufnummer AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(rufnummer) WITH ORDINALITY u(value, row) WITH data;
27CREATE MATERIALIZED VIEW indexed_view_strasse AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(strasse) WITH ORDINALITY u(value, row) WITH data;
28CREATE MATERIALIZED VIEW indexed_view_hausnummer AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(hausnummer) WITH ORDINALITY u(value, row) WITH data;
29CREATE MATERIALIZED VIEW indexed_view_zusaetze AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(zusaetze) WITH ORDINALITY u(value, row) WITH data;
30CREATE MATERIALIZED VIEW indexed_view_verweise AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(verweise) WITH ORDINALITY u(value, row) WITH data;
31CREATE MATERIALIZED VIEW indexed_view_zip AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(zip) WITH ORDINALITY u(value, row) WITH data;
32CREATE MATERIALIZED VIEW indexed_view_ort AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(ort) WITH ORDINALITY u(value, row) WITH data;
33CREATE MATERIALIZED VIEW indexed_view_web AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(web) WITH ORDINALITY u(value, row) WITH data;
34CREATE MATERIALIZED VIEW indexed_view_email AS SELECT id, value, row FROM Telefonbuch CROSS JOIN unnest(email) WITH ORDINALITY u(value, row) WITH data;
35
36# equality indexes
37CREATE INDEX idx_nachname ON indexed_view_nachname USING btree (value);
38CREATE INDEX idx_vorname ON indexed_view_vorname USING btree (value);
39CREATE INDEX idx_vorwahl ON indexed_view_vorwahl USING btree (value);
40CREATE INDEX idx_rufnummer ON indexed_view_rufnummer USING btree (value);
41CREATE INDEX idx_strasse ON indexed_view_strasse USING btree (value);
42CREATE INDEX idx_hausnummer ON indexed_view_hausnummer USING btree (value);
43CREATE INDEX idx_zusaetze ON indexed_view_zusaetze USING btree (value);
44CREATE INDEX idx_verweise ON indexed_view_verweise USING btree (value);
45CREATE INDEX idx_zip ON indexed_view_zip USING btree (value);
46CREATE INDEX idx_ort ON indexed_view_ort USING btree (value);
47CREATE INDEX idx_web ON indexed_view_web USING btree (value);
48CREATE INDEX idx_email ON indexed_view_email USING btree (value);
49
50# full text indexes
51CREATE INDEX trgm_idx_nachname ON indexed_view_nachname USING gin (value gin_trgm_ops);
52CREATE INDEX trgm_idx_vorname ON indexed_view_vorname USING gin (value gin_trgm_ops);
53CREATE INDEX trgm_idx_vorwahl ON indexed_view_vorwahl USING gin (value gin_trgm_ops);
54CREATE INDEX trgm_idx_rufnummer ON indexed_view_rufnummer USING gin (value gin_trgm_ops);
55CREATE INDEX trgm_idx_strasse ON indexed_view_strasse USING gin (value gin_trgm_ops);
56CREATE INDEX trgm_idx_hausnummer ON indexed_view_hausnummer USING gin (value gin_trgm_ops);
57CREATE INDEX trgm_idx_zusaetze ON indexed_view_zusaetze USING gin (value gin_trgm_ops);
58CREATE INDEX trgm_idx_verweise ON indexed_view_verweise USING gin (value gin_trgm_ops);
59CREATE INDEX trgm_idx_zip ON indexed_view_zip USING gin (value gin_trgm_ops);
60CREATE INDEX trgm_idx_ort ON indexed_view_ort USING gin (value gin_trgm_ops);
61CREATE INDEX trgm_idx_web ON indexed_view_web USING gin (value gin_trgm_ops);
62CREATE INDEX trgm_idx_email ON indexed_view_email USING gin (value gin_trgm_ops);
63
64# slow way:
65SELECT * FROM Telefonbuch WHERE nachname = 'CCC';
66
67# using the index:
68SELECT * FROM Telefonbuch
69 INNER JOIN indexed_view_nachname ON indexed_view_nachname.id = Telefonbuch.id
70 INNER JOIN indexed_view_zip ON indexed_view_zip.id = Telefonbuch.id
71WHERE indexed_view_nachname.value = 'CCC'
72AND indexed_view_zip.value = '10117';
73
74
75# Source: https://stackoverflow.com/questions/39480580/how-to-index-a-string-array-column-for-pg-trgm-term-any-array-column-que/39584236
76