From de32cef5308d64c76604db0733ae2a45713707f5 Mon Sep 17 00:00:00 2001 From: Elias Kohout Date: Sun, 12 Jan 2025 17:36:30 +0100 Subject: [PATCH] add documents table --- Makefile | 12 +++-- .../20250112141434_document_table.sql | 54 +++++++++++++++++++ .../20250112162651_rm_tsvector_column.sql | 15 ++++++ src/internal/model/database/articles.go | 8 +-- 4 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 src/assets/migrations/20250112141434_document_table.sql create mode 100644 src/assets/migrations/20250112162651_rm_tsvector_column.sql diff --git a/Makefile b/Makefile index 266899e..a4a9269 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,18 @@ DB_HOST="10.99.0.3" DB_PORT="5432" -DB_NAME="crowsnest" -DB_USER="crow" -DB_PASS="4LlKpnQ2RZPzL13BSpkW4k" +DB_NAME="crowsnest_dev" +DB_USER="crow_dev" +DB_PASS="hL0VlXkH2WoHL7c7FdRTHXMy" go-run: cd src; DB_USER=$(DB_USER) DB_PASS=$(DB_PASS) DB_NAME=$(DB_NAME) DB_HOST=$(DB_HOST) go run cmd/frontend/main.go +migrate-up: + goose -dir=./src/assets/migrations/ postgres "postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)" up + +migrate-down: + goose -dir=./src/assets/migrations/ postgres "postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)" down + docker-push: docker-build docker push git.kohout-dev.de/crowsnest/crowsnest:latest diff --git a/src/assets/migrations/20250112141434_document_table.sql b/src/assets/migrations/20250112141434_document_table.sql new file mode 100644 index 0000000..0c15003 --- /dev/null +++ b/src/assets/migrations/20250112141434_document_table.sql @@ -0,0 +1,54 @@ +-- +goose Up +-- +goose StatementBegin +BEGIN; + +CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content_hash CHAR(32) UNIQUE GENERATED ALWAYS AS (MD5(content)) STORED, + content TEXT +); + +ALTER TABLE articles ADD COLUMN document_id INT REFERENCES documents(id); + +INSERT INTO documents (content) + SELECT DISTINCT coalesce(title, '') || ' ' || coalesce(content, '') FROM articles; + +UPDATE articles + SET document_id = ( SELECT d.id FROM documents d WHERE d.content = coalesce(title, '') || ' ' || coalesce(content, '') ); + +CREATE OR REPLACE FUNCTION article_inserts_document() +RETURNS TRIGGER AS $$ + DECLARE + new_document_id INT; + BEGIN + INSERT INTO documents (content) + VALUES (coalesce(NEW.title, '') || ' ' || coalesce(NEW.content, '')) + RETURNING id INTO new_document_id; + + UPDATE articles + SET document_id = new_document_id + WHERE id = NEW.id; + + RETURN NEW; + END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER trigger_article_inserts_document +AFTER INSERT ON articles +FOR EACH ROW +EXECUTE FUNCTION article_inserts_document(); + +COMMIT; +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +BEGIN; + +DROP TRIGGER IF EXISTS trigger_article_inserts_document ON articles; +DROP FUNCTION IF EXISTS article_inserts_document(); +ALTER TABLE articles DROP COLUMN IF EXISTS document_id; +DROP TABLE IF EXISTS documents; + +COMMIT; +-- +goose StatementEnd diff --git a/src/assets/migrations/20250112162651_rm_tsvector_column.sql b/src/assets/migrations/20250112162651_rm_tsvector_column.sql new file mode 100644 index 0000000..b0f44f6 --- /dev/null +++ b/src/assets/migrations/20250112162651_rm_tsvector_column.sql @@ -0,0 +1,15 @@ +-- +goose Up +-- +goose StatementBegin +DROP INDEX IF EXISTS articles_fts_idx; +ALTER TABLE articles DROP COLUMN IF EXISTS fts_vector; +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +ALTER TABLE articles +ADD COLUMN fts_vector tsvector GENERATED ALWAYS AS ( + to_tsvector('german', coalesce(title, '') || ' ' || coalesce(content, '')) +) STORED; + +CREATE INDEX articles_fts_idx ON articles USING gin(fts_vector); +-- +goose StatementEnd \ No newline at end of file diff --git a/src/internal/model/database/articles.go b/src/internal/model/database/articles.go index 3d8c0db..3dd161d 100644 --- a/src/internal/model/database/articles.go +++ b/src/internal/model/database/articles.go @@ -62,10 +62,10 @@ func (m *ArticleModel) CountAll() (uint, error) { // database fails. func (m *ArticleModel) Search(query string) ([]model.Article, error) { stmt := ` - SELECT id, title, sourceurl, content, publishdate, fetchDate - FROM articles - WHERE fts_vector @@ to_tsquery('german', $1) - ORDER BY ts_rank(fts_vector, to_tsquery('german', $1)) DESC + SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate + FROM articles a JOIN documents d ON a.document_id = d.id + WHERE to_tsvector('german', d.content) @@ to_tsquery('german', $1) + ORDER BY ts_rank(to_tsvector('german', d.content), to_tsquery('german', $1)) DESC LIMIT 10 `