add documents table

This commit is contained in:
2025-01-12 17:36:30 +01:00
parent 48d8b99fc3
commit de32cef530
4 changed files with 82 additions and 7 deletions

View File

@@ -1,12 +1,18 @@
# Development database connection settings.
# `?=` lets the environment or the command line override any of them,
# e.g. `make migrate-up DB_NAME=other`.
# Values are deliberately unquoted: Make keeps quote characters as part of the
# value, which previously leaked literal quotes into the goose URL.
# NOTE(review): credentials are committed in plain text here; consider loading
# them from the environment or an untracked include file instead.
DB_HOST ?= 10.99.0.3
DB_PORT ?= 5432
DB_NAME ?= crowsnest_dev
DB_USER ?= crow_dev
DB_PASS ?= hL0VlXkH2WoHL7c7FdRTHXMy

# Derived values. Recursive (`=`) on purpose so overrides of the DB_* knobs
# above are picked up when the recipes expand them.
MIGRATIONS_DIR = ./src/assets/migrations/
DB_URL = postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)

# None of these targets produce a file of the same name.
.PHONY: go-run migrate-up migrate-down docker-push

# Run the frontend from ./src with the database settings in its environment.
# `&&` stops the recipe if the directory change fails.
# NOTE(review): DB_PORT is not forwarded here, matching the previous recipe;
# confirm the application does not need it.
go-run:
	cd src && DB_USER='$(DB_USER)' DB_PASS='$(DB_PASS)' DB_NAME='$(DB_NAME)' DB_HOST='$(DB_HOST)' go run cmd/frontend/main.go

# Apply all pending goose migrations.
migrate-up:
	goose -dir=$(MIGRATIONS_DIR) postgres "$(DB_URL)" up

# Roll back the most recently applied goose migration.
migrate-down:
	goose -dir=$(MIGRATIONS_DIR) postgres "$(DB_URL)" down

# Push the image to the registry after building it.
docker-push: docker-build
	docker push git.kohout-dev.de/crowsnest/crowsnest:latest

View File

@@ -0,0 +1,54 @@
-- +goose Up
-- +goose StatementBegin
-- No explicit BEGIN/COMMIT: goose already runs each migration inside its own
-- transaction, and a nested COMMIT would end that transaction early.

-- One row per distinct document text. content_hash is derived from content
-- and enforces uniqueness without a unique index over the full text.
CREATE TABLE documents (
    id SERIAL PRIMARY KEY,
    content_hash CHAR(32) UNIQUE GENERATED ALWAYS AS (MD5(content)) STORED,
    content TEXT
);

ALTER TABLE articles ADD COLUMN document_id INT REFERENCES documents(id);

-- Backfill: create one document per distinct "title content" text.
INSERT INTO documents (content)
SELECT DISTINCT coalesce(a.title, '') || ' ' || coalesce(a.content, '')
FROM articles a;

-- Link every existing article to its document.
-- Columns are qualified because both tables have a "content" column. An
-- unqualified reference inside a subquery on documents binds to
-- documents.content, which made the previous predicate never match.
-- Matching on content_hash uses the unique index instead of comparing full text.
UPDATE articles a
SET document_id = d.id
FROM documents d
WHERE d.content_hash = MD5(coalesce(a.title, '') || ' ' || coalesce(a.content, ''))::CHAR(32);

-- Trigger function: after an article is inserted, make sure a document with
-- its "title content" text exists and store that document's id on the article.
-- NOTE(review): only INSERT is handled; later edits to title/content do not
-- refresh the linked document.
CREATE OR REPLACE FUNCTION article_inserts_document()
RETURNS TRIGGER AS $$
DECLARE
    doc_text TEXT;
    new_document_id INT;
BEGIN
    doc_text := coalesce(NEW.title, '') || ' ' || coalesce(NEW.content, '');

    -- Reuse an existing document when the same text is already stored, rather
    -- than failing the article insert with a unique violation on content_hash.
    INSERT INTO documents (content)
    VALUES (doc_text)
    ON CONFLICT (content_hash) DO NOTHING
    RETURNING id INTO new_document_id;

    -- RETURNING yields no row when the insert was skipped, so look the id up.
    IF new_document_id IS NULL THEN
        SELECT d.id INTO new_document_id
        FROM documents d
        WHERE d.content_hash = MD5(doc_text)::CHAR(32);
    END IF;

    UPDATE articles
    SET document_id = new_document_id
    WHERE id = NEW.id;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trigger_article_inserts_document
AFTER INSERT ON articles
FOR EACH ROW
EXECUTE FUNCTION article_inserts_document();
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Reverts the documents migration: trigger first, then its function, then the
-- referencing column on articles, and finally the documents table itself.
-- No explicit BEGIN/COMMIT: goose already runs each migration inside its own
-- transaction, and a nested COMMIT would end that transaction early.
DROP TRIGGER IF EXISTS trigger_article_inserts_document ON articles;
DROP FUNCTION IF EXISTS article_inserts_document();
ALTER TABLE articles DROP COLUMN IF EXISTS document_id;
DROP TABLE IF EXISTS documents;
-- +goose StatementEnd

View File

@@ -0,0 +1,15 @@
-- +goose Up
-- +goose StatementBegin
-- Removes the full-text-search index and the fts_vector column from articles.
-- IF EXISTS makes both statements no-ops when the objects are already gone.
DROP INDEX IF EXISTS articles_fts_idx;
ALTER TABLE articles DROP COLUMN IF EXISTS fts_vector;
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Restores fts_vector as a stored generated tsvector built with the 'german'
-- text search configuration from title and content (NULLs treated as empty
-- strings), then recreates the GIN index over it.
ALTER TABLE articles
ADD COLUMN fts_vector tsvector GENERATED ALWAYS AS (
to_tsvector('german', coalesce(title, '') || ' ' || coalesce(content, ''))
) STORED;
CREATE INDEX articles_fts_idx ON articles USING gin(fts_vector);
-- +goose StatementEnd

View File

@@ -62,10 +62,10 @@ func (m *ArticleModel) CountAll() (uint, error) {
// database fails. // database fails.
func (m *ArticleModel) Search(query string) ([]model.Article, error) { func (m *ArticleModel) Search(query string) ([]model.Article, error) {
stmt := ` stmt := `
SELECT id, title, sourceurl, content, publishdate, fetchDate SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
FROM articles FROM articles a JOIN documents d ON a.document_id = d.id
WHERE fts_vector @@ to_tsquery('german', $1) WHERE to_tsvector('german', d.content) @@ to_tsquery('german', $1)
ORDER BY ts_rank(fts_vector, to_tsquery('german', $1)) DESC ORDER BY ts_rank(to_tsvector('german', d.content), to_tsquery('german', $1)) DESC
LIMIT 10 LIMIT 10
` `