From f719c73b46331691f4e9b0cf1115c0acfeb511e4 Mon Sep 17 00:00:00 2001 From: Elias Kohout Date: Mon, 6 Jan 2025 19:58:01 +0100 Subject: [PATCH] prepare for multi db setup --- Makefile | 2 +- cmd/crawler/collectors/collector.go | 4 +- cmd/crawler/extractors/extractor.go | 6 +- cmd/crawler/main.go | 10 +- cmd/frontend/main.go | 46 ++++++--- internal/model/database/articles.go | 132 ++++++++++++++++++++++++++ internal/model/database/responeses.go | 103 ++++++++++++++++++++ internal/model/sqlite/articles.go | 109 --------------------- internal/model/sqlite/responeses.go | 91 ------------------ 9 files changed, 278 insertions(+), 225 deletions(-) create mode 100644 internal/model/database/articles.go create mode 100644 internal/model/database/responeses.go delete mode 100644 internal/model/sqlite/articles.go delete mode 100644 internal/model/sqlite/responeses.go diff --git a/Makefile b/Makefile index 5643c1f..f99b123 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ serv: - go run -tags='sqlite_fts5' cmd/frontend/* + DB_DRIVER="sqlite3" DB_URL="./persistence/app.db" go run -tags='sqlite_fts5' cmd/frontend/* crawl: go run -tags='sqlite_fts5' cmd/crawler/main.go diff --git a/cmd/crawler/collectors/collector.go b/cmd/crawler/collectors/collector.go index c822fc3..cb2461e 100644 --- a/cmd/crawler/collectors/collector.go +++ b/cmd/crawler/collectors/collector.go @@ -1,7 +1,7 @@ package collectors -import "crowsnest/internal/model/sqlite" +import "crowsnest/internal/model/database" type Collector struct { - Responses *sqlite.ResponseModel + Responses *database.ResponseModel } diff --git a/cmd/crawler/extractors/extractor.go b/cmd/crawler/extractors/extractor.go index b83736e..6a58950 100644 --- a/cmd/crawler/extractors/extractor.go +++ b/cmd/crawler/extractors/extractor.go @@ -1,8 +1,8 @@ package extractors -import "crowsnest/internal/model/sqlite" +import "crowsnest/internal/model/database" type Extractor struct { - Responses *sqlite.ResponseModel - Articles *sqlite.ArticleModel + Responses *database.ResponseModel + Articles *database.ArticleModel } diff --git a/cmd/crawler/main.go b/cmd/crawler/main.go index 377d269..5fd1543 100644 --- a/cmd/crawler/main.go +++ b/cmd/crawler/main.go @@ -3,7 +3,7 @@ package main import ( "crowsnest/cmd/crawler/collectors" "crowsnest/cmd/crawler/extractors" - "crowsnest/internal/model/sqlite" + "crowsnest/internal/model/database" "database/sql" "log" @@ -19,7 +19,7 @@ func main() { // collect websites _ = collectors.Collector{ - Responses: &sqlite.ResponseModel{DB: db}, + Responses: &database.ResponseModel{DB: db}, } //coll.Spiegel() @@ -27,9 +27,9 @@ func main() { // extract articles from websites extr := extractors.Extractor{ - Responses: &sqlite.ResponseModel{DB: db}, - Articles: &sqlite.ArticleModel{DB: db}, + Responses: &database.ResponseModel{DB: db}, + Articles: &database.ArticleModel{DB: db}, } - extr.Spiegel() + extr.Spiegel() } diff --git a/cmd/frontend/main.go b/cmd/frontend/main.go index ce2f9c5..d2ee89c 100644 --- a/cmd/frontend/main.go +++ b/cmd/frontend/main.go @@ -1,32 +1,50 @@ package main import ( - "crowsnest/internal/model/sqlite" + "crowsnest/internal/model/database" "database/sql" + "errors" "log" "net/http" + "os" + _ "github.com/lib/pq" _ "github.com/mattn/go-sqlite3" ) - type App struct { - articles *sqlite.ArticleModel + articles *database.ArticleModel } func main() { - db, err := sql.Open("sqlite3", "./persistence/app.db") - if err != nil { log.Fatal(err) } + // collect environement variables + databaseURL := os.Getenv("DB_URL") + dbDriver := os.Getenv("DB_DRIVER") - app := &App{ - articles: &sqlite.ArticleModel{ DB: db }, - } + // connect to database + var db *sql.DB + var err error + switch { + case dbDriver == "sqlite3": + db, err = sql.Open("sqlite3", databaseURL) + if err != nil { + log.Fatal(err) + } + default: + log.Fatal(errors.New("given DB_DRIVER is not supported")) + } - server := http.Server{ - Addr: ":8080", - Handler: app.routes(), - } + // define app + app := &App{ + articles: &database.ArticleModel{DB: db, DbDriver: dbDriver}, + } - log.Println("server started, listening on :8080") - server.ListenAndServe() + // start web server + server := http.Server{ + Addr: ":8080", + Handler: app.routes(), + } + + log.Println("server started, listening on :8080") + server.ListenAndServe() } diff --git a/internal/model/database/articles.go b/internal/model/database/articles.go new file mode 100644 index 0000000..97a2d2d --- /dev/null +++ b/internal/model/database/articles.go @@ -0,0 +1,132 @@ +package database + +import ( + "crowsnest/internal/model" + "database/sql" +) + +// TODO docstring +type ArticleModel struct { + DB *sql.DB + DbDriver string +} + +// TODO docstring +func (m *ArticleModel) All() ([]model.Article, error) { + stmt := ` + SELECT id, title, sourceUrl, author, content, publishDate, fetchDate + FROM articles + ORDER BY publishDate DESC + ` + rows, err := m.DB.Query(stmt) + if err != nil { + return nil, err + } + + articles := []model.Article{} + for rows.Next() { + a := model.Article{} + err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate) + if err != nil { + return nil, err + } + + articles = append(articles, a) + } + + if err = rows.Err(); err != nil { + return nil, err + } + + return articles, nil +} + +// TODO docstring +func (m *ArticleModel) Search(query string) ([]model.Article, error) { + stmt := ` + SELECT id, title, sourceUrl, author, content, publishDate, fetchDate + FROM articles JOIN ( + SELECT id as id2, rank FROM fts_articles WHERE content MATCH ? + ) ON id = id2 + ORDER BY rank ASC, publishDate DESC + LIMIT 10 + ` + rows, err := m.DB.Query(stmt, query) + if err != nil { + return nil, err + } + + articles := []model.Article{} + for rows.Next() { + a := model.Article{} + err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate) + if err != nil { + return nil, err + } + + articles = append(articles, a) + } + + if err = rows.Err(); err != nil { + return nil, err + } + + return articles, nil +} + +// Inserts a new article into the database. The id attribute of the given +// article will be ignored. May throw an error if the execution of the database +// query fails. +func (m *ArticleModel) Insert(a *model.Article) error { + // begin transaction + _, err := m.DB.Begin() + if err != nil { + return err + } + + // insert article + stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate) + VALUES (?, ?, ?, ?, ?, ?) + ` + result, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate) + if err != nil { + return err + } + lastId, err := result.LastInsertId() + if err != nil { + return err + } + + // insert into fts_articles for full-text search + stmt = `INSERT INTO fts_articles (id, content) + VALUES (?, ? || '\n' || ? || '\n' || ?) + ` + _, err = m.DB.Exec(stmt, lastId, a.Title, a.Author, a.Content) + return err +} + +// TODO docstring +func (m *ArticleModel) Update(a *model.Article) error { + // begin transaction + _, err := m.DB.Begin() + if err != nil { + return err + } + + // insert article + stmt := `UPDATE articles + SET title = ?, sourceUrl = ?, author = ?, content = ?, publishDate = ?, fetchDate = ? + WHERE id = ? + ` + _, err = m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier) + if err != nil { + return err + } + + // insert into fts_articles for full-text search + stmt = `INSERT INTO fts_articles (id, content) + VALUES (?, ? || '\n' || ? || '\n' || ?) + ` + _, err = m.DB.Exec(stmt, a.Identifier, a.Title, a.Author, a.Content) + return err +} diff --git a/internal/model/database/responeses.go b/internal/model/database/responeses.go new file mode 100644 index 0000000..1aa70ce --- /dev/null +++ b/internal/model/database/responeses.go @@ -0,0 +1,103 @@ +package database + +import ( + "crowsnest/internal/model" + "database/sql" +) + +type ResponseModel struct { + DB *sql.DB + DbDriver string +} + +// TODO docstring +func (m *ResponseModel) All() ([]model.Response, error) { + stmt := ` + SELECT url, content, fetchDate, processed + FROM responses + ` + rows, err := m.DB.Query(stmt) + if err != nil { + return nil, err + } + + responses := []model.Response{} + for rows.Next() { + r := model.Response{} + err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed) + if err != nil { + return nil, err + } + + responses = append(responses, r) + } + + if err = rows.Err(); err != nil { + return nil, err + } + + return responses, nil +} + +// TODO docstring +func (m *ResponseModel) UnprocessedUrls() ([]string, error) { + stmt := ` + SELECT url + FROM responses + WHERE NOT processed + ` + rows, err := m.DB.Query(stmt) + if err != nil { + return nil, err + } + + urls := make([]string, 0) + for rows.Next() { + r := "" + err := rows.Scan(&r) + if err != nil { + return nil, err + } + + urls = append(urls, r) + } + + if err = rows.Err(); err != nil { + return nil, err + } + + return urls, nil +} + +// TODO docstring +func (m *ResponseModel) GetByUrl(url string) (model.Response, error) { + stmt := ` + SELECT url, content, fetchDate, processed + FROM responses + WHERE url = ? + ` + + res := model.Response{} + row := m.DB.QueryRow(stmt, url) + err := row.Scan(&res.Url, &res.Content, &res.FetchDate, &res.Processed) + + return res, err +} + +// TODO docstring +func (m *ResponseModel) Insert(url string, content string) error { + // insert response + stmt := `INSERT INTO responses (url, content) VALUES (?, ?)` + _, err := m.DB.Exec(stmt, url, content) + + return err +} + +// TODO docstring +func (m *ResponseModel) Processed(url string) error { + // insert response + stmt := `UPDATE responses SET processed = true WHERE url = ?` + _, err := m.DB.Exec(stmt, url) + + return err +} diff --git a/internal/model/sqlite/articles.go b/internal/model/sqlite/articles.go deleted file mode 100644 index 12864c2..0000000 --- a/internal/model/sqlite/articles.go +++ /dev/null @@ -1,109 +0,0 @@ -package sqlite - -import ( - "crowsnest/internal/model" - "database/sql" -) - -// TODO docstring -type ArticleModel struct { - DB *sql.DB -} - -// TODO docstring -func (m *ArticleModel) All() ([]model.Article, error) { - stmt := ` - SELECT id, title, sourceUrl, author, content, publishDate, fetchDate - FROM articles - ORDER BY publishDate DESC - ` - rows, err := m.DB.Query(stmt) - if err != nil { return nil, err } - - articles := []model.Article{} - for rows.Next() { - a := model.Article{} - err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate) - if err != nil { return nil, err } - - articles = append(articles, a) - } - - if err = rows.Err(); err != nil { return nil, err } - - return articles, nil -} - -// TODO docstring -func (m *ArticleModel) Search(query string) ([]model.Article, error) { - stmt := ` - SELECT id, title, sourceUrl, author, content, publishDate, fetchDate - FROM articles JOIN ( - SELECT id as id2, rank FROM fts_articles WHERE content MATCH ? - ) ON id = id2 - ORDER BY rank ASC, publishDate DESC - LIMIT 10 - ` - rows, err := m.DB.Query(stmt, query) - if err != nil { return nil, err } - - articles := []model.Article{} - for rows.Next() { - a := model.Article{} - err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate) - if err != nil { return nil, err } - - articles = append(articles, a) - } - - if err = rows.Err(); err != nil { return nil, err } - - return articles, nil -} - -// Inserts a new article into the database. The id attribute of the given -// article will be ignored. May throw an error if the execution of the database -// query fails. -func (m *ArticleModel) Insert(a *model.Article) error { - // begin transaction - _, err := m.DB.Begin() - if err != nil { return err } - - // insert article - stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate) - VALUES (?, ?, ?, ?, ?, ?) - ` - result, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate) - if err != nil { return err } - lastId, err := result.LastInsertId() - if err != nil { return err } - - // insert into fts_articles for full-text search - stmt = `INSERT INTO fts_articles (id, content) - VALUES (?, ? || '\n' || ? || '\n' || ?) - ` - _, err = m.DB.Exec(stmt, lastId, a.Title, a.Author, a.Content) - return err -} - -// TODO docstring -func (m *ArticleModel) Update(a *model.Article) error { - // begin transaction - _, err := m.DB.Begin() - if err != nil { return err } - - // insert article - stmt := `UPDATE articles - SET title = ?, sourceUrl = ?, author = ?, content = ?, publishDate = ?, fetchDate = ? - WHERE id = ? - ` - _, err = m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier) - if err != nil { return err } - - // insert into fts_articles for full-text search - stmt = `INSERT INTO fts_articles (id, content) - VALUES (?, ? || '\n' || ? || '\n' || ?) - ` - _, err = m.DB.Exec(stmt, a.Identifier, a.Title, a.Author, a.Content) - return err -} diff --git a/internal/model/sqlite/responeses.go b/internal/model/sqlite/responeses.go deleted file mode 100644 index 66e46c0..0000000 --- a/internal/model/sqlite/responeses.go +++ /dev/null @@ -1,91 +0,0 @@ -package sqlite - -import ( - "crowsnest/internal/model" - "database/sql" -) - -type ResponseModel struct { - DB *sql.DB -} - - -// TODO docstring -func (m *ResponseModel) All() ([]model.Response, error) { - stmt := ` - SELECT url, content, fetchDate, processed - FROM responses - ` - rows, err := m.DB.Query(stmt) - if err != nil { return nil, err } - - responses := []model.Response{} - for rows.Next() { - r := model.Response{} - err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed) - if err != nil { return nil, err } - - responses = append(responses, r) - } - - if err = rows.Err(); err != nil { return nil, err } - - return responses, nil -} - -// TODO docstring -func (m *ResponseModel) UnprocessedUrls() ([]string, error) { - stmt := ` - SELECT url - FROM responses - WHERE NOT processed - ` - rows, err := m.DB.Query(stmt) - if err != nil { return nil, err } - - urls := make([]string, 0) - for rows.Next() { - r := "" - err := rows.Scan(&r) - if err != nil { return nil, err } - - urls = append(urls, r) - } - - if err = rows.Err(); err != nil { return nil, err } - - return urls, nil -} - -// TODO docstring -func (m *ResponseModel) GetByUrl(url string) (model.Response, error) { - stmt := ` - SELECT url, content, fetchDate, processed - FROM responses - WHERE url = ? - ` - - res := model.Response{} - row := m.DB.QueryRow(stmt, url) - err := row.Scan(&res.Url, &res.Content, &res.FetchDate, &res.Processed) - - return res, err -} - -// TODO docstring -func (m *ResponseModel) Insert(url string, content string) error { - // insert response - stmt := `INSERT INTO responses (url, content) VALUES (?, ?)` - _, err := m.DB.Exec(stmt, url, content) - - return err -} - -// TODO docstring -func (m *ResponseModel) Processed(url string) error { - // insert response - stmt := `UPDATE responses SET processed = true WHERE url = ?` - _, err := m.DB.Exec(stmt, url) - - return err -}