prepare for multi db setup

This commit is contained in:
2025-01-06 19:58:01 +01:00
parent f3125561eb
commit f719c73b46
9 changed files with 278 additions and 225 deletions

View File

@@ -1,4 +1,4 @@
serv: serv:
go run -tags='sqlite_fts5' cmd/frontend/* DB_DRIVER="sqlite3" DB_URL="./persistence/app.db" go run -tags='sqlite_fts5' cmd/frontend/*
crawl: crawl:
go run -tags='sqlite_fts5' cmd/crawler/main.go go run -tags='sqlite_fts5' cmd/crawler/main.go

View File

@@ -1,7 +1,7 @@
package collectors package collectors
import "crowsnest/internal/model/sqlite" import "crowsnest/internal/model/database"
type Collector struct { type Collector struct {
Responses *sqlite.ResponseModel Responses *database.ResponseModel
} }

View File

@@ -1,8 +1,8 @@
package extractors package extractors
import "crowsnest/internal/model/sqlite" import "crowsnest/internal/model/database"
type Extractor struct { type Extractor struct {
Responses *sqlite.ResponseModel Responses *database.ResponseModel
Articles *sqlite.ArticleModel Articles *database.ArticleModel
} }

View File

@@ -3,7 +3,7 @@ package main
import ( import (
"crowsnest/cmd/crawler/collectors" "crowsnest/cmd/crawler/collectors"
"crowsnest/cmd/crawler/extractors" "crowsnest/cmd/crawler/extractors"
"crowsnest/internal/model/sqlite" "crowsnest/internal/model/database"
"database/sql" "database/sql"
"log" "log"
@@ -19,7 +19,7 @@ func main() {
// collect websites // collect websites
_ = collectors.Collector{ _ = collectors.Collector{
Responses: &sqlite.ResponseModel{DB: db}, Responses: &database.ResponseModel{DB: db},
} }
//coll.Spiegel() //coll.Spiegel()
@@ -27,8 +27,8 @@ func main() {
// extract articles from websites // extract articles from websites
extr := extractors.Extractor{ extr := extractors.Extractor{
Responses: &sqlite.ResponseModel{DB: db}, Responses: &database.ResponseModel{DB: db},
Articles: &sqlite.ArticleModel{DB: db}, Articles: &database.ArticleModel{DB: db},
} }
extr.Spiegel() extr.Spiegel()

View File

@@ -1,27 +1,45 @@
package main package main
import ( import (
"crowsnest/internal/model/sqlite" "crowsnest/internal/model/database"
"database/sql" "database/sql"
"errors"
"log" "log"
"net/http" "net/http"
"os"
_ "github.com/lib/pq"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
) )
type App struct { type App struct {
articles *sqlite.ArticleModel articles *database.ArticleModel
} }
func main() { func main() {
db, err := sql.Open("sqlite3", "./persistence/app.db") // collect environment variables
if err != nil { log.Fatal(err) } databaseURL := os.Getenv("DB_URL")
dbDriver := os.Getenv("DB_DRIVER")
app := &App{ // connect to database
articles: &sqlite.ArticleModel{ DB: db }, var db *sql.DB
var err error
switch {
case dbDriver == "sqlite3":
db, err = sql.Open("sqlite3", databaseURL)
if err != nil {
log.Fatal(err)
}
default:
log.Fatal(errors.New("given DB_DRIVER is not supported"))
} }
// define app
app := &App{
articles: &database.ArticleModel{DB: db, DbDriver: dbDriver},
}
// start web server
server := http.Server{ server := http.Server{
Addr: ":8080", Addr: ":8080",
Handler: app.routes(), Handler: app.routes(),

View File

@@ -0,0 +1,132 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
)
// ArticleModel groups the queries that read and write the articles table and
// its companion fts_articles full-text table.
type ArticleModel struct {
// DB is the database handle every method runs its statements on.
DB *sql.DB
// DbDriver presumably holds the database/sql driver name (the frontend
// passes its DB_DRIVER value); no method in this file reads it yet.
DbDriver string
}
// All returns every row of the articles table, newest publishDate first.
//
// The result is an empty, non-nil slice when the table has no rows. An error
// from running the query, scanning a row or iterating the result set is
// returned unchanged together with a nil slice.
func (m *ArticleModel) All() ([]model.Article, error) {
	stmt := `
		SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
		FROM articles
		ORDER BY publishDate DESC
	`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Without this, returning early on a Scan error would leave the result
	// set open and its connection checked out of the pool.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		a := model.Article{}
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Search returns at most 10 articles whose fts_articles entry matches query,
// ordered by ascending rank and then by descending publishDate.
//
// query is bound as a parameter to the MATCH operator, so it is interpreted
// as a full-text query expression by the database (presumably SQLite FTS5,
// given the rank column -- confirm against the schema). The result is an
// empty, non-nil slice when nothing matches. Errors from the query, from
// scanning a row or from iterating are returned unchanged with a nil slice.
func (m *ArticleModel) Search(query string) ([]model.Article, error) {
	stmt := `
		SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
		FROM articles JOIN (
			SELECT id as id2, rank FROM fts_articles WHERE content MATCH ?
		) ON id = id2
		ORDER BY rank ASC, publishDate DESC
		LIMIT 10
	`
	rows, err := m.DB.Query(stmt, query)
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including Scan failures.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		a := model.Article{}
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Insert stores a new article and adds it to the fts_articles full-text
// table. The Identifier of the given article is ignored; the id reported by
// the database for the new row is used for the full-text entry.
//
// Both statements run inside one transaction, so either the article and its
// full-text entry are both written or neither is. Any error from beginning
// the transaction, executing a statement, reading the inserted id or
// committing is returned unchanged.
func (m *ArticleModel) Insert(a *model.Article) error {
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit only reports sql.ErrTxDone, so it is
	// safe to defer unconditionally; on every error path it undoes the
	// partial write and hands the connection back to the pool.
	defer tx.Rollback()

	// insert article
	stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate)
		VALUES (?, ?, ?, ?, ?, ?)
	`
	result, err := tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate)
	if err != nil {
		return err
	}
	lastID, err := result.LastInsertId()
	if err != nil {
		return err
	}

	// insert into fts_articles for full-text search
	// NOTE(review): inside a raw string literal '\n' reaches the database as
	// a backslash followed by n, not as a newline; kept unchanged so new
	// index rows stay consistent with existing ones.
	stmt = `INSERT INTO fts_articles (id, content)
		VALUES (?, ? || '\n' || ? || '\n' || ?)
	`
	if _, err = tx.Exec(stmt, lastID, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}
// Update overwrites the stored article whose id equals a.Identifier with the
// fields of a and rebuilds its fts_articles full-text entry.
//
// The previous full-text entry for the id is deleted before the new one is
// inserted, so repeated updates do not accumulate stale rows that Search
// would join against. All statements run inside one transaction. Any error
// from beginning the transaction, executing a statement or committing is
// returned unchanged. An Identifier that matches no row is not reported as an
// error.
func (m *ArticleModel) Update(a *model.Article) error {
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit only reports sql.ErrTxDone, so it is
	// safe to defer unconditionally; on every error path it undoes the
	// partial write and hands the connection back to the pool.
	defer tx.Rollback()

	// update article
	stmt := `UPDATE articles
		SET title = ?, sourceUrl = ?, author = ?, content = ?, publishDate = ?, fetchDate = ?
		WHERE id = ?
	`
	if _, err = tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier); err != nil {
		return err
	}

	// drop the outdated full-text entry so the article is indexed only once
	if _, err = tx.Exec(`DELETE FROM fts_articles WHERE id = ?`, a.Identifier); err != nil {
		return err
	}

	// insert the refreshed text into fts_articles for full-text search
	stmt = `INSERT INTO fts_articles (id, content)
		VALUES (?, ? || '\n' || ? || '\n' || ?)
	`
	if _, err = tx.Exec(stmt, a.Identifier, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}

View File

@@ -0,0 +1,103 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
)
// ResponseModel groups the queries that read and write the responses table.
type ResponseModel struct {
// DB is the database handle every method runs its statements on.
DB *sql.DB
// DbDriver presumably holds the database/sql driver name; no method in
// this file reads it yet.
DbDriver string
}
// All returns every row of the responses table in the order the database
// yields them (the query has no ORDER BY).
//
// The result is an empty, non-nil slice when the table has no rows. Errors
// from the query, from scanning a row or from iterating are returned
// unchanged together with a nil slice.
func (m *ResponseModel) All() ([]model.Response, error) {
	stmt := `
		SELECT url, content, fetchDate, processed
		FROM responses
	`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Without this, returning early on a Scan error would leave the result
	// set open and its connection checked out of the pool.
	defer rows.Close()

	responses := []model.Response{}
	for rows.Next() {
		r := model.Response{}
		if err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed); err != nil {
			return nil, err
		}
		responses = append(responses, r)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return responses, nil
}
// UnprocessedUrls returns the url of every responses row whose processed
// column is false.
//
// The result is an empty, non-nil slice when no such row exists. Errors from
// the query, from scanning a row or from iterating are returned unchanged
// together with a nil slice.
func (m *ResponseModel) UnprocessedUrls() ([]string, error) {
	stmt := `
		SELECT url
		FROM responses
		WHERE NOT processed
	`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including Scan failures.
	defer rows.Close()

	urls := make([]string, 0)
	for rows.Next() {
		var u string
		if err := rows.Scan(&u); err != nil {
			return nil, err
		}
		urls = append(urls, u)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return urls, nil
}
// GetByUrl looks up the responses row whose url column equals url.
//
// The scanned row is returned together with the error reported by Scan; when
// no row matches, database/sql reports sql.ErrNoRows and the returned
// Response should not be used.
func (m *ResponseModel) GetByUrl(url string) (model.Response, error) {
	const query = `
SELECT url, content, fetchDate, processed
FROM responses
WHERE url = ?
`
	var found model.Response
	scanErr := m.DB.QueryRow(query, url).Scan(&found.Url, &found.Content, &found.FetchDate, &found.Processed)
	return found, scanErr
}
// Insert adds a row to the responses table with the given url and content.
// No other column is set by this statement. The error from executing the
// statement, if any, is returned unchanged.
func (m *ResponseModel) Insert(url string, content string) error {
	const query = `INSERT INTO responses (url, content) VALUES (?, ?)`
	if _, err := m.DB.Exec(query, url, content); err != nil {
		return err
	}
	return nil
}
// Processed sets the processed column to true on the responses row whose url
// column equals url. A url that matches no row is not reported as an error;
// only the error from executing the statement is returned.
func (m *ResponseModel) Processed(url string) error {
	const query = `UPDATE responses SET processed = true WHERE url = ?`
	if _, err := m.DB.Exec(query, url); err != nil {
		return err
	}
	return nil
}

View File

@@ -1,109 +0,0 @@
package sqlite
import (
"crowsnest/internal/model"
"database/sql"
)
// ArticleModel groups the queries that read and write the articles table and
// its companion fts_articles full-text table.
type ArticleModel struct {
// DB is the database handle every method runs its statements on.
DB *sql.DB
}
// All returns every row of the articles table, newest publishDate first.
//
// The result is an empty, non-nil slice when the table has no rows. An error
// from running the query, scanning a row or iterating the result set is
// returned unchanged together with a nil slice.
func (m *ArticleModel) All() ([]model.Article, error) {
	stmt := `
		SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
		FROM articles
		ORDER BY publishDate DESC
	`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Without this, returning early on a Scan error would leave the result
	// set open and its connection checked out of the pool.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		a := model.Article{}
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Search returns at most 10 articles whose fts_articles entry matches query,
// ordered by ascending rank and then by descending publishDate.
//
// query is bound as a parameter to the MATCH operator, so it is interpreted
// as a full-text query expression by the database (presumably SQLite FTS5,
// given the rank column -- confirm against the schema). The result is an
// empty, non-nil slice when nothing matches. Errors from the query, from
// scanning a row or from iterating are returned unchanged with a nil slice.
func (m *ArticleModel) Search(query string) ([]model.Article, error) {
	stmt := `
		SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
		FROM articles JOIN (
			SELECT id as id2, rank FROM fts_articles WHERE content MATCH ?
		) ON id = id2
		ORDER BY rank ASC, publishDate DESC
		LIMIT 10
	`
	rows, err := m.DB.Query(stmt, query)
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including Scan failures.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		a := model.Article{}
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Insert stores a new article and adds it to the fts_articles full-text
// table. The Identifier of the given article is ignored; the id reported by
// the database for the new row is used for the full-text entry.
//
// Both statements run inside one transaction, so either the article and its
// full-text entry are both written or neither is. Any error from beginning
// the transaction, executing a statement, reading the inserted id or
// committing is returned unchanged.
func (m *ArticleModel) Insert(a *model.Article) error {
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit only reports sql.ErrTxDone, so it is
	// safe to defer unconditionally; on every error path it undoes the
	// partial write and hands the connection back to the pool.
	defer tx.Rollback()

	// insert article
	stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate)
		VALUES (?, ?, ?, ?, ?, ?)
	`
	result, err := tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate)
	if err != nil {
		return err
	}
	lastID, err := result.LastInsertId()
	if err != nil {
		return err
	}

	// insert into fts_articles for full-text search
	stmt = `INSERT INTO fts_articles (id, content)
		VALUES (?, ? || '\n' || ? || '\n' || ?)
	`
	if _, err = tx.Exec(stmt, lastID, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}
// Update overwrites the stored article whose id equals a.Identifier with the
// fields of a and rebuilds its fts_articles full-text entry.
//
// The previous full-text entry for the id is deleted before the new one is
// inserted, so repeated updates do not accumulate stale rows that Search
// would join against. All statements run inside one transaction. Any error
// from beginning the transaction, executing a statement or committing is
// returned unchanged. An Identifier that matches no row is not reported as an
// error.
func (m *ArticleModel) Update(a *model.Article) error {
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit only reports sql.ErrTxDone, so it is
	// safe to defer unconditionally; on every error path it undoes the
	// partial write and hands the connection back to the pool.
	defer tx.Rollback()

	// update article
	stmt := `UPDATE articles
		SET title = ?, sourceUrl = ?, author = ?, content = ?, publishDate = ?, fetchDate = ?
		WHERE id = ?
	`
	if _, err = tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier); err != nil {
		return err
	}

	// drop the outdated full-text entry so the article is indexed only once
	if _, err = tx.Exec(`DELETE FROM fts_articles WHERE id = ?`, a.Identifier); err != nil {
		return err
	}

	// insert the refreshed text into fts_articles for full-text search
	stmt = `INSERT INTO fts_articles (id, content)
		VALUES (?, ? || '\n' || ? || '\n' || ?)
	`
	if _, err = tx.Exec(stmt, a.Identifier, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}

View File

@@ -1,91 +0,0 @@
package sqlite
import (
"crowsnest/internal/model"
"database/sql"
)
// ResponseModel groups the queries that read and write the responses table.
type ResponseModel struct {
// DB is the database handle every method runs its statements on.
DB *sql.DB
}
// All returns every row of the responses table in the order the database
// yields them (the query has no ORDER BY).
//
// The result is an empty, non-nil slice when the table has no rows. Errors
// from the query, from scanning a row or from iterating are returned
// unchanged together with a nil slice.
func (m *ResponseModel) All() ([]model.Response, error) {
	stmt := `
		SELECT url, content, fetchDate, processed
		FROM responses
	`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Without this, returning early on a Scan error would leave the result
	// set open and its connection checked out of the pool.
	defer rows.Close()

	responses := []model.Response{}
	for rows.Next() {
		r := model.Response{}
		if err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed); err != nil {
			return nil, err
		}
		responses = append(responses, r)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return responses, nil
}
// UnprocessedUrls returns the url of every responses row whose processed
// column is false.
//
// The result is an empty, non-nil slice when no such row exists. Errors from
// the query, from scanning a row or from iterating are returned unchanged
// together with a nil slice.
func (m *ResponseModel) UnprocessedUrls() ([]string, error) {
	stmt := `
		SELECT url
		FROM responses
		WHERE NOT processed
	`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including Scan failures.
	defer rows.Close()

	urls := make([]string, 0)
	for rows.Next() {
		var u string
		if err := rows.Scan(&u); err != nil {
			return nil, err
		}
		urls = append(urls, u)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return urls, nil
}
// GetByUrl looks up the responses row whose url column equals url.
//
// The scanned row is returned together with the error reported by Scan; when
// no row matches, database/sql reports sql.ErrNoRows and the returned
// Response should not be used.
func (m *ResponseModel) GetByUrl(url string) (model.Response, error) {
	const query = `
SELECT url, content, fetchDate, processed
FROM responses
WHERE url = ?
`
	var found model.Response
	scanErr := m.DB.QueryRow(query, url).Scan(&found.Url, &found.Content, &found.FetchDate, &found.Processed)
	return found, scanErr
}
// Insert adds a row to the responses table with the given url and content.
// No other column is set by this statement. The error from executing the
// statement, if any, is returned unchanged.
func (m *ResponseModel) Insert(url string, content string) error {
	const query = `INSERT INTO responses (url, content) VALUES (?, ?)`
	if _, err := m.DB.Exec(query, url, content); err != nil {
		return err
	}
	return nil
}
// Processed sets the processed column to true on the responses row whose url
// column equals url. A url that matches no row is not reported as an error;
// only the error from executing the statement is returned.
func (m *ResponseModel) Processed(url string) error {
	const query = `UPDATE responses SET processed = true WHERE url = ?`
	if _, err := m.DB.Exec(query, url); err != nil {
		return err
	}
	return nil
}