prepare for multi db setup

This commit is contained in:
2025-01-06 19:58:01 +01:00
parent f3125561eb
commit f719c73b46
9 changed files with 278 additions and 225 deletions

View File

@@ -1,4 +1,4 @@
serv:
go run -tags='sqlite_fts5' cmd/frontend/*
DB_DRIVER="sqlite3" DB_URL="./persistence/app.db" go run -tags='sqlite_fts5' cmd/frontend/*
crawl:
go run -tags='sqlite_fts5' cmd/crawler/main.go

View File

@@ -1,7 +1,7 @@
package collectors
import "crowsnest/internal/model/sqlite"
import "crowsnest/internal/model/database"
type Collector struct {
Responses *sqlite.ResponseModel
Responses *database.ResponseModel
}

View File

@@ -1,8 +1,8 @@
package extractors
import "crowsnest/internal/model/sqlite"
import "crowsnest/internal/model/database"
type Extractor struct {
Responses *sqlite.ResponseModel
Articles *sqlite.ArticleModel
Responses *database.ResponseModel
Articles *database.ArticleModel
}

View File

@@ -3,7 +3,7 @@ package main
import (
"crowsnest/cmd/crawler/collectors"
"crowsnest/cmd/crawler/extractors"
"crowsnest/internal/model/sqlite"
"crowsnest/internal/model/database"
"database/sql"
"log"
@@ -19,7 +19,7 @@ func main() {
// collect websites
_ = collectors.Collector{
Responses: &sqlite.ResponseModel{DB: db},
Responses: &database.ResponseModel{DB: db},
}
//coll.Spiegel()
@@ -27,9 +27,9 @@ func main() {
// extract articles from websites
extr := extractors.Extractor{
Responses: &sqlite.ResponseModel{DB: db},
Articles: &sqlite.ArticleModel{DB: db},
Responses: &database.ResponseModel{DB: db},
Articles: &database.ArticleModel{DB: db},
}
extr.Spiegel()
extr.Spiegel()
}

View File

@@ -1,32 +1,50 @@
package main
import (
"crowsnest/internal/model/sqlite"
"crowsnest/internal/model/database"
"database/sql"
"errors"
"log"
"net/http"
"os"
_ "github.com/lib/pq"
_ "github.com/mattn/go-sqlite3"
)
type App struct {
articles *sqlite.ArticleModel
articles *database.ArticleModel
}
func main() {
db, err := sql.Open("sqlite3", "./persistence/app.db")
if err != nil { log.Fatal(err) }
// collect environment variables
databaseURL := os.Getenv("DB_URL")
dbDriver := os.Getenv("DB_DRIVER")
app := &App{
articles: &sqlite.ArticleModel{ DB: db },
}
// connect to database
var db *sql.DB
var err error
switch {
case dbDriver == "sqlite3":
db, err = sql.Open("sqlite3", databaseURL)
if err != nil {
log.Fatal(err)
}
default:
log.Fatal(errors.New("given DB_DRIVER is not supported"))
}
server := http.Server{
Addr: ":8080",
Handler: app.routes(),
}
// define app
app := &App{
articles: &database.ArticleModel{DB: db, DbDriver: dbDriver},
}
log.Println("server started, listening on :8080")
server.ListenAndServe()
// start web server
server := http.Server{
Addr: ":8080",
Handler: app.routes(),
}
log.Println("server started, listening on :8080")
server.ListenAndServe()
}

View File

@@ -0,0 +1,132 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
)
// ArticleModel bundles a database handle with the queries that read and write
// the articles table and its fts_articles full-text companion table.
type ArticleModel struct {
	// DB is the handle every query of this model is executed on.
	DB *sql.DB
	// DbDriver is the name of the database driver behind DB (e.g. "sqlite3").
	// NOTE(review): none of the methods in this file consult it yet.
	DbDriver string
}
// All returns every row of the articles table, ordered by publishDate
// descending. The returned slice is empty (never nil) when the table holds no
// rows. An error is returned when the query fails or a row cannot be scanned.
func (m *ArticleModel) All() ([]model.Article, error) {
	stmt := `
SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
FROM articles
ORDER BY publishDate DESC
`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		var a model.Article
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Search returns at most 10 articles whose fts_articles entry matches query.
// query is bound as a parameter to the MATCH operator. Results are ordered by
// the full-text rank ascending, then by publishDate descending. The returned
// slice is empty (never nil) when nothing matches. An error is returned when
// the query fails or a row cannot be scanned.
func (m *ArticleModel) Search(query string) ([]model.Article, error) {
	stmt := `
SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
FROM articles JOIN (
SELECT id as id2, rank FROM fts_articles WHERE content MATCH ?
) ON id = id2
ORDER BY rank ASC, publishDate DESC
LIMIT 10
`
	rows, err := m.DB.Query(stmt, query)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		var a model.Article
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Insert stores a new article and its full-text search entry. The Identifier
// of the given article is ignored; the id generated by the database is used
// for the fts_articles row. Both writes happen in one transaction, so either
// both rows are stored or neither is. An error is returned when the
// transaction cannot be started, a statement fails, or the commit fails.
func (m *ArticleModel) Insert(a *model.Article) error {
	// Keep the transaction handle: the statements must run on it (not on
	// m.DB) to be atomic, and it must be finished to release its connection.
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Undo partial work on any early return. After a successful Commit this
	// only reports sql.ErrTxDone, which is safe to ignore.
	defer tx.Rollback()

	// insert article
	stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate)
VALUES (?, ?, ?, ?, ?, ?)
`
	result, err := tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate)
	if err != nil {
		return err
	}
	lastID, err := result.LastInsertId()
	if err != nil {
		return err
	}

	// insert into fts_articles for full-text search; char(10) is a real
	// newline, whereas '\n' inside a SQL string literal is the two characters
	// backslash and n (SQLite has no backslash escapes).
	stmt = `INSERT INTO fts_articles (id, content)
VALUES (?, ? || char(10) || ? || char(10) || ?)
`
	if _, err = tx.Exec(stmt, lastID, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}
// Update overwrites title, sourceUrl, author, content, publishDate and
// fetchDate of the article row whose id equals a.Identifier and rebuilds that
// article's fts_articles entry. All writes happen in one transaction. An
// error is returned when the transaction cannot be started, a statement
// fails, or the commit fails.
func (m *ArticleModel) Update(a *model.Article) error {
	// Keep the transaction handle: the statements must run on it (not on
	// m.DB) to be atomic, and it must be finished to release its connection.
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Undo partial work on any early return. After a successful Commit this
	// only reports sql.ErrTxDone, which is safe to ignore.
	defer tx.Rollback()

	// update article
	stmt := `UPDATE articles
SET title = ?, sourceUrl = ?, author = ?, content = ?, publishDate = ?, fetchDate = ?
WHERE id = ?
`
	if _, err = tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier); err != nil {
		return err
	}

	// Drop the previous full-text entry first; inserting without this leaves
	// a stale duplicate for the same id, which Search would join twice.
	if _, err = tx.Exec(`DELETE FROM fts_articles WHERE id = ?`, a.Identifier); err != nil {
		return err
	}

	// insert into fts_articles for full-text search; char(10) is a real
	// newline, whereas '\n' inside a SQL string literal is the two characters
	// backslash and n (SQLite has no backslash escapes).
	stmt = `INSERT INTO fts_articles (id, content)
VALUES (?, ? || char(10) || ? || char(10) || ?)
`
	if _, err = tx.Exec(stmt, a.Identifier, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}

View File

@@ -0,0 +1,103 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
)
// ResponseModel bundles a database handle with the queries that read and
// write the responses table.
type ResponseModel struct {
	// DB is the handle every query of this model is executed on.
	DB *sql.DB
	// DbDriver is the name of the database driver behind DB (e.g. "sqlite3").
	// NOTE(review): none of the methods in this file consult it yet.
	DbDriver string
}
// All returns every row of the responses table. The returned slice is empty
// (never nil) when the table holds no rows. An error is returned when the
// query fails or a row cannot be scanned.
func (m *ResponseModel) All() ([]model.Response, error) {
	stmt := `
SELECT url, content, fetchDate, processed
FROM responses
`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	responses := []model.Response{}
	for rows.Next() {
		var r model.Response
		if err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed); err != nil {
			return nil, err
		}
		responses = append(responses, r)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return responses, nil
}
// UnprocessedUrls returns the url of every responses row whose processed
// column is false. The returned slice is empty (never nil) when there is no
// such row. An error is returned when the query fails or a row cannot be
// scanned.
func (m *ResponseModel) UnprocessedUrls() ([]string, error) {
	stmt := `
SELECT url
FROM responses
WHERE NOT processed
`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	urls := make([]string, 0)
	for rows.Next() {
		var u string
		if err := rows.Scan(&u); err != nil {
			return nil, err
		}
		urls = append(urls, u)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return urls, nil
}
// GetByUrl looks up the responses row whose url column equals url and returns
// it. The error reported by scanning the row is passed through unchanged;
// database/sql reports sql.ErrNoRows when no row matched.
func (m *ResponseModel) GetByUrl(url string) (model.Response, error) {
	const lookup = `
SELECT url, content, fetchDate, processed
FROM responses
WHERE url = ?
`
	var found model.Response
	scanErr := m.DB.QueryRow(lookup, url).Scan(
		&found.Url,
		&found.Content,
		&found.FetchDate,
		&found.Processed,
	)
	return found, scanErr
}
// Insert adds a row to the responses table holding the given url and content.
// Only those two columns are written; the remaining columns are left to
// whatever the table schema provides (presumably defaults — verify against
// the schema). The error of the executed statement is returned.
func (m *ResponseModel) Insert(url string, content string) error {
	const insertResponse = `INSERT INTO responses (url, content) VALUES (?, ?)`
	if _, err := m.DB.Exec(insertResponse, url, content); err != nil {
		return err
	}
	return nil
}
// Processed sets the processed column to true on every responses row whose
// url column equals url. The error of the executed statement is returned; a
// url that matches no row is not reported as an error by this method.
func (m *ResponseModel) Processed(url string) error {
	const markProcessed = `UPDATE responses SET processed = true WHERE url = ?`
	if _, err := m.DB.Exec(markProcessed, url); err != nil {
		return err
	}
	return nil
}

View File

@@ -1,109 +0,0 @@
package sqlite
import (
"crowsnest/internal/model"
"database/sql"
)
// ArticleModel bundles a database handle with the queries that read and write
// the articles table and its fts_articles full-text companion table.
type ArticleModel struct {
	// DB is the handle every query of this model is executed on.
	DB *sql.DB
}
// All returns every row of the articles table, ordered by publishDate
// descending. The returned slice is empty (never nil) when the table holds no
// rows. An error is returned when the query fails or a row cannot be scanned.
func (m *ArticleModel) All() ([]model.Article, error) {
	stmt := `
SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
FROM articles
ORDER BY publishDate DESC
`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		var a model.Article
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Search returns at most 10 articles whose fts_articles entry matches query.
// query is bound as a parameter to the MATCH operator. Results are ordered by
// the full-text rank ascending, then by publishDate descending. The returned
// slice is empty (never nil) when nothing matches. An error is returned when
// the query fails or a row cannot be scanned.
func (m *ArticleModel) Search(query string) ([]model.Article, error) {
	stmt := `
SELECT id, title, sourceUrl, author, content, publishDate, fetchDate
FROM articles JOIN (
SELECT id as id2, rank FROM fts_articles WHERE content MATCH ?
) ON id = id2
ORDER BY rank ASC, publishDate DESC
LIMIT 10
`
	rows, err := m.DB.Query(stmt, query)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	articles := []model.Article{}
	for rows.Next() {
		var a model.Article
		if err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
			return nil, err
		}
		articles = append(articles, a)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// Insert stores a new article and its full-text search entry. The Identifier
// of the given article is ignored; the id generated by the database is used
// for the fts_articles row. Both writes happen in one transaction, so either
// both rows are stored or neither is. An error is returned when the
// transaction cannot be started, a statement fails, or the commit fails.
func (m *ArticleModel) Insert(a *model.Article) error {
	// Keep the transaction handle: the statements must run on it (not on
	// m.DB) to be atomic, and it must be finished to release its connection.
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Undo partial work on any early return. After a successful Commit this
	// only reports sql.ErrTxDone, which is safe to ignore.
	defer tx.Rollback()

	// insert article
	stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate)
VALUES (?, ?, ?, ?, ?, ?)
`
	result, err := tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate)
	if err != nil {
		return err
	}
	lastID, err := result.LastInsertId()
	if err != nil {
		return err
	}

	// insert into fts_articles for full-text search; char(10) is a real
	// newline, whereas '\n' inside a SQL string literal is the two characters
	// backslash and n (SQLite has no backslash escapes).
	stmt = `INSERT INTO fts_articles (id, content)
VALUES (?, ? || char(10) || ? || char(10) || ?)
`
	if _, err = tx.Exec(stmt, lastID, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}
// Update overwrites title, sourceUrl, author, content, publishDate and
// fetchDate of the article row whose id equals a.Identifier and rebuilds that
// article's fts_articles entry. All writes happen in one transaction. An
// error is returned when the transaction cannot be started, a statement
// fails, or the commit fails.
func (m *ArticleModel) Update(a *model.Article) error {
	// Keep the transaction handle: the statements must run on it (not on
	// m.DB) to be atomic, and it must be finished to release its connection.
	tx, err := m.DB.Begin()
	if err != nil {
		return err
	}
	// Undo partial work on any early return. After a successful Commit this
	// only reports sql.ErrTxDone, which is safe to ignore.
	defer tx.Rollback()

	// update article
	stmt := `UPDATE articles
SET title = ?, sourceUrl = ?, author = ?, content = ?, publishDate = ?, fetchDate = ?
WHERE id = ?
`
	if _, err = tx.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier); err != nil {
		return err
	}

	// Drop the previous full-text entry first; inserting without this leaves
	// a stale duplicate for the same id, which Search would join twice.
	if _, err = tx.Exec(`DELETE FROM fts_articles WHERE id = ?`, a.Identifier); err != nil {
		return err
	}

	// insert into fts_articles for full-text search; char(10) is a real
	// newline, whereas '\n' inside a SQL string literal is the two characters
	// backslash and n (SQLite has no backslash escapes).
	stmt = `INSERT INTO fts_articles (id, content)
VALUES (?, ? || char(10) || ? || char(10) || ?)
`
	if _, err = tx.Exec(stmt, a.Identifier, a.Title, a.Author, a.Content); err != nil {
		return err
	}
	return tx.Commit()
}

View File

@@ -1,91 +0,0 @@
package sqlite
import (
"crowsnest/internal/model"
"database/sql"
)
// ResponseModel bundles a database handle with the queries that read and
// write the responses table.
type ResponseModel struct {
	// DB is the handle every query of this model is executed on.
	DB *sql.DB
}
// All returns every row of the responses table. The returned slice is empty
// (never nil) when the table holds no rows. An error is returned when the
// query fails or a row cannot be scanned.
func (m *ResponseModel) All() ([]model.Response, error) {
	stmt := `
SELECT url, content, fetchDate, processed
FROM responses
`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	responses := []model.Response{}
	for rows.Next() {
		var r model.Response
		if err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed); err != nil {
			return nil, err
		}
		responses = append(responses, r)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return responses, nil
}
// UnprocessedUrls returns the url of every responses row whose processed
// column is false. The returned slice is empty (never nil) when there is no
// such row. An error is returned when the query fails or a row cannot be
// scanned.
func (m *ResponseModel) UnprocessedUrls() ([]string, error) {
	stmt := `
SELECT url
FROM responses
WHERE NOT processed
`
	rows, err := m.DB.Query(stmt)
	if err != nil {
		return nil, err
	}
	// Close the result set on every exit path; without this an early return
	// on a scan error keeps the connection checked out of the pool.
	defer rows.Close()

	urls := make([]string, 0)
	for rows.Next() {
		var u string
		if err := rows.Scan(&u); err != nil {
			return nil, err
		}
		urls = append(urls, u)
	}
	// rows.Next returning false can also mean iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return urls, nil
}
// GetByUrl looks up the responses row whose url column equals url and returns
// it. The error reported by scanning the row is passed through unchanged;
// database/sql reports sql.ErrNoRows when no row matched.
func (m *ResponseModel) GetByUrl(url string) (model.Response, error) {
	const lookup = `
SELECT url, content, fetchDate, processed
FROM responses
WHERE url = ?
`
	var found model.Response
	scanErr := m.DB.QueryRow(lookup, url).Scan(
		&found.Url,
		&found.Content,
		&found.FetchDate,
		&found.Processed,
	)
	return found, scanErr
}
// Insert adds a row to the responses table holding the given url and content.
// Only those two columns are written; the remaining columns are left to
// whatever the table schema provides (presumably defaults — verify against
// the schema). The error of the executed statement is returned.
func (m *ResponseModel) Insert(url string, content string) error {
	const insertResponse = `INSERT INTO responses (url, content) VALUES (?, ?)`
	if _, err := m.DB.Exec(insertResponse, url, content); err != nil {
		return err
	}
	return nil
}
// Processed sets the processed column to true on every responses row whose
// url column equals url. The error of the executed statement is returned; a
// url that matches no row is not reported as an error by this method.
func (m *ResponseModel) Processed(url string) error {
	const markProcessed = `UPDATE responses SET processed = true WHERE url = ?`
	if _, err := m.DB.Exec(markProcessed, url); err != nil {
		return err
	}
	return nil
}