From a5a1a974fc2d0cbb0601d627b9c5cd65021bc0a6 Mon Sep 17 00:00:00 2001 From: Elias Kohout Date: Wed, 22 Jan 2025 09:02:32 +0100 Subject: [PATCH] add summarization of article, when collected --- src/cmd/frontend/main.go | 47 ++++++++++++------- src/internal/app/article.go | 2 +- .../model/database/articlerepository.go | 15 +++--- .../model/database/documentrepository.go | 23 ++++++++- 4 files changed, 61 insertions(+), 26 deletions(-) diff --git a/src/cmd/frontend/main.go b/src/cmd/frontend/main.go index 8fc7f25..65d91a2 100644 --- a/src/cmd/frontend/main.go +++ b/src/cmd/frontend/main.go @@ -19,31 +19,46 @@ func main() { log.Fatal("failed to connect to database due to", err.Error()) } - // summarize documents - documents := &database.DocumentRepository{DB: db} + // summarize documents + documents := &database.DocumentRepository{DB: db} - go documents.Map(func(doc *model.Document) *model.Document { - if doc.Summary == "" { - summaryText, err := util.Summarize(doc.Content) - if err == nil { - doc.Summary = summaryText - return doc - } - log.Println(err.Error()) - } - return doc - }) + sumDoc := func(doc *model.Document) *model.Document { + if doc.Summary == "" { + summaryText, err := util.Summarize(doc.Content) + if err == nil { + doc.Summary = summaryText + return doc + } + log.Println(err.Error()) + } + return doc + } + go documents.Map(sumDoc) // run web crawlers - articles := &database.ArticleRepository{DB: db} + articles := &database.ArticleRepository{DB: db} crawler := crawler.CrawlerFacade{} crawler.Init() crawler.SubscribeToSpiegelFeed(func(a *model.Article) { - articles.Insert(a) + id, err := articles.Insert(a) + if err == nil { + doc, err := documents.ByArticleId(id) + if err == nil { + doc = sumDoc(doc) + documents.Update(doc) + } + } }) crawler.SubscribeToZeitFeed(func(a *model.Article) { - articles.Insert(a) + id, err := articles.Insert(a) + if err == nil { + doc, err := documents.ByArticleId(id) + if err == nil { + doc = sumDoc(doc) + documents.Update(doc) + } + } }) // define app diff --git a/src/internal/app/article.go b/src/internal/app/article.go index 028c666..81f56e2 100644 --- a/src/internal/app/article.go +++ b/src/internal/app/article.go @@ -17,7 +17,7 @@ func (app *App) Article(w http.ResponseWriter, req *http.Request) { } // get articles - article, err := app.articles.ById(int(id)) + article, err := app.articles.ById(int64(id)) if err != nil { // treat as no result http.Error(w, err.Error(), http.StatusInternalServerError) diff --git a/src/internal/model/database/articlerepository.go b/src/internal/model/database/articlerepository.go index 467a670..8c6c0a2 100644 --- a/src/internal/model/database/articlerepository.go +++ b/src/internal/model/database/articlerepository.go @@ -94,7 +94,7 @@ func (m *ArticleRepository) Search(query string) ([]*model.Article, error) { // Will return an article given an id. This may fail if the connection to the // database fails or there is no aritcle with the given id. -func (m *ArticleRepository) ById(id int) (*model.Article, error) { +func (m *ArticleRepository) ById(id int64) (*model.Article, error) { stmt := ` SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate FROM articles a @@ -112,15 +112,16 @@ func (m *ArticleRepository) ById(id int) (*model.Article, error) { } // Inserts a new article into the database. The id attribute of the given -// article will be ignored. May throw an error if the execution of the database -// query fails. -func (m *ArticleRepository) Insert(a *model.Article) error { - // insert article +// article will be ignored. Returns the id of the last inserted element. May +// throw an error if the execution of the database query fails. +func (m *ArticleRepository) Insert(a *model.Article) (int64, error) { stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate) VALUES ($1, $2, $3, $4, $5) + RETURNING id ` - _, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate) - return err + var lastInsertID int64 + err := m.DB.QueryRow(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate).Scan(&lastInsertID) + return lastInsertID, err } // Update an article in the database. Will use the id that is set in the article diff --git a/src/internal/model/database/documentrepository.go b/src/internal/model/database/documentrepository.go index 0bb6adc..a9e0618 100644 --- a/src/internal/model/database/documentrepository.go +++ b/src/internal/model/database/documentrepository.go @@ -40,12 +40,31 @@ func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, erro return docs, nil } -// Will return an article given an id. This may fail if the connection to the +// Will return an document given an id. This may fail if the connection to the // database fails or there is no aritcle with the given id. -func (m *DocumentRepository) ById(id int) (*model.Document, error) { +func (m *DocumentRepository) ById(id int64) (*model.Document, error) { stmt := ` SELECT id, content, summary FROM documents + WHERE id = $1 + ` + + rows := m.DB.QueryRow(stmt, id) + + d := &model.Document{} + if err := rows.Scan(&d.Id, &d.Content, &d.Summary); err != nil { + return nil, err + } + + return d, nil +} + +// Will return an document given an id of an article. This may fail if the +// connection to the database fails or there is no aritcle with the given id. +func (m *DocumentRepository) ByArticleId(id int64) (*model.Document, error) { + stmt := ` + SELECT d.id, d.content, d.summary + FROM documents d JOIN articles a ON d.id = a.document_id WHERE a.id = $1 `