add summarization of article, when collected

This commit is contained in:
2025-01-22 09:02:32 +01:00
parent 2a57a840a6
commit a5a1a974fc
4 changed files with 61 additions and 26 deletions

View File

@@ -19,31 +19,46 @@ func main() {
log.Fatal("failed to connect to database due to", err.Error())
}
// summarize documents
documents := &database.DocumentRepository{DB: db}
// summarize documents
documents := &database.DocumentRepository{DB: db}
go documents.Map(func(doc *model.Document) *model.Document {
if doc.Summary == "" {
summaryText, err := util.Summarize(doc.Content)
if err == nil {
doc.Summary = summaryText
return doc
}
log.Println(err.Error())
}
return doc
})
sumDoc := func(doc *model.Document) *model.Document {
if doc.Summary == "" {
summaryText, err := util.Summarize(doc.Content)
if err == nil {
doc.Summary = summaryText
return doc
}
log.Println(err.Error())
}
return doc
}
go documents.Map(sumDoc)
// run web crawlers
articles := &database.ArticleRepository{DB: db}
articles := &database.ArticleRepository{DB: db}
crawler := crawler.CrawlerFacade{}
crawler.Init()
crawler.SubscribeToSpiegelFeed(func(a *model.Article) {
articles.Insert(a)
id, err := articles.Insert(a)
if err == nil {
doc, err := documents.ByArticleId(id)
if err == nil {
doc = sumDoc(doc)
documents.Update(doc)
}
}
})
crawler.SubscribeToZeitFeed(func(a *model.Article) {
articles.Insert(a)
id, err := articles.Insert(a)
if err == nil {
doc, err := documents.ByArticleId(id)
if err == nil {
doc = sumDoc(doc)
documents.Update(doc)
}
}
})
// define app

View File

@@ -17,7 +17,7 @@ func (app *App) Article(w http.ResponseWriter, req *http.Request) {
}
// get articles
article, err := app.articles.ById(int(id))
article, err := app.articles.ById(int64(id))
if err != nil {
// treat as no result
http.Error(w, err.Error(), http.StatusInternalServerError)

View File

@@ -94,7 +94,7 @@ func (m *ArticleRepository) Search(query string) ([]*model.Article, error) {
// Will return an article given an id. This may fail if the connection to the
// database fails or there is no article with the given id.
func (m *ArticleRepository) ById(id int) (*model.Article, error) {
func (m *ArticleRepository) ById(id int64) (*model.Article, error) {
stmt := `
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
FROM articles a
@@ -112,15 +112,16 @@ func (m *ArticleRepository) ById(id int) (*model.Article, error) {
}
// Inserts a new article into the database. The id attribute of the given
// article will be ignored. May throw an error if the execution of the database
// query fails.
func (m *ArticleRepository) Insert(a *model.Article) error {
// insert article
// article will be ignored. Returns the id of the newly inserted article. May
// return an error if the execution of the database query fails.
func (m *ArticleRepository) Insert(a *model.Article) (int64, error) {
stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
VALUES ($1, $2, $3, $4, $5)
RETURNING id
`
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate)
return err
var lastInsertID int64
err := m.DB.QueryRow(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate).Scan(&lastInsertID)
return lastInsertID, err
}
// Update an article in the database. Will use the id that is set in the article

View File

@@ -40,12 +40,31 @@ func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, erro
return docs, nil
}
// Will return an article given an id. This may fail if the connection to the
// Will return a document given an id. This may fail if the connection to the
// database fails or there is no document with the given id.
func (m *DocumentRepository) ById(id int) (*model.Document, error) {
func (m *DocumentRepository) ById(id int64) (*model.Document, error) {
stmt := `
SELECT id, content, summary
FROM documents
WHERE id = $1
`
rows := m.DB.QueryRow(stmt, id)
d := &model.Document{}
if err := rows.Scan(&d.Id, &d.Content, &d.Summary); err != nil {
return nil, err
}
return d, nil
}
// Will return a document given an id of an article. This may fail if the
// connection to the database fails or there is no article with the given id.
func (m *DocumentRepository) ByArticleId(id int64) (*model.Document, error) {
stmt := `
SELECT d.id, d.content, d.summary
FROM documents d JOIN articles a ON d.id = a.document_id
WHERE a.id = $1
`