add summarization of article, when collected

This commit is contained in:
2025-01-22 09:02:32 +01:00
parent 2a57a840a6
commit a5a1a974fc
4 changed files with 61 additions and 26 deletions

View File

@@ -22,7 +22,7 @@ func main() {
// summarize documents // summarize documents
documents := &database.DocumentRepository{DB: db} documents := &database.DocumentRepository{DB: db}
go documents.Map(func(doc *model.Document) *model.Document { sumDoc := func(doc *model.Document) *model.Document {
if doc.Summary == "" { if doc.Summary == "" {
summaryText, err := util.Summarize(doc.Content) summaryText, err := util.Summarize(doc.Content)
if err == nil { if err == nil {
@@ -32,7 +32,8 @@ func main() {
log.Println(err.Error()) log.Println(err.Error())
} }
return doc return doc
}) }
go documents.Map(sumDoc)
// run web crawlers // run web crawlers
articles := &database.ArticleRepository{DB: db} articles := &database.ArticleRepository{DB: db}
@@ -40,10 +41,24 @@ func main() {
crawler.Init() crawler.Init()
crawler.SubscribeToSpiegelFeed(func(a *model.Article) { crawler.SubscribeToSpiegelFeed(func(a *model.Article) {
articles.Insert(a) id, err := articles.Insert(a)
if err == nil {
doc, err := documents.ByArticleId(id)
if err == nil {
doc = sumDoc(doc)
documents.Update(doc)
}
}
}) })
crawler.SubscribeToZeitFeed(func(a *model.Article) { crawler.SubscribeToZeitFeed(func(a *model.Article) {
articles.Insert(a) id, err := articles.Insert(a)
if err == nil {
doc, err := documents.ByArticleId(id)
if err == nil {
doc = sumDoc(doc)
documents.Update(doc)
}
}
}) })
// define app // define app

View File

@@ -17,7 +17,7 @@ func (app *App) Article(w http.ResponseWriter, req *http.Request) {
} }
// get articles // get articles
article, err := app.articles.ById(int(id)) article, err := app.articles.ById(int64(id))
if err != nil { if err != nil {
// treat as no result // treat as no result
http.Error(w, err.Error(), http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError)

View File

@@ -94,7 +94,7 @@ func (m *ArticleRepository) Search(query string) ([]*model.Article, error) {
// Will return an article given an id. This may fail if the connection to the // Will return an article given an id. This may fail if the connection to the
// database fails or there is no article with the given id. // database fails or there is no article with the given id.
func (m *ArticleRepository) ById(id int) (*model.Article, error) { func (m *ArticleRepository) ById(id int64) (*model.Article, error) {
stmt := ` stmt := `
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
FROM articles a FROM articles a
@@ -112,15 +112,16 @@ func (m *ArticleRepository) ById(id int) (*model.Article, error) {
} }
// Inserts a new article into the database. The id attribute of the given // Inserts a new article into the database. The id attribute of the given
// article will be ignored. May return an error if the execution of the database // article will be ignored. Returns the id of the last inserted element. May
// query fails. // return an error if the execution of the database query fails.
func (m *ArticleRepository) Insert(a *model.Article) error { func (m *ArticleRepository) Insert(a *model.Article) (int64, error) {
// insert article
stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate) stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
VALUES ($1, $2, $3, $4, $5) VALUES ($1, $2, $3, $4, $5)
RETURNING id
` `
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate) var lastInsertID int64
return err err := m.DB.QueryRow(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate).Scan(&lastInsertID)
return lastInsertID, err
} }
// Update an article in the database. Will use the id that is set in the article // Update an article in the database. Will use the id that is set in the article

View File

@@ -40,12 +40,31 @@ func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, erro
return docs, nil return docs, nil
} }
// Will return an article given an id. This may fail if the connection to the // Will return a document given an id. This may fail if the connection to the
// database fails or there is no article with the given id. // database fails or there is no article with the given id.
func (m *DocumentRepository) ById(id int) (*model.Document, error) { func (m *DocumentRepository) ById(id int64) (*model.Document, error) {
stmt := ` stmt := `
SELECT id, content, summary SELECT id, content, summary
FROM documents FROM documents
WHERE id = $1
`
rows := m.DB.QueryRow(stmt, id)
d := &model.Document{}
if err := rows.Scan(&d.Id, &d.Content, &d.Summary); err != nil {
return nil, err
}
return d, nil
}
// Will return a document given an id of an article. This may fail if the
// connection to the database fails or there is no article with the given id.
func (m *DocumentRepository) ByArticleId(id int64) (*model.Document, error) {
stmt := `
SELECT d.id, d.content, d.summary
FROM documents d JOIN articles a ON d.id = a.document_id
WHERE a.id = $1 WHERE a.id = $1
` `