add summarization of article, when collected
This commit is contained in:
@@ -22,7 +22,7 @@ func main() {
|
|||||||
// summarize documents
|
// summarize documents
|
||||||
documents := &database.DocumentRepository{DB: db}
|
documents := &database.DocumentRepository{DB: db}
|
||||||
|
|
||||||
go documents.Map(func(doc *model.Document) *model.Document {
|
sumDoc := func(doc *model.Document) *model.Document {
|
||||||
if doc.Summary == "" {
|
if doc.Summary == "" {
|
||||||
summaryText, err := util.Summarize(doc.Content)
|
summaryText, err := util.Summarize(doc.Content)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -32,7 +32,8 @@ func main() {
|
|||||||
log.Println(err.Error())
|
log.Println(err.Error())
|
||||||
}
|
}
|
||||||
return doc
|
return doc
|
||||||
})
|
}
|
||||||
|
go documents.Map(sumDoc)
|
||||||
|
|
||||||
// run web crawlers
|
// run web crawlers
|
||||||
articles := &database.ArticleRepository{DB: db}
|
articles := &database.ArticleRepository{DB: db}
|
||||||
@@ -40,10 +41,24 @@ func main() {
|
|||||||
crawler.Init()
|
crawler.Init()
|
||||||
|
|
||||||
crawler.SubscribeToSpiegelFeed(func(a *model.Article) {
|
crawler.SubscribeToSpiegelFeed(func(a *model.Article) {
|
||||||
articles.Insert(a)
|
id, err := articles.Insert(a)
|
||||||
|
if err == nil {
|
||||||
|
doc, err := documents.ByArticleId(id)
|
||||||
|
if err == nil {
|
||||||
|
doc = sumDoc(doc)
|
||||||
|
documents.Update(doc)
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
crawler.SubscribeToZeitFeed(func(a *model.Article) {
|
crawler.SubscribeToZeitFeed(func(a *model.Article) {
|
||||||
articles.Insert(a)
|
id, err := articles.Insert(a)
|
||||||
|
if err == nil {
|
||||||
|
doc, err := documents.ByArticleId(id)
|
||||||
|
if err == nil {
|
||||||
|
doc = sumDoc(doc)
|
||||||
|
documents.Update(doc)
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
// define app
|
// define app
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ func (app *App) Article(w http.ResponseWriter, req *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get articles
|
// get articles
|
||||||
article, err := app.articles.ById(int(id))
|
article, err := app.articles.ById(int64(id))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// treat as no result
|
// treat as no result
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ func (m *ArticleRepository) Search(query string) ([]*model.Article, error) {
|
|||||||
|
|
||||||
// Will return an article given an id. This may fail if the connection to the
|
// Will return an article given an id. This may fail if the connection to the
|
||||||
// database fails or there is no article with the given id.
|
// database fails or there is no article with the given id.
|
||||||
func (m *ArticleRepository) ById(id int) (*model.Article, error) {
|
func (m *ArticleRepository) ById(id int64) (*model.Article, error) {
|
||||||
stmt := `
|
stmt := `
|
||||||
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
|
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
|
||||||
FROM articles a
|
FROM articles a
|
||||||
@@ -112,15 +112,16 @@ func (m *ArticleRepository) ById(id int) (*model.Article, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Inserts a new article into the database. The id attribute of the given
|
// Inserts a new article into the database. The id attribute of the given
|
||||||
// article will be ignored. May throw an error if the execution of the database
|
// article will be ignored. Returns the id of the last inserted element. May
|
||||||
// query fails.
|
// throw an error if the execution of the database query fails.
|
||||||
func (m *ArticleRepository) Insert(a *model.Article) error {
|
func (m *ArticleRepository) Insert(a *model.Article) (int64, error) {
|
||||||
// insert article
|
|
||||||
stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
|
stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
|
||||||
VALUES ($1, $2, $3, $4, $5)
|
VALUES ($1, $2, $3, $4, $5)
|
||||||
|
RETURNING id
|
||||||
`
|
`
|
||||||
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate)
|
var lastInsertID int64
|
||||||
return err
|
err := m.DB.QueryRow(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate).Scan(&lastInsertID)
|
||||||
|
return lastInsertID, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update an article in the database. Will use the id that is set in the article
|
// Update an article in the database. Will use the id that is set in the article
|
||||||
|
|||||||
@@ -40,12 +40,31 @@ func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, erro
|
|||||||
return docs, nil
|
return docs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Will return an article given an id. This may fail if the connection to the
|
// Will return a document given an id. This may fail if the connection to the
|
||||||
// database fails or there is no article with the given id.
|
// database fails or there is no document with the given id.
|
||||||
func (m *DocumentRepository) ById(id int) (*model.Document, error) {
|
func (m *DocumentRepository) ById(id int64) (*model.Document, error) {
|
||||||
stmt := `
|
stmt := `
|
||||||
SELECT id, content, summary
|
SELECT id, content, summary
|
||||||
FROM documents
|
FROM documents
|
||||||
|
WHERE id = $1
|
||||||
|
`
|
||||||
|
|
||||||
|
rows := m.DB.QueryRow(stmt, id)
|
||||||
|
|
||||||
|
d := &model.Document{}
|
||||||
|
if err := rows.Scan(&d.Id, &d.Content, &d.Summary); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return d, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Will return a document given the id of an article. This may fail if the
|
||||||
|
// connection to the database fails or there is no article with the given id.
|
||||||
|
func (m *DocumentRepository) ByArticleId(id int64) (*model.Document, error) {
|
||||||
|
stmt := `
|
||||||
|
SELECT d.id, d.content, d.summary
|
||||||
|
FROM documents d JOIN articles a ON d.id = a.document_id
|
||||||
WHERE a.id = $1
|
WHERE a.id = $1
|
||||||
`
|
`
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user