add summarization of article, when collected
This commit is contained in:
@@ -22,7 +22,7 @@ func main() {
|
||||
// summarize documents
|
||||
documents := &database.DocumentRepository{DB: db}
|
||||
|
||||
go documents.Map(func(doc *model.Document) *model.Document {
|
||||
sumDoc := func(doc *model.Document) *model.Document {
|
||||
if doc.Summary == "" {
|
||||
summaryText, err := util.Summarize(doc.Content)
|
||||
if err == nil {
|
||||
@@ -32,7 +32,8 @@ func main() {
|
||||
log.Println(err.Error())
|
||||
}
|
||||
return doc
|
||||
})
|
||||
}
|
||||
go documents.Map(sumDoc)
|
||||
|
||||
// run web crawlers
|
||||
articles := &database.ArticleRepository{DB: db}
|
||||
@@ -40,10 +41,24 @@ func main() {
|
||||
crawler.Init()
|
||||
|
||||
crawler.SubscribeToSpiegelFeed(func(a *model.Article) {
|
||||
articles.Insert(a)
|
||||
id, err := articles.Insert(a)
|
||||
if err == nil {
|
||||
doc, err := documents.ByArticleId(id)
|
||||
if err == nil {
|
||||
doc = sumDoc(doc)
|
||||
documents.Update(doc)
|
||||
}
|
||||
}
|
||||
})
|
||||
crawler.SubscribeToZeitFeed(func(a *model.Article) {
|
||||
articles.Insert(a)
|
||||
id, err := articles.Insert(a)
|
||||
if err == nil {
|
||||
doc, err := documents.ByArticleId(id)
|
||||
if err == nil {
|
||||
doc = sumDoc(doc)
|
||||
documents.Update(doc)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// define app
|
||||
|
||||
@@ -17,7 +17,7 @@ func (app *App) Article(w http.ResponseWriter, req *http.Request) {
|
||||
}
|
||||
|
||||
// get articles
|
||||
article, err := app.articles.ById(int(id))
|
||||
article, err := app.articles.ById(int64(id))
|
||||
if err != nil {
|
||||
// treat as no result
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
|
||||
@@ -94,7 +94,7 @@ func (m *ArticleRepository) Search(query string) ([]*model.Article, error) {
|
||||
|
||||
// Will return an article given an id. This may fail if the connection to the
|
||||
// database fails or there is no article with the given id.
|
||||
func (m *ArticleRepository) ById(id int) (*model.Article, error) {
|
||||
func (m *ArticleRepository) ById(id int64) (*model.Article, error) {
|
||||
stmt := `
|
||||
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
|
||||
FROM articles a
|
||||
@@ -112,15 +112,16 @@ func (m *ArticleRepository) ById(id int) (*model.Article, error) {
|
||||
}
|
||||
|
||||
// Inserts a new article into the database. The id attribute of the given
|
||||
// article will be ignored. May throw an error if the execution of the database
|
||||
// query fails.
|
||||
func (m *ArticleRepository) Insert(a *model.Article) error {
|
||||
// insert article
|
||||
// article will be ignored. Returns the id of the last inserted element. May
|
||||
// return an error if the execution of the database query fails.
|
||||
func (m *ArticleRepository) Insert(a *model.Article) (int64, error) {
|
||||
stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
RETURNING id
|
||||
`
|
||||
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate)
|
||||
return err
|
||||
var lastInsertID int64
|
||||
err := m.DB.QueryRow(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate).Scan(&lastInsertID)
|
||||
return lastInsertID, err
|
||||
}
|
||||
|
||||
// Update an article in the database. Will use the id that is set in the article
|
||||
|
||||
@@ -40,12 +40,31 @@ func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, erro
|
||||
return docs, nil
|
||||
}
|
||||
|
||||
// Will return an article given an id. This may fail if the connection to the
|
||||
// Will return a document given an id. This may fail if the connection to the
|
||||
// database fails or there is no document with the given id.
|
||||
func (m *DocumentRepository) ById(id int) (*model.Document, error) {
|
||||
func (m *DocumentRepository) ById(id int64) (*model.Document, error) {
|
||||
stmt := `
|
||||
SELECT id, content, summary
|
||||
FROM documents
|
||||
WHERE id = $1
|
||||
`
|
||||
|
||||
rows := m.DB.QueryRow(stmt, id)
|
||||
|
||||
d := &model.Document{}
|
||||
if err := rows.Scan(&d.Id, &d.Content, &d.Summary); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Will return a document given the id of an article. This may fail if the
|
||||
// connection to the database fails or there is no article with the given id.
|
||||
func (m *DocumentRepository) ByArticleId(id int64) (*model.Document, error) {
|
||||
stmt := `
|
||||
SELECT d.id, d.content, d.summary
|
||||
FROM documents d JOIN articles a ON d.id = a.document_id
|
||||
WHERE a.id = $1
|
||||
`
|
||||
|
||||
|
||||
Reference in New Issue
Block a user