remove model files for responses
This commit is contained in:
@@ -3,6 +3,5 @@ package collectors
|
||||
import "crowsnest/internal/model/database"
|
||||
|
||||
type Collector struct {
|
||||
Responses *database.ResponseModel
|
||||
Articles *database.ArticleModel
|
||||
Articles *database.ArticleModel
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ func (c *Collector) CollectSpiegel() {
|
||||
collycollector := colly.NewCollector(
|
||||
colly.AllowedDomains("www.spiegel.de", "spiegel.de"),
|
||||
colly.CacheDir("./persistence/spiegel_cache"),
|
||||
colly.MaxDepth(5),
|
||||
colly.MaxDepth(3),
|
||||
)
|
||||
|
||||
// store articles
|
||||
@@ -42,7 +42,7 @@ func (c *Collector) CollectSpiegel() {
|
||||
})
|
||||
|
||||
// go through archive
|
||||
startDate := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
|
||||
startDate := time.Date(2020, time.January, 1, 0, 0, 0, 0, time.UTC)
|
||||
currentDate := time.Now()
|
||||
|
||||
for date := startDate; date.Before(currentDate) || date.Equal(currentDate); date = date.AddDate(0, 0, 1) {
|
||||
|
||||
@@ -19,7 +19,7 @@ func (c *Collector) CollectZeit() {
|
||||
collycollector := colly.NewCollector(
|
||||
colly.AllowedDomains("www.zeit.de", "zeit.de"),
|
||||
colly.CacheDir("./persistence/zeit_cache"),
|
||||
colly.MaxDepth(5),
|
||||
colly.MaxDepth(3),
|
||||
)
|
||||
|
||||
// store articles
|
||||
@@ -44,7 +44,7 @@ func (c *Collector) CollectZeit() {
|
||||
})
|
||||
|
||||
// go through archive
|
||||
startDate := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
|
||||
startDate := time.Date(2020, time.January, 1, 0, 0, 0, 0, time.UTC)
|
||||
//startDate := time.Date(1946, time.January, 1, 0, 0, 0, 0, time.UTC)
|
||||
currentDate := time.Now()
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"database/sql"
|
||||
"log"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
_ "github.com/lib/pq"
|
||||
)
|
||||
@@ -23,10 +24,20 @@ func main() {
|
||||
|
||||
// collect websites
|
||||
coll := collectors.Collector{
|
||||
Responses: &database.ResponseModel{DB: db},
|
||||
Articles: &database.ArticleModel{DB: db},
|
||||
Articles: &database.ArticleModel{DB: db},
|
||||
}
|
||||
|
||||
coll.SpiegelCollect()
|
||||
//coll.Zeit()
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(2)
|
||||
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
coll.CollectSpiegel()
|
||||
}()
|
||||
go func() {
	defer wg.Done()
	// Call the collector directly instead of spawning yet another goroutine:
	// with `go coll.CollectZeit()` this function returned at once, wg.Done
	// fired before the crawl had finished, and wg.Wait could let main exit
	// while the Zeit collector was still running.
	coll.CollectZeit()
}()
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"crowsnest/internal/model"
|
||||
"database/sql"
|
||||
)
|
||||
|
||||
// ResponseModel groups the database operations on the responses table. All of
// its methods run their statements through the DB handle.
type ResponseModel struct {
	// DB is the connection handle used for every query and statement.
	DB *sql.DB
}
|
||||
|
||||
// Get all the response object from the database. May throw an error if the
|
||||
// connection to the database fails.
|
||||
func (m *ResponseModel) All() ([]model.Response, error) {
|
||||
stmt := `
|
||||
SELECT url, content, fetchDate, processed
|
||||
FROM responses
|
||||
`
|
||||
rows, err := m.DB.Query(stmt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
responses := []model.Response{}
|
||||
for rows.Next() {
|
||||
r := model.Response{}
|
||||
err := rows.Scan(&r.Url, &r.Content, &r.FetchDate, &r.Processed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
responses = append(responses, r)
|
||||
}
|
||||
|
||||
if err = rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return responses, nil
|
||||
}
|
||||
|
||||
// Gets all those Response objects where the processed column is set to false.
|
||||
// May throw an error if the connection to the database fails.
|
||||
func (m *ResponseModel) UnprocessedUrls() ([]string, error) {
|
||||
stmt := `
|
||||
SELECT url
|
||||
FROM responses
|
||||
WHERE NOT processed
|
||||
`
|
||||
rows, err := m.DB.Query(stmt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
urls := make([]string, 0)
|
||||
for rows.Next() {
|
||||
r := ""
|
||||
err := rows.Scan(&r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
urls = append(urls, r)
|
||||
}
|
||||
|
||||
if err = rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return urls, nil
|
||||
}
|
||||
|
||||
// Checks if a given url exits in the database as a response. This may throw an
|
||||
// error if the connection to the database fails.
|
||||
func (m *ResponseModel) UrlExists(url string) (bool, error) {
|
||||
stmt := `
|
||||
SELECT count(url) > 0 FROM responses WHERE url = $1
|
||||
`
|
||||
|
||||
var result bool
|
||||
row := m.DB.QueryRow(stmt, url)
|
||||
err := row.Scan(&result)
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
// Gets a certain Response object by the unique url. This may throw an error if
|
||||
// there does not exist an response with the given url or the connection to the
|
||||
// database fails.
|
||||
func (m *ResponseModel) GetByUrl(url string) (model.Response, error) {
|
||||
stmt := `
|
||||
SELECT url, content, fetchDate, processed
|
||||
FROM responses
|
||||
WHERE url = $1
|
||||
`
|
||||
|
||||
res := model.Response{}
|
||||
row := m.DB.QueryRow(stmt, url)
|
||||
err := row.Scan(&res.Url, &res.Content, &res.FetchDate, &res.Processed)
|
||||
|
||||
return res, err
|
||||
}
|
||||
|
||||
// Inserts a new response object into the database given the url and response
|
||||
// body. This may fail on an unique contraint of the url or the connection to
|
||||
// the database.
|
||||
func (m *ResponseModel) Insert(url string, content []byte) error {
|
||||
// insert response
|
||||
stmt := `INSERT INTO responses (url, content) VALUES ($1, $2)`
|
||||
_, err := m.DB.Exec(stmt, url, content)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Updates a response object in the database using the url of the given response
|
||||
// object as the id. This may throw an error if connection to the database
|
||||
// fails.
|
||||
func (m *ResponseModel) Update(res *model.Response) error {
|
||||
// insert response
|
||||
stmt := `UPDATE responses SET content = $1, fetchDate = $2, processed = $3 WHERE url = $4`
|
||||
_, err := m.DB.Exec(stmt, res.Content, res.FetchDate, res.Processed, res.Url)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Sets the processed column of a certain response entry in the database, given
|
||||
// an url, to true. This may fail if the connection to the database fails.
|
||||
func (m *ResponseModel) Processed(url string) error {
|
||||
// insert response
|
||||
stmt := `UPDATE responses SET processed = true WHERE url = $1`
|
||||
_, err := m.DB.Exec(stmt, url)
|
||||
|
||||
return err
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
package model
|
||||
|
||||
import "time"
|
||||
|
||||
// Response is a simple cache entry for a request: one fetched page together
// with its bookkeeping data.
type Response struct {
	// Url is the address the entry belongs to.
	Url string
	// Content holds the raw response body.
	Content []byte
	// Processed is a flag stored alongside the entry; presumably it marks
	// whether the content has already been handled downstream.
	Processed bool
	// FetchDate is the timestamp stored with the entry.
	FetchDate time.Time
}
|
||||
Reference in New Issue
Block a user