make capitalization uniform
src/internal/app/article.go (new file, 42 lines)
@@ -0,0 +1,42 @@
package app

import (
    "html/template"
    "net/http"
    "strconv"
)

// Endpoint that returns a single article for the id given in the URL path.
// Uses the base template.
func (app *App) Article(w http.ResponseWriter, req *http.Request) {
    // get id
    id, err := strconv.ParseUint(req.PathValue("id"), 10, 64)
    if err != nil {
        http.NotFound(w, req)
        return
    }

    // get article
    article, err := app.articles.ById(int(id))
    if err != nil {
        // treat as no result
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    // render template
    t := template.Must(template.ParseFiles(
        "assets/templates/articlePage.html",
        "assets/templates/layout.html",
    ))

    data := map[string]interface{}{
        "SelectedNavItemArticle": false,
        "ArticlePageVM":          article.PageViewModel(),
    }
    err = t.ExecuteTemplate(w, "base", data)
    if err != nil {
        http.Error(w, "Failed to render template", http.StatusInternalServerError)
        return
    }
}
src/internal/app/index.go (new file, 53 lines)
@@ -0,0 +1,53 @@
package app

import (
    "crowsnest/internal/model"
    "html/template"
    "net/http"
    "strconv"
)

// List the latest articles using the base template.
func (app *App) Index(w http.ResponseWriter, req *http.Request) {
    const pageSize = 15
    var limit, offset, pageId uint64 = pageSize, 0, 0
    var err error

    // get page number
    if pageId, err = strconv.ParseUint(req.PathValue("id"), 10, 32); err == nil {
        pageId--
        offset = pageId * pageSize
    }

    // get articles
    articleVMs, err := app.articles.AllArticleViewModels(int(limit), int(offset))
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    // get count of total articles
    totalCount, err := app.articles.CountAll()
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
    totalCount /= pageSize

    // render template
    t := template.Must(template.ParseFiles(
        "assets/templates/article.html",
        "assets/templates/layout.html",
        "assets/templates/components/pagination.html"))

    data := map[string]interface{}{
        "SelectedNavItemArticle": true,
        "ArticleVMs":             &articleVMs,
        "Paginations":            model.NewPaginationViewModel(uint(pageId+1), totalCount+1),
    }
    err = t.ExecuteTemplate(w, "base", data)
    if err != nil {
        http.Error(w, "Failed to render template", http.StatusInternalServerError)
        return
    }
}
src/internal/app/upsearch.go (new file, 42 lines)
@@ -0,0 +1,42 @@
package app

import (
    "html/template"
    "net/http"
)

// Endpoint that returns a list of articles given search terms in the post
// request of a search form. Uses the content template.
func (app *App) UpSearch(w http.ResponseWriter, req *http.Request) {
    // construct search query
    searchTerms := req.FormValue("search")
    if searchTerms == "" {
        app.Index(w, req)
        return
    }

    // get articles
    articleVMs, err := app.articles.SearchArticleViewModel(searchTerms)
    if err != nil {
        // treat as no result
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    // render template
    t := template.Must(template.ParseFiles(
        "assets/templates/article.html",
        "assets/templates/layout.html",
        "assets/templates/components/pagination.html"))

    data := map[string]interface{}{
        "SelectedNavItemArticle": true,
        "ArticleVMs":             &articleVMs,
        "Paginations":            nil,
    }
    err = t.ExecuteTemplate(w, "base", data)
    if err != nil {
        http.Error(w, "Failed to render template", http.StatusInternalServerError)
        return
    }
}
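
Note: these handlers read the id segment with req.PathValue, which requires Go 1.22+ route patterns. A minimal wiring sketch, with assumed route paths and an assumed way of constructing App (neither appears in this commit):

package main

import (
    "log"
    "net/http"

    "crowsnest/internal/app"
)

func main() {
    // hypothetical App value; this commit does not show how App is constructed
    a := &app.App{}

    mux := http.NewServeMux()
    // Go 1.22+ patterns expose the {id} segment via req.PathValue("id")
    mux.HandleFunc("GET /article/{id}", a.Article)
    mux.HandleFunc("GET /page/{id}", a.Index)
    mux.HandleFunc("GET /", a.Index)
    mux.HandleFunc("POST /search", a.UpSearch)

    log.Fatal(http.ListenAndServe(":8080", mux))
}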
src/internal/crawler/crawlerfacade.go (new file, 69 lines)
@@ -0,0 +1,69 @@
package crawler

import (
    "crowsnest/internal/model"
    "crowsnest/internal/util"

    "github.com/gocolly/colly/v2"
)

type CrawlerFacade struct {
    spiegelFeedDistributer *util.Distributer[*model.Article]
    zeitFeedDistributer    *util.Distributer[*model.Article]
}

func (cf *CrawlerFacade) Init() {
    // init
    cf.spiegelFeedDistributer = &util.Distributer[*model.Article]{}
    cf.spiegelFeedDistributer.Init()
    cf.zeitFeedDistributer = &util.Distributer[*model.Article]{}
    cf.zeitFeedDistributer.Init()

    // run spiegel feed
    sf := &WebFeed{}
    sf.Init(
        "https://www.spiegel.de/",
        colly.AllowedDomains("www.spiegel.de", "spiegel.de"),
        colly.CacheDir("./persistence/spiegel_cache"),
        colly.MaxDepth(1),
    )
    sf_feed := sf.Feed()
    sf_converter := ConverterSpiegel{}
    sf_converter.Init()

    go func() {
        for val := range sf_feed {
            article, err := sf_converter.Convert(val)
            if err != nil { continue }
            cf.spiegelFeedDistributer.Publish(article)
        }
    }()

    // run zeit feed
    zf := &WebFeed{}
    zf.Init(
        "https://www.zeit.de/index",
        colly.AllowedDomains("www.zeit.de", "zeit.de"),
        colly.CacheDir("./persistence/zeit_cache"),
        colly.MaxDepth(1),
    )
    zf_feed := zf.Feed()
    zf_converter := ZeitConverter{}
    zf_converter.Init()

    go func() {
        for val := range zf_feed {
            article, err := zf_converter.Convert(val)
            if err != nil { continue }
            cf.zeitFeedDistributer.Publish(article)
        }
    }()
}

func (cf *CrawlerFacade) SubscribeToSpiegelFeed(hook func(*model.Article)) {
    cf.spiegelFeedDistributer.Subscribe(hook)
}

func (cf *CrawlerFacade) SubscribeToZeitFeed(hook func(*model.Article)) {
    cf.zeitFeedDistributer.Subscribe(hook)
}
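
A minimal sketch of how the facade might be consumed; the logging hooks and the blocking select are assumptions, since this commit does not show the caller:

package main

import (
    "log"

    "crowsnest/internal/crawler"
    "crowsnest/internal/model"
)

func main() {
    cf := &crawler.CrawlerFacade{}
    cf.Init()

    // hooks are called asynchronously for every successfully converted article
    cf.SubscribeToSpiegelFeed(func(a *model.Article) {
        log.Println("spiegel:", a.Title)
    })
    cf.SubscribeToZeitFeed(func(a *model.Article) {
        log.Println("zeit:", a.Title)
    })

    select {} // keep the crawlers running
}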
src/internal/crawler/resource.go (new file, 6 lines)
@@ -0,0 +1,6 @@
package crawler

type Resource struct {
    Url  string
    Body string
}
src/internal/crawler/spiegelconverter.go (new file, 96 lines)
@@ -0,0 +1,96 @@
package crawler

import (
    "crowsnest/internal/model"
    "errors"
    "regexp"
    "strings"
    "time"

    "github.com/PuerkitoBio/goquery"
)

type ConverterSpiegel struct {
    pattern_paywall    *regexp.Regexp
    pattern_url        *regexp.Regexp
    pattern_whitespace *regexp.Regexp
}

func (c *ConverterSpiegel) Init() {
    c.pattern_paywall = regexp.MustCompile(`"paywall":{"attributes":{"is_active":true`)
    c.pattern_url = regexp.MustCompile(`^https://(www\.)?spiegel\.de.*`)
    c.pattern_whitespace = regexp.MustCompile(`\s+`)
}

func (c *ConverterSpiegel) Convert(res *Resource) (*model.Article, error) {
    // check url pattern
    if !c.pattern_url.Match([]byte(res.Url)) {
        return nil, errors.New("invalid url pattern")
    }

    // check for paywall
    if c.pattern_paywall.Match([]byte(res.Body)) {
        return nil, errors.New("unable to extract article due to paywall")
    }

    // construct goquery doc
    doc, err := goquery.NewDocumentFromReader(strings.NewReader(res.Body))
    if err != nil {
        return nil, err
    }

    // check for article type
    tag := doc.Find("meta[property='og:type']")
    pagetype, exists := tag.Attr("content")
    if !exists || pagetype != "article" {
        return nil, errors.New("unable to extract article, not of type article")
    }

    // get title
    tag = doc.Find("meta[property='og:title']")
    title, exists := tag.Attr("content")
    if !exists {
        return nil, errors.New("unable to extract article, no title tag")
    }

    // prepend description to content of article
    tag = doc.Find("meta[name='description']")
    content, exists := tag.Attr("content")
    content += " "
    if !exists {
        return nil, errors.New("unable to extract article, no description tag")
    }

    // get publishing date
    tag = doc.Find("meta[name='date']")
    datestr, exists := tag.Attr("content")
    if !exists {
        return nil, errors.New("unable to extract article, no date tag")
    }

    date, err := time.Parse("2006-01-02T15:04:05-07:00", datestr)
    if err != nil {
        return nil, err
    }

    // get content
    tag = doc.Find("main[id='Inhalt'] div > p")

    tag.Each(func(index int, p *goquery.Selection) {
        content += " " + p.Text()
    })

    // clean up content string
    content = string(c.pattern_whitespace.ReplaceAll([]byte(content), []byte(" ")))
    content = strings.ReplaceAll(content, "»", "\"")
    content = strings.ReplaceAll(content, "«", "\"")

    // create new article
    return &model.Article{
        SourceUrl:   res.Url,
        PublishDate: date,
        FetchDate:   time.Now(),
        Title:       title,
        Content:     content,
    }, nil
}
src/internal/crawler/webfeed.go (new file, 67 lines)
@@ -0,0 +1,67 @@
package crawler

import (
    "crowsnest/internal/util"
    "log"
    "strings"
    "time"

    "github.com/gocolly/colly/v2"
)

type WebFeed struct {
    feed      chan *Resource
    collector *colly.Collector
}

// Init the WebFeed, starting the process of collecting Resources.
func (sf *WebFeed) Init(indexUrl string, options ...colly.CollectorOption) {
    // create feed
    sf.feed = make(chan *Resource, 100)

    // set cache, domain pattern and max recursion depth
    sf.collector = colly.NewCollector(options...)

    // emit Resources, i.e. fetched pages, into the feed
    sf.collector.OnResponse(func(r *colly.Response) {
        url := r.Request.URL.String()
        body := string(r.Body)
        sf.feed <- &Resource{Url: url, Body: body}
    })

    // cascade
    sf.collector.OnHTML("a[href]", func(e *colly.HTMLElement) {
        url := e.Attr("href")
        if !strings.HasPrefix(url, "http") {
            return
        }
        e.Request.Visit(url)
    })

    // start runner
    go sf.runner(indexUrl)
}

// Get the channel into which the collected Resources will be written.
func (sf *WebFeed) Feed() <-chan *Resource {
    return sf.feed
}

func (sf *WebFeed) runner(indexUrl string) {
    for {
        // sleep for 5min
        time.Sleep(time.Second * 300)

        // collect index
        urls, err := util.GetAllURLs(indexUrl)
        if err != nil {
            log.Println("error in WebFeed runner: ", err.Error())
            continue
        }

        // visit urls
        for _, url := range urls {
            sf.collector.Visit(url)
        }
    }
}
src/internal/crawler/zeitconverter.go (new file, 100 lines)
@@ -0,0 +1,100 @@
package crawler

import (
    "crowsnest/internal/model"
    "errors"
    "regexp"
    "strings"
    "time"

    "github.com/PuerkitoBio/goquery"
)

type ZeitConverter struct {
    pattern_url        *regexp.Regexp
    pattern_whitespace *regexp.Regexp
}

func (c *ZeitConverter) Init() {
    c.pattern_url = regexp.MustCompile(`^https://(www\.)?zeit\.de[^#]*$`)
    c.pattern_whitespace = regexp.MustCompile(`\s+`)
}

func (c *ZeitConverter) Convert(res *Resource) (*model.Article, error) {
    // check url pattern
    if !c.pattern_url.Match([]byte(res.Url)) {
        return nil, errors.New("invalid url pattern")
    }

    // construct goquery doc
    doc, err := goquery.NewDocumentFromReader(strings.NewReader(res.Body))
    if err != nil {
        return nil, err
    }

    // check for article type
    tag := doc.Find("meta[property='og:type']")
    pagetype, exists := tag.Attr("content")
    if !exists || pagetype != "article" {
        return nil, errors.New("unable to extract article, not of type article")
    }

    // check for paywall
    tag = doc.Find("meta[property='article:content_tier']")
    pagetype, exists = tag.Attr("content")
    if !exists || pagetype != "free" {
        return nil, errors.New("unable to extract article due to paywall")
    }

    // get title
    tag = doc.Find("meta[property='og:title']")
    title, exists := tag.Attr("content")
    if !exists {
        return nil, errors.New("unable to extract article, no title tag")
    }

    // prepend description to content of article
    tag = doc.Find("meta[name='description']")
    content, exists := tag.Attr("content")
    content += " "
    if !exists {
        return nil, errors.New("unable to extract article, no description tag")
    }

    if strings.Contains(content, "Das Liveblog") {
        return nil, errors.New("unable to extract article, no support for liveblog")
    }

    // get publishing date
    tag = doc.Find("meta[name='date']")
    datestr, exists := tag.Attr("content")
    if !exists {
        return nil, errors.New("unable to extract article, no date tag")
    }

    date, err := time.Parse("2006-01-02T15:04:05-07:00", datestr)
    if err != nil {
        return nil, err
    }

    // get content
    tag = doc.Find("main > article > div.article-body p.article__item")

    tag.Each(func(index int, p *goquery.Selection) {
        content += " " + p.Text()
    })

    // clean up content string
    content = string(c.pattern_whitespace.ReplaceAll([]byte(content), []byte(" ")))
    content = strings.ReplaceAll(content, "»", "\"")
    content = strings.ReplaceAll(content, "«", "\"")

    // create new article
    return &model.Article{
        SourceUrl:   res.Url,
        PublishDate: date,
        FetchDate:   time.Now(),
        Title:       title,
        Content:     content,
    }, nil
}
src/internal/model/database/articlerepository.go (new file, 234 lines)
@@ -0,0 +1,234 @@
package database

import (
    "crowsnest/internal/model"
    "database/sql"
    "net/url"
    "strings"
)

type ArticleRepository struct {
    DB *sql.DB
}

// Gets all the article objects from the database. This may return an error if
// the connection to the database fails.
func (m *ArticleRepository) All(limit int, offset int) ([]model.Article, error) {
    stmt := `
        SELECT id, title, sourceUrl, content, publishDate, fetchDate
        FROM articles
        ORDER BY publishDate DESC
        LIMIT $1 OFFSET $2
    `
    rows, err := m.DB.Query(stmt, limit, offset)
    if err != nil {
        return nil, err
    }

    articles := []model.Article{}
    for rows.Next() {
        a := model.Article{}
        err := rows.Scan(&a.Id, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate)
        if err != nil {
            return nil, err
        }

        articles = append(articles, a)
    }

    if err = rows.Err(); err != nil {
        return nil, err
    }

    return articles, nil
}

func (m *ArticleRepository) AllArticleViewModels(limit int, offset int) ([]*model.ArticleViewModel, error) {
    stmt := `
        SELECT a.id, a.title, a.sourceUrl, a.publishDate, d.summary
        FROM articles a JOIN documents d ON a.document_id = d.id
        ORDER BY a.publishDate DESC
        LIMIT $1 OFFSET $2
    `
    rows, err := m.DB.Query(stmt, limit, offset)
    if err != nil {
        return nil, err
    }

    articleVMs := []*model.ArticleViewModel{}
    var sourceUrl string
    for rows.Next() {
        a := model.ArticleViewModel{}

        err := rows.Scan(&a.Id, &a.Title, &sourceUrl, &a.PublishDate, &a.Summary)
        if err != nil {
            return nil, err
        }

        // summary
        if a.Summary == "" {
            a.Summary = "N/A"
        }

        // short url
        parsedURL, err := url.Parse(sourceUrl)
        if err == nil {
            a.ShortSource = parsedURL.Hostname()
        } else {
            a.ShortSource = ""
        }

        // ai summary always false
        a.AiSummarized = false

        articleVMs = append(articleVMs, &a)
    }

    if err = rows.Err(); err != nil {
        return nil, err
    }

    return articleVMs, nil
}

// Counts all articles in the database. This may return an error if the
// connection to the database fails.
func (m *ArticleRepository) CountAll() (uint, error) {
    stmt := `SELECT count(id) FROM articles`

    rows := m.DB.QueryRow(stmt)

    count := uint(0)
    if err := rows.Scan(&count); err != nil {
        return 0, err
    }

    return count, nil
}

// Will use the full-text search features of the underlying database to search
// articles for a given search query. This may fail if the connection to the
// database fails.
func (m *ArticleRepository) SearchArticleViewModel(query string) ([]*model.ArticleViewModel, error) {
    stmt := `
        SELECT a.id, a.title, a.sourceUrl, a.publishDate, d.summary
        FROM articles a JOIN documents d ON a.document_id = d.id
        WHERE to_tsvector('german', d.content) @@ to_tsquery('german', $1)
        ORDER BY ts_rank(to_tsvector('german', d.content), to_tsquery('german', $1)) DESC
        LIMIT 10
    `

    query = strings.Join(strings.Split(strings.TrimSpace(query), " "), " | ")
    rows, err := m.DB.Query(stmt, query)
    if err != nil {
        return nil, err
    }

    articleVMs := []*model.ArticleViewModel{}
    for rows.Next() {
        a := &model.ArticleViewModel{}
        var sourceUrl string
        err := rows.Scan(&a.Id, &a.Title, &sourceUrl, &a.PublishDate, &a.Summary)
        if err != nil {
            return nil, err
        }
        // summary
        if a.Summary == "" {
            a.Summary = "N/A"
        }

        // short url
        parsedURL, err := url.Parse(sourceUrl)
        if err == nil {
            a.ShortSource = parsedURL.Hostname()
        } else {
            a.ShortSource = ""
        }

        // ai summary always false
        a.AiSummarized = false

        articleVMs = append(articleVMs, a)
    }

    if err = rows.Err(); err != nil {
        return nil, err
    }
    return articleVMs, nil
}

// Will use the full-text search features of the underlying database to search
// articles for a given search query. This may fail if the connection to the
// database fails.
func (m *ArticleRepository) Search(query string) ([]model.Article, error) {
    stmt := `
        SELECT a.id, a.title, a.sourceUrl, a.content, a.publishDate, a.fetchDate
        FROM articles a JOIN documents d ON a.document_id = d.id
        WHERE to_tsvector('german', d.content) @@ to_tsquery('german', $1)
        ORDER BY ts_rank(to_tsvector('german', d.content), to_tsquery('german', $1)) DESC
        LIMIT 10
    `

    query = strings.Join(strings.Split(strings.TrimSpace(query), " "), " | ")
    rows, err := m.DB.Query(stmt, query)
    if err != nil {
        return nil, err
    }

    articles := []model.Article{}
    for rows.Next() {
        a := model.Article{}
        err := rows.Scan(&a.Id, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate)
        if err != nil {
            return nil, err
        }

        articles = append(articles, a)
    }

    if err = rows.Err(); err != nil {
        return nil, err
    }
    return articles, nil
}

// Will return an article given an id. This may fail if the connection to the
// database fails or there is no article with the given id.
func (m *ArticleRepository) ById(id int) (*model.Article, error) {
    stmt := `
        SELECT a.id, a.title, a.sourceUrl, a.content, a.publishDate, a.fetchDate
        FROM articles a
        WHERE a.id = $1
    `

    rows := m.DB.QueryRow(stmt, id)

    a := &model.Article{}
    if err := rows.Scan(&a.Id, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate); err != nil {
        return nil, err
    }

    return a, nil
}

// Inserts a new article into the database. The id attribute of the given
// article will be ignored. May return an error if the execution of the
// database query fails.
func (m *ArticleRepository) Insert(a *model.Article) error {
    // insert article
    stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
        VALUES ($1, $2, $3, $4, $5)
    `
    _, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate)
    return err
}

func (m *ArticleRepository) Update(a *model.Article) error {
    stmt := `UPDATE articles
        SET title = $1, sourceUrl = $2, content = $3, publishDate = $4, fetchDate = $5
        WHERE id = $6
    `
    _, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate, a.Id)
    return err
}
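
The two search methods rewrite the raw query into a Postgres tsquery by OR-ing the whitespace-separated terms; a standalone illustration of that transform (not part of the commit):

package main

import (
    "fmt"
    "strings"
)

func main() {
    query := "  energie wende  "
    // same transform as SearchArticleViewModel: trim, split on spaces, OR the terms
    tsquery := strings.Join(strings.Split(strings.TrimSpace(query), " "), " | ")
    fmt.Println(tsquery) // "energie | wende"
}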
src/internal/model/database/documentrepository.go (new file, 109 lines)
@@ -0,0 +1,109 @@
package database

import (
    "crowsnest/internal/model"
    "database/sql"
)

type DocumentRepository struct {
    DB *sql.DB
}

// Gets all the document objects from the database. This may return an error if
// the connection to the database fails.
func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, error) {
    stmt := `
        SELECT id, content, summary
        FROM documents
        LIMIT $1 OFFSET $2
    `
    rows, err := d.DB.Query(stmt, limit, offset)
    if err != nil {
        return nil, err
    }

    docs := []*model.Document{}
    for rows.Next() {
        d := model.Document{}
        err := rows.Scan(&d.Id, &d.Content, &d.Summary)
        if err != nil {
            return nil, err
        }

        docs = append(docs, &d)
    }

    if err = rows.Err(); err != nil {
        return nil, err
    }

    return docs, nil
}

// Will return a document given an id. This may fail if the connection to the
// database fails or there is no document with the given id.
func (m *DocumentRepository) ById(id int) (*model.Document, error) {
    stmt := `
        SELECT id, content, summary
        FROM documents
        WHERE id = $1
    `

    rows := m.DB.QueryRow(stmt, id)

    d := &model.Document{}
    if err := rows.Scan(&d.Id, &d.Content, &d.Summary); err != nil {
        return nil, err
    }

    return d, nil
}

// Counts all documents in the database. This may return an error if the
// connection to the database fails.
func (d *DocumentRepository) CountAll() (uint, error) {
    stmt := `SELECT count(id) FROM documents`

    rows := d.DB.QueryRow(stmt)

    count := uint(0)
    if err := rows.Scan(&count); err != nil {
        return 0, err
    }

    return count, nil
}

func (m *DocumentRepository) Update(d *model.Document) error {
    stmt := `UPDATE documents
        SET content = $1, summary = $2
        WHERE id = $3
    `
    _, err := m.DB.Exec(stmt, d.Content, d.Summary, d.Id)
    return err
}

func (d *DocumentRepository) Map(transform func(*model.Document) *model.Document) (int, error) {
    processed := 0

    count, err := d.CountAll()
    if err != nil {
        return processed, err
    }

    for i := 0; i < int(count); i += 10 {
        docs, err := d.All(10, i)
        if err != nil {
            return processed, err
        }

        for _, doc := range docs {
            new_doc := transform(doc)
            err = d.Update(new_doc)
            if err != nil { return processed, err }
            processed++
        }
    }

    return processed, nil
}
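
A usage sketch for Map, e.g. batch-filling missing summaries; the Postgres driver, the connection string, and the summarize helper are assumptions for illustration, not part of this commit:

package main

import (
    "database/sql"
    "log"

    _ "github.com/lib/pq" // assumed driver; the commit does not show which one the project uses

    "crowsnest/internal/model"
    "crowsnest/internal/model/database"
)

// summarize is a hypothetical stand-in for whatever produces a summary;
// naive byte-based truncation, for illustration only.
func summarize(content string) string {
    if len(content) > 200 {
        return content[:200] + "..."
    }
    return content
}

func main() {
    db, err := sql.Open("postgres", "postgres://localhost/crowsnest?sslmode=disable") // assumed DSN
    if err != nil {
        log.Fatal(err)
    }
    repo := &database.DocumentRepository{DB: db}

    // Map walks all documents in batches of 10 and writes each transformed copy back.
    n, err := repo.Map(func(doc *model.Document) *model.Document {
        if doc.Summary == "" {
            doc.Summary = summarize(doc.Content)
        }
        return doc
    })
    if err != nil {
        log.Printf("stopped after %d documents: %v", n, err)
    }
}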
src/internal/util/distributer.go (new file, 37 lines)
@@ -0,0 +1,37 @@
package util

type Distributer[T IClone[T]] struct {
    queue chan T
    hooks []func(T)
}

func (d *Distributer[T]) Init() {
    d.queue = make(chan T, 100)
    d.hooks = make([]func(T), 0)
}

// Distribute a copy of an item to every hook that has subscribed to this
// Distributer.
func (d *Distributer[T]) Publish(item T) {
    d.queue <- item
}

// Add a new hook to the Distributer. The hook will be called async whenever a
// new item is published.
func (d *Distributer[T]) Subscribe(hook func(T)) {
    d.hooks = append(d.hooks, hook)
    if len(d.hooks) == 1 {
        go d.runner()
    }
}

// Will be started to run async when Subscribe is first called. Whenever
// Publish is called the runner will distribute a clone of the new item to
// every hook.
func (d *Distributer[T]) runner() {
    for val := range d.queue {
        for _, f := range d.hooks {
            go f(val.Clone())
        }
    }
}
src/internal/util/iclone.go (new file, 5 lines)
@@ -0,0 +1,5 @@
package util

type IClone[T any] interface {
    Clone() T
}
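
A minimal sketch of the two utilities together: any type whose pointer implements Clone can be fanned out through a Distributer. The Message type here is hypothetical; in this commit the concrete element type is *model.Article:

package main

import (
    "fmt"
    "time"

    "crowsnest/internal/util"
)

// Message is a hypothetical payload; Clone returns a copy so every
// subscriber gets its own value, as Distributer.runner expects.
type Message struct {
    Text string
}

func (m *Message) Clone() *Message {
    c := *m
    return &c
}

func main() {
    d := &util.Distributer[*Message]{}
    d.Init()

    d.Subscribe(func(m *Message) { fmt.Println("hook A:", m.Text) })
    d.Subscribe(func(m *Message) { fmt.Println("hook B:", m.Text) })

    d.Publish(&Message{Text: "hello"})
    time.Sleep(100 * time.Millisecond) // hooks run asynchronously
}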