remove author column from articles

This commit is contained in:
2025-01-07 11:59:10 +01:00
parent b16ebb9572
commit ce10e1e62b
4 changed files with 74 additions and 57 deletions

View File

@@ -0,0 +1,36 @@
-- +goose Up
-- Removes the author column from articles and rebuilds the generated
-- full-text search column so that it covers title and content only.
--
-- There is deliberately no explicit BEGIN/COMMIT here: goose runs every
-- migration inside its own transaction by default, and an inner COMMIT would
-- end that transaction before goose records the new schema version.
-- +goose StatementBegin
DROP INDEX IF EXISTS articles_fts_idx;
-- fts_vector is dropped first: the definition restored by the Down section
-- references author, so the generated column has to go before author can.
ALTER TABLE articles DROP COLUMN IF EXISTS fts_vector;
ALTER TABLE articles DROP COLUMN IF EXISTS author;
ALTER TABLE articles
ADD COLUMN fts_vector tsvector GENERATED ALWAYS AS (
to_tsvector('german', coalesce(title, '') || ' ' || coalesce(content, ''))
) STORED;
CREATE INDEX articles_fts_idx ON articles USING gin(fts_vector);
-- +goose StatementEnd
-- +goose Down
-- Restores the author column and the search vector that includes it.
-- The author values removed by Up cannot be recovered; the column is
-- recreated with an empty-string default.
-- +goose StatementBegin
ALTER TABLE articles ADD COLUMN IF NOT EXISTS author VARCHAR(255) DEFAULT '';
DROP INDEX IF EXISTS articles_fts_idx;
ALTER TABLE articles DROP COLUMN IF EXISTS fts_vector;
ALTER TABLE articles
ADD COLUMN fts_vector tsvector GENERATED ALWAYS AS (
to_tsvector('german', coalesce(title, '') || ' ' || coalesce(content, '') || ' ' || coalesce(author, ''))
) STORED;
CREATE INDEX articles_fts_idx ON articles USING gin(fts_vector);
-- +goose StatementEnd

View File

@@ -59,7 +59,7 @@ func (c *Collector) ExtractSpiegel(url string, body []byte) error {
whitespace := regexp.MustCompile(`\s+`) whitespace := regexp.MustCompile(`\s+`)
var exists bool var exists bool
var pagetype, title, content, datestr, author string var pagetype, title, content, datestr string
var tag *goquery.Selection var tag *goquery.Selection
var date time.Time var date time.Time
@@ -113,13 +113,6 @@ func (c *Collector) ExtractSpiegel(url string, body []byte) error {
return err return err
} }
// get author
tag = doc.Find("meta[name='author']")
author, exists = tag.Attr("content")
if !exists {
return errors.New("unable to extract article, no author tag")
}
// get content // get content
tag = doc.Find("main[id='Inhalt'] div > p") tag = doc.Find("main[id='Inhalt'] div > p")
@@ -139,7 +132,6 @@ func (c *Collector) ExtractSpiegel(url string, body []byte) error {
FetchDate: time.Now(), FetchDate: time.Now(),
Title: title, Title: title,
Content: content, Content: content,
Author: author,
} }
err = c.Articles.Insert(&article) err = c.Articles.Insert(&article)

View File

@@ -1,12 +1,10 @@
package model package model
import ( import (
"time"
//"strings"
"net/url" "net/url"
"time"
) )
// TODO docstring // TODO docstring
type Article struct { type Article struct {
Identifier int Identifier int
@@ -15,20 +13,17 @@ type Article struct {
FetchDate time.Time FetchDate time.Time
Title string Title string
Content string Content string
Author string
} }
// ArticleViewModel is the string-only presentation form of an Article, as
// produced by Article.ViewModel.
type ArticleViewModel struct {
	// Title is copied from Article.Title.
	Title string
	// PublishDate is Article.PublishDate in local time, formatted with the
	// layout "02.01.2006" (day.month.year).
	PublishDate string
	// SourceUrl is copied from Article.SourceUrl.
	SourceUrl string
	// ShortSource is the host name of SourceUrl when the URL parses.
	// NOTE(review): the fallback value for unparsable URLs is not visible
	// in this hunk — confirm against ViewModel.
	ShortSource string
	// Summary is derived from Article.Content.
	// NOTE(review): the exact shortening rule is outside this hunk — confirm.
	Summary string
}
// TODO docstring // TODO docstring
func (a *Article) ViewModel() *ArticleViewModel { func (a *Article) ViewModel() *ArticleViewModel {
summary := a.Content summary := a.Content
@@ -40,16 +35,10 @@ func (a *Article) ViewModel() *ArticleViewModel {
parsedURL, err := url.Parse(a.SourceUrl) parsedURL, err := url.Parse(a.SourceUrl)
if err == nil { if err == nil {
short_url = parsedURL.Hostname() short_url = parsedURL.Hostname()
//hostParts := strings.Split(short_url, ".")
//if len(hostParts) >= 2 {
// short_url = strings.Join(hostParts[len(hostParts)-2:], ".")
//}
} }
return &ArticleViewModel{ return &ArticleViewModel{
Title: a.Title, Title: a.Title,
Author: a.Author,
PublishDate: a.PublishDate.Local().Format("02.01.2006"), PublishDate: a.PublishDate.Local().Format("02.01.2006"),
SourceUrl: a.SourceUrl, SourceUrl: a.SourceUrl,
ShortSource: short_url, ShortSource: short_url,

View File

@@ -13,7 +13,7 @@ type ArticleModel struct {
// the connection to the database fails. // the connection to the database fails.
func (m *ArticleModel) All(limit int) ([]model.Article, error) { func (m *ArticleModel) All(limit int) ([]model.Article, error) {
stmt := ` stmt := `
SELECT id, title, sourceUrl, author, content, publishDate, fetchDate SELECT id, title, sourceUrl, content, publishDate, fetchDate
FROM articles FROM articles
ORDER BY publishDate DESC ORDER BY publishDate DESC
LIMIT $1 LIMIT $1
@@ -26,7 +26,7 @@ func (m *ArticleModel) All(limit int) ([]model.Article, error) {
articles := []model.Article{} articles := []model.Article{}
for rows.Next() { for rows.Next() {
a := model.Article{} a := model.Article{}
err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate) err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -46,7 +46,7 @@ func (m *ArticleModel) All(limit int) ([]model.Article, error) {
// database fails. // database fails.
func (m *ArticleModel) Search(query string) ([]model.Article, error) { func (m *ArticleModel) Search(query string) ([]model.Article, error) {
stmt := ` stmt := `
SELECT id, title, sourceurl, author, content, publishdate, fetchDate SELECT id, title, sourceurl, content, publishdate, fetchDate
FROM articles FROM articles
WHERE fts_vector @@ to_tsquery('german', $1) WHERE fts_vector @@ to_tsquery('german', $1)
ORDER BY ts_rank(fts_vector, to_tsquery('german', $1)) DESC ORDER BY ts_rank(fts_vector, to_tsquery('german', $1)) DESC
@@ -61,7 +61,7 @@ func (m *ArticleModel) Search(query string) ([]model.Article, error) {
articles := []model.Article{} articles := []model.Article{}
for rows.Next() { for rows.Next() {
a := model.Article{} a := model.Article{}
err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Author, &a.Content, &a.PublishDate, &a.FetchDate) err := rows.Scan(&a.Identifier, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -80,19 +80,19 @@ func (m *ArticleModel) Search(query string) ([]model.Article, error) {
// query fails. // query fails.
func (m *ArticleModel) Insert(a *model.Article) error { func (m *ArticleModel) Insert(a *model.Article) error {
// insert article // insert article
stmt := `INSERT INTO articles (title, sourceUrl, author, content, publishDate, fetchDate) stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
VALUES ($1, $2, $3, $4, $5, $6) VALUES ($1, $2, $3, $4, $5)
` `
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate) _, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate)
return err return err
} }
// TODO docstring // TODO docstring
func (m *ArticleModel) Update(a *model.Article) error { func (m *ArticleModel) Update(a *model.Article) error {
stmt := `UPDATE articles stmt := `UPDATE articles
SET title = $1, sourceUrl = $2, author = $3, content = $4, publishDate = $5, fetchDate = $6 SET title = $1, sourceUrl = $2, content = $4, publishDate = $5, fetchDate = $6
WHERE id = $7 WHERE id = $7
` `
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Author, a.Content, a.PublishDate, a.FetchDate, a.Identifier) _, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate, a.Identifier)
return err return err
} }