Compare commits

...

34 Commits

Author SHA1 Message Date
211f9dd876 copy paste from personal-website because it does work there
All checks were successful
Build and Push Docker Container / build-and-push (push) Successful in 1m22s
2025-10-14 18:52:51 +02:00
2d387e7ae4 revert 04936b0aff
Some checks failed
Build and Push Docker Container / build-and-push (push) Failing after 5s
revert change username in pipeline
2025-10-14 18:47:50 +02:00
04936b0aff change username in pipeline
Some checks failed
Build and Push Docker Container / build-and-push (push) Failing after 4s
2025-10-14 18:47:23 +02:00
1d15fd9df6 Update Titel of pipeline step
Some checks failed
Build and Push Docker Container / build-and-push (push) Failing after 5s
2025-10-14 18:36:42 +02:00
1853267b76 change workflow to new git url
Some checks failed
Build and Push Docker Container / build-and-push (push) Failing after 5s
2025-10-14 18:32:03 +02:00
b5e5512c5e change ownership of container
Some checks failed
Build and Push Docker Container / build-and-push (push) Failing after 4s
2025-10-14 18:30:52 +02:00
20d826236c actual actual
All checks were successful
Build and Push Docker Container / build-and-push (push) Successful in 36s
2025-03-27 00:12:19 +01:00
f02fc536f2 correcting path
All checks were successful
Build and Push Docker Container / build-and-push (push) Successful in 36s
2025-03-26 23:10:34 +01:00
81e734b8b4 correct static files path in app
All checks were successful
Build and Push Docker Container / build-and-push (push) Successful in 40s
2025-03-26 22:40:29 +01:00
401ac01ba2 change build platform to arm64 2025-03-26 22:38:52 +01:00
f6cf715b3a adding ci/cd pipeline
All checks were successful
Build and Push Docker Container / build-and-push (push) Successful in 39s
2025-03-26 17:28:16 +01:00
923f1d5e73 fix slow search 2025-03-26 17:02:57 +01:00
1c25f67657 adapt Makefile to the moving of migrations dir 2025-03-26 16:40:59 +01:00
0357c4f4b2 move webserver files to ./web/ 2025-03-26 16:32:57 +01:00
f312489aa6 move to gomponents for rendering html 2025-03-26 15:12:07 +01:00
b7b4e74eff rename spiegel converter 2025-03-24 13:34:37 +01:00
9687f327fe add token counting for openai model 2025-01-27 09:17:25 +01:00
be41a4e84b fixing date format on article page 2025-01-22 09:56:40 +01:00
dafc060648 update article page #13 2025-01-22 09:48:25 +01:00
a5a1a974fc add summarization of article, when collected 2025-01-22 09:29:09 +01:00
2a57a840a6 add rss feed #9 2025-01-21 16:47:11 +01:00
cbc5bec053 fixing #14 2025-01-21 15:54:41 +01:00
dcba0ad890 article sql statement bug fix 2025-01-21 09:10:46 +01:00
0456c23e44 remove AiSummarized attribute from article view model 2025-01-20 22:13:23 +01:00
b1236140b6 clean up repositories; move viewmodel repo into own file; 2025-01-20 22:09:50 +01:00
d6cf444def adding comment to document repository 2025-01-20 21:43:20 +01:00
2f0a17763d remove duplicate files (#8)
duplicate because of capitalization

Reviewed-on: https://git.kohout-dev.de/crowsnest/crowsnest/pulls/8
2025-01-20 20:29:36 +00:00
7fb20c2ea9 make capitalization uniform 2025-01-20 21:22:45 +01:00
4fb1f25468 clean up summarizer function 2025-01-20 21:20:23 +01:00
e70d988ffd remove not needed converter file 2025-01-20 21:19:48 +01:00
ce0c5a60e6 add openai api key to build; remove not needed argument for docker build 2025-01-20 20:55:03 +01:00
fd25d7ebbc Merge branch 'main' of git.kohout-dev.de:crowsnest/crowsnest 2025-01-20 20:44:29 +01:00
32a0712124 adding .env file 2025-01-20 20:40:42 +01:00
637a5ebb0c add summarization for documents 2025-01-20 20:34:23 +01:00
59 changed files with 1161 additions and 416 deletions

View File

@@ -0,0 +1,27 @@
# .gitea/workflows/docker-build.yml
name: Build and Push Docker Container
on:
push:
branches:
- "main"
jobs:
build-and-push:
runs-on:
- ubuntu-22.04
steps:
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
registry: g.eliaskohout.de
username: ${{ secrets.USERNAME }}
password: ${{ secrets.ACCESS_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build and push
uses: docker/build-push-action@v6
with:
push: true
tags: "g.eliaskohout.de/eliaskohout/crowsnest:${{gitea.sha}},g.eliaskohout.de/eliaskohout/crowsnest:latest"

3
.gitignore vendored
View File

@@ -23,3 +23,6 @@ go.work
persistence
.DS_Store
# env
.env

View File

@@ -9,7 +9,7 @@ ENV GOARCH=amd64
WORKDIR /goose.git
RUN git clone https://github.com/pressly/goose.git .
RUN go build -tags='no_clickhouse no_libsql no_mssql no_mysql no_sqlite3 no_vertica no_ydb' \
RUN GOARCH=arm64 go build -tags='no_clickhouse no_libsql no_mssql no_mysql no_sqlite3 no_vertica no_ydb' \
-o goose ./cmd/goose
# Build app
@@ -21,11 +21,11 @@ RUN go mod download
# Copy the source code
COPY ./src/cmd ./cmd
COPY ./src/assets ./assets
COPY ./src/internal ./internal
COPY ./src/web ./web
# Build the application
RUN go build -o main cmd/frontend/*
RUN GOARCH=arm64 go build -o main cmd/frontend/*
# --- Stage 3: Run the application ---
@@ -36,9 +36,10 @@ WORKDIR /app
# Copy the built binary from the builder stage
COPY --from=builder /app/main .
COPY --from=builder /goose.git/goose .
COPY ./src/assets ./assets
COPY ./src/migrations ./migrations
COPY ./src/web ./web
EXPOSE 8080
# Command to run the application
CMD ["sh", "-c", "/app/goose -dir=/app/assets/migrations/ postgres postgresql://$DB_USER:$DB_PASS@$DB_HOST:$DB_PORT/$DB_NAME up && /app/main"]
CMD ["sh", "-c", "/app/goose -dir=/app/migrations/ postgres postgresql://$DB_USER:$DB_PASS@$DB_HOST:$DB_PORT/$DB_NAME up && /app/main"]

View File

@@ -1,17 +1,17 @@
DB_HOST="10.99.0.3"
DB_PORT="5432"
DB_NAME="crowsnest_dev"
DB_USER="crow_dev"
DB_PASS="hL0VlXkH2WoHL7c7FdRTHXMy"
# Load the .env file
ifneq (,$(wildcard ./.env))
include .env
export $(shell sed 's/=.*//' .env)
endif
go-run:
cd src; DB_USER=$(DB_USER) DB_PASS=$(DB_PASS) DB_NAME=$(DB_NAME) DB_HOST=$(DB_HOST) go run cmd/frontend/main.go
cd src; go run cmd/frontend/main.go;
migrate-up:
goose -dir=./src/assets/migrations/ postgres "postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)" up
goose -dir=./src/migrations/ postgres "postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)" up
migrate-down:
goose -dir=./src/assets/migrations/ postgres "postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)" down
goose -dir=./src/migrations/ postgres "postgresql://$(DB_USER):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)" down
docker-push: docker-build
docker push git.kohout-dev.de/crowsnest/crowsnest:latest
@@ -23,14 +23,8 @@ docker-run: docker-build
--env DB_NAME=$(DB_NAME) \
--env DB_USER=$(DB_USER) \
--env DB_PASS=$(DB_PASS) \
--env OPENAI_API_KEY=$(OPENAI_API_KEY) \
--rm -p 8080:8080 git.kohout-dev.de/crowsnest/crowsnest:latest
docker-build:
docker build \
--build-arg DB_HOST=$(DB_HOST) \
--build-arg DB_PORT=$(DB_PORT) \
--build-arg DB_NAME=$(DB_NAME) \
--build-arg DB_USER=$(DB_USER) \
--build-arg DB_PASS=$(DB_PASS) \
--platform linux/amd64 \
-t git.kohout-dev.de/crowsnest/crowsnest:latest .
docker build --platform linux/arm64 -t git.kohout-dev.de/crowsnest/crowsnest:latest .

View File

@@ -1,27 +0,0 @@
{{ define "content" }}
<div class="content max-w-screen-lg flex flex-col mx-auto">
{{ range .ArticleVMs }}
<div tabindex="0" class="collapse bg-base-200 shadow mb-4">
<div class="collapse-title font-medium">{{ .Title }}</div>
<div class="collapse-content">
<p class="pb-2">
<span class="badge badge-outline">{{ .ShortSource }}</span>
<span class="badge badge-outline">{{ .PublishDate }}</span>
{{if .AiSummarized}}
<span class="badge badge-outline">ai summary</span>
{{end}}
</p>
<p class="card-text">{{ .Summary }}</p>
<div class="flex flex-row-reverse">
<a href="/article/{{ .Id }}" class="btn btn-active btn-sm btn-primary">Details</a>
</div>
</div>
</div>
{{ end }}
{{ template "pagination" .Paginations }}
</div>
{{ end }}

View File

@@ -1,27 +0,0 @@
{{ define "content" }}
<div class="content max-w-screen-lg flex flex-col mx-auto">
<div tabindex="0" class="card bg-base-200 shadow mb-4">
<div class="card-body">
<div class="flex flex-row pb-4">
<div class="divider divider-horizontal divider-primary"></div>
<div class="card-title font-medium">{{ .ArticlePageVM.Title }}</div>
</div>
<div class="px-5 pb-4">
<p><span class="badge badge-neutral me-4 w-20">Datum</span>{{ .ArticlePageVM.PublishDate }}</p>
<p><span class="badge badge-neutral me-4 w-20">Quelle</span>{{ .ArticlePageVM.ShortSource }}</p>
<p><span class="badge badge-neutral me-4 w-20">TLDR</span>{{ .ArticlePageVM.AiSummary }}</p>
<p><span class="badge badge-neutral me-4 w-20">Inhalt</span>{{ .ArticlePageVM.Content }}</p>
<div class="card-actions justify-end">
<a href="{{ .ArticlePageVM.SourceUrl }}">
<button class="btn btn-primary btn-sm">Seite besuchen</button>
</a>
</div>
</div>
</div>
</div>
</div>
{{ end }}

View File

@@ -1,10 +0,0 @@
{{ define "pagination" }}
<div class="join pagination p-5 mx-auto">
{{ range . }}
<a class="join-item btn btn-sm {{ if .Active }}btn-active{{ end }} {{ if .Disabled }}btn-disabled{{end}}" up-follow up-target=".content"
{{ if .Disabled }}{{else}}href="/page/{{ .Content }}" tabindex="0"{{end}}>{{ .Content }}</a>
{{ end }}
</div>
{{ end }}

View File

@@ -1,69 +0,0 @@
{{ define "base" }}
<!DOCTYPE html>
<html lang="de" data-theme="dark">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
{{/* Unpoly */}}
<link rel="stylesheet" type="text/css" href="/static/unpoly.min.css" />
<script src="/static/unpoly.min.js"></script>
{{/* daisyUI */}}
<link rel="stylesheet" type="text/css" href="/static/daisyui.min.css" />
<script src="/static/tailwindcss.min.js"></script>
</head>
<body>
<nav class="fixed top-0 z-50 w-full p-4">
<div class="navbar bg-base-300 rounded-box drop-shadow-md">
{{/* Logo with navigation */}}
<div class="flex-1">
<a href="/" tabindex="0" class="btn btn-ghost text-xl" up-follow up-target=".body">crowsnest</a>
<ul class="menu menu-horizontal hidden sm:flex">
<li><a href="/" tabindex="0" {{ if .SelectedNavItemArticle }} class="active" {{ end }}>Artikel</a></li>
<li><a tabindex="0">Themen</a></li>
</ul>
</div>
{{/* Search field for normal sized screen */}}
<div class="hidden sm:flex flex-none pe-4">
<form role="search" method="post" action="/up/search" up-submit up-autosubmit up-target=".content">
<label class="input input-bordered input-sm flex items-center gap-2">
<input name="search" type="search" class="grow" placeholder="Suche" />
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor"
class="h-4 w-4 opacity-70">
<path fill-rule="evenodd" d="M9.965 11.026a5 5 0 1 1 1.06-1.06l2.755 2.754a.75.75 0 1 1-1.06 1.06l-2.755-2.754ZM10.5 7a3.5 3.5 0 1 1-7 0 3.5 3.5 0 0 1 7 0Z" clip-rule="evenodd" />
</svg>
</label>
</form>
</div>
{{/* Dropdown for small screens */}}
<div class="dropdown dropdown-end sm:hidden">
<div tabindex="0" role="button" class="btn btn-ghost">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="2" stroke="currentColor"
class="h-6 w-6 opacity-70">
<path stroke-linecap="round" stroke-linejoin="round" d="M3.75 6.75h16.5M3.75 12h16.5M12 17.25h8.25" />
</svg>
</div>
<ul class="menu dropdown-content bg-base-100 rounded-box z-[1] mt-3 w-52 p-2 shadow">
<li><a href="/" tabindex="0" class="active">Artikel</a></li>
<li><a tabindex="0">Themen</a></li>
</ul>
</div>
</div>
</nav>
<div class="container mx-auto px-4 mt-28">
{{ template "content" . }}
</div>
<script src="https://cdn.tailwindcss.com"></script>
<script>
up.link.config.followSelectors.push('a[href]')
up.link.config.instantSelectors.push('a[href]')
</script>
</body>
</html>
{{ end }}

View File

@@ -1,13 +1,15 @@
package main
import (
"crowsnest/internal/app"
"crowsnest/internal/crawler"
"crowsnest/internal/middleware"
"crowsnest/internal/model"
"crowsnest/internal/model/database"
"crowsnest/internal/util"
"crowsnest/web/app"
"crowsnest/web/middleware"
"log"
"net/http"
"os"
_ "github.com/lib/pq"
)
@@ -17,23 +19,55 @@ func main() {
if err != nil {
log.Fatal("failed to connect to database due to", err.Error())
}
log.Println("connected to database successfully")
// summarize documents
documents := &database.DocumentRepository{DB: db}
openai := &util.OpenAi{ApiKey: os.Getenv("OPENAI_API_KEY")}
sumDoc := func(doc *model.Document) *model.Document {
if doc.Summary == "" {
summaryText, err := openai.Summarize(doc.Content)
if err == nil {
doc.Summary = summaryText
return doc
}
log.Println(err.Error())
}
return doc
}
go documents.Map(sumDoc)
// run web crawlers
articles := &database.ArticleRepository{DB: db}
articles := &database.ArticleRepository{DB: db}
crawler := crawler.CrawlerFacade{}
crawler.Init()
crawler.SubscribeToSpiegelFeed(func(a *model.Article) {
articles.Insert(a)
id, err := articles.Insert(a)
if err == nil {
doc, err := documents.ByArticleId(id)
if err == nil {
doc = sumDoc(doc)
documents.Update(doc)
}
}
})
crawler.SubscribeToZeitFeed(func(a *model.Article) {
articles.Insert(a)
id, err := articles.Insert(a)
if err == nil {
doc, err := documents.ByArticleId(id)
if err == nil {
doc = sumDoc(doc)
documents.Update(doc)
}
}
})
// define app
webapp := app.NewApp(db)
// create middle
// create middleware
stack := middleware.CreateStack(
middleware.Logging,
)

View File

@@ -18,6 +18,7 @@ require (
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/cors v1.7.2 // indirect
@@ -33,6 +34,7 @@ require (
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/flatbuffers v24.3.25+incompatible // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
@@ -50,6 +52,7 @@ require (
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.7 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
github.com/spf13/cobra v1.7.0 // indirect
@@ -76,4 +79,6 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
gorgonia.org/vecf32 v0.9.0 // indirect
gorgonia.org/vecf64 v0.9.0 // indirect
maragu.dev/gomponents v1.1.0 // indirect
maragu.dev/gomponents-heroicons/v3 v3.0.0 // indirect
)

View File

@@ -54,6 +54,8 @@ github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLc
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -132,6 +134,8 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
@@ -181,6 +185,8 @@ github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuR
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkoukk/tiktoken-go v0.1.7 h1:qOBHXX4PHtvIvmOtyg1EeKlwFRiMKAcoMp4Q+bLQDmw=
github.com/pkoukk/tiktoken-go v0.1.7/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
@@ -441,5 +447,9 @@ gorgonia.org/vecf64 v0.9.0 h1:bgZDP5x0OzBF64PjMGC3EvTdOoMEcmfAh1VCUnZFm1A=
gorgonia.org/vecf64 v0.9.0/go.mod h1:hp7IOWCnRiVQKON73kkC/AUMtEXyf9kGlVrtPQ9ccVA=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
maragu.dev/gomponents v1.1.0 h1:iCybZZChHr1eSlvkWp/JP3CrZGzctLudQ/JI3sBcO4U=
maragu.dev/gomponents v1.1.0/go.mod h1:oEDahza2gZoXDoDHhw8jBNgH+3UR5ni7Ur648HORydM=
maragu.dev/gomponents-heroicons/v3 v3.0.0 h1:QBw4CSST12mrdcYzl1XrEnbMxfhvQgnVunhFgQ4RPyI=
maragu.dev/gomponents-heroicons/v3 v3.0.0/go.mod h1:Rqc5BhSQUHBnGuWEPihg+IsQnOkiBY+Ibu1DDGEarsY=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

View File

@@ -1,49 +0,0 @@
package app
import (
"crowsnest/internal/model"
"html/template"
"net/http"
)
// UpSearch is the endpoint that returns a list of articles for the search
// terms posted by the search form. It reads the "search" form value, looks up
// matching articles and renders them with the "base" template. An empty search
// value is delegated to the Index handler.
func (app *App) UpSearch(w http.ResponseWriter, req *http.Request) {
// construct search query
searchTerms := req.FormValue("search")
if searchTerms == "" {
app.Index(w, req)
return
}
// get articles
articles, err := app.articles.Search(searchTerms)
if err != nil {
// the search failed; report it to the client as an internal server error
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
// convert to viewmodel
articleVMs := make([]*model.ArticleViewModel, 0, len(articles))
for _, a := range articles {
articleVMs = append(articleVMs, a.ViewModel())
}
// render template; the files are parsed on every request and template.Must
// panics if one of them cannot be parsed
t := template.Must(template.ParseFiles(
"assets/templates/article.html",
"assets/templates/layout.html",
"assets/templates/components/pagination.html"))
// search results are rendered without pagination entries
data := map[string]interface{}{
"SelectedNavItemArticle": true,
"ArticleVMs": &articleVMs,
"Paginations": nil,
}
err = t.ExecuteTemplate(w, "base", data)
if err != nil {
http.Error(w, "Failed to render template", http.StatusInternalServerError)
return
}
}

View File

@@ -1,69 +0,0 @@
package crawler
import (
"crowsnest/internal/model"
"crowsnest/internal/util"
"github.com/gocolly/colly/v2"
)
// CrawlerFacade bundles the article feeds of the crawled news sites. Each
// site has its own distributer on which converted articles are published and
// to which hooks can be subscribed.
type CrawlerFacade struct {
// distributer for articles converted from the spiegel feed
spiegelFeedDistributer *util.Distributer[*model.Article]
// distributer for articles converted from the zeit feed
zeitFeedDistributer *util.Distributer[*model.Article]
}
// Init creates and initialises the distributers of both feeds, sets up a
// WebFeed per site and starts one goroutine per site that converts every value
// the feed yields into an article and publishes it on the site's distributer.
// Values that fail to convert are dropped without logging.
func (cf *CrawlerFacade) Init() {
// init
cf.spiegelFeedDistributer = &util.Distributer[*model.Article]{}
cf.spiegelFeedDistributer.Init()
cf.zeitFeedDistributer = &util.Distributer[*model.Article]{}
cf.zeitFeedDistributer.Init()
// run spiegel feed
sf := &WebFeed{}
sf.Init(
"https://www.spiegel.de/",
colly.AllowedDomains("www.spiegel.de", "spiegel.de"),
colly.CacheDir("./persistence/spiegel_cache"),
colly.MaxDepth(1),
)
sf_feed := sf.Feed()
sf_converter := ConverterSpiegel{}
sf_converter.Init()
// convert and publish in the background so Init does not block on the feed
go func() {
for val := range sf_feed {
article, err := sf_converter.Convert(val)
if err != nil { continue }
cf.spiegelFeedDistributer.Publish(article)
}
}()
// run zeit feed
zf := &WebFeed{}
zf.Init(
"https://www.zeit.de/index",
colly.AllowedDomains("www.zeit.de", "zeit.de"),
colly.CacheDir("./persistence/zeit_cache"),
colly.MaxDepth(1),
)
zf_feed := zf.Feed()
zf_converter := ZeitConverter{}
zf_converter.Init()
// convert and publish in the background so Init does not block on the feed
go func() {
for val := range zf_feed {
article, err := zf_converter.Convert(val)
if err != nil { continue }
cf.zeitFeedDistributer.Publish(article)
}
}()
}
// SubscribeToSpiegelFeed subscribes hook to the distributer of the spiegel
// feed. Init must have been called before, otherwise the distributer is nil.
// NOTE(review): presumably hook is invoked for every published article —
// confirm against util.Distributer.
func (cf *CrawlerFacade) SubscribeToSpiegelFeed(hook func(*model.Article)) {
cf.spiegelFeedDistributer.Subscribe(hook)
}
// SubscribeToZeitFeed subscribes hook to the distributer of the zeit feed.
// Init must have been called before, otherwise the distributer is nil.
// NOTE(review): presumably hook is invoked for every published article —
// confirm against util.Distributer.
func (cf *CrawlerFacade) SubscribeToZeitFeed(hook func(*model.Article)) {
cf.zeitFeedDistributer.Subscribe(hook)
}

View File

@@ -0,0 +1,73 @@
package crawler
import (
"crowsnest/internal/model"
"crowsnest/internal/util"
"github.com/gocolly/colly/v2"
)
// CrawlerFacade bundles the article feeds of the crawled news sites. Each
// site has its own distributer on which converted articles are published and
// to which hooks can be subscribed.
type CrawlerFacade struct {
// distributer for articles converted from the spiegel feed
spiegelFeedDistributer *util.Distributer[*model.Article]
// distributer for articles converted from the zeit feed
zeitFeedDistributer *util.Distributer[*model.Article]
}
// Init creates and initialises the distributers of both feeds, sets up a
// WebFeed per site and starts one goroutine per site that converts every value
// the feed yields into an article and publishes it on the site's distributer.
// Values that fail to convert are dropped without logging.
func (cf *CrawlerFacade) Init() {
	// distributers that hand the articles to the subscribers
	cf.spiegelFeedDistributer = &util.Distributer[*model.Article]{}
	cf.spiegelFeedDistributer.Init()
	cf.zeitFeedDistributer = &util.Distributer[*model.Article]{}
	cf.zeitFeedDistributer.Init()

	// spiegel: feed -> converter -> distributer
	spiegelWebFeed := &WebFeed{}
	spiegelWebFeed.Init(
		"https://www.spiegel.de/",
		colly.AllowedDomains("www.spiegel.de", "spiegel.de"),
		colly.CacheDir("./persistence/spiegel_cache"),
		colly.MaxDepth(1),
	)
	spiegelResources := spiegelWebFeed.Feed()
	spiegelConverter := SpiegelConverter{}
	spiegelConverter.Init()
	go func() {
		for resource := range spiegelResources {
			// only successfully converted resources are published
			if article, err := spiegelConverter.Convert(resource); err == nil {
				cf.spiegelFeedDistributer.Publish(article)
			}
		}
	}()

	// zeit: feed -> converter -> distributer
	zeitWebFeed := &WebFeed{}
	zeitWebFeed.Init(
		"https://www.zeit.de/index",
		colly.AllowedDomains("www.zeit.de", "zeit.de"),
		colly.CacheDir("./persistence/zeit_cache"),
		colly.MaxDepth(1),
	)
	zeitResources := zeitWebFeed.Feed()
	zeitConverter := ZeitConverter{}
	zeitConverter.Init()
	go func() {
		for resource := range zeitResources {
			// only successfully converted resources are published
			if article, err := zeitConverter.Convert(resource); err == nil {
				cf.zeitFeedDistributer.Publish(article)
			}
		}
	}()
}
// SubscribeToSpiegelFeed subscribes hook to the distributer of the spiegel
// feed. Init must have been called before, otherwise the distributer is nil.
// NOTE(review): presumably hook is invoked for every published article —
// confirm against util.Distributer.
func (cf *CrawlerFacade) SubscribeToSpiegelFeed(hook func(*model.Article)) {
cf.spiegelFeedDistributer.Subscribe(hook)
}
// SubscribeToZeitFeed subscribes hook to the distributer of the zeit feed.
// Init must have been called before, otherwise the distributer is nil.
// NOTE(review): presumably hook is invoked for every published article —
// confirm against util.Distributer.
func (cf *CrawlerFacade) SubscribeToZeitFeed(hook func(*model.Article)) {
cf.zeitFeedDistributer.Subscribe(hook)
}

View File

@@ -10,19 +10,19 @@ import (
"github.com/PuerkitoBio/goquery"
)
type ConverterSpiegel struct {
type SpiegelConverter struct {
pattern_paywall *regexp.Regexp
pattern_url *regexp.Regexp
pattern_whitespace *regexp.Regexp
}
func (c *ConverterSpiegel) Init() {
func (c *SpiegelConverter) Init() {
c.pattern_paywall = regexp.MustCompile(`"paywall":{"attributes":{"is_active":true`)
c.pattern_url = regexp.MustCompile(`^https://(www\.)?spiegel.de.*`)
c.pattern_whitespace = regexp.MustCompile(`\s+`)
}
func (c *ConverterSpiegel) Convert(res *Resource) (*model.Article, error) {
func (c *SpiegelConverter) Convert(res *Resource) (*model.Article, error) {
// check url url pattern
if !c.pattern_url.Match([]byte(res.Url)) {
return nil, errors.New("invalid url pattern")

View File

@@ -26,12 +26,11 @@ func (a *Article) Clone() *Article {
}
type ArticleViewModel struct {
Id int
Title string
PublishDate string
ShortSource string
Summary string
AiSummarized bool
Id int
Title string
PublishDate string
ShortSource string
Summary string
}
type ArticlePageViewModel struct {
@@ -40,35 +39,35 @@ type ArticlePageViewModel struct {
PublishDate string
Title string
Content string
AiSummary string
Summary string
}
// TODO docstring
func (a *Article) ViewModel() *ArticleViewModel {
var summary string
if len(a.Content) > 200 {
summary = a.Content[:200]
} else {
summary = a.Content
}
short_url := ""
parsedURL, err := url.Parse(a.SourceUrl)
if err == nil {
short_url = parsedURL.Hostname()
}
return &ArticleViewModel{
Id: a.Id,
Title: a.Title,
PublishDate: a.PublishDate.Local().Format("02.01.2006"),
ShortSource: short_url,
Summary: summary,
}
}
//func (a *Article) ViewModel() *ArticleViewModel {
// var summary string
// if len(a.Content) > 200 {
// summary = a.Content[:200]
// } else {
// summary = a.Content
// }
//
// short_url := ""
// parsedURL, err := url.Parse(a.SourceUrl)
// if err == nil {
// short_url = parsedURL.Hostname()
// }
//
// return &ArticleViewModel{
// Id: a.Id,
// Title: a.Title,
// PublishDate: a.PublishDate.Local().Format("02.01.2006"),
// ShortSource: short_url,
// Summary: summary,
// }
//}
func (a *Article) PageViewModel() *ArticlePageViewModel {
summary := "N/A"
summary := "N/A"
short_url := ""
parsedURL, err := url.Parse(a.SourceUrl)
@@ -82,6 +81,6 @@ func (a *Article) PageViewModel() *ArticlePageViewModel {
Title: a.Title,
PublishDate: a.PublishDate.Local().Format("02.01.2006 15:04"),
Content: a.Content,
AiSummary: summary,
Summary: summary,
}
}

View File

@@ -0,0 +1,42 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
"net/url"
"time"
)
// ArticlePageViewModelRepository reads the view models of article pages from
// the database.
type ArticlePageViewModelRepository struct {
// database handle all queries are run against
DB *sql.DB
}
// ById loads the view model of the article page for the article with the given
// id. The summary is taken from the document joined to the article. An error
// is returned if the row cannot be queried or scanned, e.g. when no article
// with that id exists.
func (m *ArticlePageViewModelRepository) ById(id int64) (*model.ArticlePageViewModel, error) {
	query := `
SELECT a.sourceUrl, a.publishDate, a.title, a.content, d.summary
FROM articles a JOIN documents d ON a.document_id = d.id
WHERE a.id = $1
`

	var (
		vm        = &model.ArticlePageViewModel{}
		published time.Time
	)
	row := m.DB.QueryRow(query, id)
	err := row.Scan(&vm.SourceUrl, &published, &vm.Title, &vm.Content, &vm.Summary)
	if err != nil {
		return nil, err
	}

	// the short source is the host of the source url; it stays empty when the
	// url cannot be parsed
	if parsed, parseErr := url.Parse(vm.SourceUrl); parseErr == nil {
		vm.ShortSource = parsed.Hostname()
	}

	// publish date as day.month.year hour:minute
	vm.PublishDate = published.Format("02.01.2006 15:04")

	return vm, nil
}

View File

@@ -3,7 +3,6 @@ package database
import (
"crowsnest/internal/model"
"database/sql"
"net/url"
"strings"
)
@@ -13,7 +12,7 @@ type ArticleRepository struct {
// Gets all the article objects from the database. This may throw an error if
// the connection to the database fails.
func (m *ArticleRepository) All(limit int, offset int) ([]model.Article, error) {
func (m *ArticleRepository) All(limit int, offset int) ([]*model.Article, error) {
stmt := `
SELECT id, title, sourceUrl, content, publishDate, fetchDate
FROM articles
@@ -25,9 +24,9 @@ func (m *ArticleRepository) All(limit int, offset int) ([]model.Article, error)
return nil, err
}
articles := []model.Article{}
articles := []*model.Article{}
for rows.Next() {
a := model.Article{}
a := &model.Article{}
err := rows.Scan(&a.Id, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate)
if err != nil {
return nil, err
@@ -43,59 +42,10 @@ func (m *ArticleRepository) All(limit int, offset int) ([]model.Article, error)
return articles, nil
}
func (m *ArticleRepository) AllArticleViewModels(limit int, offset int) ([]*model.ArticleViewModel, error) {
stmt := `
SELECT a.id, a.title, a.sourceUrl, a.publishDate, d.summary
FROM articles a JOIN documents d ON a.document_id = d.id
ORDER BY a.publishDate DESC
LIMIT $1 OFFSET $2
`
rows, err := m.DB.Query(stmt, limit, offset)
if err != nil {
return nil, err
}
articleVMs := []*model.ArticleViewModel{}
var sourceUrl string
for rows.Next() {
a := model.ArticleViewModel{}
err := rows.Scan(&a.Id, &a.Title, &sourceUrl, &a.PublishDate, &a.Summary)
if err != nil {
return nil, err
}
// summary
if a.Summary == "" {
a.Summary = "N/A"
}
// short url
parsedURL, err := url.Parse(sourceUrl)
if err == nil {
a.ShortSource = parsedURL.Hostname()
} else {
a.ShortSource = ""
}
// ai summary always false
a.AiSummarized = false
articleVMs = append(articleVMs, &a)
}
if err = rows.Err(); err != nil {
return nil, err
}
return articleVMs, nil
}
// Counts all articles in the database. This may throw an error if the
// connection to the database fails.
func (m *ArticleRepository) CountAll() (uint, error) {
stmt := `SELECT count(id) FROM articles `
stmt := `SELECT count(id) FROM articles`
rows := m.DB.QueryRow(stmt)
@@ -110,12 +60,12 @@ func (m *ArticleRepository) CountAll() (uint, error) {
// Will use the full-text search features of the underlying database to search
// articles for a given search query. This may fail if the connection to the
// database fails.
func (m *ArticleRepository) Search(query string) ([]model.Article, error) {
func (m *ArticleRepository) Search(query string) ([]*model.Article, error) {
stmt := `
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
FROM articles a JOIN documents d ON a.document_id = d.id
WHERE to_tsvector('german', d.content) @@ to_tsquery('german', $1)
ORDER BY ts_rank(to_tsvector('german', d.content), to_tsquery('german', $1)) DESC
WHERE d.content_tsv @@ to_tsquery('german', $1)
ORDER BY ts_rank(d.content_tsv, to_tsquery('german', $1)) DESC
LIMIT 10
`
@@ -125,9 +75,9 @@ func (m *ArticleRepository) Search(query string) ([]model.Article, error) {
return nil, err
}
articles := []model.Article{}
articles := []*model.Article{}
for rows.Next() {
a := model.Article{}
a := &model.Article{}
err := rows.Scan(&a.Id, &a.Title, &a.SourceUrl, &a.Content, &a.PublishDate, &a.FetchDate)
if err != nil {
return nil, err
@@ -144,7 +94,7 @@ func (m *ArticleRepository) Search(query string) ([]model.Article, error) {
// Will return an article given an id. This may fail if the connection to the
// database fails or there is no aritcle with the given id.
func (m *ArticleRepository) ById(id int) (*model.Article, error) {
func (m *ArticleRepository) ById(id int64) (*model.Article, error) {
stmt := `
SELECT a.id, a.title, a.sourceurl, a.content, a.publishdate, a.fetchDate
FROM articles a
@@ -162,21 +112,24 @@ func (m *ArticleRepository) ById(id int) (*model.Article, error) {
}
// Inserts a new article into the database. The id attribute of the given
// article will be ignored. May throw an error if the execution of the database
// query fails.
func (m *ArticleRepository) Insert(a *model.Article) error {
// insert article
// article will be ignored. Returns the id of the last inserted element. May
// throw an error if the execution of the database query fails.
func (m *ArticleRepository) Insert(a *model.Article) (int64, error) {
stmt := `INSERT INTO articles (title, sourceUrl, content, publishDate, fetchDate)
VALUES ($1, $2, $3, $4, $5, $6)
VALUES ($1, $2, $3, $4, $5)
RETURNING id
`
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate)
return err
var lastInsertID int64
err := m.DB.QueryRow(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate).Scan(&lastInsertID)
return lastInsertID, err
}
// TODO docstring
// Update an article in the database. Will use the id that is set in the article
// object as an reference to the database row. This may throw an error if the
// connection to database fails.
func (m *ArticleRepository) Update(a *model.Article) error {
stmt := `UPDATE articles
SET title = $1, sourceUrl = $2, content = $4, publishDate = $5, fetchDate = $6
SET title = $1, sourceUrl = $2, content = $3, publishDate = $4, fetchDate = $5
WHERE id = $8
`
_, err := m.DB.Exec(stmt, a.Title, a.SourceUrl, a.Content, a.PublishDate, a.FetchDate, a.Id)

View File

@@ -0,0 +1,115 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
"net/url"
"strings"
"time"
)
// ArticleViewModelRepository reads articles from the database in the form of
// view models.
type ArticleViewModelRepository struct {
// database handle all queries are run against
DB *sql.DB
}
// All returns the articles as view models, newest publish date first. limit
// is the maximum number of rows returned and offset the number of rows
// skipped. The summary is taken from the document joined to the article and
// replaced by "N/A" when it is empty. An error is returned if the query fails
// or a row cannot be scanned.
func (m *ArticleViewModelRepository) All(limit int, offset int) ([]*model.ArticleViewModel, error) {
	stmt := `
SELECT a.id, a.title, a.sourceUrl, a.publishDate, d.summary
FROM articles a JOIN documents d ON a.document_id = d.id
ORDER BY a.publishDate DESC
LIMIT $1 OFFSET $2
`

	rows, err := m.DB.Query(stmt, limit, offset)
	if err != nil {
		return nil, err
	}
	// release the result set (and its connection) on every return path, in
	// particular on the early return after a failed Scan
	defer rows.Close()

	articleVMs := []*model.ArticleViewModel{}
	for rows.Next() {
		a := model.ArticleViewModel{}
		var sourceUrl string
		var date time.Time

		err := rows.Scan(&a.Id, &a.Title, &sourceUrl, &date, &a.Summary)
		if err != nil {
			return nil, err
		}

		// summary
		if a.Summary == "" {
			a.Summary = "N/A"
		}

		// short url: host of the source url, empty if the url cannot be parsed
		parsedURL, err := url.Parse(sourceUrl)
		if err == nil {
			a.ShortSource = parsedURL.Hostname()
		} else {
			a.ShortSource = ""
		}

		// format date
		// NOTE(review): Search formats date.Local() while the time is formatted
		// as scanned here — confirm which of the two is intended.
		a.PublishDate = date.Format("02.01.2006")

		articleVMs = append(articleVMs, &a)
	}

	if err = rows.Err(); err != nil {
		return nil, err
	}

	return articleVMs, nil
}
// Search runs a full-text search over the document contents and returns the
// ten best ranked articles as view models.
//
// query is split on whitespace and the resulting terms are OR-combined into a
// tsquery, so an article matches if it contains any of the terms. A query
// without any term yields an empty result without touching the database.
//
// For every row an empty summary is replaced by "N/A", the source URL is
// reduced to its host name (left empty when the URL cannot be parsed) and the
// publish date is converted to local time and formatted as DD.MM.YYYY.
//
// An error is returned if the query, scanning a row or iterating the result
// set fails.
//
// NOTE(review): the terms are handed to to_tsquery verbatim, so input that
// contains tsquery operator characters may still be rejected by the
// database — confirm whether that case needs handling.
func (m *ArticleViewModelRepository) Search(query string) ([]*model.ArticleViewModel, error) {
	stmt := `
SELECT a.id, a.title, a.sourceUrl, a.publishDate, d.summary
FROM articles a JOIN documents d ON a.document_id = d.id
WHERE d.content_tsv @@ to_tsquery('german', $1)
ORDER BY ts_rank(d.content_tsv, to_tsquery('german', $1)) DESC
LIMIT 10
`
	articleVMs := []*model.ArticleViewModel{}

	// strings.Fields drops the empty tokens that repeated spaces would
	// otherwise produce and that would end up as "a |  | b" in the tsquery.
	terms := strings.Fields(query)
	if len(terms) == 0 {
		return articleVMs, nil
	}

	rows, err := m.DB.Query(stmt, strings.Join(terms, " | "))
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including scan errors.
	defer rows.Close()

	for rows.Next() {
		a := &model.ArticleViewModel{}
		var sourceUrl string
		var date time.Time
		if err := rows.Scan(&a.Id, &a.Title, &sourceUrl, &date, &a.Summary); err != nil {
			return nil, err
		}

		// summary
		if a.Summary == "" {
			a.Summary = "N/A"
		}

		// short url; ShortSource keeps its zero value when parsing fails
		if parsedURL, err := url.Parse(sourceUrl); err == nil {
			a.ShortSource = parsedURL.Hostname()
		}

		// format date
		a.PublishDate = date.Local().Format("02.01.2006")

		articleVMs = append(articleVMs, a)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articleVMs, nil
}

View File

@@ -0,0 +1,137 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
)
// DocumentRepository reads and updates rows of the documents table. DB is the
// handle all of its queries run on.
type DocumentRepository struct {
DB *sql.DB
}
// All returns one page of documents. limit caps the number of rows and offset
// skips that many rows before the page starts. Rows are ordered by id so that
// consecutive pages are stable and do not overlap or skip rows.
//
// An error is returned if the query, scanning a row or iterating the result
// set fails.
func (d *DocumentRepository) All(limit int, offset int) ([]*model.Document, error) {
	stmt := `
SELECT id, content, summary
FROM documents
ORDER BY id
LIMIT $1 OFFSET $2
`
	rows, err := d.DB.Query(stmt, limit, offset)
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including scan errors.
	defer rows.Close()

	docs := []*model.Document{}
	for rows.Next() {
		doc := &model.Document{}
		if err := rows.Scan(&doc.Id, &doc.Content, &doc.Summary); err != nil {
			return nil, err
		}
		docs = append(docs, doc)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return docs, nil
}
// ById looks up a single document by its id. The error produced while
// scanning the row is returned unchanged, which covers both a failed query
// and the case that no row matched.
func (m *DocumentRepository) ById(id int64) (*model.Document, error) {
	const query = `
SELECT id, content, summary
FROM documents
WHERE id = $1
`
	var doc model.Document
	err := m.DB.QueryRow(query, id).Scan(&doc.Id, &doc.Content, &doc.Summary)
	if err != nil {
		return nil, err
	}
	return &doc, nil
}
// ByArticleId looks up the document that the article with the given id refers
// to via its document_id. The error produced while scanning the row is
// returned unchanged, which covers both a failed query and the case that no
// row matched.
func (m *DocumentRepository) ByArticleId(id int64) (*model.Document, error) {
	const query = `
SELECT d.id, d.content, d.summary
FROM documents d JOIN articles a ON d.id = a.document_id
WHERE a.id = $1
`
	var doc model.Document
	err := m.DB.QueryRow(query, id).Scan(&doc.Id, &doc.Content, &doc.Summary)
	if err != nil {
		return nil, err
	}
	return &doc, nil
}
// CountAll reports how many rows the documents table holds. On a query or
// scan error the count is 0 and the error is returned.
func (d *DocumentRepository) CountAll() (uint, error) {
	var total uint
	if err := d.DB.QueryRow(`SELECT count(id) FROM documents`).Scan(&total); err != nil {
		return 0, err
	}
	return total, nil
}
// Update writes the content and summary of d to the row whose id equals d.Id.
// The error of the executed statement, if any, is returned.
func (m *DocumentRepository) Update(d *model.Document) error {
	const query = `UPDATE documents
SET content = $1, summary = $2
WHERE id = $3
`
	if _, err := m.DB.Exec(query, d.Content, d.Summary, d.Id); err != nil {
		return err
	}
	return nil
}
// Map runs transform over the documents of the table and stores each result
// with Update. The documents are fetched in pages of ten via All, with the
// number of pages derived from CountAll.
//
// It returns how many documents were updated successfully. Processing stops
// at the first error from CountAll, All or Update, and that error is returned
// together with the count reached so far.
//
// Visiting every document exactly once depends on All returning rows in a
// stable order across calls.
func (d *DocumentRepository) Map(transform func(*model.Document) *model.Document) (int, error) {
	const batchSize = 10

	total, err := d.CountAll()
	if err != nil {
		return 0, err
	}

	done := 0
	for offset := 0; offset < int(total); offset += batchSize {
		batch, err := d.All(batchSize, offset)
		if err != nil {
			return done, err
		}
		for _, doc := range batch {
			if err := d.Update(transform(doc)); err != nil {
				return done, err
			}
			done++
		}
	}
	return done, nil
}

View File

@@ -0,0 +1,60 @@
package database
import (
"crowsnest/internal/model"
"database/sql"
"strconv"
"time"
)
// RSSItemRepository reads articles from the database in the form of RSS
// items. DB is the handle its queries run on.
type RSSItemRepository struct {
DB *sql.DB
}
// All returns up to limit articles as RSS items, ordered by publish date with
// the newest first.
//
// For every row the summary becomes the item description ("N/A" when empty),
// the publish date is formatted as RFC 1123 with numeric zone, the link points
// to the article page of the site and the guid carries the article id and is
// marked as not being a permalink.
//
// An error is returned if the query, scanning a row or iterating the result
// set fails.
func (m *RSSItemRepository) All(limit int) ([]model.RSSItem, error) {
	stmt := `
SELECT a.id, a.title, a.sourceUrl, a.publishDate, d.summary
FROM articles a JOIN documents d ON a.document_id = d.id
ORDER BY a.publishDate DESC
LIMIT $1
`
	rows, err := m.DB.Query(stmt, limit)
	if err != nil {
		return nil, err
	}
	// Release the result set on every return path, including scan errors.
	defer rows.Close()

	items := make([]model.RSSItem, 0, limit)
	for rows.Next() {
		item := model.RSSItem{}
		var id int
		// sourceUrl is selected by the query but not used for the item.
		var sourceUrl string
		var date time.Time
		if err := rows.Scan(&id, &item.Title, &sourceUrl, &date, &item.Description); err != nil {
			return nil, err
		}

		// description
		if item.Description == "" {
			item.Description = "N/A"
		}

		// format date
		item.PubDate = date.Format(time.RFC1123Z)

		// link and identifier are both derived from the article id
		idStr := strconv.Itoa(id)
		item.Link = "https://crowsnest.kohout-dev.de/article/" + idStr
		item.Guid = model.RSSGuid{IsPermaLink: "false", Value: idStr}

		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return items, nil
}

View File

@@ -0,0 +1,7 @@
package model
// Document holds the text of a document together with its summary, identified
// by Id.
type Document struct {
Id int
Content string
Summary string
}

51
src/internal/model/rss.go Normal file
View File

@@ -0,0 +1,51 @@
package model
import (
"encoding/xml"
"time"
)
// RSSFeed represents the RSS feed structure. It is encoded as the rss root
// element with a version attribute and a single channel child.
type RSSFeed struct {
XMLName xml.Name `xml:"rss"`
Version string `xml:"version,attr"`
Channel RSSChannel `xml:"channel"`
}
// RSSChannel represents the channel element in the RSS feed: the feed's
// metadata followed by its items.
type RSSChannel struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
PubDate string `xml:"pubDate"`
Items []RSSItem `xml:"item"`
}
// RSSItem represents an individual item in the RSS feed.
type RSSItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
Guid RSSGuid `xml:"guid"`
PubDate string `xml:"pubDate"`
}
// RSSGuid represents the guid element of an item. Value is written as the
// element text and IsPermaLink as an attribute that is omitted when empty.
type RSSGuid struct {
IsPermaLink string `xml:"isPermaLink,attr,omitempty"`
Value string `xml:",chardata"`
}
// RSSFeedFromItems wraps items in an RSS 2.0 feed for the crowsnest site. The
// channel's publication date is the time of the call, formatted as RFC 1123
// with numeric zone.
func RSSFeedFromItems(items []RSSItem) *RSSFeed {
	feed := &RSSFeed{Version: "2.0"}
	feed.Channel = RSSChannel{
		Title:       "crowsnest",
		Link:        "https://crowsnest.kohout-dev.de",
		Description: "N/A",
		PubDate:     time.Now().Format(time.RFC1123Z),
		Items:       items,
	}
	return feed
}

View File

@@ -1,3 +0,0 @@
package util
// Converter is a function that turns a value of type I into a value of type O
// or reports an error.
type Converter[I any, O any] func(I) (O, error)

103
src/internal/util/openai.go Normal file
View File

@@ -0,0 +1,103 @@
package util
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"github.com/pkoukk/tiktoken-go"
)
// response mirrors the part of the chat completion reply that Summarize
// reads: the message content of each returned choice.
type response struct {
Choices []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
}
// OpenAi is a small client for the OpenAI API. ApiKey is sent as the bearer
// token of every request made by Summarize.
type OpenAi struct {
ApiKey string
}
// Summarize asks the chat completions endpoint (model gpt-4o-mini) for a
// summary of text and returns the content of the first choice of the reply.
//
// An error is returned if the request cannot be built or sent, if the reply
// cannot be read, if the API answers with a non-2xx status, if the reply is
// not valid JSON, or if it contains no choice.
func (oai *OpenAi) Summarize(text string) (string, error) {
	const apiURL = "https://api.openai.com/v1/chat/completions"

	// Request payload: the instruction goes into the developer message, the
	// article itself into the user message.
	payload := map[string]any{
		"model": "gpt-4o-mini",
		"messages": []map[string]string{
			{
				"role":    "developer",
				"content": "Fasse den folgenden Zeitungsartikel in maximal 75 Wörtern zusammen. Konzentriere dich auf die wichtigsten Informationen, wie das Hauptthema, die zentralen Aussagen und relevante Hintergründe. Gib **außschließlich** die Zusammenfassung zurück.",
			},
			{
				"role":    "user",
				"content": text,
			},
		},
	}

	jsonData, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}

	req, err := http.NewRequest(http.MethodPost, apiURL, bytes.NewReader(jsonData))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", oai.ApiKey))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	// Surface API failures (bad key, rate limit, ...) with their status and
	// body instead of letting them fall through to the "no content" error.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("openai: unexpected status %s: %s", resp.Status, body)
	}

	var parsed response
	if err := json.Unmarshal(body, &parsed); err != nil {
		return "", err
	}
	if len(parsed.Choices) == 0 {
		return "", errors.New("could not find content in response")
	}
	return parsed.Choices[0].Message.Content, nil
}
// CountTokens returns the number of tokens text is encoded into with the
// o200k_base encoding. It returns -1 if the encoding cannot be loaded.
func (oai *OpenAi) CountTokens(text string) int {
	tkm, err := tiktoken.GetEncoding("o200k_base")
	if err != nil {
		// The signature has no error result, so the failure is signalled
		// with -1 only.
		return -1
	}

	// encode
	return len(tkm.Encode(text, nil, nil))
}

View File

@@ -0,0 +1,48 @@
-- +goose Up
-- +goose StatementBegin
-- Step 1: add the precomputed tsvector column used for full-text search
ALTER TABLE documents ADD COLUMN content_tsv tsvector;
-- Step 2: populate the new column for the rows that already exist
UPDATE documents SET content_tsv = to_tsvector('german', content);
-- Step 3: create a trigger function that recomputes the tsvector column from
-- the row's content
CREATE OR REPLACE FUNCTION update_tsvector()
RETURNS TRIGGER AS $$
BEGIN
NEW.content_tsv := to_tsvector('german', NEW.content);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Step 4: run the trigger function before every insert or update of a
-- documents row
CREATE TRIGGER documents_tsvector_update
BEFORE INSERT OR UPDATE ON documents
FOR EACH ROW EXECUTE FUNCTION update_tsvector();
-- Step 5: create a GIN index on the precomputed column for faster searches
CREATE INDEX idx_fts_documents ON documents USING GIN(content_tsv);
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Step 1: drop the trigger that updates the tsvector column
DROP TRIGGER IF EXISTS documents_tsvector_update ON documents;
-- Step 2: drop the trigger function
DROP FUNCTION IF EXISTS update_tsvector();
-- Step 3: drop the tsvector column
ALTER TABLE documents DROP COLUMN IF EXISTS content_tsv;
-- Step 4: drop the index on the tsvector column
-- NOTE(review): the index presumably disappears together with the column in
-- step 3 already; IF EXISTS keeps this statement harmless either way.
DROP INDEX IF EXISTS idx_fts_documents;
-- +goose StatementEnd

View File

@@ -7,12 +7,18 @@ import (
)
type App struct {
articles *database.ArticleRepository
articles *database.ArticleRepository
articleVMs *database.ArticleViewModelRepository
articlePageVMs *database.ArticlePageViewModelRepository
rssItems *database.RSSItemRepository
}
func NewApp(db *sql.DB) *App {
return &App{
articles: &database.ArticleRepository{DB: db},
articles: &database.ArticleRepository{DB: db},
articleVMs: &database.ArticleViewModelRepository{DB: db},
articlePageVMs: &database.ArticlePageViewModelRepository{DB: db},
rssItems: &database.RSSItemRepository{DB: db},
}
}
@@ -20,6 +26,8 @@ func (app *App) Routes() http.Handler {
mux := http.NewServeMux()
// dynamic routes
mux.Handle("GET /rss.xml", http.HandlerFunc(app.RSS))
mux.Handle("GET /", http.HandlerFunc(app.Index))
mux.Handle("GET /page/{id}", http.HandlerFunc(app.Index))
mux.Handle("POST /up/search", http.HandlerFunc(app.UpSearch))
@@ -27,7 +35,7 @@ func (app *App) Routes() http.Handler {
mux.Handle("GET /article/{id}", http.HandlerFunc(app.Article))
// serve files from the "static" directory
mux.Handle("GET /static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./assets/static"))))
mux.Handle("GET /static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./web/static/"))))
return mux
}

View File

@@ -1,7 +1,7 @@
package app
import (
"html/template"
"crowsnest/web/html"
"net/http"
"strconv"
)
@@ -17,24 +17,15 @@ func (app *App) Article(w http.ResponseWriter, req *http.Request) {
}
// get articles
article, err := app.articles.ById(int(id))
articlePageVM, err := app.articlePageVMs.ById(int64(id))
if err != nil {
// treat as no result
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
// render template
t := template.Must(template.ParseFiles(
"assets/templates/articlePage.html",
"assets/templates/layout.html",
))
data := map[string]interface{}{
"SelectedNavItemArticle": false,
"ArticlePageVM": article.PageViewModel(),
}
err = t.ExecuteTemplate(w, "base", data)
// render page
err = html.ArticleLayout(articlePageVM).Render(w)
if err != nil {
http.Error(w, "Failed to render template", http.StatusInternalServerError)
return

View File

@@ -1,8 +1,7 @@
package app
import (
"crowsnest/internal/model"
"html/template"
"crowsnest/web/html"
"net/http"
"strconv"
)
@@ -10,7 +9,7 @@ import (
// List the latest articles using the base template.
func (app *App) Index(w http.ResponseWriter, req *http.Request) {
const pageSize = 15
var limit, offset, pageId uint64 = pageSize, 0, 0
var offset, pageId uint64 = 0, 0
var err error
// get page number
@@ -20,7 +19,7 @@ func (app *App) Index(w http.ResponseWriter, req *http.Request) {
}
// get articles
articleVMs, err := app.articles.AllArticleViewModels(int(limit), int(offset))
articleVMs, err := app.articleVMs.All(pageSize, int(offset))
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
@@ -34,18 +33,8 @@ func (app *App) Index(w http.ResponseWriter, req *http.Request) {
}
totalCount /= pageSize
// render template
t := template.Must(template.ParseFiles(
"assets/templates/article.html",
"assets/templates/layout.html",
"assets/templates/components/pagination.html"))
data := map[string]interface{}{
"SelectedNavItemArticle": true,
"ArticleVMs": &articleVMs,
"Paginations": model.NewPaginationViewModel(uint(pageId+1), totalCount+1),
}
err = t.ExecuteTemplate(w, "base", data)
// render page
err = html.IndexLayout(articleVMs, uint(pageId+1), totalCount+1).Render(w)
if err != nil {
http.Error(w, "Failed to render template", http.StatusInternalServerError)
return

28
src/web/app/rss.go Normal file
View File

@@ -0,0 +1,28 @@
package app
import (
"crowsnest/internal/model"
"encoding/xml"
"net/http"
)
// RSS serves the 30 most recent articles as an RSS feed.
//
// The feed is loaded and encoded completely before anything is written to the
// response, so a failure in either step still results in a proper 500 status.
// On success the feed is sent with content type application/rss+xml.
func (app *App) RSS(w http.ResponseWriter, req *http.Request) {
	// get articles
	items, err := app.rssItems.All(30)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// encode the feed into memory first; once the header is written the
	// status code can no longer be changed
	body, err := xml.MarshalIndent(model.RSSFeedFromItems(items), "", " ")
	if err != nil {
		http.Error(w, "Error generating RSS feed", http.StatusInternalServerError)
		return
	}

	// write RSS feed to response
	w.Header().Set("Content-Type", "application/rss+xml")
	w.WriteHeader(http.StatusOK)
	// A failed write means the client went away; there is nobody left to
	// report the error to.
	_, _ = w.Write(body)
}

32
src/web/app/upsearch.go Normal file
View File

@@ -0,0 +1,32 @@
package app
import (
"crowsnest/web/html"
"net/http"
)
// UpSearch is the endpoint behind the search form. It reads the "search" form
// value and renders the index layout with the matching articles. An empty
// search value is delegated to Index. A failing search or a failing render is
// answered with status 500.
func (app *App) UpSearch(w http.ResponseWriter, req *http.Request) {
	terms := req.FormValue("search")
	if terms == "" {
		// nothing to search for
		app.Index(w, req)
		return
	}

	results, err := app.articleVMs.Search(terms)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// search results are rendered with the pagination values 0 and 0
	if err := html.IndexLayout(results, 0, 0).Render(w); err != nil {
		http.Error(w, "Failed to render template", http.StatusInternalServerError)
	}
}

39
src/web/html/article.go Normal file
View File

@@ -0,0 +1,39 @@
package html
import (
"crowsnest/internal/model"
"strconv"
g "maragu.dev/gomponents"
h "maragu.dev/gomponents/html"
)
// Articles renders the list of articles as one centered column, one Article
// entry per view model.
func Articles(articles []*model.ArticleViewModel) g.Node {
	entries := g.Map(articles, Article)
	return h.Div(h.Class("content max-w-screen-lg flex flex-col mx-auto"), entries)
}
// Article renders a single article as a collapsible entry: the title is the
// always visible part, the collapsed content shows source and date badges,
// the summary and a "Details" link to the article page.
func Article(article *model.ArticleViewModel) g.Node {
	title := h.Div(h.Class("collapse-title font-medium"), g.Text(article.Title))

	badges := h.P(
		h.Class("pb-2"),
		h.Span(h.Class("badge badge-outline"), g.Text(article.ShortSource)),
		h.Span(h.Class("badge badge-outline"), g.Text(article.PublishDate)),
	)
	summary := h.P(h.Class("card-text"), g.Text(article.Summary))

	detailsLink := h.A(
		h.Href("/article/"+strconv.Itoa(article.Id)),
		h.Class("btn btn-active btn-sm btn-primary"),
		g.Attr("up-follow"),
		g.Text("Details"),
	)
	actions := h.Div(h.Class("flex flex-row-reverse"), detailsLink)

	return h.Div(
		h.Class("collapse bg-base-200 shadow mb-4"),
		h.TabIndex("0"),
		title,
		h.Div(h.Class("collapse-content"), badges, summary, actions),
	)
}

View File

@@ -0,0 +1,14 @@
package html
import (
"crowsnest/internal/model"
g "maragu.dev/gomponents"
)
// ArticleLayout renders the complete page for a single article. The page
// title is "Crowsnest - " followed by the article title.
func ArticleLayout(articlePageVMs *model.ArticlePageViewModel) g.Node {
	title := "Crowsnest - " + articlePageVMs.Title
	content := ArticlePage(articlePageVMs)
	return Layout(title, content)
}

View File

@@ -0,0 +1,55 @@
package html
import (
"crowsnest/internal/model"
g "maragu.dev/gomponents"
h "maragu.dev/gomponents/html"
)
// ArticlePageColumn renders one labelled row of the article page: key as a
// badge followed by value as plain text.
func ArticlePageColumn(key string, value string) g.Node {
	label := h.Span(h.Class("badge badge-neutral me-4 w-20"), g.Text(key))
	return h.P(label, g.Text(value))
}
// ArticlePage renders the detail card of an article: a header with the title,
// one row each for date, source, summary and content, and a button linking to
// the original source.
func ArticlePage(articlePageVM *model.ArticlePageViewModel) g.Node {
	header := h.Div(
		h.Class("flex flex-row pb-4"),
		h.Div(h.Class("divider divider-horizontal divider-primary")),
		h.Div(h.Class("card-title font-medium"), g.Text(articlePageVM.Title)),
	)

	visitButton := h.Button(h.Class("btn btn-primary btn-sm"), g.Text("Seite besuchen"))
	actions := h.Div(
		h.Class("card-actions justify-end"),
		h.A(h.Href(articlePageVM.SourceUrl), visitButton),
	)

	details := h.Div(
		h.Class("px-5 pb-4 grid gap-y-4 grid-cols-1"),
		ArticlePageColumn("Datum", articlePageVM.PublishDate),
		ArticlePageColumn("Quelle", articlePageVM.ShortSource),
		ArticlePageColumn("TLDR", articlePageVM.Summary),
		ArticlePageColumn("Inhalt", articlePageVM.Content),
		actions,
	)

	return h.Div(
		h.TabIndex("0"),
		h.Class("card bg-base-200 shadow mb-4"),
		h.Div(h.Class("card-body"), header, details),
	)
}

View File

@@ -0,0 +1,17 @@
package html
import (
"crowsnest/internal/model"
g "maragu.dev/gomponents"
)
// IndexLayout renders the complete article overview page: the list of
// articles followed by the pagination for the given current and total page.
func IndexLayout(articleVMs []*model.ArticleViewModel, paginationCurrent uint, paginationTotal uint) g.Node {
	body := g.Group{
		Articles(articleVMs),
		Pagination(paginationCurrent, paginationTotal),
	}
	return Layout("Crowsnest - Artikel", body)
}

43
src/web/html/layout.go Normal file
View File

@@ -0,0 +1,43 @@
package html
import (
g "maragu.dev/gomponents"
c "maragu.dev/gomponents/components"
h "maragu.dev/gomponents/html"
)
// Layout wraps body into the complete HTML5 document shared by all pages: the
// navbar, the content container holding body, and the scripts that configure
// the page. title becomes the document title; the document language is "de"
// and the dark theme is selected on the html element.
func Layout(title string, body g.Node) g.Node {
	completeBody := []g.Node{
		Navbar(),
		h.Div(
			h.Class("content container flex flex-col mx-auto px-4 mt-28"),
			body,
		),
		// NOTE(review): layoutHead already loads a tailwind script from
		// /static — confirm whether this CDN copy is still needed.
		h.Script(h.Src("https://cdn.tailwindcss.com")),
		h.Script(
			// The script source must be emitted verbatim: g.Text would
			// HTML-escape the single quotes, and entities are not decoded
			// inside a script element, which would break the JavaScript.
			g.Raw(
				"up.link.config.followSelectors.push('a[href]')\n"+
					"up.link.config.instantSelectors.push('a[href]')",
			),
		),
	}

	return c.HTML5(c.HTML5Props{
		Title:     title,
		Language:  "de",
		Head:      layoutHead(),
		Body:      completeBody,
		HTMLAttrs: []g.Node{g.Attr("data-theme", "dark")},
	})
}
// layoutHead returns the nodes placed in the document head: the viewport meta
// tag plus the stylesheets and scripts served from /static.
func layoutHead() []g.Node {
	// stylesheet builds the link element for a CSS file.
	stylesheet := func(href string) g.Node {
		return h.Link(h.Rel("stylesheet"), h.Type("text/css"), h.Href(href))
	}

	return []g.Node{
		h.Meta(h.Name("viewport"), h.Content("width=device-width, initial-scale=1")),
		stylesheet("/static/unpoly.min.css"),
		h.Script(h.Src("/static/unpoly.min.js")),
		stylesheet("/static/daisyui.min.css"),
		h.Script(h.Src("/static/tailwindcss.min.js")),
	}
}

76
src/web/html/navbar.go Normal file
View File

@@ -0,0 +1,76 @@
package html
import (
g "maragu.dev/gomponents"
"maragu.dev/gomponents-heroicons/v3/outline"
h "maragu.dev/gomponents/html"
)
// NavbarLi renders one navigation entry: a list item holding a link to href
// labelled text. The link gets the class "active" when active is true.
func NavbarLi(href string, text string, active bool) g.Node {
	link := h.A(
		h.Href(href),
		h.TabIndex("0"),
		g.If(active, h.Class("active")),
		g.Text(text),
	)
	return h.Li(link)
}
// SearchForm renders the search form of the navbar. It posts the "search"
// field to /up/search and carries the up-submit, up-autosubmit and up-target
// attributes, the latter pointing at ".content".
func SearchForm() g.Node {
	field := h.Input(
		h.Name("search"),
		h.Type("search"),
		h.Class("grow"),
		h.Placeholder("Suche"),
	)
	icon := outline.MagnifyingGlass(
		h.Class("h-4 w-4 opacity-70"),
		g.Attr("stroke", "currentColor"),
	)
	label := h.Label(
		h.Class("input input-bordered input-sm flex items-center gap-2"),
		field,
		icon,
	)

	return h.Form(
		h.Role("search"),
		h.Method("post"),
		h.Action("/up/search"),
		g.Attr("up-submit"),
		g.Attr("up-autosubmit"),
		g.Attr("up-target", ".content"),
		label,
	)
}
// Navbar renders the fixed navigation bar at the top of the page. It contains
// the brand link with the horizontal menu, the search form, and a dropdown
// menu with the same entries; the "sm" classes decide which of them is shown
// at which screen width.
func Navbar() g.Node {
	// brand and links for large screens
	brand := h.A(h.Href("/"), h.TabIndex("0"), h.Class("btn btn-ghost text-xl"), g.Text("crowsnest"))
	wideMenu := h.Ul(
		h.Class("menu menu-horizontal hidden sm:flex"),
		NavbarLi("/", "Artikel", true),
		NavbarLi("/", "Themen", false),
	)
	links := h.Div(h.Class("flex-1"), brand, wideMenu)

	// search
	search := h.Div(h.Class("hidden sm:flex flex-none pe-4"), SearchForm())

	// small navbar dropdown: button plus content
	dropdownButton := h.Div(
		h.TabIndex("0"),
		h.Role("button"),
		h.Class("btn btn-ghost"),
		outline.Bars3BottomRight(
			h.Class("h-6 w-6 opacity-70"),
			g.Attr("stroke", "currentColor"),
		),
	)
	dropdownMenu := h.Ul(
		h.Class("menu dropdown-content bg-base-100 rounded-box z-[1] mt-3 w-52 p-2 shadow"),
		NavbarLi("/", "Artikel", true),
		NavbarLi("/", "Themen", false),
	)
	dropdown := h.Div(h.Class("dropdown dropdown-end sm:hidden"), dropdownButton, dropdownMenu)

	bar := h.Div(
		h.Class("navbar bg-base-300 rounded-box drop-shadow-md"),
		links,
		search,
		dropdown,
	)
	return h.Nav(h.Class("fixed top-0 z-50 w-full p-4"), bar)
}

View File

@@ -0,0 +1,45 @@
package html
import (
"strconv"
g "maragu.dev/gomponents"
h "maragu.dev/gomponents/html"
)
// PaginationButton renders one button of the pagination. content is both the
// label and the page the button links to ("/page/" + content). A disabled
// button gets the class "btn-disabled" and neither href nor tabindex; an
// active button gets the class "btn-active".
func PaginationButton(content string, active bool, disabled bool) g.Node {
	class := "join-item btn btn-sm"
	if disabled {
		class += " btn-disabled"
	}
	if active {
		class += " btn-active"
	}

	enabled := !disabled
	return h.A(
		h.Class(class),
		g.If(enabled, h.Href("/page/"+content)),
		g.If(enabled, h.TabIndex("0")),
		g.Attr("up-follow"),
		g.Attr("up-target", ".content"),
		g.Text(content),
	)
}
// Pagination renders the page selector for currentPage out of totalPages
// (both 1-based). It shows the first and the last page, the current page with
// its direct neighbours, and a disabled "..." button wherever pages are left
// out in between.
//
// With one page or none there is nothing to select, so the selector is
// rendered without any button.
func Pagination(currentPage uint, totalPages uint) g.Node {
	// Calculate with signed values: expressions such as totalPages-2 wrap
	// around for small unsigned values and would produce stray buttons.
	current, total := int(currentPage), int(totalPages)

	var buttons []g.Node
	if total > 1 {
		// first page
		buttons = append(buttons, PaginationButton("1", current == 1, false))

		// gap between the first page and the window around the current page
		if current > 3 {
			buttons = append(buttons, PaginationButton("...", false, true))
		}

		// window around the current page, excluding first and last page
		for i := max(2, current-1); i <= min(total-1, current+1); i++ {
			buttons = append(buttons, PaginationButton(strconv.Itoa(i), i == current, false))
		}

		// gap between the window and the last page
		if current < total-2 {
			buttons = append(buttons, PaginationButton("...", false, true))
		}

		// last page
		buttons = append(buttons, PaginationButton(strconv.Itoa(total), total == current, false))
	}

	return h.Div(h.Class("join pagination p-5 mx-auto"), h.Span(buttons...))
}