remove duplicate files #8

Merged
eliaskohout merged 12 commits from rm_duplicate_file into main 2025-01-20 21:29:36 +01:00
Showing only changes of commit d44105bc7f - Show all commits

View File

@@ -1,69 +0,0 @@
package crawler
import (
"crowsnest/internal/model"
"crowsnest/internal/util"
"github.com/gocolly/colly/v2"
)
// CrawlerFacade bundles the per-source article feeds behind one entry
// point: Init starts the crawlers, and the Subscribe* methods let
// consumers register hooks for converted articles.
type CrawlerFacade struct {
// Fan-out for articles converted from the Spiegel web feed.
spiegelFeedDistributer *util.Distributer[*model.Article]
// Fan-out for articles converted from the Zeit web feed.
zeitFeedDistributer *util.Distributer[*model.Article]
}
// Init creates and initializes both article distributers, then starts
// one background goroutine per news source (Spiegel and Zeit). Each
// goroutine drains its WebFeed channel, converts raw items into
// *model.Article values, and publishes successful conversions to the
// matching distributer.
//
// NOTE(review): the goroutines started here have no cancellation or
// wait mechanism; they run until their feed channel is closed. If
// controlled shutdown is ever needed, consider threading a
// context.Context through WebFeed — TODO confirm with WebFeed's API.
func (cf *CrawlerFacade) Init() {
	// Distributers must exist before the publisher goroutines start
	// and before any Subscribe* call is made.
	cf.spiegelFeedDistributer = &util.Distributer[*model.Article]{}
	cf.spiegelFeedDistributer.Init()
	cf.zeitFeedDistributer = &util.Distributer[*model.Article]{}
	cf.zeitFeedDistributer.Init()

	// Run the Spiegel feed.
	sf := &WebFeed{}
	sf.Init(
		"https://www.spiegel.de/",
		colly.AllowedDomains("www.spiegel.de", "spiegel.de"),
		colly.CacheDir("./persistence/spiegel_cache"),
		colly.MaxDepth(1),
	)
	sfFeed := sf.Feed()
	sfConverter := ConverterSpiegel{}
	sfConverter.Init()
	go func() {
		for val := range sfFeed {
			article, err := sfConverter.Convert(val)
			if err != nil {
				// Best-effort: items that fail conversion are
				// silently dropped.
				continue
			}
			cf.spiegelFeedDistributer.Publish(article)
		}
	}()

	// Run the Zeit feed.
	zf := &WebFeed{}
	zf.Init(
		"https://www.zeit.de/index",
		colly.AllowedDomains("www.zeit.de", "zeit.de"),
		colly.CacheDir("./persistence/zeit_cache"),
		colly.MaxDepth(1),
	)
	zfFeed := zf.Feed()
	zfConverter := ZeitConverter{}
	zfConverter.Init()
	go func() {
		for val := range zfFeed {
			article, err := zfConverter.Convert(val)
			if err != nil {
				// Best-effort: items that fail conversion are
				// silently dropped.
				continue
			}
			cf.zeitFeedDistributer.Publish(article)
		}
	}()
}
// SubscribeToSpiegelFeed registers hook with the Spiegel distributer;
// hook is invoked for every article published from the Spiegel feed.
// Must be called after Init, which creates the distributer.
func (cf *CrawlerFacade) SubscribeToSpiegelFeed(hook func(*model.Article)) {
cf.spiegelFeedDistributer.Subscribe(hook)
}
// SubscribeToZeitFeed registers hook with the Zeit distributer; hook
// is invoked for every article published from the Zeit feed. Must be
// called after Init, which creates the distributer.
func (cf *CrawlerFacade) SubscribeToZeitFeed(hook func(*model.Article)) {
cf.zeitFeedDistributer.Subscribe(hook)
}