package collectors

import (
	//"crowsnest/internal/model"
	//"regexp"
	//"time"
	//"strings"

	"fmt"
	"time"

	"github.com/gocolly/colly/v2"
)


// Spiegel crawls the spiegel.de news archive and hands every scraped page
// to c.Responses.
//
// It requests one archive page per calendar day, starting at 2000-01-01 (UTC)
// and ending at the moment the method was called. Every link found on a
// visited page is followed as well, limited to the spiegel.de domains and to
// a crawl depth of 3. For each page that finishes scraping, the request URL
// and the response body (as a string) are passed to c.Responses.Insert.
func (c *Collector) Spiegel() {
	// %s is replaced by the archive day formatted as DD.MM.YYYY.
	const archiveURLFormat = "https://www.spiegel.de/nachrichtenarchiv/artikel-%s.html"

	crawler := colly.NewCollector(
		colly.AllowedDomains("www.spiegel.de", "spiegel.de"),
		colly.CacheDir("./persistence/spiegel_cache"),
		colly.MaxDepth(3),
	)

	// Cascade: follow every link on every visited page. The error returned by
	// Visit is discarded here, exactly as before (best-effort crawl).
	crawler.OnHTML("a[href]", func(link *colly.HTMLElement) {
		target := link.Attr("href")
		_ = link.Request.Visit(target)
	})

	// Store the raw body of every fully scraped page, keyed by its URL.
	crawler.OnScraped(func(resp *colly.Response) {
		pageURL := resp.Request.URL.String()
		c.Responses.Insert(pageURL, string(resp.Body))
	})

	// Walk the archive one day at a time, up to and including "now".
	firstDay := time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC)
	now := time.Now()

	for day := firstDay; !day.After(now); day = day.AddDate(0, 0, 1) {
		archiveURL := fmt.Sprintf(archiveURLFormat, day.Format("02.01.2006"))

		// The error returned by Visit is discarded, as in the cascade above.
		_ = crawler.Visit(archiveURL)
	}

	// NOTE(review): the disabled callbacks below are kept for reference only.
	// They use a `results` map that is not defined in this function, and they
	// sit after the visit loop; if re-enabled they presumably need to be
	// registered before the first Visit call to have any effect — confirm.

	//// create entry if not behind paywall
	//paywall_false_pattern := regexp.MustCompile("\"paywall\":{\"attributes\":{\"is_active\":false")
	//collycollector.OnResponse(func(r *colly.Response) {
	//	if paywall_false_pattern.Match(r.Body) {
	//		url := r.Request.URL.String()
	//		(*results)[url] = &model.Article{
	//			SourceUrl: url,
	//			FetchDate: time.Now(),
	//			Content:   "",
	//		}
	//	}

	//})

	//// check for article type
	//collycollector.OnHTML("meta[property='og:type']", func(e *colly.HTMLElement) {
	//	if e.Attr("content") != "article" {
	//        delete(*results, e.Request.URL.String())
	//	}
	//})

	//// add title
	//collycollector.OnHTML("meta[property='og:title']", func(e *colly.HTMLElement) {
	//	if val, ok := (*results)[e.Request.URL.String()]; ok {
	//		val.Title = e.Attr("content")
	//	}
	//})

	//// prepend description to content of article
	//collycollector.OnHTML("meta[name='description']", func(e *colly.HTMLElement) {
	//	if val, ok := (*results)[e.Request.URL.String()]; ok {
	//		val.Content = e.Attr("content") + val.Content
	//	}
	//})

	//// add publishing date
	//collycollector.OnHTML("meta[name='date']", func(e *colly.HTMLElement) {
	//	if val, ok := (*results)[e.Request.URL.String()]; ok {
	//		t, err := time.Parse("2006-01-02T15:04:05-07:00", e.Attr("content"))
	//		if err != nil {
	//			panic(err)
	//		}
	//		val.PublishDate = t
	//	}
	//})

	//// add author
	//collycollector.OnHTML("meta[name='author']", func(e *colly.HTMLElement) {
	//    if val, ok := (*results)[e.Request.URL.String()]; ok {
	//        val.Author = e.Attr("content")
	//    }
	//})

	//// add content
	//collycollector.OnHTML("main[id='Inhalt'] div > p", func(e *colly.HTMLElement) {
	//	if val, ok := (*results)[e.Request.URL.String()]; ok {
	//        cont := val.Content

	//        pattern := regexp.MustCompile("\\s+")
	//        cont = string(pattern.ReplaceAll([]byte(cont), []byte(" ")))
	//        cont = strings.ReplaceAll(cont, "»", "\"")
	//        cont = strings.ReplaceAll(cont, "«", "\"")
	//		val.Content = cont + " " + e.Text
	//	}
	//})
}