sanftsorgfaeltig/main.go

150 lines
2.6 KiB
Go
Raw Normal View History

2021-10-24 21:48:36 +02:00
//nolint:gochecknoglobals
package main
import (
"context"
"fmt"
"io"
"log"
"net/http"
"os"
"regexp"
"runtime"
"strconv"
"sync"
"time"
"github.com/gocolly/colly/v2"
"go.xsfx.dev/workgroups"
)
// URL is the address of the episode listing page that main scrapes.
const URL = "https://susallefolgen.netlify.app/"
// Episode describes one episode scraped from the listing page at URL.
type Episode struct {
// Title is the heading text after the "#<number> " prefix; set by ParseTitle.
Title string
// Number is the episode number taken from the heading; set by ParseTitle.
Number int
// Date is filled in by main, parsed with the layout "Mon, 02 Jan 2006".
Date time.Time
// URL is the link target taken from the episode's table; Download fetches it.
URL string
// file is neither read nor written anywhere in the visible code.
// NOTE(review): presumably meant to hold the local file name — confirm or remove.
file string
}
// Episodes is a list of Episode values together with the mutex that guards it.
// main locks the embedded mutex around every append to Episodes.
type Episodes struct {
sync.Mutex
// Episodes holds the collected episodes in the order they were appended.
Episodes []Episode
}
// titlePattern matches episode headings of the form "#<number> <title>".
// It is compiled once at package scope instead of on every ParseTitle call.
var titlePattern = regexp.MustCompile(`^#(?P<episode>\d+)\s(?P<title>.+)`)

// ParseTitle splits a heading such as "#12 Some title" into its episode
// number and title and stores them in e.Number and e.Title.
//
// It returns an error when title does not have the "#<number> <title>" shape
// or when the number cannot be converted to an int. In both cases e is left
// unchanged.
func (e *Episode) ParseTitle(title string) error {
	match := titlePattern.FindStringSubmatch(title)
	if match == nil {
		// FindStringSubmatch returns nil on no match; indexing it would panic.
		return fmt.Errorf("could not parse title %q: unexpected format", title)
	}

	number, err := strconv.Atoi(match[1])
	if err != nil {
		return fmt.Errorf("could not convert number string to int: %w", err)
	}

	e.Number = number
	e.Title = match[2]

	return nil
}
// episodes is the package-level list that main fills while scraping and then
// iterates over to schedule the downloads.
var episodes Episodes
// Download fetches e.URL over HTTP and writes the response body to a file
// named "<Number>.mp3", created via os.Create with that relative name.
//
// It returns an error when the request fails, when the server answers with a
// status other than 200 OK (so an error page is never stored as an mp3), or
// when the file cannot be created, written or closed.
func (e *Episode) Download() error {
	log.Printf("downloading: %d - %s", e.Number, e.Title)

	resp, err := http.Get(e.URL)
	if err != nil {
		return fmt.Errorf("could not download: %w", err)
	}
	defer resp.Body.Close()

	// http.Get only reports transport-level failures; a 404 or 500 still
	// yields a nil error, so the status has to be checked explicitly.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("could not download %s: unexpected status %s", e.URL, resp.Status)
	}

	f, err := os.Create(fmt.Sprintf("%d.mp3", e.Number))
	if err != nil {
		return fmt.Errorf("could not create file: %w", err)
	}

	if _, err := io.Copy(f, resp.Body); err != nil {
		// Best-effort close; the copy error is the one worth reporting.
		_ = f.Close()

		return fmt.Errorf("could not copy body to file: %w", err)
	}

	// Close explicitly instead of via defer: a failing Close on a file that
	// was written to can mean data was not persisted.
	if err := f.Close(); err != nil {
		return fmt.Errorf("could not close file: %w", err)
	}

	return nil
}
// main scrapes the listing page at URL, collects one Episode per matched
// element into the package-level episodes list and then downloads every
// collected episode through a workgroups dispatcher. Any scraping, parsing
// or download error terminates the program via log.Fatal.
//
//nolint:funlen
func main() {
	c := colly.NewCollector()

	c.OnHTML("body div.container div", func(e *colly.HTMLElement) {
		episode := Episode{}

		// The heading carries "#<number> <title>".
		e.ForEach("strong", func(_ int, el *colly.HTMLElement) {
			if err := episode.ParseTitle(el.Text); err != nil {
				log.Fatal(err)
			}
		})

		// The publication date, e.g. "Mon, 02 Jan 2006".
		e.ForEach("table.u-full-width tbody tr td em", func(_ int, el *colly.HTMLElement) {
			t, err := time.Parse("Mon, 02 Jan 2006", el.Text)
			if err != nil {
				log.Fatal(err)
			}

			episode.Date = t
		})

		// The link that Download will fetch.
		e.ForEach("table.u-full-width tbody tr td a", func(_ int, el *colly.HTMLElement) {
			episode.URL = el.Attr("href")
		})

		episodes.Lock()
		episodes.Episodes = append(episodes.Episodes, episode)
		episodes.Unlock()
	})

	// Visit reports fetch failures through its return value. Ignoring it
	// would let the program exit successfully without downloading anything.
	if err := c.Visit(URL); err != nil {
		log.Fatalf("could not visit %s: %v", URL, err)
	}

	// Debug aid: dump the scraped episodes as JSON.
	// j, err := json.MarshalIndent(episodes, "", " ")
	// if err != nil {
	// log.Fatal(err)
	// }
	// fmt.Print(string(j))

	// NOTE(review): the second and third arguments are presumably the worker
	// count and the queue length — confirm against the workgroups docs.
	d, ctx := workgroups.NewDispatcher(
		context.Background(),
		runtime.GOMAXPROCS(0),
		len(episodes.Episodes),
	)
	d.Start()

	for _, ep := range episodes.Episodes {
		ep := ep // per-iteration copy for the closure (needed before Go 1.22)

		d.Append(workgroups.NewJob(ctx, func(ctx context.Context) error {
			// Do not start a download once the context is already done.
			select {
			default:
			case <-ctx.Done():
				return fmt.Errorf("got error from context: %w", ctx.Err())
			}

			if err := ep.Download(); err != nil {
				return fmt.Errorf("could not download: %w", err)
			}

			return nil
		}))
	}

	d.Close()

	if err := d.Wait(); err != nil {
		log.Fatal(err)
	}
}