//nolint:gochecknoglobals package main import ( "context" "fmt" "io" "log" "net/http" "os" "regexp" "runtime" "strconv" "sync" "time" "github.com/gocolly/colly/v2" id3 "github.com/mikkyang/id3-go" "go.xsfx.dev/workgroups" ) const ( URL = "https://susallefolgen.netlify.app/" Artist = "Sanft & Sorgfältig" ) type Episode struct { Title string Number int Date time.Time URL string file string } type Episodes struct { sync.Mutex Episodes []Episode } func (e *Episode) ParseTitle(title string) error { re := regexp.MustCompile(`^#(?P\d+)\s(?P.+)`) match := re.FindStringSubmatch(title) number, err := strconv.Atoi(match[1]) if err != nil { return fmt.Errorf("could not convert number string to int: %w", err) } e.Number = number e.Title = match[2] return nil } var episodes Episodes func (e *Episode) Download() error { log.Printf("downloading: %d - %s", e.Number, e.Title) resp, err := http.Get(e.URL) if err != nil { return fmt.Errorf("could not download: %w", err) } defer resp.Body.Close() e.file = fmt.Sprintf("%d.mp3", e.Number) f, err := os.Create(e.file) if err != nil { return fmt.Errorf("could not create file: %w", err) } defer f.Close() if _, err := io.Copy(f, resp.Body); err != nil { return fmt.Errorf("could not copy body to file: %w", err) } return nil } func (e *Episode) Tag() error { f, err := id3.Open(e.file) if err != nil { return fmt.Errorf("could not open file: %w", err) } defer f.Close() title := fmt.Sprintf("#%03d %s", e.Number, e.Title) f.SetArtist(Artist) f.SetAlbum(title) f.SetTitle(title) return nil } //nolint:funlen func main() { c := colly.NewCollector() c.OnHTML("body div.container div", func(e *colly.HTMLElement) { episode := Episode{} e.ForEach("strong", func(_ int, el *colly.HTMLElement) { if err := episode.ParseTitle(el.Text); err != nil { log.Fatal(err) } }) e.ForEach("table.u-full-width tbody tr td em", func(_ int, el *colly.HTMLElement) { t, err := time.Parse("Mon, 02 Jan 2006", el.Text) if err != nil { log.Fatal(err) } episode.Date = t }) e.ForEach("table.u-full-width tbody tr td a", func(_ int, el *colly.HTMLElement) { episode.URL = el.Attr("href") }) episodes.Lock() episodes.Episodes = append(episodes.Episodes, episode) episodes.Unlock() }) c.Visit(URL) // j, err := json.MarshalIndent(episodes, "", " ") // if err != nil { // log.Fatal(err) // } // fmt.Print(string(j)) d, ctx := workgroups.NewDispatcher( context.Background(), runtime.GOMAXPROCS(0), len(episodes.Episodes[:1]), ) d.Start() for _, ep := range episodes.Episodes[:1] { ep := ep d.Append(workgroups.NewJob(ctx, func(ctx context.Context) error { select { default: case <-ctx.Done(): return fmt.Errorf("got error from context: %w", ctx.Err()) } if err := ep.Download(); err != nil { return fmt.Errorf("could not download: %w", err) } if err := ep.Tag(); err != nil { return fmt.Errorf("could not tag: %w", err) } return nil })) } d.Close() if err := d.Wait(); err != nil { log.Fatal(err) } }