//nolint:gochecknoglobals
package main
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"log"
|
||
|
"net/http"
|
||
|
"os"
|
||
|
"regexp"
|
||
|
"runtime"
|
||
|
"strconv"
|
||
|
"sync"
|
||
|
"time"
|
||
|
|
||
|
"github.com/gocolly/colly/v2"
|
||
|
"go.xsfx.dev/workgroups"
|
||
|
)
|
||
|
|
||
|
// URL is the address of the episode overview page that main visits and scrapes.
const URL = "https://susallefolgen.netlify.app/"
|
||
|
|
||
|
// Episode describes a single entry scraped from the episode overview page.
type Episode struct {
	// Title is the episode title without the leading "#<number>" prefix.
	Title string
	// Number is the episode number; Download uses it to name the output file.
	Number int
	// Date is the date parsed from the page with the "Mon, 02 Jan 2006" layout.
	Date time.Time
	// URL is the link taken from the entry's href attribute; Download fetches it.
	URL string

	// file is not referenced by any code visible in this file.
	file string
}
|
||
|
|
||
|
type Episodes struct {
|
||
|
sync.Mutex
|
||
|
Episodes []Episode
|
||
|
}
|
||
|
|
||
|
func (e *Episode) ParseTitle(title string) error {
|
||
|
re := regexp.MustCompile(`^#(?P<episode>\d+)\s(?P<title>.+)`)
|
||
|
match := re.FindStringSubmatch(title)
|
||
|
|
||
|
number, err := strconv.Atoi(match[1])
|
||
|
if err != nil {
|
||
|
return fmt.Errorf("could not convert number string to int: %w", err)
|
||
|
}
|
||
|
|
||
|
e.Number = number
|
||
|
|
||
|
e.Title = match[2]
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// episodes is the package-level collection that the scraper callback in main
// appends to (under its lock) and that the download loop in main ranges over.
var episodes Episodes
|
||
|
|
||
|
func (e *Episode) Download() error {
|
||
|
log.Printf("downloading: %d - %s", e.Number, e.Title)
|
||
|
|
||
|
resp, err := http.Get(e.URL)
|
||
|
if err != nil {
|
||
|
return fmt.Errorf("could not download: %w", err)
|
||
|
}
|
||
|
defer resp.Body.Close()
|
||
|
|
||
|
f, err := os.Create(fmt.Sprintf("%d.mp3", e.Number))
|
||
|
if err != nil {
|
||
|
return fmt.Errorf("could not create file: %w", err)
|
||
|
}
|
||
|
defer f.Close()
|
||
|
|
||
|
if _, err := io.Copy(f, resp.Body); err != nil {
|
||
|
return fmt.Errorf("could not copy body to file: %w", err)
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
//nolint:funlen
|
||
|
func main() {
|
||
|
c := colly.NewCollector()
|
||
|
|
||
|
c.OnHTML("body div.container div", func(e *colly.HTMLElement) {
|
||
|
episode := Episode{}
|
||
|
|
||
|
e.ForEach("strong", func(_ int, el *colly.HTMLElement) {
|
||
|
if err := episode.ParseTitle(el.Text); err != nil {
|
||
|
log.Fatal(err)
|
||
|
}
|
||
|
})
|
||
|
|
||
|
e.ForEach("table.u-full-width tbody tr td em", func(_ int, el *colly.HTMLElement) {
|
||
|
t, err := time.Parse("Mon, 02 Jan 2006", el.Text)
|
||
|
if err != nil {
|
||
|
log.Fatal(err)
|
||
|
}
|
||
|
|
||
|
episode.Date = t
|
||
|
})
|
||
|
|
||
|
e.ForEach("table.u-full-width tbody tr td a", func(_ int, el *colly.HTMLElement) {
|
||
|
episode.URL = el.Attr("href")
|
||
|
})
|
||
|
|
||
|
episodes.Lock()
|
||
|
episodes.Episodes = append(episodes.Episodes, episode)
|
||
|
episodes.Unlock()
|
||
|
})
|
||
|
|
||
|
c.Visit(URL)
|
||
|
|
||
|
// j, err := json.MarshalIndent(episodes, "", " ")
|
||
|
// if err != nil {
|
||
|
// log.Fatal(err)
|
||
|
// }
|
||
|
|
||
|
// fmt.Print(string(j))
|
||
|
|
||
|
d, ctx := workgroups.NewDispatcher(
|
||
|
context.Background(),
|
||
|
runtime.GOMAXPROCS(0),
|
||
|
len(episodes.Episodes),
|
||
|
)
|
||
|
|
||
|
d.Start()
|
||
|
|
||
|
for _, ep := range episodes.Episodes {
|
||
|
ep := ep
|
||
|
|
||
|
d.Append(workgroups.NewJob(ctx, func(ctx context.Context) error {
|
||
|
select {
|
||
|
default:
|
||
|
case <-ctx.Done():
|
||
|
return fmt.Errorf("got error from context: %w", ctx.Err())
|
||
|
}
|
||
|
|
||
|
if err := ep.Download(); err != nil {
|
||
|
return fmt.Errorf("could not download: %w", err)
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}))
|
||
|
}
|
||
|
|
||
|
d.Close()
|
||
|
|
||
|
if err := d.Wait(); err != nil {
|
||
|
log.Fatal(err)
|
||
|
}
|
||
|
}
|