-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebscraper_rotten_tomatoes_popular.go
103 lines (84 loc) · 2.07 KB
/
webscraper_rotten_tomatoes_popular.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"sync"
"github.com/gocolly/colly"
)
// RT_Show is one scraped show record. Every field holds the text exactly as
// it was extracted from the page; nothing is parsed into numbers.
type RT_Show struct {
	title              string // show title text
	avg_audience_score string // text of the audience score element
	avg_critic_score   string // text of the critics score element
	rt_url             string // URL of the page the record was scraped from
	synopsis           string // synopsis text
	genre              string // text of the "Genre" category entry
}
func main() {
var wg sync.WaitGroup
c := colly.NewCollector()
c.OnHTML("div.flex-container", func(e *colly.HTMLElement) {
wg.Add(1)
go func() {
defer wg.Done()
url := "https://www.rottentomatoes.com" + e.ChildAttr("a", "href")
c.Visit(url)
}()
})
var rt_shows []RT_Show
c.OnHTML("div.container.rt-layout__body", func(e *colly.HTMLElement) {
rt_show := RT_Show{}
rt_show.title = e.ChildText("h1.unset[slot=titleIntro]")
rt_show.rt_url = e.Request.URL.String()
rt_show.avg_audience_score = e.ChildText("rt-button[slot=audienceScore]")
rt_show.avg_critic_score = e.ChildText("rt-button[slot=criticsScore]")
rt_show.synopsis = e.ChildText("div.synopsis-wrap rt-text:not(.key)")
e.ForEach("div.category-wrap", func(_ int, h *colly.HTMLElement) {
if h.ChildText("dt") == "Genre" {
rt_show.genre = h.ChildText("rt-link")
}
})
rt_shows = append(rt_shows, rt_show)
})
c.OnError(func(r *colly.Response, e error) {
fmt.Println("An error occurred!:", e)
})
c.Visit("https://www.rottentomatoes.com/browse/tv_series_browse/sort:popular")
wg.Wait()
if err := write_RT_Shows_to_CSV(rt_shows); err != nil {
log.Fatalf("Failed to write to CSV: %v", err)
}
}
// write_RT_Shows_to_CSV writes shows to the file "rt_shows.csv" (relative to
// the current working directory), creating or truncating it.
//
// The first row is a header; each following row holds one show in the column
// order title, genre, avg_audience_score, avg_tomatometer (the critic score),
// synopsis, rt_url. Shows with an empty title are skipped.
//
// It returns the first error encountered while creating the file, writing a
// row, flushing the buffered CSV data, or closing the file.
func write_RT_Shows_to_CSV(shows []RT_Show) (err error) {
	file, err := os.Create("rt_shows.csv")
	if err != nil {
		return err
	}
	// Report a failed Close unless an earlier error is already being
	// returned; a close failure can mean the data never reached disk.
	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	writer := csv.NewWriter(file)

	headers := []string{
		"title",
		"genre",
		"avg_audience_score",
		"avg_tomatometer",
		"synopsis",
		"rt_url",
	}
	if err := writer.Write(headers); err != nil {
		return err
	}

	for _, rt_show := range shows {
		if rt_show.title == "" {
			continue
		}
		record := []string{
			rt_show.title,
			rt_show.genre,
			rt_show.avg_audience_score,
			rt_show.avg_critic_score,
			rt_show.synopsis,
			rt_show.rt_url,
		}
		if err := writer.Write(record); err != nil {
			return err
		}
	}

	// csv.Writer buffers its output, so a failed write may only surface at
	// Flush, and Flush itself returns nothing: the error must be fetched
	// with Error.
	writer.Flush()
	return writer.Error()
}