From 38e27a3ad686baadf383a27f64aa2db21bd16016 Mon Sep 17 00:00:00 2001 From: Austin Davis Date: Tue, 27 Feb 2024 04:47:20 -0700 Subject: [PATCH] All sites now deduplicate links before grabbing individual posts. --- scraper/interest/interest.go | 5 +++-- scraper/sitec/sitec.go | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scraper/interest/interest.go b/scraper/interest/interest.go index 57ec9a1..0a04d3c 100644 --- a/scraper/interest/interest.go +++ b/scraper/interest/interest.go @@ -24,15 +24,16 @@ func CheckIfInterested(description string) bool { type JobInfoGetter func(string, string) (string, error) -func FilterInterest(proxyUrl string, Jobs []job.Job, jobInfoGetter JobInfoGetter) []job.Job { +func FilterInterest(proxyUrl string, possibleJobs []job.Job, jobInfoGetter JobInfoGetter) []job.Job { interestingJobs := []job.Job{} + possibleJobs = job.DeduplicatedLinks(possibleJobs) var wg sync.WaitGroup // Number of concurrent goroutines maxGoroutines := 10 var goroutineCount int - for _, possibleJob := range Jobs { + for _, possibleJob := range possibleJobs { wg.Add(1) goroutineCount++ diff --git a/scraper/sitec/sitec.go b/scraper/sitec/sitec.go index 2941f39..d3d2585 100644 --- a/scraper/sitec/sitec.go +++ b/scraper/sitec/sitec.go @@ -65,7 +65,6 @@ func ScanNewJobs(sitecBaseUrl string, proxyUrl string) []job.Job { jobs = append(jobs, <-jobChannel...) } - jobs = job.DeduplicatedLinks(jobs) log.Println(sitecBaseUrl+" total jobs found", len(jobs)) interestingJobs := interest.FilterInterest(proxyUrl, jobs, getSiteCJobInfo) log.Println(sitecBaseUrl+" interesting jobs", len(interestingJobs))