Skip to content

Commit

Permalink
Merge pull request #13 from austin1237/duplicate-links
Browse files Browse the repository at this point in the history
All sites now deduplicate links before grabbing individual posts.
  • Loading branch information
austin1237 authored Feb 27, 2024
2 parents bf674d7 + 38e27a3 commit 4d04af2
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
5 changes: 3 additions & 2 deletions scraper/interest/interest.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,16 @@ func CheckIfInterested(description string) bool {

type JobInfoGetter func(string, string) (string, error)

- func FilterInterest(proxyUrl string, Jobs []job.Job, jobInfoGetter JobInfoGetter) []job.Job {
+ func FilterInterest(proxyUrl string, possibleJobs []job.Job, jobInfoGetter JobInfoGetter) []job.Job {
interestingJobs := []job.Job{}
+ possibleJobs = job.DeduplicatedLinks(possibleJobs)
var wg sync.WaitGroup

// Number of concurrent goroutines
maxGoroutines := 10
var goroutineCount int

- for _, possibleJob := range Jobs {
+ for _, possibleJob := range possibleJobs {
wg.Add(1)
goroutineCount++

Expand Down
1 change: 0 additions & 1 deletion scraper/sitec/sitec.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ func ScanNewJobs(sitecBaseUrl string, proxyUrl string) []job.Job {
jobs = append(jobs, <-jobChannel...)
}

- jobs = job.DeduplicatedLinks(jobs)
log.Println(sitecBaseUrl+" total jobs found", len(jobs))
interestingJobs := interest.FilterInterest(proxyUrl, jobs, getSiteCJobInfo)
log.Println(sitecBaseUrl+" interesting jobs", len(interestingJobs))
Expand Down

0 comments on commit 4d04af2

Please sign in to comment.