Skip to content

Commit

Permalink
Merge pull request #16 from austin1237/sitea-local
Browse files Browse the repository at this point in the history
siteA now pulls local jobs as well
  • Loading branch information
austin1237 authored Mar 27, 2024
2 parents 1cc6126 + 4af48d7 commit ecf91c7
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 15 deletions.
5 changes: 3 additions & 2 deletions scraper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ func init() {
}

dynamoTable = os.Getenv("DYNAMO_TABLE")
if scraperSiteFBaseURL == "" {
log.Fatal("Environment variable SCRAPER_SITEF_BASEURL must be set")
if dynamoTable == "" {
log.Fatal("Environment variable DYNAMO_TABLE must be set")
}

}
Expand Down Expand Up @@ -120,6 +120,7 @@ func lookForNewJobs() {
for range sites {
<-doneChannel
}

}

func handler(ctx context.Context) error {
Expand Down
45 changes: 32 additions & 13 deletions scraper/sitea/sitea.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"scraper/job"
"strconv"
"strings"
"sync"

"github.com/PuerkitoBio/goquery"
)
Expand Down Expand Up @@ -65,21 +66,39 @@ func GetSiteAJobInfo(jobLink string, proxyUrl string) (string, error) {
}

func ScanNewJobs(siteABaseUrl string, proxyUrl string, cache *cache.Cache) ([]job.Job, []job.Job) {
var wg sync.WaitGroup
jobsChan := make(chan []job.Job)

fetchJobs := func(url string) {
defer wg.Done()
finished := false
page := 1
for !finished && page <= 15 {
pageStr := strconv.Itoa(page)
url := url + "?page=" + pageStr
jobs := job.GetNewJobs(url, proxyUrl, siteAJobListParser)
jobsChan <- jobs
// No new jobs found; we're done
if len(jobs) == 0 {
finished = true
}
page++
}
}

wg.Add(2)
go fetchJobs(siteABaseUrl + "/jobs/remote/nationwide/dev-engineering")
// lat and lon are obfuscated / local hospital
go fetchJobs(siteABaseUrl + "/jobs/hybrid/office/dev-engineering?search=Software+Engineer&location=Englewood-CO-USA&longitude=-104.99350&latitude=39.65464&searcharea=25mi")

go func() {
wg.Wait()
close(jobsChan)
}()

possibleJobs := []job.Job{}
finished := false
page := 1

for !finished || page > 15 {
currentJobCount := len(possibleJobs)
pageStr := strconv.Itoa(page)
url := siteABaseUrl + "/jobs/remote/nationwide/dev-engineering?page=" + pageStr
jobs := job.GetNewJobs(url, proxyUrl, siteAJobListParser)
for jobs := range jobsChan {
possibleJobs = append(possibleJobs, jobs...)
// No new jobs found; we're done
if currentJobCount == len(possibleJobs) {
finished = true
}
page++
}

log.Println(siteABaseUrl+" total jobs found", len(possibleJobs))
Expand Down

0 comments on commit ecf91c7

Please sign in to comment.