Skip to content

Commit

Permalink
Merge pull request #22 from austin1237/local-jobApi
Browse files Browse the repository at this point in the history
local script now uses job api instead of writing to a file.
  • Loading branch information
austin1237 authored Apr 25, 2024
2 parents b02528f + 249a9cd commit b0620a4
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 19 deletions.
130 changes: 129 additions & 1 deletion local/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions local/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
},
"devDependencies": {
"@types/jest": "^29.5.12",
"axios-mock-adapter": "^1.22.0",
"jest": "^29.7.0",
"ts-jest": "^29.1.2",
"typescript": "^5.3.3"
},
"dependencies": {
"axios": "^1.6.8",
"puppeteer": "^22.4.0"
}
}
11 changes: 0 additions & 11 deletions local/src/fileHandler.ts

This file was deleted.

9 changes: 8 additions & 1 deletion local/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import puppeteer from 'puppeteer';
import { scrap } from './scraper/sitea';
import { JobService } from './jobService';

/**
 * Entry point of the local scraper: reads its configuration from the
 * environment, launches a Puppeteer browser, runs the site scraper and then
 * closes the browser.
 *
 * Logs an error and returns without launching anything when either
 * LOCAL_SCRAPER_SITEA or JOB_API_ENDPOINT is unset or empty.
 */
const main = async () =>{
// Passed to scrap() by value, so increments made inside scrap() are not
// reflected in this variable; it is not read again in the lines shown here.
let jobCount = 0;
const link = process.env.LOCAL_SCRAPER_SITEA;
const jobApiEndpoint = process.env.JOB_API_ENDPOINT;
// Both settings are required; bail out early with a message naming the missing one.
if (!link) {
console.error('LOCAL_SCRAPER_SITEA environment variable isnt set');
return;
}
if (!jobApiEndpoint) {
console.error('JOB_API_ENDPOINT environment variable isnt set');
return;
}
// headless: false opens a visible browser window for the run.
const browser = await puppeteer.launch({ headless: false });
// NOTE(review): this rendering lists both a three-argument and a four-argument
// scrap() call. Presumably the three-argument one is the line the commit
// removed and only the call that passes jobService remains — confirm against
// the committed file.
await scrap(browser, link, jobCount);
const jobService = new JobService(jobApiEndpoint);
await scrap(browser, link, jobCount, jobService);
console.log('Scraping complete');
// NOTE(review): close() is only reached when scrap() resolves; if it rejects,
// the browser is left open because there is no try/finally here.
await browser.close();
}
Expand Down
27 changes: 27 additions & 0 deletions local/src/jobService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import axios from 'axios';

/**
 * One job posting, in the shape that JobService.sendJobs() includes in its
 * request body.
 */
export interface Job {
// Title of the posting.
title: string;
// Name of the hiring company.
company: string;
// Presumably the interest category/keyword that made the posting worth
// keeping — confirm against the code that builds Job objects.
keyword: string;
// URL associated with the posting.
link: string;
}

/**
 * Counters that JobService.sendJobs() returns from the job API's response body.
 *
 * NOTE(review): this shares its name with the global Fetch API `Response`
 * type; inside this module the local declaration is the one that resolves.
 */
export interface Response {
// Presumably the number of jobs received in the request — confirm with the API.
total: number;
// Presumably the number of jobs the API had not seen before — confirm with the API.
uncached: number;
// Presumably the number of jobs the API already knew about — confirm with the API.
duplicates: number;
}

/**
 * Runtime check that a decoded response body carries the three numeric
 * counters declared by `Response`.
 *
 * The body arrives over the network, so the compiler cannot vouch for its
 * shape. A missing or non-numeric counter would otherwise flow into caller
 * arithmetic and silently become `NaN`.
 */
const isJobCounters = (body: unknown): body is Response => {
  if (typeof body !== 'object' || body === null) {
    return false;
  }
  const fields = body as Record<string, unknown>;
  // Number.isFinite is false for every non-number, as well as NaN and ±Infinity.
  return ['total', 'uncached', 'duplicates'].every((key) => Number.isFinite(fields[key]));
};

/**
 * Thin client for the job API: posts batches of jobs to a fixed endpoint.
 */
export class JobService {
  private readonly endpoint: string;

  /**
   * @param endpoint URL that job batches are POSTed to.
   */
  constructor(endpoint: string) {
    this.endpoint = endpoint;
  }

  /**
   * Sends `jobs` to the API as `{ jobs: [...] }` and returns the counters from
   * the response body.
   *
   * @param jobs Jobs to submit in a single request.
   * @returns The validated counters reported by the API.
   * @throws Whatever axios rejects with when the request fails (network
   *   error, non-2xx status with the default axios configuration).
   * @throws Error when the response body does not contain numeric `total`,
   *   `uncached` and `duplicates` fields.
   */
  async sendJobs(jobs: Job[]): Promise<Response> {
    // Ask for `unknown` instead of asserting the type, so the guard below is
    // what establishes the shape.
    const { data } = await axios.post<unknown>(this.endpoint, { jobs });
    if (!isJobCounters(data)) {
      throw new Error('Job API returned an unexpected response body');
    }
    return data;
  }
}
19 changes: 13 additions & 6 deletions local/src/scraper/sitea.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Browser, Page } from 'puppeteer';
import { evaluateJobInterest } from '../interest';
import { writeToFile } from '../fileHandler';
import { JobService, Job } from '../jobService';
const urls: string[] = [];

const didUrlChange = async (page: Page): Promise<boolean> => {
Expand All @@ -18,7 +18,8 @@ const didUrlChange = async (page: Page): Promise<boolean> => {
};


export const scrap = async(browser : Browser, link : string, jobCount : number) => {
export const scrap = async(browser : Browser, link : string, jobCount : number, jobService: JobService) => {
let interestingJobs: Job[] = [];
try{
const page = await browser.newPage();
await page.goto(link);
Expand Down Expand Up @@ -54,16 +55,22 @@ export const scrap = async(browser : Browser, link : string, jobCount : number)
const pageUrl = urls[urls.length - 1]; // Get the current page URL
const jobCategory = evaluateJobInterest(jobTitle, companyName, jobDescriptionText);

if(jobCategory){
writeToFile(pageUrl, jobTitle, companyName, jobCategory);
jobCount++;
console.log(`Job found ${jobCount}`);
if(jobCategory && jobTitle && companyName && pageUrl){
interestingJobs.push({ title: jobTitle, company: companyName, keyword: jobCategory, link: pageUrl });
}

await new Promise(resolve => setTimeout(resolve, 5000)); // Sleep for 5 seconds to avoid bot detection
}
}
}

// Send the jobs to the API
if (interestingJobs.length) {
const response = await jobService.sendJobs(interestingJobs);
jobCount += response.uncached;
console.log(`Total jobs: ${jobCount}`);
interestingJobs = []
}

// Click on the "Next" button and wait for the next page to load
const nextButton = await page.$('[data-testid="pagination-page-next"]');
Expand Down
43 changes: 43 additions & 0 deletions local/tests/jobService.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import axios from 'axios';
import MockAdapter from 'axios-mock-adapter';
import { JobService, Job, Response } from '../src/jobService';

/**
 * Unit tests for JobService.sendJobs. axios is intercepted with
 * axios-mock-adapter, so no real HTTP request leaves the process.
 */
describe('JobService', () => {
  const endpoint = 'http://mock-endpoint.com';
  let mockAxios: MockAdapter;
  let jobService: JobService;

  beforeEach(() => {
    mockAxios = new MockAdapter(axios);
    jobService = new JobService(endpoint);
  });

  afterEach(() => {
    // restore() puts the original adapter back on the shared axios instance.
    // reset() would only clear handlers and leave the mock installed, so each
    // beforeEach would stack another adapter on top of the previous one.
    mockAxios.restore();
  });

  it('should send jobs and return the response', async () => {
    const jobs: Job[] = [
      { title: 'Software Engineer', company: 'Company1', keyword: 'Go', link: 'http://example.com/job1' },
      { title: 'Data Analyst', company: 'Company2', keyword: 'Python', link: 'http://example.com/job2' },
      { title: 'Financial Advisor', company: 'Company3', keyword: 'Finance', link: 'http://example.com/job3' },
    ];

    const expectedResponse: Response = { total: 3, uncached: 1, duplicates: 2 };

    // Matching on the body means the handler only answers when the jobs are
    // wrapped as `{ jobs: [...] }`, which pins the request contract too.
    mockAxios.onPost(endpoint, { jobs }).reply(200, expectedResponse);

    const response = await jobService.sendJobs(jobs);

    expect(response).toEqual(expectedResponse);
    expect(mockAxios.history.post).toHaveLength(1);
    // axios serializes the payload before it reaches the adapter.
    expect(JSON.parse(mockAxios.history.post[0].data)).toEqual({ jobs });
  });

  it('should throw an error if the request fails', async () => {
    const jobs: Job[] = [
      { title: 'Software Engineer', company: 'Company1', keyword: 'Go', link: 'http://example.com/job1' },
    ];

    mockAxios.onPost(endpoint).networkError();

    await expect(jobService.sendJobs(jobs)).rejects.toThrow('Network Error');
  });
});

0 comments on commit b0620a4

Please sign in to comment.