Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

local script now uses job api instead of writing to a file. #22

Merged
merged 1 commit into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 129 additions & 1 deletion local/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions local/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
},
"devDependencies": {
"@types/jest": "^29.5.12",
"axios-mock-adapter": "^1.22.0",
"jest": "^29.7.0",
"ts-jest": "^29.1.2",
"typescript": "^5.3.3"
},
"dependencies": {
"axios": "^1.6.8",
"puppeteer": "^22.4.0"
}
}
11 changes: 0 additions & 11 deletions local/src/fileHandler.ts

This file was deleted.

9 changes: 8 additions & 1 deletion local/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import puppeteer from 'puppeteer';
import { scrap } from './scraper/sitea';
import { JobService } from './jobService';

/**
 * Entry point of the local scraper.
 *
 * Reads the target site URL (`LOCAL_SCRAPER_SITEA`) and the job API endpoint
 * (`JOB_API_ENDPOINT`) from the environment, launches a visible (non-headless)
 * browser, and runs the site scraper, which reports interesting jobs through
 * the given {@link JobService}.
 *
 * If either environment variable is missing, an error is logged and the
 * function returns without launching a browser.
 *
 * @returns A promise that resolves once scraping has finished and the browser
 *          has been closed. Errors thrown by the scraper propagate to the
 *          caller after the browser has been closed.
 */
const main = async (): Promise<void> => {
  // Starting value for the scraper's job counter. Numbers are passed by value,
  // so any counting done inside `scrap` is not reflected back here.
  const jobCount = 0;
  const link = process.env.LOCAL_SCRAPER_SITEA;
  const jobApiEndpoint = process.env.JOB_API_ENDPOINT;
  if (!link) {
    console.error('LOCAL_SCRAPER_SITEA environment variable isnt set');
    return;
  }
  if (!jobApiEndpoint) {
    console.error('JOB_API_ENDPOINT environment variable isnt set');
    return;
  }

  const jobService = new JobService(jobApiEndpoint);
  const browser = await puppeteer.launch({ headless: false });
  try {
    // Single scrape pass: the earlier three-argument call (without a job
    // service) is gone, so the site is not scraped twice.
    await scrap(browser, link, jobCount, jobService);
    console.log('Scraping complete');
  } finally {
    // Always release the browser process, even when scraping throws.
    await browser.close();
  }
};
Expand Down
27 changes: 27 additions & 0 deletions local/src/jobService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import axios from 'axios';

/**
 * One job entry in the `jobs` array that `JobService.sendJobs` posts to the job API.
 */
export interface Job {
/** Title of the job posting. */
title: string;
/** Name of the company offering the job. */
company: string;
/** Keyword/category attached to the job; the scraper fills this with the result of its interest evaluation. */
keyword: string;
/** URL associated with the job; the scraper fills this with the current page URL. */
link: string;
}

/**
 * Shape that `JobService.sendJobs` assumes for the body returned by the job API.
 *
 * NOTE(review): this name shadows the global Fetch `Response` type inside
 * modules that import it — confirm that is intentional.
 */
export interface Response {
/** Presumably the number of jobs the API received in the request — TODO confirm against the API. */
total: number;
/** Count the scraper adds to its running job total; presumably jobs the API had not seen before — TODO confirm. */
uncached: number;
/** Presumably the number of submitted jobs the API already knew about — TODO confirm. */
duplicates: number;
}

/**
 * Thin client for the job API: posts batches of jobs to a single endpoint.
 */
export class JobService {
  /**
   * @param endpoint URL that job batches are POSTed to.
   */
  constructor(private readonly endpoint: string) {}

  /**
   * Sends the given jobs to the endpoint as `{ jobs: [...] }`.
   *
   * @param jobs Jobs to submit in one request.
   * @returns The response body, typed as {@link Response} (not validated at runtime).
   * @throws Whatever error the underlying `axios.post` call rejects with.
   */
  async sendJobs(jobs: Job[]): Promise<Response> {
    const { data } = await axios.post<Response>(this.endpoint, { jobs });
    return data;
  }
}
19 changes: 13 additions & 6 deletions local/src/scraper/sitea.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Browser, Page } from 'puppeteer';
import { evaluateJobInterest } from '../interest';
import { writeToFile } from '../fileHandler';
import { JobService, Job } from '../jobService';
// Module-level list of page URLs; `scrap` reads the last entry as the current page URL.
// NOTE(review): presumably appended to by `didUrlChange` — the write site is not visible here, confirm.
const urls: string[] = [];

const didUrlChange = async (page: Page): Promise<boolean> => {
Expand All @@ -18,7 +18,8 @@ const didUrlChange = async (page: Page): Promise<boolean> => {
};


export const scrap = async(browser : Browser, link : string, jobCount : number) => {
export const scrap = async(browser : Browser, link : string, jobCount : number, jobService: JobService) => {
let interestingJobs: Job[] = [];
try{
const page = await browser.newPage();
await page.goto(link);
Expand Down Expand Up @@ -54,16 +55,22 @@ export const scrap = async(browser : Browser, link : string, jobCount : number)
const pageUrl = urls[urls.length - 1]; // Get the current page URL
const jobCategory = evaluateJobInterest(jobTitle, companyName, jobDescriptionText);

if(jobCategory){
writeToFile(pageUrl, jobTitle, companyName, jobCategory);
jobCount++;
console.log(`Job found ${jobCount}`);
if(jobCategory && jobTitle && companyName && pageUrl){
interestingJobs.push({ title: jobTitle, company: companyName, keyword: jobCategory, link: pageUrl });
}

await new Promise(resolve => setTimeout(resolve, 5000)); // Sleep for 5 seconds to avoid bot detection
}
}
}

// Send the jobs to the API
if (interestingJobs.length) {
const response = await jobService.sendJobs(interestingJobs);
jobCount += response.uncached;
console.log(`Total jobs: ${jobCount}`);
interestingJobs = []
}

// Click on the "Next" button and wait for the next page to load
const nextButton = await page.$('[data-testid="pagination-page-next"]');
Expand Down
43 changes: 43 additions & 0 deletions local/tests/jobService.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import axios from 'axios';
import MockAdapter from 'axios-mock-adapter';
import { JobService, Job, Response } from '../src/jobService';

// Unit tests for JobService, with axios intercepted by axios-mock-adapter so no real HTTP is made.
describe('JobService', () => {
  const ENDPOINT = 'http://mock-endpoint.com';

  // Fixture shared by both test cases.
  const softwareEngineerJob: Job = {
    title: 'Software Engineer',
    company: 'Company1',
    keyword: 'Go',
    link: 'http://example.com/job1',
  };

  let mockAxios: MockAdapter;
  let jobService: JobService;

  beforeEach(() => {
    mockAxios = new MockAdapter(axios);
    jobService = new JobService(ENDPOINT);
  });

  afterEach(() => {
    // Drop the handlers registered by the test that just ran.
    mockAxios.reset();
  });

  it('should send jobs and return the response', async () => {
    const expectedResponse: Response = { total: 3, uncached: 1, duplicates: 2 };
    mockAxios.onPost(ENDPOINT).reply(200, expectedResponse);

    const jobs: Job[] = [
      softwareEngineerJob,
      { title: 'Data Analyst', company: 'Company2', keyword: 'Python', link: 'http://example.com/job2' },
      { title: 'Financial Advisor', company: 'Company3', keyword: 'Finance', link: 'http://example.com/job3' },
    ];

    await expect(jobService.sendJobs(jobs)).resolves.toEqual(expectedResponse);
  });

  it('should throw an error if the request fails', async () => {
    mockAxios.onPost(ENDPOINT).networkError();

    await expect(jobService.sendJobs([softwareEngineerJob])).rejects.toThrow('Network Error');
  });
});
Loading