From 249a9cd0acea9e6aa49aa58d3516bd260c42faab Mon Sep 17 00:00:00 2001 From: Austin Davis Date: Thu, 25 Apr 2024 01:09:25 -0600 Subject: [PATCH] local script now uses job api instead of writing to a file. --- local/package-lock.json | 130 ++++++++++++++++++++++++++++++++- local/package.json | 2 + local/src/fileHandler.ts | 11 --- local/src/index.ts | 9 ++- local/src/jobService.ts | 27 +++++++ local/src/scraper/sitea.ts | 19 +++-- local/tests/jobService.test.ts | 43 +++++++++++ 7 files changed, 222 insertions(+), 19 deletions(-) delete mode 100644 local/src/fileHandler.ts create mode 100644 local/src/jobService.ts create mode 100644 local/tests/jobService.test.ts diff --git a/local/package-lock.json b/local/package-lock.json index 93db025..c4bbea7 100644 --- a/local/package-lock.json +++ b/local/package-lock.json @@ -5,11 +5,12 @@ "packages": { "": { "dependencies": { + "axios": "^1.6.8", "puppeteer": "^22.4.0" }, "devDependencies": { - "@jest/globals": "^29.7.0", "@types/jest": "^29.5.12", + "axios-mock-adapter": "^1.22.0", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.3.3" @@ -1524,6 +1525,34 @@ "node": ">=4" } }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.6.8", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz", + "integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/axios-mock-adapter": { + "version": "1.22.0", + "resolved": "https://registry.npmjs.org/axios-mock-adapter/-/axios-mock-adapter-1.22.0.tgz", + "integrity": "sha512-dmI0KbkyAhntUR05YY96qg2H6gg0XMl2+qTW0xmYg6Up+BFBAJYRLROMXRdDEL06/Wqwa0TJThAYvFtSFdRCZw==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.3", + "is-buffer": "^2.0.5" + }, + "peerDependencies": { + "axios": ">= 0.17.0" + } + }, "node_modules/b4a": { "version": "1.6.6", "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", @@ -2027,6 +2056,17 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -2237,6 +2277,14 @@ "node": ">= 14" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/detect-newline": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", @@ -2450,6 +2498,12 @@ "@types/yauzl": "^2.9.1" } }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, "node_modules/fast-fifo": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", @@ -2503,6 +2557,38 @@ "node": ">=8" } }, + "node_modules/follow-redirects": { + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/fs-extra": { "version": "11.2.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", @@ -2787,6 +2873,29 @@ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==" }, + "node_modules/is-buffer": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", + "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "engines": { + "node": ">=4" + } + }, "node_modules/is-core-module": { "version": "2.13.1", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.1.tgz", @@ -4633,6 +4742,25 @@ "node": ">=8.6" } }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/mimic-fn": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", diff --git a/local/package.json b/local/package.json index 7c815e8..b88a191 100644 --- a/local/package.json +++ b/local/package.json @@ -5,11 +5,13 @@ }, "devDependencies": { "@types/jest": "^29.5.12", + "axios-mock-adapter": "^1.22.0", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.3.3" }, "dependencies": { + "axios": "^1.6.8", "puppeteer": "^22.4.0" } } diff --git a/local/src/fileHandler.ts b/local/src/fileHandler.ts deleted file mode 100644 index 5d0ea15..0000000 --- a/local/src/fileHandler.ts +++ /dev/null @@ -1,11 +0,0 @@ -import fs from 'fs'; - -// Create a text document named with today's date -const date = new Date(); - -const fileName = `${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()}-jobs.txt`; - -export function writeToFile(pageUrl: string, jobTitle: string | null, companyName: string | null, matchedKeyword: string) { - const fileContent = `${pageUrl}, ${jobTitle}, ${companyName}, ${matchedKeyword}\n`; - fs.appendFileSync(fileName, fileContent); -} diff --git a/local/src/index.ts b/local/src/index.ts index fc408e7..59977c1 100644 --- a/local/src/index.ts +++ b/local/src/index.ts @@ -1,15 +1,22 @@ import puppeteer from 'puppeteer'; import { scrap } from './scraper/sitea'; +import { JobService } from './jobService'; const main = async () =>{ let jobCount = 0; const link = process.env.LOCAL_SCRAPER_SITEA; + const jobApiEndpoint = process.env.JOB_API_ENDPOINT; if (!link) { console.error('LOCAL_SCRAPER_SITEA environment variable isnt set'); return; } + if (!jobApiEndpoint) { + console.error('JOB_API_ENDPOINT environment variable isnt set'); + return; + } const browser = await puppeteer.launch({ headless: false }); - await scrap(browser, link, jobCount); + const jobService = new JobService(jobApiEndpoint); + await scrap(browser, link, jobCount, jobService); console.log('Scraping complete'); await browser.close(); } diff --git a/local/src/jobService.ts b/local/src/jobService.ts new file mode 100644 index 0000000..1018514 --- /dev/null +++ b/local/src/jobService.ts @@ -0,0 +1,27 @@ +import axios from 'axios'; + +export interface Job { + title: string; + company: string; + keyword: string; + link: string; +} + +export interface Response { + total: number; + uncached: number; + duplicates: number; +} + +export class JobService { + private endpoint: string; + + constructor(endpoint: string) { + this.endpoint = endpoint; + } + + async sendJobs(jobs: Job[]): Promise { + const response = await axios.post(this.endpoint, { jobs }); + return response.data as Response; + } +} \ No newline at end of file diff --git a/local/src/scraper/sitea.ts b/local/src/scraper/sitea.ts index 2ad3170..a807005 100644 --- a/local/src/scraper/sitea.ts +++ b/local/src/scraper/sitea.ts @@ -1,6 +1,6 @@ import { Browser, Page } from 'puppeteer'; import { evaluateJobInterest } from '../interest'; -import { writeToFile } from '../fileHandler'; +import { JobService, Job } from '../jobService'; const urls: string[] = []; const didUrlChange = async (page: Page): Promise => { @@ -18,7 +18,8 @@ const didUrlChange = async (page: Page): Promise => { }; -export const scrap = async(browser : Browser, link : string, jobCount : number) => { +export const scrap = async(browser : Browser, link : string, jobCount : number, jobService: JobService) => { + let interestingJobs: Job[] = []; try{ const page = await browser.newPage(); await page.goto(link); @@ -54,16 +55,22 @@ export const scrap = async(browser : Browser, link : string, jobCount : number) const pageUrl = urls[urls.length - 1]; // Get the current page URL const jobCategory = evaluateJobInterest(jobTitle, companyName, jobDescriptionText); - if(jobCategory){ - writeToFile(pageUrl, jobTitle, companyName, jobCategory); - jobCount++; - console.log(`Job found ${jobCount}`); + if(jobCategory && jobTitle && companyName && pageUrl){ + interestingJobs.push({ title: jobTitle, company: companyName, keyword: jobCategory, link: pageUrl }); } await new Promise(resolve => setTimeout(resolve, 5000)); // Sleep for 5 seconds to avoid bot detection } } } + + // Send the jobs to the API + if (interestingJobs.length) { + const response = await jobService.sendJobs(interestingJobs); + jobCount += response.uncached; + console.log(`Total jobs: ${jobCount}`); + interestingJobs = [] + } // Click on the "Next" button and wait for the next page to load const nextButton = await page.$('[data-testid="pagination-page-next"]'); diff --git a/local/tests/jobService.test.ts b/local/tests/jobService.test.ts new file mode 100644 index 0000000..a83d86f --- /dev/null +++ b/local/tests/jobService.test.ts @@ -0,0 +1,43 @@ +import axios from 'axios'; +import MockAdapter from 'axios-mock-adapter'; +import { JobService, Job, Response } from '../src/jobService'; + +describe('JobService', () => { + let mockAxios: MockAdapter; + let jobService: JobService; + + beforeEach(() => { + mockAxios = new MockAdapter(axios); + jobService = new JobService('http://mock-endpoint.com'); + }); + + afterEach(() => { + mockAxios.reset(); + }); + + it('should send jobs and return the response', async () => { + const jobs: Job[] = [ + { title: 'Software Engineer', company: 'Company1', keyword: 'Go', link: 'http://example.com/job1' }, + { title: 'Data Analyst', company: 'Company2', keyword: 'Python', link: 'http://example.com/job2' }, + { title: 'Financial Advisor', company: 'Company3', keyword: 'Finance', link: 'http://example.com/job3' }, + ]; + + const expectedResponse: Response = { total: 3, uncached: 1, duplicates: 2 }; + + mockAxios.onPost('http://mock-endpoint.com').reply(200, expectedResponse); + + const response = await jobService.sendJobs(jobs); + + expect(response).toEqual(expectedResponse); + }); + + it('should throw an error if the request fails', async () => { + const jobs: Job[] = [ + { title: 'Software Engineer', company: 'Company1', keyword: 'Go', link: 'http://example.com/job1' }, + ]; + + mockAxios.onPost('http://mock-endpoint.com').networkError(); + + await expect(jobService.sendJobs(jobs)).rejects.toThrow('Network Error'); + }); +});