From 0408c924332f6fb8a8244ca3b0aeaaa72d541bdd Mon Sep 17 00:00:00 2001 From: Divjot Singh Date: Thu, 20 Oct 2016 00:08:35 +0900 Subject: [PATCH] Initial Release --- README.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++-- index.js | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ package.json | 14 +++++++++++ test.js | 10 ++++++++ 4 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 index.js create mode 100644 package.json create mode 100644 test.js diff --git a/README.md b/README.md index 76cca42..53048fc 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,61 @@ -# hukamnama.json -Fetches Hukamnama from Harimandir Sahib, Amritsar via SGPC's PDF +# 🙏🏻 🙏🏼 🙏🏽 🙏🏾 🙏🏿 hukamnama-json + +NodeJS module that fetches Hukamnama from Harimandir Sahib, Amritsar via [SGPC's PDF](http://old.sgpc.net/hukumnama/jpeg%20hukamnama/hukamnama.pdf) + +# Installation + +Requires Node v5+ + +```bash +npm i hukamnama-json +``` + +# Usage + +```javascript +var hukamnama = require('hukamnama-json'); // ES5 +import hukamnama from 'hukamnama-json'; // ES2015 + +hukamnama() // Returns a promise +.then(hukam => { + let { ang, content, gurakhr, punjabi, english } = hukam; + console.log(ang, content, gurakhr, punjabi, english); +}) +.catch(error => console.log(error)); +``` +# Changelog + +## 1.0.0 +* [ ] I don't know `¯\_(ツ)_/¯`, why don't you [suggest](https://github.com/bogas04/hukamnama-json/issues/new)? + +## 0.5.0 +* [ ] Extract date of hukamnama. +* [ ] Ensure it's `battle-tested`. +* [ ] Ensure it works with multiple-page pdf. + +## 0.0.1 (Initial Release) +* [x] Use rudimentary text parsing to extract. + * [x] entire content. + * [x] hukamnama in gurakhr. + * [x] punjabi translation. + * [x] english translation. + * [x] ang. +* [x] Use [PDFJS](https://mozilla.github.io/pdf.js/) to extract contents out of the pdf. 
+ +# Contributing + +```bash +# Clone +git clone https://github.com/bogas04/hukamnama-json +cd hukamnama-json + +# Build +npm i + +# Test +npm test +``` + +# License + +MIT diff --git a/index.js b/index.js new file mode 100644 index 0000000..069aa5c --- /dev/null +++ b/index.js @@ -0,0 +1,65 @@ +'use strict'; + +const http = require('http'); +const PDFJS = require('pdfjs-dist'); +const SGPC_URL = `http://old.sgpc.net/hukumnama/jpeg%20hukamnama/hukamnama.pdf`; +/** Downloads the PDF at SGPC_URL, reads the text items of its first page with PDFJS and splits them into sections. @returns {Promise<{content: string, ang: number, gurakhr: string, punjabi: string, english: string}>} Rejects when the HTTP request or the PDF parsing fails; `ang` stays 0 when no page marker is found. */ +function hukamJS ({ } = { }) { + /* Wraps known header/section marker strings in line breaks so the concatenated `content` stays readable. */ + const injectNewLine = str => [ + e => e.endsWith("TODAY'S HUKAMNAMA FROM SRI DARBAR SAHIB, Sri Amritsar. "), + e => e.endsWith(". IST] "), + e => /, \d{4}$/.test(e) && (str = `\n${str}`), + e => e.startsWith('(AMg:') && (str = `\n${str}`), + e => e.startsWith('English Translation') && (str = `\n${str}`), + e => e.indexOf('Nanakshahi') > 0 && (str = `\n${str}`), + ].some(condition => condition(str)) ? `${str}\n` : str; + + return new Promise((resolve, reject) => ( + http.get(SGPC_URL, res => { + let buff = []; + + res.on('data', chunk => buff.push(chunk)); + + res.on('end', () => PDFJS.getDocument(Buffer.concat(buff)) + .then(pdf => pdf.getPage(1)) + .then(page => page.getTextContent()) + .then(({ items = [] }) => { + let content = '', gurakhr = '', punjabi = '', english = '', ang = 0; + + let gurakhrBegan = false, gurakhrEnded = false; + let punjabiBegan = false, punjabiEnded = false; + let englishBegan = false, englishEnded = false; + + items.forEach(({ str }) => { + content += injectNewLine(str); + /* Marker checks are order-sensitive: '(AMg:' is tested before the appends, 'English Translation' and ' IST] ' after them. */ + if (str.startsWith('(AMg:')) { + gurakhrEnded = true; + punjabiBegan = true; + } + + if (gurakhrBegan && !gurakhrEnded) gurakhr += str; + if (punjabiBegan && !punjabiEnded) punjabi += str; + if (englishBegan && !englishEnded) english += str; + + if (str.startsWith('English Translation')) { + punjabiEnded = true; + englishBegan = true; + } + + if (str.endsWith(' IST] ')) { gurakhrBegan = true; } + /* Match the page marker anywhere in the item (index 0 included) and skip items without one instead of throwing. */ + const pageMatch = /\(Page: ([\d 
]+)/.exec(str); if (pageMatch) { ang = parseInt(pageMatch[1], 10); } + }); + + resolve({ content, ang, gurakhr, punjabi, english }); + }) + .catch(err => reject(err)) + ); + /* Request-level failures must reject the promise instead of raising an unhandled 'error' event. */ + }).on('error', reject) + )); +} + +module.exports = hukamJS; diff --git a/package.json b/package.json new file mode 100644 index 0000000..396addb --- /dev/null +++ b/package.json @@ -0,0 +1,14 @@ +{ + "name": "hukamnama-json", + "version": "0.0.1", + "description": "NodeJS module that fetches Hukamnama from Harimandir Sahib, Amritsar via SGPC's PDF", + "main": "index.js", + "scripts": { + "test": "node test" + }, + "author": "bogas04", + "license": "MIT", + "dependencies": { + "pdfjs-dist": "^1.6.258" + } +} diff --git a/test.js b/test.js new file mode 100644 index 0000000..7f6eb21 --- /dev/null +++ b/test.js @@ -0,0 +1,10 @@ +'use strict'; + +const hukam = require('./index.js'); + +hukam() + .then(({ ang, content, english, gurakhr, punjabi }) => { + if (ang < 1 || ang > 1430) { console.log("Invalid Ang ", ang); } + else { console.log("All tests passed!"); } + }) + .catch(err => console.log(err));