Skip to content

Commit

Permalink
create endpoint to get articles by url
Browse files Browse the repository at this point in the history
  • Loading branch information
this-pama committed Nov 20, 2024
1 parent 0e9d7b1 commit d14adfd
Show file tree
Hide file tree
Showing 8 changed files with 542 additions and 11 deletions.
1 change: 1 addition & 0 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ app.get("/version", (req, res) => {

//BLOG DATA APIs
// Each route below lists `verifyToken` ahead of its handler, so the handler
// only runs if `verifyToken` passes control on (presumably Express-style
// middleware — confirm against its definition).
app.get("/blogs", verifyToken, routes.api.browse_data);
// Article lookup; the handler reads `id` and/or `url` from the query string.
app.get("/articles", verifyToken, routes.api.get_articles);
app.get("/blogs/stats", verifyToken, routes.api.get_blog_stats);

app.post("/get-webpage-content", verifyToken, routes.api.getWebContent);
Expand Down
44 changes: 44 additions & 0 deletions controllers/blog/api/get_articles.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
const { DB } = include("db/");

exports.get_articles = async (req, res) => {
const { id, url } = req.query;

// Ensure at least one of `id` or `url` is provided
if (
(!id || (Array.isArray(id) && id.length === 0)) &&
(!url || (Array.isArray(url) && url.length === 0))
) {
return res
.status(400)
.json({ error: "At least one 'id' or 'url' must be provided." });
}

// Normalize `id` and `url` into arrays
const idList = id ? (Array.isArray(id) ? id : [id]).map(Number) : [];
const urlList = url ? (Array.isArray(url) ? url : [url]) : [];

try {
const results = await DB.blog.any(
`
SELECT a.id, a.url, a.article_type, a.title, a.iso3, a.posted_date, a.posted_date_str, a.parsed_date, a.language, a.created_at, c.html_content,
regexp_replace(
regexp_replace(COALESCE(b.content, c.html_content), E'\\n', ' ', 'g'),
E'<iframe[^>]*>.*?</iframe>',
'',
'gi'
) AS content
FROM articles a
JOIN article_content b ON b.article_id = a.id
JOIN article_html_content c ON c.article_id = a.id
WHERE (array_length($1::int[], 1) > 0 AND a.id = ANY ($1::int[]))
OR (array_length($2::text[], 1) > 0 AND a.url = ANY ($2::text[]))
`,
[idList, urlList]
);

return res.status(200).json(results);
} catch (err) {
console.error("Database query failed:", err);
return res.status(500).json({ error: "An unexpected error occurred." });
}
};
2 changes: 1 addition & 1 deletion controllers/blog/api/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ exports.searchBlogQuery = (
return {
text: `
WITH search_results AS (
SELECT a.url, a.article_type, a.title, a.iso3, a.posted_date, a.posted_date_str, a.parsed_date, a.language, a.created_at, c.html_content,
SELECT a.id, a.url, a.article_type, a.title, a.iso3, a.posted_date, a.posted_date_str, a.parsed_date, a.language, a.created_at, c.html_content,
regexp_replace(
regexp_replace(${textColumn}, E'\\n', ' ', 'g'),
E'<iframe[^>]*>.*?</iframe>',
Expand Down
8 changes: 5 additions & 3 deletions controllers/blog/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const { browse_data } = require("./api/data");
const { main } = require("./api/stats");
const { DB } = include("db");
const cleanup = require('./scrapper/clean-up')
const cleanup = require("./scrapper/clean-up");

exports.browse_data = async (req, res) => {
const data = await browse_data(DB.blog, req, res);
Expand All @@ -19,5 +19,7 @@ exports.cleanup = async (req, res) => {
cleanup(DB.blog, req, res);
};

exports.getWebContent = require('../blog/api/scap')
exports.scrap_medium_posts = require('./blog_type/medium').scrap_medium_posts
exports.getWebContent = require("../blog/api/scap");
exports.scrap_medium_posts = require("./blog_type/medium").scrap_medium_posts;

exports.get_articles = require("./api/get_articles").get_articles;
5 changes: 3 additions & 2 deletions controllers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ const {
get_stats,
cleanup,
getWebContent,
scrap_medium_posts
scrap_medium_posts,
get_articles,
} = require("./blog");
const { get_toolkit_data, toolkit_scrapper } = require("./toolkits");

Expand All @@ -18,4 +19,4 @@ exports.cleanup = cleanup;
exports.scrap_medium_posts = scrap_medium_posts;

exports.getWebContent = getWebContent;

exports.get_articles = get_articles;
3 changes: 2 additions & 1 deletion routes/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const {
scrap_medium_posts,
getWebContent,
get_ce_rave,
get_articles,
} = include("/controllers");
if (!exports.api) {
exports.api = {};
Expand All @@ -25,7 +26,7 @@ exports.api.get_toolkit_data = get_toolkit_data;
exports.api.cleanup = cleanup;

exports.api.getWebContent = getWebContent;

exports.api.get_articles = get_articles;

exports.cron.scrap_medium_posts = scrap_medium_posts;
exports.cron.toolkit_scrapper = toolkit_scrapper;
Loading

0 comments on commit d14adfd

Please sign in to comment.