Skip to content

Commit 9893318

Browse files
authored
feat: add router for CPTA(中国人事考试网) (#18097)
* feat: add router for CPTA * fix: prolong the caching time. * fix: fix anti-crawler restriction. * fix: reduce the number of requests * fix: remove expiration time. * fix: fix param description and request actions
1 parent f0cf4da commit 9893318

File tree

2 files changed

+142
-0
lines changed

2 files changed

+142
-0
lines changed

lib/routes/cpta/handler.ts

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import { DataItem, Route } from '@/types';
2+
import cache from '@/utils/cache';
3+
import got from '@/utils/got';
4+
import { load } from 'cheerio';
5+
import asyncPool from 'tiny-async-pool';
6+
7+
/** Static metadata describing one news column of the CPTA website. */
interface NewsCategory {
    /** Human-readable column name, used in the feed title. */
    title: string;
    /** URL of the column's index page. */
    baseUrl: string;
    /** Feed description for the column. */
    description: string;
}
12+
13+
/** Site origin; relative article links found on index pages are resolved against it. */
const WEBSITE_URL = 'http://www.cpta.com.cn';
14+
15+
/**
 * Supported news columns, keyed by the value of the `:category` route parameter.
 */
const NEWS_TYPES: Record<string, NewsCategory> = {
    // Exam notices and announcements.
    notice: { title: '通知公告', baseUrl: 'http://www.cpta.com.cn/notice.html', description: '中国人事考试网 考试通知公告汇总' },
    // Exam result releases.
    performance: { title: '成绩公布', baseUrl: 'http://www.cpta.com.cn/performance.html', description: '中国人事考试网 考试成绩公布汇总' },
};
27+
28+
/**
 * Builds the feed for one CPTA (中国人事考试网) news column.
 *
 * Steps: validate the `:category` parameter, fetch that column's index page,
 * keep the ten newest entries, then fetch each article body through
 * `cache.tryGet` keyed by the article link.
 *
 * @param context - Request context; `context.req.param('category')` must be a key of `NEWS_TYPES`.
 * @returns Feed metadata together with at most ten items.
 * @throws Error when the requested category is not a key of `NEWS_TYPES`.
 */
const handler: Route['handler'] = async (context) => {
    const category = context.req.param('category');

    // Own-property check: rejects unknown categories and inherited keys such as
    // `constructor`, which would otherwise fail later with an opaque TypeError.
    if (!Object.prototype.hasOwnProperty.call(NEWS_TYPES, category)) {
        throw new Error(`Unknown category "${category}". Supported categories: ${Object.keys(NEWS_TYPES).join(', ')}`);
    }
    const newsType = NEWS_TYPES[category];
    const BASE_URL = newsType.baseUrl;

    // Fetch the index page
    const { data: listResponse } = await got(BASE_URL);
    const $ = load(listResponse);

    // List entries that contain at least one child element
    const ITEM_SELECTOR = 'ul[class*="list_14"] > li:has(*)';

    // Unparsable dates are treated as the epoch so the comparator never yields NaN
    // and such entries sort as the oldest.
    const toTimestamp = (date: string): number => {
        const timestamp = new Date(date).getTime();
        return Number.isNaN(timestamp) ? 0 : timestamp;
    };

    const contentLinkList = $(ITEM_SELECTOR)
        .toArray()
        .flatMap((element) => {
            const anchor = $(element).find('a').first();
            const relativeLink = anchor.attr('href');
            if (!relativeLink) {
                // Without an href there is no article to fetch; `new URL(undefined, base)`
                // would silently resolve to ".../undefined".
                return [];
            }
            return [
                {
                    // Prefer the `title` attribute; fall back to the visible link text.
                    title: anchor.attr('title')?.trim() || anchor.text().trim(),
                    // The date is rendered inside square brackets, e.g. "[2024-01-01]".
                    date: $(element).find('i').text().replaceAll(/[[\]]/g, '').trim(),
                    link: new URL(relativeLink, WEBSITE_URL).href,
                },
            ];
        })
        .sort((a, b) => toTimestamp(b.date) - toTimestamp(a.date))
        .slice(0, 10);

    const fetchDataItem = (item: { title: string; date: string; link: string }) =>
        cache.tryGet(item.link, async () => {
            const CONTENT_SELECTOR = '#p_content';
            const { data: contentResponse } = await got(item.link);
            const contentNode = load(contentResponse)(CONTENT_SELECTOR);
            const html = contentNode.html() ?? '';
            return {
                title: item.title,
                pubDate: item.date,
                link: item.link,
                description: html,
                category: ['study'],
                guid: item.link,
                id: item.link,
                image: 'https://www.gov.cn/images/gtrs_logo_lt.png',
                content: {
                    html,
                    // Plain-text rendering of the article body (not the HTML markup).
                    text: contentNode.text().trim(),
                },
                updated: item.date,
                language: 'zh-CN',
            } as DataItem;
        });

    const dataItems: DataItem[] = [];

    // Pool size 1: article pages are requested one at a time.
    for await (const item of await asyncPool(1, contentLinkList, fetchDataItem)) {
        dataItems.push(item as DataItem);
    }

    return {
        title: `中国人事考试网-${newsType.title}`,
        description: newsType.description,
        link: BASE_URL,
        image: 'https://www.gov.cn/images/gtrs_logo_lt.png',
        item: dataItems,
        allowEmpty: true,
        language: 'zh-CN',
        feedLink: `https://rsshub.app/cpta/${category}`,
        id: `https://rsshub.app/cpta/${category}`,
    };
};
98+
99+
/**
 * Route registration for `/cpta/:category`.
 *
 * The `description` table lists the accepted `category` values; the radar
 * rules map the site's own pages to the matching feed path.
 */
export const route: Route = {
    path: '/:category',
    name: '中国人事考试网发布',
    maintainers: ['PrinOrange'],
    parameters: { category: '栏目参数,可见下表描述。' },
    // Markdown table; rows stay at column 0 so the string content is unchanged.
    description: `
| Category | Title | Description |
|-------------|-----------|-------------------------------------|
| notice | 通知公告 | 中国人事考试网 考试通知公告汇总 |
| performance | 成绩公布 | 中国人事考试网 考试成绩公布汇总 |
`,
    handler,
    categories: ['study'],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        supportRadar: true,
        antiCrawler: true,
    },
    radar: [
        { title: '中国人事考试网通知公告', source: ['www.cpta.com.cn/notice.html', 'www.cpta.com.cn'], target: '/notice' },
        { title: '中国人事考试网成绩发布', source: ['www.cpta.com.cn/performance.html', 'www.cpta.com.cn'], target: '/performance' },
    ],
    example: '/cpta/notice',
};

lib/routes/cpta/namespace.ts

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import type { Namespace } from '@/types';
2+
3+
/** Namespace metadata for the CPTA (中国人事考试网) routes: display name and site host. */
export const namespace: Namespace = { name: '中国人事考试网', url: 'www.cpta.com.cn' };

0 commit comments

Comments
 (0)