Skip to content

Commit 05cfd4a

Browse files
authored
doc: Use unstable_cache for dependents caching (#535)
* doc: Use unstable_cache for dependents caching
* chore: Error logging
* chore: Add logging
* chore: Parallelise crawling and add perf logs
1 parent 0db86bb commit 05cfd4a

File tree

4 files changed

+49
-439
lines changed

4 files changed

+49
-439
lines changed

packages/docs/src/app/(pages)/_landing/bundle-size.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ export async function BundleSize() {
1010
return prettyBytes(size)
1111
} catch (error) {
1212
console.error(error)
13-
return 'less than 4KB'
13+
return 'less than 5KB'
1414
}
1515
}

packages/docs/src/app/(pages)/_landing/dependents/crawler.ts

+40-20
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import * as cheerio from 'cheerio'
2+
import { unstable_cache } from 'next/cache'
23

34
enum PackageId {
45
nuqs = 'UGFja2FnZS00MjczNzAxNTA5',
@@ -12,45 +13,57 @@ export type Result = {
1213
avatarID: string
1314
}
1415

15-
export async function crawlDependents() {
16-
const allResults: Result[] = []
17-
let url = `https://github.com/47ng/nuqs/network/dependents?package_id=${PackageId.nuqs}`
18-
while (true) {
19-
const { results, nextPage } = await crawlDependentsPage(url)
20-
allResults.push(...results)
21-
if (nextPage === null) {
22-
break
23-
}
24-
url = nextPage
16+
export const crawlDependents = unstable_cache(
17+
_crawlDependents,
18+
['crawlDependents'],
19+
{
20+
revalidate: 86_400
2521
}
26-
url = `https://github.com/47ng/nuqs/network/dependents?package_id=${PackageId.nextUseQueryState}`
27-
while (true) {
28-
const { results, nextPage } = await crawlDependentsPage(url)
29-
allResults.push(...results)
30-
if (nextPage === null) {
31-
break
32-
}
33-
url = nextPage
34-
}
35-
return allResults
22+
)
23+
24+
async function _crawlDependents() {
25+
const tick = performance.now()
26+
const allResults: Result[] = []
27+
await Promise.allSettled([
28+
crawlPackageDependents(PackageId.nuqs, allResults),
29+
crawlPackageDependents(PackageId.nextUseQueryState, allResults)
30+
])
31+
const out = allResults
3632
.sort((a, b) => b.stars - a.stars)
3733
.filter(
3834
// remove duplicates by repo
3935
(result, index, self) =>
4036
index === self.findIndex(r => r.repo === result.repo)
4137
)
4238
.slice(0, 100)
39+
console.log(`Dependents crawled in ${performance.now() - tick}ms`)
40+
return out
41+
}
42+
43+
async function crawlPackageDependents(pkgId: string, allResults: Result[]) {
44+
let url = `https://github.com/47ng/nuqs/network/dependents?package_id=${pkgId}`
45+
while (true) {
46+
const { results, nextPage } = await crawlDependentsPage(url)
47+
allResults.push(...results)
48+
if (nextPage === null) {
49+
return
50+
}
51+
url = nextPage
52+
}
4353
}
4454

4555
async function crawlDependentsPage(url: string) {
56+
const tick = performance.now()
4657
const pkg =
4758
new URLSearchParams(url.split('?')[1]).get('package_id') === PackageId.nuqs
4859
? 'nuqs'
4960
: 'next-usequerystate'
5061
const html = await fetch(url, {
5162
cache: 'no-store'
5263
}).then(res => res.text())
64+
const endOfFetch = performance.now()
5365
const $ = cheerio.load(html)
66+
const endOfParse = performance.now()
5467
const results: Result[] = []
5568
$('[data-test-id="dg-repo-pkg-dependent"]').each((index, element) => {
5669
const img = $(element).find('img').attr('src') // ?.replace('s=40', 's=64')
@@ -83,6 +96,13 @@ async function crawlDependentsPage(url: string) {
8396
})
8497
const nextButton = $('div.paginate-container a:contains(Next)')
8598
const nextPage = nextButton?.attr('href') ?? null
99+
console.log(
100+
'Crawled page %s (fetch: %s, parse: %s, extract: %s)',
101+
url,
102+
(endOfFetch - tick).toFixed(2),
103+
(endOfParse - endOfFetch).toFixed(2),
104+
(performance.now() - endOfParse).toFixed(2)
105+
)
86106
return { results, nextPage }
87107
}
88108

0 commit comments

Comments (0)