1
1
import * as cheerio from 'cheerio'
2
+ import { unstable_cache } from 'next/cache'
2
3
3
4
enum PackageId {
4
5
nuqs = 'UGFja2FnZS00MjczNzAxNTA5' ,
@@ -12,45 +13,57 @@ export type Result = {
12
13
avatarID : string
13
14
}
14
15
15
- export async function crawlDependents ( ) {
16
- const allResults : Result [ ] = [ ]
17
- let url = `https://github.com/47ng/nuqs/network/dependents?package_id=${ PackageId . nuqs } `
18
- while ( true ) {
19
- const { results, nextPage } = await crawlDependentsPage ( url )
20
- allResults . push ( ...results )
21
- if ( nextPage === null ) {
22
- break
23
- }
24
- url = nextPage
16
/** How long a cached crawl stays fresh, in seconds (24 hours). */
const DEPENDENTS_REVALIDATE_SECONDS = 86_400

/**
 * Cached entry point for the dependents crawl.
 *
 * Wraps `_crawlDependents` with Next.js' `unstable_cache` under the
 * `crawlDependents` cache key, so the crawl is re-run at most once per
 * revalidation window.
 */
export const crawlDependents = unstable_cache(
  _crawlDependents,
  ['crawlDependents'],
  { revalidate: DEPENDENTS_REVALIDATE_SECONDS }
)
23
+
24
/**
 * Crawls the dependents of both package ids (`nuqs` and
 * `nextUseQueryState`) concurrently and returns the top 100 repositories
 * by star count, with at most one entry per repo.
 *
 * The crawl is best-effort: if one package crawl rejects, the results
 * gathered so far are still returned. Rejections are logged rather than
 * silently dropped, so a partial list can be diagnosed.
 *
 * @returns Up to 100 results sorted by descending stars, unique by `repo`.
 */
async function _crawlDependents() {
  const tick = performance.now()
  // Shared accumulator: both crawls push into it as their pages arrive.
  const allResults: Result[] = []
  const outcomes = await Promise.allSettled([
    crawlPackageDependents(PackageId.nuqs, allResults),
    crawlPackageDependents(PackageId.nextUseQueryState, allResults)
  ])
  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      console.error('Failed to crawl package dependents:', outcome.reason)
    }
  }
  // Sort first so that, for a duplicated repo, the highest-starred entry
  // is the one kept by the first-occurrence filter below.
  const seenRepos = new Set<Result['repo']>()
  const out = allResults
    .sort((a, b) => b.stars - a.stars)
    .filter(result => {
      // remove duplicates by repo
      if (seenRepos.has(result.repo)) {
        return false
      }
      seenRepos.add(result.repo)
      return true
    })
    .slice(0, 100)
  console.log(`Dependents crawled in ${performance.now() - tick}ms`)
  return out
}
42
+
43
/**
 * Walks every dependents page for one package id, following the "next
 * page" link until there is none, and appends each page's results to
 * `allResults`.
 *
 * @param pkgId - Value placed in the `package_id` query parameter.
 * @param allResults - Accumulator array; mutated in place.
 */
async function crawlPackageDependents(pkgId: string, allResults: Result[]) {
  let cursor: string | null = `https://github.com/47ng/nuqs/network/dependents?package_id=${pkgId}`
  // The first page is always fetched; later pages only while a link exists.
  while (cursor !== null) {
    const page = await crawlDependentsPage(cursor)
    allResults.push(...page.results)
    cursor = page.nextPage
  }
}
44
54
45
55
async function crawlDependentsPage ( url : string ) {
56
+ const tick = performance . now ( )
46
57
const pkg =
47
58
new URLSearchParams ( url . split ( '?' ) [ 1 ] ) . get ( 'package_id' ) === PackageId . nuqs
48
59
? 'nuqs'
49
60
: 'next-usequerystate'
50
61
const html = await fetch ( url , {
51
62
cache : 'no-store'
52
63
} ) . then ( res => res . text ( ) )
64
+ const endOfFetch = performance . now ( )
53
65
const $ = cheerio . load ( html )
66
+ const endOfParse = performance . now ( )
54
67
const results : Result [ ] = [ ]
55
68
$ ( '[data-test-id="dg-repo-pkg-dependent"]' ) . each ( ( index , element ) => {
56
69
const img = $ ( element ) . find ( 'img' ) . attr ( 'src' ) // ?.replace('s=40', 's=64')
@@ -83,6 +96,13 @@ async function crawlDependentsPage(url: string) {
83
96
} )
84
97
const nextButton = $ ( 'div.paginate-container a:contains(Next)' )
85
98
const nextPage = nextButton ?. attr ( 'href' ) ?? null
99
+ console . log (
100
+ 'Crawled page %s (fetch: %s, parse: %s, extract: %s)' ,
101
+ url ,
102
+ ( endOfFetch - tick ) . toFixed ( 2 ) ,
103
+ ( endOfParse - endOfFetch ) . toFixed ( 2 ) ,
104
+ ( performance . now ( ) - endOfParse ) . toFixed ( 2 )
105
+ )
86
106
return { results, nextPage }
87
107
}
88
108
0 commit comments