diff --git a/.github/workflows/link_checker.yml b/.github/workflows/link_checker.yml index 51cb9b3d..fe2bcdab 100644 --- a/.github/workflows/link_checker.yml +++ b/.github/workflows/link_checker.yml @@ -1,6 +1,7 @@ name: Link checker on: + pull_request: workflow_dispatch: schedule: - cron: '0 12 * * *' @@ -27,8 +28,20 @@ jobs: - name: Run crawler id: crawler - run: yarn tsx scripts/checkLinks.ts continue-on-error: true + run: | + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + echo "Running PR workflow: Building and starting local server..." + yarn build + sleep 5 + yarn start & + sleep 15 + export BASE_URL="http://localhost:3000" + else + export BASE_URL="https://www.docs.sei.io/" + fi + echo "Running link checker for ${BASE_URL}" + yarn tsx scripts/checkLinks.ts - name: Upload broken links artifact if: steps.crawler.outcome == 'failure' diff --git a/scripts/checkLinks.ts b/scripts/checkLinks.ts index 0620ed04..880838e1 100644 --- a/scripts/checkLinks.ts +++ b/scripts/checkLinks.ts @@ -7,9 +7,8 @@ const visitedLinks = new Set(); async function main() { const browserInstance = await chromium.launch(); - const baseUrl = 'https://www.docs.sei.io/'; + const baseUrl = process.env.BASE_URL || 'https://www.docs.sei.io/'; await crawlPages(baseUrl, browserInstance, 'main'); - fs.writeFileSync('brokenLinks.json', JSON.stringify([...brokenLinks], null, 2)); if (brokenLinks.size > 0) { @@ -31,7 +30,7 @@ async function crawlPages(url: string, browser: Browser, path: string) { } function isInternal(url: string) { - return url.includes('docs.sei'); + return url.includes('docs.sei') || url.includes('localhost:3000'); } async function checkInternalLinks(url: string, page: Page, path: string, browser: Browser) { @@ -58,7 +57,7 @@ async function checkExternalLinks(url: string, page: Page, path: string, browser async function isLinkBroken(page: Page, url: string, path: string) { if (visitedLinks.has(url)) return false; - if (url.includes('localhost') || url.includes('.tar.gz')) return false; + if ((url.includes('localhost') && !url.includes(':3000')) || url.includes('.tar.gz')) return false; let pageResponse: Response; try {