From 98299cb04171527ac744cba81338186c31437473 Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Wed, 26 Feb 2025 08:34:33 +0000 Subject: [PATCH] feat: use webworker for markdown parsing --- .../components/chat/MarkdownRenderer.svelte | 52 +++++++++++---- src/lib/utils/{getMarked.ts => marked.ts} | 65 ++++++++++++++----- src/lib/workers/markdownWorker.ts | 29 ++------- 3 files changed, 96 insertions(+), 50 deletions(-) rename src/lib/utils/{getMarked.ts => marked.ts} (76%) diff --git a/src/lib/components/chat/MarkdownRenderer.svelte b/src/lib/components/chat/MarkdownRenderer.svelte index 1792ee7f7a4..976828809dd 100644 --- a/src/lib/components/chat/MarkdownRenderer.svelte +++ b/src/lib/components/chat/MarkdownRenderer.svelte @@ -1,33 +1,61 @@ -{#each processedContent as token} +{#each tokens as token} {#if token.type === "text"} - {@html token.html} + {#await token.html then html} + + {@html DOMPurify.sanitize(html)} + {/await} {:else if token.type === "code"} {/if} diff --git a/src/lib/utils/getMarked.ts b/src/lib/utils/marked.ts similarity index 76% rename from src/lib/utils/getMarked.ts rename to src/lib/utils/marked.ts index 7414a43d759..23cccdfc841 100644 --- a/src/lib/utils/getMarked.ts +++ b/src/lib/utils/marked.ts @@ -1,6 +1,5 @@ import katex from "katex"; import "katex/dist/contrib/mhchem.mjs"; -import DOMPurify from "isomorphic-dompurify"; import { Marked } from "marked"; import type { Tokens, TokenizerExtension, RendererExtension } from "marked"; import type { WebSearchSource } from "$lib/types/WebSearch"; @@ -19,13 +18,6 @@ interface katexInlineToken extends Tokens.Generic { displayMode: false; } -DOMPurify.addHook("afterSanitizeAttributes", (node) => { - if (node.tagName === "A") { - node.setAttribute("rel", "noreferrer"); - node.setAttribute("target", "_blank"); - } -}); - export const katexBlockExtension: TokenizerExtension & RendererExtension = { name: "katexBlock", level: "block", @@ -152,12 +144,10 @@ function addInlineCitations(md: string, 
webSearchSources: WebSearchSource[] = [] }); } -let markedInstance: Marked | null = null; - function createMarkedInstance(sources: WebSearchSource[]): Marked { return new Marked({ hooks: { - postprocess: (html) => DOMPurify.sanitize(addInlineCitations(html, sources)), + postprocess: (html) => addInlineCitations(html, sources), }, extensions: [katexBlockExtension, katexInlineExtension], renderer: { @@ -170,8 +160,53 @@ function createMarkedInstance(sources: WebSearchSource[]): Marked { } export function getMarked(sources: WebSearchSource[]): Marked { - if (!markedInstance) { - markedInstance = createMarkedInstance(sources); - } - return markedInstance; + return createMarkedInstance(sources); } + +type CodeToken = { + type: "code"; + lang: string; + code: string; +}; + +type TextToken = { + type: "text"; + html: string | Promise<string>; +}; + +export async function processTokens(content: string, sources: WebSearchSource[]): Promise<Token[]> { + const marked = getMarked(sources); + const tokens = marked.lexer(content); + + const processedTokens = await Promise.all( + tokens.map(async (token) => { + if (token.type === "code") { + return { + type: "code" as const, + lang: token.lang, + code: token.text, + }; + } else { + return { + type: "text" as const, + html: marked.parse(token.raw), + }; + } + }) + ); + + return processedTokens; +} + +export function processTokensSync(content: string, sources: WebSearchSource[]): Token[] { + const marked = getMarked(sources); + const tokens = marked.lexer(content); + return tokens.map((token) => { + if (token.type === "code") { + return { type: "code" as const, lang: token.lang, code: token.text }; + } + return { type: "text" as const, html: marked.parse(token.raw) }; + }); +} + +export type Token = CodeToken | TextToken; diff --git a/src/lib/workers/markdownWorker.ts b/src/lib/workers/markdownWorker.ts index 341a0755eb1..944e4364919 100644 --- a/src/lib/workers/markdownWorker.ts +++ b/src/lib/workers/markdownWorker.ts @@ -1,15 +1,15 @@ import type
{ WebSearchSource } from "$lib/types/WebSearch"; -import { getMarked } from "$lib/utils/getMarked"; +import { processTokens, type Token } from "$lib/utils/marked"; -type IncomingMessage = { +export type IncomingMessage = { type: "process"; content: string; sources: WebSearchSource[]; }; -type OutgoingMessage = { +export type OutgoingMessage = { type: "processed"; - content: string; + tokens: Token[]; }; onmessage = async (event) => { @@ -18,27 +18,10 @@ onmessage = async (event) => { } const message = event.data as IncomingMessage; - const { content, sources } = message; - const marked = getMarked(sources); - const tokens = marked.lexer(content); + const { content, sources } = message; - const processedTokens = await Promise.all( - tokens.map(async (token) => { - if (token.type === "code") { - return { - type: "code", - lang: token.lang, - code: token.text, - }; - } else { - return { - type: "text", - html: await marked.parse(token.raw), - }; - } - }) - ); + const processedTokens = await processTokens(content, sources); postMessage({ type: "processed", tokens: processedTokens } satisfies OutgoingMessage); };