Skip to content

Commit

Permalink
feat: use webworker for markdown parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
nsarrazin committed Feb 26, 2025
1 parent 521014c commit 98299cb
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 50 deletions.
52 changes: 40 additions & 12 deletions src/lib/components/chat/MarkdownRenderer.svelte
Original file line number Diff line number Diff line change
@@ -1,33 +1,61 @@
<script lang="ts">
import type { WebSearchSource } from "$lib/types/WebSearch";
import { getMarked } from "$lib/utils/getMarked";
import { processTokens, processTokensSync, type Token } from "$lib/utils/marked";
import MarkdownWorker from "$lib/workers/markdownWorker?worker";
import CodeBlock from "../CodeBlock.svelte";
import type { IncomingMessage, OutgoingMessage } from "$lib/workers/markdownWorker";
import { browser } from "$app/environment";
import DOMPurify from "isomorphic-dompurify";
// Props accepted by the markdown renderer component.
interface Props {
// Raw markdown text to tokenize and render.
content: string;
// Optional web search sources forwarded to the token processors; defaults to [] when destructured.
sources?: WebSearchSource[];
}
// One dedicated markdown worker per component instance. Stays null during SSR
// or when the environment has no Worker support, in which case parsing falls
// back to the main thread.
const worker = browser && window.Worker ? new MarkdownWorker() : null;

// Teardown-only effect: it reads no reactive state, so it runs once on mount and
// its cleanup runs when the component is destroyed. Without this, every rendered
// message would leave a live worker behind.
$effect(() => {
	return () => worker?.terminate();
});
// Destructure the component props; `sources` falls back to an empty list.
let { content, sources = [] }: Props = $props();
// NOTE(review): `processedContent` is only used by the old getMarked-based code path in this
// diff view — it looks like a removed line; confirm before relying on it.
let processedContent = $state([]);
// Seed the token list with a synchronous pass over the initial props.
let tokens: Token[] = $state(processTokensSync(content, sources));
function processContent(content: string, sources: WebSearchSource[]) {
if (typeof Worker !== "undefined") {
const worker = new MarkdownWorker();
worker.postMessage({ content, sources });
worker.onmessage = (event) => {
content = event.data.content;
};
// Tokenizes `content`: posts a "process" message to the worker when one exists and resolves
// with the tokens from its reply; otherwise delegates to processTokens.
// NOTE(review): every call reassigns `worker.onmessage` on the same worker instance. If a second
// call starts before the first reply arrives, the first promise never settles and the first
// reply resolves the second promise. Consider correlating requests and replies with an id.
async function processContent(content: string, sources: WebSearchSource[]): Promise<Token[]> {
if (worker) {
return new Promise((resolve) => {
worker.onmessage = (event: MessageEvent<OutgoingMessage>) => {
if (event.data.type !== "processed") {
// NOTE(review): thrown inside the event handler, so it does not reject the promise
// created above (no `reject` is wired up); the caller would wait forever.
throw new Error("Invalid message type");
}
resolve(event.data.tokens);
};
worker.postMessage({ content, sources, type: "process" } as IncomingMessage);
});
} else {
// NOTE(review): the next line appears to be a pre-change line left over from the diff view
// (it uses the old `processedContent` / `getMarked` API) — confirm it is not part of the
// new implementation.
processedContent = getMarked(sources).parse(content);
return processTokens(content, sources);
}
}
// Re-tokenize whenever `content` or `sources` change.
$effect(() => {
	// Read the reactive props synchronously so the effect tracks them as dependencies.
	const currentContent = content;
	const currentSources = sources;

	if (!browser) {
		tokens = processTokensSync(currentContent, currentSources);
		return;
	}

	// Flipped by the teardown below when the effect re-runs or the component is
	// destroyed, so a late result from an older run cannot overwrite newer tokens.
	let superseded = false;

	processContent(currentContent, currentSources)
		.then((result) => {
			if (!superseded) {
				tokens = result;
			}
		})
		.catch((error: unknown) => {
			// Keep the previously rendered tokens and surface the failure instead of
			// leaving an unhandled promise rejection.
			console.error("Failed to process markdown content", error);
		});

	return () => {
		superseded = true;
	};
});
</script>

{#each processedContent as token}
{#each tokens as token}
{#if token.type === "text"}
{@html token.html}
{#await token.html then html}
<!-- eslint-disable-next-line svelte/no-at-html-tags -->
{@html DOMPurify.sanitize(html)}
{/await}
{:else if token.type === "code"}
<CodeBlock lang={token.lang} code={token.code} />
{/if}
Expand Down
65 changes: 50 additions & 15 deletions src/lib/utils/getMarked.ts → src/lib/utils/marked.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import katex from "katex";
import "katex/dist/contrib/mhchem.mjs";
import DOMPurify from "isomorphic-dompurify";
import { Marked } from "marked";
import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
import type { WebSearchSource } from "$lib/types/WebSearch";
Expand All @@ -19,13 +18,6 @@ interface katexInlineToken extends Tokens.Generic {
displayMode: false;
}

DOMPurify.addHook("afterSanitizeAttributes", (node) => {
if (node.tagName === "A") {
node.setAttribute("rel", "noreferrer");
node.setAttribute("target", "_blank");
}
});

export const katexBlockExtension: TokenizerExtension & RendererExtension = {
name: "katexBlock",
level: "block",
Expand Down Expand Up @@ -152,12 +144,10 @@ function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []
});
}

let markedInstance: Marked | null = null;

function createMarkedInstance(sources: WebSearchSource[]): Marked {
return new Marked({
hooks: {
postprocess: (html) => DOMPurify.sanitize(addInlineCitations(html, sources)),
postprocess: (html) => addInlineCitations(html, sources),
},
extensions: [katexBlockExtension, katexInlineExtension],
renderer: {
Expand All @@ -170,8 +160,53 @@ function createMarkedInstance(sources: WebSearchSource[]): Marked {
}

export function getMarked(sources: WebSearchSource[]): Marked {
if (!markedInstance) {
markedInstance = createMarkedInstance(sources);
}
return markedInstance;
return createMarkedInstance(sources);
}

/** Token emitted for lexer tokens whose type is "code"; carries the raw code rather than HTML. */
type CodeToken = {
type: "code";
/** Copied from the lexer token's `lang` field. */
lang: string;
/** Copied from the lexer token's `text` field. */
code: string;
};

/** Token emitted for every lexer token that is not of type "code". */
type TextToken = {
type: "text";
/** Result of `marked.parse` on the token's raw source; may be a string or a promise of one. */
html: string | Promise<string>;
};

/**
 * Lexes `content` and converts every top-level token into a renderable {@link Token}.
 *
 * Tokens of type "code" are passed through as `{ lang, code }`. Every other token
 * is rendered to HTML by parsing its raw source with the same Marked instance.
 *
 * @param content - Markdown source to tokenize.
 * @param sources - Web search sources handed to the Marked instance.
 * @returns The processed tokens, in lexer order, with `html` already resolved to a string.
 */
export async function processTokens(content: string, sources: WebSearchSource[]): Promise<Token[]> {
	const marked = getMarked(sources);
	const tokens = marked.lexer(content);

	return Promise.all(
		tokens.map(async (token): Promise<Token> => {
			if (token.type === "code") {
				return {
					type: "code",
					lang: token.lang,
					code: token.text,
				};
			}

			return {
				type: "text",
				// `parse` may return a promise. Await it so the token holds a plain string:
				// a promise nested in the result cannot be structured-cloned when the
				// worker posts these tokens back to the page.
				html: await marked.parse(token.raw),
			};
		})
	);
}

/**
 * Synchronous counterpart of `processTokens`: lexes `content` and maps each
 * top-level token to a {@link Token} without awaiting anything.
 *
 * @param content - Markdown source to tokenize.
 * @param sources - Web search sources handed to the Marked instance.
 * @returns The processed tokens, in lexer order.
 */
export function processTokensSync(content: string, sources: WebSearchSource[]): Token[] {
	const renderer = getMarked(sources);
	const result: Token[] = [];

	for (const lexed of renderer.lexer(content)) {
		if (lexed.type !== "code") {
			// Anything that is not a code token is rendered to HTML from its raw source.
			result.push({ type: "text", html: renderer.parse(lexed.raw) });
			continue;
		}
		result.push({ type: "code", lang: lexed.lang, code: lexed.text });
	}

	return result;
}

/** A processed markdown token, discriminated by `type` ("code" or "text"). */
export type Token = CodeToken | TextToken;
29 changes: 6 additions & 23 deletions src/lib/workers/markdownWorker.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import type { WebSearchSource } from "$lib/types/WebSearch";
import { getMarked } from "$lib/utils/getMarked";
import { processTokens, type Token } from "$lib/utils/marked";

type IncomingMessage = {
export type IncomingMessage = {
type: "process";
content: string;
sources: WebSearchSource[];
};

type OutgoingMessage = {
export type OutgoingMessage = {
type: "processed";
content: string;
tokens: Token[];
};

onmessage = async (event) => {
Expand All @@ -18,27 +18,10 @@ onmessage = async (event) => {
}

const message = event.data as IncomingMessage;
const { content, sources } = message;
const marked = getMarked(sources);

const tokens = marked.lexer(content);
const { content, sources } = message;

const processedTokens = await Promise.all(
tokens.map(async (token) => {
if (token.type === "code") {
return {
type: "code",
lang: token.lang,
code: token.text,
};
} else {
return {
type: "text",
html: await marked.parse(token.raw),
};
}
})
);
const processedTokens = await processTokens(content, sources);

postMessage({ type: "processed", tokens: processedTokens } satisfies OutgoingMessage);
};

0 comments on commit 98299cb

Please sign in to comment.