feat: an implementation to cache tags to disk?

mbifulco · Feb 16, 2025 · 99e88c9 · 99e88c9
1 parent 33861ec
commit 99e88c9
Show file tree

Hide file tree

Showing 4 changed files with 221 additions and 77 deletions.
diff --git a/src/lib/blog.ts b/src/lib/blog.ts
@@ -4,6 +4,7 @@ import { join } from 'path';
 import type { BlogPost } from '../data/content-types';
 import { getAllContentFromDirectory } from './content-loaders/getAllContentFromDirectory';
 import { getContentBySlug } from './content-loaders/getContentBySlug';
+import { getContentSlugsForTag } from './tags';
 
 // Add markdown files in `src/content/blog`
 const postsDirectory = join(process.cwd(), 'src', 'data', 'posts');
@@ -27,7 +28,8 @@ export const getAllPosts = async () => {
 export const getAllPostsByTag = async (tag: string) => {
   try {
     const posts = await getAllPosts();
-    return posts.filter((post) => post.frontmatter.tags?.includes(tag));
+    const slugsForTag = await getContentSlugsForTag(tag);
+    return posts.filter((post) => slugsForTag.includes(post.slug));
   } catch (error) {
     console.error('Error getting posts by tag:', error);
     return [];

diff --git a/src/lib/external-references.ts b/src/lib/external-references.ts
@@ -3,6 +3,7 @@ import { join } from 'path';
 import type { Article } from '../data/content-types';
 import { getAllContentFromDirectory } from './content-loaders/getAllContentFromDirectory';
 import { getContentBySlug } from './content-loaders/getContentBySlug';
+import { getContentSlugsForTag } from './tags';
 
 // directory reference to `src/content/external-references`
 const externalReferencesDirectory = join(
@@ -36,10 +37,12 @@ export const getAllExternalReferences = async () => {
 };
 
 export const getAllExternalReferencesByTag = async (tag: string) => {
-  const refs = await getAllExternalReferences();
-
-  return refs.filter((article) => {
-    const tags = article.frontmatter?.tags ?? [];
-    return tags.includes(tag);
-  });
+  try { // new version: resolve the tag through the tag registry instead of reading each article's frontmatter tags
+    const refs = await getAllExternalReferences();
+    const slugsForTag = await getContentSlugsForTag(tag); // slugs recorded for this tag by the registry in ./tags ([] if it has none)
+    return refs.filter((article) => slugsForTag.includes(article.slug)); // NOTE(review): registry is keyed by frontmatter.slug — confirm article.slug is the same value and that external references get registered
+  } catch (error) {
+    console.error('Error getting external references by tag:', error);
+    return []; // failures are logged and reported as "no matches" rather than rethrown
+  }
 };
diff --git a/src/lib/newsletters.ts b/src/lib/newsletters.ts
@@ -1,13 +1,56 @@
 import { join } from 'path';
 
-import type { Newsletter } from '../data/content-types';
+import type { MarkdownDocument, Newsletter } from '../data/content-types';
 import { getAllContentFromDirectory } from './content-loaders/getAllContentFromDirectory';
 import { getContentBySlug } from './content-loaders/getContentBySlug';
+import { getContentSlugsForTag } from './tags';
 
 // directory reference to `src/content/newsletters`
-const newslettersDirectory = join(process.cwd(), 'src', 'data', 'newsletters');
+export const newslettersDirectory = join(
+  process.cwd(),
+  'src',
+  'data',
+  'newsletters'
+);
 
-const NEWSLETTERS_CONTENT_TYPE = 'newsletter';
+export const NEWSLETTERS_CONTENT_TYPE = 'newsletter';
+
+// Helper function to safely process raw content into newsletters: returns [] for missing or non-array input, otherwise keeps only entries that have a frontmatter slug
+export const processNewslettersContent = (
+  rawContent: MarkdownDocument[]
+): Newsletter[] => {
+  if (!rawContent) { // runtime guard even though the parameter is typed as an array
+    console.warn('processNewslettersContent: content is null or undefined');
+    return [];
+  }
+
+  if (!Array.isArray(rawContent)) { // any other non-array value is rejected with a warning
+    console.warn(
+      `processNewslettersContent: content is not an array, got ${typeof rawContent}`
+    );
+    return [];
+  }
+
+  // filter out null/undefined entries and newsletters that don't have a slug; every dropped entry is logged
+  return rawContent.filter((newsletter) => {
+    if (!newsletter) {
+      console.warn(
+        'processNewslettersContent: found null/undefined newsletter entry'
+      );
+      return false;
+    }
+
+    if (!newsletter.frontmatter?.slug) { // also covers entries with no frontmatter at all
+      console.warn(
+        'processNewslettersContent: found newsletter without slug:',
+        JSON.stringify(newsletter.frontmatter, null, 2)
+      );
+      return false;
+    }
+
+    return true;
+  }) as Newsletter[]; // type assertion only — surviving entries are returned unchanged
+};
 
 export const getNewsletterBySlug = async (slug: string) => {
   const reference = await getContentBySlug(
@@ -20,44 +63,11 @@ export const getNewsletterBySlug = async (slug: string) => {
 
 export const getAllNewsletters = async () => {
   try {
-    const newsletters = (await getAllContentFromDirectory(
+    const rawContent = await getAllContentFromDirectory(
       newslettersDirectory,
       NEWSLETTERS_CONTENT_TYPE
-    )) as Newsletter[];
-
-    if (!newsletters) {
-      console.warn('getAllNewsletters: newsletters is null or undefined');
-      return [];
-    }
-
-    if (!Array.isArray(newsletters)) {
-      console.warn(
-        `getAllNewsletters: newsletters is not an array, got ${typeof newsletters}`
-      );
-      return [];
-    }
-
-    // filter out newsletters that don't have a slug
-    const filteredNewsletters = newsletters.filter((newsletter) => {
-      if (!newsletter) {
-        console.warn(
-          'getAllNewsletters: found null/undefined newsletter entry'
-        );
-        return false;
-      }
-
-      if (!newsletter.frontmatter?.slug) {
-        console.warn(
-          'getAllNewsletters: found newsletter without slug:',
-          JSON.stringify(newsletter.frontmatter, null, 2)
-        );
-        return false;
-      }
-
-      return true;
-    });
-
-    return filteredNewsletters;
+    );
+    return processNewslettersContent(rawContent);
   } catch (error) {
     console.error('Error in getAllNewsletters:', error);
     // Re-throw the error to be handled by the page's error boundary
@@ -67,10 +77,10 @@ export const getAllNewsletters = async () => {
 
 export const getAllNewslettersByTag = async (tag: string) => {
   try {
-    // Use the safe directory reader
     const newsletters = await getAllNewsletters();
+    const slugsForTag = await getContentSlugsForTag(tag);
     return newsletters.filter((newsletter) =>
-      newsletter.frontmatter.tags?.includes(tag)
+      slugsForTag.includes(newsletter.slug)
     );
   } catch (error) {
     console.error('Error getting newsletters by tag:', error);

diff --git a/src/lib/tags.ts b/src/lib/tags.ts
@@ -1,42 +1,171 @@
-import { getAllPosts } from './blog';
-import { getAllNewsletters } from './newsletters';
+import fs from 'fs';
+import path from 'path';
+
+import type { MarkdownDocument } from '../data/content-types';
+
+const CACHE_FILE = path.join( // file the tag registry is saved to and loaded from
+  process.cwd(), // resolved against the working directory of the running process
+  '.next',
+  'cache',
+  'tag-registry.json'
+);
 
 export const parseTag = (tag: string) => {
   return tag.split(' ').join('-').toLocaleLowerCase();
 };
 
-export const getAllTags = async () => {
-  try {
-    // Safely fetch content with fallbacks
-    const posts = await getAllPosts().catch(() => []);
-    const newsletters = await getAllNewsletters().catch(() => []);
-
-    const allTags = new Set<string>();
-    const allContent = [...posts, ...newsletters];
-
-    // More defensive processing of content
-    allContent.forEach((content) => {
-      const tags = content?.frontmatter?.tags;
-      if (!tags) return;
-
-      if (!Array.isArray(tags)) return;
-
-      tags.forEach((tag) => {
-        if (typeof tag === 'string') {
-          const parsedTag = parseTag(tag);
-          if (parsedTag) {
-            allTags.add(parsedTag);
-          }
-        }
-      });
+type TagMap = { // plain-object form of the registry, as written to and parsed from the JSON cache file
+  [tag: string]: string[]; // tag -> array of content slugs
+};
+
+// Singleton to manage tags across the application: maps each normalized tag to the slugs of the content carrying it, and can persist that map to CACHE_FILE
+class TagRegistry {
+  private static instance: TagRegistry;
+  private tagMap: Map<string, Set<string>> = new Map(); // tag -> set of content slugs
+  private initialized = false; // set to true by registerContent() or by a successful loadFromCache()
+
+  private constructor() {} // private so instances can only come from getInstance()
+
+  static getInstance(): TagRegistry { // creates the shared instance on first call and returns it afterwards
+    if (!TagRegistry.instance) {
+      TagRegistry.instance = new TagRegistry();
+    }
+    return TagRegistry.instance;
+  }
+
+  private registerContentTags(content: MarkdownDocument) { // records one document's tags under its frontmatter slug
+    const tags = content?.frontmatter?.tags;
+    if (!tags || !Array.isArray(tags)) return; // no tags, or tags is not an array: nothing to record
+
+    const slug = content.frontmatter.slug;
+    if (!slug) return; // a document without a slug cannot be referenced, so it is skipped
+
+    tags.forEach((tag) => {
+      if (typeof tag !== 'string') return; // non-string tag values are ignored
+      const parsedTag = parseTag(tag); // normalized form: spaces become '-', lower-cased
+      if (!parsedTag) return; // skip tags that normalize to an empty string
+
+      if (!this.tagMap.has(parsedTag)) {
+        this.tagMap.set(parsedTag, new Set());
+      }
+      this.tagMap.get(parsedTag)?.add(slug); // Set membership keeps each slug once per tag
+    });
+  }
+
+  registerContent(content: MarkdownDocument[]) { // adds every document's tags to the map; existing entries are kept, not cleared
+    content.forEach((item) => this.registerContentTags(item));
+    this.initialized = true; // marked initialized even when `content` is empty
+  }
+
+  isInitialized(): boolean { // true once content was registered or a cache file was loaded
+    return this.initialized;
+  }
+
+  getAllTags(): string[] { // every known (normalized) tag, in default Array.prototype.sort order
+    return Array.from(this.tagMap.keys()).sort();
+  }
+
+  getContentSlugsForTag(tag: string): string[] { // slugs recorded for `tag` (normalized before lookup); [] for an unknown tag
+    const parsedTag = parseTag(tag);
+    return Array.from(this.tagMap.get(parsedTag) || []);
+  }
+
+  hasTag(tag: string): boolean { // true when the normalized tag has an entry in the map
+    return this.tagMap.has(parseTag(tag));
+  }
+
+  // Save the current state to disk as pretty-printed JSON (tag -> slug array), creating the cache directory if needed; nothing is caught here, so fs errors reach the caller
+  saveToCache(): void {
+    const cacheDir = path.dirname(CACHE_FILE);
+    if (!fs.existsSync(cacheDir)) {
+      fs.mkdirSync(cacheDir, { recursive: true });
+    }
+
+    const serializedMap: TagMap = {};
+    this.tagMap.forEach((slugs, tag) => {
+      serializedMap[tag] = Array.from(slugs); // Sets are not JSON-serializable, so each one becomes an array
     });
 
-    // Convert to array and sort
-    const uniqueTags = Array.from(allTags).filter(Boolean).sort();
+    fs.writeFileSync(CACHE_FILE, JSON.stringify(serializedMap, null, 2));
+  }
+
+  // Load state from disk, replacing the in-memory map; returns true on success, false when the file is missing or reading/parsing throws (the error is logged)
+  loadFromCache(): boolean {
+    try {
+      if (!fs.existsSync(CACHE_FILE)) {
+        return false;
+      }
+
+      const data = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf-8')) as TagMap; // NOTE(review): unchecked cast — the parsed JSON shape is not validated
+      this.tagMap.clear(); // reached only after a successful parse
+
+      Object.entries(data).forEach(([tag, slugs]) => {
+        this.tagMap.set(tag, new Set(slugs)); // keys are stored as found in the file, without re-running parseTag
+      });
+
+      this.initialized = true;
+      return true;
+    } catch (error) {
+      console.error('Error loading tag registry cache:', error);
+      return false;
+    }
+  }
+}
+
+// Export functions that use the registry
+export const initializeTagRegistry = async (content: MarkdownDocument[]) => { // populates the shared registry once; declared async but the body awaits nothing
+  try {
+    const registry = TagRegistry.getInstance();
 
-    return uniqueTags;
+    // Try to load from cache first
+    if (!registry.isInitialized() && !registry.loadFromCache()) { // NOTE(review): when a cache file loads, `content` is ignored — nothing in this file invalidates a stale cache
+      // If cache doesn't exist or is invalid, rebuild and save
+      registry.registerContent(content); // `content` is only read on this rebuild path
+      registry.saveToCache();
+    }
+  } catch (error) {
+    console.error('Error initializing tag registry:', error); // errors (e.g. from saveToCache) are logged, not rethrown
+  }
+};
+
+export const getAllTags = async () => { // sorted list of every tag in the registry; no longer scans posts and newsletters itself
+  try {
+    const registry = TagRegistry.getInstance();
+    if (!registry.isInitialized() && !registry.loadFromCache()) { // falls back to the disk cache when nothing has been registered in this process
+      console.warn('Tag registry accessed before initialization');
+      return []; // neither in-memory state nor a loadable cache file: report no tags
+    }
+    return registry.getAllTags();
   } catch (error) {
     console.error('Error in getAllTags:', error);
     return [];
   }
 };
+
+export const getContentSlugsForTag = async (tag: string) => { // slugs of all registered content carrying `tag`; [] when the registry is unavailable or on error
+  try {
+    const registry = TagRegistry.getInstance();
+    if (!registry.isInitialized() && !registry.loadFromCache()) { // falls back to the disk cache when nothing has been registered in this process
+      console.warn('Tag registry accessed before initialization');
+      return [];
+    }
+    return registry.getContentSlugsForTag(tag); // the registry normalizes `tag` with parseTag before the lookup
+  } catch (error) {
+    console.error('Error getting content for tag:', error);
+    return [];
+  }
+};
+
+export const hasTag = async (tag: string) => { // whether any registered content carries `tag`; false when the registry is unavailable or on error
+  try {
+    const registry = TagRegistry.getInstance();
+    if (!registry.isInitialized() && !registry.loadFromCache()) { // falls back to the disk cache when nothing has been registered in this process
+      console.warn('Tag registry accessed before initialization');
+      return false;
+    }
+    return registry.hasTag(tag); // the registry normalizes `tag` with parseTag before the lookup
+  } catch (error) {
+    console.error('Error checking tag existence:', error);
+    return false;
+  }
+};