Skip to content

Commit

Permalink
feat: an implementation to cache tags to disk?
Browse files Browse the repository at this point in the history
  • Loading branch information
mbifulco committed Feb 16, 2025
1 parent 33861ec commit 99e88c9
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 77 deletions.
4 changes: 3 additions & 1 deletion src/lib/blog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { join } from 'path';
import type { BlogPost } from '../data/content-types';
import { getAllContentFromDirectory } from './content-loaders/getAllContentFromDirectory';
import { getContentBySlug } from './content-loaders/getContentBySlug';
import { getContentSlugsForTag } from './tags';

// Add markdown files in `src/data/posts`
const postsDirectory = join(process.cwd(), 'src', 'data', 'posts');
Expand All @@ -27,7 +28,8 @@ export const getAllPosts = async () => {
export const getAllPostsByTag = async (tag: string) => {
try {
const posts = await getAllPosts();
return posts.filter((post) => post.frontmatter.tags?.includes(tag));
const slugsForTag = await getContentSlugsForTag(tag);
return posts.filter((post) => slugsForTag.includes(post.slug));
} catch (error) {
console.error('Error getting posts by tag:', error);
return [];
Expand Down
15 changes: 9 additions & 6 deletions src/lib/external-references.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { join } from 'path';
import type { Article } from '../data/content-types';
import { getAllContentFromDirectory } from './content-loaders/getAllContentFromDirectory';
import { getContentBySlug } from './content-loaders/getContentBySlug';
import { getContentSlugsForTag } from './tags';

// directory reference to `src/content/external-references`
const externalReferencesDirectory = join(
Expand Down Expand Up @@ -36,10 +37,12 @@ export const getAllExternalReferences = async () => {
};

/**
 * Returns every external reference associated with `tag`.
 *
 * Membership is decided by the tag registry: an article is kept when its
 * `slug` is among the slugs that `getContentSlugsForTag` reports for the tag.
 * Any failure while loading the references or the slugs is logged and
 * results in an empty list rather than a rejected promise.
 *
 * @param tag - Tag to look up; normalization is left to `getContentSlugsForTag`.
 * @returns The matching external references, or `[]` on error.
 */
export const getAllExternalReferencesByTag = async (tag: string) => {
  try {
    const refs = await getAllExternalReferences();
    // A Set makes each membership test constant-time instead of scanning
    // the slug list once per article.
    const slugsForTag = new Set(await getContentSlugsForTag(tag));
    return refs.filter((article) => slugsForTag.has(article.slug));
  } catch (error) {
    console.error('Error getting external references by tag:', error);
    return [];
  }
};
92 changes: 51 additions & 41 deletions src/lib/newsletters.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,56 @@
import { join } from 'path';

import type { Newsletter } from '../data/content-types';
import type { MarkdownDocument, Newsletter } from '../data/content-types';
import { getAllContentFromDirectory } from './content-loaders/getAllContentFromDirectory';
import { getContentBySlug } from './content-loaders/getContentBySlug';
import { getContentSlugsForTag } from './tags';

// directory reference to `src/data/newsletters`
const newslettersDirectory = join(process.cwd(), 'src', 'data', 'newsletters');
export const newslettersDirectory = join(
process.cwd(),
'src',
'data',
'newsletters'
);

const NEWSLETTERS_CONTENT_TYPE = 'newsletter';
export const NEWSLETTERS_CONTENT_TYPE = 'newsletter';

// Helper function to safely process raw content into newsletters
/**
 * Turns raw markdown documents into the list of usable newsletters.
 *
 * A missing or non-array input produces a warning and an empty list. Within
 * an array, entries that are null/undefined or whose frontmatter has no
 * `slug` are dropped, each with its own warning.
 *
 * @param rawContent - Documents as loaded from the newsletters directory.
 * @returns The entries that carry a frontmatter slug, in their original order.
 */
export const processNewslettersContent = (
  rawContent: MarkdownDocument[]
): Newsletter[] => {
  if (!rawContent) {
    console.warn('processNewslettersContent: content is null or undefined');
    return [];
  }

  if (!Array.isArray(rawContent)) {
    console.warn(
      `processNewslettersContent: content is not an array, got ${typeof rawContent}`
    );
    return [];
  }

  // Decides whether a single entry is kept; warns about each rejected entry.
  const hasUsableSlug = (entry: MarkdownDocument): boolean => {
    if (!entry) {
      console.warn(
        'processNewslettersContent: found null/undefined newsletter entry'
      );
      return false;
    }

    if (entry.frontmatter?.slug) {
      return true;
    }

    console.warn(
      'processNewslettersContent: found newsletter without slug:',
      JSON.stringify(entry.frontmatter, null, 2)
    );
    return false;
  };

  // filter out newsletters that don't have a slug
  const withSlugs = rawContent.filter((entry) => hasUsableSlug(entry));
  return withSlugs as Newsletter[];
};

export const getNewsletterBySlug = async (slug: string) => {
const reference = await getContentBySlug(
Expand All @@ -20,44 +63,11 @@ export const getNewsletterBySlug = async (slug: string) => {

export const getAllNewsletters = async () => {
try {
const newsletters = (await getAllContentFromDirectory(
const rawContent = await getAllContentFromDirectory(
newslettersDirectory,
NEWSLETTERS_CONTENT_TYPE
)) as Newsletter[];

if (!newsletters) {
console.warn('getAllNewsletters: newsletters is null or undefined');
return [];
}

if (!Array.isArray(newsletters)) {
console.warn(
`getAllNewsletters: newsletters is not an array, got ${typeof newsletters}`
);
return [];
}

// filter out newsletters that don't have a slug
const filteredNewsletters = newsletters.filter((newsletter) => {
if (!newsletter) {
console.warn(
'getAllNewsletters: found null/undefined newsletter entry'
);
return false;
}

if (!newsletter.frontmatter?.slug) {
console.warn(
'getAllNewsletters: found newsletter without slug:',
JSON.stringify(newsletter.frontmatter, null, 2)
);
return false;
}

return true;
});

return filteredNewsletters;
);
return processNewslettersContent(rawContent);
} catch (error) {
console.error('Error in getAllNewsletters:', error);
// Re-throw the error to be handled by the page's error boundary
Expand All @@ -67,10 +77,10 @@ export const getAllNewsletters = async () => {

export const getAllNewslettersByTag = async (tag: string) => {
try {
// Use the safe directory reader
const newsletters = await getAllNewsletters();
const slugsForTag = await getContentSlugsForTag(tag);
return newsletters.filter((newsletter) =>
newsletter.frontmatter.tags?.includes(tag)
slugsForTag.includes(newsletter.slug)
);
} catch (error) {
console.error('Error getting newsletters by tag:', error);
Expand Down
187 changes: 158 additions & 29 deletions src/lib/tags.ts
Original file line number Diff line number Diff line change
@@ -1,42 +1,171 @@
import { getAllPosts } from './blog';
import { getAllNewsletters } from './newsletters';
import fs from 'fs';
import path from 'path';

import type { MarkdownDocument } from '../data/content-types';

// Location of the serialized tag registry:
// `<current working directory>/.next/cache/tag-registry.json`.
const CACHE_PATH_SEGMENTS = ['.next', 'cache', 'tag-registry.json'];
const CACHE_FILE = path.join(process.cwd(), ...CACHE_PATH_SEGMENTS);

/**
 * Normalizes a tag for use as a registry key: every space character becomes
 * a hyphen and the result is lower-cased with the host locale's rules.
 * Consecutive, leading and trailing spaces each yield their own hyphen.
 */
export const parseTag = (tag: string) =>
  tag.replace(/ /g, '-').toLocaleLowerCase();

export const getAllTags = async () => {
try {
// Safely fetch content with fallbacks
const posts = await getAllPosts().catch(() => []);
const newsletters = await getAllNewsletters().catch(() => []);

const allTags = new Set<string>();
const allContent = [...posts, ...newsletters];

// More defensive processing of content
allContent.forEach((content) => {
const tags = content?.frontmatter?.tags;
if (!tags) return;

if (!Array.isArray(tags)) return;

tags.forEach((tag) => {
if (typeof tag === 'string') {
const parsedTag = parseTag(tag);
if (parsedTag) {
allTags.add(parsedTag);
}
}
});
// JSON-friendly shape of the registry: tag -> array of content slugs
type TagMap = Record<string, string[]>;

// Singleton to manage tags across the application
/**
 * Registry mapping each normalized tag to the slugs of the content that
 * carries it. A single shared instance is handed out by `getInstance`.
 *
 * The registry can be filled from in-memory content (`registerContent`) or
 * from the JSON file at `CACHE_FILE` (`loadFromCache`), and can write its
 * current state back to that file (`saveToCache`).
 */
class TagRegistry {
  private static instance: TagRegistry | undefined;
  private tagMap: Map<string, Set<string>> = new Map(); // tag -> set of content slugs
  private initialized = false;

  private constructor() {}

  /** Returns the shared registry, creating it on first use. */
  static getInstance(): TagRegistry {
    if (!TagRegistry.instance) {
      TagRegistry.instance = new TagRegistry();
    }
    return TagRegistry.instance;
  }

  /**
   * Records the tags of one document under its frontmatter slug.
   * Documents without a tags array or without a slug are ignored, as are
   * tags that are not strings or that normalize to an empty string.
   */
  private registerContentTags(content: MarkdownDocument) {
    const tags = content?.frontmatter?.tags;
    if (!tags || !Array.isArray(tags)) return;

    const slug = content.frontmatter.slug;
    if (!slug) return;

    tags.forEach((tag) => {
      if (typeof tag !== 'string') return;
      const parsedTag = parseTag(tag);
      if (!parsedTag) return;

      let slugs = this.tagMap.get(parsedTag);
      if (!slugs) {
        slugs = new Set();
        this.tagMap.set(parsedTag, slugs);
      }
      slugs.add(slug);
    });
  }

  /**
   * Adds the tags of every given document to the registry and marks the
   * registry as initialized (even when `content` is empty).
   */
  registerContent(content: MarkdownDocument[]) {
    content.forEach((item) => this.registerContentTags(item));
    this.initialized = true;
  }

  /** True once content has been registered or a cache file has been loaded. */
  isInitialized(): boolean {
    return this.initialized;
  }

  /** All known normalized tags, sorted with the default string ordering. */
  getAllTags(): string[] {
    return Array.from(this.tagMap.keys()).sort();
  }

  /** Slugs registered under `tag` (normalized first); `[]` for unknown tags. */
  getContentSlugsForTag(tag: string): string[] {
    const parsedTag = parseTag(tag);
    return Array.from(this.tagMap.get(parsedTag) ?? []);
  }

  /** Whether any content is registered under `tag` (normalized first). */
  hasTag(tag: string): boolean {
    return this.tagMap.has(parseTag(tag));
  }

  /**
   * Save the current state to disk as pretty-printed JSON at `CACHE_FILE`.
   * File-system errors propagate to the caller.
   */
  saveToCache(): void {
    // With `recursive: true` this is a no-op when the directory already
    // exists, so no separate existence check is needed.
    fs.mkdirSync(path.dirname(CACHE_FILE), { recursive: true });

    const serializedMap: TagMap = {};
    this.tagMap.forEach((slugs, tag) => {
      serializedMap[tag] = Array.from(slugs);
    });

    fs.writeFileSync(CACHE_FILE, JSON.stringify(serializedMap, null, 2));
  }

  /**
   * Load state from disk, replacing whatever the registry currently holds.
   *
   * The file must contain a JSON object whose values are arrays of strings.
   * The new map is built separately and only swapped in once the whole file
   * has been validated, so a missing, unreadable or malformed cache leaves
   * the registry exactly as it was.
   *
   * @returns `true` when the cache was loaded, `false` otherwise.
   */
  loadFromCache(): boolean {
    try {
      if (!fs.existsSync(CACHE_FILE)) {
        return false;
      }

      const parsed: unknown = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf-8'));
      if (
        parsed === null ||
        typeof parsed !== 'object' ||
        Array.isArray(parsed)
      ) {
        throw new Error('tag registry cache is not a JSON object');
      }

      const loaded = new Map<string, Set<string>>();
      for (const [tag, slugs] of Object.entries(parsed)) {
        if (
          !Array.isArray(slugs) ||
          !slugs.every((slug) => typeof slug === 'string')
        ) {
          throw new Error(
            `tag registry cache entry "${tag}" is not an array of strings`
          );
        }
        loaded.set(tag, new Set(slugs));
      }

      this.tagMap = loaded;
      this.initialized = true;
      return true;
    } catch (error) {
      console.error('Error loading tag registry cache:', error);
      return false;
    }
  }
}

// Export functions that use the registry
/**
 * Makes sure the shared tag registry is populated.
 *
 * Nothing happens when the registry is already initialized. Otherwise the
 * on-disk cache is tried first; only if it cannot be loaded is `content`
 * registered and the result written back to the cache. Errors are logged
 * and never thrown.
 *
 * NOTE(review): a cache file that loads successfully is used as-is and
 * `content` is not compared against it, so the cache may be out of date
 * relative to `content` — confirm that it is cleared between builds.
 *
 * @param content - Documents whose tags are registered on a cache miss.
 */
export const initializeTagRegistry = async (content: MarkdownDocument[]) => {
  try {
    const registry = TagRegistry.getInstance();

    // Try to load from cache first
    if (!registry.isInitialized() && !registry.loadFromCache()) {
      // If cache doesn't exist or is invalid, rebuild and save
      registry.registerContent(content);
      registry.saveToCache();
    }
  } catch (error) {
    console.error('Error initializing tag registry:', error);
  }
};

/**
 * Lists every tag known to the registry, as returned by its `getAllTags`.
 *
 * If the registry is not initialized, one attempt is made to load it from
 * the cache file. When that fails too, a warning is logged and `[]` is
 * returned. Unexpected errors are logged and also yield `[]`.
 */
export const getAllTags = async () => {
  try {
    const tagRegistry = TagRegistry.getInstance();
    const ready = tagRegistry.isInitialized() || tagRegistry.loadFromCache();
    if (ready) {
      return tagRegistry.getAllTags();
    }

    console.warn('Tag registry accessed before initialization');
    return [];
  } catch (err) {
    console.error('Error in getAllTags:', err);
    return [];
  }
};

/**
 * Looks up the content slugs registered under `tag`.
 *
 * If the registry is not initialized, one attempt is made to load it from
 * the cache file. When that fails too, a warning is logged and `[]` is
 * returned. Unexpected errors are logged and also yield `[]`.
 *
 * @param tag - Tag to look up; the registry normalizes it before the lookup.
 */
export const getContentSlugsForTag = async (tag: string) => {
  try {
    const tagRegistry = TagRegistry.getInstance();
    const ready = tagRegistry.isInitialized() || tagRegistry.loadFromCache();
    if (ready) {
      return tagRegistry.getContentSlugsForTag(tag);
    }

    console.warn('Tag registry accessed before initialization');
    return [];
  } catch (err) {
    console.error('Error getting content for tag:', err);
    return [];
  }
};

/**
 * Reports whether the registry knows `tag`.
 *
 * If the registry is not initialized, one attempt is made to load it from
 * the cache file. When that fails too, a warning is logged and `false` is
 * returned. Unexpected errors are logged and also yield `false`.
 *
 * @param tag - Tag to check; the registry normalizes it before the lookup.
 */
export const hasTag = async (tag: string) => {
  try {
    const tagRegistry = TagRegistry.getInstance();
    const ready = tagRegistry.isInitialized() || tagRegistry.loadFromCache();
    if (ready) {
      return tagRegistry.hasTag(tag);
    }

    console.warn('Tag registry accessed before initialization');
    return false;
  } catch (err) {
    console.error('Error checking tag existence:', err);
    return false;
  }
};

0 comments on commit 99e88c9

Please sign in to comment.