
Outstatic and FlexSearch indexing
I've been trying to get site search working with FlexSearch, and the awkward part isn't the search itself — it's where to build the index. The obvious answer is "a separate script run from the build command in package.json", but that's annoying to wire up with TypeScript: you need ts-node, ESM imports trip over themselves, and the script just sits outside the Next.js build rather than being part of it.
Since Next.js already supports generating a sitemap during the build, and I'm iterating over every post there anyway, building the index in the same place feels like a fair trade. It ends up looking like this:
import { MetadataRoute } from "next";
import fs from "fs";
import { NEXT_PUBLIC_APP_URL } from "../lib/constants";
import { load } from "outstatic/server";
import { Page, Section, pageIndex, sectionIndex } from "@/lib/search";
import { join } from "path";
import matter from "gray-matter";
// Both locations are resolved against the directory the process was started in.
const workingDirectory = process.cwd();
// Where the generated search index is written.
const publicDirectory = join(workingDirectory, "public");
// Where the Markdown files for the "posts" collection are read from.
const postsDirectory = join(workingDirectory, "outstatic/content/posts");
/**
 * Reads one post's Markdown file and returns its front matter together with
 * its body and a few derived fields.
 *
 * @param slug - File name of the post (without the `.md` extension) inside
 *   `postsDirectory`.
 * @returns Every front-matter field as parsed, plus `content` (the text after
 *   the front matter), `title`, `slug` and the absolute `url` of the post.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function getPostBySlug(slug: string) {
  const fullPath = join(postsDirectory, `${slug}.md`);
  const fileContents = fs.readFileSync(fullPath, "utf8");
  const { data, content } = matter(fileContents);

  // Prefer the slug declared in the front matter, but fall back to the one
  // the file was located with so the URL never ends in "/undefined".
  const postSlug: string = data.slug ?? slug;

  return {
    ...data,
    content,
    title: data.title,
    url: `${NEXT_PUBLIC_APP_URL}/${postSlug}`,
    slug: postSlug,
  };
}
/**
 * Produces the sitemap entries for every published post and, as a side
 * effect, writes the FlexSearch indexes to `public/search-index.json`.
 *
 * For each post this adds:
 * - one `sectionIndex` entry for the title (with the first non-blank line of
 *   the body as its `display` text, when there is one),
 * - one `sectionIndex` entry per non-blank line of the body,
 * - one `pageIndex` entry holding the title plus the whole body.
 *
 * @returns One sitemap entry per published post, with `publishedAt` as the
 *   last-modified date.
 */
export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
  const db = await load();
  const items = await db
    .find({ collection: "posts", status: "published" }, ["slug", "publishedAt"])
    .toArray();

  items.forEach((item, index) => {
    const post = getPostBySlug(item.slug);
    const { title, content, url } = post;
    // Sections carry this id; the page itself is stored under the numeric
    // `index` in pageIndex below.
    const pageId = `page_${index}`;
    const paragraphs = content.split("\n");

    // The body may start with blank lines, so look for the first line that
    // actually has text instead of blindly taking paragraphs[0].
    const firstParagraph = paragraphs.find(
      (paragraph) => paragraph.trim() !== "",
    );

    sectionIndex.add({
      id: post.slug,
      url,
      title,
      pageId,
      content: title,
      ...(firstParagraph ? { display: firstParagraph } : {}),
    });

    paragraphs.forEach((paragraph, i) => {
      // Blank lines have nothing to search for and only inflate the exported
      // JSON. `i` stays the original line number, so the ids of the remaining
      // sections are the same as before.
      if (paragraph.trim() === "") {
        return;
      }
      sectionIndex.add({
        id: `${url}_${i}`,
        url,
        title,
        pageId,
        content: paragraph,
      });
    });

    pageIndex.add({
      id: index,
      title,
      content: `${title} ${content}`,
    });
  });

  // Collect every exported chunk of both indexes into one serialisable object.
  const indexes: {
    pageIndex: { [key: string]: Page };
    sectionIndex: { [key: string]: Section };
  } = { pageIndex: {}, sectionIndex: {} };

  await pageIndex.export(async (key, data) => {
    indexes.pageIndex[key] = data;
  });
  await sectionIndex.export(async (key, data) => {
    indexes.sectionIndex[key] = data;
  });

  // Written under public/ so the file is served as a static asset.
  fs.writeFileSync(
    join(publicDirectory, "search-index.json"),
    JSON.stringify(indexes),
  );

  return items.map((post) => ({
    url: `${NEXT_PUBLIC_APP_URL}/${post.slug}`,
    lastModified: post.publishedAt,
  }));
}
FlexSearch's model is that the index is a JSON file that ships to the client, and the search happens in the browser. So this writes the combined page and section indexes to public/search-index.json, which you can see at https://ahlstrand.es/search-index.json.
It's a fairly chunky file already, so I'm a bit suspicious of how this scales as the post count grows. But that's a problem for future me.