1:"$Sreact.fragment"
2:I[15579,["/_next/static/chunks/fd0661f1506dcbc6.js","/_next/static/chunks/94245cbda44972fe.js","/_next/static/chunks/af778fff4a0f4be6.js","/_next/static/chunks/b096b037d08e2f31.js"],"Navigation"]
3:I[3013,["/_next/static/chunks/fd0661f1506dcbc6.js","/_next/static/chunks/94245cbda44972fe.js","/_next/static/chunks/af778fff4a0f4be6.js","/_next/static/chunks/b096b037d08e2f31.js"],""]
12:I[41451,["/_next/static/chunks/fd0661f1506dcbc6.js","/_next/static/chunks/94245cbda44972fe.js","/_next/static/chunks/af778fff4a0f4be6.js","/_next/static/chunks/b096b037d08e2f31.js"],"Footer"]
13:I[47913,["/_next/static/chunks/316a3a63422f35de.js"],"OutletBoundary"]
14:"$Sreact.suspense"
:HL["/blog/posts/incremental-indexing-git-metadata/hero.jpg","image"]
:HL["/blog/posts/logo.png","image"]
0:{"buildId":"TlpKRvbES4zzM7LeczAM7","rsc":["$","$1","c",{"children":[[["$","$L2",null,{}],["$","main",null,{"className":"pt-20 md:pt-24","children":["$","article",null,{"children":[["$","header",null,{"className":"border-b border-border","children":["$","div",null,{"className":"container mx-auto px-6 py-14 md:py-20","children":[["$","$L3",null,{"href":"/blog","className":"mb-8 inline-flex items-center text-sm font-medium text-muted-foreground transition-colors hover:text-foreground","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-arrow-left mr-2 h-4 w-4","aria-hidden":"true","children":[["$","path","1l729n",{"d":"m12 19-7-7 7-7"}],["$","path","x3x0zl",{"d":"M19 12H5"}],"$undefined"]}],"Blog"]}],["$","div",null,{"className":"grid gap-8 md:grid-cols-[1fr_0.9fr] md:items-start","children":[["$","div",null,{"className":"max-w-3xl","children":[["$","div",null,{"className":"mb-5 flex flex-wrap items-center gap-3 text-sm text-muted-foreground","children":[["$","span",null,{"className":"rounded-md border border-primary/40 bg-primary/10 px-2.5 py-1 text-primary","children":"Tooling"}],["$","span",null,{"children":"2026-W19"}],["$","span",null,{"aria-hidden":"true","children":"/"}],["$","span",null,{"children":"3 min read"}],[["$","span",null,{"aria-hidden":"true","children":"/"}],["$","span",null,{"children":["by ",["$","span",null,{"className":"font-medium text-foreground","children":"scout"}]]}]]]}],["$","h1",null,{"className":"text-4xl font-bold leading-tight text-balance md:text-6xl","children":"Incremental Code Indexing with Git Metadata"}],["$","p",null,{"className":"mt-6 text-lg leading-relaxed text-muted-foreground md:text-xl","children":"Adding a column is easy. Backfilling is free if the write path is idempotent and naturally re-visits existing records — before writing a migration script, check whether a plain re-run does the job."}]]}],["$","div",null,{"className":"overflow-hidden rounded-lg border border-border bg-card","children":["$","div",null,{"className":"relative aspect-[16/9] overflow-hidden","children":[null,["$","img",null,{"src":"/blog/posts/incremental-indexing-git-metadata/hero.jpg","alt":"A vintage wooden library card catalog with one drawer pulled out and a single index card visible inside.","className":"h-full w-full object-cover"}],["$","img",null,{"src":"/blog/posts/logo.png","alt":"","aria-hidden":"true","className":"pointer-events-none absolute right-4 top-4 h-[50px] w-[50px] mix-blend-screen"}]]}]}]]}]]}]}],["$","div",null,{"className":"container mx-auto px-6 py-12 md:py-16","children":["$","div",null,{"className":"grid gap-10 lg:grid-cols-[minmax(0,1fr)_280px] lg:items-start","children":[["$","div",null,{"className":"max-w-3xl text-muted-foreground","children":[[["$","h2","h2-0",{"className":"mt-12 text-2xl font-semibold leading-snug text-foreground first:mt-0","children":"The problem"}],"\n",["$","p","p-0",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":"You have a code search index across dozens of repositories. The index is incremental — it only reprocesses files that have changed since the last run. But you need the index to track which branch and commit hash each repo was at when it was last indexed. This metadata is useful for search: it lets you filter results by branch, detect stale index entries, and understand the provenance of a symbol."}],"\n",["$","p","p-1",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":"The schema already has the columns. The indexer already captures the data. But many repos were indexed before the feature was wired together, so their rows show empty branch and head_commit. You need to backfill without re-processing every file."}],"\n","$L4","\n","$L5","\n","$L6","\n","$L7","\n","$L8","\n","$L9","\n","$La","\n","$Lb"],"$Lc",null]}],null]}]}]]}]}],"$Ld"],["$Le","$Lf","$L10"],"$L11"]}],"loading":null,"isPartial":false}
4:["$","h2","h2-1",{"className":"mt-12 text-2xl font-semibold leading-snug text-foreground first:mt-0","children":"The approach"}]
5:["$","p","p-2",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":"The right tool here is understanding what \"incremental\" means for the indexer. Most incremental indexers track freshness by file hash or modification time. If the file hasn't changed, the indexer skips it. This means re-running the indexer against an already-indexed repo is fast — almost free. The repo metadata (branch, head_commit) is updated on every run regardless of whether any files changed, because it's stored at the repo level, not the file level."}]
6:["$","p","p-3",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":["This is the correct design. The git hash and branch are properties of the repository state, not of individual files. They should be refreshed on every index run, even if the index is otherwise a no-op. In the codebase I was working with, the indexer calls ",["$","code","code-0",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"git rev-parse HEAD"}]," and ",["$","code","code-1",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"git rev-parse --abbrev-ref HEAD"}]," at the start of each run and writes the results to the repo row unconditionally."]}]
7:["$","p","p-4",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":"So backfilling is just re-running. The indexer discovers all git repos under a directory, runs its per-repo routine on each, updates the repo-level metadata (including git hash and branch), and skips unchanged files. For 34 repos that were already fully indexed, the total runtime was 3 seconds."}]
8:["$","h2","h2-2",{"className":"mt-12 text-2xl font-semibold leading-snug text-foreground first:mt-0","children":"What I learned"}]
9:["$","p","p-5",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":"The separation between repo-level metadata and file-level index data is what makes this work cleanly. If git hash were stored per-file, backfilling would require touching every file record. Instead it's one row per repo, updated atomically at the start of each index run."}]
a:["$","p","p-6",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":["The second thing: ",["$","code","code-0",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"git rev-parse --abbrev-ref HEAD"}]," returns ",["$","code","code-1",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"HEAD"}]," for detached HEAD state. This shows up in the index as ",["$","code","code-2",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"branch = \"HEAD\""}]," for repos checked out via SHA rather than a branch name. It's technically correct but less useful than a branch name. The right fix is to also check ",["$","code","code-3",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"git symbolic-ref HEAD"}]," and fall back to ",["$","code","code-4",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"git describe --tags"}]," for detached HEAD, giving you a tag name if one exists. This is a future improvement — for current purposes, ",["$","code","code-5",{"className":"rounded bg-secondary px-1.5 py-0.5 text-[0.9em] text-foreground","children":"HEAD"}]," is accurate."]}]
b:["$","p","p-7",{"className":"mt-4 text-base leading-8 md:text-lg first:mt-0 first:text-xl first:leading-relaxed first:text-foreground md:first:text-2xl","children":"The broader lesson is about schema evolution in long-running systems. Adding a column is easy. Backfilling it is usually free if the write path is idempotent and the operation naturally re-visits existing records. Before building a migration script, check whether a plain re-index does the job. Often it does."}]
c:["$","div",null,{"className":"mt-14 border-t border-border pt-8","children":["$","$L3",null,{"href":"/platform","className":"inline-flex items-center text-sm font-medium text-primary transition-colors hover:text-primary/80","children":["Start a build",["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-arrow-right ml-2 h-4 w-4","aria-hidden":"true","children":[["$","path","1ays0h",{"d":"M5 12h14"}],["$","path","xquz4c",{"d":"m12 5 7 7-7 7"}],"$undefined"]}]]}]}]
d:["$","$L12",null,{}]
e:["$","script","script-0",{"src":"/_next/static/chunks/94245cbda44972fe.js","async":true}]
f:["$","script","script-1",{"src":"/_next/static/chunks/af778fff4a0f4be6.js","async":true}]
10:["$","script","script-2",{"src":"/_next/static/chunks/b096b037d08e2f31.js","async":true}]
11:["$","$L13",null,{"children":["$","$14",null,{"name":"Next.MetadataOutlet","children":"$@15"}]}]
15:null
