src/generate-source-browser.lua

960 lines

1#!/usr/bin/env luajit
2
3-- {{{ generate-source-browser.lua
4-- Issue 10-052: Self-hosted, link-only source browser.
5--
6-- WHAT IT DOES (for a CEO): GitHub has no "visible only with the link" setting --
7-- a repo is either public (anyone can find it) or private (invite-only). This
8-- script instead turns the project's own code, issues, and docs into a small
9-- website that lives next to the poetry pages already shared by link. Whoever
10-- has that link can browse the source; the private monorepo never leaves your
11-- machine. It is, in effect, a "git push that builds a webpage instead".
12--
13-- HOW: it asks git for the list of tracked files (so .gitignore is obeyed),
14-- keeps only an ALLOWLIST of code/doc directories (never the private input/ or
15-- transcripts), renders each text file as a syntax-highlighted, line-numbered
16-- page with a collapsible file-tree sidebar, and writes an index. Images are
17-- shown; other binaries are listed but not inlined.
18--
19-- Usage:
20-- luajit src/generate-source-browser.lua [DIR]
21-- Output:
22-- output/source/index.html -- the tree + welcome
23-- output/source/<path>.html -- one page per published file
24-- }}}
25
26-- {{{ setup_dir_path()
local function setup_dir_path(provided_dir)
  -- An explicitly supplied directory wins; otherwise use the fixed checkout
  -- location of the project.
  local fallback_root = "/mnt/mtwo/programming/ai-stuff/neocities-modernization"
  return provided_dir or fallback_root
end
31-- }}}
32
33-- {{{ parse_args()
local function parse_args(args)
  -- Walk the argument sequence and hand back the first entry that is not an
  -- option (does not begin with "-"). No positional argument -> nil.
  local list = args or {}
  local idx = 1
  local candidate = list[idx]
  while candidate ~= nil do
    if candidate:sub(1, 1) ~= "-" then
      return candidate
    end
    idx = idx + 1
    candidate = list[idx]
  end
  return nil
end
40-- }}}
41
42local DIR = setup_dir_path(parse_args(arg))
43package.path = DIR .. "/libs/?.lua;" .. DIR .. "/src/?.lua;" .. package.path
44local utils = require("utils")
45-- Issue 10-055: render Markdown (.md, .info.md, and extensionless prose like the
46-- vision doc) as formatted HTML instead of flat text.
47local markdown = require("markdown")
48
49-- {{{ Configuration
50-- ALLOWLIST: only files under these top-level directories are published. This
51-- is a denylist's safer cousin -- we can only ever leak what we explicitly name,
52-- so the private input/ corpus and llm-transcripts/ cannot slip in by accident.
53-- Widen this list deliberately if you want more published.
local INCLUDE_DIRS = {}
for _, dir_name in ipairs({
  "src", "libs", "scripts", "issues", "docs", "notes", "demos",
  -- archive/ holds the dated word-cloud snapshots (and the first-published
  -- keepsake). It is published on purpose so the history of how the cloud
  -- changed is browsable next to the source. Safe: it is the project's own
  -- output, not the private input/ corpus.
  "archive",
}) do
  INCLUDE_DIRS[dir_name] = true
end
63-- Root-level files are published only if their extension is a code/doc type.
local ROOT_FILE_EXTS = {
  -- code
  lua = true,
  sh = true,
  c = true,
  h = true,
  -- docs and data
  md = true,
  txt = true,
  json = true,
  -- web
  css = true,
  html = true,
}
68-- How each extension is treated.
local TEXT_EXTS = {
  lua = "lua",
  c = "c",
  h = "c",
  sh = "sh",
  md = "md",
  txt = "text",
  json = "json",
  css = "css",
  html = "html",
  js = "js",
  -- A path whose FINAL suffix is ".info" is treated as Markdown. A name such
  -- as "name.info.md" already ends in ".md" and resolves through the md entry.
  info = "md",
}
-- Extensions shown as images rather than as text.
local IMAGE_EXTS = {
  png = true,
  jpg = true,
  jpeg = true,
  gif = true,
  webp = true,
  svg = true,
}
75
76-- Saved-webpage out-links (Issue 10-055, Feature F). A "saved page" (an .html with
77-- a sibling "<name>_files/" assets dir) is NOT the project's own work -- it is a
78-- mirror of an article elsewhere on the web. We do NOT host a copy: the platform
79-- is no-JS, hosting a stranger's scripts/trackers/CSS is wrong, and the live
80-- article is always fresher. Instead its table-of-contents entry is an external
81-- link to the real page, and neither the .html nor its _files/ assets are
82-- published. The canonical URL is read from the saved file's own
83-- <link rel="canonical"> / <meta property="og:url"> (self-describing, never
84-- stale). This override map is only for a saved page that lacks those tags; the
85-- key is the file's repo-relative path. Left empty because the one saved page we
86-- have carries its canonical URL inline.
-- Key: the saved page's repo-relative path. Value: the original article URL.
-- Example entry (none are needed at present):
--   ["docs/Some Saved Page.html"] = "https://example.com/the-original"
local MIRROR_URL_OVERRIDES = {}
90
91-- Token CSS classes (colors live in _style.css, derived from the project's own
92-- palette: gold = literal text/poems, blue = structure, teal = annotations).
local TOK = {
  comment = "c-cm",
  string = "c-st",
  keyword = "c-kw",
  number = "c-nu",
}
94-- }}}
95
96-- {{{ Per-language syntax (dispatch table by language id)
97-- Each entry: line comment prefix, block comment open/close, string delimiters,
98-- and a keyword set. A nil field simply disables that feature for the language.
local function set(words)
  -- Turn a whitespace-separated word list into a membership table
  -- (word -> true). The gsub callback returns nothing, so gsub is used purely
  -- as an iterator over the non-space runs.
  local members = {}
  words:gsub("%S+", function(word)
    members[word] = true
  end)
  return members
end
local LANGS = {
  lua = {
    line = "--",
    block_open = "--[[",
    block_close = "]]",
    strings = { ['"'] = '"', ["'"] = "'" },
    keywords = set(
      "and break do else elseif end false for function goto if in local nil "
        .. "not or repeat return then true until while self"
    ),
  },
  c = {
    line = "//",
    block_open = "/*",
    block_close = "*/",
    strings = { ['"'] = '"', ["'"] = "'" },
    keywords = set(
      "auto break case char const continue default do double else enum extern "
        .. "float for goto if inline int long register return short signed sizeof "
        .. "static struct switch typedef union unsigned void volatile while"
    ),
  },
  sh = {
    line = "#",
    strings = { ['"'] = '"', ["'"] = "'" },
    keywords = set(
      "if then else elif fi for while do done case esac function in return "
        .. "local export readonly echo exit"
    ),
  },
  -- Empty entries: rendered with line numbers but no token coloring.
  md = {},
  json = {},
  css = {},
  html = {},
  js = {},
  text = {},
}
122-- }}}
123
124-- {{{ ext() -- extension/language of a path
125-- "name.info.md" -> md, "x.lua" -> lua. Returns (ext, language_or_nil).
local function ext(path)
  -- Final suffix = the alphanumeric run after the last "." at the very end of
  -- the path, lowercased; "" when the path does not end that way. The second
  -- result is the TEXT_EXTS language id for that suffix (nil if not a text type).
  local suffix = path:match("%.([%w]+)$")
  suffix = suffix and suffix:lower() or ""
  return suffix, TEXT_EXTS[suffix]
end
131-- }}}
132
133-- {{{ ensure_dir()
134-- Create a directory tree. Unlike utils.ensure_directory (unquoted mkdir, which
135-- breaks on the spaces in e.g. a saved-webpage folder), this single-quotes the
136-- path so any name works. io.open below handles spaced paths natively; only the
137-- shell mkdir needed the quoting. Single command, not chained.
local function ensure_dir(path)
  -- POSIX single-quote escaping: every ' in the path becomes '\'' (close the
  -- quote, emit an escaped quote, reopen), then the whole path is wrapped in
  -- single quotes so spaces and other shell characters are taken literally.
  local quoted = "'" .. path:gsub("'", "'\\''") .. "'"
  os.execute("mkdir -p " .. quoted)
end
141-- }}}
142
143-- {{{ escape_html()
-- Characters that must never appear raw in generated HTML. The double quote is
-- included because escaped values are also placed inside double-quoted
-- attributes (href="...", alt="..."), where a raw " would end the attribute.
local HTML_ESCAPES = {
  ["&"] = "&amp;",
  ["<"] = "&lt;",
  [">"] = "&gt;",
  ['"'] = "&quot;",
}

-- Escape `s` for safe use in HTML text AND in double-quoted attribute values.
-- Single pass, so an "&" produced by one replacement is never escaped again.
-- Returns exactly one value (the parentheses drop gsub's match count).
local function escape_html(s)
  return (s:gsub('[&<>"]', HTML_ESCAPES))
end
147-- }}}
148
149-- {{{ span() -- wrap escaped text in a token-class span
local function span(cls, text)
  -- `text` is inserted verbatim, so it must already be HTML-escaped.
  local template = '<span class="%s">%s</span>'
  return template:format(cls, text)
end
153-- }}}
154
155-- {{{ highlight_line()
156-- Tokenize ONE raw line for language `lang`, carrying block-comment state across
157-- lines via `state` (a table with .in_block). Returns (html, state). We tokenize
158-- the RAW text and escape each token as we emit it, so HTML entities never get
159-- matched as code. Imperfect by design (no full grammar) but readable.
local function highlight_line(raw, lang, state)
  -- raw:   one source line, unescaped.
  -- lang:  a LANGS entry (or nil). With no keywords, no line comment and no
  --        block comment there is nothing to color: the line is only escaped.
  -- state: table carrying .in_block (inside a block comment) across lines;
  --        mutated in place and also returned.
  -- Returns (html, state).
  if not lang or not lang.keywords and not lang.line and not lang.block_open then
    return escape_html(raw), state
  end
  local out, i, n = {}, 1, #raw

  -- Append one token, escaped, wrapped in a class span when `color` is set.
  local function emit(color, text)
    local safe = escape_html(text)
    out[#out + 1] = color and span(color, safe) or safe
  end

  -- Emit a block comment that starts at `start`, looking for the closing
  -- delimiter from `search_from` on. Returns the index after the consumed
  -- text and records in state whether the comment is still open.
  local function consume_block(start, search_from)
    local _, stop = raw:find(lang.block_close, search_from, true)
    state.in_block = (stop == nil)
    emit(TOK.comment, raw:sub(start, stop or n))
    return (stop or n) + 1
  end

  while i <= n do
    local c = raw:sub(i, i)
    if state.in_block then
      -- Continuing a comment opened on an earlier line.
      i = consume_block(i, i)
    elseif lang.block_open and raw:sub(i, i + #lang.block_open - 1) == lang.block_open then
      -- Search for the closer only AFTER the opener. Searching from the
      -- opener itself lets the two overlap, so C's "/*/" was taken for a
      -- complete comment.
      i = consume_block(i, i + #lang.block_open)
    elseif lang.line and raw:sub(i, i + #lang.line - 1) == lang.line then
      emit(TOK.comment, raw:sub(i)); i = n + 1
    elseif lang.strings and lang.strings[c] then
      -- Consume a string, honoring backslash escapes. An unterminated string
      -- runs to the end of the line.
      local closer, j = lang.strings[c], i + 1
      while j <= n do
        local cj = raw:sub(j, j)
        if cj == "\\" then j = j + 2
        elseif cj == closer then j = j + 1; break
        else j = j + 1 end
      end
      emit(TOK.string, raw:sub(i, j - 1)); i = j
    elseif c:match("%d") and (i == 1 or not raw:sub(i - 1, i - 1):match("[%w_]")) then
      -- A number: digits, letters (hex digits, exponents, suffixes) and dots.
      local literal = raw:match("^[%w%.]+", i)
      emit(TOK.number, literal); i = i + #literal
    elseif c:match("[%a_]") then
      -- An identifier; colored only when it is in the language's keyword set.
      local word = raw:match("^[%w_]+", i)
      emit(lang.keywords and lang.keywords[word] and TOK.keyword or nil, word)
      i = i + #word
    else
      emit(nil, c); i = i + 1
    end
  end
  return table.concat(out), state
end
212-- }}}
213
214-- {{{ list_published_files()
215-- Ask git for tracked files (honors .gitignore), then keep only allowlisted
216-- ones. Returns (kept_list, skipped_dir_counts). Read-only git, no chaining.
local function list_published_files()
  -- Shell-quote DIR with single quotes (the same scheme ensure_dir uses).
  -- string.format("%q") produces a Lua string literal, not a shell word:
  -- inside its double quotes the shell would still expand $ and backticks.
  local quoted_dir = "'" .. DIR:gsub("'", "'\\''") .. "'"
  -- core.quotePath=false: by default git prints paths containing non-ASCII
  -- bytes as C-quoted strings ("caf\303\251.md"). Those would never match the
  -- allowlist and could not be opened later. Paths containing a double quote,
  -- backslash or control character are still quoted by git.
  local pipe = io.popen("git -C " .. quoted_dir .. " -c core.quotePath=false ls-files")
  if not pipe then error("could not run git ls-files in " .. DIR) end
  local kept, skipped = {}, {}
  for line in pipe:lines() do
    local top = line:match("^([^/]+)/") or line -- top-level dir, or root file
    local published
    if line:match("/") then
      -- Inside a directory: published only if its top-level dir is allowlisted.
      published = INCLUDE_DIRS[top] == true
    else
      -- Root-level file: published only for allowlisted extensions.
      local e = ext(line)
      published = ROOT_FILE_EXTS[e] == true
    end
    if published then
      kept[#kept + 1] = line
    else
      -- Count what was left out, keyed by top-level dir (or root file name).
      skipped[top] = (skipped[top] or 0) + 1
    end
  end
  pipe:close()
  table.sort(kept)
  return kept, skipped
end
240-- }}}
241
242-- {{{ build_tree()
243-- Turn the flat file list into a nested tree: node = {dirs={name->node}, files={{name,rel}}}.
local function build_tree(files)
  -- files: list of repo-relative paths ("a/b/c.lua").
  -- Returns the root node; every node is { dirs = {name -> node},
  -- files = { {name = basename, rel = full path}, ... } }.
  local root = { dirs = {}, files = {} }
  for _, rel in ipairs(files) do
    local parts = {}
    for p in rel:gmatch("[^/]+") do parts[#parts + 1] = p end
    -- A path with no components (empty string, or only slashes) has no
    -- basename; skip it rather than record a file whose name is nil.
    if #parts > 0 then
      local node = root
      -- Walk/create one directory node per leading component.
      for idx = 1, #parts - 1 do
        local d = parts[idx]
        node.dirs[d] = node.dirs[d] or { dirs = {}, files = {} }
        node = node.dirs[d]
      end
      node.files[#node.files + 1] = { name = parts[#parts], rel = rel }
    end
  end
  return root
end
259-- }}}
260
261-- {{{ sorted_keys()
local function sorted_keys(t)
  -- Collect every key of `t`, then order them with the default "<" comparison.
  local names = {}
  for name, _ in pairs(t) do
    table.insert(names, name)
  end
  table.sort(names)
  return names
end
268-- }}}
269
270-- {{{ is_viewable_html()
271-- Issue 10-063: archived HTML pages (the word-cloud snapshots under archive/) are
272-- published as VIEWABLE pages, not as source text -- clicking one opens the rendered
273-- cloud. Scoped to archive/ so ordinary HTML elsewhere still renders as code.
274-- Defined before render_sidebar AND classify_file because both consult it.
local function is_viewable_html(rel)
  -- True only for paths that start with "archive/" and end in ".html"
  -- (case-sensitive). Always returns a real boolean.
  if not rel:find("^archive/") then
    return false
  end
  return rel:find("%.html$") ~= nil
end
278-- }}}
279
280-- {{{ render_sidebar()
281-- Render the whole tree as nested <details> (collapsible, needs no JS). Dirs on
282-- the path to `current_rel` are opened so the reader sees where they are.
283-- link_prefix climbs back to output/source/ from the current page.
-- Percent-encode a repo-relative path for use inside an href. "/" is kept so
-- the directory structure survives; every other byte outside the URL
-- "unreserved" set is encoded. That keeps spaces, #, ?, % from changing the
-- link's meaning and keeps quotes, < and & out of the attribute value.
local function sidebar_href_path(rel)
  return (rel:gsub("[^%w%-%._~/]", function(ch)
    return string.format("%%%02X", ch:byte())
  end))
end

-- node:        tree node { dirs = {name -> node}, files = {{name, rel}} }.
-- current_rel: path of the page being rendered; shown as the non-link "cur" row.
-- link_prefix: prepended to every in-site href (never encoded here).
-- path_set:    looked up by bare directory name; a truthy entry renders that
--              <details> open.
-- mirror_url:  optional map rel -> external URL for saved pages.
-- Returns the sidebar HTML for `node` and everything below it.
local function render_sidebar(node, current_rel, link_prefix, path_set, mirror_url)
  local out = {}
  -- Directories first, alphabetically, each a collapsible <details>.
  for _, d in ipairs(sorted_keys(node.dirs)) do
    local open = path_set[d] and " open" or ""
    out[#out + 1] = string.format("<details%s><summary>%s/</summary>", open, escape_html(d))
    out[#out + 1] = render_sidebar(node.dirs[d], current_rel, link_prefix, path_set, mirror_url)
    out[#out + 1] = "</details>"
  end
  -- Then this directory's files, by name (sorts node.files in place).
  local files = node.files
  table.sort(files, function(a, b) return a.name < b.name end)
  for _, f in ipairs(files) do
    local label = escape_html(f.name)
    local ext_url = mirror_url and mirror_url[f.rel]
    if f.rel == current_rel then
      out[#out + 1] = string.format('<div class="cur">%s</div>', label)
    elseif is_viewable_html(f.rel) then
      -- Issue 10-063: an archived HTML page is published as a real page, not a
      -- source view. Link straight to the copied .html (no ".html" source-view
      -- suffix) and open it in a new tab.
      out[#out + 1] = string.format(
        '<a class="ext" href="%s" target="_blank" rel="noopener">%s</a>',
        link_prefix .. sidebar_href_path(f.rel), label)
    elseif ext_url then
      -- A saved page (Feature F): link out to the real article in a new tab.
      -- The URL is absolute, so link_prefix does not apply. Quotes are escaped
      -- here as well so the value cannot end the href attribute early.
      local safe_url = escape_html(ext_url):gsub('"', "&quot;")
      out[#out + 1] = string.format(
        '<a class="ext" href="%s" target="_blank" rel="noopener">%s</a>',
        safe_url, label)
    else
      -- Every other file links to its rendered in-site page.
      out[#out + 1] = string.format('<a href="%s">%s</a>',
        link_prefix .. sidebar_href_path(f.rel) .. ".html", label)
    end
  end
  return table.concat(out, "\n")
end
324-- }}}
325
326-- {{{ page_shell()
-- One page: a sticky left "table of contents" sidebar + the content pane. The
-- look lives in the shared _style.css (the "Machine Codex" theme); fonts come
-- from Google Fonts with serif/mono fallbacks for offline.
--
-- Every site-relative href starts with the same "../" placeholder prefix:
-- the stylesheet and home link sit in output/source/ (prefix only), while the
-- poetry site, its poem index and the download archive sit one directory
-- higher (prefix followed by one more "../").
-- NOTE(review): the header previously said the caller substitutes this prefix
-- with the real climb back to output/source/. That caller is not visible
-- here -- confirm how it rewrites the prefix.
--
-- title:   plain text, escaped here.
-- sidebar: ready-made HTML for the file tree.
-- content: ready-made HTML for the main pane.
local function page_shell(title, sidebar, content)
  local prefix = "../"
  -- The template must contain no "%" other than the %s slots below. The old
  -- text had "%../" in three hrefs, which string.format rejects as an invalid
  -- conversion, so every call raised an error.
  return string.format([[<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>%s</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Fraunces:ital,opsz,wght@0,9..144,500;0,9..144,600;1,9..144,400&family=JetBrains+Mono:ital,wght@0,400;0,500;0,700;1,400&display=swap" rel="stylesheet">
<link rel="stylesheet" href="%s_style.css">
</head>
<body>
<div id="wrap">
<nav id="side">
<a class="home" href="%sindex.html"><span class="brand">Machine&#160;Codex</span><span class="tagline">the source, read as a book</span></a>
<a class="site-link" href="%s../wordcloud.html">&#8617;&#160;back to the poetry site</a>
<div class="tree">%s</div>
<a class="output-link" href="%s../wordcloud.html#poem-index">output/&#160;&#8594;&#160;every generated page</a>
<a class="download-link" href="%s../similar-different.zip" download>&#11015;&#160;download everything (site&#160;+&#160;source)</a>
</nav>
<main id="main">%s</main>
</div>
</body>
</html>]],
    escape_html(title), -- <title>
    prefix,             -- _style.css
    prefix,             -- index.html
    prefix,             -- back to the poetry site
    sidebar,            -- file tree
    prefix,             -- output/ link
    prefix,             -- download link
    content)            -- main pane
end
357-- }}}
358
359-- {{{ relpath_prefix()
360-- "../" repeated to climb from output/source/<rel>.html back to output/source/.
local function relpath_prefix(rel)
  -- One "../" per "/" in the path: the number of directories to climb.
  local climb = {}
  for _ in rel:gmatch("/") do
    climb[#climb + 1] = "../"
  end
  return table.concat(climb)
end
365-- }}}
366
367-- {{{ page_header()
368-- The "<dir>name" title + rule shared by every content page. Split so the
369-- directory dims and the filename stands out. `meta` is an optional subtitle
370-- (e.g. "240 lines"); omit it for rendered prose where a line count is noise.
local function page_header(rel, meta)
  -- Split at the last "/": everything up to and including it is the dimmed
  -- directory part, the rest is the file name. A path with no "/" (or one
  -- that ends in "/") is shown whole as the name.
  local dir, name = "", rel
  local name_start = rel:find("[^/]+$")
  if name_start then
    dir, name = rel:sub(1, name_start - 1), rel:sub(name_start)
  end

  local pieces = {
    '<h1><span class="dir">', escape_html(dir), "</span>", escape_html(name), "</h1>",
  }
  -- `meta` is inserted as given (not escaped); it is omitted when absent.
  if meta then
    pieces[#pieces + 1] = string.format('<p class="meta">%s</p>', meta)
  end
  pieces[#pieces + 1] = '<div class="rule"></div>'
  return table.concat(pieces)
end
378-- }}}
379
380-- {{{ fold_marker_kind()
381-- Issue 10-055: the project brackets every function with vimfold markers
382-- (`-- {{{ name` ... `-- }}}`, per CLAUDE.md). Those markers ARE the fold
383-- boundaries. We only treat a line as a fold marker when the language has a
384-- line-comment AND the marker sits in that comment, so a stray { in code can
385-- never be mistaken for a fold. Returns "open", "close", or nil.
local function fold_marker_kind(line, lang)
  -- No language, or a language without a line-comment prefix: never a fold.
  local marker = lang and lang.line
  if not marker then
    return nil
  end
  -- The line must be: optional whitespace, the comment prefix, optional
  -- whitespace, then the three-brace marker.
  local after_indent = line:match("^%s*(.*)$")
  if after_indent:sub(1, #marker) ~= marker then
    return nil
  end
  local comment_text = after_indent:sub(#marker + 1):match("^%s*(.*)$")
  local head = comment_text:sub(1, 3)
  if head == "{{{" then
    return "open"
  elseif head == "}}}" then
    return "close"
  end
  return nil
end
394-- }}}
395
396-- {{{ render_text_page()
397-- A code/text file: numbered, highlighted lines (gutter line numbers double as
398-- #L<n> anchors for deep links). Issue 10-055: vimfold regions become clickable
399-- <details> blocks -- mouse-driven folds with no JavaScript, the same mechanism
400-- the sidebar uses. Folds default OPEN so the page reads top-to-bottom (and so a
401-- deep link to a line inside a fold still resolves) until the reader collapses
402-- one. Unbalanced markers are closed defensively so the HTML never breaks.
local function render_text_page(rel, body, lang)
  -- rel:  repo-relative path, used for the page header.
  -- body: the file's text.
  -- lang: a LANGS entry (or nil) driving highlighting and fold detection.
  -- Returns the HTML for the content pane.

  -- Split into lines. A trailing "\n" TERMINATES the last line; it does not
  -- start another one. Only add a terminator when the text lacks one --
  -- appending unconditionally gave every normally-terminated file a phantom
  -- empty last line and an "N lines" count that was one too high.
  local text = body
  if text:sub(-1) ~= "\n" then text = text .. "\n" end
  local raw = {}
  for line in text:gmatch("(.-)\n") do raw[#raw + 1] = line end

  -- Each line is its own block (a .cl div, or the <summary> for a fold-open
  -- line), so line breaks come from the elements -- not from newline
  -- characters fighting with the block-level fold <details>.
  local function cl(gutter, hl) return '<div class="cl">' .. gutter .. hl .. "</div>" end

  local state, parts, depth = { in_block = false }, {}, 0
  for n = 1, #raw do
    local line = raw[n]
    local hl; hl, state = highlight_line(line, lang, state)
    -- The gutter number doubles as the #L<n> deep-link anchor.
    local gutter = string.format('<span class="ln" id="L%d">%d</span>', n, n)
    local kind = fold_marker_kind(line, lang)
    if kind == "open" then
      -- The marker line becomes the clickable summary; folds start open.
      parts[#parts + 1] = '<details class="fold" open><summary class="cl">'
        .. gutter .. hl .. '</summary>'
      depth = depth + 1
    elseif kind == "close" and depth > 0 then
      parts[#parts + 1] = cl(gutter, hl) .. '</details>'
      depth = depth - 1
    else
      -- Ordinary line, or a close marker with no fold open (shown as text).
      parts[#parts + 1] = cl(gutter, hl)
    end
  end
  -- Close any folds left open by unbalanced markers so the HTML stays valid.
  while depth > 0 do parts[#parts + 1] = '</details>'; depth = depth - 1 end

  -- A <div>, not a <pre>: the fold regions are <details> (flow content), which
  -- is not valid inside <pre> (phrasing-only).
  return page_header(rel, string.format("%d lines", #raw))
    .. '<div class="code">' .. table.concat(parts, "") .. "</div>"
end
439-- }}}
440
441-- {{{ render_markdown_page()
442-- Issue 10-055: a Markdown file rendered as formatted HTML (headings, tables,
443-- lists, code, links) rather than numbered plain text. Used for .md / .info.md
444-- and for extensionless prose (the vision doc). The renderer escapes all text,
445-- so untrusted markup cannot inject tags.
local function render_markdown_page(rel, body)
  -- Header without a line-count subtitle, then the rendered Markdown wrapped
  -- in the .md container.
  local header = page_header(rel)
  local rendered = markdown.render(body)
  return header .. '<div class="md">' .. rendered .. "</div>"
end
449-- }}}
450
451-- {{{ render_image_page()
-- An image file: show it. The <img> points at the image by BASENAME, i.e. at a
-- copy sitting in the same directory as this page (output/source/<rel>).
local function render_image_page(rel)
  -- The image file is copied next to this page by main(), so a relative,
  -- self-contained src works once deployed. Pointing at the original under
  -- DIR/<rel> only worked locally: just output/source/ is uploaded.
  local dir, name = rel:match("^(.*/)([^/]+)$")
  if not name then dir, name = "", rel end
  -- Percent-encode spaces etc. in the filename (same directory, so there are
  -- no slashes to preserve).
  local src = name:gsub("[^%w%-%._~]", function(c) return string.format("%%%02X", string.byte(c)) end)
  -- alt sits in a double-quoted attribute: besides &, < and >, a raw " in the
  -- path must be escaped too or it would end the attribute early.
  local alt = escape_html(rel):gsub('"', "&quot;")
  return string.format(
    '<h1><span class="dir">%s</span>%s</h1><div class="rule"></div><img src="%s" alt="%s">',
    escape_html(dir), escape_html(name), src, alt)
end
467-- }}}
468
469-- {{{ write_style_file()
470-- The "Machine Codex" theme: the source browser as an illuminated manuscript of
471-- a machine. Deep ink, a warm upper-left glow + grain for atmosphere, a
472-- characterful serif (Fraunces) for the human-facing chrome and crisp mono
473-- (JetBrains Mono) for the machine's own text. Syntax colors are DERIVED from
474-- the project's identity palette: gold = literal text (poems are gold on the
475-- main site), blue = structure, teal = the "why" of comments. Written once and
476-- linked by every page (small pages, one cohesive source of truth for the look).
local function write_style_file(out_root)
  -- The whole theme is one literal stylesheet; it is handed to
  -- utils.write_file for <out_root>/_style.css and that call's result is
  -- returned unchanged.
  local stylesheet = [[
/* Machine Codex -- source browser theme (Issue 10-052). */
:root{
 --ink:#0b0c10; --ink-2:#13151f; --panel:#0e1018;
 --paper:#e9e4d8; --paper-dim:#9b9484; --paper-faint:#6a6457;
 --gold:#d8b14a; --gold-soft:#c6b257; --blue:#74a0e6; --teal:#5f9c92; --coral:#e08363;
 --rule:#24262f; --rule-soft:#191b22;
}
*{box-sizing:border-box;}
html{scroll-behavior:smooth;}
body{
 margin:0; background:var(--ink); color:var(--paper);
 font-family:'JetBrains Mono',ui-monospace,'Hack',Consolas,monospace;
 font-size:13.5px; line-height:1.62; -webkit-font-smoothing:antialiased;
 background-image:
 radial-gradient(820px 460px at 8% -12%, rgba(216,177,74,.08), transparent 60%),
 radial-gradient(680px 480px at 104% -6%, rgba(95,156,146,.06), transparent 55%);
 background-attachment:fixed;
}
/* fine grain overlay for paper-like depth */
body::after{
 content:""; position:fixed; inset:0; pointer-events:none; z-index:9; opacity:.04;
 background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='140' height='140'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='.85' numOctaves='2' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E");
}
a{color:var(--blue); text-decoration:none; transition:color .14s ease;}
a:hover{color:var(--gold);}
#wrap{display:flex; align-items:flex-start; min-height:100vh;}

/* ---- sidebar: a book's table of contents ---- */
#side{
 width:308px; min-width:308px; height:100vh; position:sticky; top:0; overflow:auto;
 padding:1.6rem 1.25rem 3rem; border-right:1px solid var(--rule);
 background:linear-gradient(176deg, var(--ink-2), var(--ink) 70%);
 font-size:12.5px; animation:sideIn .55s cubic-bezier(.2,.8,.2,1) both;
}
#side .home{display:block; margin-bottom:1.1rem;}
#side .brand{
 display:block; font-family:'Fraunces',Georgia,serif; font-weight:600; font-size:1.5rem;
 color:var(--gold); letter-spacing:.005em; line-height:1.05;
}
#side .tagline{
 display:block; margin-top:.4rem; font-size:.62rem; letter-spacing:.22em;
 text-transform:uppercase; color:var(--paper-faint);
}
#side .home:hover .brand{color:var(--gold);}
#side .tree{border-top:1px solid var(--rule); padding-top:.9rem;}
#side details{margin:.05rem 0; border-left:1px solid var(--rule-soft); padding-left:.6rem;}
#side summary{
 cursor:pointer; list-style:none; padding:1.5px 0; color:var(--teal);
 font-family:'Fraunces',Georgia,serif; font-style:italic; font-size:.96rem;
}
#side summary::-webkit-details-marker{display:none;}
#side summary::before{content:"\203A\00a0"; color:var(--rule); font-style:normal;}
#side details[open]>summary::before{content:"\02C5\00a0"; color:var(--gold-soft);}
#side summary:hover{color:var(--gold);}
#side a{
 display:block; padding:1.5px 0 1.5px .35rem; color:var(--paper-dim);
 border-left:2px solid transparent; transition:color .12s ease, border-color .12s ease, padding .12s ease;
 white-space:nowrap; overflow:hidden; text-overflow:ellipsis;
}
#side a:hover{color:var(--gold); border-left-color:var(--gold); padding-left:.65rem;}
/* Feature F: a saved page links OUT to the real article (a new tab), not to a
 hosted copy. The arrow tells the reader this link leaves the site. */
#side a.ext::after{content:"\00a0\2197"; color:var(--paper-faint); font-size:.85em;}
#side a.ext:hover::after{color:var(--gold);}
#side .cur{
 display:block; padding:1.5px .4rem; margin-left:.1rem; color:var(--ink);
 background:linear-gradient(90deg,var(--gold),var(--gold-soft)); border-radius:3px; font-weight:700;
}
#side .cur::before{content:"\25A0\00a0"; opacity:.7;}

/* ---- main content ---- */
#main{flex:1; min-width:0; padding:2.3rem 2.9rem 7rem; overflow-x:auto; animation:mainIn .65s cubic-bezier(.2,.8,.2,1) both;}
#main h1{
 font-family:'JetBrains Mono',monospace; font-weight:700; font-size:1.05rem;
 color:var(--paper); margin:0 0 .15rem; word-break:break-all; letter-spacing:-.01em;
}
#main h1 .dir{color:var(--paper-faint); font-weight:400;}
#main .meta{color:var(--paper-dim); font-size:.74rem; letter-spacing:.12em; text-transform:uppercase; margin:.1rem 0 1rem;}
#main .rule{height:1px; background:linear-gradient(90deg,var(--gold) 0,var(--gold) 64px,var(--rule) 64px,transparent); margin:0 0 1.6rem;}

/* code */
/* a div, not a <pre>, so the fold <details> nest validly. Each line is a .cl
 block that carries white-space:pre to keep its own indentation. */
.code{margin:0; font-size:13px; tab-size:4; line-height:1.6; display:block; overflow-x:auto;}
.cl{white-space:pre; display:block;}
/* an issue number inside a comment, linked to its page (Issue 10-055, Feature D) */
.issue-ref{color:var(--gold-soft); border-bottom:1px dotted var(--gold-soft); font-style:normal;}
.issue-ref:hover{color:var(--gold); border-bottom-color:var(--gold);}
.ln{
 color:var(--rule); user-select:none; display:inline-block;
 min-width:3.4rem; padding-right:1.4rem; text-align:right; font-variant-numeric:tabular-nums;
}
.ln:target{color:var(--gold);}
.c-cm{color:var(--teal); font-style:italic;}
.c-st{color:var(--gold-soft);}
.c-kw{color:var(--blue);}
.c-nu{color:var(--coral);}
img{max-width:100%; height:auto; border:1px solid var(--rule); border-radius:2px;}

/* ---- back-to-site link (Issue 10-055) ---- */
#side .site-link{
 display:block; margin:-.4rem 0 1rem; padding:.2rem 0; color:var(--paper-dim);
 font-family:'Fraunces',Georgia,serif; font-style:italic; font-size:.92rem;
}
#side .site-link:hover{color:var(--gold);}
/* output/ is not a real tree branch (it is git-ignored, ~23k pages); it is a
 single deep link to the live site's poem index, where every page is reachable. */
#side .output-link{
 display:block; margin:.9rem 0 0; padding:.5rem .6rem; color:var(--paper-dim);
 border:1px dashed var(--rule); border-radius:3px; font-size:.82rem; letter-spacing:.02em;
}
#side .output-link:hover{color:var(--gold); border-color:var(--gold-soft);}
/* the "download everything" archive (site + source): a solid call-to-action box,
 a touch more prominent than the dashed output/ link it sits beside. */
#side .download-link{
 display:block; margin:.55rem 0 0; padding:.5rem .6rem; color:var(--paper-dim);
 border:1px solid var(--gold-soft); border-radius:3px; font-size:.82rem; letter-spacing:.02em;
}
#side .download-link:hover{color:var(--gold); border-color:var(--gold);}

/* ---- code folds (Issue 10-055): vimfold regions as no-JS <details> ---- */
/* Inside the code <pre>: the marker line is the clickable summary; its body
 lines collapse under it. Default open so the page reads straight through. */
.fold{display:block;}
.fold>summary{cursor:pointer; list-style:none;}
.fold>summary::-webkit-details-marker{display:none;}
.fold>summary .ln{position:relative;}
/* a small triangle in the gutter margin so it reads as a fold handle */
.fold>summary::before{content:"\25BE"; color:var(--gold-soft); margin-right:.25rem;}
.fold:not([open])>summary::before{content:"\25B8"; color:var(--paper-faint);}
.fold:not([open])>summary{color:var(--paper-dim);}

/* ---- rendered markdown (Issue 10-055) ---- */
.md{max-width:80ch; line-height:1.72;}
.md h1,.md h2,.md h3,.md h4{font-family:'Fraunces',Georgia,serif; color:var(--gold); line-height:1.2; margin:1.7rem 0 .6rem;}
.md h1{font-size:1.9rem;} .md h2{font-size:1.45rem;} .md h3{font-size:1.18rem;} .md h4{font-size:1rem;}
.md p{margin:0 0 1rem;}
.md ul,.md ol{margin:0 0 1rem 1.5rem;}
.md li{margin:.25rem 0;}
.md code{font-family:'JetBrains Mono',monospace; background:var(--ink-2); padding:.1rem .32rem; border-radius:3px; color:var(--gold-soft); font-size:.92em;}
.md pre{background:var(--ink-2); padding:1rem 1.1rem; border-radius:4px; overflow-x:auto; margin:0 0 1rem; border:1px solid var(--rule);}
.md pre code{background:none; padding:0; color:var(--paper); font-size:13px;}
.md blockquote{border-left:3px solid var(--gold-soft); margin:0 0 1rem; padding:.2rem 0 .2rem 1rem; color:var(--paper-dim); font-style:italic;}
.md table{border-collapse:collapse; margin:0 0 1.2rem;}
.md th,.md td{border:1px solid var(--rule); padding:.4rem .85rem; text-align:left; vertical-align:top;}
.md th{background:var(--ink-2); color:var(--gold-soft); font-weight:600;}
.md hr{border:0; border-top:1px solid var(--rule); margin:1.7rem 0;}
.md a{color:var(--blue);} .md a:hover{color:var(--gold);}

/* welcome / index */
.welcome{max-width:62ch;}
.welcome h1{
 font-family:'Fraunces',Georgia,serif; font-weight:600; font-size:2.7rem;
 color:var(--gold); margin:.2rem 0 .2rem; line-height:1.04; letter-spacing:.005em;
}
.welcome .kicker{font-size:.66rem; letter-spacing:.28em; text-transform:uppercase; color:var(--paper-faint); margin:0 0 .4rem;}
.welcome p{font-family:'Fraunces',Georgia,serif; font-size:1.18rem; line-height:1.7; color:var(--paper); margin:0 0 1.1rem;}
.welcome .mono{font-family:'JetBrains Mono',monospace; font-size:.82rem; line-height:1.7; color:var(--paper-dim);}
.welcome .mono b{color:var(--gold-soft); font-weight:500;}
.welcome .div{height:1px; background:linear-gradient(90deg,var(--gold),transparent); margin:1.6rem 0;}

@keyframes sideIn{from{opacity:0; transform:translateX(-14px);} to{opacity:1; transform:none;}}
@keyframes mainIn{from{opacity:0; transform:translateY(16px);} to{opacity:1; transform:none;}}
@media(prefers-reduced-motion:reduce){*{animation:none !important;}}
@media(max-width:760px){
 #wrap{flex-direction:column;}
 #side{width:100%; min-width:0; height:auto; position:static; border-right:0; border-bottom:1px solid var(--rule);}
 #main{padding:1.6rem 1.15rem 4rem;}
 .welcome h1{font-size:2rem;}
}
]]
  local target = out_root .. "/_style.css"
  return utils.write_file(target, stylesheet)
end
652-- }}}
653
654-- {{{ main()
655-- {{{ extract_canonical_url()
-- Issue 10-055 (Feature F): recover the original web address of a saved page.
-- A browser's "Save Page As" keeps the page's own <link rel="canonical" ...> and
-- <meta property="og:url" ...> tags, so the out-link is read from the file
-- itself instead of being maintained by hand. Candidates are tried in priority
-- order: canonical before og:url, and for each tag both attribute orders.
--   html    -- the saved page's markup, as one string
--   returns -- the first URL found, or nil when the page declares none (the
--              caller reports such a page rather than guessing an address)
local function extract_canonical_url(html)
    local candidates = {
        '<link[^>]-rel=["\']canonical["\'][^>]-href=["\']([^"\']+)["\']',
        '<link[^>]-href=["\']([^"\']+)["\'][^>]-rel=["\']canonical["\']',
        '<meta[^>]-property=["\']og:url["\'][^>]-content=["\']([^"\']+)["\']',
        '<meta[^>]-content=["\']([^"\']+)["\'][^>]-property=["\']og:url["\']',
    }
    for _, pattern in ipairs(candidates) do
        local url = html:match(pattern)
        if url then return url end
    end
    return nil
end
670-- }}}
671
672-- {{{ copy_raw()
-- Byte-for-byte copy (binary-safe "rb"/"wb"), used to place an image next to its
-- rendered page so it displays once deployed. Pure Lua I/O on purpose: the project
-- bans shell exec for file targeting, and io handles spaces in paths natively.
--   src_abs -- path of the file to read
--   dst_abs -- path to write; its parent directory is created first
--   returns -- true only when the bytes were read, written AND the handle closed
--              cleanly; false on any failure, so a truncated copy is never
--              reported as a success
local function copy_raw(src_abs, dst_abs)
    local src = io.open(src_abs, "rb")
    if not src then return false end
    local data = src:read("*all")
    src:close()
    -- A read error yields nil; writing nil would raise instead of returning false.
    if not data then return false end

    -- A destination without any "/" has no parent directory to create.
    local parent = dst_abs:match("^(.*)/[^/]+$")
    if parent then ensure_dir(parent) end

    local dst = io.open(dst_abs, "wb")
    if not dst then return false end
    -- write() and close() both return a falsy value on failure (e.g. a full
    -- disk); close() is what flushes the buffer, so its result matters as well.
    local wrote = dst:write(data)
    local closed = dst:close()
    if wrote and closed then return true end
    return false
end
686-- }}}
687
688-- {{{ find_mirror_pages()
-- A saved webpage is an .html with a sibling "<name>_files/" directory of its
-- assets (how browsers "Save Page As"). Feature F: these are NOT published -- the
-- table of contents links straight to the original article instead.
--   all_files -- list of tracked paths, relative to DIR
-- Returns three values:
--   mirror_url[rel] = external URL   (saved pages we resolved a link for)
--   is_asset(rel)                    (true for any "<name>_files/..." asset path)
--   missing                          (list of saved pages whose URL we couldn't find)
-- Asset prefixes come from EVERY structural saved page (even one missing a URL),
-- so a mirror's raw asset bytes are never mistaken for project source and dumped.
local function find_mirror_pages(all_files)
    -- Record every ancestor directory of every file (trailing slash included),
    -- not only the immediate parent: a "<name>_files/" directory whose assets
    -- all sit in subdirectories (e.g. "<name>_files/img/a.png") must still count
    -- as present, otherwise the saved page and its assets would be published as
    -- if they were project source.
    local has_prefix = {}
    for _, rel in ipairs(all_files) do
        local slash = rel:find("/", 1, true)
        while slash do
            has_prefix[rel:sub(1, slash)] = true
            slash = rel:find("/", slash + 1, true)
        end
    end

    local mirror_url, asset_prefixes, missing = {}, {}, {}
    for _, rel in ipairs(all_files) do
        local stem = rel:match("^(.*)%.html$")
        if stem and has_prefix[stem .. "_files/"] then
            asset_prefixes[#asset_prefixes + 1] = stem .. "_files/"
            -- An explicit override wins; otherwise read the URL the page declares
            -- about itself. A saved page with neither is reported (missing), not
            -- linked to a guessed address.
            local url = MIRROR_URL_OVERRIDES[rel]
            if not url then
                local html = utils.read_file(DIR .. "/" .. rel)
                url = html and extract_canonical_url(html)
            end
            if url then
                mirror_url[rel] = url
            else
                missing[#missing + 1] = rel
            end
        end
    end

    -- Plain prefix comparison (no patterns), so magic characters in a path
    -- cannot change the result.
    local function is_asset(rel)
        for _, p in ipairs(asset_prefixes) do
            if rel:sub(1, #p) == p then return true end
        end
        return false
    end

    return mirror_url, is_asset, missing
end
728-- }}}
729
730-- {{{ classify_file()
-- Decide how a published path is handled. The kinds, in the order they are
-- tested:
--   "view"  -- an archived HTML page, shown rendered rather than as highlighted
--              source (Issue 10-063): copied raw, linked straight from the sidebar
--   "md"    -- formatted markdown
--   "code"  -- numbered + highlighted source (the only kind that also returns a
--              lang_id as second value)
--   "image" -- an image page
--   "skip"  -- a binary: listed nowhere, so it can never become a dead link
-- Issue 10-055: extensionless tracked files (e.g. notes/vision) used to fall
-- through to "skip" while the sidebar still linked them -> a guaranteed 404; now
-- an extensionless TEXT file renders as prose and only a genuine binary is
-- skipped. Returns (kind, lang_id).
local function classify_file(rel)
    if is_viewable_html(rel) then return "view" end

    local e, lang_id = ext(rel)
    local text_kind = TEXT_EXTS[e]

    if lang_id == "md" or text_kind == "md" then return "md" end
    if lang_id or text_kind then return "code", lang_id end
    if IMAGE_EXTS[e] then return "image" end
    -- An unrecognised extension is treated as a binary.
    if e ~= "" then return "skip" end

    -- No extension: prose unless the bytes say binary (a NUL is the giveaway).
    -- An unreadable file is skipped as well.
    local body = utils.read_file(DIR .. "/" .. rel)
    local is_text = body and not body:find("\0", 1, true)
    if is_text then return "md" end
    return "skip"
end
756-- }}}
757
758-- {{{ build_issue_index()
-- Issue 10-055 (Feature D): build a lookup from issue NUMBER to the page path of
-- that issue, so a comment mentioning "Issue 10-036" can be linked to it.
-- Only paths under issues/ that end in .md are considered. The number is the
-- leading token of the file name: "10-036", "9-005b", or a bare "002" (digits,
-- an optional "-digits" part, an optional single lowercase letter). The file may
-- sit in issues/ itself or in any subdirectory, which is why it is looked up
-- here rather than hard-coded.
--   rels    -- list of published relative paths
--   returns -- table: issue number (string) -> relative path; when two files
--              share a number, the one later in `rels` wins
local function build_issue_index(rels)
    local by_number = {}
    for _, path in ipairs(rels) do
        local is_issue_page = path:find("^issues/") ~= nil
            and path:find("%.md$") ~= nil
        if is_issue_page then
            local filename = path:match("([^/]+)$")
            -- Try the two-part form first so "10-036" is not cut down to "10".
            local number = filename:match("^(%d+%-%d+%l?)")
            if not number then
                number = filename:match("^(%d+%l?)")
            end
            if number then by_number[number] = path end
        end
    end
    return by_number
end
775-- }}}
776
777-- {{{ linkify_issues()
-- Link "Issue 10-036" / "Issues 10-036" mentions inside already-rendered
-- (escaped) comment HTML to the page of that issue. Only the number becomes the
-- link text, and only when it resolves in `index`; an unknown number is put back
-- exactly as it was, so no dead link is ever emitted.
--   html    -- rendered page fragment to scan
--   index   -- issue number -> relative path (see build_issue_index)
--   prefix  -- relative climb from the current page up to output/source/
--   returns -- the fragment with resolvable mentions linked (a single value)
local function linkify_issues(html, index, prefix)
    local function link_mention(lead, num)
        local target = index[num]
        if target then
            return string.format('%s<a class="issue-ref" href="%s%s.html">%s</a>',
                lead, prefix, target, num)
        end
        return lead .. num
    end

    -- gsub also returns a match count; assigning to one local drops it.
    local linked = html:gsub("(Issues?%s+)(%d+%-?%d*%l?)", link_mention)
    return linked
end
790-- }}}
791
-- Entry point of the build. Classifies every published file, writes one page per
-- renderable file plus the index under DIR/output/source, then prints a report
-- of what was published and what was held back. Takes no arguments and returns
-- nothing; every outcome is reported through print().
local function main()
    local all_files, skipped = list_published_files()
    local out_root = DIR .. "/output/source"
    ensure_dir(out_root)
    write_style_file(out_root)

    -- Pass 1 -- classify. Build the renderable list (and therefore the tree) from
    -- ONLY files that will actually get a page, so the table of contents can never
    -- link a page we did not write (the old extensionless-file 404). Genuine
    -- binaries are counted for the report but kept out of the tree entirely.
    -- Feature F: saved webpages (an .html with a sibling _files/ dir) are NOT
    -- hosted. Each appears in the tree as an external link to the original article;
    -- no page is written for it and its assets are held back (we host no copy).
    local mirror_url, is_asset, mirror_missing = find_mirror_pages(all_files)
    local missing_set = {}
    for _, rel in ipairs(mirror_missing) do missing_set[rel] = true end
    local renderable, held_assets, skipped_files = {}, 0, 0
    for _, rel in ipairs(all_files) do
        if mirror_url[rel] then
            -- Keep it in the tree (so the table of contents still lists it) but
            -- give it no page -- the sidebar entry links straight out to the web.
            renderable[#renderable + 1] = { rel = rel, kind = "mirror" }
        elseif missing_set[rel] then
            -- A saved page we found no out-link for: never dump its raw HTML as
            -- "source" (it isn't ours). Hold it back; the report names it so the
            -- author can add a MIRROR_URL_OVERRIDES entry.
            skipped_files = skipped_files + 1
        elseif is_asset(rel) then
            held_assets = held_assets + 1 -- a saved page's asset: not published
        else
            local kind, lang_id = classify_file(rel)
            if kind == "skip" then
                skipped_files = skipped_files + 1
            else
                renderable[#renderable + 1] = { rel = rel, kind = kind, lang_id = lang_id }
            end
        end
    end

    local rels = {}
    for _, f in ipairs(renderable) do rels[#rels + 1] = f.rel end
    local tree = build_tree(rels)
    local issue_index = build_issue_index(rels) -- Feature D: comment -> issue links
    local written, images, write_failed = 0, 0, {}

    -- Pass 2 -- render each renderable file by its kind.
    for _, f in ipairs(renderable) do
        local rel = f.rel

        if f.kind == "mirror" then
            -- Feature F: a saved page is listed in the tree but never written out
            -- -- its sidebar entry links to the original article on the web. There
            -- is nothing to render here; the out-link lives in render_sidebar.
            goto continue
        end

        if f.kind == "view" then
            -- Issue 10-063: an archived HTML page (a word-cloud snapshot) is copied
            -- byte-for-byte into the source tree so the link resolves on the deployed
            -- site (only output/source/ is uploaded). No source-view page is written
            -- -- its sidebar entry (render_sidebar) opens this file directly, rendered.
            if copy_raw(DIR .. "/" .. rel, out_root .. "/" .. rel) then
                written = written + 1
            else
                write_failed[#write_failed + 1] = rel
            end
            goto continue
        end

        -- Build the set of directory names on the path to this file, so the
        -- sidebar opens them.
        local path_set = {}
        for p in rel:gmatch("[^/]+") do path_set[p] = true end
        local prefix = relpath_prefix(rel)
        local sidebar = render_sidebar(tree, rel, prefix, path_set, mirror_url)

        local content
        -- Stays true for every non-image page. An image page only counts as
        -- published when the picture it embeds was copied too; the result used
        -- to be ignored here although the "view" branch above checks it.
        local asset_ok = true
        if f.kind == "image" then
            content = render_image_page(rel)
            -- Copy the image into the source tree (beside its page) so it actually
            -- displays when deployed -- only output/source/ is uploaded, not the
            -- original under DIR/<rel>. The page references it by basename.
            asset_ok = copy_raw(DIR .. "/" .. rel, out_root .. "/" .. rel)
        else
            local body = utils.read_file(DIR .. "/" .. rel)
            if not body then skipped_files = skipped_files + 1; goto continue end
            if f.kind == "md" then
                content = render_markdown_page(rel, body)
            else
                content = render_text_page(rel, body, LANGS[f.lang_id or "text"])
                -- Feature D: comments here mention issues; link them to their pages.
                content = linkify_issues(content, issue_index, prefix)
            end
        end

        -- Presumably swaps a root placeholder emitted by page_shell for this
        -- page's relative prefix -- TODO confirm against page_shell.
        -- NOTE(review): as written, the pattern ends in a lone "%", which Lua
        -- rejects as malformed once the text before it has matched. Confirm the
        -- exact placeholder token and escape it (e.g. "%%TOKEN%%").
        local page = page_shell(rel, sidebar, content):gsub("%../%", prefix)
        local out_file = out_root .. "/" .. rel .. ".html"
        ensure_dir(out_file:match("^(.*)/[^/]+$"))
        -- A failed write is a warning we must surface, not swallow -- it usually
        -- means a path with characters io.open/mkdir choke on (spaces, quotes).
        -- The page is still attempted when the image copy failed, but the file is
        -- then listed as failed (once) instead of being counted as published.
        local page_ok = utils.write_file(out_file, page)
        if not (page_ok and asset_ok) then
            write_failed[#write_failed + 1] = rel
        elseif f.kind == "image" then
            images = images + 1
        else
            written = written + 1
        end
        ::continue::
    end

    -- Feature F: a saved page's assets (CSS, images, fonts) are deliberately NOT
    -- copied -- we host no mirror of someone else's article. The held-back count is
    -- reported below for auditability.

    -- Index: welcome + the full tree (nothing current, root prefix).
    do
        local sidebar = render_sidebar(tree, nil, "", {}, mirror_url)
        local welcome = string.format([[<div class="welcome">
<p class="kicker">A link-only view of the machine</p>
<h1>The source, read as a book.</h1>
<p>This is the code, the issues, and the documentation that build the poetry
collection &mdash; rendered as a small website so it can be shared by link
without a public repository. The private monorepo never leaves the machine.</p>
<div class="div"></div>
<p class="mono"><b>%d</b> text files and <b>%d</b> images, published from an
allowlist of directories. The private input corpus and the transcripts are
deliberately held back. Pick a file from the table of contents on the left, or
follow any path down through the tree.</p>
</div>]], written, images)
        -- NOTE(review): same placeholder pattern as in the page loop; it ends in
        -- a lone "%" and needs the same confirmation.
        local page = page_shell("Machine Codex", sidebar, welcome):gsub("%../%", "")
        -- The index is the way into every other page, so a failed write is
        -- reported with the other write failures instead of being dropped.
        if not utils.write_file(out_root .. "/index.html", page) then
            write_failed[#write_failed + 1] = "index.html"
        end
    end

    -- Report what was published and -- loudly -- what was held back.
    print(string.format("[source-browser] published %d text + %d images to %s",
        written, images, out_root))
    do
        -- Feature F: count saved pages now linked out to the web, and the assets we
        -- therefore did NOT host. This makes the "host nothing, link out" policy
        -- visible in the build log.
        local linked_out = 0
        for _ in pairs(mirror_url) do linked_out = linked_out + 1 end
        if linked_out > 0 then
            print(string.format("[source-browser] %d saved page(s) link out to the original article; %d of their assets held back (not hosted)",
                linked_out, held_assets))
        end
        if #mirror_missing > 0 then
            print(string.format("[source-browser] WARNING: %d saved page(s) had no canonical/og:url and were held back -- add a MIRROR_URL_OVERRIDES entry: %s",
                #mirror_missing, table.concat(mirror_missing, ", ")))
        end
    end
    if skipped_files > 0 then
        print(string.format("[source-browser] skipped %d non-text/non-image files (binaries)", skipped_files))
    end
    if #write_failed > 0 then
        print(string.format("[source-browser] WARNING: %d pages failed to write (odd path characters): %s",
            #write_failed, table.concat(write_failed, ", ")))
    end
    -- `skipped` maps a held-back directory to its file count; sort the lines so
    -- the report is stable from run to run (pairs() order is unspecified).
    local skip_report = {}
    for dir, count in pairs(skipped) do skip_report[#skip_report + 1] = string.format("%s (%d)", dir, count) end
    table.sort(skip_report)
    if #skip_report > 0 then
        print("[source-browser] NOT published (allowlist held these back): " .. table.concat(skip_report, ", "))
    end
end
-- }}}

-- Run the build when the script is executed.
main()
960