src/flat-html-generator.lua
1#!/usr/bin/env lua
2
3-- Core flat HTML page generation system for neocities-modernization
4-- Generates 13,680+ pages with similarity/diversity ranking in compiled.txt format
5
-- {{{ local function setup_dir_path
-- Resolves the project root directory.
-- provided_dir: optional explicit root; any truthy value is returned unchanged
-- Returns: provided_dir when given, otherwise the hard-coded default checkout path
local function setup_dir_path(provided_dir)
    return provided_dir
        or "/mnt/mtwo/programming/ai-stuff/neocities-modernization"
end
-- }}}
14
-- Script configuration: the project root is the first command-line argument that
-- does not start with "-". Every dash-prefixed argument (which includes -I) is
-- skipped so a flag is never mistaken for DIR; with no such argument the default
-- from setup_dir_path() stands.
-- NOTE(review): the value that follows a value-taking flag is itself a
-- non-dash argument, so it would be picked up here if it came first -- confirm
-- that callers always pass the directory before such flags.
local DIR = setup_dir_path()
for _, arg_val in ipairs(arg or {}) do
    if arg_val:sub(1, 1) ~= "-" then
        DIR = arg_val
        break
    end
end
25
26-- Load required libraries
27package.path = DIR .. "/libs/?.lua;" .. DIR .. "/src/?.lua;" .. package.path
28local utils = require("utils")
29local dkjson = require("dkjson")
30-- Issue 8-056: Shared text formatting module for whitespace preservation
31local text_formatter = require("text-formatter")
32-- Shared in-place progress bar (same look + TTY/--debug rules as the GPU stages)
33local progress = require("progress-display")
34-- Issue 9-013: render ranked image entries (pseudo-poems) as image boxes
35local image_render = require("image-render")
36-- Issue 11-005: the explore pages read their prose from editable input/pages/*.txt
37-- files; this fills the {MARKER} placeholders with the live numbers.
38local page_template = require("page-template")
39
40-- Issue 10-003: Load unified config from config.lua
41local config_loader = require("config-loader")
42config_loader.set_project_root(DIR)
43local unified_config = config_loader.load()
44
45-- inference-server-config tells us which embedding model the rest of the pipeline is
46-- pointed at. We use it to derive the cache-directory name in the two
47-- diversity/similarity loader fallbacks below; previously those defaulted
48-- to "embeddinggemma:latest" as a literal string, which silently broke
49-- after a model swap.
50local inference_config = require("inference-server-config")
51inference_config.set_project_root(DIR)
52
53-- Initialize asset path configuration (CLI --dir takes precedence over config)
54utils.init_assets_root(arg)
55
-- Load effil for parallel processing (optional - falls back to single-threaded if unavailable)
-- CRITICAL: effil.so is a C library, must be in cpath not path
-- NOTE(review): the directory appended below is a hard-coded absolute path; on a
-- machine without it the require can only succeed through the pre-existing
-- package.cpath entries.
package.cpath = package.cpath .. ';/home/ritz/programming/ai-stuff/libs/lua/effil-jit/build/?.so'
local effil = nil
local has_threading = false

-- pcall keeps a missing or broken effil from aborting the script: on failure
-- effil stays nil, has_threading stays false, and the error value is captured in
-- `err` (which is not inspected in this part of the file).
local success, err = pcall(function()
    effil = require('effil')
    has_threading = true
end)
66
-- Issue 10-034: Orchestrator message types for lazy loading parallel HTML generation
-- Main thread acts as cache server, sending 80KB work slices instead of workers loading 700MB
-- Each constant is a plain string tag; the trailing comment gives the direction
-- the message travels and what it means.
local MSG_REQUEST_WORK = "get_work" -- Worker → Main: "give me a poem to process"
local MSG_WORK_SLICE = "work" -- Main → Worker: poem_index + rankings
local MSG_WORK_DONE = "done" -- Worker → Main: "finished poem X"
local MSG_SHUTDOWN = "shutdown" -- Main → Worker: "no more work, exit"

-- Module table (presumably filled with the public API and returned further down
-- the file -- outside this view).
local M = {}
75
-- Mock color assignment for testing (until we have real embeddings)
-- Maps a numeric poem key to a color NAME; every name used here is a key of
-- COLOR_CONFIG below. (Presumably the key is the poem id -- confirm against the
-- code that reads this table.)
local MOCK_POEM_COLORS = {
    [1] = "blue", -- Introduction post
    [2] = "purple", -- Philosophy/metaphysics
    [3] = "red", -- Passion/energy
    [5] = "orange", -- Programming/technical
    [4625] = "red", -- Politics/passion
    [4626] = "gray", -- Short post
    [4624] = "green" -- Hope/future themes
}

-- Color configuration for progress bars
-- Maps a color name to its CSS hex value.
local COLOR_CONFIG = {
    red = "#dc3c3c",
    blue = "#3c78dc",
    green = "#3cb45a",
    purple = "#8c3cc8",
    orange = "#e68c3c",
    yellow = "#c8b428",
    gray = "#787878"
}
97
-- Issue 8-057: Boost visual formatting color scheme
-- Based on /notes/boost post image style.png design reference
-- One CSS hex color per visual element of a boost frame. arrow, outer_frame and
-- content_text carry the same hex values as COLOR_CONFIG's red, blue and yellow;
-- inner_box is a color that exists only here.
local BOOST_COLOR_CONFIG = {
    arrow = "#dc3c3c", -- Red/Magenta: ◀─ and ─▶ arrows, [BOOST] label
    outer_frame = "#3c78dc", -- Blue/Navy: ╔═╗║╚═╝ outer frame
    inner_box = "#2aa198", -- Teal/Cyan: ┌─┐│└─┘ inner content box
    content_text = "#c8b428" -- Yellow: The actual boosted text content
}

-- The boost frame is drawn by ONE shared module so the main + worker + word-page
-- copies cannot drift (they had: misaligned walls, wrong junction columns, ▢
-- corruption). See src/boost-bars.lua + src/boost-bars.test.lua.
-- The color table above is handed to that module once, at load time.
local boost_bars = require("boost-bars")
boost_bars.configure(BOOST_COLOR_CONFIG)
112
113-- {{{ Issue 16-010: Monospace font enforcement
114-- Font stack prioritizes Hack Nerd Font (user's preference), then falls back
115-- to other popular monospace fonts for consistent rendering across browsers.
116-- Uses CSS font-stack approach (no external font files required).
117local FONT_STYLE = [[
118<style>
119body, pre {
120 font-family: 'Hack Nerd Font', 'Hack', 'Fira Code', 'JetBrains Mono',
121 'Cascadia Code', 'Consolas', 'Monaco', 'Liberation Mono',
122 'Courier New', monospace;
123}
124/* True page-centering for the poem column. The old <table align="center">
125 shrink-wrapped to its WIDEST line -- and an attached image (up to 800px) is
126 wider than the ~84-char text frame, so the cell stretched and the frames
127 hugged the left of that wide cell, landing the whole column left-of-center.
128 Fix: the cell centers its children, each <pre> is an inline-block that
129 centers as a block (text stays left-aligned inside), and media centers via
130 auto margins. Now a vertical line down the page bisects every poem AND image,
131 regardless of how wide any single image is. */
132td { text-align: center; }
133pre { display: inline-block; text-align: left; margin: 0 auto; }
134img, video, audio { margin-left: auto; margin-right: auto; }
135</style>
136]]
137-- }}}
138
-- Pagination configuration defaults
-- Issue 10-003: These values are overridden by unified config (config.lua) if present
-- See Issue 8-020 for hybrid pagination strategy (45GB storage constraint)
-- Issue 9-003 Fix F: Added chronological pagination settings
-- load_pagination_config() only overrides keys that already exist in this table,
-- so a setting must be listed here (with a non-nil default) to be configurable.
local PAGINATION_CONFIG = {
    poems_per_page = 100,
    minimum_pages = 1,
    -- COMPUTED per build by compute_storage_max_pages (Issue 10-057), not a config
    -- value. This placeholder is only the operative value if that computation is
    -- skipped (e.g. a caller that loads pagination just for chronological mapping);
    -- kept finite so %d logging and math.min() stay well-defined.
    max_pages_per_poem = 9999,
    page_number_padding = 2,
    generate_txt_exports = true,
    generate_html_archives = false, -- Disabled: redundant with paginated pages
    chronological_paginated = false, -- Set to true to split chronological.html into multiple pages
    chronological_poems_per_page = 500 -- Poems per page when chronological_paginated is true
}

-- Storage configuration
-- Issue 10-003: Loaded from unified config (config.lua) if present
-- limit_gb is the budget compute_storage_max_pages() divides up when it derives
-- the per-poem page cap. The two reserved_* values are not read anywhere in the
-- part of the file visible here (presumably display-only -- confirm).
-- As with PAGINATION_CONFIG, only keys listed here can be overridden.
local STORAGE_CONFIG = {
    limit_gb = 45,
    reserved_for_maze_gb = 0.031,
    reserved_headroom_gb = 5
}
165
-- Layout constants: Single source of truth for box widths and positions
-- Issue 8-037: Centralized to prevent drift between calculations
-- Issue 10-003: Values can be overridden in unified config (config.lua) "layout" section
-- Reference: All progress bars, nav boxes, and content should use these
-- Widths are counts of character cells; positions are 0-based columns.
local LAYOUT = {
    -- Total visible width for regular poems (positions 0-82)
    -- NOTE(review): positions 0-82 span 83 columns and the nav-box arithmetic
    -- below also adds up to 83 (11 + 59 + 13), yet this value is 82 -- confirm
    -- which of the two is intended.
    REGULAR_POEM_WIDTH = 82,
    -- Total visible width for golden poems: 84 chars
    -- Structure: ╔ (1) + interior (82) + ┐ (1) = 84
    GOLDEN_POEM_WIDTH = 84,
    -- Maximum text content width (80 chars by default +1 space padding on left and +1 on right)
    TEXT_CONTENT_WIDTH = 80,

    -- Regular poem nav box positions (within 83-char line):
    -- ┌─────────┐ ┌───────────┐
    -- positions: 0-10 = left box (11 chars), 11-69 = gap (59 chars), 70-82 = right box (13 chars)
    REGULAR_LEFT_BOX_WIDTH = 11, -- ┌─────────┐
    REGULAR_RIGHT_BOX_WIDTH = 13, -- ┌───────────┐
    REGULAR_GAP_WIDTH = 59, -- 83 - 11 - 13 = 59
    REGULAR_LEFT_JUNCTION_POS = 10, -- Position of ┐/┴ under left box
    REGULAR_RIGHT_JUNCTION_POS = 70, -- Position of ┌/┴ under right box

    -- Golden poem nav box positions (within 84-char line):
    -- Structure: ║ (1) + content (80) + space (1) + │ (1) = 83 interior + corners
    -- NOTE(review): this describes an 83-wide interior while the
    -- GOLDEN_POEM_WIDTH comment above describes an 82-wide one -- confirm.
    GOLDEN_LEFT_BOX_WIDTH = 11,
    GOLDEN_RIGHT_BOX_WIDTH = 13,
    GOLDEN_GAP_WIDTH = 58, -- 84 - 2 corners - 11 - 13 = 58
    -- Issue 8-055: Fixed junction positions to align ╧/┴ under ┐/┌ corners
    -- NOTE(review): load_layout_from_config() overrides only the REGULAR_*
    -- junction positions, so these two golden values always keep their defaults
    -- even when the config moves the regular ones -- confirm that is intended.
    GOLDEN_LEFT_JUNCTION_POS = 10, -- Same as regular (left box ┐ at position 10)
    GOLDEN_RIGHT_JUNCTION_POS = 71, -- Regular + 1 (right box ┌ at position 71 due to wider golden)
}
197
-- {{{ function load_layout_from_config
-- Issue 10-003: Applies the optional "layout" section of the unified config on
-- top of the LAYOUT defaults. A config key that is absent (or false) leaves the
-- corresponding default untouched; with no layout section nothing changes.
-- The box and gap widths each drive both the regular and the golden variant; the
-- junction positions only drive the regular variant.
local function load_layout_from_config()
    local layout = unified_config.layout
    if not layout then return end

    -- config key -> the LAYOUT field(s) it overrides
    local targets_by_key = {
        regular_poem_width = { "REGULAR_POEM_WIDTH" },
        golden_poem_width  = { "GOLDEN_POEM_WIDTH" },
        text_content_width = { "TEXT_CONTENT_WIDTH" },
        left_box_width     = { "REGULAR_LEFT_BOX_WIDTH", "GOLDEN_LEFT_BOX_WIDTH" },
        right_box_width    = { "REGULAR_RIGHT_BOX_WIDTH", "GOLDEN_RIGHT_BOX_WIDTH" },
        gap_width          = { "REGULAR_GAP_WIDTH", "GOLDEN_GAP_WIDTH" },
        left_junction_pos  = { "REGULAR_LEFT_JUNCTION_POS" },
        right_junction_pos = { "REGULAR_RIGHT_JUNCTION_POS" },
    }

    for config_key, fields in pairs(targets_by_key) do
        local value = layout[config_key]
        if value then
            for _, field in ipairs(fields) do
                LAYOUT[field] = value
            end
        end
    end
end
-- }}}
224
-- Load layout from config on module initialization
load_layout_from_config()

-- Diversity cache (pre-computed GPU sequences for fast HTML generation)
-- Filled by load_diversity_cache() from
-- utils.embeddings_dir_disk(<model>) .. "/diversity_cache.json"; the directory
-- follows the selected embedding model rather than a fixed model name.
local DIVERSITY_CACHE = nil

-- Similarity rankings cache (pre-sorted similarity rankings for fast HTML generation)
-- Filled by load_similarity_rankings_cache() from
-- utils.embeddings_dir(<model>) .. "/similarity_rankings_cache.json"; the
-- directory follows the selected embedding model rather than a fixed model name.
local SIMILARITY_RANKINGS_CACHE = nil
235
-- {{{ local function load_diversity_cache
-- Loads pre-computed diversity sequences from GPU cache (required for HTML generation)
-- Errors out if cache doesn't exist - no fallback to on-the-fly computation
-- model_name: optional embedding model name; defaults to the model selected in
--             inference-server-config
-- Returns: the decoded cache table (also stored in DIVERSITY_CACHE)
-- Raises: error() when the model name is not a string, the cache file is
--         missing, the JSON cannot be read, or it has no `sequences` table
local function load_diversity_cache(model_name)
    model_name = model_name or inference_config.get_selected_model()
    -- Fail fast with a readable message. (An unused gsub on model_name used to
    -- be the only thing that caught a missing/non-string model name.)
    if type(model_name) ~= "string" then
        error("load_diversity_cache: embedding model name must be a string, got "
            .. type(model_name))
    end
    -- Issue 10-054: diversity stays on disk (embeddings_dir_disk).
    local cache_file = utils.embeddings_dir_disk(model_name) .. "/diversity_cache.json"

    if not utils.file_exists(cache_file) then
        error(string.format([[
Diversity cache not found: %s

The diversity cache is required for HTML generation.
Generate it with: ./run.sh --generate-diversity

This takes ~1 minute with GPU (or ~42 hours with CPU using --cpu-only).
]], cache_file))
    end

    utils.log_info("Loading diversity cache from: " .. cache_file)
    local cache_data = utils.read_json_file(cache_file)

    if not cache_data then
        error("Failed to parse diversity cache JSON file")
    end

    if not cache_data.sequences then
        error("Diversity cache has invalid format (missing sequences table)")
    end

    DIVERSITY_CACHE = cache_data
    return cache_data
end
-- }}}
271
-- {{{ local function load_similarity_rankings_cache
-- Loads pre-sorted similarity rankings from cache (required for HTML generation)
-- Errors out if cache doesn't exist - no fallback to on-the-fly sorting
-- model_name: optional embedding model name; defaults to the model selected in
--             inference-server-config
-- Returns: the decoded cache table (also stored in SIMILARITY_RANKINGS_CACHE)
-- Raises: error() when the model name is not a string, the cache file is
--         missing, the JSON cannot be read, it has no `rankings` table, or that
--         table is empty
local function load_similarity_rankings_cache(model_name)
    model_name = model_name or inference_config.get_selected_model()
    -- Fail fast with a readable message. (An unused gsub on model_name used to
    -- be the only thing that caught a missing/non-string model name.)
    if type(model_name) ~= "string" then
        error("load_similarity_rankings_cache: embedding model name must be a string, got "
            .. type(model_name))
    end
    -- Issue 10-054: similarity ranking cache is movable (embeddings_dir, RAM).
    local cache_file = utils.embeddings_dir(model_name) .. "/similarity_rankings_cache.json"

    if not utils.file_exists(cache_file) then
        error(string.format([[
Similarity rankings cache not found: %s

The similarity rankings cache is required for fast HTML generation.
Generate it with: ./run.sh --generate-similarity

This is a post-processing step that pre-sorts similarity rankings.
]], cache_file))
    end

    utils.log_info("Loading similarity rankings cache from: " .. cache_file)
    local cache_data = utils.read_json_file(cache_file)

    if not cache_data then
        error("Failed to parse similarity rankings cache JSON file")
    end

    if not cache_data.rankings then
        error("Similarity rankings cache has invalid format (missing rankings table)")
    end

    -- Validate cache is not empty (Issue: empty cache generated by standalone script)
    -- next() answers "is there at least one entry?" without walking every ranking.
    if next(cache_data.rankings) == nil then
        error(string.format([[
Similarity rankings cache is empty (0 poems): %s

This usually means the cache was generated before similarity files existed,
or the standalone script encountered a path issue.

To fix, regenerate with: ./run.sh --generate-similarity --force

This will regenerate both similarity files AND the rankings cache.
]], cache_file))
    end

    SIMILARITY_RANKINGS_CACHE = cache_data
    return cache_data
end
-- }}}
325
-- {{{ local function media_href
-- Maps a source file path to its location under output/media/, percent-encoded
-- for use in an <img src> / href.
--   * A path containing "input/images/" keeps everything after that marker
--     (source name + subdirectories), because human-given basenames collide
--     across subdirectories (my-art/x.png vs my-art/game-design/x.png).
--   * Any other path (Mastodon attachments, already-unique hashes) is reduced to
--     its basename.
-- This MUST stay identical to flatten_media_files' target layout and to
-- image-render.lua's copy of the rule, or the src points at the wrong file.
-- Letters, digits and - . _ ~ / pass through; every other byte becomes %XX.
-- path: source path (nil is treated as "")
-- Returns: the encoded relative path (a single string)
local function media_href(path)
    local source = path or ""
    local relative = source:match("input/images/(.+)$")
    if not relative then
        relative = source:match("([^/]+)$") or source
    end
    -- Assigning to one local drops gsub's second result (the match count).
    local encoded = relative:gsub("[^%w%-%._~/]", function(ch)
        return ("%%%02X"):format(ch:byte())
    end)
    return encoded
end
-- }}}
343
-- {{{ local function flatten_media_files
-- Issue 8-048: Copy every configured image into output/media/ for easy deploy.
-- TWO layouts, by species (kept in lockstep with media_href in the renderers):
--   * Mastodon media: collapse the ~7-level content-addressed nesting to the
--     bare hash basename (output/media/abc.png) -- unique already.
--   * Art images (input/images/<source>/...): keep <source>/<subpath>
--     (output/media/my-art/game-design/x.png), because human-given basenames
--     collide across subdirs and a flat layout silently dropped the duplicates.
-- Called once at start of HTML generation; skips files that already exist (idempotent)
--
-- output_dir: directory that receives the media/ subdirectory
-- Returns: true when no copy failed (also when there was nothing to do or the
--          work was already done this session); false when at least one copy
--          failed
-- Raises: error() when a configured source exists at neither its internal nor
--         its external path
local media_flattening_done = false

local function flatten_media_files(output_dir)
    -- Skip if already done this session (idempotent)
    if media_flattening_done then
        return true
    end

    -- Quote a string as ONE literal /bin/sh word. Single quotes switch off every
    -- expansion, so a file name containing ", $, ` or spaces can neither break
    -- the command nor be interpreted by the shell; an embedded single quote is
    -- written as '\'' (close quote, escaped quote, reopen).
    local function shell_quote(s)
        return "'" .. (tostring(s):gsub("'", "'\\''")) .. "'"
    end

    -- True when `path` is a string that can be opened for reading. A nil path
    -- counts as "not there" instead of crashing inside io.open, so a source
    -- without a path reaches the descriptive error below.
    local function can_open(path)
        if type(path) ~= "string" then return false end
        local probe = io.open(path, "r")
        if not probe then return false end
        probe:close()
        return true
    end

    -- The configured image sources are the source of truth for where to
    -- look. Each entry has an internal project-relative path (where the
    -- sync script drops files) and may also have an external source path
    -- (where the operator's actual files live on the wider file system).
    -- We prefer the internal path when present, and fall back to the
    -- external source so a configured-but-not-yet-synced entry still
    -- contributes media.
    local sources_loader = require("sources-loader")
    sources_loader.set_project_root(DIR)
    local image_dirs = sources_loader.get_directories_with_external("images")

    if not image_dirs or #image_dirs == 0 then
        utils.log_warn("No image sources configured in sources.images.directories; skipping media flattening")
        media_flattening_done = true
        return true
    end

    local target_dir = output_dir .. "/media"
    os.execute("mkdir -p " .. shell_quote(target_dir))

    local copied = 0
    local skipped = 0
    local errors = 0
    local sources_used = 0

    for _, dir in ipairs(image_dirs) do
        -- sources-loader's resolve_path already returns an ABSOLUTE path (it
        -- prepends the project root to relative config entries), so use dir.path
        -- directly. Prepending DIR again produced a doubled "/root//root/..." path
        -- that never resolved, so every source looked "missing" -- which the
        -- mandatory-source check below then turned into a fatal stage-9 failure.
        local internal_path = dir.path
        local external_path = dir.external and dir.external.source or nil
        local resolved_path = nil

        if can_open(internal_path) then
            resolved_path = internal_path
        elseif can_open(external_path) then
            resolved_path = external_path
        end

        if not resolved_path then
            -- Every configured image source is mandatory (the "optional" concept was
            -- removed): a missing source means media we expected to ship is absent,
            -- so we fail loudly here rather than silently skip it. Fix it by running
            -- the sync/extraction that populates the path, or remove the source from
            -- config.lua if it is genuinely gone.
            error(string.format(
                "Image source '%s' not found at internal '%s'%s -- every source is required; sync/extract it or remove it from config.lua",
                dir.name or "(unnamed)",
                dir.path or "(no path)",
                external_path and (" or external '" .. external_path .. "'") or ""))
        else
            sources_used = sources_used + 1

            -- Find every file under the resolved source and place it under
            -- output/media/. TWO species, two layouts (must match media_href in
            -- the renderers exactly, or the <img src> points at the wrong file):
            --   * art sources (path .../input/images/<source>): keep
            --     <source>/<subpath>, so two files that share a basename in
            --     different subdirs (e.g. my-art/x.png and my-art/game-design/x.png)
            --     stay distinct instead of one silently overwriting the other.
            --   * everything else (Mastodon media, content-addressed hashes):
            --     flatten to the bare basename -- already unique, and this
            --     collapses the ~7-level Mastodon nesting.
            -- No leading-^ anchor: dir.path is absolute (see above), so we match the
            -- "input/images/<rest>" tail wherever it appears -- the same tail
            -- media_href() extracts in the renderers, keeping the two layouts identical.
            local ns_prefix = dir.path and dir.path:match("input/images/(.+)$") or nil
            local find_cmd = "find " .. shell_quote(resolved_path) .. " -type f"
            local handle = io.popen(find_cmd)
            if handle then
                for source_path in handle:lines() do
                    -- This file's path within its own source dir (art subdirs
                    -- kept). Dropping the root and then any leading slashes gives
                    -- the same result whether or not the configured path ends in
                    -- "/" (a fixed +2 offset ate the first character of every
                    -- name when it did).
                    local within = (source_path:sub(#resolved_path + 1):gsub("^/+", ""))
                    local target_sub
                    if ns_prefix then
                        target_sub = ns_prefix .. "/" .. within
                    else
                        target_sub = source_path:match("([^/]+)$")
                    end
                    if target_sub and target_sub ~= "" then
                        local target_path = target_dir .. "/" .. target_sub
                        local exists_check = io.open(target_path, "r")
                        if exists_check then
                            exists_check:close()
                            skipped = skipped + 1
                        else
                            -- create the subdirectory before copying (art paths
                            -- now nest one or more levels under output/media/)
                            local parent = target_path:match("^(.*)/[^/]+$")
                            if parent then
                                os.execute("mkdir -p " .. shell_quote(parent))
                            end
                            local cp_cmd = "cp " .. shell_quote(source_path)
                                .. " " .. shell_quote(target_path)
                            -- os.execute returns 0 on Lua 5.1/LuaJIT and true on
                            -- 5.2+ when the command succeeded.
                            local status = os.execute(cp_cmd)
                            if status == 0 or status == true then
                                copied = copied + 1
                            else
                                errors = errors + 1
                                utils.log_warn("Failed to copy: " .. source_path)
                            end
                        end
                    end
                end
                handle:close()
            else
                utils.log_warn("Could not scan image source: " .. resolved_path)
            end
        end
    end

    utils.log_info(string.format(
        "Media flattening: %d sources used | %d copied, %d skipped, %d errors",
        sources_used, copied, skipped, errors))

    -- Marked done even when some copies failed, so later calls in the same
    -- session return true without retrying.
    media_flattening_done = true
    return errors == 0
end
-- }}}
487
-- {{{ local function load_pagination_config
-- Issue 10-003: Applies the "pagination" and "storage" sections of the unified
-- config on top of PAGINATION_CONFIG and STORAGE_CONFIG.
-- Updated for Issue 8-020: Hybrid pagination strategy with storage constraints
-- Only keys that already exist in the defaults are overridden; "_comment" and
-- unknown keys are ignored. The merge runs once per session -- later calls just
-- return the table.
-- Returns: PAGINATION_CONFIG
local pagination_config_loaded = false

local function load_pagination_config()
    if not pagination_config_loaded then
        -- Copy every recognised setting from `section` into `defaults`.
        local function overlay(defaults, section)
            if not section then return end
            for key, value in pairs(section) do
                if key ~= "_comment" and defaults[key] ~= nil then
                    defaults[key] = value
                end
            end
        end

        overlay(PAGINATION_CONFIG, unified_config.pagination)
        overlay(STORAGE_CONFIG, unified_config.storage) -- Issue 8-020

        pagination_config_loaded = true
    end
    return PAGINATION_CONFIG
end
-- }}}
522
-- {{{ local function compute_storage_max_pages
-- Issue 10-057 follow-up: derive how many similar/different pages per poem fit the
-- storage quota instead of freezing a guess in config. Everything is MEASURED from
-- the last build's output on disk -- a self-correcting validator, not an estimate:
--   budget         = storage.limit_gb (the Neocities quota; the one real config fact)
--   avg_page_size  = bytes of output/similar / number of those page files
--   per_page_level = avg_page_size x num_poems x 2 (each poem gets one similar AND
--                    one different page per page-level)
--   fixed          = everything else already in output/ (media, wordcloud, chrono,
--                    gallery) -- does NOT grow with the page count
--   max_pages      = floor((budget - fixed) / per_page_level)
-- Pages reference images via <img src>, so a page on disk is text; the picture bytes
-- are the single output/media cost, folded into `fixed`. Measurements use du/find
-- (read-only) with block-rounded bytes -- conservative (rounds the cap DOWN, the safe
-- direction for a quota). First build (no pages to measure): warn and DO NOT cap; the
-- next build measures real sizes and applies the cap.
--
-- output_dir: build output directory (contains similar/ and different/)
-- num_poems: number of poems in the corpus
-- Returns: the page cap per poem, always a finite number >= 1
local function compute_storage_max_pages(output_dir, num_poems)
    -- Quote a string as ONE literal /bin/sh word ('\'' encodes an embedded single
    -- quote). string.format's %q is Lua quoting, not shell quoting: the shell
    -- would still expand $ and backticks inside its double quotes.
    local function shell_quote(s)
        return "'" .. (tostring(s):gsub("'", "'\\''")) .. "'"
    end
    -- Run a command and return the first run of digits in its output as a number
    -- (nil when the command could not be started or printed no digits).
    local function popen_num(cmd)
        local h = io.popen(cmd)
        if not h then return nil end
        local out = h:read("*a"); h:close()
        return tonumber((out or ""):match("(%d+)"))
    end
    local function dir_bytes(path)
        return popen_num("du -s --block-size=1 " .. shell_quote(path)) or 0
    end
    -- The NATURAL maximum (every other poem could fill pages), i.e. effectively
    -- uncapped. A finite value keeps %d logging and the math.min() cap
    -- well-defined.
    local function natural_max_pages()
        local per_page = PAGINATION_CONFIG.poems_per_page
        return math.max(1, math.ceil(num_poems / (per_page > 0 and per_page or 1)))
    end

    local sim_dir = output_dir .. "/similar"
    local diff_dir = output_dir .. "/different"
    local page_count = popen_num("find " .. shell_quote(sim_dir)
        .. " -maxdepth 1 -name '*.html' | wc -l") or 0
    if page_count == 0 or num_poems == 0 then
        -- First build: nothing to measure yet. Fall back to the natural maximum
        -- and warn; the next build measures real page sizes and applies the cap.
        local natural_max = natural_max_pages()
        utils.log_warn("Storage page cap: no pages in " .. sim_dir .. " to measure -- "
            .. "generating UNCAPPED this build (natural max " .. natural_max
            .. " pages/poem); re-run to apply the measured cap.")
        return natural_max
    end

    local sim_bytes = dir_bytes(sim_dir)
    local avg_page = sim_bytes / page_count
    local per_page_level = avg_page * num_poems * 2
    if not (per_page_level > 0) then
        -- Pages exist but du reported no size (du missing/failed or printed 0).
        -- Dividing by zero would yield an infinite cap that %d cannot format, so
        -- treat it like the first-build case instead.
        local natural_max = natural_max_pages()
        utils.log_warn("Storage page cap: measured 0 bytes for " .. sim_dir .. " -- "
            .. "generating UNCAPPED this build (natural max " .. natural_max
            .. " pages/poem); check that du works on this system.")
        return natural_max
    end

    local fixed = math.max(0, dir_bytes(output_dir) - sim_bytes - dir_bytes(diff_dir))
    local budget = STORAGE_CONFIG.limit_gb * 1e9 -- decimal GB; conservative vs GiB

    local max_pages = math.floor((budget - fixed) / per_page_level)
    if max_pages < 1 then max_pages = 1 end

    -- %g for the budget: limit_gb comes from config and may be fractional, which
    -- %d rejects on Lua 5.3+; whole numbers still print without a decimal point.
    utils.log_info(string.format(
        "Storage page cap (measured): %d page(s)/poem -- budget %gGB, fixed output %.1fGB, "
        .. "%.0fKB/page x %d poems x 2 sides", max_pages, STORAGE_CONFIG.limit_gb,
        fixed / 1e9, avg_page / 1000, num_poems))
    return max_pages
end
-- }}}
582
-- {{{ local function calculate_page_count
-- Calculates the total number of pages needed for a given poem count
-- total_poems: number of poems to paginate
-- Returns: number of pages (always at least 1, even for 0 poems)
-- A non-positive poems_per_page is treated as 1 -- the same guard
-- compute_storage_max_pages uses -- so the division can never produce inf/NaN.
local function calculate_page_count(total_poems)
    local poems_per_page = PAGINATION_CONFIG.poems_per_page
    if not (poems_per_page > 0) then
        poems_per_page = 1
    end
    return math.max(1, math.ceil(total_poems / poems_per_page))
end
-- }}}
591
-- {{{ local function parse_pages_specification
-- Parses the --pages flag value into a list of page numbers or special value
-- Supports formats:
--   nil, "" or "default" → pages 1..minimum_pages from config (usually {1})
--   "all"                → Generate all pages up to max_pages_per_poem limit
--   "N"                  → Single page number, e.g., "1" → {1}, "5" → {5}
--   "N-M"                → Range of pages, e.g., "1-10" → {1,2,...,10}
-- Page numbers are 1-indexed: a page below 1, a fractional page, or a range that
-- starts below 1 or runs backwards is logged as an error and falls back to
-- page 1, exactly like an unparseable value.
-- pages_spec: the raw flag value (string, or nil when the flag was not given)
-- total_pages_possible: accepted for interface compatibility; not used here
-- Returns: {pages = {1,2,3,...}, is_all = boolean}
--   is_all flag indicates if we should generate all pages (respecting max_pages
--   limit); in that case pages is nil
local function parse_pages_specification(pages_spec, total_pages_possible)
    -- Ensure pagination config is loaded
    load_pagination_config()

    -- Default: use minimum_pages from config
    if not pages_spec or pages_spec == "" or pages_spec == "default" then
        local pages = {}
        for i = 1, PAGINATION_CONFIG.minimum_pages do
            pages[#pages + 1] = i
        end
        return {pages = pages, is_all = false}
    end

    -- "all" means generate all pages up to max_pages_per_poem limit
    if pages_spec == "all" then
        return {pages = nil, is_all = true} -- nil means "generate all" in context
    end

    -- Single page number: "5" → {5}
    local single_num = tonumber(pages_spec)
    if single_num then
        local is_whole = single_num == math.floor(single_num)
        if is_whole and single_num >= 1 and single_num < math.huge then
            return {pages = {math.floor(single_num)}, is_all = false}
        end
        -- "0", "-3", "1.5": a number, but not a page that exists
        utils.log_error(string.format(
            "Invalid page number: %s (must be a whole number >= 1)", tostring(pages_spec)))
        return {pages = {1}, is_all = false} -- Fallback to page 1
    end

    -- Range: "1-10" → {1,2,3,...,10}
    local start_page, end_page = pages_spec:match("^(%d+)%-(%d+)$")
    if start_page and end_page then
        start_page = tonumber(start_page)
        end_page = tonumber(end_page)

        if start_page and end_page and start_page >= 1 and start_page <= end_page then
            local pages = {}
            for i = start_page, end_page do
                pages[#pages + 1] = i
            end
            return {pages = pages, is_all = false}
        else
            utils.log_error(string.format(
                "Invalid page range: %s (need 1 <= start <= end)", pages_spec))
            return {pages = {1}, is_all = false} -- Fallback to page 1
        end
    end

    -- Invalid format - fallback to page 1
    utils.log_error(string.format("Invalid --pages format: '%s'. Expected: 1, all, or 1-10", pages_spec))
    return {pages = {1}, is_all = false}
end
-- }}}
648
-- {{{ local function get_poems_for_page
-- Slices one page out of an already-sorted poem list.
-- sorted_poems: array of poem entries
-- page_num: 1-indexed page number
-- Returns: new array holding that page's entries, at most poems_per_page of them;
--          empty when the page lies outside the list
local function get_poems_for_page(sorted_poems, page_num)
    local page_size = PAGINATION_CONFIG.poems_per_page
    local first = (page_num - 1) * page_size + 1
    -- first + page_size - 1 is page_num * page_size; never read past the list
    local last = math.min(page_num * page_size, #sorted_poems)

    local page_poems = {}
    for i = first, last do
        local poem = sorted_poems[i]
        if poem then
            page_poems[#page_poems + 1] = poem
        end
    end
    return page_poems
end
-- }}}
668
-- {{{ local function get_unique_poem_filename_id
-- Builds a filename identifier that is unique across categories by prefixing the
-- zero-padded id with the category: fediverse/0002.txt and messages/0002.txt both
-- have id=2 but become "fediverse-0002" and "messages-0002". See Issue 8-019.
-- poem: poem object; reads .category (default "unknown") and .id (default 0)
-- Returns: identifier like "fediverse-0002" or "messages-0767"
local function get_unique_poem_filename_id(poem)
    return ("%s-%04d"):format(poem.category or "unknown", poem.id or 0)
end
-- }}}
681
-- {{{ local function get_poem_anchor_id
-- Builds the HTML anchor ID used to link to a poem in chronological.html
-- Issue 8-030: Add chronological anchor links
-- Issue 16-006: the anchor is built from poem_index alone
--   Old format: "poem-fediverse-0042" (leaked category info)
--   New format: "poem-4625" (just the unique poem_index)
-- poem: poem object; reads .poem_index (default 0)
-- Returns: anchor ID like "poem-4625"
local function get_poem_anchor_id(poem)
    return ("poem-%d"):format(poem.poem_index or 0)
end
-- }}}
695
-- {{{ local function format_page_number
-- Zero-pads a page number to the configured width (page_number_padding).
-- page_num: page number to format
-- Returns: padded string like "01", "02"; numbers wider than the padding are
--          printed in full
local function format_page_number(page_num)
    local width = PAGINATION_CONFIG.page_number_padding
    local spec = "%0" .. width .. "d"
    return spec:format(page_num)
end
-- }}}
704
-- {{{ local function generate_page_filename
-- Builds the relative filename of one paginated similarity/diversity page.
-- poem_id: the starting poem ID, zero-padded to 4 digits
-- page_num: 1-indexed page number, padded by format_page_number
-- page_type: "similar" or "different"; used as the directory name
-- Returns: filename like "similar/0068-01.html"
local function generate_page_filename(poem_id, page_num, page_type)
    local id_part = ("%04d"):format(poem_id)
    local page_part = format_page_number(page_num)
    return ("%s/%s-%s.html"):format(page_type, id_part, page_part)
end
-- }}}
717
718-- {{{ local function generate_prev_next_navigation
-- Builds the navigation header for a paginated page: a boxed title line with
-- the page/poem range, an optional storage-limit notice, and a Previous/Next
-- link row.
-- current_page: 1-indexed current page
-- total_pages:  total number of pages (may be capped by max_pages_per_poem)
-- poem_id:      starting poem ID (nil for chronological pages)
-- page_type:    "similar", "different", or "chronological"
-- total_corpus: optional - total poems in corpus (for storage context display)
-- Returns: the navigation block as one newline-joined string (text + <a> tags)
-- Issue 8-020: capped listings show a storage-constraint message on the last page.
-- Fix: the gap between the two links is now sized from their VISIBLE width.
-- It used to be derived from the byte length of the markup (anchor tag and the
-- multi-byte arrow included), so the Next link sat in a different column on
-- page 1 than on later pages and never reached column 80.
local function generate_prev_next_navigation(current_page, total_pages, poem_id, page_type, total_corpus)
    local HEAVY_RULE = "════════════════════════════════════════════════════════════════════════════════"
    local LIGHT_RULE = "────────────────────────────────────────────────────────────────────────────────"
    local LINE_WIDTH = 80
    local PREV_LABEL = "◀ Previous Page"
    local NEXT_LABEL = "Next Page ▶"

    -- Number of UTF-8 characters in text (every byte that is not a continuation byte).
    local function visible_width(text)
        local _, count = text:gsub("[^\128-\191]", "")
        return count
    end

    local is_chronological = (page_type == "chronological")

    -- Link target for a sibling page. All pages of one listing share a directory,
    -- so the href is just the file name (Issue 8-039: chronological pages too).
    local function page_href(page)
        if is_chronological then
            return string.format("%s.html", format_page_number(page))
        end
        return string.format("%04d-%s.html", poem_id, format_page_number(page))
    end

    -- A bracketed nav item: a live link when target_page is given, plain
    -- (disabled) text otherwise.
    local function nav_item(label, target_page)
        if target_page then
            return string.format('[<a href="%s">%s</a>]', page_href(target_page), label)
        end
        return "[" .. label .. "]"
    end

    -- Poem range covered by this page
    local poems_per_page = PAGINATION_CONFIG.poems_per_page
    local max_pages = PAGINATION_CONFIG.max_pages_per_poem
    local start_poem = ((current_page - 1) * poems_per_page) + 1
    local end_poem = math.min(current_page * poems_per_page, total_pages * poems_per_page)

    -- A listing is storage-limited when it hit the page cap and the corpus
    -- holds more poems than this page reaches.
    local is_storage_limited = (total_pages == max_pages) and (total_corpus and total_corpus > end_poem)
    local poems_shown = end_poem
    local poems_omitted = total_corpus and (total_corpus - poems_shown) or 0

    local nav_parts = {}

    -- Boxed header line with page info
    table.insert(nav_parts, HEAVY_RULE)
    if is_chronological then
        table.insert(nav_parts, string.format(" Page %d of %d │ Poems %d-%d",
            current_page, total_pages, start_poem, end_poem))
    else
        local padded_id = string.format("%04d", poem_id)
        local relation = page_type == "similar" and "Similar" or "Different"
        if is_storage_limited then
            -- Show storage context on capped listings (Issue 8-020)
            table.insert(nav_parts, string.format(" %s to Poem %s │ Page %d of %d │ Showing top %d poems",
                relation, padded_id, current_page, total_pages, poems_shown))
        else
            table.insert(nav_parts, string.format(" %s to Poem %s │ Page %d of %d │ Poems %d-%d",
                relation, padded_id, current_page, total_pages, start_poem, end_poem))
        end
    end
    table.insert(nav_parts, HEAVY_RULE)

    -- Storage constraint notice on the last page only (Issue 8-020)
    if is_storage_limited and current_page == total_pages and poems_omitted > 0 then
        table.insert(nav_parts, string.format(" (%d additional poems omitted for storage constraints)",
            poems_omitted))
    end

    table.insert(nav_parts, "")

    -- Link row: Previous flush left, Next flush right. Each item is shown as
    -- "[label]", hence the +2 for the brackets.
    local prev_item = nav_item(PREV_LABEL, current_page > 1 and (current_page - 1) or nil)
    local next_item = nav_item(NEXT_LABEL, current_page < total_pages and (current_page + 1) or nil)
    local gap = LINE_WIDTH - (visible_width(PREV_LABEL) + 2) - (visible_width(NEXT_LABEL) + 2)
    if gap < 0 then gap = 0 end

    table.insert(nav_parts, prev_item .. string.rep(" ", gap) .. next_item)
    table.insert(nav_parts, LIGHT_RULE)

    return table.concat(nav_parts, "\n")
end
812-- }}}
813
814-- {{{ function load_poem_colors
-- Session-wide memo for load_poem_colors(): nil until the first successful
-- lookup, afterwards the table every later call returns.
local cached_poem_colors = nil

-- Returns the poem-color table, resolving it at most once per session.
-- Reads <embeddings dir>/poem_colors.json and uses its `poem_colors` field;
-- when the file or that field is unavailable a warning is logged and
-- MOCK_POEM_COLORS is used (and memoised) instead.
local function load_poem_colors()
    if not cached_poem_colors then
        local colors_path = utils.embeddings_dir() .. "/poem_colors.json"
        local parsed = utils.read_json_file(colors_path)

        if parsed and parsed.poem_colors then
            cached_poem_colors = parsed.poem_colors
        else
            utils.log_warn("Could not load poem colors, using mock colors")
            cached_poem_colors = MOCK_POEM_COLORS
        end
    end

    return cached_poem_colors
end
837-- }}}
838
839-- {{{ function get_file_creation_timestamp
-- Returns a file's timestamp as Unix epoch seconds, or nil when it cannot be
-- determined (missing file, non-string path, `stat` unavailable).
-- Despite the name this is the MODIFICATION time: `stat -c %Y` is used as the
-- best available approximation of creation time.
-- file_path: path handed to the shell `stat` command
-- The path is single-quote escaped before it reaches the shell, so names that
-- contain quotes or shell metacharacters are looked up literally instead of
-- breaking (or extending) the command line.
local function get_file_creation_timestamp(file_path)
    if type(file_path) ~= "string" or file_path == "" then
        return nil
    end

    -- POSIX single-quote escaping: close the quote, emit \', reopen the quote.
    local quoted_path = "'" .. (file_path:gsub("'", "'\\''")) .. "'"
    -- "--" keeps a path that starts with "-" from being parsed as an option.
    local handle = io.popen("stat -c %Y -- " .. quoted_path .. " 2>/dev/null")
    if not handle then
        return nil
    end

    local output = handle:read("*a")
    handle:close()

    local digits = output and output:match("^%d+")
    if digits then
        return tonumber(digits)
    end
    return nil
end
856-- }}}
857
858-- {{{ function extract_post_date_from_poem
-- Resolves the timestamp used to order a poem chronologically.
-- Sources are tried in order and the first that yields a value wins:
--   1. creation_date (top level, else metadata.creation_date) as ISO 8601
--      date-time, then as a bare YYYY-MM-DD date (midnight)
--   2. a YYYY-MM-DD prefix at the very start of the content
--   3. MM/DD/YYYY or "<month name> <day>[suffix] <year>" on the first content line
--   4. the file timestamp of poem_data.filepath
--   5. poem_data.id (or 0) as a last-resort ordering key
-- Returns: a number (epoch seconds from os.time, or the id fallback).
local function extract_post_date_from_poem(poem_data)
    -- os.time wrapper taking captured strings. When hour/min/sec are omitted
    -- os.time applies its own defaults (documented as noon, 0, 0).
    local function to_epoch(y, m, d, hour, min, sec)
        return os.time({
            year = tonumber(y),
            month = tonumber(m),
            day = tonumber(d),
            hour = hour,
            min = min,
            sec = sec,
        })
    end

    -- 1. Explicit creation_date metadata
    local stamp_text = poem_data.creation_date
        or (poem_data.metadata and poem_data.metadata.creation_date)
    if stamp_text then
        -- Full form: "2023-04-20T05:22:03" or "2023-04-20T05:22:03Z"
        local y, mo, d, h, mi, s = stamp_text:match("(%d+)-(%d+)-(%d+)T(%d+):(%d+):(%d+)")
        if y and mo and d then
            local epoch = to_epoch(y, mo, d, tonumber(h) or 0, tonumber(mi) or 0, tonumber(s) or 0)
            if epoch then return epoch end
        end

        -- Date-only form, pinned to 00:00:00
        y, mo, d = stamp_text:match("(%d+)-(%d+)-(%d+)")
        if y and mo and d then
            local epoch = to_epoch(y, mo, d, 0, 0, 0)
            if epoch then return epoch end
        end
    end

    -- 2. Legacy: a date stamped at the very start of the content
    local body = poem_data.content or ""
    local head_y, head_m, head_d = body:match("^(%d%d%d%d)%-(%d%d)%-(%d%d)")
    if head_y and head_m and head_d then
        return to_epoch(head_y, head_m, head_d)
    end

    -- 3. Legacy: a date somewhere on the first line
    local first_line = body:match("^([^\n]+)")
    if first_line then
        -- MM/DD/YYYY
        local us_m, us_d, us_y = first_line:match("(%d%d)/(%d%d)/(%d%d%d%d)")
        if us_m and us_d and us_y then
            return to_epoch(us_y, us_m, us_d)
        end

        -- "<month name> <day>[suffix] <year>", e.g. "april 16th 2023"
        local month_word, day_text, year_text = first_line:match("(%w+)%s+(%d+)%w*%s+(%d%d%d%d)")
        if month_word and day_text and year_text then
            local month_numbers = {
                january = 1, february = 2, march = 3, april = 4, may = 5, june = 6,
                july = 7, august = 8, september = 9, october = 10, november = 11, december = 12
            }
            local month_number = month_numbers[month_word:lower()]
            if month_number then
                return to_epoch(year_text, month_number, day_text)
            end
        end
    end

    -- 4. Timestamp of the source file, when a path is known
    local from_file = poem_data.filepath and get_file_creation_timestamp(poem_data.filepath)
    if from_file then
        return from_file
    end

    -- 5. Nothing datable: fall back to the poem ID as an ordering key
    return poem_data.id or 0
end
933-- }}}
934
935-- {{{ function sort_poems_chronologically_by_dates
-- Returns the poems of poems_data.poems ordered oldest-first.
-- Poems without an `id` are left out. Each result entry is
--   { poem, timestamp, sort_key, original_index }
-- where timestamp/sort_key both hold the value from extract_post_date_from_poem
-- and original_index is the poem's position in the input array.
local function sort_poems_chronologically_by_dates(poems_data)
    local entries = {}

    for index, poem in ipairs(poems_data.poems) do
        if poem.id then
            local when = extract_post_date_from_poem(poem)
            entries[#entries + 1] = {
                poem = poem,
                timestamp = when,
                sort_key = when,
                original_index = index
            }
        end
    end

    -- Strict ordering: by time first, input position breaks ties so the
    -- result is deterministic for poems sharing a timestamp.
    local function comes_before(left, right)
        if left.sort_key ~= right.sort_key then
            return left.sort_key < right.sort_key
        end
        return left.original_index < right.original_index
    end

    table.sort(entries, comes_before)
    return entries
end
963-- }}}
964
965-- {{{ function calculate_chronological_progress
-- Describes how far poem_id sits through a corpus of total_poems.
-- poem_id:     the poem's position/ID within the corpus
-- total_poems: corpus size
-- Returns: { poem_id, total_poems, percentage, position, quartile } where
--   percentage = poem_id / total_poems * 100 and quartile = ceil(percentage / 25).
-- An empty corpus (total_poems <= 0) reports 0% instead of dividing by zero;
-- the resulting inf/NaN used to flow into the progress-bar width arithmetic.
local function calculate_chronological_progress(poem_id, total_poems)
    local progress_percentage = 0
    if total_poems > 0 then
        progress_percentage = (poem_id / total_poems) * 100
    end

    return {
        poem_id = poem_id,
        total_poems = total_poems,
        percentage = progress_percentage,
        position = poem_id,
        quartile = math.ceil(progress_percentage / 25)
    }
end
978-- }}}
979
980-- {{{ function compute_chronological_mapping
-- Maps poem_index -> where that poem falls in the chronological listing:
--   { position, page_number, total_poems, total_pages, timeline_progress }
-- poems_data:            table whose `poems` array is sorted via
--                        sort_poems_chronologically_by_dates
-- chrono_poems_per_page: page size; when absent everything is on page 1
-- Poems without a poem_index still occupy a position but get no mapping entry.
-- Issue 8-045: timeline_progress is the share (0-100) of the time span between
-- the first and last poem that has elapsed at this poem's timestamp.
local function compute_chronological_mapping(poems_data, chrono_poems_per_page)
    local ordered = sort_poems_chronologically_by_dates(poems_data)
    local poem_count = #ordered

    -- Page holding the n-th poem (also the page count when n is the total)
    local function page_of(n)
        if chrono_poems_per_page then
            return math.ceil(n / chrono_poems_per_page)
        end
        return 1
    end

    local page_count = page_of(poem_count)

    -- Timeline bounds; a zero or negative span collapses to 1 so the division
    -- below is always defined (e.g. every poem sharing one timestamp).
    local oldest = ordered[1] and ordered[1].timestamp or 0
    local newest = ordered[poem_count] and ordered[poem_count].timestamp or 0
    local span = newest - oldest
    if span <= 0 then
        span = 1
    end

    local mapping = {}
    for position, entry in ipairs(ordered) do
        local key = entry.poem.poem_index
        if key then
            local when = entry.timestamp or oldest
            mapping[key] = {
                position = position,
                page_number = page_of(position),
                total_poems = poem_count,
                total_pages = page_count,
                timeline_progress = ((when - oldest) / span) * 100
            }
        end
    end

    return mapping
end
-- }}}

-- Exported so the word-cloud pages reuse this EXACT chronological mapping (same
-- timestamp sort + original-index tiebreaker + page size). A divergent inline
-- copy in generate-word-pages sorted by the raw creation_date string with no
-- tiebreaker and its own page-size default, so it computed different page
-- numbers -> "chronological" links pointed at pages the poem wasn't on and never
-- scrolled. One mapping, one answer.
M.compute_chronological_mapping = compute_chronological_mapping
1029-- {{{ function M.default_chrono_per_page()
-- Returns the configured chronological page size.
-- Callers use this only when no runtime --chrono-per-page override was given.
-- There is deliberately no built-in fallback value: a missing config key means
-- the config is broken, and a silent default would mis-paginate every poem
-- link, so the function raises instead.
function M.default_chrono_per_page()
    -- Apply config.lua's pagination overrides to PAGINATION_CONFIG before
    -- reading it, so the value comes from the config file rather than the
    -- placeholder in the source table.
    load_pagination_config()

    local per_page = PAGINATION_CONFIG.chronological_poems_per_page
    if per_page then
        return per_page
    end

    error("config is missing chronological_poems_per_page; chronological "
        .. "pagination size is required (pass --chrono-per-page or set it "
        .. "in the pagination config)")
end
1048-- }}}
1049
1050-- {{{ function generate_progress_dashes
-- Renders one horizontal poem border that doubles as a progress bar: the
-- completed share is drawn as coloured, bold "═", the rest as plain "─".
-- progress_info:    table with a numeric `percentage` (nominally 0-100)
-- color_name:       key into COLOR_CONFIG; unknown names fall back to "gray"
-- is_golden:        golden poems get outer corner glyphs (84 columns overall,
--                   82 interior); regular borders are 83 columns
-- position:         "top" or "bottom" (anything else is drawn like "top")
-- has_corner_boxes: when set on a bottom border, junction glyphs are placed
--                   under the inner walls of the nav boxes above
-- Returns: { visual, accessibility, raw_progress, raw_remaining, color,
--            percentage, is_golden }
-- Changes from the previous version: the filled length is clamped to the bar
-- width (an out-of-range percentage used to produce a bar of the wrong
-- width), and the two bottom-border variants share one segment builder.
local function generate_progress_dashes(progress_info, color_name, is_golden, position, has_corner_boxes)
    local total_chars = is_golden and 82 or 83
    local progress_chars = math.floor((progress_info.percentage / 100) * total_chars)
    -- Keep the filled part inside the bar for percentages outside 0-100
    progress_chars = math.max(0, math.min(total_chars, progress_chars))
    local remaining_chars = total_chars - progress_chars

    local hex_color = COLOR_CONFIG[color_name] or COLOR_CONFIG["gray"]

    -- Wraps glyphs in the bold, coloured markup used for the filled part
    local function colored(glyphs)
        return string.format('<font color="%s"><b>%s</b></font>', hex_color, glyphs)
    end

    -- Uninterrupted bar: filled part followed by the remainder
    local function plain_bar()
        return colored(string.rep("═", progress_chars)) .. string.rep("─", remaining_chars)
    end

    -- Bar interrupted by two junction glyphs. Columns are 0-indexed with
    -- column 0 being the left corner glyph, so the dashes start at column 1 and
    -- end_pos is one past the last dash column. A column counts as filled when
    -- its index is below progress_chars. Filled junctions are a coloured ╧
    -- (joins ═), unfilled ones a plain ┴ (joins ─).
    local function bar_with_junctions(left_pos, right_pos, end_pos)
        local segments = {}

        local function add_segment(start_pos, stop_pos)
            if stop_pos <= start_pos then return end
            local seg_len = stop_pos - start_pos
            local filled = math.max(0, math.min(seg_len, progress_chars - start_pos))
            local unfilled = seg_len - filled
            if filled > 0 then
                segments[#segments + 1] = colored(string.rep("═", filled))
            end
            if unfilled > 0 then
                segments[#segments + 1] = string.rep("─", unfilled)
            end
        end

        local function add_junction(pos)
            if pos < progress_chars then
                segments[#segments + 1] = colored("╧")
            else
                segments[#segments + 1] = "┴"
            end
        end

        add_segment(1, left_pos)
        add_junction(left_pos)
        add_segment(left_pos + 1, right_pos)
        add_junction(right_pos)
        add_segment(right_pos + 1, end_pos)

        return table.concat(segments, "")
    end

    local wants_junctions = position == "bottom" and has_corner_boxes

    local visual_output
    if is_golden and wants_junctions then
        -- Golden bottom border under nav boxes (Issue 8-055):
        -- ╚ + 9 dashes + junction (col 10) + 60 dashes + junction (col 71)
        -- + 11 dashes + ┘ = 84 columns. The right junction sits one column
        -- further right than on regular poems because golden poems are one
        -- column wider. The ╚ corner is always coloured.
        visual_output = colored("╚") .. bar_with_junctions(10, 71, total_chars + 1) .. "┘"

    elseif not is_golden and wants_junctions then
        -- Regular bottom border under nav boxes:
        -- ╘ (col 0) + 9 dashes + junction (col 10) + 59 dashes
        -- + junction (col 70) + 11 dashes + ┘ (col 82) = 83 columns.
        -- ╘ is coloured as soon as there is any progress; ┘ is never coloured.
        local left_corner = "╘"
        if progress_chars > 0 then
            left_corner = colored("╘")
        end
        visual_output = left_corner .. bar_with_junctions(10, 70, total_chars - 1) .. "┘"

    elseif is_golden then
        -- Golden top border, or golden bottom border without nav boxes.
        -- The left corner is coloured to match the bar.
        if position == "bottom" then
            visual_output = colored("╚") .. plain_bar() .. "┘"
        else
            visual_output = colored("╔") .. plain_bar() .. "┐"
        end

    else
        -- Regular poems: bare bar, no corners (content carries a 1-space indent)
        visual_output = plain_bar()
    end

    -- Brief screen-reader label; these borders repeat often on a page
    local screen_reader_text
    if is_golden then
        screen_reader_text = string.format(
            'aria-label="golden poem border. %s."',
            color_name
        )
    else
        screen_reader_text = string.format(
            'aria-label="eighty dashes. %s."',
            color_name
        )
    end

    return {
        visual = visual_output,
        accessibility = screen_reader_text,
        raw_progress = progress_chars,
        raw_remaining = remaining_chars,
        color = color_name,
        percentage = progress_info.percentage,
        is_golden = is_golden or false
    }
end
1284-- }}}
1285
1286-- {{{ function wrap_single_line_80_chars
-- Word-wraps one line to at most 80 characters.
-- line: a single line of text (no embedded newlines expected)
-- Returns: the line unchanged when it already fits; otherwise its
--   whitespace-separated words re-joined with single spaces and broken into
--   "\n"-separated rows of at most 80 characters. A single word longer than
--   80 characters is kept whole on its own row.
-- Width is counted in UTF-8 characters rather than bytes, so text with
-- multi-byte characters is no longer wrapped early. (Still an approximation
-- of display width: wide and combining characters count as one each.)
local function wrap_single_line_80_chars(line)
    local MAX_WIDTH = 80

    -- UTF-8 character count: every byte that is not a continuation byte
    local function char_count(text)
        local _, count = text:gsub("[^\128-\191]", "")
        return count
    end

    if char_count(line) <= MAX_WIDTH then
        return line
    end

    local rows = {}
    local current, current_width = nil, 0

    for word in line:gmatch("%S+") do
        local word_width = char_count(word)
        if not current then
            current, current_width = word, word_width
        elseif current_width + 1 + word_width <= MAX_WIDTH then
            current = current .. " " .. word
            current_width = current_width + 1 + word_width
        else
            rows[#rows + 1] = current
            current, current_width = word, word_width
        end
    end

    if current then
        rows[#rows + 1] = current
    end

    return table.concat(rows, "\n")
end
1318-- }}}
1319
1320-- {{{ function strip_html_tags
-- Converts an HTML fragment to plain text for the TXT export.
-- content: HTML string
-- Returns: the text with all tags removed, character references decoded and
--   whitespace normalised (runs of spaces/tabs collapsed, spaces around
--   newlines trimmed, three or more newlines reduced to two).
-- Images should be converted with render_attachment_images_txt() separately.
-- Character references are decoded in ONE pass, so an escaped ampersand
-- followed by "lt;" stays the literal text of a reference instead of being
-- decoded twice. Numeric references (decimal and hex) are emitted as UTF-8;
-- the old string.char call raised an error for any code point above 255.
-- Unknown or invalid references are left in the text untouched.
local function strip_html_tags(content)
    -- Named references handled, keyed by the name between "&" and ";".
    -- nbsp becomes a regular space.
    local named_entities = {
        amp = "&", lt = "<", gt = ">", quot = '"', apos = "'", nbsp = " "
    }

    -- UTF-8 encoding of a Unicode code point; nil for surrogates and values
    -- beyond U+10FFFF.
    local function encode_utf8(cp)
        if cp < 0x80 then
            return string.char(cp)
        elseif cp < 0x800 then
            return string.char(0xC0 + math.floor(cp / 0x40), 0x80 + cp % 0x40)
        elseif cp >= 0xD800 and cp <= 0xDFFF then
            return nil
        elseif cp < 0x10000 then
            return string.char(
                0xE0 + math.floor(cp / 0x1000),
                0x80 + math.floor(cp / 0x40) % 0x40,
                0x80 + cp % 0x40)
        elseif cp < 0x110000 then
            return string.char(
                0xF0 + math.floor(cp / 0x40000),
                0x80 + math.floor(cp / 0x1000) % 0x40,
                0x80 + math.floor(cp / 0x40) % 0x40,
                0x80 + cp % 0x40)
        end
        return nil
    end

    -- gsub callback: replacement text for one reference body, or nil to keep
    -- the original text as-is.
    local function decode_reference(body)
        if body:sub(1, 1) ~= "#" then
            return named_entities[body]
        end
        local digits = body:sub(2)
        local code_point
        if digits:match("^[xX]%x+$") then
            code_point = tonumber(digits:sub(2), 16)
        elseif digits:match("^%d+$") then
            code_point = tonumber(digits)
        end
        if code_point then
            return encode_utf8(code_point)
        end
        return nil
    end

    -- Tags go first, so an escaped "<b>" in the text survives as literal text
    local result = content:gsub("<[^>]+>", "")
    result = result:gsub("&(#?%w+);", decode_reference)

    -- Normalize multiple consecutive spaces/newlines
    result = result:gsub("[ \t]+", " ")
    result = result:gsub("\n[ \t]+", "\n")
    result = result:gsub("[ \t]+\n", "\n")
    result = result:gsub("\n\n\n+", "\n\n")

    return result
end
1348-- }}}
1349
1350-- {{{ function wrap_text_80_chars
-- Wraps multi-line text to 80 columns without disturbing its line structure.
-- text: string, lines separated by "\n"
-- Returns: the text with every non-empty line passed through
--   wrap_single_line_80_chars; empty lines (paragraph breaks) are kept as-is.
local function wrap_text_80_chars(text)
    local wrapped_lines = {}

    -- The appended "\n" lets the pattern also pick up the final line
    for source_line in (text .. "\n"):gmatch("(.-)\n") do
        if source_line == "" then
            wrapped_lines[#wrapped_lines + 1] = ""
        else
            -- One input line may come back as several "\n"-separated rows
            local folded = wrap_single_line_80_chars(source_line)
            for row in (folded .. "\n"):gmatch("(.-)\n") do
                wrapped_lines[#wrapped_lines + 1] = row
            end
        end
    end

    return table.concat(wrapped_lines, "\n")
end
1374-- }}}
1375
1376-- {{{ function M.generate_similarity_ranked_list
-- Returns the poems ranked by similarity to a starting poem, read straight
-- from the pre-computed rankings cache (nothing is sorted here).
-- The cache is produced by: ./run.sh --generate-similarity
-- starting_poem_id: poem whose ranking is looked up (cache key is its tostring)
-- poems_data:       table with the `poems` array
-- similarity_data:  unused; kept so existing call sites keep working
-- Returns: array of { id, poem, similarity, rank }. Entry 1 is the starting
--   poem itself (similarity 1.0); the rest follow the cached order with
--   similarity left nil. Cached indices with no matching poem are skipped.
-- Raises: when the cache is not loaded, has no rankings table, or has no
--   entry for starting_poem_id.
function M.generate_similarity_ranked_list(starting_poem_id, poems_data, similarity_data)
    local cache = SIMILARITY_RANKINGS_CACHE
    if not cache then
        error("Similarity rankings cache not loaded! Run: ./run.sh --generate-similarity")
    end
    if not cache.rankings then
        error("Similarity rankings cache has invalid format (missing rankings table)")
    end

    local ranking = cache.rankings[tostring(starting_poem_id)]
    if not ranking then
        error(string.format("Similarity ranking not found for poem %s in cache.", starting_poem_id))
    end

    -- poem_index -> poem, so each cached index resolves in O(1)
    local by_index = {}
    for _, candidate in ipairs(poems_data.poems) do
        if candidate.poem_index then
            by_index[candidate.poem_index] = candidate
        end
    end

    -- The starting poem always leads the list.
    -- NOTE(review): it is fetched by ARRAY POSITION while the cached entries
    -- are resolved by poem_index; this presumably relies on the two being
    -- equal - confirm against the poems.json layout.
    local ranked = {
        {
            id = starting_poem_id,
            poem = poems_data.poems[starting_poem_id],
            similarity = 1.0, -- Perfect similarity to self
            rank = 1
        }
    }

    -- Cached indices are already in descending similarity order
    for _, target_index in ipairs(ranking) do
        local match = by_index[target_index]
        if match then
            local next_rank = #ranked + 1
            ranked[next_rank] = {
                id = match.id,
                poem = match,
                similarity = nil, -- Not needed for display, saves memory
                rank = next_rank
            }
        end
    end

    return ranked
end
1432-- }}}
1433
1434-- {{{ function M.generate_maximum_diversity_sequence
-- Returns the maximum-diversity reading order for a starting poem, read
-- straight from the pre-computed cache (nothing is computed here).
-- The cache is produced by: ./run.sh --generate-diversity
-- starting_poem_id: poem_index of the anchor poem (cache key is its tostring)
-- poems_data:       table with the `poems` array
-- embeddings_data:  unused in this function
-- Returns: array of { id, poem, step } in cached order, where id is the
--   poem_index and step is the entry's position in the cached sequence. The
--   anchor poem itself is left out (Issue 10-025: the cache stores it as the
--   first entry), as are cached indices with no matching poem.
-- Raises: when the cache is not loaded, has no sequences table, or has no
--   entry for starting_poem_id.
-- The cache stores poem_index (globally unique), NOT poem.id (per-category).
function M.generate_maximum_diversity_sequence(starting_poem_id, poems_data, embeddings_data)
    if not DIVERSITY_CACHE then
        error("Diversity cache not loaded! Run: ./run.sh --generate-diversity")
    end

    if not DIVERSITY_CACHE.sequences then
        error("Diversity cache has invalid format (missing sequences table)")
    end

    local cached_sequence = DIVERSITY_CACHE.sequences[tostring(starting_poem_id)]
    if not cached_sequence then
        -- %s + tostring: "%d" raised its own error for non-numeric ids and hid this one
        error(string.format(
            "Diversity sequence not found for poem %s in cache. Cache may be corrupted or incomplete.",
            tostring(starting_poem_id)))
    end

    -- The id may arrive as a string (the cache key is built with tostring);
    -- compare numerically so the anchor is still recognised and skipped.
    local anchor_index = tonumber(starting_poem_id) or starting_poem_id

    -- poem_index -> poem lookup
    local poem_lookup = {}
    for _, poem in ipairs(poems_data.poems) do
        if poem.poem_index then
            poem_lookup[poem.poem_index] = poem
        end
    end

    local diversity_sequence = {}
    for step, poem_index in ipairs(cached_sequence) do
        if poem_index ~= anchor_index then
            local poem = poem_lookup[poem_index]
            if poem then
                diversity_sequence[#diversity_sequence + 1] = {
                    id = poem_index, -- Store poem_index for consistency
                    poem = poem,
                    step = step
                }
            end
        end
    end

    return diversity_sequence
end
1482-- }}}
1483
1484-- {{{ function render_attachment_images
1485-- Issue 8-049: Renamed conceptually to render all media types (images, audio, video)
1486-- Function name kept for backwards compatibility with existing call sites
1487local function render_attachment_images(attachments)
1488 -- Render HTML for poem attachments (images, audio, video)
1489 -- Returns empty string if no attachments or no renderable attachments
1490 -- Media output format designed for 80-char width aesthetic
1491 --
1492 -- ATTACHMENT STRUCTURE (from ActivityPub extraction):
1493 -- {
1494 -- media_type = "image/png" or "audio/mpeg" or "video/mp4",
1495 -- url = "https://server.com/media/files/123/456/original/abc.png",
1496 -- relative_path = "files/123/456/original/abc.png",
1497 -- alt_text = "User description" or nil,
1498 -- width = 1920, -- images/video only
1499 -- height = 1080 -- images/video only
1500 -- }
1501
1502 if not attachments or #attachments == 0 then
1503 return ""
1504 end
1505
1506 local media_html = {}
1507 -- "up to the site root" -- these attachments render on poem pages, which sit
1508 -- one level below output/ (output/similar/, output/different/, ...), so a
1509 -- "../" prefix reaches the root. Document-relative: resolves the same opened
1510 -- locally from any folder or served on the site, so no path conversion step.
1511 local base_path = ".."
1512
1513 for _, attachment in ipairs(attachments) do
1514 local media_type = attachment.media_type or ""
1515 -- Issue 8-048: media lives at output/media/<source>/<subpath> (see
1516 -- flatten_media_files); media_href keeps art's source+subdir structure so
1517 -- same-named pieces don't collide. "../media/" reaches it from a poem page.
1518 local relative_path = attachment.relative_path or ""
1519 -- media_href namespaces art by source+subdir (collision-safe) and
1520 -- url-encodes; Mastodon hashes collapse to the bare name. Matches where
1521 -- flatten_media_files placed the file.
1522 local media_src = base_path .. "/media/" .. media_href(relative_path)
1523
1524 if media_type:match("^image/") then
1525 -- Use alt text if available, otherwise generate generic description
1526 -- Issue 9-012: ActivityPub uses 'description' field for alt-text
1527 local alt_text = attachment.description or attachment.alt_text or "Image attachment"
1528 -- Issue 8-053: Normalize newlines to spaces for clean HTML attributes
1529 alt_text = alt_text:gsub("\n", " "):gsub("\r", "")
1530 -- Escape quotes in alt text for HTML attribute
1531 alt_text = alt_text:gsub('"', '"')
1532
1533 -- Build image tag with lazy loading for performance
1534 -- Issue 8-005 Fix: Add max-width to prevent viewport overflow
1535 -- display:block prevents multiple images from appearing side-by-side
1536 -- max-width:min(100%,800px) caps at content width (~80 chars) while being responsive
1537 -- width/height hints help browser reserve space before load (aspect ratio preserved)
1538 -- Issue 8-053: title attribute provides mouse-over tooltip for sighted users
1539 local img_tag
1540 if attachment.width and attachment.height then
1541 img_tag = string.format(
1542 ' <img src="%s" alt="%s" title="%s" loading="lazy" width="%d" height="%d" style="display:block; max-width:min(100%%,800px); height:auto">',
1543 media_src, alt_text, alt_text, attachment.width, attachment.height
1544 )
1545 else
1546 img_tag = string.format(
1547 ' <img src="%s" alt="%s" title="%s" loading="lazy" style="display:block; max-width:min(100%%,800px); height:auto">',
1548 media_src, alt_text, alt_text
1549 )
1550 end
1551 table.insert(media_html, img_tag)
1552
1553 elseif media_type:match("^audio/") then
1554 -- Issue 8-049: Audio playback support
1555 -- controls: Shows play/pause, volume, seek bar
1556 -- preload="metadata": Only loads duration/metadata initially for performance
1557 local audio_tag = string.format(
1558 ' <audio controls preload="metadata" style="display:block; max-width:100%%">\n' ..
1559 ' <source src="%s" type="%s">\n' ..
1560 ' Your browser does not support the audio element.\n' ..
1561 ' </audio>',
1562 media_src, media_type
1563 )
1564 table.insert(media_html, audio_tag)
1565
1566 elseif media_type:match("^video/") then
1567 -- Issue 8-049: Video playback support
1568 -- controls: Shows play/pause, volume, seek bar, fullscreen
1569 -- preload="metadata": Only loads poster frame initially for performance
1570 -- max-width caps at content width while being responsive
1571 local video_tag
1572 if attachment.width and attachment.height then
1573 video_tag = string.format(
1574 ' <video controls preload="metadata" width="%d" height="%d" style="display:block; max-width:min(100%%,800px); height:auto">\n' ..
1575 ' <source src="%s" type="%s">\n' ..
1576 ' Your browser does not support the video element.\n' ..
1577 ' </video>',
1578 attachment.width, attachment.height, media_src, media_type
1579 )
1580 else
1581 video_tag = string.format(
1582 ' <video controls preload="metadata" style="display:block; max-width:min(100%%,800px); height:auto">\n' ..
1583 ' <source src="%s" type="%s">\n' ..
1584 ' Your browser does not support the video element.\n' ..
1585 ' </video>',
1586 media_src, media_type
1587 )
1588 end
1589 table.insert(media_html, video_tag)
1590 end
1591 end
1592
1593 if #media_html == 0 then
1594 return ""
1595 end
1596
1597 -- Issue 8-005 Fix: Close </pre> before media, reopen after
1598 -- Media inside <pre> don't respect max-width:100% because <pre> sizes to content
1599 -- By closing </pre>, media inherit width constraints from the parent <td> container
1600 return "\n</pre>\n" .. table.concat(media_html, "\n") .. "\n<pre>\n"
1601end
1602-- }}}
1603
1604-- {{{ function render_attachment_images_txt
1605-- Issue 8-049: Now handles all media types (images, audio, video)
local function render_attachment_images_txt(attachments)
    -- TXT-export counterpart of render_attachment_images(): a TXT file cannot
    -- carry HTML media tags, so each image/audio/video attachment becomes a
    -- bracketed one-line description instead.
    --   image -> [Image: <description | alt_text | "no description">]
    --   audio -> [Audio: <last path segment | "audio file">]
    --   video -> [Video: <last path segment | "video file">]
    -- Attachments of any other media type are skipped.
    -- Returns "" when nothing was rendered, otherwise the placeholders joined
    -- by newlines and wrapped in a leading and trailing newline.
    if not attachments or #attachments == 0 then
        return ""
    end

    -- Last path segment of the attachment, or `fallback` when there is none.
    local function file_label(item, fallback)
        return (item.relative_path or ""):match("([^/]+)$") or fallback
    end

    local rendered = {}

    for _, item in ipairs(attachments) do
        local mime = item.media_type or ""
        local entry

        if mime:match("^image/") then
            entry = string.format("[Image: %s]",
                item.description or item.alt_text or "no description")
        elseif mime:match("^audio/") then
            -- Issue 8-049: audio placeholder
            entry = string.format("[Audio: %s]", file_label(item, "audio file"))
        elseif mime:match("^video/") then
            -- Issue 8-049: video placeholder
            entry = string.format("[Video: %s]", file_label(item, "video file"))
        end

        if entry then
            -- Only placeholders longer than 80 bytes go through the wrapper.
            if #entry > 80 then
                entry = wrap_text_80_chars(entry)
            end
            rendered[#rendered + 1] = entry
        end
    end

    if #rendered == 0 then
        return ""
    end

    -- Newline on both sides so the block is spaced off from the poem text.
    return "\n" .. table.concat(rendered, "\n") .. "\n"
end
1656-- }}}
1657
1658-- {{{ function format_warning_box
local function format_warning_box(warning_text)
    -- Draw a box-drawing frame around a content warning.
    --
    -- The text is first passed through wrap_text_80_chars(); every non-empty
    -- line of the result becomes one row of the box. The interior is as wide
    -- as the longest row, clamped to the range 20..76 (leaving room for the
    -- borders). Returns the rows joined with "\n", without a trailing newline.
    --
    -- Widths are measured in UTF-8 characters rather than bytes: with plain
    -- #line, a row containing multi-byte characters (accents, emoji, dashes)
    -- received too little padding and its right wall drifted left.

    -- Count characters by dropping UTF-8 continuation bytes (0x80-0xBF).
    local function char_count(str)
        return #(str:gsub("[\128-\191]", ""))
    end

    local content = wrap_text_80_chars(warning_text)

    -- Collect rows and track the widest one in a single pass.
    local lines = {}
    local max_width = 0
    for line in content:gmatch("[^\n]+") do
        lines[#lines + 1] = line
        max_width = math.max(max_width, char_count(line))
    end

    -- Ensure minimum width and maximum of 76 chars (leave room for box borders)
    max_width = math.min(math.max(max_width, 20), 76)

    local horizontal = string.rep("─", max_width + 2)
    local boxed = { "┌" .. horizontal .. "┐" }

    for _, line in ipairs(lines) do
        -- string.rep yields "" for a non-positive count, so a row wider than
        -- the clamp is emitted unpadded instead of raising an error.
        local padding = string.rep(" ", max_width - char_count(line))
        boxed[#boxed + 1] = "│ " .. line .. padding .. " │"
    end

    boxed[#boxed + 1] = "└" .. horizontal .. "┘"

    return table.concat(boxed, "\n")
end
1688-- }}}
1689
1690-- {{{ function escape_html
local function escape_html(text)
    -- Escape HTML special characters in poem content to prevent browser interpretation
    -- Issue 8-041: Fixes bug where poem content containing </pre> breaks page rendering
    -- IMPORTANT: Must be called BEFORE apply_markdown_formatting() so that
    -- markdown-generated HTML tags (like <em>) are NOT escaped
    --
    -- Returns exactly one value: the escaped string ("" for nil/false input).
    -- string.gsub also returns a match count; it is deliberately dropped here
    -- so the result is safe to pass as the last argument of another call.
    if not text then return "" end

    local escaped = text
        -- Strip NUL and other C0 control bytes that occasionally ride along in
        -- source poem text (a stray \0 in one post is what made a chronological
        -- page read as "binary" and could make a browser choke on it). Keep the
        -- legitimate whitespace controls: tab (\9), newline (\10), CR (\13).
        :gsub("[%z\1-\8\11\12\14-\31]", "")
        -- Order matters: & must be escaped first, otherwise the ampersands
        -- introduced by the next two replacements would be escaped again.
        :gsub("&", "&amp;")
        :gsub("<", "&lt;")
        :gsub(">", "&gt;")

    return escaped
end
1708-- }}}
1709
1710-- {{{ function apply_markdown_formatting
local function apply_markdown_formatting(text)
    -- Convert the two supported emphasis forms to <em> tags:
    --   *\*text*\*  -> <em>*text*</em>   (italics that keep literal asterisks)
    --   *text*      -> <em>text</em>     (simple italics)
    -- Returns the converted string.
    --
    -- The escaped form takes priority. Its output contains "*text*", which the
    -- simple pattern would match again (turning it into nested <em> tags and
    -- dropping the asterisks), so the simple pattern is applied only to the
    -- stretches of text that lie between escaped-form matches.
    local ESCAPED_ITALIC = "%*\\%*([^%*]+)%*\\%*"
    local SIMPLE_ITALIC = "%*([^%*]+)%*"

    local pieces = {}
    local cursor = 1

    while true do
        local first, last, inner = text:find(ESCAPED_ITALIC, cursor)
        if not first then
            -- Tail after the last escaped match (or the whole text when there
            -- is none). Parentheses drop gsub's match-count return value.
            pieces[#pieces + 1] = (text:sub(cursor):gsub(SIMPLE_ITALIC, "<em>%1</em>"))
            break
        end

        pieces[#pieces + 1] = (text:sub(cursor, first - 1):gsub(SIMPLE_ITALIC, "<em>%1</em>"))
        pieces[#pieces + 1] = "<em>*" .. inner .. "*</em>"
        cursor = last + 1
    end

    return table.concat(pieces)
end
1720-- }}}
1721
1722-- {{{ function is_golden_poem
local function is_golden_poem(poem)
    -- Issue 8-044: golden status is read from the extraction metadata rather
    -- than recomputed here; the metadata flag is the single source of truth.
    -- Returns a strict boolean: true only when poem.metadata exists and its
    -- is_golden_poem field is truthy.
    local meta = poem.metadata
    return (meta and meta.is_golden_poem) and true or false
end
1735
1736-- {{{ function is_boost_poem
local function is_boost_poem(poem)
    -- Issue 8-057: tells the renderer whether this entry is a boosted/shared
    -- post (reshared content from other fediverse users) so it can be framed
    -- differently.
    -- Returns a strict boolean: true only when poem.metadata exists and its
    -- is_boost field is truthy.
    local meta = poem.metadata
    return (meta and meta.is_boost) and true or false
end
1747
1748-- {{{ function get_poem_display_filename
local function get_poem_display_filename(poem)
    -- Build the "<category>/<name>" label shown in a poem's file header.
    --   category: poem.category, or "unknown" when absent
    --   name:     for "notes" that carry metadata.source_file, that original
    --             filename; otherwise the poem id (or "unknown") as a string
    local category = poem.category or "unknown"
    local meta = poem.metadata

    if category == "notes" and meta and meta.source_file then
        -- Notes keep their original descriptive filename.
        return category .. "/" .. meta.source_file
    end

    -- Every other category is identified by its id.
    return category .. "/" .. tostring(poem.id or "unknown")
end
1768
1769-- {{{ function generate_corner_box_separator
local function generate_corner_box_separator(hex_color)
    -- Separator row drawn under a GOLDEN poem's content; it doubles as the top
    -- edge of the two navigation corner boxes:
    --   ╟─────────┐ ... ┌───────────┤
    -- Visible layout (84 columns): left box 11 (╟ + 9×─ + ┐), gap of 60
    -- spaces, right box 13 (┌ + 11×─ + ┤). Only the left junction ╟ is
    -- wrapped in a <font> tag carrying hex_color.
    local segments = {
        string.format('<font color="%s"><b>╟</b></font>', hex_color),
        string.rep("─", 9),
        "┐",
        string.rep(" ", 60),
        "┌",
        string.rep("─", 11),
        "┤",
    }
    return table.concat(segments)
end
1785
1786-- {{{ function colorize_char
1787-- Helper to wrap a character in color tags
local function colorize_char(char, hex_color)
    -- Wrap `char` in a bold <font> tag of the given color. With no color
    -- (nil/false) the character is returned unchanged.
    if not hex_color then
        return char
    end
    return string.format('<font color="%s"><b>%s</b></font>', hex_color, char)
end
1795
1796-- {{{ function generate_regular_corner_box_top
1797-- Issue 8-035: Added progress_chars and hex_color for progressive colorization
local function generate_regular_corner_box_top(progress_chars, hex_color)
    -- Top edge of the two navigation corner boxes for REGULAR poems:
    --   ┌─────────┐ ... ┌───────────┐
    -- Visible layout (83 columns): left box at columns 0-10, gap of 59 spaces
    -- at columns 11-69, right box at columns 70-82.
    -- A glyph at column c is colorized (via colorize_char with hex_color) when
    -- progress_chars > c; progress_chars defaults to 0, i.e. nothing colored.
    local filled = progress_chars or 0

    -- Glyph for one column, colorized only when the progress bar reaches it.
    local function glyph_at(column, glyph)
        if filled > column then
            return colorize_char(glyph, hex_color)
        end
        return glyph
    end

    local cells = {}

    -- Left box: columns 0-10
    cells[#cells + 1] = glyph_at(0, "┌")
    for column = 1, 9 do
        cells[#cells + 1] = glyph_at(column, "─")
    end
    cells[#cells + 1] = glyph_at(10, "┐")

    -- Gap: columns 11-69 (spaces never need coloring)
    cells[#cells + 1] = string.rep(" ", 59)

    -- Right box: columns 70-82
    cells[#cells + 1] = glyph_at(70, "┌")
    for column = 71, 81 do
        cells[#cells + 1] = glyph_at(column, "─")
    end
    cells[#cells + 1] = glyph_at(82, "┐")

    return table.concat(cells)
end
1836
1837-- {{{ function generate_regular_corner_box_bottom
local function generate_regular_corner_box_bottom()
    -- Bottom edge of the two navigation corner boxes for REGULAR poems:
    --   └─────────┘ ... └───────────┘
    -- Visible layout (83 columns): 11-column left box, 59 spaces, 13-column
    -- right box. Never colorized.
    return table.concat({
        "└", string.rep("─", 9), "┘",
        string.rep(" ", 59),
        "└", string.rep("─", 11), "┘",
    })
end
1848
1849-- {{{ function generate_corner_box_nav_line
local function generate_corner_box_nav_line(similar_link, different_link, chronological_link, hex_color)
    -- Navigation row for GOLDEN poems (Issue 8-030):
    --   ║ similar │ ... chronological ... │ different │
    -- Visible layout (84 columns):
    --   left box   ║ + space + link + padding + │   = 11 columns
    --   right box  │ + space + link + padding + │   = 13 columns
    --   middle     22 spaces + center link + 25 spaces, or 30 + 30 spaces when
    --              there is no visible center text (chronological_link is nil
    --              on chronological.html)
    -- The links are HTML, so padding is computed from their text with the
    -- tags stripped. Only the left wall ║ is wrapped in a hex_color <font> tag.

    -- Length of a link's text once its HTML tags are removed.
    local function visible_len(markup)
        return #(markup:gsub("<[^>]+>", ""))
    end

    local similar_width = visible_len(similar_link)
    local different_width = visible_len(different_link)

    local center = ""
    local center_width = 0
    if chronological_link then
        center = chronological_link
        center_width = visible_len(chronological_link)
    end

    local wall = string.format('<font color="%s"><b>║</b></font>', hex_color)

    -- Interior widths are 9 (left) and 11 (right); one column of each is the
    -- leading space, the rest is link text plus trailing padding.
    local similar_pad = string.rep(" ", 9 - 1 - similar_width)
    local different_pad = string.rep(" ", 11 - 1 - different_width)

    local gap_before, gap_after = 30, 30
    if center_width > 0 then
        gap_before, gap_after = 22, 25
    end

    return table.concat({
        wall, " ", similar_link, similar_pad, "│",
        string.rep(" ", gap_before), center, string.rep(" ", gap_after),
        "│ ", different_link, different_pad, "│",
    })
end
1899
1900-- {{{ function generate_regular_corner_box_nav_line
1901-- Issue 8-035: Added progress_chars and hex_color for progressive colorization
local function generate_regular_corner_box_nav_line(similar_link, different_link, chronological_link, progress_chars, hex_color)
    -- Navigation row for REGULAR poems (Issue 8-030):
    --   │ similar │ ... chronological ... │ different │
    -- Visible layout (83 columns):
    --   left box   walls at columns 0 and 10, 9 interior columns
    --   right box  walls at columns 70 and 82, 11 interior columns
    --   middle     23 spaces + center link + 23 spaces, or 29 + 30 spaces when
    --              there is no visible center text (chronological_link is nil
    --              on chronological.html)
    -- Issue 8-035: a wall at column c is colorized (colorize_char with
    -- hex_color) when progress_chars > c; progress_chars defaults to 0.
    local filled = progress_chars or 0

    -- Length of a link's text once its HTML tags are removed.
    local function visible_len(markup)
        return #(markup:gsub("<[^>]+>", ""))
    end

    -- Vertical wall for one column, colorized only when progress reaches it.
    local function wall_at(column)
        if filled > column then
            return colorize_char("│", hex_color)
        end
        return "│"
    end

    local similar_width = visible_len(similar_link)
    local different_width = visible_len(different_link)

    local center = ""
    local center_width = 0
    if chronological_link then
        center = chronological_link
        center_width = visible_len(chronological_link)
    end

    -- 59 columns lie between the boxes; with the 13-column center link the
    -- remaining 46 split 23/23, without it the 59 split 29/30.
    local gap_before, gap_after = 29, 30
    if center_width > 0 then
        gap_before, gap_after = 23, 23
    end

    return table.concat({
        wall_at(0), " ", similar_link, string.rep(" ", 9 - 1 - similar_width), wall_at(10),
        string.rep(" ", gap_before), center, string.rep(" ", gap_after),
        wall_at(70), " ", different_link, string.rep(" ", 11 - 1 - different_width), wall_at(82),
    })
end
1956
1957-- {{{ function apply_golden_poem_formatting
local function apply_golden_poem_formatting(content, is_golden, similar_link, different_link, chronological_link, hex_color)
    -- Frame a golden poem with side walls and, optionally, the corner-box
    -- navigation rows.
    --
    -- content:            pre-formatted poem text, lines separated by "\n"
    -- is_golden:          when falsy, content is returned untouched
    -- similar_link,
    -- different_link:     HTML links; the nav rows are added only when both
    --                     are present
    -- chronological_link: optional center link (nil on chronological.html)
    -- hex_color:          color of the left wall; defaults to "#787878"
    --
    -- Each output row is: ║ (colored) + space + content padded to 80 visible
    -- columns + space + │, i.e. 84 visible columns.
    -- Returns the rows joined with "\n" (no trailing newline).
    if not is_golden then
        return content
    end

    local CONTENT_WIDTH = 80 -- Content area between padding spaces
    local color = hex_color or "#787878" -- Default to gray if no color provided
    local colored_wall = string.format('<font color="%s"><b>║</b></font>', color)

    local formatted_lines = {}

    -- Appending "\n" makes the pattern also yield a last line that has no
    -- trailing newline of its own.
    for line in (content .. "\n"):gmatch("(.-)\n") do
        -- Visible width excludes HTML tags and counts UTF-8 characters.
        -- Issue 8-055: HTML entities are decoded for the count as well,
        -- e.g. &gt; is 4 bytes but displays as 1 character (>).
        local visible_length = text_formatter.calculate_visible_width(line)

        -- Pad short lines up to the content width; a line already at or over
        -- the width is used as-is.
        local padded_line = line
        if visible_length < CONTENT_WIDTH then
            padded_line = line .. string.rep(" ", CONTENT_WIDTH - visible_length)
        end

        -- Add side borders with padding: ║ (colored) content │
        formatted_lines[#formatted_lines + 1] = colored_wall .. " " .. padded_line .. " │"
    end

    -- Add corner box navigation (separator + nav line) if links provided
    -- Issue 9-003 Fix: Only require similar and different links - chronological_link can be nil
    if similar_link and different_link then
        -- Separator line with corner box tops: ╟─────────┐ ... ┌───────────┤
        formatted_lines[#formatted_lines + 1] = generate_corner_box_separator(color)
        -- Navigation line with corner box walls: ║ similar │ chronological │ different │
        -- chronological_link may be nil on chronological.html (shows empty space in center)
        formatted_lines[#formatted_lines + 1] =
            generate_corner_box_nav_line(similar_link, different_link, chronological_link, color)
    end

    return table.concat(formatted_lines, "\n")
end
2020
2021-- {{{ Issue 8-057: Boost Visual Formatting Functions
2022-- Boosts use nested frames: outer blue frame + inner teal content box with
2023-- asymmetric arrows (◀═ top-left, ─▶ bottom-right) and a floating [BOOST] label.
2024-- ALL geometry now lives in src/boost-bars.lua (shared, unit-tested) -- the old
2025-- generate_boost_* helpers were removed because three drifting copies produced
2026-- misaligned walls, wrong junction columns, and ▢ corruption. This path keeps
2027-- only the thin assembler below.
2028
2029-- {{{ function apply_boost_poem_formatting
local function apply_boost_poem_formatting(content, progress_percent, similar_link, different_link, chronological_link)
    -- Issue 8-057: thin assembler for the nested boost frame. All geometry is
    -- delegated to boost_bars.format_boost; this function only turns the
    -- pre-wrapped content into a list of lines and decides whether the
    -- navigation rows are requested (both similar and different links given).
    -- Returns whatever boost_bars.format_boost returns.
    local body_lines = {}
    -- The appended "\n" lets the pattern capture a final unterminated line.
    for row in (content .. "\n"):gmatch("(.-)\n") do
        body_lines[#body_lines + 1] = row
    end

    -- Coerce to a strict boolean for the module.
    local with_nav = not not (similar_link and different_link)

    return boost_bars.format_boost(
        body_lines, progress_percent, similar_link, different_link, chronological_link, with_nav)
end
2043
2044-- }}} End Issue 8-057: Boost Visual Formatting Functions
2045
2046-- {{{ function format_content_with_warnings
local function format_content_with_warnings(text, poem_category, poem, similar_link, different_link, chronological_link, hex_color)
    -- Turn raw poem text into the HTML-safe, wrapped block shown on a page.
    --
    -- text:           raw poem content
    -- poem_category:  accepted for interface compatibility; not read here
    -- poem:           optional poem record (content_warning, metadata)
    -- similar_link, different_link, chronological_link, hex_color:
    --                 forwarded to apply_golden_poem_formatting for golden poems
    --
    -- Returns two values: the formatted content, and the golden flag
    -- (nil when no poem record was given).

    -- Issue 8-041: Escape HTML special characters in poem content FIRST
    -- This prevents browser from interpreting poem content as HTML markup
    -- (e.g., a poem containing "</pre>" would otherwise close the preformatted block)
    text = escape_html(text)

    -- Apply markdown formatting AFTER escaping
    -- This allows *italics* to become <em>italics</em> while keeping
    -- literal < > & in poem content safely escaped
    text = apply_markdown_formatting(text)

    -- Check if this is a golden poem
    local is_golden = poem and is_golden_poem(poem)

    local formatted_lines = {}

    -- Issue 9-011: Display content warning from poem.content_warning field (Mastodon CW)
    -- This is separate from in-content CW: patterns - it comes from ActivityPub summary field
    if poem and poem.content_warning and poem.content_warning ~= "" then
        -- The warning lands in the same HTML block as the poem body, so it gets
        -- the same escaping; previously a CW containing < > or & was emitted
        -- raw. tostring() keeps a non-string warning from erroring in the
        -- escaper.
        local cw_label = "CW: " .. escape_html(tostring(poem.content_warning))
        local warning_box = format_warning_box(cw_label)
        table.insert(formatted_lines, warning_box)
        table.insert(formatted_lines, "")  -- First newline
        table.insert(formatted_lines, "")  -- Second newline for spacing
    end

    -- Detect additional content warning patterns in text (CW:, content warning:, etc.)
    -- Issue 10-021: Use text_formatter.format_poem_lines to preserve empty lines (paragraph breaks)
    local lines = text_formatter.format_poem_lines(text)

    for _, line in ipairs(lines) do
        local lowered = line:lower()
        -- Check if line starts with content warning (in-content CW pattern)
        if lowered:match("^%s*cw%s*:") or lowered:match("^%s*content warning%s*:") then
            -- Format content warning with box
            local warning_box = format_warning_box(line)
            table.insert(formatted_lines, warning_box)
            table.insert(formatted_lines, "")  -- First newline
            table.insert(formatted_lines, "")  -- Second newline for spacing
        else
            -- Issue 10-021: Wrap long lines while preserving leading whitespace
            -- This replaces 8-056's no-wrap approach with whitespace-aware wrapping
            local wrapped = text_formatter.wrap_preserving_indent(line, 80)
            for _, wrapped_line in ipairs(wrapped) do
                table.insert(formatted_lines, wrapped_line)
            end
        end
    end

    local formatted_content = table.concat(formatted_lines, "\n")

    -- Apply golden poem box-drawing formatting (with corner box nav inside)
    if is_golden then
        formatted_content = apply_golden_poem_formatting(formatted_content, true, similar_link, different_link, chronological_link, hex_color)
    else
        -- For regular poems, add 1-space left padding to each content line
        -- Content uses 1-space indent for alignment (83 chars total width)
        local padded_lines = {}
        for line in (formatted_content .. "\n"):gmatch("(.-)\n") do
            table.insert(padded_lines, " " .. line)
        end
        formatted_content = table.concat(padded_lines, "\n")
    end

    return formatted_content, is_golden
end
2113
2114-- {{{ function format_single_poem_with_progress_and_color
2115-- Issue 10-036: Added chrono_mapping for correct paginated chronological links
local function format_single_poem_with_progress_and_color(poem, total_poems, poem_colors, chrono_mapping)
    -- Render one entry of a ranked page: file header, progress-bar borders,
    -- poem content, attachments and the similar/chronological/different
    -- navigation.
    --
    -- poem:           poem record (poem_index, id, category, content,
    --                 metadata, attachments, is_image are read here)
    -- total_poems:    passed to calculate_chronological_progress
    -- poem_colors:    table keyed by poem_index; each value's .color names an
    --                 entry of COLOR_CONFIG
    -- chrono_mapping: optional table keyed by poem_index whose values carry
    --                 .page_number for the chronological link (Issue 10-036)
    --
    -- Returns a table { content, semantic_color, progress_percentage, poem_id }
    -- for poems; for image entries, whatever image_render.format_image_entry
    -- returns.

    -- Issue 9-013: a ranked IMAGE entry (pseudo-poem) renders as an image box,
    -- not a poem. Inert until inject_pseudo_poems tags/append image entries.
    if poem.is_image then
        return image_render.format_image_entry(poem)
    end

    local formatted = ""

    -- Get semantic color for this poem (key by poem_index, NOT poem.id)
    local poem_color_data = poem_colors[poem.poem_index]
    local semantic_color = poem_color_data and poem_color_data.color or "gray"
    -- Single hex color for the whole entry; falls back to gray when the
    -- semantic color has no COLOR_CONFIG entry.
    local hex_color = COLOR_CONFIG[semantic_color] or COLOR_CONFIG["gray"]

    -- Calculate chronological progress (using poem_index for lookup)
    local progress_info = calculate_chronological_progress(poem.poem_index, total_poems)

    -- Golden status comes from the extraction metadata flag
    local is_golden = is_golden_poem(poem)

    -- Issue 8-057: Check if this is a boost (reshared content from another author)
    local is_boost = is_boost_poem(poem)

    -- Build navigation links for this poem (using category prefix for anchors, poem_index for paginated files)
    -- NOTE(review): unique_id is not read anywhere in this function. The call is
    -- kept because the helper is defined elsewhere and may not be free of side
    -- effects - confirm before removing.
    local unique_id = get_unique_poem_filename_id(poem)  -- For anchor IDs only (e.g. "messages-0001")
    local anchor_id = get_poem_anchor_id(poem)
    local poem_index = poem.poem_index or 0  -- Numeric ID for paginated files (e.g. 1 → "0001")

    -- Issue 8-012 Phase E: Link to paginated format (similar/0001-01.html)
    -- Links are relative: ".." climbs out of the page's own directory.
    local base_path = ".."
    local similar_link = string.format("<a href='%s/similar/%04d-01.html'>similar</a>", base_path, poem_index)
    local different_link = string.format("<a href='%s/different/%04d-01.html'>different</a>", base_path, poem_index)
    -- Issue 8-039: Chronological now in subdirectory
    -- Issue 10-036: Use chrono_mapping for correct paginated link (index.html redirect loses anchors)
    local chrono_info = chrono_mapping and chrono_mapping[poem_index]
    local chrono_page = chrono_info and string.format("%02d", chrono_info.page_number) or "01"
    local chronological_link = string.format("<a href='%s/chronological/%s.html#%s'>chronological</a>", base_path, chrono_page, anchor_id)

    -- Add file header (notes show original filename, others show numeric ID)
    formatted = formatted .. string.format(" -> file: %s\n", get_poem_display_filename(poem))
    -- Issue 9-013: text+image posts get a direct "image.png" link below the
    -- header. (Image entries never reach here -- they return early above.)
    local img_link = image_render.text_image_link(poem)
    if img_link ~= "" then formatted = formatted .. " " .. img_link .. "\n" end

    -- Issue 8-057: Boost formatting - uses complete nested frame with arrows and [BOOST] label
    -- Boost formatting replaces all standard elements (top bar, content, nav, bottom bar)
    if is_boost then
        -- Escape HTML first; markdown is applied after wrapping, further down
        local text = escape_html(poem.content or "")

        -- Issue 10-037: Defensive fallback for blank boost content
        -- If content is empty, display the original URI or diagnostic message
        if text == "" or text:match("^%s*$") then
            local original_uri = poem.metadata and poem.metadata.original_uri
            if original_uri then
                text = "External post: " .. escape_html(original_uri)
            else
                text = "(Boost content unavailable)"
            end
        end

        -- Issue 10-039: Make external boost URLs clickable
        -- Pattern: "External post: https://..." -> wrap URL in anchor tag
        local external_pattern = "^External post: (https?://[^%s]+)$"
        local external_url = text:match(external_pattern)
        if external_url then
            -- Wrap the URL across box lines (boost content width) instead of
            -- letting it overflow the box; each line links to the full URL.
            text = text_formatter.wrap_external_url("External post: ", external_url, boost_bars.CONTENT_WIDTH)
        else
            -- Issue 10-041: Wrap long embedded content to fit the boost box.
            -- Only wrap non-external-post content (external posts keep URLs intact)
            local BOOST_CONTENT_WIDTH = boost_bars.CONTENT_WIDTH
            local wrapped_lines = {}
            for line in (text .. "\n"):gmatch("(.-)\n") do
                local wrapped = text_formatter.wrap_preserving_indent(line, BOOST_CONTENT_WIDTH)
                for _, wrapped_line in ipairs(wrapped) do
                    table.insert(wrapped_lines, wrapped_line)
                end
            end
            text = table.concat(wrapped_lines, "\n")
        end

        text = apply_markdown_formatting(text)

        -- Calculate progress as decimal (0-1) for boost functions
        local progress_percent = progress_info.percentage / 100

        -- Apply complete boost formatting (includes all frame elements)
        local boost_formatted = apply_boost_poem_formatting(
            text, progress_percent, similar_link, different_link, chronological_link
        )
        formatted = formatted .. boost_formatted .. "\n"

        -- Render attached images after boost frame
        if poem.attachments then
            formatted = formatted .. render_attachment_images(poem.attachments)
        end

        return {
            content = formatted,
            semantic_color = semantic_color,
            progress_percentage = progress_info.percentage,
            poem_id = poem.id
        }
    end

    -- Standard formatting for golden and regular poems
    -- Generate top progress bar separator (with golden corners if applicable)
    local top_dashes = generate_progress_dashes(progress_info, semantic_color, is_golden, "top")
    formatted = formatted .. string.format('<span %s>%s</span>',
                                         top_dashes.accessibility,
                                         top_dashes.visual)

    -- Add newline after top border for all poems
    -- Golden poems: ┐ corner needs newline before ║ content wall on next line
    -- Regular poems: progress bar needs newline before content
    formatted = formatted .. "\n"

    -- Format poem content with content warning handling and whitespace preservation
    -- Pass nav links and hex_color for golden poems
    local content_formatted = format_content_with_warnings(
        poem.content or "", poem.category, poem,
        is_golden and similar_link or nil,
        is_golden and different_link or nil,
        is_golden and chronological_link or nil,
        is_golden and hex_color or nil
    )
    formatted = formatted .. content_formatted

    -- Render attached images if present (from ActivityPub extraction)
    -- Images appear after poem content, before navigation links
    -- Issue 9-010: Images stay with their original post only (no associated_images rendering)
    if poem.attachments then
        formatted = formatted .. render_attachment_images(poem.attachments)
    end

    -- For golden poems, content already includes nav in corner boxes
    -- For regular poems, add corner-boxed navigation links (top and nav lines only, bottom connects to progress bar)
    if not is_golden then
        -- Issue 8-035: progress_chars drives nav box colorization. The color is
        -- the function-level hex_color (with its gray fallback); a second,
        -- fallback-less lookup used to shadow it here, leaving the nav box
        -- uncolored whenever the semantic color was missing from COLOR_CONFIG.
        local total_chars = LAYOUT.REGULAR_POEM_WIDTH
        local progress_chars = math.floor((progress_info.percentage / 100) * total_chars)

        formatted = formatted .. "\n"
        formatted = formatted .. generate_regular_corner_box_top(progress_chars, hex_color) .. "\n"
        formatted = formatted .. generate_regular_corner_box_nav_line(similar_link, different_link, chronological_link, progress_chars, hex_color) .. "\n"
        -- No bottom line - corner boxes connect directly to progress bar via junctions
    else
        -- Golden poems: add newline after nav line (content_formatted doesn't end with newline)
        formatted = formatted .. "\n"
    end

    -- Generate bottom progress bar separator (with junctions for both golden and regular poems)
    -- The has_corner_boxes parameter enables junction characters at wall positions
    local bottom_dashes = generate_progress_dashes(progress_info, semantic_color, is_golden, "bottom", true)
    formatted = formatted .. string.format('<span %s>%s</span>\n',
                                         bottom_dashes.accessibility,
                                         bottom_dashes.visual)

    return {
        content = formatted,
        semantic_color = semantic_color,
        progress_percentage = progress_info.percentage,
        poem_id = poem.id
    }
end
2287
2288-- {{{ function format_single_poem_with_warnings
-- Render one poem as a simple "file header + content" block (no progress bars).
-- poem: poem table; reads .content, .category and the optional .attachments
-- Returns: header line, an 80-dash rule, the content as produced by
--          format_content_with_warnings and, when the poem has attachments,
--          the output of render_attachment_images appended at the end.
local function format_single_poem_with_warnings(poem)
    -- File header (notes show original filename, others show numeric ID)
    local header = string.format(" -> file: %s\n", get_poem_display_filename(poem))
    local rule = string.rep("-", 80) .. "\n"

    -- Content goes through the content-warning / whitespace-preserving formatter
    local block = header .. rule .. format_content_with_warnings(poem.content or "", poem.category, poem)

    -- Poems without attachments are complete at this point
    if not poem.attachments then
        return block
    end

    return block .. render_attachment_images(poem.attachments)
end
2306-- }}}
2307
2308-- {{{ function format_single_poem_80_width
-- Render one poem for the TXT export (80-character width, no HTML).
-- poem: poem table; reads .content and the optional .attachments
-- Returns: header line, an 80-dash rule, the content after strip_html_tags()
--          and wrap_text_80_chars(), followed by the output of
--          render_attachment_images_txt() when the poem has attachments.
local function format_single_poem_80_width(poem)
    -- File header (notes show original filename, others show numeric ID)
    local header = string.format(" -> file: %s\n", get_poem_display_filename(poem))
    local rule = string.rep("-", 80) .. "\n"

    -- Drop HTML first, then wrap what is left
    local plain_text = strip_html_tags(poem.content or "")
    local block = header .. rule .. wrap_text_80_chars(plain_text)

    if not poem.attachments then
        return block
    end

    -- Attachments use the text renderer, not the HTML one
    return block .. render_attachment_images_txt(poem.attachments)
end
2329-- }}}
2330
2331-- {{{ function format_all_poems_with_progress_and_color
-- Issue 10-036: Added chrono_mapping for correct paginated chronological links
-- Render the anchor poem followed by every other ranked poem, each with its
-- progress visualization, every poem followed by a blank-line pair.
-- starting_poem: anchor poem, always rendered first
-- sorted_poems: array of entries with .id and .poem; an entry whose .id equals
--               starting_poem.id is skipped (the anchor is already rendered)
-- total_poems, poem_colors, chrono_mapping: forwarded unchanged to
--               format_single_poem_with_progress_and_color
-- Returns: one string containing all rendered poems
local function format_all_poems_with_progress_and_color(starting_poem, sorted_poems, total_poems, poem_colors, chrono_mapping)
    -- Pieces are collected and joined once: growing a string with `..` inside
    -- the loop re-copies everything accumulated so far on every iteration.
    local pieces = {}

    local anchor = format_single_poem_with_progress_and_color(starting_poem, total_poems, poem_colors, chrono_mapping)
    pieces[1] = anchor.content .. "\n\n"

    for _, poem_info in ipairs(sorted_poems) do
        if poem_info.id ~= starting_poem.id then
            local rendered = format_single_poem_with_progress_and_color(poem_info.poem, total_poems, poem_colors, chrono_mapping)
            pieces[#pieces + 1] = rendered.content .. "\n\n"
        end
    end

    return table.concat(pieces)
end
2350-- }}}
2351
2352-- {{{ function format_all_poems_with_content_warnings
-- Render the anchor poem followed by every other ranked poem using the plain
-- header + content layout of format_single_poem_with_warnings.
-- starting_poem: anchor poem, always rendered first
-- sorted_poems: array of entries with .id and .poem; an entry whose .id equals
--               starting_poem.id is skipped (the anchor is already rendered)
-- Returns: one string, every poem followed by a blank-line pair
local function format_all_poems_with_content_warnings(starting_poem, sorted_poems)
    -- Pieces are collected and joined once: growing a string with `..` inside
    -- the loop re-copies everything accumulated so far on every iteration.
    local pieces = { format_single_poem_with_warnings(starting_poem) .. "\n\n" }

    for _, poem_info in ipairs(sorted_poems) do
        if poem_info.id ~= starting_poem.id then
            pieces[#pieces + 1] = format_single_poem_with_warnings(poem_info.poem) .. "\n\n"
        end
    end

    return table.concat(pieces)
end
2370-- }}}
2371
2372-- {{{ function format_all_poems_80_width
-- Render the anchor poem followed by every other ranked poem in the plain-text
-- layout of format_single_poem_80_width.
-- starting_poem: anchor poem, always rendered first
-- sorted_poems: array of entries with .id and .poem; an entry whose .id equals
--               starting_poem.id is skipped (the anchor is already rendered)
-- Returns: one string, every poem followed by a blank-line pair
local function format_all_poems_80_width(starting_poem, sorted_poems)
    -- Pieces are collected and joined once: growing a string with `..` inside
    -- the loop re-copies everything accumulated so far on every iteration.
    local pieces = { format_single_poem_80_width(starting_poem) .. "\n\n" }

    for _, poem_info in ipairs(sorted_poems) do
        if poem_info.id ~= starting_poem.id then
            pieces[#pieces + 1] = format_single_poem_80_width(poem_info.poem) .. "\n\n"
        end
    end

    return table.concat(pieces)
end
2390-- }}}
2391
2392-- {{{ function M.generate_flat_poem_list_html_with_progress
-- Issue 10-036: Added chrono_mapping for correct paginated chronological links
-- Build the complete single-document HTML listing for one anchor poem.
-- starting_poem: anchor poem (reads .id and .title)
-- sorted_poems: ranked entries; each may carry .id and/or .poem.id
-- page_type: "similar" selects the word "similarity"; anything else "difference"
-- starting_poem_id: used for the "Poem <id>" fallback when the poem has no title
-- use_progress: truthy -> progress-bar layout; falsy -> plain content-warning layout
-- chrono_mapping: forwarded to the progress-bar formatter
-- Returns: HTML document string
function M.generate_flat_poem_list_html_with_progress(starting_poem, sorted_poems, page_type, starting_poem_id, use_progress, chrono_mapping)
    -- Page skeleton: pure HTML without CSS (except the Issue 16-010 font-stack).
    -- Issue 9-003 Fix: centered table gives block centering with left-aligned text.
    local page_skeleton = [[<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Poems sorted by %s to: %s</title>
]] .. FONT_STYLE .. [[</head>
<body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF">
<center>
<h1>Poetry Collection</h1>
<p>All poems sorted by %s to: %s</p>
</center>
<table align="center"><tr><td>
<pre>
%s
</pre>
</td></tr></table>
</body>
</html>]]

    local body_text
    if not use_progress then
        -- Plain layout: header + content with content warnings
        body_text = format_all_poems_with_content_warnings(starting_poem, sorted_poems)
    else
        local colors = load_poem_colors()

        -- The largest poem ID seen stands in for the corpus total
        local highest_id = starting_poem.id or 1
        for _, entry in ipairs(sorted_poems) do
            local own_id = entry.id
            if own_id and own_id > highest_id then
                highest_id = own_id
            else
                -- Fall back to the ID carried by the nested poem table
                local nested_id = entry.poem and entry.poem.id
                if nested_id and nested_id > highest_id then
                    highest_id = nested_id
                end
            end
        end

        -- Issue 10-036: chrono_mapping travels through to the per-poem formatter
        body_text = format_all_poems_with_progress_and_color(
            starting_poem, sorted_poems, highest_id, colors, chrono_mapping)
    end

    local ordering = "difference"
    if page_type == "similar" then
        ordering = "similarity"
    end
    local anchor_title = starting_poem.title or ("Poem " .. starting_poem_id)

    -- The same wording/title pair fills both the <title> and the intro line
    return string.format(page_skeleton,
        ordering, anchor_title,
        ordering, anchor_title,
        body_text)
end
2455-- }}}
2456
2457-- {{{ function M.generate_flat_poem_list_html
-- Issue 10-036: Added chrono_mapping for correct paginated chronological links
-- Convenience entry point: identical to
-- M.generate_flat_poem_list_html_with_progress with use_progress forced to true.
function M.generate_flat_poem_list_html(starting_poem, sorted_poems, page_type, starting_poem_id, chrono_mapping)
    local use_progress = true -- progress bars are the default layout
    return M.generate_flat_poem_list_html_with_progress(
        starting_poem, sorted_poems, page_type, starting_poem_id,
        use_progress, chrono_mapping)
end
2463-- }}}
2464
2465-- {{{ local function generate_download_links
-- Build the "Download full collection" line for the full-corpus exports.
-- poem_id: the anchor poem's ID; zero-padded to at least four digits
-- page_type: "similar" or "different"; used as the directory part of both hrefs
-- Returns: HTML string linking <page_type>/<id>.txt and <page_type>/<id>-archive.html
local function generate_download_links(poem_id, page_type)
    local padded = ("%04d"):format(poem_id)

    -- Full-corpus export targets (not paginated)
    local txt_href = ("%s/%s.txt"):format(page_type, padded)
    local archive_href = ("%s/%s-archive.html"):format(page_type, padded)

    -- Each link fragment starts with a space and the join adds another one
    return table.concat({
        "Download full collection:",
        (' [<a href="%s">.txt</a>]'):format(txt_href),
        (' [<a href="%s">.html</a>]'):format(archive_href),
    }, " ")
end
2485-- }}}
2486
2487-- {{{ function M.generate_paginated_poem_page_html
-- Generates a single paginated page with navigation
-- starting_poem: the anchor poem object
-- sorted_poems: full sorted list of all poems
-- page_type: "similar" or "different"
-- starting_poem_id: the anchor poem's ID
-- page_num: 1-indexed page number
-- total_pages: total number of pages (may be capped by max_pages_per_poem)
-- total_corpus: optional - total poems in full corpus (for storage context display);
--               defaults to #sorted_poems
-- chrono_mapping: optional - poem_index → {page_number, ...} for correct chronological links
-- Returns: HTML string for this specific page, or nil (after a warning) when
--          get_poems_for_page yields no poems for page_num
-- Updated for Issue 8-020: Passes total_corpus to navigation for storage constraint messaging
-- Issue 10-036: Added chrono_mapping for correct paginated chronological links
function M.generate_paginated_poem_page_html(starting_poem, sorted_poems, page_type, starting_poem_id, page_num, total_pages, total_corpus, chrono_mapping)
    -- Ensure pagination config is loaded
    load_pagination_config()

    -- Get poems for this specific page
    local page_poems = get_poems_for_page(sorted_poems, page_num)

    if #page_poems == 0 then
        utils.log_warn(string.format("No poems found for page %d of %s/%d",
            page_num, page_type, starting_poem_id))
        return nil
    end

    -- Use provided total_corpus or calculate from sorted_poems
    local corpus_size = total_corpus or #sorted_poems

    -- Header and footer navigation are built from the same arguments
    local header_nav = generate_prev_next_navigation(page_num, total_pages, starting_poem_id, page_type, corpus_size)
    local footer_nav = generate_prev_next_navigation(page_num, total_pages, starting_poem_id, page_type, corpus_size)

    -- Load poem colors for progress bars
    local poem_colors = load_poem_colors()

    -- The largest poem ID in the full ranking stands in for the corpus total
    local max_poem_id = starting_poem.id or 1
    for _, poem_info in ipairs(sorted_poems) do
        local pid = poem_info.id or (poem_info.poem and poem_info.poem.id)
        if pid and pid > max_poem_id then
            max_poem_id = pid
        end
    end
    local corpus_total = max_poem_id

    -- Format the poems for this page
    -- Issue 10-036: Pass chrono_mapping for correct paginated chronological links
    local formatted_content = format_all_poems_with_progress_and_color(
        starting_poem, page_poems, corpus_total, poem_colors, chrono_mapping)

    -- Build the page
    local page_type_desc = (page_type == "similar") and "similarity" or "difference"
    local starting_title = starting_poem.title or ("Poem " .. starting_poem_id)

    -- Generate download links for full-corpus exports
    -- (the previously computed-but-unused padded ID local has been removed;
    -- generate_download_links does its own zero-padding)
    local download_links = generate_download_links(starting_poem_id, page_type)

    -- Issue 9-003 Fix: Use centered table for block centering with left-aligned text inside
    -- Issue 16-010: Added FONT_STYLE for Hack Nerd Font font-stack
    local template = [[<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Poems sorted by %s to: %s (Page %d of %d)</title>
]] .. FONT_STYLE .. [[</head>
<body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF">
<center>
<h1>Poetry Collection</h1>
<p>Poems sorted by %s to: %s</p>
<p>%s</p>
</center>
<table align="center"><tr><td>
<pre>
%s

%s

%s
</pre>
</td></tr></table>
</body>
</html>]]

    return string.format(template,
        page_type_desc, starting_title, page_num, total_pages,
        page_type_desc, starting_title,
        download_links,
        header_nav,
        formatted_content,
        footer_nav)
end
2582-- }}}
2583
2584-- {{{ function M.generate_all_paginated_pages_for_poem
-- Generates all paginated pages for a single poem's similarity or diversity ordering
-- starting_poem: the anchor poem object
-- sorted_poems: full sorted list of all poems
-- page_type: "similar" or "different"
-- starting_poem_id: the anchor poem's ID
-- output_dir: base output directory
-- pages_to_generate: optional - which pages to generate (nil = use config limits, or {1,2,3} for specific pages)
-- chrono_mapping: optional - forwarded to M.generate_paginated_poem_page_html so
--                 chronological links can use it (Issue 10-036); nil keeps the
--                 previous behaviour of passing no mapping
-- Returns: table with generated file paths and stats
-- Updated for Issue 8-020: Respects max_pages_per_poem storage constraint
function M.generate_all_paginated_pages_for_poem(starting_poem, sorted_poems, page_type, starting_poem_id, output_dir, pages_to_generate, chrono_mapping)
    -- Ensure pagination config is loaded
    load_pagination_config()

    local total_poems = #sorted_poems
    local total_pages_possible = calculate_page_count(total_poems)

    -- Apply max_pages_per_poem limit (Issue 8-020: 45GB storage constraint)
    local max_pages = PAGINATION_CONFIG.max_pages_per_poem
    local total_pages = math.min(total_pages_possible, max_pages)

    local results = {
        files_generated = {},
        total_pages = total_pages,
        total_pages_possible = total_pages_possible, -- Before storage limit
        poems_per_page = PAGINATION_CONFIG.poems_per_page,
        poem_id = starting_poem_id,
        storage_limited = (total_pages < total_pages_possible) -- Indicates if pages were capped
    }

    -- Determine which pages to generate
    local pages = pages_to_generate
    if not pages then
        -- Generate pages 1 through total_pages (respecting storage limit)
        pages = {}
        for i = 1, total_pages do
            pages[#pages + 1] = i
        end
    end

    -- Ensure output directory exists. The path is quoted (same form as the
    -- chronological generator) so directories containing spaces work.
    local page_dir = output_dir .. "/" .. page_type
    os.execute(string.format('mkdir -p "%s"', page_dir))

    -- Generate each requested page; requests beyond the capped total are ignored
    for _, page_num in ipairs(pages) do
        if page_num <= total_pages then
            local html = M.generate_paginated_poem_page_html(
                starting_poem, sorted_poems, page_type, starting_poem_id,
                page_num, total_pages, total_poems, -- total_poems gives storage context
                chrono_mapping)

            if html then
                local filename = generate_page_filename(starting_poem_id, page_num, page_type)
                local filepath = output_dir .. "/" .. filename

                -- Only successfully written files are reported back
                if utils.write_file(filepath, html) then
                    table.insert(results.files_generated, filepath)
                end
            end
        end
    end

    return results
end
2648-- }}}
2649
2650-- {{{ function M.get_pagination_config
-- Public accessor for the pagination settings, for use by external scripts.
-- Calls load_pagination_config() first, then hands back PAGINATION_CONFIG.
-- Returns: PAGINATION_CONFIG table
function M.get_pagination_config()
    load_pagination_config()
    -- Read only after the loader has run
    local config = PAGINATION_CONFIG
    return config
end
2657-- }}}
2658
2659-- {{{ function M.get_storage_config
-- Public accessor for the storage settings, for use by external scripts (Issue 8-020).
-- Calls load_pagination_config() first (per the original note, that loader
-- also populates the storage config), then hands back STORAGE_CONFIG.
-- Returns: STORAGE_CONFIG table
function M.get_storage_config()
    load_pagination_config()
    -- Read only after the loader has run
    local config = STORAGE_CONFIG
    return config
end
2666-- }}}
2667
2668-- {{{ function M.calculate_page_count
-- Public wrapper around the file-local calculate_page_count, for external scripts.
-- total_poems: poem count to paginate
-- Returns: whatever calculate_page_count(total_poems) returns (the number of
--          pages needed for that many poems)
function M.calculate_page_count(total_poems)
    -- Make sure the pagination settings are loaded before delegating
    load_pagination_config()

    return calculate_page_count(total_poems)
end
2675-- }}}
2676
2677-- {{{ local function generate_chronological_page_navigation
-- Issue 8-039: Files now in chronological/ subdirectory, use simpler relative paths
-- Build the pager line for one chronological page.
-- Format: « First │ ‹ Prev │ Page X of Y │ Next › │ Last »
-- Labels become links (to 01.html, 02.html, ... relative to the current page)
-- only when that direction exists; otherwise they stay plain text.
-- current_page: 1-indexed page being rendered
-- total_pages: number of chronological pages
-- Returns: navigation string, or "" when there is at most one page
local function generate_chronological_page_navigation(current_page, total_pages)
    if total_pages <= 1 then
        return ""
    end

    -- Render `label` as a link to `page` when `enabled`, else as bare text
    local function slot(enabled, page, label)
        if enabled then
            return string.format("<a href='%02d.html'>%s</a>", page, label)
        end
        return label
    end

    local has_prev = current_page > 1
    local has_next = current_page < total_pages

    local parts = {
        slot(has_prev, 1, "« First"),
        slot(has_prev, current_page - 1, "‹ Prev"),
        string.format("Page %d of %d", current_page, total_pages),
        slot(has_next, current_page + 1, "Next ›"),
        slot(has_next, total_pages, "Last »"),
    }

    -- Issue 8-052: Use Unicode box-drawing vertical for consistent HTML output
    return table.concat(parts, " │ ")
end
2723-- }}}
2724
2725-- {{{ function M.generate_chronological_index_with_navigation
-- Issue 9-003: chrono_per_page parameter allows CLI override of poems per page
-- Write the chronological listing of the corpus to <output_dir>/chronological/.
-- poems_data: passed to sort_poems_chronologically_by_dates, which yields an
--             array of entries with .poem and .timestamp
-- output_dir: base output directory
-- chrono_per_page: optional positive number; overrides the configured page
--             size and switches pagination on
-- Output files: NN.html per page plus an index.html redirect when paginated
--             with more than one page, otherwise a single index.html
-- Returns: path of the first page file written (nil if no write succeeded)
function M.generate_chronological_index_with_navigation(poems_data, output_dir, chrono_per_page)
    -- Load pagination config for chronological settings (Issue 9-003 Fix F)
    load_pagination_config()

    local chronological_paginated = PAGINATION_CONFIG.chronological_paginated or false
    local poems_per_page = PAGINATION_CONFIG.chronological_poems_per_page or 500

    -- Apply CLI override if provided. Pagination is enabled either by config
    -- or by the operator supplying --chrono-per-page.
    if chrono_per_page and type(chrono_per_page) == "number" and chrono_per_page > 0 then
        poems_per_page = chrono_per_page
        chronological_paginated = true
    end

    utils.log_info(string.format("Chronological pagination: %d poems/page", poems_per_page))

    -- Sort poems chronologically (by actual post dates)
    local sorted_poems_with_timestamps = sort_poems_chronologically_by_dates(poems_data)
    local total_poems = #sorted_poems_with_timestamps

    -- Issue 8-045: Calculate timeline bounds for time-based progress bars
    local first_timestamp = sorted_poems_with_timestamps[1] and sorted_poems_with_timestamps[1].timestamp or 0
    local last_timestamp = sorted_poems_with_timestamps[total_poems] and sorted_poems_with_timestamps[total_poems].timestamp or 0
    local timeline_span = last_timestamp - first_timestamp
    if timeline_span <= 0 then timeline_span = 1 end -- Avoid division by zero

    -- Calculate pagination
    local total_pages = chronological_paginated and math.ceil(total_poems / poems_per_page) or 1
    if total_pages < 1 then total_pages = 1 end

    -- True when numbered page files (and an index redirect) are produced
    local multi_page = chronological_paginated and total_pages > 1

    utils.log_info(string.format("Generating chronological HTML for %d poems (%d pages, %d poems/page)...",
        total_poems, total_pages, chronological_paginated and poems_per_page or total_poems))
    local generation_start = os.time()

    -- Load poem colors for progress bars
    local poem_colors = load_poem_colors()

    -- Issue 8-039: Files live in the chronological/ subdirectory.
    -- Created once up front; `mkdir -p` also creates output_dir itself, and
    -- the path is quoted so directories containing spaces work.
    local chrono_dir = output_dir .. "/chronological"
    os.execute(string.format('mkdir -p "%s"', chrono_dir))

    local files_written = {}

    for page_num = 1, total_pages do
        -- Calculate poem range for this page
        local start_idx = (page_num - 1) * poems_per_page + 1
        local end_idx = chronological_paginated and math.min(page_num * poems_per_page, total_poems) or total_poems

        -- Generate page navigation
        local page_nav = generate_chronological_page_navigation(page_num, total_pages)
        local page_nav_html = page_nav ~= "" and string.format("<p>%s</p>", page_nav) or ""

        -- Template with optional pagination navigation
        -- Issue 9-003 Fix: Use centered table for block centering with left-aligned text inside
        -- Issue 16-010: Added FONT_STYLE for Hack Nerd Font font-stack
        local template
        if multi_page then
            -- First format pass fills page numbers, font style and navigation;
            -- the escaped %%s survives as %s for the content pass below.
            template = string.format([[<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Poetry Collection - Chronological Order (Page %d of %d)</title>
%s</head>
<body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF">
<center>
<h1>Poetry Collection</h1>
<p>Poems in true chronological order by post date</p>
%s
<p><a href="../wordcloud.html">Menu</a></p>
</center>
<table align="center"><tr><td>
<pre>
%%s
</pre>
</td></tr></table>
<center>%s</center>
</body>
</html>]], page_num, total_pages, FONT_STYLE, page_nav_html, page_nav_html)
        else
            template = [[<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Poetry Collection - Chronological Order</title>
]] .. FONT_STYLE .. [[</head>
<body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF">
<center>
<h1>Poetry Collection</h1>
<p>All poems in true chronological order by post date</p>
<p><a href="../wordcloud.html">Menu</a></p>
</center>
<table align="center"><tr><td>
<pre>
%s
</pre>
</td></tr></table>
</body>
</html>]]
        end

        -- Generate content for this page. Each poem is rendered into its own
        -- small string and the pieces are joined once per page, which avoids
        -- re-copying the whole page on every append.
        local page_chunks = {}
        for i = start_idx, end_idx do
            local poem_info = sorted_poems_with_timestamps[i]
            local poem = poem_info.poem
            local poem_id = poem.poem_index

            -- Progress output every 100 poems
            if i % 100 == 0 or i == total_poems then
                local elapsed = os.time() - generation_start
                local rate = i / math.max(elapsed, 1)
                -- Floored: %d rejects non-integral floats on Lua 5.3+
                local eta = math.floor((total_poems - i) / math.max(rate, 1))
                local progress_msg = string.format("\r  Processing poem %d/%d (%.1f%%) - %.1f poems/sec, ETA: %ds",
                    i, total_poems, (i / total_poems) * 100, rate, eta)
                io.write(progress_msg .. string.rep(" ", math.max(0, 80 - #progress_msg)))
                io.flush()
            end

            -- Issue 8-045: Calculate chronological progress based on actual timestamp
            -- This shows temporal position in the author's timeline, not just poem count
            local poem_timestamp = poem_info.timestamp or first_timestamp
            local timeline_progress = ((poem_timestamp - first_timestamp) / timeline_span) * 100
            local progress_info = {
                poem_id = poem_id,
                total_poems = total_poems,
                percentage = timeline_progress, -- Issue 8-045: time-based, not position-based
                position = i,
                temporal_index = i
            }

            local poem_color_data = poem_colors[poem_id]
            local semantic_color = poem_color_data and poem_color_data.color or "gray"
            local is_golden = is_golden_poem(poem)
            -- NOTE(review): `is_boost` is read below but never assigned inside
            -- this function. Unless it is defined at module scope it is nil
            -- here and the boost branch never runs -- confirm and restore the
            -- per-poem assignment if one was lost.
            local anchor_id = get_poem_anchor_id(poem)
            local poem_index = poem.poem_index or 0

            -- Add HTML anchor
            local content = string.format('<span id="%s"></span>', anchor_id)
            content = content .. string.format(" -> file: %s\n", get_poem_display_filename(poem))

            -- Navigation links: relative paths, one level up from chronological/
            local base_path = ".."
            local similar_link = string.format("<a href='%s/similar/%04d-01.html'>similar</a>", base_path, poem_index)
            local different_link = string.format("<a href='%s/different/%04d-01.html'>different</a>", base_path, poem_index)
            local chronological_link = nil -- Issue 9-003 Fix C: No chronological link on chronological pages

            -- Issue 10-040: Apply boost formatting consistently on chronological pages
            -- Uses same boost box styling as similar/different pages
            if is_boost then
                -- Escape HTML and apply markdown to content
                local text = escape_html(poem.content or "")

                -- Issue 10-037: Defensive fallback for blank boost content
                if text == "" or text:match("^%s*$") then
                    local original_uri = poem.metadata and poem.metadata.original_uri
                    if original_uri then
                        text = "External post: " .. escape_html(original_uri)
                    else
                        text = "(Boost content unavailable)"
                    end
                end

                -- Issue 10-039: Make external boost URLs clickable
                local external_pattern = "^External post: (https?://[^%s]+)$"
                local external_url = text:match(external_pattern)
                if external_url then
                    -- Wrap the URL across box lines instead of overflowing.
                    text = text_formatter.wrap_external_url("External post: ", external_url, boost_bars.CONTENT_WIDTH)
                else
                    -- Issue 10-041: Wrap long embedded content to fit boost box
                    local BOOST_CONTENT_WIDTH = boost_bars.CONTENT_WIDTH
                    local wrapped_lines = {}
                    for line in (text .. "\n"):gmatch("(.-)\n") do
                        local wrapped = text_formatter.wrap_preserving_indent(line, BOOST_CONTENT_WIDTH)
                        for _, wrapped_line in ipairs(wrapped) do
                            table.insert(wrapped_lines, wrapped_line)
                        end
                    end
                    text = table.concat(wrapped_lines, "\n")
                end

                text = apply_markdown_formatting(text)

                -- Calculate progress as decimal (0-1) for boost functions
                local progress_decimal = progress_info.percentage / 100

                -- Apply complete boost formatting (includes all frame elements)
                local boost_formatted = apply_boost_poem_formatting(
                    text, progress_decimal, similar_link, different_link, chronological_link
                )
                content = content .. boost_formatted .. "\n"

                -- Render attached images after boost frame
                if poem.attachments then
                    content = content .. render_attachment_images(poem.attachments)
                end
            else
                -- Standard formatting for golden and regular poems
                -- Generate top progress bar
                local top_dashes = generate_progress_dashes(progress_info, semantic_color, is_golden, "top")
                content = content .. string.format('<span %s>%s</span>\n',
                    top_dashes.accessibility,
                    top_dashes.visual)

                -- Add poem content (nav links and color are only passed for golden poems)
                local hex_color = COLOR_CONFIG[semantic_color] or COLOR_CONFIG["gray"]
                local formatted_content = format_content_with_warnings(
                    poem.content or "", poem.category, poem,
                    is_golden and similar_link or nil,
                    is_golden and different_link or nil,
                    is_golden and chronological_link or nil,
                    is_golden and hex_color or nil
                )
                content = content .. formatted_content

                -- Add images if present
                -- Issue 9-010: Images stay with their original post only (no associated_images rendering)
                if poem.attachments and #poem.attachments > 0 then
                    content = content .. render_attachment_images(poem.attachments)
                end

                -- Add navigation box for regular poems
                if not is_golden then
                    -- Issue 8-035: Calculate progress_chars for nav box colorization
                    local total_chars = LAYOUT.REGULAR_POEM_WIDTH
                    local progress_chars = math.floor((progress_info.percentage / 100) * total_chars)

                    content = content .. "\n"
                    content = content .. generate_regular_corner_box_top(progress_chars, hex_color) .. "\n"
                    content = content .. generate_regular_corner_box_nav_line(similar_link, different_link, chronological_link, progress_chars, hex_color) .. "\n"
                else
                    content = content .. "\n"
                end
            end

            -- Generate bottom progress bar (skip for boosts - they have their own bottom border)
            if not is_boost then
                local bottom_dashes = generate_progress_dashes(progress_info, semantic_color, is_golden, "bottom", true)
                content = content .. string.format('<span %s>%s</span>\n\n',
                    bottom_dashes.accessibility,
                    bottom_dashes.visual)
            else
                content = content .. "\n" -- Just add spacing between poems
            end

            page_chunks[#page_chunks + 1] = content
        end

        -- Write page file
        local final_html = string.format(template, table.concat(page_chunks))

        local output_file
        if multi_page then
            -- Paginated: chronological/01.html, chronological/02.html, etc.
            output_file = string.format("%s/%02d.html", chrono_dir, page_num)
        else
            -- Single page: chronological/index.html (for clean URL)
            output_file = chrono_dir .. "/index.html"
        end

        local success = utils.write_file(output_file, final_html)
        if success then
            table.insert(files_written, output_file)
        else
            utils.log_error("Failed to write: " .. output_file)
        end
    end

    io.write("\n")
    local total_elapsed = os.time() - generation_start
    utils.log_info(string.format("Chronological HTML generation complete: %d poems, %d pages in %d seconds",
        total_poems, total_pages, total_elapsed))

    -- Issue 8-039: For paginated chronological, create index.html redirect within the subdirectory
    if multi_page then
        local redirect_html = [[<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="refresh" content="0;url=01.html">
<title>Redirecting...</title>
</head>
<body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF">
<p>Redirecting to <a href="01.html">01.html</a>...</p>
</body>
</html>]]
        utils.write_file(chrono_dir .. "/index.html", redirect_html)
        utils.log_info("✓ chronological/index.html created (redirect to 01.html)")
    end

    return files_written[1]
end
3021-- }}}
3022
3023-- {{{ function explore_page_shell()
-- Shared HTML shell for the explore pages: black background, FONT_STYLE in the
-- head, a centered <h1>, and the body inside a <pre> within a centered table.
-- title: text for the <title> element
-- heading: text for the <h1> element
-- body: pre-formatted text placed inside the <pre> element
-- Returns: the full HTML document as a string
local function explore_page_shell(title, heading, body)
    -- Placeholders, in order: title, heading, body
    local shell = [[<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>%s</title>
]] .. FONT_STYLE .. [[</head>
<body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF">
<center>
<h1>%s</h1>
</center>
<table align="center"><tr><td>
<pre>
%s
</pre>
</td></tr></table>
</body>
</html>]]

    return shell:format(title, heading, body)
end
3046-- }}}
3047
3048-- {{{ function corpus_stats()
-- Gather the live numbers both explore pages render from, so nothing is
-- hard-coded (stale figures are worse than no figures). Reads only poems_data
-- (the small 12MB file) -- never the 662MB similarity matrix -- so it is cheap.
-- poems_data: table with a .poems array (nil or a missing .poems counts as empty);
--             per poem it reads .category, .is_image_only, .creation_date,
--             .length and .content
-- Returns: table with
--   total          number of poems
--   sources        category -> count ("unknown" when a poem has no category)
--   source_order   category names, most poems first; ties broken by name so
--                  the order is reproducible between runs
--   image_only     count of poems flagged is_image_only
--   min_date/max_date  smallest/largest non-empty creation_date (string compare)
--   per_year/year_order  counts keyed by the first four characters of
--                  creation_date when they are four digits; years ascending
--   length_hist/length_labels  histogram over the character-length buckets
local function corpus_stats(poems_data)
    local poems = (poems_data and poems_data.poems) or {}
    local stats = {
        total = #poems,
        sources = {},                          -- category -> count
        source_order = {},                     -- source names, most-poems first
        image_only = 0,
        min_date = nil, max_date = nil,
        per_year = {}, year_order = {},
        length_hist = {}, length_labels = {},  -- length-distribution buckets
    }

    -- Length buckets (characters). The last bucket is open-ended.
    local edges = {0, 100, 250, 500, 1000, 2000}
    for i = 1, #edges do
        stats.length_hist[i] = 0
        if i < #edges then
            stats.length_labels[i] = string.format("%d-%d", edges[i], edges[i + 1] - 1)
        else
            stats.length_labels[i] = string.format("%d+", edges[i])
        end
    end

    for _, p in ipairs(poems) do
        local cat = p.category or "unknown"
        stats.sources[cat] = (stats.sources[cat] or 0) + 1
        if p.is_image_only then stats.image_only = stats.image_only + 1 end

        local d = p.creation_date
        if d and d ~= "" then
            if not stats.min_date or d < stats.min_date then stats.min_date = d end
            if not stats.max_date or d > stats.max_date then stats.max_date = d end
            local year = d:sub(1, 4)
            if year:match("^%d%d%d%d$") then
                stats.per_year[year] = (stats.per_year[year] or 0) + 1
            end
        end

        -- Place the poem in its length bucket (last edge is open-ended).
        -- An explicit .length wins over the byte length of .content.
        local len = p.length or #(p.content or "")
        local bucket = #edges
        for i = 1, #edges - 1 do
            if len < edges[i + 1] then bucket = i; break end
        end
        stats.length_hist[bucket] = stats.length_hist[bucket] + 1
    end

    for cat in pairs(stats.sources) do
        stats.source_order[#stats.source_order + 1] = cat
    end
    -- pairs() order is unspecified and table.sort is not stable, so equal
    -- counts need an explicit tie-break to give the same order on every run.
    table.sort(stats.source_order, function(a, b)
        local count_a, count_b = stats.sources[a], stats.sources[b]
        if count_a ~= count_b then
            return count_a > count_b
        end
        return tostring(a) < tostring(b)
    end)

    for year in pairs(stats.per_year) do
        stats.year_order[#stats.year_order + 1] = year
    end
    table.sort(stats.year_order)

    return stats
end
3100-- }}}
3101
3102-- {{{ function ascii_bar_row()
-- Render one labelled monospace bar row: the label left-justified to
-- label_width, a " | " divider, a bar of bar_width cells ("█" filled, "·"
-- empty) and the raw count. The filled share is count/max_count rounded to the
-- nearest cell and capped at bar_width; a max_count of 0 or less draws an
-- empty bar. Fits the site's no-JS, monospace aesthetic (same idiom as the
-- poem progress bars).
local function ascii_bar_row(label, count, max_count, bar_width, label_width)
    local filled = 0
    if max_count > 0 then
        filled = math.floor((count / max_count) * bar_width + 0.5)
    end
    local cells = (filled > bar_width) and bar_width or filled
    local bar = ("█"):rep(cells) .. ("·"):rep(bar_width - cells)
    local row_format = "%-" .. label_width .. "s | %s %d"
    return row_format:format(label, bar, count)
end
3111-- }}}
3112
3113-- {{{ function M.generate_simple_discovery_instructions
-- Back-compat shim: writes both explore pages (explore.html and
-- explore-2.html) for callers that still use the old entry point.
--   output_dir : directory the pages are written into
--   poems_data : optional; without it corpus_stats() sees an empty corpus, so
--                the pages render with zero counts. Prefer passing poems_data.
-- Returns the explore.html path, or nil when generate_explore_page reports
-- that the file could not be written (the path used to be returned even then,
-- which made a failed write look like success).
function M.generate_simple_discovery_instructions(output_dir, poems_data)
    local explore_path = M.generate_explore_page(output_dir, poems_data)
    M.generate_explore_math_page(output_dir, poems_data)
    return explore_path
end
3121-- }}}
3122
3123-- {{{ function M.generate_explore_page()
-- explore.html -- the welcome / map page. Data/view split: corpus_stats()
-- computes the numbers, the editable template input/pages/explore.txt holds
-- the prose (Issue 11-005), and this function fills the template's markers and
-- wraps the result in the shared page shell.
--   output_dir : directory that receives explore.html
--   poems_data : corpus table handed to corpus_stats()
-- Returns the written file's path, or nil when the write fails. Raises an
-- error when the template cannot be rendered.
function M.generate_explore_page(output_dir, poems_data)
    local stats = corpus_stats(poems_data)
    local OMIT = page_template.OMIT

    -- The per-source list is a LOOP over the corpus, so it is rendered in code
    -- and handed to the template as one ready-made block (prose and scalars
    -- live in the editable file; loops stay here).
    local source_rows = {}
    for i, category in ipairs(stats.source_order) do
        source_rows[i] = (" %-22s %d"):format(category, stats.sources[category])
    end

    -- Facts that only make sense when they exist fall back to OMIT, which drops
    -- the whole template line instead of leaving a blank gap.
    local first_date, last_date = OMIT, OMIT
    if stats.min_date and stats.max_date then
        first_date = stats.min_date:sub(1, 10)
        last_date = stats.max_date:sub(1, 10)
    end
    local image_only_count = OMIT
    if stats.image_only > 0 then
        image_only_count = stats.image_only
    end

    local body, err = page_template.render_file(DIR .. "/input/pages/explore.txt", {
        TOTAL_POEMS = stats.total,
        SOURCE_COUNT = #stats.source_order,
        MIN_DATE = first_date,
        MAX_DATE = last_date,
        IMAGE_ONLY_COUNT = image_only_count,
        SOURCE_LIST = table.concat(source_rows, "\n"),
    })
    -- A broken template (typo'd marker, missing file) is a real error worth
    -- halting on -- a half-filled page is worse than a loud failure.
    if not body then
        error("generate_explore_page: " .. tostring(err))
    end

    -- The shell adds its own newline after the body, so trailing newlines from
    -- the template are trimmed to keep the <pre> block free of a blank tail.
    local trimmed = body:gsub("\n+$", "")
    local page_title = "Poetry Collection - Explore"
    local html = explore_page_shell(page_title, page_title, trimmed)

    local output_file = output_dir .. "/explore.html"
    if utils.write_file(output_file, html) then
        return output_file
    end
    return nil
end
3164-- }}}
3165
3166-- {{{ function M.generate_explore_math_page()
-- explore-2.html -- the deeper-math page. The prose lives in the editable
-- template input/pages/explore-math.txt (Issue 11-005); this function supplies
-- the corpus-shape charts (per-source, length, per-year) drawn as monospace
-- bars from corpus_stats(), plus the embedding-model name. Nothing here loads
-- the similarity matrix, so similarity-distribution charts are not produced.
--   output_dir : directory that receives explore-2.html
--   poems_data : corpus table handed to corpus_stats()
-- Returns the written file's path, or nil when the write fails. Raises an
-- error when the template cannot be rendered.
function M.generate_explore_math_page(output_dir, poems_data)
    local stats = corpus_stats(poems_data)
    local BAR_WIDTH, LABEL_WIDTH = 40, 20

    -- Append one bar row per label to `rows`, scaling every bar against the
    -- largest count in the set. count_for(i, label) yields a label's count.
    local function append_bar_rows(rows, labels, count_for)
        local peak = 0
        for i, label in ipairs(labels) do
            local n = count_for(i, label)
            if n > peak then peak = n end
        end
        for i, label in ipairs(labels) do
            rows[#rows + 1] = " "
                .. ascii_bar_row(label, count_for(i, label), peak, BAR_WIDTH, LABEL_WIDTH)
        end
        return rows
    end

    -- Poems-per-source bars.
    local source_bars = append_bar_rows({}, stats.source_order, function(_, category)
        return stats.sources[category]
    end)

    -- Poem-length bars: labels and histogram are parallel arrays.
    local length_bars = append_bar_rows({}, stats.length_labels, function(i)
        return stats.length_hist[i]
    end)

    -- Poems-per-year is a whole conditional section (blank line + heading +
    -- bars). With no dated poems it becomes OMIT, which drops the section's
    -- template line entirely.
    local year_section = page_template.OMIT
    if #stats.year_order > 0 then
        local year_lines = append_bar_rows(
            { "", " Poems per year:" },
            stats.year_order,
            function(_, year) return stats.per_year[year] end)
        year_section = table.concat(year_lines, "\n")
    end

    -- The embedding-model name is read from the live inference config rather
    -- than a baked-in string, so it cannot drift from the configured model.
    local body, err = page_template.render_file(DIR .. "/input/pages/explore-math.txt", {
        EMBEDDING_MODEL = inference_config.get_selected_model(),
        TOTAL_POEMS = stats.total,
        SOURCE_BARS = table.concat(source_bars, "\n"),
        LENGTH_BARS = table.concat(length_bars, "\n"),
        YEAR_SECTION = year_section,
    })
    if not body then
        error("generate_explore_math_page: " .. tostring(err))
    end

    -- Drop the template's trailing newlines; the shell supplies its own.
    local trimmed = body:gsub("\n+$", "")
    local html = explore_page_shell(
        "Poetry Collection - The Math", "How the Similarity Works", trimmed)

    local output_file = output_dir .. "/explore-2.html"
    if utils.write_file(output_file, html) then
        return output_file
    end
    return nil
end
3232-- }}}
3233
3234-- {{{ function generate_txt_file_header
local function generate_txt_file_header(title, total_poems)
    -- Build the banner that opens every TXT export (80 columns wide, matching
    -- the compiled.txt aesthetic): a rule of "=", the title centered by byte
    -- length, another rule, the poem total, a generation timestamp, a closing
    -- rule, and one blank line.
    --   title       : heading text
    --   total_poems : integer shown on the "Total poems" line
    -- Returns the header as a single string ending in "\n\n".
    local rule = string.rep("=", 80)
    local left_pad = string.rep(" ", math.floor((80 - #title) / 2))

    local lines = {
        rule,
        left_pad .. title,
        rule,
        string.format("Total poems: %d", total_poems),
        string.format("Generated: %s", os.date("%Y-%m-%d %H:%M:%S")),
        rule,
        "", -- the two empty entries produce the closing "\n\n"
        "",
    }
    return table.concat(lines, "\n")
end
3252-- }}}
3253
3254-- {{{ function generate_similarity_txt_file
function generate_similarity_txt_file(starting_poem, sorted_poems, output_file)
    -- Write the TXT export of poems ranked by similarity to starting_poem:
    -- the shared file header followed by every poem at 80-character width.
    --   starting_poem : the anchor poem the ranking was built from
    --   sorted_poems  : the ranked neighbours (the anchor is counted on top)
    --   output_file   : destination path
    -- Returns output_file on success, nil when the write fails.
    --
    -- The title names the anchor by poem_index, the globally unique id that the
    -- diversity export and the HTML archives also use. poem.id is only unique
    -- within a category, so it is kept purely as a fallback.
    local anchor_label = starting_poem.poem_index or starting_poem.id or "?"
    local title = string.format("POEMS SORTED BY SIMILARITY TO POEM %s", tostring(anchor_label))
    local header = generate_txt_file_header(title, #sorted_poems + 1)
    local poems_content = format_all_poems_80_width(starting_poem, sorted_poems)
    local content = header .. poems_content
    return utils.write_file(output_file, content) and output_file or nil
end
3264-- }}}
3265
3266-- {{{ function generate_similarity_html_archive
-- Issue 10-036: chrono_mapping is accepted and forwarded so the paginated
-- chronological links come out correct.
function generate_similarity_html_archive(starting_poem, sorted_poems, output_file, chrono_mapping)
    -- Single-file HTML archive of the similarity ranking: unlike the paginated
    -- pages it holds ALL poems (full corpus with images). The anchor is
    -- identified by poem_index, the globally unique id.
    -- Returns output_file on success, nil when the write fails.
    local anchor_index = starting_poem.poem_index
    local html = M.generate_flat_poem_list_html(
        starting_poem, sorted_poems, "similar", anchor_index, chrono_mapping)
    local written = utils.write_file(output_file, html)
    return written and output_file or nil
end
3275-- }}}
3276
3277-- {{{ function generate_diversity_txt_file
function generate_diversity_txt_file(starting_poem, sorted_poems, output_file)
    -- Write the TXT export of poems ordered by diversity from starting_poem:
    -- the shared file header (the poem total counts the anchor plus every
    -- ranked poem) followed by all poems at 80-character width.
    -- Returns output_file on success, nil when the write fails.
    local anchor_label = starting_poem.poem_index or "?"
    local title = ("POEMS SORTED BY DIVERSITY FROM POEM %s"):format(anchor_label)
    local header = generate_txt_file_header(title, #sorted_poems + 1)
    local poems_text = format_all_poems_80_width(starting_poem, sorted_poems)
    local written = utils.write_file(output_file, header .. poems_text)
    return written and output_file or nil
end
3287-- }}}
3288
3289-- {{{ function generate_diversity_html_archive
-- Issue 10-036: chrono_mapping is accepted and forwarded so the paginated
-- chronological links come out correct.
function generate_diversity_html_archive(starting_poem, sorted_poems, output_file, chrono_mapping)
    -- Single-file HTML archive of the diversity ordering: unlike the paginated
    -- pages it holds ALL poems (full corpus with images). The anchor is
    -- identified by poem_index, the globally unique id.
    -- Returns output_file on success, nil when the write fails.
    local anchor_index = starting_poem.poem_index
    local html = M.generate_flat_poem_list_html(
        starting_poem, sorted_poems, "different", anchor_index, chrono_mapping)
    local written = utils.write_file(output_file, html)
    return written and output_file or nil
end
3298-- }}}
3299
3300-- {{{ function M.generate_chronological_txt_file
function M.generate_chronological_txt_file(poems_data, output_file)
    -- Generate the TXT export of every poem in chronological order.
    -- Ordering comes from sort_poems_chronologically_by_dates (actual post
    -- dates, not poem IDs). The file is the shared header followed by each
    -- poem at 80-character width, with a blank line after every poem.
    --   poems_data  : full poems dataset
    --   output_file : destination path
    -- Returns output_file on success, nil when the write fails.
    local sorted_poems = sort_poems_chronologically_by_dates(poems_data)
    local header = generate_txt_file_header("POEMS IN CHRONOLOGICAL ORDER", #sorted_poems)

    -- Collect the pieces and join once: appending to one growing string would
    -- re-copy everything written so far on each poem (quadratic in corpus size).
    local parts = { header }
    for _, poem_info in ipairs(sorted_poems) do
        parts[#parts + 1] = format_single_poem_80_width(poem_info.poem)
        parts[#parts + 1] = "\n\n"
    end
    local content = table.concat(parts)

    return utils.write_file(output_file, content) and output_file or nil
end
3323-- }}}
3324
3325-- {{{ function M.generate_complete_flat_html_collection
3326-- Generates all similarity and diversity pages for the entire corpus
3327-- poems_data: full poems dataset
3328-- similarity_data: similarity matrix
3329-- embeddings_data: poem embeddings (for diversity calculation)
3330-- output_dir: base output directory
3331-- pages_spec: (optional) --pages flag value: nil/"default", "all", "1", "1-10" (Phase D: Issue 8-012)
3332-- poems_per_page: (optional) CLI override for poems per page (Issue 8-022)
3333-- num_threads: (optional) number of parallel threads (default: 1 = single-threaded)
3334-- chrono_per_page: (optional) CLI override for chronological poems per page (Issue 9-003)
3335function M.generate_complete_flat_html_collection(poems_data, similarity_data, embeddings_data, output_dir, pages_spec, poems_per_page, num_threads, chrono_per_page)
3336 -- Load diversity cache for fast HTML generation (Issue: diversity generation taking 42+ hours)
3337 -- Cache provides instant lookup of pre-computed GPU diversity sequences
3338 load_diversity_cache()
3339
3340 -- Load similarity rankings cache for fast HTML generation
3341 -- Cache provides instant lookup of pre-sorted similarity rankings (no O(n log n) sorting per poem)
3342 load_similarity_rankings_cache()
3343
3344 -- Load pagination config first
3345 load_pagination_config()
3346
3347 -- Issue 8-048: Flatten media files to output/media/ for easier deployment
3348 -- Must happen before HTML generation so paths resolve correctly
3349 flatten_media_files(output_dir)
3350
3351 -- Apply CLI override if provided. The honest summary below is logged
3352 -- whether or not an override was supplied — what the operator wants to
3353 -- see is "what value am I actually using," not "which knob set it."
3354 if poems_per_page and type(poems_per_page) == "number" and poems_per_page > 0 then
3355 PAGINATION_CONFIG.poems_per_page = poems_per_page
3356 end
3357
3358 -- Issue 10-057 follow-up: the storage ceiling on pages-per-poem is MEASURED from
3359 -- the budget and the last build's actual page sizes, not frozen in config (the old
3360 -- literal 15 would have shipped ~66GB into a 45GB quota). Self-corrects each build.
3361 PAGINATION_CONFIG.max_pages_per_poem =
3362 compute_storage_max_pages(output_dir, #(poems_data.poems or {}))
3363
3364 -- Issue 10-057: both neighbour caches may be capped to the top-K poems per poem
3365 -- (each stamps the K it was built with). If this run asks for more pages than that
3366 -- K can fill, a poem would silently get fewer pages than --pages requested. Fail
3367 -- loudly with the exact regen command instead of under-generating. A stamp of 0
3368 -- (or no stamp -- an older, uncapped cache) means "keep all", always enough.
3369 do
3370 local per_page = PAGINATION_CONFIG.poems_per_page
3371 local pages
3372 if not pages_spec or pages_spec == "" or pages_spec == "default" then
3373 pages = PAGINATION_CONFIG.minimum_pages
3374 elseif pages_spec == "all" then
3375 pages = PAGINATION_CONFIG.max_pages_per_poem
3376 else
3377 pages = tonumber(pages_spec)
3378 or tonumber(tostring(pages_spec):match("(%d+)$"))
3379 or PAGINATION_CONFIG.minimum_pages
3380 end
3381 local needed_k = pages * per_page
3382 local function check_cache(cache, label, regen_flag)
3383 local meta = cache and cache.metadata
3384 local stored_k = meta and tonumber(meta.top_k) or 0
3385 if stored_k > 0 and stored_k < needed_k then
3386 error(string.format(
3387 "%s cache holds only top-%d per poem, but this run needs %d (%d page(s) "
3388 .. "x %d poems/page). Regenerate it for these settings: ./run.sh %s "
3389 .. "--pages %d --poems-per-page %d",
3390 label, stored_k, needed_k, pages, per_page, regen_flag, pages, per_page))
3391 end
3392 end
3393 check_cache(SIMILARITY_RANKINGS_CACHE, "Similarity", "--generate-similarity")
3394 check_cache(DIVERSITY_CACHE, "Diversity", "--generate-diversity")
3395 end
3396
3397 -- Count poems with valid poem_index (globally unique identifier)
3398 -- Note: poem.id is per-category and NOT unique across categories
3399 -- poem_index is the globally unique identifier used by embeddings/similarity
3400 local valid_poems = {}
3401 for i, poem in ipairs(poems_data.poems) do
3402 if poem.poem_index then
3403 valid_poems[poem.poem_index] = poem
3404 end
3405 end
3406
3407 local total_poems = 0
3408 for _ in pairs(valid_poems) do
3409 total_poems = total_poems + 1
3410 end
3411
3412 -- Parse pages specification (Phase D: Issue 8-012)
3413 local pages_config = parse_pages_specification(pages_spec, nil) -- total_pages not known yet
3414 local use_pagination = true -- Always use pagination now (Phase D)
3415
3416 -- Report the pages-per-poem THIS run will actually generate, not the storage
3417 -- ceiling. The orchestrator worker generates #pages_config.pages pages per poem
3418 -- (one page by default); the old banner printed the 15-page storage cap
3419 -- unconditionally, which read as "generating 15 pages" when it generates 1.
3420 -- The cap is still shown, clearly labelled as a ceiling, for context.
3421 local pages_per_poem = pages_config.is_all
3422 and PAGINATION_CONFIG.max_pages_per_poem
3423 or (pages_config.pages and #pages_config.pages or 1)
3424 utils.log_info(string.format(
3425 "Similarity/diversity pagination: %d poems/page, %d page(s) per poem (storage ceiling: %d pages, %dGB)",
3426 PAGINATION_CONFIG.poems_per_page,
3427 pages_per_poem,
3428 PAGINATION_CONFIG.max_pages_per_poem,
3429 STORAGE_CONFIG.limit_gb))
3430
3431 local results = {
3432 similarity_pages = {},
3433 diversity_pages = {},
3434 chronological_index = nil,
3435 txt_files = {},
3436 html_archives = {},
3437 instructions_page = nil
3438 }
3439
3440 -- Normalize num_threads
3441 num_threads = num_threads or 1
3442 if num_threads < 1 then num_threads = 1 end
3443
3444 -- Issue 10-057 (Piece 1, wired): clamp the worker count to what fits in free RAM
3445 -- before spawning. After the cache cap (Fix B) the fixed cost is small, so on a
3446 -- roomy machine this is a no-op -- but it is the guard rail that keeps a big corpus
3447 -- or a small box out of swap, and it logs the estimate either way.
3448 if num_threads > 1 then
3449 local budget = require("memory-budgeter")
3450 local model = inference_config.get_selected_model()
3451 -- fixed: the two neighbour caches the orchestrator holds resident (file size x
3452 -- ~2.5 for the parsed Lua table) plus the ~12MB poems data already in RAM.
3453 local sim_file = utils.embeddings_dir(model) .. "/similarity_rankings_cache.json"
3454 local div_file = utils.embeddings_dir_disk(model) .. "/diversity_cache.json"
3455 local fixed = ((budget.file_size_bytes(sim_file) or 0)
3456 + (budget.file_size_bytes(div_file) or 0)) * 2.5 + 12e6
3457 -- per worker: an effil Lua state (~25MB) plus the one page it builds at a time.
3458 num_threads = budget.fit_threads({
3459 pool = "ram", fixed = fixed, per_thread = 30e6,
3460 want = num_threads, label = "HTML",
3461 })
3462 end
3463
3464 -- Build ordered list of poem indices for batch distribution
3465 local poem_indices = {}
3466 for poem_index, _ in pairs(valid_poems) do
3467 table.insert(poem_indices, poem_index)
3468 end
3469 table.sort(poem_indices) -- Ensure consistent ordering across runs
3470
3471 -- Issue 10-036: Compute chrono_mapping before parallel/sequential split so both paths can use it
3472 local chronological_paginated = PAGINATION_CONFIG.chronological_paginated
3473 local chrono_poems_per_page_config = PAGINATION_CONFIG.chronological_poems_per_page or 500
3474 local effective_chrono_per_page = chrono_poems_per_page_config
3475 if chrono_per_page and type(chrono_per_page) == "number" and chrono_per_page > 0 then
3476 effective_chrono_per_page = chrono_per_page
3477 chronological_paginated = true
3478 end
3479 local chrono_mapping = compute_chronological_mapping(poems_data, chronological_paginated and effective_chrono_per_page or nil)
3480
3481 -- Check if parallel processing is available and requested
3482 local use_parallel = num_threads > 1 and has_threading and effil
3483
3484 if use_parallel then
3485 -- {{{ Parallel processing with effil threads (Issue 10-034: Orchestrator pattern)
3486 -- Main thread acts as cache server, sending 80KB work slices instead of workers loading 700MB
3487 utils.log_info(string.format("Using parallel processing with %d threads (orchestrator mode)", num_threads))
3488
3489 -- Issue 10-034: Create channels for orchestrator communication
3490 -- Workers request work → main sends slices → workers report completion
3491 local work_request_channel = effil.channel() -- Workers → Main: work requests + completions
3492 local work_response_channels = {} -- Main → Worker[i]: work slices or shutdown
3493 for t = 1, num_threads do
3494 work_response_channels[t] = effil.channel()
3495 end
3496
3497 -- Issue 10-034: Build work queue (all poem indices that need processing)
3498 local work_queue = {}
3499 for _, poem_index in ipairs(poem_indices) do
3500 table.insert(work_queue, poem_index)
3501 end
3502 local total_work = #work_queue
3503
3504 -- Issue 10-036: chrono_mapping is now computed before parallel/sequential split
3505 -- (see Issue 9-003 Fix D for original rationale)
3506
3507 -- Prepare shared config for threads (serializable data only)
3508 local thread_config = {
3509 dir = DIR,
3510 output_dir = output_dir,
3511 -- Issue 9-013: where the worker finds the image pseudo-poem manifest
3512 image_manifest_path = utils.embeddings_dir() .. "/image-manifest.json",
3513 pages_is_all = pages_config.is_all,
3514 pages_list = pages_config.pages,
3515 poems_per_page = PAGINATION_CONFIG.poems_per_page,
3516 generate_html_archives = PAGINATION_CONFIG.generate_html_archives,
3517 generate_txt_exports = PAGINATION_CONFIG.generate_txt_exports,
3518 -- Issue 9-003 Fix D: Full formatting data
3519 chrono_mapping = chrono_mapping,
3520 chrono_paginated = chronological_paginated,
3521 -- Issue 8-055: Pass layout constants to worker threads for consistency
3522 layout = {
3523 golden_poem_width = LAYOUT.GOLDEN_POEM_WIDTH or 84,
3524 regular_poem_width = LAYOUT.REGULAR_POEM_WIDTH or 82,
3525 text_content_width = LAYOUT.TEXT_CONTENT_WIDTH or 80,
3526 golden_left_junction = LAYOUT.GOLDEN_LEFT_JUNCTION_POS or 10,
3527 golden_right_junction = LAYOUT.GOLDEN_RIGHT_JUNCTION_POS or 71,
3528 regular_left_junction = LAYOUT.REGULAR_LEFT_JUNCTION_POS or 10,
3529 regular_right_junction = LAYOUT.REGULAR_RIGHT_JUNCTION_POS or 70
3530 }
3531 }
3532
3533 -- Create and launch worker threads
3534 local threads = {}
3535 local start_time = os.time()
3536
3537 -- Issue 10-034: Launch workers that request work from orchestrator
3538 for thread_id = 1, num_threads do
3539 -- effil.thread creates a new Lua state that runs the function
3540 -- Workers receive work slices via channels instead of loading full caches
3541 local thread_func = effil.thread(function(config, tid, request_channel, response_channel)
3542 -- Set up package paths in thread context
3543 package.path = config.dir .. "/libs/?.lua;" .. config.dir .. "/src/?.lua;" .. package.path
3544
3545 -- Load required modules in thread context
3546 local t_utils = require('utils')
3547 local t_dkjson = require('dkjson')
3548 -- Issue 8-056: Shared text formatting module for whitespace preservation
3549 local t_text_formatter = require('text-formatter')
3550 -- Shared box/bar drawing (canonical geometry) so this worker copy
3551 -- can't drift from the main thread's bars. See poem-bars.lua.
3552 local t_poem_bars = require('poem-bars')
3553 -- Issue 9-013: fold ranked image pseudo-poems into this worker's
3554 -- poem list so it draws them instead of dropping unknown indices.
3555 local t_image_render = require('image-render')
3556 t_utils.init_assets_root({config.dir})
3557
3558 -- Load data files (each thread loads independently - files are in disk cache)
3559 local poems_file = t_utils.asset_path("poems.json")
3560 local poems_data = t_utils.read_json_file(poems_file)
3561 if not poems_data then
3562 error("Thread " .. tid .. ": Failed to load poems.json")
3563 end
3564 t_image_render.inject_pseudo_poems(poems_data,
3565 t_image_render.load_manifest(config.image_manifest_path, t_utils.read_json_file))
3566
3567 -- Build poem lookup by poem_index
3568 local poem_lookup = {}
3569 for i, poem in ipairs(poems_data.poems) do
3570 if poem.poem_index then
3571 poem_lookup[poem.poem_index] = poem
3572 end
3573 end
3574
3575 -- Issue 10-034: Caches NOT loaded here - orchestrator sends work slices
3576 -- This saves 700MB RAM per worker thread
3577
3578 -- Load poem colors (small file: ~900KB, acceptable per-worker)
3579 local poem_colors_file = t_utils.embeddings_dir() .. "/poem_colors.json"
3580 local poem_colors_data = t_utils.read_json_file(poem_colors_file)
3581 local poem_colors = poem_colors_data and poem_colors_data.poem_colors or {}
3582
3583 -- Color config for progress bars
3584 local color_config = {
3585 red = "#dc3c3c", blue = "#3c78dc", green = "#3cb45a",
3586 purple = "#8c3cc8", orange = "#e68c3c", yellow = "#c8b428", gray = "#787878"
3587 }
3588
3589 -- Local helper: Get unique filename ID for poem
3590 local function get_unique_id(poem)
3591 local cat_prefix = (poem.category or "unknown"):sub(1, 1):lower()
3592 local id_num = poem.id or poem.poem_index or 0
3593 return string.format("%s-%04d", cat_prefix, id_num)
3594 end
3595
3596 -- {{{ Local helper: Get source path for poem identification in ranking headers
3597 -- Issue 8-036: Returns human-readable source path for each category
3598 local function get_source_path(poem)
3599 local category = poem.category or "unknown"
3600 if category == "notes" and poem.metadata and poem.metadata.source_file then
3601 -- Notes show original descriptive filename
3602 return "notes/" .. poem.metadata.source_file
3603 elseif category == "bluesky" then
3604 -- Bluesky uses # notation
3605 return "bluesky#" .. (poem.id or 0)
3606 elseif category == "fediverse" then
3607 -- Fediverse shows category/id
3608 return "fediverse/" .. (poem.id or 0)
3609 elseif category == "messages" then
3610 -- Messages shows category/id
3611 return "messages/" .. (poem.id or 0)
3612 else
3613 return category .. "/" .. (poem.id or poem.poem_index or 0)
3614 end
3615 end
3616 -- }}}
3617
3618 -- {{{ Local helper: Check if poem is golden (exactly 1024 chars when posted)
3619 -- Issue 8-044: Use pre-calculated metadata as single source of truth
3620 local function is_golden_poem(poem)
3621 if poem.metadata and poem.metadata.is_golden_poem then
3622 return true
3623 end
3624 return false
3625 end
3626 -- }}}
3627
3628 -- {{{ Local helper: Check if poem is a boost (Issue 8-057)
3629 local function is_boost_poem(poem)
3630 if poem.metadata and poem.metadata.is_boost then
3631 return true
3632 end
3633 return false
3634 end
3635 -- }}}
3636
3637 -- Issue 8-057: Boost color configuration for worker thread
3638 local BOOST_COLORS = {
3639 arrow = "#dc3c3c", -- Red: ◀═ and ─▶ arrows, [BOOST] label
3640 outer_frame = "#3c78dc", -- Blue: ╔═╗║╚═╝ outer frame
3641 inner_box = "#2aa198", -- Teal: ┌─┐│└─┘ inner content box
3642 content_text = "#c8b428" -- Yellow: boosted text content
3643 }
3644
3645 -- Same shared boost-frame module the main thread uses; require()
3646 -- reloads it fresh in this isolated worker state (only live
3647 -- closures can't cross states, plain modules reload from disk).
3648 local t_boost_bars = require('boost-bars')
3649 t_boost_bars.configure(BOOST_COLORS)
3650
3651 -- Local helper: Build poem lookup by poem_index for ranking conversion
3652 local function build_poem_by_index()
3653 local lookup = {}
3654 for i, poem in ipairs(poems_data.poems) do
3655 if poem.poem_index then
3656 lookup[poem.poem_index] = poem
3657 end
3658 end
3659 return lookup
3660 end
3661 local poem_by_index = build_poem_by_index()
3662
3663 -- Issue 10-034: Convert similarity ranking (raw indices) to poem objects
3664 -- ranking_data is an array of poem indices received from orchestrator
3665 local function convert_similarity_ranking(ranking_data, source_poem_index)
3666 if not ranking_data then return {} end
3667 local result = {}
3668 for i, neighbor_index in ipairs(ranking_data) do
3669 local neighbor_poem = poem_by_index[neighbor_index]
3670 if neighbor_poem then
3671 table.insert(result, {
3672 poem = neighbor_poem,
3673 rank = i
3674 })
3675 end
3676 end
3677 return result
3678 end
3679
3680 -- Issue 10-034: Convert diversity sequence (raw indices) to poem objects
3681 -- sequence_data is an array of poem indices received from orchestrator
3682 -- Issue 10-025: Skip anchor poem (GPU cache stores source poem as first entry)
3683 local function convert_diversity_sequence(sequence_data, source_poem_index)
3684 if not sequence_data then return {} end
3685 local result = {}
3686 for step, neighbor_index in ipairs(sequence_data) do
3687 if neighbor_index ~= source_poem_index then
3688 local neighbor_poem = poem_by_index[neighbor_index]
3689 if neighbor_poem then
3690 table.insert(result, {
3691 id = neighbor_index,
3692 poem = neighbor_poem,
3693 step = step
3694 })
3695 end
3696 end
3697 end
3698 return result
3699 end
3700
3701 -- {{{ Issue 8-057: Boost formatting functions for worker thread
3702
3703 -- Worker: apply complete boost formatting. All geometry lives in
3704 -- the shared boost-bars module (top/inner/content/nav/bottom +
3705 -- the asymmetric fill-frontier right edge). The worker only splits
3706 -- the pre-wrapped content into lines; txt_fmt is unused now that
3707 -- the module owns visible-width padding.
3708 local function worker_apply_boost_formatting(content, progress_percent, similar_link, different_link, chronological_link, txt_fmt)
3709 local lines = {}
3710 for line in (content .. "\n"):gmatch("(.-)\n") do
3711 table.insert(lines, line)
3712 end
3713 local include_nav = (similar_link and different_link) and true or false
3714 return t_boost_bars.format_boost(
3715 lines, progress_percent, similar_link, different_link, chronological_link, include_nav)
3716 end
3717 -- }}} End Issue 8-057: Boost formatting functions
3718
3719 -- Local helper: Format single poem with full formatting (Issue 9-003 Fix D)
3720 -- Includes progress bars, navigation box, and chronological page links
3721 -- Issue 8-044: Added golden poem formatting support
3722 -- Issue 8-057: Added boost formatting support
3723 local function format_poem_entry(poem, poem_colors_tbl, clr_config, chrono_map, chrono_paged)
3724 -- Issue 9-013: a ranked IMAGE entry draws as an image box, not a poem.
3725 if poem.is_image then
3726 return t_image_render.format_image_entry(poem)
3727 end
3728 local poem_idx = poem.poem_index
3729 local poem_color_data = poem_colors_tbl[poem_idx]
3730 local semantic_color = poem_color_data and poem_color_data.color or "gray"
3731 local hex_color = clr_config[semantic_color] or clr_config["gray"]
3732 -- Hand the shared bar module this state's palette (idempotent).
3733 t_poem_bars.configure(clr_config)
3734
3735 -- Issue 8-044: Check if this is a golden poem
3736 local is_golden = is_golden_poem(poem)
3737
3738 -- Issue 8-057: Check if this is a boost and handle with special formatting
3739 local is_boost = is_boost_poem(poem)
3740
3741 -- Get chronological position from mapping
3742 local chrono_info = chrono_map[poem_idx] or {position = 1, page_number = 1, total_poems = 1, total_pages = 1, timeline_progress = 50}
3743 -- Issue 8-045: Use timeline_progress (time-based) instead of position-based
3744 -- Shows actual temporal position in the author's timeline, not just poem count
3745 local progress_pct = chrono_info.timeline_progress or ((chrono_info.position / chrono_info.total_poems) * 100)
3746
3747 -- Calculate progress bar chars
3748 -- Golden: 82 interior chars + 2 corners = 84 total
3749 -- Regular: 83 chars total (no corners on top bar)
3750 local total_bar_chars = is_golden and 82 or 83
3751 local progress_chars = math.floor((progress_pct / 100) * total_bar_chars)
3752 local remaining_chars = total_bar_chars - progress_chars
3753
3754 -- Top bar from the shared poem-bars module (canonical 83
3755 -- regular / 84 golden). progress_chars above is still used to
3756 -- progressively colour the regular nav corner boxes below.
3757 local colored_progress = t_poem_bars.progress_dashes(
3758 { percentage = progress_pct }, semantic_color, is_golden, "top", false).visual
3759
3760 -- Navigation links (absolute paths for local testing)
3761 -- Issue 9-003 Fix: Use absolute file:// paths - helper script converts to production URLs
3762 local base_path = ".."
3763 local similar_link = string.format("<a href='%s/similar/%04d-01.html'>similar</a>", base_path, poem_idx)
3764 local different_link = string.format("<a href='%s/different/%04d-01.html'>different</a>", base_path, poem_idx)
3765 -- Issue 16-006: Use poem_index for simpler, machine-readable anchor format
3766 -- Old format: "poem-fediverse-0042" (leaked category info)
3767 -- New format: "poem-4625" (just the unique poem_index)
3768 local anchor_id = string.format("poem-%d", poem.poem_index or 0)
3769
3770 -- Issue 8-039: Chronological link points to subdirectory
3771 local chrono_link
3772 if chrono_paged and chrono_info.total_pages > 1 then
3773 -- Paginated: chronological/01.html, chronological/02.html, etc.
3774 chrono_link = string.format("<a href='%s/chronological/%02d.html#%s'>chronological</a>",
3775 base_path, chrono_info.page_number, anchor_id)
3776 else
3777 -- Single page: chronological/index.html
3778 chrono_link = string.format("<a href='%s/chronological/index.html#%s'>chronological</a>", base_path, anchor_id)
3779 end
3780
3781 -- Issue 8-057: Handle boost poems with special nested frame formatting
3782 -- Boosts return early with their complete formatting (arrows, [BOOST] label, nested frames)
3783 if is_boost then
3784 local boost_content = poem.content or ""
3785 -- Escape HTML in content
3786 boost_content = boost_content:gsub("[%z\1-\8\11\12\14-\31]", ""):gsub("&", "&"):gsub("<", "<"):gsub(">", ">")
3787
3788 -- Issue 10-037: Defensive fallback for blank boost content (worker thread)
3789 -- If content is empty, display the original URI or diagnostic message
3790 if boost_content == "" or boost_content:match("^%s*$") then
3791 local original_uri = poem.metadata and poem.metadata.original_uri
3792 if original_uri then
3793 -- Escape HTML in URI
3794 local safe_uri = original_uri:gsub("[%z\1-\8\11\12\14-\31]", ""):gsub("&", "&"):gsub("<", "<"):gsub(">", ">")
3795 boost_content = "External post: " .. safe_uri
3796 else
3797 boost_content = "(Boost content unavailable)"
3798 end
3799 end
3800
3801 -- Issue 10-039: Make external boost URLs clickable (worker thread)
3802 -- Pattern: "External post: https://..." -> wrap URL in anchor tag
3803 local external_pattern = "^External post: (https?://[^%s]+)$"
3804 local external_url = boost_content:match(external_pattern)
3805 if external_url then
3806 -- Wrap the URL across box lines instead of overflowing.
3807 boost_content = t_text_formatter.wrap_external_url("External post: ", external_url, t_boost_bars.CONTENT_WIDTH)
3808 else
3809 -- Issue 10-041: Wrap long embedded content to fit the boost box.
3810 -- Only wrap non-external-post content (external posts keep URLs intact)
3811 local BOOST_CONTENT_WIDTH = t_boost_bars.CONTENT_WIDTH
3812 local wrapped_lines = {}
3813 for line in (boost_content .. "\n"):gmatch("(.-)\n") do
3814 local wrapped = t_text_formatter.wrap_preserving_indent(line, BOOST_CONTENT_WIDTH)
3815 for _, wrapped_line in ipairs(wrapped) do
3816 table.insert(wrapped_lines, wrapped_line)
3817 end
3818 end
3819 boost_content = table.concat(wrapped_lines, "\n")
3820 end
3821
3822 -- Calculate progress as decimal (0-1)
3823 local progress_decimal = progress_pct / 100
3824
3825 -- Apply boost formatting with all frame elements
3826 local boost_formatted = worker_apply_boost_formatting(
3827 boost_content, progress_decimal,
3828 similar_link, different_link, chrono_link,
3829 t_text_formatter
3830 )
3831
3832 -- Build output including any attached media
3833 local output = { boost_formatted }
3834
3835 -- Handle media attachments for boosts (same logic as regular poems)
3836 local media_base = ".."
3837 local has_media = false
3838 local media_atts = {}
3839 if poem.attachments and #poem.attachments > 0 then
3840 for _, att in ipairs(poem.attachments) do
3841 local mt = att.media_type or ""
3842 if mt:match("^image/") or mt:match("^audio/") or mt:match("^video/") then
3843 table.insert(media_atts, att)
3844 has_media = true
3845 end
3846 end
3847 end
3848
3849 if has_media then
3850 table.insert(output, "</pre>")
3851 for _, att in ipairs(media_atts) do
3852 local rpath = att.relative_path or ""
3853 -- media_href: namespace art by source+subdir,
3854 -- url-encode (this is the path that previously
3855 -- emitted the raw broken "...TROUBLE-U-?...png"
3856 -- link on the similarity pages); Mastodon stays flat.
3857 local media_src = "../media/" .. media_href(rpath)
3858 local media_type = att.media_type or "image/png"
3859 if media_type:match("^image/") then
3860 local alt = att.description and att.description ~= "" and att.description or "Image attachment"
3861 if att.width and att.height then
3862 table.insert(output, string.format(
3863 ' <img src="%s" alt="%s" loading="lazy" width="%d" height="%d" style="display:block; max-width:min(100%%,800px); height:auto">',
3864 media_src, alt, att.width, att.height
3865 ))
3866 else
3867 table.insert(output, string.format(
3868 ' <img src="%s" alt="%s" loading="lazy" style="display:block; max-width:min(100%%,800px); height:auto">',
3869 media_src, alt
3870 ))
3871 end
3872 elseif media_type:match("^audio/") then
3873 table.insert(output, string.format(
3874 ' <audio controls preload="metadata" style="display:block; max-width:100%%">\n' ..
3875 ' <source src="%s" type="%s">\n' ..
3876 ' Your browser does not support the audio element.\n' ..
3877 ' </audio>',
3878 media_src, media_type
3879 ))
3880 elseif media_type:match("^video/") then
3881 if att.width and att.height then
3882 table.insert(output, string.format(
3883 ' <video controls preload="metadata" width="%d" height="%d" style="display:block; max-width:min(100%%,800px); height:auto">\n' ..
3884 ' <source src="%s" type="%s">\n' ..
3885 ' Your browser does not support the video element.\n' ..
3886 ' </video>',
3887 att.width, att.height, media_src, media_type
3888 ))
3889 else
3890 table.insert(output, string.format(
3891 ' <video controls preload="metadata" style="display:block; max-width:min(100%%,800px); height:auto">\n' ..
3892 ' <source src="%s" type="%s">\n' ..
3893 ' Your browser does not support the video element.\n' ..
3894 ' </video>',
3895 media_src, media_type
3896 ))
3897 end
3898 end
3899 end
3900 table.insert(output, "<pre>")
3901 end
3902
3903 return table.concat(output, "\n")
3904 end
3905
3906 -- Standard formatting for golden and regular (non-boost) poems
3907 -- Wrap content to 80 chars while preserving paragraph breaks
3908 -- Also handle content warnings (CW: or content warning:)
3909 local content = poem.content or ""
3910
3911 -- Issue 8-041: Escape HTML special characters in poem content
3912 -- Prevents browser from interpreting poem content as HTML markup
3913 -- (e.g., a poem containing "</pre>" would otherwise close the preformatted block)
3914 -- Order: & first, then < and > (otherwise < becomes &lt;)
3915 content = content:gsub("[%z\1-\8\11\12\14-\31]", ""):gsub("&", "&"):gsub("<", "<"):gsub(">", ">")
3916
3917 local wrapped_lines = {}
3918
3919 -- Issue 9-011: Display content warning from poem.content_warning field (Mastodon CW)
3920 -- This is separate from in-content CW: patterns - it comes from ActivityPub summary field
3921 if poem.content_warning and poem.content_warning ~= "" then
3922 -- Build box around ActivityPub content warning
3923 local cw_display = "CW: " .. poem.content_warning
3924 local box_width = math.min(math.max(#cw_display, 20), 76)
3925 local padded_cw = cw_display .. string.rep(" ", box_width - #cw_display)
3926 table.insert(wrapped_lines, " ┌" .. string.rep("─", box_width + 2) .. "┐")
3927 table.insert(wrapped_lines, " │ " .. padded_cw .. " │")
3928 table.insert(wrapped_lines, " └" .. string.rep("─", box_width + 2) .. "┘")
3929 table.insert(wrapped_lines, "") -- Empty line after CW
3930 table.insert(wrapped_lines, "") -- Second empty line for spacing
3931 end
3932
3933 -- Check for content warning at start
3934 local cw_text = nil
3935 local main_content = content
3936 local cw_match = content:match("^%s*[Cc][Ww]%s*:(.-)[\n\r]")
3937 if not cw_match then
3938 cw_match = content:match("^%s*[Cc]ontent [Ww]arning%s*:(.-)[\n\r]")
3939 end
3940 if cw_match then
3941 cw_text = cw_match:match("^%s*(.-)%s*$") -- trim whitespace
3942 -- Remove the CW line from main content
3943 main_content = content:gsub("^%s*[Cc][Ww]%s*:[^\n\r]*[\n\r]?", "")
3944 main_content = main_content:gsub("^%s*[Cc]ontent [Ww]arning%s*:[^\n\r]*[\n\r]?", "")
3945 end
3946
3947 -- If there's a content warning, format it in a box
3948 if cw_text and #cw_text > 0 then
3949 -- Build simple box around CW
3950 local cw_display = "CW: " .. cw_text
3951 local box_width = math.min(math.max(#cw_display, 20), 76)
3952 local padded_cw = cw_display .. string.rep(" ", box_width - #cw_display)
3953 table.insert(wrapped_lines, " ┌" .. string.rep("─", box_width + 2) .. "┐")
3954 table.insert(wrapped_lines, " │ " .. padded_cw .. " │")
3955 table.insert(wrapped_lines, " └" .. string.rep("─", box_width + 2) .. "┘")
3956 table.insert(wrapped_lines, "") -- Empty line after CW
3957 end
3958
3959 -- Issue 8-056: Preserve whitespace for ALL categories
3960 -- Poetry is artistic content - author's spacing must be respected
3961 -- Use shared text-formatter module for consistent behavior with main thread
3962 local content_lines = t_text_formatter.format_poem_content(main_content)
3963 for _, line in ipairs(content_lines) do
3964 table.insert(wrapped_lines, line)
3965 end
3966
3967 -- Issue 8-044: Apply golden side borders to content lines
3968 -- Golden poems get ║ (colored) on left and │ on right
3969 -- Total width: ║ (1) + space (1) + 80 chars content + space (1) + │ (1) = 84 total
3970 if is_golden then
3971 local golden_lines = {}
3972 local colored_wall = string.format('<font color="%s"><b>║</b></font>', hex_color)
3973 -- Issue 8-055: Use config layout values instead of hardcoded 80
3974 local CONTENT_WIDTH = config.layout and config.layout.text_content_width or 80
3975
3976 -- Helper to count UTF-8 characters (not bytes)
3977 -- Box-drawing chars are 3 bytes each, so #str gives wrong count
3978 local function utf8_char_count(str)
3979 -- Remove UTF-8 continuation bytes (0x80-0xBF), count what remains
3980 return #(str:gsub("[\128-\191]", ""))
3981 end
3982
3983 for _, line in ipairs(wrapped_lines) do
3984 -- Strip the leading space that word-wrap added (we'll add our own)
3985 local content = line:match("^%s*(.*)$") or line
3986
3987 -- Calculate visible length (excluding HTML tags, counting UTF-8 chars)
3988 -- Issue 8-055: Also decode HTML entities for accurate width counting
3989 -- e.g., > is 4 bytes but displays as 1 character (>)
3990 local visible_length = t_text_formatter.calculate_visible_width(content)
3991
3992 -- Pad content to 80 chars
3993 local padded_content
3994 if visible_length >= CONTENT_WIDTH then
3995 padded_content = content
3996 else
3997 local padding_needed = CONTENT_WIDTH - visible_length
3998 padded_content = content .. string.rep(" ", padding_needed)
3999 end
4000
4001 -- Add side borders: ║ + space + 80 chars + space + │ = 84 total
4002 table.insert(golden_lines, colored_wall .. " " .. padded_content .. " │")
4003 end
4004 wrapped_lines = golden_lines
4005 end
4006
4007 -- Build navigation box matching reference implementation
4008 -- Regular poem structure: 83 chars total (positions 0-82)
4009 -- ┌─────────┐ (11 chars) + 59 spaces + ┌───────────┐ (13 chars) = 83 chars
4010
4011 -- Issue 8-035: Helper to colorize box characters based on progress
4012 local function color_char(char, pos)
4013 if progress_chars > pos then
4014 return string.format('<font color="%s"><b>%s</b></font>', hex_color, char)
4015 end
4016 return char
4017 end
4018
4019 -- Build nav_top and nav_mid
4020 -- Issue 8-044: Golden poems use different box characters
4021 -- Nav top + line come from the shared poem-bars module so this
4022 -- worker can't drift from the main thread (the whole point of
4023 -- the de-dup). Golden nav box dashes are not progress-tinted,
4024 -- which now matches the chronological (main-rendered) golden
4025 -- poems exactly.
4026 local nav_top, nav_mid
4027 if is_golden then
4028 nav_top = t_poem_bars.golden_corner_box_separator(hex_color, progress_chars)
4029 nav_mid = t_poem_bars.golden_corner_box_nav_line(similar_link, different_link, chrono_link, hex_color, progress_chars)
4030 else
4031 nav_top = t_poem_bars.corner_box_top(progress_chars, hex_color)
4032 nav_mid = t_poem_bars.corner_box_nav_line(similar_link, different_link, chrono_link, progress_chars, hex_color)
4033 end
4034
4035 -- Bottom line: delegate to the shared poem-bars module so this
4036 -- worker cannot drift from the main thread's canonical geometry.
4037 -- The old inline copy used the 82-char CONTENT width as the BAR
4038 -- width, so the bar ended one column short of the nav boxes (and
4039 -- an earlier version produced 88-char bars with doubled ╧╧).
4040 -- progress_dashes is correct for both regular (83) and golden
4041 -- (84) and seats the junctions under the corner-box walls.
4042 local bottom_line = t_poem_bars.progress_dashes(
4043 { percentage = progress_pct }, semantic_color, is_golden, "bottom", true).visual
4044
4045 -- Build formatted output
4046 local output = {}
4047 table.insert(output, colored_progress) -- Top progress bar (golden: 84 chars, regular: 83 chars)
4048 table.insert(output, table.concat(wrapped_lines, "\n")) -- Content with preserved newlines
4049
4050 -- Issue 8-040: Render attached images if present (from ActivityPub extraction)
4051 -- Images appear after poem content, before navigation links
4052 -- Must be inline since worker thread can't access main scope functions
4053 local base_path = ".."
4054
4055 -- Issue 8-049: Check if we have any media to render (images, audio, video)
4056 -- Issue 9-010: Media stays with their original post only (no associated_images rendering)
4057 local has_any_media = false
4058 local media_attachments = {}
4059 if poem.attachments and #poem.attachments > 0 then
4060 for _, att in ipairs(poem.attachments) do
4061 local mt = att.media_type or ""
4062 if mt:match("^image/") or mt:match("^audio/") or mt:match("^video/") then
4063 table.insert(media_attachments, att)
4064 has_any_media = true
4065 end
4066 end
4067 end
4068
4069 -- If there are media attachments, close </pre>, render them, reopen <pre>
4070 -- Issue 8-005 Fix: Media rendered outside <pre> for proper max-width behavior
4071 -- display:block prevents side-by-side, max-width:min(100%,800px) caps width
4072 if has_any_media then
4073 table.insert(output, "</pre>")
4074 for _, attachment in ipairs(media_attachments) do
4075 -- Issue 8-048: Use flat output/media/ path structure
4076 local relative_path = attachment.relative_path or ""
4077 -- media_href: namespace art by source+subdir (collision-
4078 -- safe) + url-encode; Mastodon hashes stay flat.
4079 local media_src = base_path .. "/media/" .. media_href(relative_path)
4080 local media_type = attachment.media_type or ""
4081
4082 if media_type:match("^image/") then
4083 local alt_text = attachment.description or attachment.alt_text or "Image attachment"
4084 -- Issue 8-053: Normalize newlines to spaces for clean HTML attributes
4085 alt_text = alt_text:gsub("\n", " "):gsub("\r", "")
4086 alt_text = alt_text:gsub('"', '"')
4087 -- Issue 8-053: title attribute provides mouse-over tooltip
4088 local img_tag = string.format(
4089 ' <img src="%s" alt="%s" title="%s" loading="lazy" style="display:block; max-width:min(100%%,800px); height:auto"',
4090 media_src, alt_text, alt_text
4091 )
4092 if attachment.width and attachment.height then
4093 img_tag = img_tag .. string.format(' width="%d" height="%d"', attachment.width, attachment.height)
4094 end
4095 img_tag = img_tag .. '>'
4096 table.insert(output, img_tag)
4097
4098 elseif media_type:match("^audio/") then
4099 -- Issue 8-049: Audio playback support
4100 local audio_tag = string.format(
4101 ' <audio controls preload="metadata" style="display:block; max-width:100%%">\n' ..
4102 ' <source src="%s" type="%s">\n' ..
4103 ' Your browser does not support the audio element.\n' ..
4104 ' </audio>',
4105 media_src, media_type
4106 )
4107 table.insert(output, audio_tag)
4108
4109 elseif media_type:match("^video/") then
4110 -- Issue 8-049: Video playback support
4111 local video_tag
4112 if attachment.width and attachment.height then
4113 video_tag = string.format(
4114 ' <video controls preload="metadata" width="%d" height="%d" style="display:block; max-width:min(100%%,800px); height:auto">\n' ..
4115 ' <source src="%s" type="%s">\n' ..
4116 ' Your browser does not support the video element.\n' ..
4117 ' </video>',
4118 attachment.width, attachment.height, media_src, media_type
4119 )
4120 else
4121 video_tag = string.format(
4122 ' <video controls preload="metadata" style="display:block; max-width:min(100%%,800px); height:auto">\n' ..
4123 ' <source src="%s" type="%s">\n' ..
4124 ' Your browser does not support the video element.\n' ..
4125 ' </video>',
4126 media_src, media_type
4127 )
4128 end
4129 table.insert(output, video_tag)
4130 end
4131 end
4132 table.insert(output, "<pre>")
4133 end
4134
4135 table.insert(output, nav_top) -- Nav box top (golden: 84, regular: 83 chars)
4136 table.insert(output, nav_mid) -- Nav box middle
4137 table.insert(output, bottom_line) -- Bottom with junctions
4138
4139 return table.concat(output, "\n")
4140 end
4141
4142 -- Local helper: Generate paginated HTML page
4143 -- Issue 9-003 Fix D: Added chrono_map and chrono_paged for full formatting
4144 local function generate_page(poem, sorted_list, page_type, page_num, poems_per_pg, out_dir, chrono_map, chrono_paged)
4145 local start_idx = (page_num - 1) * poems_per_pg + 1
4146 local end_idx = math.min(start_idx + poems_per_pg - 1, #sorted_list)
4147 if start_idx > #sorted_list then return nil end
4148
4149 local type_label = page_type == "similar" and "similarity" or "diversity"
4150 local poem_idx_str = string.format("%04d", poem.poem_index or 0)
4151 local filename = string.format("%s/%s/%s-%02d.html", out_dir, page_type, poem_idx_str, page_num)
4152
4153 -- Build HTML content with full formatting
4154 -- Issue 9-003 Fix: Use centered table for block centering with left-aligned text inside
4155 -- Issue 16-010: Added inline font style for Hack Nerd Font font-stack
4156 local font_style = [[<style>body, pre { font-family: 'Hack Nerd Font', 'Hack', 'Fira Code', 'JetBrains Mono', 'Cascadia Code', 'Consolas', 'Monaco', 'Liberation Mono', 'Courier New', monospace; }</style>]]
4157 local html_parts = {
4158 '<!DOCTYPE html><html><head><meta charset="UTF-8">',
4159 '<title>Poems by ' .. type_label .. ' to poem ' .. poem_idx_str .. ' (page ' .. page_num .. ')</title>',
4160 font_style,
4161 '</head><body bgcolor="#000000" text="#FFFFFF" link="#6699FF" vlink="#9966FF"><table align="center"><tr><td><pre>'
4162 }
4163
4164 -- Add anchor poem with full formatting
4165 table.insert(html_parts, "=== ANCHOR POEM ===\n")
4166 table.insert(html_parts, format_poem_entry(poem, poem_colors, color_config, chrono_map, chrono_paged))
4167 table.insert(html_parts, "\n\n=== " .. type_label:upper() .. " RANKED ===\n\n")
4168
4169 -- Add poems for this page with full formatting
4170 for i = start_idx, end_idx do
4171 local entry = sorted_list[i]
4172 local entry_poem = entry.poem
4173 if entry_poem then
4174 -- Issue 8-036: Add poem source path to ranking header
4175 local source_path = get_source_path(entry_poem)
4176 table.insert(html_parts, string.format("--- #%d %s ---\n", i, source_path))
4177 -- Issue 9-013: text+image posts get a direct "image.png"
4178 -- link below their header (image entries are skipped --
4179 -- their title already deep-links into the gallery).
4180 if not entry_poem.is_image then
4181 local img_link = t_image_render.text_image_link(entry_poem)
4182 if img_link ~= "" then
4183 table.insert(html_parts, " " .. img_link .. "\n")
4184 end
4185 end
4186 table.insert(html_parts, format_poem_entry(entry_poem, poem_colors, color_config, chrono_map, chrono_paged))
4187 table.insert(html_parts, "\n\n")
4188 end
4189 end
4190
4191 table.insert(html_parts, '</pre></td></tr></table></body></html>')
4192
4193 -- Write file
4194 local dir_path = filename:match("(.*/)")
4195 os.execute('mkdir -p "' .. dir_path .. '"')
4196 local f = io.open(filename, "w")
4197 if f then
4198 f:write(table.concat(html_parts))
4199 f:close()
4200 return filename
4201 end
4202 return nil
4203 end
4204
4205 -- Issue 10-034: Orchestrator request/response loop
4206 -- Workers request work, receive slices, generate pages, report completion
4207 local similarity_count = 0
4208 local diversity_count = 0
4209 local processed = 0
4210
4211 while true do
4212 -- Request work from orchestrator
4213 request_channel:push({
4214 type = "get_work",
4215 worker_id = tid
4216 })
4217
4218 -- Wait for response (blocks until data available)
4219 local work = response_channel:pop()
4220
4221 if not work then
4222 -- Channel closed or error
4223 break
4224 end
4225
4226 if work.type == "shutdown" then
4227 -- No more work, exit loop
4228 break
4229 end
4230
4231 if work.type == "work" then
4232 local poem_index = work.poem_index
4233 local poem = poem_lookup[poem_index]
4234
4235 if poem then
4236 -- Convert raw index arrays to poem objects using data from orchestrator
4237 local similar_ranking = convert_similarity_ranking(work.similarity_ranking, poem_index)
4238 local diverse_sequence = convert_diversity_sequence(work.diversity_sequence, poem_index)
4239
4240 -- Generate similarity pages (page 1 only, respecting config)
4241 -- Issue 9-003 Fix D: Pass chrono_mapping and chrono_paginated for full formatting
4242 local max_pages = config.pages_is_all and 1 or (config.pages_list and #config.pages_list or 1)
4243 for page_num = 1, max_pages do
4244 local page_file = generate_page(poem, similar_ranking, "similar", page_num, config.poems_per_page, config.output_dir, config.chrono_mapping, config.chrono_paginated)
4245 if page_file then similarity_count = similarity_count + 1 end
4246 end
4247
4248 -- Generate diversity pages
4249 for page_num = 1, max_pages do
4250 local page_file = generate_page(poem, diverse_sequence, "different", page_num, config.poems_per_page, config.output_dir, config.chrono_mapping, config.chrono_paginated)
4251 if page_file then diversity_count = diversity_count + 1 end
4252 end
4253
4254 processed = processed + 1
4255
4256 -- Report completion to orchestrator
4257 request_channel:push({
4258 type = "done",
4259 worker_id = tid,
4260 poem_index = poem_index
4261 })
4262 end
4263 end
4264 end
4265
4266 return similarity_count, diversity_count, processed
4267 end)
4268
4269 -- Launch thread with channels for orchestrator communication
4270 threads[thread_id] = thread_func(thread_config, thread_id, work_request_channel, work_response_channels[thread_id])
4271 end
4272
4273 -- Issue 10-034: Orchestrator loop - serves work slices to workers
4274 -- Main thread holds caches, sends ~80KB slices instead of workers loading 700MB
4275
4276 -- Track work state
4277 local work_queue_idx = 1 -- Next poem index to assign
4278 local completed_count = 0 -- Number of poems completed
4279 local workers_active = num_threads -- Number of workers still running
4280 local workers_shutdown = {} -- Track which workers have been told to shut down
4281 for t = 1, num_threads do
4282 workers_shutdown[t] = false
4283 end
4284
4285 -- Get references to caches loaded in main thread (lines 3092-3096)
4286 -- DIVERSITY_CACHE and SIMILARITY_RANKINGS_CACHE are module-level variables
4287 local similarity_cache = SIMILARITY_RANKINGS_CACHE
4288 local diversity_cache = DIVERSITY_CACHE
4289
4290 -- Progress tracking
4291 local last_progress_time = os.time()
4292 local progress_interval = 1 -- Update progress every 1 second
4293
4294 -- Orchestrator main loop: process requests until all work done and all workers shut down
4295 while workers_active > 0 do
4296 -- Non-blocking receive with short timeout (100ms)
4297 local msg = work_request_channel:pop(100)
4298
4299 if msg then
4300 if msg.type == "get_work" then
4301 local worker_id = msg.worker_id
4302
4303 if work_queue_idx <= total_work then
4304 -- Get next poem index from queue
4305 local poem_index = work_queue[work_queue_idx]
4306 work_queue_idx = work_queue_idx + 1
4307
4308 -- Extract work slice from caches (~80KB: similarity ranking + diversity sequence)
4309 local similarity_ranking = similarity_cache.rankings[tostring(poem_index)]
4310 local diversity_sequence = diversity_cache.sequences[tostring(poem_index)]
4311
4312 -- Send work slice to worker
4313 work_response_channels[worker_id]:push({
4314 type = "work",
4315 poem_index = poem_index,
4316 similarity_ranking = similarity_ranking,
4317 diversity_sequence = diversity_sequence
4318 })
4319 else
4320 -- No more work - tell worker to shut down
4321 if not workers_shutdown[worker_id] then
4322 work_response_channels[worker_id]:push({
4323 type = "shutdown"
4324 })
4325 workers_shutdown[worker_id] = true
4326 workers_active = workers_active - 1
4327 end
4328 end
4329
4330 elseif msg.type == "done" then
4331 -- Worker completed a poem
4332 completed_count = completed_count + 1
4333 end
4334 end
4335
4336 -- Update progress display periodically
4337 local now = os.time()
4338 if now - last_progress_time >= progress_interval then
4339 last_progress_time = now
4340
4341 local elapsed = now - start_time
4342 local rate = elapsed > 0 and (completed_count / elapsed) or 0
4343 local remaining = total_work - completed_count
4344 local eta = rate > 0 and math.floor(remaining / rate) or 0
4345 local pct = (completed_count / total_work) * 100
4346
4347 -- Show orchestrator progress as the shared bar. The rate, ETA,
4348 -- and remaining queue depth ride along as the suffix.
4349 local label = string.format(" [%d threads]", num_threads)
4350 local suffix = string.format("%.1f poems/sec | ETA: %ds | Queue: %d",
4351 rate, eta, total_work - work_queue_idx + 1)
4352 progress.update(label, completed_count, total_work, suffix)
4353 end
4354 end
4355
4356 -- Close the animated bar, then print a plain completion summary so it
4357 -- survives in logs (the bar itself is suppressed when piped/quiet).
4358 progress.finish()
4359 local elapsed = os.time() - start_time
4360 print(string.format(" [%d threads] Complete: %d poems in %ds (%.1f poems/sec)",
4361 num_threads, completed_count, elapsed, completed_count / math.max(elapsed, 1)))
4362
4363 -- Wait for all threads to fully complete and collect results
4364 local total_similarity = 0
4365 local total_diversity = 0
4366 local total_processed = 0
4367
4368 for tid, thread in pairs(threads) do
4369 -- Wait for thread completion (may already be done)
4370 local status = thread:wait()
4371 if status == "completed" then
4372 local sim_count, div_count, proc_count = thread:get()
4373 total_similarity = total_similarity + (sim_count or 0)
4374 total_diversity = total_diversity + (div_count or 0)
4375 total_processed = total_processed + (proc_count or 0)
4376 elseif status == "failed" then
4377 local err = thread:get()
4378 utils.log_error(string.format("Thread %d failed: %s", tid, tostring(err)))
4379 else
4380 utils.log_warn(string.format("Thread %d in unexpected state: %s", tid, status))
4381 end
4382 end
4383
4384 -- Update results counts (we don't have individual filenames in parallel mode)
4385 for i = 1, total_similarity do table.insert(results.similarity_pages, "parallel") end
4386 for i = 1, total_diversity do table.insert(results.diversity_pages, "parallel") end
4387 -- }}} End parallel processing
4388
4389 else
4390 -- {{{ Sequential processing (original code path)
4391 if num_threads > 1 and not has_threading then
4392 utils.log_warn("Parallel processing requested but effil not available, using single thread")
4393 end
4394
4395 -- Generate similarity and diversity pages for each poem
4396 -- Note: Loop variable is poem_index (globally unique) not poem.id (per-category)
4397 local progress_count = 0
4398 for poem_index, poem_data in pairs(valid_poems) do
4399 progress_count = progress_count + 1
4400
4401 -- Animate one progress line; throttle sparser under --debug (verbose).
4402 local step = (progress.mode() == 2) and 100 or 25
4403 if progress_count % step == 0 then
4404 progress.update(" 📄 HTML pages", progress_count, total_poems)
4405 end
4406
4407 -- Generate unique filename identifier (category prefix for cross-category uniqueness)
4408 local unique_id = get_unique_poem_filename_id(poem_data)
4409
4410 -- Generate similarity ranking (cache is keyed by poem_index)
4411 local similar_ranking = M.generate_similarity_ranked_list(poem_index, poems_data, similarity_data)
4412
4413 -- Phase D (Issue 8-012): Use paginated generation
4414 -- Note: Pagination uses poem_index (numeric) for file naming (similar/0001-01.html)
4415 local pagination_result = M.generate_all_paginated_pages_for_poem(
4416 poem_data,
4417 similar_ranking,
4418 "similar",
4419 poem_data.poem_index, -- Use numeric poem_index for pagination filenames
4420 output_dir,
4421 pages_config.is_all and nil or pages_config.pages -- nil means "all pages"
4422 )
4423
4424 if pagination_result and pagination_result.files_generated then
4425 for _, file in ipairs(pagination_result.files_generated) do
4426 table.insert(results.similarity_pages, file)
4427 end
4428 end
4429
4430 -- Generate TXT version (full corpus export - not paginated)
4431 local similar_txt = generate_similarity_txt_file(poem_data, similar_ranking,
4432 string.format("%s/similar/%s.txt", output_dir, unique_id))
4433 if similar_txt then
4434 table.insert(results.txt_files, similar_txt)
4435 end
4436
4437 -- Generate HTML archive version (full corpus export with images - not paginated)
4438 -- Issue 10-036: Pass chrono_mapping for correct paginated chronological links
4439 if PAGINATION_CONFIG.generate_html_archives then
4440 local similar_archive = generate_similarity_html_archive(poem_data, similar_ranking,
4441 string.format("%s/similar/%s-archive.html", output_dir, unique_id), chrono_mapping)
4442 if similar_archive then
4443 table.insert(results.html_archives, similar_archive)
4444 end
4445 end
4446
4447 -- Generate diversity pages (cache is keyed by poem_index)
4448 local diverse_sequence = M.generate_maximum_diversity_sequence(poem_index, poems_data, embeddings_data)
4449
4450 -- Phase D (Issue 8-012): Use paginated generation for diversity pages too
4451 -- Note: Pagination uses poem_index (numeric) for file naming (different/0001-01.html)
4452 local diversity_pagination_result = M.generate_all_paginated_pages_for_poem(
4453 poem_data,
4454 diverse_sequence,
4455 "different",
4456 poem_data.poem_index, -- Use numeric poem_index for pagination filenames
4457 output_dir,
4458 pages_config.is_all and nil or pages_config.pages -- nil means "all pages"
4459 )
4460
4461 if diversity_pagination_result and diversity_pagination_result.files_generated then
4462 for _, file in ipairs(diversity_pagination_result.files_generated) do
4463 table.insert(results.diversity_pages, file)
4464 end
4465 end
4466
4467 -- Generate TXT version (full corpus export - not paginated)
4468 local diverse_txt = generate_diversity_txt_file(poem_data, diverse_sequence,
4469 string.format("%s/different/%s.txt", output_dir, unique_id))
4470 if diverse_txt then
4471 table.insert(results.txt_files, diverse_txt)
4472 end
4473
4474 -- Generate HTML archive version (full corpus export with images - not paginated)
4475 -- Issue 10-036: Pass chrono_mapping for correct paginated chronological links
4476 if PAGINATION_CONFIG.generate_html_archives then
4477 local diverse_archive = generate_diversity_html_archive(poem_data, diverse_sequence,
4478 string.format("%s/different/%s-archive.html", output_dir, unique_id), chrono_mapping)
4479 if diverse_archive then
4480 table.insert(results.html_archives, diverse_archive)
4481 end
4482 end
4483 end
4484 -- }}} End sequential processing
4485 progress.finish()
4486 end
4487
4488 -- Note: Chronological index and explore.html are generated by main.lua before this function
4489 -- to avoid duplicate work. We only generate the TXT export here.
4490
4491 -- Generate chronological TXT export (not generated elsewhere)
4492 local chrono_txt_file = output_dir .. "/chronological.txt"
4493 local chrono_txt = M.generate_chronological_txt_file(poems_data, chrono_txt_file)
4494 if chrono_txt then
4495 table.insert(results.txt_files, chrono_txt)
4496 results.chronological_txt = chrono_txt
4497 end
4498
4499 return results
4500end
4501-- }}}
4502
4503-- {{{ function M.main
-- Interactive console entry point for the flat HTML generator.
--
-- When `interactive_mode` is truthy, prints a five-item menu, reads one line
-- from stdin and runs the selected action against files under DIR .. "/output".
-- Otherwise it only logs a hint about the -I flag.
--
-- @param interactive_mode  truthy to show the menu; falsy to print the hint
-- @return nothing; outcomes are reported through utils.log_info / log_error
function M.main(interactive_mode)
    if not interactive_mode then
        utils.log_info("Use -I flag for interactive mode")
        return
    end

    print("Flat HTML Generator - Interactive Mode")
    print("1. Generate complete flat HTML collection")
    print("2. Generate chronological index only")
    print("3. Generate instructions page only")
    print("4. Test single similarity page")
    print("5. Test single difference page")
    io.write("Select option (1-5): ")

    -- io.read() returns nil at EOF. Trim surrounding whitespace so that input
    -- such as "1 " or a line ending in "\r" still selects an option.
    local choice = io.read()
    choice = choice and choice:match("^%s*(.-)%s*$")

    local poems_file = utils.asset_path("poems.json")
    local similarity_file = utils.embeddings_dir() .. "/similarity_matrix.json"
    local embeddings_file = utils.embeddings_dir() .. "/embeddings.json"
    local output_dir = DIR .. "/output"

    -- Wrap a path in single quotes for the shell, escaping embedded quotes,
    -- so directories containing spaces or metacharacters are passed intact.
    local function shell_quote(path)
        local escaped = path:gsub("'", "'\\''")
        return "'" .. escaped .. "'"
    end

    -- Return the first entry of poems_data.poems whose `id` equals poem_id,
    -- or nil when there is none (a missing `poems` array counts as empty).
    local function find_poem_by_id(poems_data, poem_id)
        for _, poem in ipairs(poems_data.poems or {}) do
            if poem.id == poem_id then
                return poem
            end
        end
        return nil
    end

    -- Shared flow for options 4 and 5: prompt for a poem ID, load poems.json
    -- plus one auxiliary JSON file, build the ordered list with `build_list`,
    -- render it and write output/test_<page_type>_<id>.html.
    --
    -- label      word used in the prompt ("similarity" / "difference")
    -- page_type  page kind handed to the renderer and used in the file name
    -- aux_file   path of the second JSON file (similarity matrix / embeddings)
    -- build_list function(poem_id, poems_data, aux_data) -> ordered list
    local function run_single_poem_test(label, page_type, aux_file, build_list)
        io.write(string.format("Enter poem ID for %s test: ", label))
        local raw_id = io.read()
        local poem_id = raw_id and tonumber(raw_id)
        if not poem_id then
            utils.log_error("Poem ID must be a number")
            return
        end

        local poems_data = utils.read_json_file(poems_file)
        local aux_data = utils.read_json_file(aux_file)
        if not (poems_data and aux_data) then
            utils.log_error("Failed to load required data files")
            return
        end

        local poem_data = find_poem_by_id(poems_data, poem_id)
        if not poem_data then
            utils.log_error("No poem found with ID " .. tostring(poem_id))
            return
        end

        local ordered = build_list(poem_id, poems_data, aux_data)
        -- Issue 10-036: Pass nil for chrono_mapping in interactive test (uses "01" fallback)
        local html = M.generate_flat_poem_list_html(poem_data, ordered, page_type, poem_id, nil)
        local test_file = string.format("%s/test_%s_%03d.html", output_dir, page_type, poem_id)
        os.execute("mkdir -p " .. shell_quote(output_dir))
        -- NOTE(review): the result of utils.write_file is not checked here, so
        -- the message below is logged even if the write failed; confirm its
        -- return contract before relying on this log line.
        utils.write_file(test_file, html)
        utils.log_info("Test file written: " .. test_file)
    end

    if choice == "1" then
        utils.log_info("Loading data files...")
        local poems_data = utils.read_json_file(poems_file)
        local similarity_data = utils.read_json_file(similarity_file)
        local embeddings_data = utils.read_json_file(embeddings_file)

        if poems_data and similarity_data and embeddings_data then
            M.generate_complete_flat_html_collection(poems_data, similarity_data.similarities, embeddings_data, output_dir)
        else
            utils.log_error("Failed to load required data files")
        end
    elseif choice == "2" then
        local poems_data = utils.read_json_file(poems_file)
        if poems_data then
            M.generate_chronological_index_with_navigation(poems_data, output_dir)
            M.generate_chronological_txt_file(poems_data, output_dir .. "/chronological.txt")
            utils.log_info("Generated chronological/index.html and chronological.txt")
        else
            utils.log_error("Failed to load required data files")
        end
    elseif choice == "3" then
        M.generate_simple_discovery_instructions(output_dir)
    elseif choice == "4" then
        run_single_poem_test("similarity", "similar", similarity_file,
            function(poem_id, poems_data, similarity_data)
                return M.generate_similarity_ranked_list(poem_id, poems_data, similarity_data.similarities)
            end)
    elseif choice == "5" then
        run_single_poem_test("difference", "different", embeddings_file,
            function(poem_id, poems_data, embeddings_data)
                return M.generate_maximum_diversity_sequence(poem_id, poems_data, embeddings_data)
            end)
    else
        utils.log_error("Invalid option: " .. tostring(choice))
    end
end
4599-- }}}
4600
-- Command line execution (only when run directly, not when require()'d)
-- arg[0] holds the script name; dispatch only when it ends with this file's name.
if arg and arg[0] and arg[0]:match("flat%-html%-generator%.lua$") then
    -- Report whether "-I" appears among the positional command-line arguments.
    local function has_interactive_flag(argv)
        for _, token in ipairs(argv) do
            if token == "-I" then
                return true
            end
        end
        return false
    end

    M.main(has_interactive_flag(arg))
end

return M
4617