src/main.lua
1#!/usr/bin/env luajit
2
3-- {{{ Early argument parsing (before package.path setup)
4-- Parse arguments to extract directory path, skipping flags like -I
5-- This must happen before utils is loaded since package.path depends on DIR
-- Finds the project directory in the raw CLI argument list.
--
-- Runs before any project module can be required, so it uses only the
-- standard library. Arguments are scanned left to right and the first hit wins:
--   * a bare argument (one not starting with "-") is taken as the directory
--   * "--dir PATH"  yields PATH
--   * "--dir=PATH"  yields PATH
-- All other flags are skipped. An empty "--dir=" carries no value and is
-- skipped as well. Returns nil when args is nil or nothing matches.
local function parse_dir_from_args(args)
    if not args then return nil end
    for i = 1, #args do
        local a = args[i]
        -- Guard every string operation, not just the first one.
        if type(a) == "string" then
            -- Skip flags (start with -); anything else is the directory.
            if a:sub(1, 1) ~= "-" then
                return a
            end
            -- "--dir PATH": the value is the next argument.
            if a == "--dir" and args[i + 1] then
                return args[i + 1]
            end
            -- "--dir=PATH": "-" is a quantifier in Lua patterns, so the dashes
            -- must be escaped. Unescaped, "^--dir=" also accepted "-dir=" and
            -- "---dir=".
            local value = a:match("^%-%-dir=(.+)")
            if value then
                return value
            end
        end
    end
    return nil
end
24
-- Chooses the project root: the caller-supplied directory when one was given,
-- otherwise the built-in default location.
local function setup_dir_path(provided_dir)
    local fallback_root = "/mnt/mtwo/programming/ai-stuff/neocities-modernization"
    return provided_dir or fallback_root
end
31-- }}}
32
33-- Script configuration
34local DIR = setup_dir_path(parse_dir_from_args(arg))
35
36-- Load required libraries
37-- Include TUI library from shared scripts location (updates propagate automatically)
38local TUI_LIBS = "/home/ritz/programming/ai-stuff/scripts/libs"
39package.path = DIR .. "/libs/?.lua;" .. DIR .. "/src/?.lua;" .. TUI_LIBS .. "/?.lua;" .. package.path
40local utils = require("utils")
41
42-- Initialize asset path configuration (CLI --dir takes precedence over config)
43-- This must happen early, before any asset_path() calls
44utils.init_assets_root(arg)
45
46-- Load modules properly by temporarily updating package.path
47local old_path = package.path
48package.path = DIR .. "/src/?.lua;" .. DIR .. "/libs/?.lua;" .. package.path
49
50local poem_extractor = require("poem-extractor")
51local poem_validator = require("poem-validator")
52local image_manager = require("image-manager")
53local flat_html_generator = require("flat-html-generator")
54local inference_config = require("inference-server-config")
55local dkjson = require("dkjson")
56
57-- Give inference-server-config the project root so it can find config.lua. This
58-- enables every call site below that uses inference_config.get_selected_model()
59-- to resolve through the same code path the GPU embeddings stage uses.
60inference_config.set_project_root(DIR)
61
62-- Boost inclusion: a CLI flag (--no-boosts / --exclude-boosts to drop them,
63-- --include-boosts to force-keep) takes precedence over config.privacy.
64-- include_boosts. Resolved once here and threaded into extraction.
65local config_loader = require("config-loader")
66config_loader.set_project_root(DIR)
-- Decides whether boosts are kept during extraction.
-- Precedence: the first boost-related CLI flag found in `arg`, then
-- config.privacy.include_boosts when it is set, then false (exclude).
local function resolve_include_boosts()
    -- Flag -> decision. Both "drop" spellings map to false.
    local flag_decisions = {
        ["--no-boosts"] = false,
        ["--exclude-boosts"] = false,
        ["--include-boosts"] = true,
    }
    for _, flag in ipairs(arg or {}) do
        local decision = flag_decisions[flag]
        if decision ~= nil then
            return decision
        end
    end

    -- No flag given: fall back to the config. A loader failure is treated as
    -- "not configured".
    local loaded, cfg = pcall(function() return config_loader.load() end)
    local privacy = loaded and cfg and cfg.privacy
    if privacy and privacy.include_boosts ~= nil then
        return privacy.include_boosts
    end

    return false -- default: exclude boosts (opt-in via --include-boosts)
end
78
79-- Restore original path
80package.path = old_path
81
82local M = {}
83
84-- {{{ TUI Menu Configuration
85-- Try to load TUI menu library (falls back to simple text menu if unavailable)
86-- NOTE: The TUI library requires LuaJIT (for the 'bit' module). If running with
87-- standard Lua, it will fall back to the simple text menu.
-- Load both TUI modules defensively. The TUI counts as available only when
-- "menu" AND "tui" load; if either fails, the script falls back to the text
-- menu instead of aborting at load time.
-- tui_load_error is always a string (or nil), so callers can use string
-- methods on it regardless of what the failing module raised.
local tui_available, menu = pcall(require, "menu")
local tui_load_error = nil
local tui = nil
if tui_available then
    local tui_ok, tui_module = pcall(require, "tui")
    if tui_ok then
        tui = tui_module
    else
        tui_available = false
        tui_load_error = tostring(tui_module)
        menu = nil
    end
else
    -- On failure pcall's second result is the error value, not a module.
    tui_load_error = tostring(menu)
    menu = nil
end
91
-- {{{ local function build_menu_config
93-- Builds the unified menu configuration for the TUI library
94-- Combines functionality from main.lua and flat-html-generator.lua into one interface
-- Returns a freshly built menu description: a title, a subtitle and an
-- ordered list of sections, each holding its items. Small constructors keep
-- the repeated item shapes in one place.
local function build_menu_config()
    -- Toggle item; `shortcut` is optional and simply absent when omitted.
    local function checkbox(id, label, description, shortcut)
        return {
            id = id,
            label = label,
            type = "checkbox",
            value = "0",
            description = description,
            shortcut = shortcut,
        }
    end

    -- Editable field; `value` uses the "value:width" format.
    local function flag(id, label, value, description)
        return {
            id = id,
            label = label,
            type = "flag",
            value = value,
            description = description,
        }
    end

    -- kind "single" = radio-button style (one action at a time),
    -- kind "multi"  = several options may be set together.
    local function section(id, title, kind, items)
        return { id = id, title = title, type = kind, items = items }
    end

    return {
        title = "Neocities Poetry Modernization",
        subtitle = "Static HTML poetry recommendation system",
        sections = {
            section("pipeline", "Data Pipeline", "single", {
                checkbox("extract", "Extract poems from sources",
                    "Auto-detect JSON extracts or compiled.txt", "e"),
                checkbox("validate", "Validate extracted poems",
                    "Check data quality and generate validation report", "v"),
                checkbox("catalog", "Catalog and manage images",
                    "Index media attachments from archives", "c"),
                checkbox("dataset", "Generate complete dataset",
                    "Run extract + validate + catalog in sequence", "d"),
            }),
            section("embedding", "Embedding & Similarity", "single", {
                checkbox("test_inference_server", "Test inference server",
                    "Verify connection to local LLM service", "t"),
                checkbox("similarity", "Calculate similarity matrix (parallel)",
                    "Generate per-poem similarity files using effil threads", "s"),
            }),
            section("html", "HTML Generation", "single", {
                checkbox("chronological", "Generate chronological index",
                    "Main entry point listing all poems in order"),
                checkbox("explore", "Generate explore.html",
                    "Discovery instructions page"),
                checkbox("similar_pages", "Generate similarity pages (parallel)",
                    "Per-poem pages sorted by semantic similarity"),
                checkbox("different_pages", "Generate difference pages (parallel)",
                    "Per-poem pages sorted by maximum diversity"),
                checkbox("full_website", "Generate complete website",
                    "Run all HTML generation steps", "w"),
            }),
            section("testing", "Testing & Debug", "single", {
                checkbox("test_similar", "Test single similarity page",
                    "Generate one similarity page for testing"),
                checkbox("test_different", "Test single difference page",
                    "Generate one difference page for testing"),
            }),
            section("options", "Options", "multi", {
                flag("test_poem_id", "Test poem ID", "1:5",
                    "Poem ID for test page generation"),
                flag("thread_count", "Thread count", "8:3",
                    "Number of parallel threads for generation"),
            }),
            section("utilities", "Utilities", "single", {
                checkbox("status", "View project status",
                    "Show file status and dataset statistics", "i"),
                checkbox("clean", "Clean and rebuild assets",
                    "Delete generated assets and regenerate", "r"),
            }),
            section("action", "", "single", {
                {
                    id = "run",
                    label = "[Run]",
                    type = "action",
                    value = "",
                    description = "Execute selected operation",
                },
            }),
        },
    }
end
294-- }}}
295
296-- {{{ function M.show_tui_menu
-- Runs the TUI menu and returns the selected action and values.
-- Delegates to M.show_simple_menu() when the TUI library could not be loaded.
-- menu.cleanup() is always called once the menu has been initialised, even
-- when menu.run() raises; the error is then re-raised to the caller unchanged.
function M.show_tui_menu()
    if not tui_available then
        -- Fallback to simple text menu
        return M.show_simple_menu()
    end

    menu.init(build_menu_config())

    -- Protected call so the cleanup below runs on the error path too.
    local ok, action, values = pcall(menu.run)
    menu.cleanup()

    if not ok then
        -- `action` holds the error value here; level 0 keeps its message as-is.
        error(action, 0)
    end

    return action, values
end
312-- }}}
313
314-- {{{ function M.show_simple_menu
-- Simple text-based menu fallback when TUI is not available.
-- On the first call in a session it explains on stderr why the TUI could not
-- be used. Returns "quit", {} when Exit is chosen; otherwise "run" plus a
-- values table with the chosen action id set to "1" (empty when the choice
-- does not map to an action).
function M.show_simple_menu()
    -- Show why TUI isn't available (once per session)
    if tui_load_error and not M._tui_warning_shown then
        M._tui_warning_shown = true

        -- The load error can be any Lua value; stringify it before using
        -- string methods on it.
        local reason = tostring(tui_load_error)

        -- Every row is padded to the same inner width so the right-hand border
        -- lines up, including the free-form "Reason:" row.
        local INNER_WIDTH = 70
        local TEXT_WIDTH = INNER_WIDTH - 2
        local border = string.rep("═", INNER_WIDTH)
        local function boxed(text)
            text = text:sub(1, TEXT_WIDTH)
            return "║ " .. text .. string.rep(" ", TEXT_WIDTH - #text) .. " ║\n"
        end

        local out = {
            "\n",
            "╔" .. border .. "╗\n",
            boxed("NOTE: TUI menu not available. Using simple text menu."),
            boxed(""),
        }
        if reason:match("bit") then
            out[#out + 1] = boxed("Reason: The 'bit' module requires LuaJIT.")
            out[#out + 1] = boxed("Fix: Run with 'luajit src/main.lua -I' instead of 'lua'")
            out[#out + 1] = boxed("or use the shebang: './src/main.lua -I'")
        else
            -- Collapse newlines/tabs so a multi-line loader error cannot break
            -- the box, then keep the first 60 characters.
            local one_line = reason:gsub("%s+", " ")
            out[#out + 1] = boxed("Reason: " .. one_line:sub(1, 60))
        end
        out[#out + 1] = "╚" .. border .. "╝\n"
        out[#out + 1] = "\n"
        io.stderr:write(table.concat(out))
    end

    -- One list drives both the displayed labels and the action mapping, so the
    -- two cannot drift apart. The last entry (no action id) is Exit.
    local entries = {
        { "Extract poems (auto-detect JSON/compiled.txt)", "extract" },
        { "Validate extracted poems", "validate" },
        { "Test inference server", "test_inference_server" },
        { "Generate complete dataset", "dataset" },
        { "Catalog and manage images", "catalog" },
        { "Generate website HTML", "full_website" },
        { "View project status", "status" },
        { "Clean and rebuild assets", "clean" },
        { "Exit" },
    }

    local options = {}
    for i, entry in ipairs(entries) do
        options[i] = entry[1]
    end

    local choice = utils.show_menu("Neocities Poetry Modernization", options)

    if choice == #entries then
        return "quit", {}
    end

    -- Map simple menu choice to action/values format
    local values = {}
    local chosen = entries[choice]
    if chosen and chosen[2] then
        values[chosen[2]] = "1"
    end

    return "run", values
end
374-- }}}
375
376-- {{{ function M.show_main_menu (legacy wrapper)
-- Legacy wrapper for backwards compatibility.
-- Shows the plain numbered menu and returns whatever utils.show_menu returns.
function M.show_main_menu()
    local menu_title = "Neocities Poetry Modernization"
    local menu_labels = {
        "Extract poems (auto-detect JSON/compiled.txt)",
        "Validate extracted poems",
        "Test inference server",
        "Generate complete dataset",
        "Catalog and manage images",
        "Generate website HTML",
        "View project status",
        "Clean and rebuild assets",
        "Exit",
    }
    return utils.show_menu(menu_title, menu_labels)
end
393-- }}}
394-- }}}
395
396-- {{{ function M.is_data_fresh
-- Reports whether the processed poems.json asset exists and is at least as
-- new as every per-source poems.json extract that is present.
-- Returns false when the asset is missing, its mtime cannot be read, or any
-- existing source file has a newer mtime; true otherwise.
function M.is_data_fresh()
    local asset = utils.asset_path("poems.json")
    local asset_mtime = utils.file_exists(asset) and utils.get_file_mtime(asset)
    if not asset_mtime then
        return false
    end

    -- Source extracts, relative to the project root. Missing ones are ignored.
    local source_suffixes = {
        "/input/fediverse/files/poems.json",
        "/input/messages/files/poems.json",
        "/input/notes/files/poems.json",
        "/input/bluesky/files/poems.json",
    }

    for _, suffix in ipairs(source_suffixes) do
        local source = DIR .. suffix
        if utils.file_exists(source) then
            local stamp = utils.get_file_mtime(source)
            if stamp and stamp > asset_mtime then
                return false -- a source changed after the asset was written
            end
        end
    end

    return true
end
429-- }}}
430
431-- {{{ function M.extract_poems
-- Extracts poems into the poems.json asset.
--   force: when truthy, extract even if M.is_data_fresh() reports up-to-date data.
-- Returns true when extraction ran successfully or was skipped as fresh.
-- Returns false when the extractor raised an error or returned no result table.
function M.extract_poems(force)
    -- Skip if data is fresh (unless forced)
    if not force and M.is_data_fresh() then
        utils.log_info("Poem data is up to date, skipping extraction")
        return true
    end

    local output_file = utils.asset_path("poems.json")

    -- Use auto-detection to handle both JSON extracts and compiled.txt
    local include_boosts = resolve_include_boosts()
    if not include_boosts then
        utils.log_info("Boosts excluded for this extraction (CLI/config: include_boosts=false)")
    end

    -- `detail` captures a second return value (e.g. an error message from a
    -- nil, err style return) for the failure log.
    local success, result, detail = pcall(function()
        return poem_extractor.extract_poems_auto(DIR, output_file, { include_boosts = include_boosts })
    end)

    if not success then
        utils.log_error("Poem extraction failed: " .. tostring(result))
        return false
    end

    -- The pcall only guards the extractor call itself. Validate the result's
    -- shape before indexing it, so a nil or non-table return is reported as a
    -- failure instead of raising here.
    if type(result) ~= "table" then
        utils.log_error("Poem extraction failed: " .. tostring(detail or result))
        return false
    end

    local metadata = type(result.metadata) == "table" and result.metadata or {}
    utils.log_info("Poem extraction completed using " .. tostring(metadata.source_mode or "unknown") .. " mode")
    utils.log_info("Found " .. tostring(metadata.total_poems or "unknown") .. " poems")
    return true
end
460-- }}}
461
462-- {{{ function M.validate_poems
-- Runs the poem validator over the poems.json asset and writes
-- validation-report.json next to it.
-- Returns false (after logging) when poems.json does not exist, true otherwise.
function M.validate_poems()
    local poems_path = utils.asset_path("poems.json")
    local report_path = utils.asset_path("validation-report.json")

    if utils.file_exists(poems_path) then
        poem_validator.validate_poems(poems_path, report_path)
        utils.log_info("Poem validation completed")
        return true
    end

    utils.log_error("Poems file not found. Run extraction first.")
    return false
end
476-- }}}
477
478-- {{{ function M.test_embedding_service
-- Checks that the embedding server can be reached and exercised.
-- Returns true once the server manager reports a ready endpoint and the test
-- call has been issued. Returns false (after logging) when the manager module
-- cannot be loaded or no endpoint becomes ready.
function M.test_embedding_service()
    utils.log_info("Testing inference server embedding service...")

    -- require() raises on a missing or broken module rather than returning
    -- nil, so it is called protected to make the failure branch reachable.
    local loaded, server_manager = pcall(require, "embedding-server-manager")
    if loaded and type(server_manager) == "table" then
        local endpoint = server_manager.ensure_ready()
        if endpoint then
            server_manager.test_embedding(endpoint, inference_config.get_selected_model())
            return true
        end
    elseif not loaded then
        utils.log_error("Could not load embedding-server-manager: " .. tostring(server_manager))
    end

    utils.log_error("Embedding service test failed")
    return false
end
494-- }}}
495
496-- {{{ function M.catalog_images
-- Issue 10-015a: Added verbose parameter for detailed statistics output
-- Runs the image manager inside a protected call.
--   verbose: forwarded unchanged to image_manager.main.
-- Returns true only when the call neither raised nor returned a falsy value;
-- otherwise logs the error value / result and returns false.
function M.catalog_images(verbose)
    local ran, outcome = pcall(function()
        return image_manager.main(verbose)
    end)

    if not (ran and outcome) then
        utils.log_error("Image cataloging failed: " .. tostring(outcome))
        return false
    end

    utils.log_info("Image cataloging completed successfully")
    return true
end
512-- }}}
513
514-- {{{ function M.is_html_fresh
-- Reports whether the generated site is newer than everything it is built from.
-- output/chronological.html is used as the indicator for the whole site.
-- Returns false when that file is missing, its mtime cannot be read, or any
-- existing input below has a newer mtime; true otherwise.
function M.is_html_fresh()
    local output_file = DIR .. "/output/chronological.html"
    if not utils.file_exists(output_file) then
        return false
    end

    local output_mtime = utils.get_file_mtime(output_file)
    if not output_mtime then
        return false
    end

    -- Inputs whose change invalidates the HTML. The two cache files are the
    -- ones M.generate_website_html requires before generating, so regenerating
    -- either must mark the site stale. The similarity matrix is still checked
    -- when present. Missing inputs are ignored here.
    local inputs = {
        utils.asset_path("poems.json"),
        utils.embeddings_dir() .. "/similarity_matrix.json",
        utils.embeddings_dir() .. "/similarity_rankings_cache.json",
        utils.embeddings_dir_disk() .. "/diversity_cache.json",
    }

    for _, input in ipairs(inputs) do
        if utils.file_exists(input) then
            local input_mtime = utils.get_file_mtime(input)
            if input_mtime and input_mtime > output_mtime then
                return false
            end
        end
    end

    return true
end
548-- }}}
549
550-- {{{ function M.generate_website_html
551-- Phase D (Issue 8-012): Added pages_spec parameter for pagination control
552-- Issue 8-022: Added poems_per_page parameter for CLI override
553-- Issue 9-002: Added num_threads parameter for parallel HTML generation
554-- Issue 9-003: Added chrono_per_page parameter for chronological pagination
-- Builds the whole static site into DIR/output.
--   force           : regenerate even when M.is_html_fresh() says output is current
--   pages_spec      : forwarded to the flat HTML collection generator
--   poems_per_page  : forwarded to the flat HTML collection generator
--   num_threads     : forwarded to the flat HTML collection generator
--   chrono_per_page : forwarded to the chronological index and the collection generator
-- Returns true on success or when generation was skipped as fresh; false
-- (after logging) when a prerequisite is missing or a generation step fails.
function M.generate_website_html(force, pages_spec, poems_per_page, num_threads, chrono_per_page)
    if not force and M.is_html_fresh() then
        utils.log_info("Website HTML is up to date, skipping generation")
        return true
    end

    local poems_file = utils.asset_path("poems.json")
    if not utils.file_exists(poems_file) then
        utils.log_error("Poems file not found. Run extraction first.")
        return false
    end

    -- Pre-flight (Issues 10-033 / 10-054): both pre-computed caches must exist
    -- before the long generation starts. The path that was checked is logged
    -- so a RAM/disk location mismatch is visible. The diversity cache is
    -- looked up via embeddings_dir_disk(), the rankings cache via embeddings_dir().
    local required_caches = {
        {
            path = utils.embeddings_dir_disk() .. "/diversity_cache.json",
            missing = "Diversity cache not found at: ",
            hint = " (run ./run.sh --generate-diversity, or check the RAM/disk switch)",
        },
        {
            path = utils.embeddings_dir() .. "/similarity_rankings_cache.json",
            missing = "Similarity rankings cache not found at: ",
            hint = " (run ./run.sh --generate-similarity, or check the RAM/disk switch)",
        },
    }
    for _, cache in ipairs(required_caches) do
        if not utils.file_exists(cache.path) then
            utils.log_error(cache.missing .. cache.path .. cache.hint)
            return false
        end
    end

    -- Only the poems data is loaded here; embeddings.json and
    -- similarity_matrix.json are deliberately not read (Issue 10-033).
    local poems_data = utils.read_json_file(poems_file)
    if not poems_data then
        utils.log_error("Failed to load poems data")
        return false
    end

    local output_dir = DIR .. "/output"

    -- Chronological index (main entry point); chrono_per_page is the CLI
    -- override for its pagination (Issue 9-003).
    if not flat_html_generator.generate_chronological_index_with_navigation(poems_data, output_dir, chrono_per_page) then
        utils.log_error("Failed to generate chronological index")
        return false
    end

    -- explore.html + explore-2.html; poems_data lets them render corpus stats.
    flat_html_generator.generate_simple_discovery_instructions(output_dir, poems_data)

    -- The long step: all similarity and diversity pages. The similarity and
    -- embeddings arguments are passed as nil (Issue 10-033).
    local generated = flat_html_generator.generate_complete_flat_html_collection(
        poems_data, nil, nil, output_dir, pages_spec, poems_per_page, num_threads, chrono_per_page
    )
    if not generated then
        utils.log_error("Website HTML generation failed")
        return false
    end

    utils.log_info("Website HTML generation completed successfully")
    return true
end
639-- }}}
640
641-- {{{ function M.generate_complete_dataset
-- Runs extraction, validation and image cataloging in that order, stopping at
-- the first stage that reports failure.
-- Returns true when all three stages succeed, false otherwise.
function M.generate_complete_dataset()
    utils.log_info("Generating complete dataset...")

    -- Stages are looked up on M by name at call time and invoked without
    -- arguments.
    local stage_names = { "extract_poems", "validate_poems", "catalog_images" }
    for _, stage_name in ipairs(stage_names) do
        if not M[stage_name]() then
            return false
        end
    end

    utils.log_info("Complete dataset generation finished")
    return true
end
660-- }}}
661
662-- {{{ function M.show_project_status
-- Prints a status report to stdout: the assets location, a Found/Missing
-- line for each key input, asset and source file, and a poem count when
-- poems.json is readable.
function M.show_project_status()
    print("\n=== PROJECT STATUS ===")

    local paths = utils.get_project_paths(DIR)
    local assets_root = utils.get_assets_root()
    local row_format = "%-20s: %s"

    -- Input files live under paths.root, generated assets under the assets
    -- root (via utils.asset_path), sources under paths.src.
    local checks = {
        { label = "Legacy Source", file = paths.root .. "/compiled.txt" },
        { label = "JSON Extracts (Fed)", file = paths.root .. "/input/fediverse/files/poems.json" },
        { label = "JSON Extracts (Msg)", file = paths.root .. "/input/messages/files/poems.json" },
        { label = "JSON Extracts (Notes)", file = paths.root .. "/input/notes/files/poems.json" },
        { label = "JSON Extracts (Sky)", file = paths.root .. "/input/bluesky/files/poems.json" },
        { label = "Processed Poems", file = utils.asset_path("poems.json") },
        { label = "Validation Report", file = utils.asset_path("validation-report.json") },
        { label = "Image Catalog", file = utils.asset_path("image-catalog.json") },
        { label = "Poem Extractor", file = paths.src .. "/poem-extractor.lua" },
        { label = "Poem Validator", file = paths.src .. "/poem-validator.lua" },
        { label = "Image Manager", file = paths.src .. "/image-manager.lua" },
        { label = "Embedding Server Manager", file = paths.src .. "/embedding-server-manager.lua" },
    }

    print(row_format:format("Assets Location", assets_root))

    for _, check in ipairs(checks) do
        local verdict
        if utils.file_exists(check.file) then
            verdict = "✅ Found"
        else
            verdict = "❌ Missing"
        end
        print(row_format:format(check.label, verdict))
    end

    -- Dataset size: counts occurrences of the literal text "id": in the file.
    local poems_file = utils.asset_path("poems.json")
    if utils.file_exists(poems_file) then
        local content = utils.read_file(poems_file)
        if content then
            local poem_count = 0
            local cursor = 1
            while true do
                -- Plain (non-pattern) search, advancing past each hit.
                local _, hit_end = content:find('"id":', cursor, true)
                if not hit_end then
                    break
                end
                poem_count = poem_count + 1
                cursor = hit_end + 1
            end
            print(string.format("%-20s: %d poems", "Dataset Size", poem_count))
        end
    end

    print("")
end
706-- }}}
707
708-- {{{ function M.clean_and_rebuild
-- Deletes the generated poems.json and validation-report.json assets after
-- confirmation, then regenerates the dataset.
-- Returns false when the user declines or an existing asset cannot be
-- deleted; otherwise returns the result of M.generate_complete_dataset().
function M.clean_and_rebuild()
    utils.log_info("Cleaning and rebuilding assets...")

    if not utils.confirm_action("This will delete existing assets. Continue?") then
        utils.log_info("Operation cancelled")
        return false
    end

    -- os.remove avoids the shell entirely, so asset paths containing spaces or
    -- shell metacharacters are handled correctly. A file that is already
    -- absent is fine; one that survives the delete is an error.
    for _, asset_name in ipairs({ "poems.json", "validation-report.json" }) do
        local asset = utils.asset_path(asset_name)
        local removed, err = os.remove(asset)
        if not removed and utils.file_exists(asset) then
            utils.log_error("Could not delete " .. asset .. ": " .. tostring(err))
            return false
        end
    end

    -- Regenerate
    return M.generate_complete_dataset()
end
724-- }}}
725
726-- {{{ function M.handle_tui_action
727-- Handles actions from the TUI menu based on which items are selected
728-- values is a table mapping item_id -> "1" for selected items
-- Runs every action whose id is set to "1" in `values`, in menu order.
--   values: table mapping item_id -> "1" for selected items, plus the option
--           fields test_poem_id and thread_count.
-- Returns true when at least one action was selected, false otherwise. The
-- results of the individual actions are not propagated.
function M.handle_tui_action(values)
    local executed = false
    local output_dir = DIR .. "/output"

    -- POSIX single-quote escaping, so paths built from DIR survive the shell
    -- even when they contain spaces or metacharacters.
    local function shell_quote(text)
        local escaped = tostring(text):gsub("'", "'\\''")
        return "'" .. escaped .. "'"
    end

    -- Returns the parsed poems.json asset, or nil when it is absent/unreadable.
    local function load_poems()
        local poems_file = utils.asset_path("poems.json")
        if not utils.file_exists(poems_file) then
            return nil
        end
        return utils.read_json_file(poems_file)
    end

    local function run_parallel_generator(mode_flag)
        os.execute("luajit " .. shell_quote(DIR .. "/scripts/generate-html-parallel") .. " " .. mode_flag)
    end

    -- Data Pipeline actions
    if values.extract == "1" then
        M.extract_poems(true) -- force=true in interactive mode
        executed = true
    end
    if values.validate == "1" then
        M.validate_poems()
        executed = true
    end
    if values.catalog == "1" then
        M.catalog_images()
        executed = true
    end
    if values.dataset == "1" then
        M.generate_complete_dataset()
        executed = true
    end

    -- Embedding & Similarity actions
    if values.test_inference_server == "1" then
        M.test_embedding_service()
        executed = true
    end
    if values.similarity == "1" then
        -- Similarity runs on the GPU, in-process (the CPU engine was removed,
        -- Issue 10-057).
        utils.log_info("Running GPU similarity calculation...")
        -- vk_similarity reads DIR from the environment to locate its library;
        -- set it before the module loads.
        local ffi = require("ffi")
        ffi.cdef[[int setenv(const char *name, const char *value, int overwrite);]]
        ffi.C.setenv("DIR", DIR, 1)
        local vk_sim = require("vulkan-compute.lua.vk_similarity")
        -- K = minimum_pages x poems_per_page from config, since this action
        -- takes no --pages.
        local pag = config_loader.load().pagination
        if not pag then error("config.pagination missing; cannot size the similarity cache") end
        local top_k = pag.minimum_pages * pag.poems_per_page
        local thread_count = tonumber(values.thread_count) or 8
        local embeddings_file = utils.embeddings_dir() .. "/embeddings.json"
        vk_sim.generate_similarity_matrix_gpu_parallel(
            embeddings_file, inference_config.get_selected_model(), false, thread_count, top_k)
        executed = true
    end

    -- HTML Generation actions
    if values.chronological == "1" then
        local poems_data = load_poems()
        if not poems_data then
            utils.log_error("Failed to load poems data")
        elseif flat_html_generator.generate_chronological_index_with_navigation(poems_data, output_dir) then
            -- Only report success when the generator actually succeeded.
            utils.log_info("Generated chronological.html")
        else
            utils.log_error("Failed to generate chronological index")
        end
        executed = true
    end
    if values.explore == "1" then
        -- Pass the poems data when available, as the full pipeline does, so
        -- the page can render corpus stats. nil keeps the old behaviour.
        flat_html_generator.generate_simple_discovery_instructions(output_dir, load_poems())
        utils.log_info("Generated explore.html")
        executed = true
    end
    if values.similar_pages == "1" then
        utils.log_info("Generating similarity pages (use scripts/generate-html-parallel --similar-only)...")
        run_parallel_generator("--similar-only")
        executed = true
    end
    if values.different_pages == "1" then
        utils.log_info("Generating difference pages (use scripts/generate-html-parallel --different-only)...")
        run_parallel_generator("--different-only")
        executed = true
    end
    if values.full_website == "1" then
        M.generate_website_html(true, nil, nil) -- TUI mode: use config defaults
        executed = true
    end

    -- Testing actions (poem id falls back to 1 when the field is not numeric)
    if values.test_similar == "1" then
        local poem_id = tonumber(values.test_poem_id) or 1
        M.test_single_similarity_page(poem_id)
        executed = true
    end
    if values.test_different == "1" then
        local poem_id = tonumber(values.test_poem_id) or 1
        M.test_single_difference_page(poem_id)
        executed = true
    end

    -- Utility actions
    if values.status == "1" then
        M.show_project_status()
        executed = true
    end
    if values.clean == "1" then
        M.clean_and_rebuild()
        executed = true
    end

    return executed
end
834-- }}}
835
836-- {{{ function M.test_single_similarity_page
837-- Test generating a single similarity page for debugging
-- Writes output/test_similar_NNN.html for one poem, using poems.json and
-- similarity_matrix.json. Logs an error and returns when either file cannot
-- be loaded or the poem id is not present. Returns nothing.
function M.test_single_similarity_page(poem_id)
    utils.log_info("Testing similarity page for poem " .. poem_id .. "...")

    local poems_path = utils.asset_path("poems.json")
    local matrix_path = utils.embeddings_dir() .. "/similarity_matrix.json"
    local output_dir = DIR .. "/output"

    local poems_data = utils.read_json_file(poems_path)
    local similarity_data = utils.read_json_file(matrix_path)
    if not (poems_data and similarity_data) then
        utils.log_error("Failed to load required data files")
        return
    end

    -- Linear search for the requested poem.
    local target
    for _, candidate in ipairs(poems_data.poems) do
        if candidate.id == poem_id then
            target = candidate
            break
        end
    end
    if not target then
        utils.log_error("Poem ID " .. poem_id .. " not found")
        return
    end

    -- The matrix may be wrapped in a `similarities` field or be the top-level table.
    local matrix = similarity_data.similarities or similarity_data
    local ranking = flat_html_generator.generate_similarity_ranked_list(poem_id, poems_data, matrix)
    -- Issue 10-036: Pass nil for chrono_mapping in test function (uses "01" fallback)
    local html = flat_html_generator.generate_flat_poem_list_html(target, ranking, "similar", poem_id, nil)

    local test_file = string.format("%s/test_similar_%03d.html", output_dir, poem_id)
    os.execute("mkdir -p " .. output_dir)
    utils.write_file(test_file, html)
    utils.log_info("Test file written: " .. test_file)
end
871-- }}}
872
873-- {{{ function M.test_single_difference_page
874-- Test generating a single difference page for debugging
-- Writes output/test_different_NNN.html for one poem, using poems.json and
-- embeddings.json. Logs an error and returns when either file cannot be
-- loaded or the poem id is not present. Returns nothing.
function M.test_single_difference_page(poem_id)
    utils.log_info("Testing difference page for poem " .. poem_id .. "...")

    local poems_path = utils.asset_path("poems.json")
    local embeddings_path = utils.embeddings_dir() .. "/embeddings.json"
    local output_dir = DIR .. "/output"

    local poems_data = utils.read_json_file(poems_path)
    local embeddings_data = utils.read_json_file(embeddings_path)
    if not (poems_data and embeddings_data) then
        utils.log_error("Failed to load required data files")
        return
    end

    -- Linear search for the requested poem.
    local target
    for _, candidate in ipairs(poems_data.poems) do
        if candidate.id == poem_id then
            target = candidate
            break
        end
    end
    if not target then
        utils.log_error("Poem ID " .. poem_id .. " not found")
        return
    end

    -- The diversity module is loaded only when a page is actually produced.
    local diversity_chaining = require("diversity-chaining")
    local ranking = diversity_chaining.generate_diversity_chain(poem_id, poems_data, embeddings_data)
    -- Issue 10-036: Pass nil for chrono_mapping in test function (uses "01" fallback)
    local html = flat_html_generator.generate_flat_poem_list_html(target, ranking, "different", poem_id, nil)

    local test_file = string.format("%s/test_different_%03d.html", output_dir, poem_id)
    os.execute("mkdir -p " .. output_dir)
    utils.write_file(test_file, html)
    utils.log_info("Test file written: " .. test_file)
end
909-- }}}
910
911-- {{{ function M.main
912-- Main entry point with support for selective stage execution via CLI flags
913-- Options table can include: interactive, parse_only, validate_only, catalog_only, html_only, force, threads
-- Entry point. `options` (may be nil) selects one mode:
--   interactive   : loop on the menu until "quit"
--   parse_only    : extraction only (honours options.force)
--   validate_only : validation only
--   catalog_only  : image cataloging only (honours options.verbose)
--   html_only     : HTML generation only
--   otherwise     : status report, dataset generation, then HTML generation
-- In the html_only and full-pipeline modes, a failed HTML generation
-- terminates the process with exit status 1.
function M.main(options)
    options = options or {}

    -- A false return means a prerequisite was missing or generation failed;
    -- exit non-zero so a calling script can stop (Issue 10-054 fallout).
    local function build_site_or_exit()
        local built = M.generate_website_html(
            options.force, options.pages, options.poems_per_page, options.threads, options.chrono_per_page)
        if not built then
            os.exit(1)
        end
    end

    if options.interactive then
        -- Use TUI menu if available, otherwise fall back to simple menu
        repeat
            local action, values = M.show_tui_menu()

            if action == "run" then
                local executed = M.handle_tui_action(values)

                if not executed then
                    print("No action selected. Use space to toggle options, then press Enter or select [Run].")
                end
                -- Pause for the user, except right after a status display.
                if not executed or values.status ~= "1" then
                    print("\nPress Enter to continue...")
                    io.read()
                end
            end
        until action == "quit"

        print("Goodbye!")
        return
    end

    if options.parse_only then
        M.extract_poems(options.force)
    elseif options.validate_only then
        M.validate_poems()
    elseif options.catalog_only then
        -- Issue 10-015a: Pass verbose option for detailed statistics
        M.catalog_images(options.verbose)
    elseif options.html_only then
        build_site_or_exit()
    else
        -- Non-interactive mode - generate dataset and website HTML (full pipeline)
        utils.log_info("Running in non-interactive mode (full pipeline)")
        M.show_project_status()
        M.generate_complete_dataset()
        build_site_or_exit()
    end
end
977-- }}}
978
-- Command line execution: runs whenever the global `arg` table is present.
-- A dir_override from the parsed options replaces DIR and puts its libs
-- directory at the front of package.path before M.main is invoked.
if arg then
    local cli_options = utils.parse_cli_args(arg)

    local override = cli_options.dir_override
    if override then
        DIR = override
        package.path = override .. "/libs/?.lua;" .. package.path
    end

    M.main(cli_options)
end
989
990return M