libs/utils.lua
1#!/usr/bin/env lua
2
3-- Project-wide utility library
4-- Common functions for file I/O, logging, and configuration management
5
local M = {}

-- {{{ local function setup_dir_path
-- Resolve the project root directory.
-- @param provided_dir: optional directory; returned unchanged when truthy
-- @return: provided_dir, or the hard-coded project checkout path
local function setup_dir_path(provided_dir)
    return provided_dir or "/mnt/mtwo/programming/ai-stuff/neocities-modernization"
end
-- }}}

-- Module configuration: default project root, computed once at load time
M.DIR = setup_dir_path()
19
20-- {{{ function M.log_info
-- Print a message via print(), prefixed with "[INFO] ".
-- @param message: value substituted into the line with %s
function M.log_info(message)
    local line = ("[INFO] %s"):format(message)
    print(line)
end
24-- }}}
25
26-- {{{ function M.log_warn
-- Print a message via print(), prefixed with "[WARN] ".
-- @param message: value substituted into the line with %s
function M.log_warn(message)
    local line = ("[WARN] %s"):format(message)
    print(line)
end
30-- }}}
31
32-- {{{ function M.log_error
-- Print a message via print(), prefixed with "[ERROR] ".
-- Note: like the other loggers this goes through print(), not io.stderr.
-- @param message: value substituted into the line with %s
function M.log_error(message)
    local line = ("[ERROR] %s"):format(message)
    print(line)
end
36-- }}}
37
38-- {{{ function M.file_exists
-- Report whether a path can be opened for reading.
-- @param filepath: path to probe
-- @return: true if io.open(filepath, "r") succeeds, false otherwise
function M.file_exists(filepath)
    local handle = io.open(filepath, "r")
    if handle == nil then
        return false
    end
    -- The handle was only needed as a probe; release it right away.
    handle:close()
    return true
end
47-- }}}
48
49-- {{{ function M.read_file
-- Read an entire file into a string.
-- @param filepath: path of the file to read
-- @return: the file contents on success;
--          nil plus an error message when the file cannot be opened or read
function M.read_file(filepath)
    local file = io.open(filepath, "r")
    if not file then
        return nil, "Could not open file: " .. filepath
    end

    -- file:read can come back nil (with a message) when the underlying read
    -- fails after a successful open; report that instead of a bare nil.
    local content, read_err = file:read("*a")
    file:close()
    if content == nil then
        local detail = read_err and (": " .. tostring(read_err)) or ""
        return nil, "Could not read file: " .. filepath .. detail
    end
    return content
end
60-- }}}
61
62-- {{{ function M.write_file
-- Write a string to a file, replacing any existing contents (mode "w").
-- @param filepath: destination path
-- @param content: data passed to file:write
-- @return: true on success;
--          false plus an error message when the file cannot be created,
--          written, or closed
function M.write_file(filepath, content)
    local file = io.open(filepath, "w")
    if not file then
        return false, "Could not create file: " .. filepath
    end

    -- Output is buffered, so a failure (e.g. a full disk) may only surface
    -- on write or on close. Check both before reporting success, and always
    -- close the handle even when the write itself failed.
    local written, write_err = file:write(content)
    local closed, close_err = file:close()
    if not written then
        return false, "Could not write file: " .. filepath .. ": " .. tostring(write_err)
    end
    if not closed then
        return false, "Could not close file: " .. filepath .. ": " .. tostring(close_err)
    end
    return true
end
73-- }}}
74
75-- {{{ function M.get_timestamp
-- Current local date and time as "YYYY-MM-DD HH:MM:SS".
-- @return: string produced by os.date
function M.get_timestamp()
    local layout = "%Y-%m-%d %H:%M:%S"
    return os.date(layout)
end
79-- }}}
80
81-- {{{ function M.ensure_directory
-- Create a directory (and any missing parents) by running `mkdir -p`.
-- The path is passed to the shell single-quoted, so spaces and shell
-- metacharacters in it are taken literally (no glob/tilde/variable expansion).
-- @param dirpath: directory path to create
-- @return: true when the command reports success, false otherwise
function M.ensure_directory(dirpath)
    -- Inside single quotes only ' itself needs care: close the quote, emit an
    -- escaped quote, reopen. The extra gsub count is dropped by the local.
    local escaped = dirpath:gsub("'", "'\\''")
    -- "--" stops option parsing so a path starting with "-" is not a flag.
    local cmd = "mkdir -p -- '" .. escaped .. "'"
    local result = os.execute(cmd)
    -- os.execute yields a status number on Lua 5.1 and a boolean on 5.2+.
    return result == 0 or result == true
end
87-- }}}
88
89-- {{{ function M.get_project_paths
-- Build the table of standard project directories under a base directory.
-- @param base_dir: optional root directory (default: M.DIR)
-- @return: table with keys root, src, libs, assets, docs, notes, issues;
--          root is base_dir itself, every other entry is base_dir .. "/" .. key
function M.get_project_paths(base_dir)
    base_dir = base_dir or M.DIR
    local paths = { root = base_dir }
    local subdirs = { "src", "libs", "assets", "docs", "notes", "issues" }
    for _, name in ipairs(subdirs) do
        paths[name] = base_dir .. "/" .. name
    end
    return paths
end
102-- }}}
103
104-- {{{ function M.parse_interactive_args
-- Minimal argument scan: detects "-I" and a positional directory override.
-- Any argument that does not start with "-" is taken as the directory; when
-- several are present the last one wins. Other flags are ignored.
-- @param args: sequence of argument strings (nil is treated as empty)
-- @return: interactive (boolean), dir_override (string or nil)
function M.parse_interactive_args(args)
    local interactive, dir_override = false, nil

    for _, token in ipairs(args or {}) do
        if token == "-I" then
            interactive = true
        elseif token:sub(1, 1) ~= "-" then
            -- Not a flag: remember it as the directory override
            dir_override = token
        end
    end

    return interactive, dir_override
end
120-- }}}
121
122-- {{{ function M.parse_cli_args
-- Comprehensive CLI argument parser for main.lua
-- Returns a table with all parsed options for selective stage execution
-- Supports: stage flags (--parse-only, --validate-only, --catalog-only,
--           --html-only), config (--force, --verbose/-v, --threads),
--           pagination (--pages, --poems-per-page, --chrono-per-page), --seed
-- Value flags accept both "--flag VALUE" and "--flag=VALUE".
-- Phase D (Issue 8-012): Added --pages flag for pagination control
-- @param args: sequence of argument strings (nil is treated as empty)
-- @return: options table. Boolean flags default to false; dir_override,
--          threads, pages, poems_per_page, chrono_per_page and seed stay nil
--          unless supplied (numeric values are nil when tonumber() fails)
function M.parse_cli_args(args)
    args = args or {}

    local options = {
        interactive = false,
        dir_override = nil,
        -- Stage flags (when set, only run specified stages)
        parse_only = false,
        validate_only = false,
        catalog_only = false,
        html_only = false,
        -- Config flags
        force = false,
        verbose = false,
        threads = nil,
    }

    local i = 1
    while i <= #args do
        local token = args[i]
        local value = args[i + 1] -- candidate value for "--flag VALUE" forms

        -- NOTE: "-" is a quantifier in Lua patterns, so every literal hyphen
        -- in the "--flag=" patterns below is escaped as "%-".
        if token == "-I" or token == "--interactive" then
            options.interactive = true
        elseif token == "--parse-only" then
            options.parse_only = true
        elseif token == "--validate-only" then
            options.validate_only = true
        elseif token == "--catalog-only" then
            options.catalog_only = true
        elseif token == "--html-only" then
            options.html_only = true
        elseif token == "--force" then
            options.force = true
        elseif token == "--verbose" or token == "-v" then
            options.verbose = true
        elseif token == "--threads" and value then
            options.threads = tonumber(value)
            i = i + 1
        elseif token:match("^%-%-threads=") then
            options.threads = tonumber(token:match("^%-%-threads=(%d+)"))
        elseif token == "--pages" and value then
            options.pages = value -- String value: "1", "all", "1-10"
            i = i + 1
        elseif token:match("^%-%-pages=") then
            options.pages = token:match("^%-%-pages=(.+)") -- String value: "1", "all", "1-10"
        elseif token == "--poems-per-page" and value then
            options.poems_per_page = tonumber(value) -- Numeric value: 100, 200, etc.
            i = i + 1
        elseif token:match("^%-%-poems%-per%-page=") then
            options.poems_per_page = tonumber(token:match("^%-%-poems%-per%-page=(%d+)"))
        elseif token == "--chrono-per-page" and value then
            -- Store the value as well as consuming it (it used to be dropped)
            options.chrono_per_page = tonumber(value)
            i = i + 1
        elseif token:match("^%-%-chrono%-per%-page=") then
            options.chrono_per_page = tonumber(token:match("^%-%-chrono%-per%-page=(%d+)"))
        -- Issue 10-058: consume --seed (both forms) so the bare numeric value is
        -- never swallowed by the dir-override branch below (which would point the
        -- build at a nonexistent directory named after the seed).
        elseif token == "--seed" and value then
            options.seed = tonumber(value)
            i = i + 1
        elseif token:match("^%-%-seed=") then
            options.seed = tonumber(token:match("^%-%-seed=(%d+)"))
        elseif token == "--dir" and value then
            -- --dir is read by M.parse_assets_dir(); only skip its value here
            -- so the assets path is not mistaken for a positional directory
            -- override (same failure mode as Issue 10-058).
            i = i + 1
        elseif not token:match("^%-") then
            -- Non-flag argument, treat as directory override
            options.dir_override = token
        end
        -- Any other flag (including --dir=PATH) is skipped

        i = i + 1
    end

    return options
end
203-- }}}
204
205-- {{{ function M.show_menu
-- Print a numbered menu and read the user's selection from stdin.
-- @param title: heading shown above the menu
-- @param options: sequence of option labels
-- @return: the selected index (whole number in 1..#options), or nil after
--          printing "Invalid choice" when the input is missing, not a number,
--          not a whole number, or out of range
function M.show_menu(title, options)
    print("\n=== " .. title .. " ===")
    for i, option in ipairs(options) do
        print(string.format("%d. %s", i, option))
    end
    io.write("Select option (1-" .. #options .. "): ")
    -- io.read() is nil at end of input; tonumber(nil) is nil -> invalid
    local choice = tonumber(io.read())

    -- Input such as "1.5" converts to a number and sits inside the range, but
    -- it is not a usable menu index, so require a whole number as well.
    if choice and choice % 1 == 0 and choice >= 1 and choice <= #options then
        return math.floor(choice)
    end

    print("Invalid choice")
    return nil
end
221-- }}}
222
223-- {{{ function M.confirm_action
-- Ask a yes/no question on stdout and read the answer from stdin.
-- @param message: prompt text; " (y/N): " is appended
-- @return: true only when the answer is "y" or "yes" (case-insensitive);
--          false for anything else, including end of input
function M.confirm_action(message)
    io.write(message .. " (y/N): ")
    local response = io.read()
    if response == nil then
        -- End of input (closed or exhausted stdin): fall back to the
        -- default answer "No" instead of indexing nil.
        return false
    end
    response = response:lower()
    return response == "y" or response == "yes"
end
229-- }}}
230
231-- {{{ function M.read_json_file
-- Read a file and decode its contents as JSON using dkjson.
-- Makes sure M.DIR .. "/libs/?.lua" is on package.path before requiring dkjson.
-- @param filepath: path of the JSON file
-- @return: the decoded value on success;
--          nil plus an error message when the file cannot be read or decoded
--          (decode errors are also reported through M.log_error)
function M.read_json_file(filepath)
    -- Add the libs entry only once; prepending unconditionally made
    -- package.path grow by one duplicate entry on every call.
    local libs_entry = M.DIR .. "/libs/?.lua;"
    if not package.path:find(libs_entry, 1, true) then
        package.path = libs_entry .. package.path
    end
    local dkjson = require("dkjson")

    local content, read_err = M.read_file(filepath)
    if not content then
        return nil, read_err
    end

    -- decode returns value, next position, error message
    local data, _, decode_err = dkjson.decode(content, 1, nil)
    if decode_err then
        M.log_error("JSON decode error in " .. filepath .. ": " .. decode_err)
        return nil, decode_err
    end
    return data
end
246-- }}}
247
248-- {{{ function M.write_json_file
-- Encode a value as JSON (dkjson, indent option enabled) and write it to a file.
-- Makes sure M.DIR .. "/libs/?.lua" is on package.path before requiring dkjson.
-- @param filepath: destination path
-- @param data: value to encode
-- @return: whatever M.write_file returns for the encoded text, or false when
--          encoding yields no string (logged through M.log_error)
function M.write_json_file(filepath, data)
    -- Add the libs entry only once; prepending unconditionally made
    -- package.path grow by one duplicate entry on every call.
    local libs_entry = M.DIR .. "/libs/?.lua;"
    if not package.path:find(libs_entry, 1, true) then
        package.path = libs_entry .. package.path
    end
    local dkjson = require("dkjson")

    local json_string = dkjson.encode(data, { indent = true })
    if not json_string then
        M.log_error("Failed to encode JSON data for " .. filepath)
        return false
    end
    return M.write_file(filepath, json_string)
end
260-- }}}
261
262-- {{{ function M.directory_exists
-- Check whether a path is an existing directory by running the shell
-- test `[ -d 'path' ]`.
-- @param dirpath: path to test
-- @return: true when the test succeeds, false otherwise
function M.directory_exists(dirpath)
    -- A single quote inside the path would end the quoting early; rewrite
    -- each ' as '\'' so the whole path stays one literal shell word.
    -- (The extra gsub count is dropped by assigning to a single local.)
    local escaped = dirpath:gsub("'", "'\\''")
    local cmd = "[ -d '" .. escaped .. "' ]"
    local result = os.execute(cmd)
    -- os.execute yields a status number on Lua 5.1 and a boolean on 5.2+.
    return result == 0 or result == true
end
268-- }}}
269
270-- {{{ function M.get_file_mtime
-- Get a file's modification time by running `stat -c %Y` (GNU stat syntax).
-- @param filepath: path of the file to inspect
-- @return: the number printed by stat, or nil when the command cannot be
--          started, prints nothing (stderr is discarded), or prints
--          something that is not a number
function M.get_file_mtime(filepath)
    -- Keep the path a single literal shell word even if it contains a
    -- single quote: each ' becomes '\''.
    local escaped = filepath:gsub("'", "'\\''")
    local stat_cmd = string.format("stat -c %%Y '%s' 2>/dev/null", escaped)

    local handle = io.popen(stat_cmd)
    if not handle then
        return nil
    end

    local result = handle:read("*a")
    handle:close()
    if not result or result == "" then
        return nil
    end

    -- Strip the trailing newline (and any other whitespace) before converting.
    -- Assigning to a local first drops gsub's count so it is not passed to
    -- tonumber as a base.
    local clean_result = result:gsub("%s+", "")
    return tonumber(clean_result)
end
285-- }}}
286
287-- {{{ function M.get_working_directory
-- Determine the current working directory by running `pwd`.
-- @return: first line printed by pwd, or M.DIR when the command cannot be
--          started or prints nothing
function M.get_working_directory()
    local pipe = io.popen("pwd")
    if not pipe then
        return M.DIR
    end

    local cwd = pipe:read("*l")
    pipe:close()
    return cwd or M.DIR
end
297-- }}}
298
299-- {{{ function M.relative_path
-- Convert an absolute path to a relative path for cleaner output.
-- Issue 7-003: If path equals base_dir, show project name instead of "./"
-- @param absolute_path: path to shorten
-- @param base_dir: optional base directory (default: M.DIR)
-- @return: "<dir name>/" when the path is base_dir itself ("./" if base_dir
--          has no final name component); "./<rest>" when the path lies
--          inside base_dir; otherwise absolute_path unchanged
function M.relative_path(absolute_path, base_dir)
    base_dir = base_dir or M.DIR

    if absolute_path == base_dir or absolute_path == base_dir .. "/" then
        -- Return the directory name (e.g., "neocities-modernization/")
        local dir_name = base_dir:match("([^/]+)/?$")
        if not dir_name then
            -- No name component to show (e.g. base_dir is "/" or "")
            return "./"
        end
        return dir_name .. "/"
    end

    local base_len = #base_dir
    if absolute_path:sub(1, base_len) == base_dir then
        -- A bare string-prefix match is not enough: with base "/a/foo" the
        -- sibling "/a/foobar" must not become "./bar". The character right
        -- after the prefix has to be a separator, unless base_dir already
        -- ends with one (or is empty).
        local on_boundary = base_len == 0
            or base_dir:sub(-1) == "/"
            or absolute_path:sub(base_len + 1, base_len + 1) == "/"
        if on_boundary then
            local relative = absolute_path:sub(base_len + 1)
            if relative:sub(1, 1) == "/" then
                relative = relative:sub(2)
            end
            return "./" .. relative
        end
    end

    return absolute_path
end
318-- }}}
319
320-- ============================================================================
321-- Asset Path Configuration
322-- Configurable storage for generated assets (embeddings, poems.json, etc.)
323-- ============================================================================
324
-- Module state for cached asset configuration
-- _assets_root: resolved assets directory; nil until init_assets_root() succeeds
local _assets_root
-- _assets_config_loaded: set to true once init_assets_root() has resolved a directory
local _assets_config_loaded = false
328
329-- {{{ function M.parse_assets_dir
-- Find the --dir flag in a list of command line arguments.
-- Both "--dir PATH" and "--dir=PATH" are recognised; the first match wins.
-- @param args: table of command line arguments (default: global 'arg')
-- @return: string path if --dir found, nil otherwise
function M.parse_assets_dir(args)
    local argv = args or arg
    if not argv then
        return nil
    end

    for index = 1, #argv do
        local current = argv[index]
        local following = argv[index + 1]
        if current == "--dir" and following then
            return following
        elseif current:match("^%-%-dir=") then
            -- An empty value ("--dir=") yields nil here and ends the scan
            return current:match("^%-%-dir=(.+)$")
        end
    end

    return nil
end
349-- }}}
350
351-- {{{ function M.load_asset_config
-- Issue 10-003: Load asset path configuration from unified config (config.lua)
-- Goes through the "config-loader" module: points it at M.DIR, loads the
-- config, and hands back its asset_paths entry.
-- @return: the asset_paths table (expected to carry an assets_root key), or
--          nil when config-loader cannot be required or no asset_paths is set
function M.load_asset_config()
    local loaded, loader = pcall(require, "config-loader")
    if not (loaded and loader) then
        return nil
    end

    loader.set_project_root(M.DIR)
    local settings = loader.load()
    if settings and settings.asset_paths then
        return settings.asset_paths
    end
    return nil
end
366-- }}}
367
368-- {{{ function M.init_assets_root
-- Initialize assets root path with priority:
--   CLI --dir > config asset_paths.assets_root > M.DIR .. "/assets" > error
-- A CLI or config path that does not exist is an error; there is no fall
-- through to the next source in that case.
-- Must be called once at startup, before any asset_path() calls
-- @param cli_args: optional table of CLI arguments (default: global 'arg')
-- @return: string path to assets root, or nil on error (after printing message)
function M.init_assets_root(cli_args)
    -- Write the "assets directory not found" report to stderr.
    -- headline:   error line, without trailing newline
    -- fix_lines:  complete "Fix:" lines, each already ending in a newline
    -- shown_root: directory printed at the top of the expected layout
    local function report_missing(headline, fix_lines, shown_root)
        local err = io.stderr
        err:write("\n")
        err:write(headline .. "\n")
        err:write("\n")
        for _, fix_line in ipairs(fix_lines) do
            err:write(fix_line)
        end
        err:write("\n")
        err:write("Expected structure:\n")
        err:write(" " .. shown_root .. "/\n")
        err:write(" poems.json\n")
        err:write(" embeddings/\n")
        err:write(" <model-name>/\n")
        err:write(" embeddings.json\n")
        err:write("\n")
    end

    -- Store a resolved directory in the module-level cache and return it.
    local function remember(root)
        _assets_root = root
        _assets_config_loaded = true
        return _assets_root
    end

    -- Check CLI argument first (highest priority)
    local cli_dir = M.parse_assets_dir(cli_args)
    if cli_dir then
        if M.directory_exists(cli_dir) then
            return remember(cli_dir)
        end
        report_missing(
            "Error: Assets directory not found: " .. cli_dir,
            { "Fix: supply valid path via --dir ~/your/assets/path\n" },
            cli_dir
        )
        return nil
    end

    -- Try config file (second priority)
    local config = M.load_asset_config()
    if config and config.assets_root then
        local configured = config.assets_root
        if M.directory_exists(configured) then
            return remember(configured)
        end
        report_missing(
            "Error: Assets directory not found: " .. configured,
            {
                "Fix: supply path via --dir ~/your/assets/path\n",
                " or update asset_paths.assets_root in config.lua\n",
            },
            configured
        )
        return nil
    end

    -- Fallback to project default (for backward compatibility during transition)
    local default_path = M.DIR .. "/assets"
    if M.directory_exists(default_path) then
        return remember(default_path)
    end

    -- Nothing found - error
    report_missing(
        "Error: Assets directory not found",
        { "Fix: supply path via --dir ~/your/assets/path\n" },
        "~/your/assets/path"
    )
    return nil
end
444-- }}}
445
446-- {{{ function M.get_assets_root
-- Get the configured assets root path.
-- Runs init_assets_root() on first use; if that fails the process exits
-- with status 1 (init_assets_root has already printed the reason).
-- @param cli_args: optional CLI args for initialization
-- @return: string path to assets root
function M.get_assets_root(cli_args)
    if _assets_config_loaded then
        return _assets_root
    end

    if not M.init_assets_root(cli_args) then
        os.exit(1)
    end
    return _assets_root
end
460-- }}}
461
462-- {{{ function M.asset_path
-- Build the full path to an asset file by joining the assets root (from
-- get_assets_root()) and a relative path with "/".
-- @param relative: relative path within assets (e.g., "poems.json")
-- @return: assets root .. "/" .. relative
function M.asset_path(relative)
    local root = M.get_assets_root()
    return root .. "/" .. relative
end
469-- }}}
470
471-- {{{ function M.embeddings_dir
472-- Get path to embeddings directory for the named model, or for the currently
473-- configured default model when called with no argument.
474--
475-- Centralizing the model -> directory mapping here means a model switch in
476-- config.lua propagates automatically to every caller, instead of requiring
477-- a hunt through ~30 hardcoded "embeddinggemma_latest" string literals.
478-- @param model_name: optional. nil means "ask inference-server-config which model is
479-- currently selected and use that"; pass an explicit
480-- string only if you need a different model's directory.
481-- @return: full path to that model's embeddings directory
482-- Issue 10-054: movable, regenerable caches live in RAM (tmp/, a tmpfs symlink)
483-- to spare SSD write endurance. Only diversity_cache.json stays on disk (it costs
484-- ~45-50 min to recompute) via embeddings_dir_disk(); everything else is RAM.
485--
486-- There is no switch any more: the project ALWAYS caches in RAM. The earlier
487-- on/off flag only invited "half the writers still point at disk" desyncs. With
488-- one unconditional location, every reader and writer that goes through this
489-- function agrees by construction. The single rule: movable caches ->
490-- embeddings_dir() (RAM); the one reboot-must-survive cache -> embeddings_dir_disk().
-- (file-local helper) Resolve a model name and make it filesystem-safe.
-- @param model_name: optional model name; when nil/false the name comes from
--                    require("inference-server-config").get_selected_model()
-- @return: exactly one value -- the name with every character other than
--          letters, digits, "-", "_" and "." replaced by "_"
local function safe_model(model_name)
    if not model_name then
        model_name = require("inference-server-config").get_selected_model()
    end
    -- Sanitize model name for filesystem safety (e.g. embeddinggemma:latest -> embeddinggemma_latest)
    -- gsub returns (string, count); the parentheses keep the count from
    -- leaking out as a second return value.
    return (model_name:gsub("[^%w%-_.]", "_"))
end
498
-- Embeddings directory under M.DIR .. "/tmp/cache/embeddings/" for the given
-- (or currently selected) model; the name is sanitized by safe_model().
function M.embeddings_dir(model_name)
    local model_dir = safe_model(model_name)
    return M.DIR .. "/tmp/cache/embeddings/" .. model_dir
end
502-- }}}
503
504-- {{{ function M.embeddings_dir_disk
-- The on-DISK embeddings dir: "embeddings/<model>" resolved through
-- asset_path(), i.e. under the assets root. Use ONLY for caches that must
-- survive a reboot -- currently just diversity_cache.json. Everything else
-- uses embeddings_dir().
-- @param model_name: optional model name (sanitized by safe_model())
-- @return: full path to that model's on-disk embeddings directory
function M.embeddings_dir_disk(model_name)
    local subdir = "embeddings/" .. safe_model(model_name)
    return M.asset_path(subdir)
end
510end
511-- }}}
512
513-- {{{ function M.similarities_dir
-- Get path to the similarities directory for a specific model: the
-- "similarities" subdirectory of that model's embeddings_dir().
-- @param model_name: optional model name
-- @return: full path to model's similarities directory
function M.similarities_dir(model_name)
    local embeddings_root = M.embeddings_dir(model_name)
    return embeddings_root .. "/similarities"
end
520-- }}}
521
522return M
523