src/run-validation.lua
1#!/usr/bin/env lua
2
3-- Validation Runner Script
4-- Command-line interface for running similarity data validation
5
-- Absolute path of the project root. It is hard-coded, so it must match the
-- location of the checkout on the machine running this script.
local DIR = "/mnt/mtwo/programming/ai-stuff/neocities-modernization"

-- Extend the module search path so require() also looks under the project
-- root (DIR/?.lua) and its libs directory (DIR/libs/?.lua).
package.path = package.path .. ';' .. DIR .. '/?.lua;' .. DIR .. '/libs/?.lua'
9
10local validation_module = require("src.validation-engine")
11local utils = require("libs.utils")
12local json = require("libs.json")
13
14-- {{{ function show_usage
-- Print the command-line help text (usage, options, examples) to stdout.
-- Takes no arguments and returns nothing; each entry below is emitted with
-- its own print() call, in order.
function show_usage()
    local help_lines = {
        "Similarity Validation Runner",
        "===========================",
        "",
        "Usage:",
        " lua src/run-validation.lua [options]",
        "",
        "Options:",
        " -h, --help Show this help message",
        " -I, --interactive Run in interactive mode",
        " -a, --algorithm ALGO Similarity algorithm to use (default: cosine)",
        " -t, --tolerance NUM Tolerance for similarity comparison (default: 0.001)",
        " -s, --sample SIZE Sample size for validation (default: all)",
        " -o, --output DIR Output directory for reports (default: ./validation_reports)",
        " --similarity FILE Similarity matrix file to validate",
        " --embeddings FILE Embeddings file for recalculation",
        " --test Run validation engine tests",
        "",
        "Examples:",
        " lua src/run-validation.lua --test",
        " lua src/run-validation.lua -I",
        " lua src/run-validation.lua -a cosine -t 0.001 -s 1000",
        " lua src/run-validation.lua --similarity matrix.json --embeddings embed.json",
    }

    for index = 1, #help_lines do
        print(help_lines[index])
    end
end
39-- }}}
40
41-- {{{ function parse_arguments
-- Turn a list of command-line arguments into a configuration table.
--
-- args: sequence of argument strings (positions 1..#args are scanned).
-- Returns a table with the fields algorithm, tolerance, sample_size,
-- output_dir, similarity_file, embeddings_file, interactive, run_tests and
-- show_help. Options that take a value consume the following argument; a
-- missing or non-numeric value falls back to the option's default (or nil
-- for options without one). An unrecognised argument is reported on stdout
-- and sets show_help.
function parse_arguments(args)
    local config = {
        algorithm = "cosine",
        tolerance = 0.001,
        sample_size = nil,
        output_dir = "./validation_reports",
        similarity_file = nil,
        embeddings_file = nil,
        interactive = false,
        run_tests = false,
        show_help = false
    }

    -- Switches without a value: option name -> config field set to true.
    local switches = {
        ["-h"] = "show_help",
        ["--help"] = "show_help",
        ["-I"] = "interactive",
        ["--interactive"] = "interactive",
        ["--test"] = "run_tests",
    }

    -- Options with a value: option name -> function storing that value.
    local function set_algorithm(value) config.algorithm = value or "cosine" end
    local function set_tolerance(value) config.tolerance = tonumber(value) or 0.001 end
    local function set_sample(value) config.sample_size = tonumber(value) end
    local function set_output(value) config.output_dir = value or "./validation_reports" end

    local setters = {
        ["-a"] = set_algorithm,
        ["--algorithm"] = set_algorithm,
        ["-t"] = set_tolerance,
        ["--tolerance"] = set_tolerance,
        ["-s"] = set_sample,
        ["--sample"] = set_sample,
        ["-o"] = set_output,
        ["--output"] = set_output,
        ["--similarity"] = function(value) config.similarity_file = value end,
        ["--embeddings"] = function(value) config.embeddings_file = value end,
    }

    local position = 1
    while position <= #args do
        local token = args[position]
        local field = switches[token]
        local store = setters[token]

        if field then
            config[field] = true
        elseif store then
            -- The value is whatever follows the option, even past the end
            -- of the list (nil) or another option-looking string.
            position = position + 1
            store(args[position])
        else
            print(string.format("Unknown argument: %s", token))
            config.show_help = true
        end

        position = position + 1
    end

    return config
end
93-- }}}
94
95-- {{{ function interactive_mode
-- Build a validation configuration by prompting the user on stdin/stdout.
--
-- Asks, in order, for: the similarity algorithm (by menu number), the
-- tolerance, the sample size, and whether to use the default project files
-- or custom paths. Unparseable answers fall back to the first algorithm,
-- a tolerance of 0.001, no sample size, and the default files.
-- Returns a table with algorithm, tolerance, sample_size, similarity_file,
-- embeddings_file and output_dir (always "./validation_reports").
function interactive_mode()
    -- Show a prompt without a trailing newline and hand back the reply.
    local function ask(prompt)
        io.write(prompt)
        return io.read()
    end

    print("š Interactive Validation Mode")
    print("==============================")

    -- Algorithm menu
    local algorithms = {"cosine", "euclidean", "manhattan", "angular", "pearson_correlation", "dot_product", "normalized_euclidean", "chebyshev"}

    print("\nSelect similarity algorithm:")
    for position = 1, #algorithms do
        print(string.format(" %d. %s", position, algorithms[position]))
    end
    local menu_pick = tonumber(ask(string.format("Choice (1-%d) [1]: ", #algorithms))) or 1
    local chosen_algorithm = algorithms[menu_pick] or algorithms[1]

    -- Tolerance
    local tolerance_value = tonumber(ask("Tolerance for similarity comparison [0.001]: ")) or 0.001

    -- Sample size: an empty or non-numeric reply means "validate everything".
    local sample_reply = ask("Sample size (leave empty for full validation): ")
    local sample_count = nil
    if sample_reply ~= "" then
        sample_count = tonumber(sample_reply)
    end

    -- Input files
    print("\nSelect validation mode:")
    print(" 1. Use default project files")
    print(" 2. Specify custom files")
    local mode_pick = tonumber(ask("Choice (1-2) [1]: ")) or 1

    local similarity_path, embeddings_path
    if mode_pick ~= 2 then
        similarity_path = DIR .. "/assets/embeddings/embeddinggemma_latest/similarity_matrix.json"
        embeddings_path = DIR .. "/assets/embeddings/embeddinggemma_latest/embeddings.json"
    else
        similarity_path = ask("Similarity matrix file: ")
        embeddings_path = ask("Embeddings file: ")
    end

    local result = {}
    result.algorithm = chosen_algorithm
    result.tolerance = tolerance_value
    result.sample_size = sample_count
    result.similarity_file = similarity_path
    result.embeddings_file = embeddings_path
    result.output_dir = "./validation_reports"
    return result
end
149-- }}}
150
151-- {{{ function run_validation
-- Run one validation pass and report on it.
--
-- config: table with similarity_file, embeddings_file, algorithm, tolerance,
--         output_dir and an optional sample_size (nil means full dataset).
-- Returns the report table produced by validation_module.validate_single_file.
-- Raises an error when either input file is missing or not specified.
-- Side effects: creates config.output_dir (via `mkdir -p`), writes a
-- timestamped JSON report into it, and prints the configuration and a
-- summary to stdout.
function run_validation(config)
    -- Wrap a value in single quotes for the shell, escaping embedded single
    -- quotes, so paths with spaces or metacharacters are passed literally.
    local function shell_quote(text)
        local escaped = string.gsub(text, "'", "'\\''")
        return "'" .. escaped .. "'"
    end

    print("\nš Starting Validation Process")
    print("==============================")

    -- Check files exist. The nil test comes first so an unspecified path
    -- always yields the "none specified" message instead of depending on
    -- how utils.file_exists reacts to nil.
    if not config.similarity_file or not utils.file_exists(config.similarity_file) then
        error("Similarity file not found: " .. (config.similarity_file or "none specified"))
    end

    if not config.embeddings_file or not utils.file_exists(config.embeddings_file) then
        error("Embeddings file not found: " .. (config.embeddings_file or "none specified"))
    end

    -- Create output directory. "--" keeps a directory name that starts with
    -- "-" from being read as an option.
    os.execute("mkdir -p -- " .. shell_quote(config.output_dir))

    print("Configuration:")
    print(string.format(" - Algorithm: %s", config.algorithm))
    print(string.format(" - Tolerance: %f", config.tolerance))
    print(string.format(" - Sample size: %s", config.sample_size and tostring(config.sample_size) or "full dataset"))
    print(string.format(" - Similarity file: %s", config.similarity_file))
    print(string.format(" - Embeddings file: %s", config.embeddings_file))
    print(string.format(" - Output directory: %s", config.output_dir))

    -- Run validation
    local report = validation_module.validate_single_file(
        config.similarity_file,
        config.embeddings_file,
        config.algorithm,
        {
            tolerance = config.tolerance,
            sample_size = config.sample_size
        }
    )

    -- Save detailed report
    local timestamp = os.date("%Y%m%d_%H%M%S")
    local report_file = string.format("%s/validation_report_%s_%s.json",
        config.output_dir, config.algorithm, timestamp)

    utils.write_json_file(report_file, report)

    -- Print summary
    print("\nš Validation Results Summary")
    print("=============================")
    print(string.format("Algorithm: %s", report.algorithm))
    -- On Lua 5.3+ "%d" raises for a float with a fractional part; flooring
    -- keeps whole-second output if the engine reports a fractional duration.
    print(string.format("Duration: %d seconds", math.floor(report.duration_seconds)))
    print(string.format("Total comparisons: %d", report.statistics.total_comparisons))
    print(string.format("Accuracy rate: %.2f%%", report.statistics.accuracy_rate * 100))
    print(string.format("Missing embeddings: %d", report.statistics.missing_embeddings))
    print(string.format("Calculation errors: %d", report.errors.count))
    print(string.format("Discrepancies: %d", report.discrepancies.count))
    print(string.format("Performance: %.1f comparisons/sec", report.performance.comparisons_per_second))

    if report.discrepancies.max_difference then
        print(string.format("Maximum discrepancy: %.6f", report.discrepancies.max_difference))
    end

    print(string.format("\nDetailed report saved: %s", report_file))

    if #report.recommendations > 0 then
        print("\nRecommendations:")
        for i, rec in ipairs(report.recommendations) do
            print(string.format(" %d. %s", i, rec))
        end
    end

    return report
end
221-- }}}
222
223-- {{{ function run_tests
-- Load the validation-engine test module on demand and run its main()
-- entry point, handing back whatever it returns.
function run_tests()
    return require("src.test-validation-engine").main()
end
228-- }}}
229
230-- {{{ function main
-- Command-line entry point.
--
-- args: sequence of command-line argument strings (nil is treated as empty).
-- Returns a value intended as the process exit status: 0 after showing help
-- or after a successful validation, 1 when required files are missing or the
-- validation raised an error. With --test, returns whatever run_tests()
-- returns.
function main(args)
    local config = parse_arguments(args or {})

    if config.show_help then
        show_usage()
        return 0
    end

    if config.run_tests then
        return run_tests()
    end

    if config.interactive then
        -- interactive_mode() builds a fresh configuration and never asks for
        -- an output directory, so keep the one from the command line
        -- (-o/--output, or its default) instead of silently discarding it.
        local cli_output_dir = config.output_dir
        config = interactive_mode()
        config.output_dir = cli_output_dir or config.output_dir
    end

    -- Validate required parameters
    if not config.similarity_file or not config.embeddings_file then
        print("Error: Must specify similarity and embeddings files or use interactive mode")
        show_usage()
        return 1
    end

    local success, result = pcall(run_validation, config)

    if success then
        print("\n✅ Validation completed successfully")
        return 0
    end

    -- tostring() keeps the message printable when the error value is not a
    -- string (string.format("%s", ...) rejects such values on Lua 5.1).
    print(string.format("\n❌ Validation failed: %s", tostring(result)))
    return 1
end
268-- }}}
269
270-- Run main if executed directly
-- The interpreter stores the script path in arg[0]; only when that path ends
-- in "run-validation.lua" does this file act as a CLI and exit with main()'s
-- status. When the file is loaded some other way, nothing runs here.
local script_path = arg and arg[0]
if script_path and script_path:match("run%-validation%.lua$") then
    os.exit(main(arg))
end
274
-- Functions exposed to code that loads this file with require().
return {
    parse_arguments = parse_arguments,
    interactive_mode = interactive_mode,
    run_validation = run_validation,
    main = main
}