src/run-validation-with-reports.lua

418 lines

1#!/usr/bin/env lua

3-- Integrated Validation and Reporting CLI

4-- Combines validation engine with report generation for complete validation workflow

6local DIR = "/mnt/mtwo/programming/ai-stuff/neocities-modernization"

8package.path = package.path .. ';' .. DIR .. '/?.lua;' .. DIR .. '/libs/?.lua'

10local validation_module = require("src.validation-engine")

11local report_module = require("src.report-generator")

12local utils = require("libs.utils")

14-- {{{ function show_usage

-- Print the command-line help text to stdout: banner, usage line,
-- the list of supported options, and a few example invocations.
-- Takes no arguments and returns nothing.
function show_usage()
    -- One entry per output line; empty strings produce blank lines.
    local help_lines = {
        "Integrated Similarity Validation and Reporting Tool",
        "===================================================",
        "",
        "Usage:",
        " lua src/run-validation-with-reports.lua [options]",
        "",
        "Options:",
        " -h, --help Show this help message",
        " -I, --interactive Run in interactive mode",
        " -a, --algorithm ALGO Similarity algorithm to use (default: cosine)",
        " -t, --tolerance NUM Tolerance for similarity comparison (default: 0.001)",
        " -s, --sample SIZE Sample size for validation (default: all)",
        " -o, --output DIR Output directory for reports (default: ./validation_reports)",
        " -f, --format FORMAT Report format: html, markdown, json (default: html)",
        " --similarity FILE Similarity matrix file to validate",
        " --embeddings FILE Embeddings file for recalculation",
        " --compare Run comparative analysis across multiple algorithms",
        " --test Run validation and reporting tests",
        "",
        "Examples:",
        " lua src/run-validation-with-reports.lua --test",
        " lua src/run-validation-with-reports.lua -I",
        " lua src/run-validation-with-reports.lua -a cosine -f html",
        " lua src/run-validation-with-reports.lua --compare -f markdown",
        " lua src/run-validation-with-reports.lua --similarity matrix.json --embeddings embed.json",
    }

    for _, text in ipairs(help_lines) do
        print(text)
    end
end

42-- }}}

44-- {{{ function parse_arguments

-- Turn a sequence of command-line argument strings into a config table.
--
-- Recognised flags either switch a boolean field on or consume the next
-- argument as their value. A value option that is the last argument (no value
-- follows) keeps its default; a non-numeric tolerance falls back to 0.001 and
-- a non-numeric sample size becomes nil. Any unrecognised argument is reported
-- on stdout and turns `show_help` on.
--
-- @param args sequence of argument strings
-- @return table with fields algorithm, tolerance, sample_size, output_dir,
--         format, similarity_file, embeddings_file, interactive, run_tests,
--         compare_algorithms, show_help
function parse_arguments(args)
    local config = {
        algorithm = "cosine",
        tolerance = 0.001,
        sample_size = nil,
        output_dir = "./validation_reports",
        format = "html",
        similarity_file = nil,
        embeddings_file = nil,
        interactive = false,
        run_tests = false,
        compare_algorithms = false,
        show_help = false
    }

    -- Flags without a value: flag text -> config field that is set to true.
    local switches = {
        ["-h"] = "show_help",
        ["--help"] = "show_help",
        ["-I"] = "interactive",
        ["--interactive"] = "interactive",
        ["--compare"] = "compare_algorithms",
        ["--test"] = "run_tests",
    }

    -- Options that consume the following argument (which may be nil when the
    -- option is the last thing on the command line).
    local function set_algorithm(value) config.algorithm = value or "cosine" end
    local function set_tolerance(value) config.tolerance = tonumber(value) or 0.001 end
    local function set_sample(value) config.sample_size = tonumber(value) end
    local function set_output(value) config.output_dir = value or "./validation_reports" end
    local function set_format(value) config.format = value or "html" end

    local valued = {
        ["-a"] = set_algorithm,
        ["--algorithm"] = set_algorithm,
        ["-t"] = set_tolerance,
        ["--tolerance"] = set_tolerance,
        ["-s"] = set_sample,
        ["--sample"] = set_sample,
        ["-o"] = set_output,
        ["--output"] = set_output,
        ["-f"] = set_format,
        ["--format"] = set_format,
        ["--similarity"] = function(value) config.similarity_file = value end,
        ["--embeddings"] = function(value) config.embeddings_file = value end,
    }

    local position = 1
    while position <= #args do
        local token = args[position]
        local flag_field = switches[token]
        local store_value = valued[token]

        if flag_field then
            config[flag_field] = true
        elseif store_value then
            -- Step onto the option's value so the loop's own increment skips it.
            position = position + 1
            store_value(args[position])
        else
            print(string.format("Unknown argument: %s", token))
            config.show_help = true
        end

        position = position + 1
    end

    return config
end

103-- }}}

104

105-- {{{ function interactive_mode

-- Ask the user on stdin/stdout for every validation setting and return them
-- as a config table.
--
-- Menu answers that are not numbers (or are out of range) select the first
-- entry; a non-numeric tolerance becomes 0.001; an empty or non-numeric sample
-- size becomes nil. Choosing "custom files" stores the two lines read
-- verbatim; otherwise the default project files under DIR are used.
--
-- @return table with fields algorithm, format, tolerance, sample_size,
--         similarity_file, embeddings_file, output_dir, compare_algorithms
function interactive_mode()
    print("🔍 Interactive Validation and Reporting Mode")
    print("=============================================")

    -- Show a numbered menu under `heading`, read one line, and return the
    -- chosen entry (first entry when the answer is missing or invalid).
    local function pick_from(heading, options)
        print(heading)
        for position, label in ipairs(options) do
            print(string.format(" %d. %s", position, label))
        end
        io.write("Choice (1-" .. #options .. ") [1]: ")
        local picked = tonumber(io.read()) or 1
        return options[picked] or options[1]
    end

    -- Algorithm selection
    local selected_algorithm = pick_from("\nSelect similarity algorithm:", {
        "cosine", "euclidean", "manhattan", "angular",
        "pearson_correlation", "dot_product", "normalized_euclidean", "chebyshev",
    })

    -- Report format selection
    local selected_format = pick_from("\nSelect report format:", { "html", "markdown", "json" })

    -- Validation mode
    print("\nSelect validation mode:")
    print(" 1. Single algorithm validation")
    print(" 2. Comparative analysis (multiple algorithms)")
    io.write("Choice (1-2) [1]: ")
    local wants_comparison = (tonumber(io.read()) or 1) == 2

    -- Tolerance setting
    io.write("Tolerance for similarity comparison [0.001]: ")
    local tolerance = tonumber(io.read()) or 0.001

    -- Sample size setting: an empty answer means "validate everything"
    io.write("Sample size (leave empty for full validation): ")
    local raw_sample = io.read()
    local sample_size = nil
    if raw_sample ~= "" then
        sample_size = tonumber(raw_sample)
    end

    -- File selection
    print("\nSelect data source:")
    print(" 1. Use default project files")
    print(" 2. Specify custom files")
    io.write("Choice (1-2) [1]: ")
    local source_choice = tonumber(io.read()) or 1

    local similarity_path, embeddings_path
    if source_choice == 2 then
        io.write("Similarity matrix file: ")
        similarity_path = io.read()
        io.write("Embeddings file: ")
        embeddings_path = io.read()
    else
        similarity_path = DIR .. "/assets/embeddings/embeddinggemma_latest/similarity_matrix.json"
        embeddings_path = DIR .. "/assets/embeddings/embeddinggemma_latest/embeddings.json"
    end

    return {
        algorithm = selected_algorithm,
        format = selected_format,
        tolerance = tolerance,
        sample_size = sample_size,
        similarity_file = similarity_path,
        embeddings_file = embeddings_path,
        output_dir = "./validation_reports",
        compare_algorithms = wants_comparison
    }
end

180-- }}}

181

182-- {{{ function run_single_validation_with_report

-- Validate the similarity file against one algorithm and write a report.
--
-- Reads from `config`: similarity_file, embeddings_file, output_dir,
-- algorithm, format, tolerance, sample_size. The report is written to
-- "<output_dir>/validation_report_<algorithm>_<timestamp>.<ext>", where the
-- extension is "html" or "json" for those formats and "md" for anything else.
--
-- @param config table of settings (see above)
-- @return the table returned by validation_module.validate_single_file,
--         followed by the value returned by report_module.generate_single_report
-- Raises an error when either input file is unspecified or does not exist.
function run_single_validation_with_report(config)
    print("\n🔍 Running Single Algorithm Validation with Reporting")
    print("======================================================")

    -- Check files exist. A nil path is rejected here so the "none specified"
    -- message is produced without depending on how utils.file_exists treats nil.
    if not config.similarity_file or not utils.file_exists(config.similarity_file) then
        error("Similarity file not found: " .. (config.similarity_file or "none specified"))
    end

    if not config.embeddings_file or not utils.file_exists(config.embeddings_file) then
        error("Embeddings file not found: " .. (config.embeddings_file or "none specified"))
    end

    -- Create output directory. The path is single-quoted (embedded quotes
    -- escaped as '\'') so spaces and shell metacharacters in a user-supplied
    -- directory are taken literally -- the same literal path is used for the
    -- report file below. "--" stops a leading "-" being read as an option.
    local escaped_dir = tostring(config.output_dir):gsub("'", "'\\''")
    os.execute("mkdir -p -- '" .. escaped_dir .. "'")

    print("Configuration:")
    print(string.format(" - Algorithm: %s", config.algorithm))
    print(string.format(" - Format: %s", config.format))
    print(string.format(" - Tolerance: %f", config.tolerance))
    print(string.format(" - Sample size: %s", config.sample_size and tostring(config.sample_size) or "full dataset"))
    print(string.format(" - Output directory: %s", config.output_dir))

    -- Run validation
    print("\nRunning validation...")
    local validation_result = validation_module.validate_single_file(
        config.similarity_file,
        config.embeddings_file,
        config.algorithm,
        {
            tolerance = config.tolerance,
            sample_size = config.sample_size
        }
    )

    -- Generate report
    print(string.format("Generating %s report...", config.format))
    local timestamp = os.date("%Y%m%d_%H%M%S")
    -- Only html and json keep their own extension; every other format is
    -- written as Markdown.
    local known_extensions = { html = "html", json = "json" }
    local report_extension = known_extensions[config.format] or "md"
    local report_file = string.format("%s/validation_report_%s_%s.%s",
        config.output_dir, config.algorithm, timestamp, report_extension)

    local generated_report = report_module.generate_single_report(validation_result, report_file, config.format)

    -- Print summary
    -- NOTE(review): on Lua 5.3+ "%d" raises an error for numbers without an
    -- integer representation -- confirm duration_seconds and total_comparisons
    -- are always integral.
    local statistics = validation_result.statistics
    print("\n📊 Validation Results Summary")
    print("=============================")
    print(string.format("Algorithm: %s", validation_result.algorithm))
    print(string.format("Duration: %d seconds", validation_result.duration_seconds))
    print(string.format("Total comparisons: %d", statistics.total_comparisons))
    print(string.format("Accuracy rate: %.2f%%", statistics.accuracy_rate * 100))
    print(string.format("Performance: %.1f comparisons/sec", validation_result.performance.comparisons_per_second))

    print(string.format("\n📋 Report generated: %s", generated_report))

    return validation_result, generated_report
end

240-- }}}

241

242-- {{{ function run_comparative_validation_with_report

-- Validate the similarity file against a fixed set of algorithms
-- (cosine, euclidean, angular, manhattan) and write one comparative report.
--
-- Reads from `config`: similarity_file, embeddings_file, output_dir, format,
-- tolerance, sample_size (config.algorithm is not used). The report is written
-- to "<output_dir>/comparative_validation_report_<timestamp>.<ext>".
--
-- @param config table of settings (see above)
-- @return the list of per-algorithm validation results, sorted by descending
--         accuracy_rate (the sort happens after the report has been generated),
--         followed by the value returned by
--         report_module.generate_comparative_report
-- Raises an error when either input file is unspecified or does not exist.
function run_comparative_validation_with_report(config)
    print("\n📊 Running Comparative Algorithm Analysis")
    print("=========================================")

    local algorithms_to_compare = {"cosine", "euclidean", "angular", "manhattan"}

    -- Check files exist. A nil path is rejected here so the "none specified"
    -- message is produced without depending on how utils.file_exists treats nil.
    if not config.similarity_file or not utils.file_exists(config.similarity_file) then
        error("Similarity file not found: " .. (config.similarity_file or "none specified"))
    end

    if not config.embeddings_file or not utils.file_exists(config.embeddings_file) then
        error("Embeddings file not found: " .. (config.embeddings_file or "none specified"))
    end

    -- Create output directory. The path is single-quoted (embedded quotes
    -- escaped as '\'') so spaces and shell metacharacters in a user-supplied
    -- directory are taken literally -- the same literal path is used for the
    -- report file below. "--" stops a leading "-" being read as an option.
    local escaped_dir = tostring(config.output_dir):gsub("'", "'\\''")
    os.execute("mkdir -p -- '" .. escaped_dir .. "'")

    print("Configuration:")
    print(string.format(" - Algorithms: %s", table.concat(algorithms_to_compare, ", ")))
    print(string.format(" - Format: %s", config.format))
    print(string.format(" - Tolerance: %f", config.tolerance))
    print(string.format(" - Sample size: %s", config.sample_size and tostring(config.sample_size) or "full dataset"))

    -- Run validation for each algorithm
    local validation_results = {}
    local algorithm_count = #algorithms_to_compare

    for index, algorithm in ipairs(algorithms_to_compare) do
        print(string.format("\nRunning validation %d/%d: %s", index, algorithm_count, algorithm))

        -- A fresh options table per run, so one run cannot affect the next
        -- through a shared table.
        local validation_result = validation_module.validate_single_file(
            config.similarity_file,
            config.embeddings_file,
            algorithm,
            {
                tolerance = config.tolerance,
                sample_size = config.sample_size
            }
        )

        validation_results[#validation_results + 1] = validation_result

        print(string.format(" - %s: %.1f%% accuracy, %.1f comparisons/sec",
            algorithm, validation_result.statistics.accuracy_rate * 100,
            validation_result.performance.comparisons_per_second))
    end

    -- Generate comparative report
    print(string.format("\nGenerating comparative %s report...", config.format))
    local timestamp = os.date("%Y%m%d_%H%M%S")
    -- Only html and json keep their own extension; every other format is
    -- written as Markdown.
    local known_extensions = { html = "html", json = "json" }
    local report_extension = known_extensions[config.format] or "md"
    local report_file = string.format("%s/comparative_validation_report_%s.%s",
        config.output_dir, timestamp, report_extension)

    local generated_report = report_module.generate_comparative_report(validation_results, report_file, config.format)

    -- Print summary
    print("\n📊 Comparative Analysis Summary")
    print("===============================")

    -- Sort results by accuracy for summary (in place; strict ">" keeps the
    -- comparator a valid ordering for equal accuracies).
    table.sort(validation_results, function(left, right)
        return left.statistics.accuracy_rate > right.statistics.accuracy_rate
    end)

    print("Algorithm Rankings by Accuracy:")
    for rank, result in ipairs(validation_results) do
        print(string.format(" %d. %s: %.2f%% accuracy (%.1f comp/sec)",
            rank, result.algorithm, result.statistics.accuracy_rate * 100,
            result.performance.comparisons_per_second))
    end

    print(string.format("\n📋 Comparative report generated: %s", generated_report))

    return validation_results, generated_report
end

319-- }}}

320

321-- {{{ function run_tests

-- Run the validation-engine tests, the report-generator tests, and a small
-- integration check that generates an HTML report from a mock result.
--
-- The two test modules are loaded lazily with require so they are only needed
-- when --test is used. The integration check passes only when report
-- generation does not raise AND the report file exists afterwards; the file
-- is removed again on success.
--
-- @return 0 when both test mains return 0 and the integration check passes,
--         1 otherwise
function run_tests()
    print("🧪 Running Integrated Validation and Reporting Tests")
    print("====================================================")

    local validation_test = require("src.test-validation-engine")
    local report_test = require("src.test-report-generator")

    print("\n1. Testing Validation Engine:")
    local validation_result = validation_test.main()

    print("\n2. Testing Report Generator:")
    local report_result = report_test.main()

    print("\n3. Testing Integration:")
    -- Create a quick integration test
    local mock_result = report_test.create_mock_validation_result("cosine", 0.95, 67.5)
    local test_report_file = DIR .. "/test_integration_report.html"

    local generated_without_error = pcall(
        report_module.generate_single_report, mock_result, test_report_file, "html")

    -- A single verdict is used for both the PASSED/FAILED line and the overall
    -- result, so a missing report file can no longer be reported as FAILED
    -- here and still count as a pass in the final summary.
    local integration_passed = false
    if generated_without_error and utils.file_exists(test_report_file) then
        integration_passed = true
    end

    if integration_passed then
        print("✅ Integration test: PASSED")
        os.remove(test_report_file)
    else
        print("❌ Integration test: FAILED")
    end

    local overall_success = (validation_result == 0) and (report_result == 0) and integration_passed

    if overall_success then
        print("\n🎉 All integrated tests passed!")
        return 0
    else
        print("\n⚠️ Some tests failed")
        return 1
    end
end

361-- }}}

362

363-- {{{ function main

-- Command-line entry point: parse arguments, then show help, run the tests,
-- or run a single / comparative validation with report generation.
--
-- @param args sequence of argument strings (defaults to an empty list)
-- @return 0 on success or after showing help, the result of run_tests() for
--         --test, and 1 when required files are missing or validation fails
function main(args)
    local config = parse_arguments(args or {})

    if config.show_help then
        show_usage()
        return 0
    end

    if config.run_tests then
        return run_tests()
    end

    if config.interactive then
        -- interactive_mode() builds a brand-new config and never asks for an
        -- output directory, so carry over the one from the command line
        -- (identical to the interactive default unless -o was given).
        local cli_output_dir = config.output_dir
        config = interactive_mode()
        if cli_output_dir then
            config.output_dir = cli_output_dir
        end
    end

    -- Validate required parameters. This applies after interactive mode too:
    -- the interactive config can still lack a file when input ended early.
    if not config.similarity_file or not config.embeddings_file then
        print("Error: Must specify similarity and embeddings files or use interactive mode")
        show_usage()
        return 1
    end

    local runner = run_single_validation_with_report
    if config.compare_algorithms then
        runner = run_comparative_validation_with_report
    end

    -- pcall turns any error raised during validation/reporting into exit code 1.
    local success, failure = pcall(runner, config)

    if success then
        print("\n✅ Validation and reporting completed successfully")
        return 0
    end

    -- tostring() keeps the message printable when the error value is not a
    -- string (string.format's %s rejects tables on Lua 5.1).
    print(string.format("\n❌ Validation and reporting failed: %s", tostring(failure)))
    return 1
end

405-- }}}

406

-- When this file is the script being executed (rather than loaded with
-- require), run main with the command-line arguments and exit with its
-- return value. The check matches the script path in arg[0] against this
-- file's name.
local invoked_path = arg and arg[0]
if invoked_path and invoked_path:match("run%-validation%-with%-reports%.lua$") then
    os.exit(main(arg))
end

-- Functions exposed to code that loads this file as a module.
return {
    main = main,
    run_single_validation_with_report = run_single_validation_with_report,
    run_comparative_validation_with_report = run_comparative_validation_with_report,
    interactive_mode = interactive_mode,
    parse_arguments = parse_arguments
}