src/regenerate-clean-site.lua

99 lines

1#!/usr/bin/env lua
2
3-- Quick script to regenerate the site with PDF-corrupted poems filtered out
4-- This fixes the performance issue caused by embedded PDF binary data
5
-- Project root: the first command-line argument when one is given, otherwise
-- the default checkout location below.
-- NOTE(review): arg[1] is taken verbatim as the root, while the same `arg`
-- table is also handed to utils.init_assets_root further down (whose note
-- mentions a --dir flag). Confirm both agree on the command-line layout.
local DEFAULT_PROJECT_DIR = "/mnt/mtwo/programming/ai-stuff/neocities-modernization"
local DIR = arg[1] or DEFAULT_PROJECT_DIR

-- Put the project's libs/ and src/ directories ahead of the stock search path
-- so the two requires below resolve to the project's own modules.
package.path = table.concat({
    DIR .. "/libs/?.lua",
    DIR .. "/src/?.lua",
    package.path,
}, ";")

local utils = require("utils")
local flat_generator = require("flat-html-generator")

-- Set up the asset root from the command line. Per the original note, a CLI
-- --dir is meant to take precedence over config; that logic lives in utils.
utils.init_assets_root(arg)
14
-- Load the poem collection that every later step of this script works on.
print("Loading poems data...")
local poems_data = utils.read_json_file(utils.asset_path("poems.json"))

if not poems_data then
    print("ERROR: Could not load poems.json")
    os.exit(1)
end

-- The rest of the script takes the length of poems_data.poems and iterates
-- it, so stop here with a readable message instead of a raw
-- "attempt to get length of a nil value" error when the decoded JSON does not
-- have that shape.
if type(poems_data) ~= "table" or type(poems_data.poems) ~= "table" then
    print("ERROR: poems.json does not contain a 'poems' list")
    os.exit(1)
end

print(string.format("Loaded %d poems", #poems_data.poems))
24
-- Filter out corrupted poems containing PDF data

--- Report whether a poem's content contains the literal text "%PDF".
-- The search runs in plain mode (4th argument `true`), so "%" is an ordinary
-- character. The previous second check used the pattern "^%PDF", where "%P"
-- is the "any non-punctuation character" class; it therefore matched any
-- content starting with e.g. "PDF" or "ADF" and dropped healthy poems.
-- A match anywhere in the content already covers a match at the start.
-- @param poem poem record; only its `content` field is read
-- @return true when `content` is a string containing "%PDF", false otherwise
local function has_pdf_signature(poem)
    local content = poem.content
    if type(content) ~= "string" then
        -- Missing or non-text content cannot hold the marker; keep the poem.
        return false
    end
    return content:find("%PDF", 1, true) ~= nil
end

--- Render a poem id for the log line without raising on unusual values.
-- Uses "%d" whenever string.format accepts the value (same text as before),
-- and falls back to tostring for nil or otherwise non-integer ids.
-- @param id value of the poem's `id` field (may be nil)
-- @return string form of the id
local function format_poem_id(id)
    local ok, text = pcall(string.format, "%d", id)
    if ok then
        return text
    end
    return tostring(id)
end

local filtered_poems = {}
local pdf_poem_count = 0

for _, poem in ipairs(poems_data.poems) do
    if has_pdf_signature(poem) then
        pdf_poem_count = pdf_poem_count + 1
        print(string.format("Filtering out PDF-corrupted poem ID %s from category %s",
            format_poem_id(poem.id), tostring(poem.category or "unknown")))
    else
        filtered_poems[#filtered_poems + 1] = poem
    end
end

print(string.format("Filtered out %d PDF-corrupted poems", pdf_poem_count))
print(string.format("Remaining poems: %d", #filtered_poems))

-- Update poems data with filtered list so later steps only see kept poems
poems_data.poems = filtered_poems
44
-- Load the optional similarity and embedding inputs, then generate the site.
-- When either input is missing only the chronological index (HTML and TXT) is
-- produced; otherwise the complete flat HTML collection is generated.
-- Any generation step that reports failure makes the script exit with
-- status 1 and skips the closing success message.

--- Length of a list-like value, or 0 when the value is not a table.
-- Keeps the summary lines from raising if a result field is absent.
local function count_of(list)
    if type(list) == "table" then
        return #list
    end
    return 0
end

-- Load similarity data
print("Loading similarity matrix...")
local similarity_data = utils.read_json_file(utils.embeddings_dir() .. "/similarity_matrix.json")

-- Load embeddings data
print("Loading embeddings...")
local embeddings_data = utils.read_json_file(utils.embeddings_dir() .. "/embeddings.json")

-- Both branches write below the same directory.
local output_dir = DIR .. "/output"

-- Flipped to false by any step that returns a falsy result.
local generation_ok = true

if not similarity_data or not embeddings_data then
    print("WARNING: Could not load similarity/embedding data. Generating chronological index only...")

    -- Generate just the chronological index (HTML and TXT)
    print("Generating chronological index...")
    local result = flat_generator.generate_chronological_index_with_navigation(poems_data, output_dir)
    if result then
        print("Successfully generated chronological.html at: " .. tostring(result))
    else
        print("ERROR: Failed to generate chronological HTML index")
        generation_ok = false
    end

    local txt_result = flat_generator.generate_chronological_txt_file(poems_data, output_dir .. "/chronological.txt")
    if txt_result then
        print("Successfully generated chronological.txt at: " .. tostring(txt_result))
    else
        print("ERROR: Failed to generate chronological TXT export")
        generation_ok = false
    end
else
    -- Generate complete site
    print("Generating complete flat HTML collection...")
    local results = flat_generator.generate_complete_flat_html_collection(
        poems_data,
        similarity_data.similarities,
        embeddings_data,
        output_dir
    )

    if results then
        print("Site generation complete!")
        print(string.format("- Generated %d similarity pages", count_of(results.similarity_pages)))
        print(string.format("- Generated %d diversity pages", count_of(results.diversity_pages)))
        if results.chronological_index then
            print("- Generated chronological index: " .. tostring(results.chronological_index))
        end
        if results.chronological_txt then
            print("- Generated chronological TXT: " .. tostring(results.chronological_txt))
        end
        if results.instructions_page then
            print("- Generated instructions page: " .. tostring(results.instructions_page))
        end
    else
        print("ERROR: Site generation failed")
        generation_ok = false
    end
end

-- Mirror the poems.json failure path: report failure through the exit status
-- rather than announcing success after an ERROR line.
if not generation_ok then
    os.exit(1)
end

print("\nDone! The site should now load without performance issues.")