src/diversity-chaining.lua
1-- Diversity chaining algorithm for creating maximally different poem sequences
2-- Uses least-similar selection to build "schizophrenic" reading experiences
3
-- Project root. A DIR already visible when this file loads (e.g. a global set
-- by the caller) takes precedence over the built-in default.
local DIR = DIR or "/mnt/mtwo/programming/ai-stuff/neocities-modernization"

-- Make the project's libs/ directory visible to require().
-- An absolute DIR is used as the root; any other value falls back to the
-- current working directory (./libs). Modules are required once, below,
-- rather than inside each branch where the resulting local went unused.
local libs_root = DIR:find("^/") and DIR or "."
package.path = libs_root .. '/libs/?.lua;' .. package.path
16
17local utils = require('utils')
18-- Issue 10-051 family: shared progress renderer (animated bar on a TTY, plain
19-- lines under --debug, silent when piped) -- one updating line instead of a
20-- "Progress:" line every N chains.
21local progress = require('progress-display')
22
23local M = {}
24
25-- {{{ DiversityConfig class
-- Prototype for chain-generation settings. The fields below are the defaults;
-- instances created by :new() reach them through their __index metatable.
local DiversityConfig = {
  default_chain_length = 20,
  min_diversity_threshold = 0.0, -- any dissimilarity is acceptable
  max_chain_length = 100,
  enable_debug_logging = false
}

-- Create a settings object from an optional table of overrides.
-- Recognised override keys: chain_length, diversity_threshold, debug_logging,
-- max_length. max_length can never exceed the prototype's max_chain_length,
-- and chain_length is clamped (with a warning) to the resulting max_length.
function DiversityConfig:new(config)
  local overrides = config or {}
  local instance = {}
  setmetatable(instance, {__index = self})

  local length = overrides.chain_length
  if not length then
    length = self.default_chain_length
  end
  instance.chain_length = length

  local threshold = overrides.diversity_threshold
  if not threshold then
    threshold = self.min_diversity_threshold
  end
  instance.diversity_threshold = threshold

  local debug_flag = overrides.debug_logging
  if not debug_flag then
    debug_flag = self.enable_debug_logging
  end
  instance.debug_logging = debug_flag

  local requested_cap = overrides.max_length or self.max_chain_length
  instance.max_length = math.min(requested_cap, self.max_chain_length)

  -- Clamp an over-long request instead of rejecting it.
  if instance.chain_length > instance.max_length then
    instance.chain_length = instance.max_length
    utils.log_warn("Chain length reduced to maximum: " .. instance.max_length)
  end

  return instance
end
51-- }}}
52
53-- {{{ function find_least_similar_unused_poem
-- Pick the unused poem with the lowest similarity to current_poem_id.
--
-- Parameters:
--   current_poem_id  id whose similarity row is consulted (looked up by tostring)
--   poems_data       table indexed by numeric poem id; entries may carry
--                    `title` and `category`
--   similarity_data  either {similarities = {[id_string] = row}} or a table
--                    keyed directly by id string; each row maps a target id
--                    string to a number or to a table with a `similarity` field
--   used_poems       set of numeric ids that must not be chosen
--
-- Returns {id, similarity, title, category}, or nil when no candidate qualifies.
-- Equal scores are resolved in favour of the smallest poem id so the result
-- does not depend on pairs() traversal order.
local function find_least_similar_unused_poem(current_poem_id, poems_data, similarity_data, used_poems)
  local current_key = tostring(current_poem_id)
  local nested = similarity_data.similarities
  local similarities = (nested and nested[current_key]) or similarity_data[current_key] or {}
  if type(similarities) ~= "table" then
    similarities = {}
  end

  local best_id = nil
  local best_score = math.huge
  local candidates_checked = 0

  for target_key, entry in pairs(similarities) do
    candidates_checked = candidates_checked + 1
    local target_id = tonumber(target_key)

    -- Skip targets that are already used or that have no poem record.
    if target_id and not used_poems[target_id] and poems_data[target_id] then
      local score = entry
      if type(entry) == "table" then
        score = entry.similarity
      end

      -- Entries without a numeric score are ignored rather than compared.
      if type(score) == "number" then
        local is_lower = score < best_score
        local wins_tie = best_id ~= nil and score == best_score and target_id < best_id
        if is_lower or wins_tie then
          best_id = target_id
          best_score = score
        end
      end
    end
  end

  if best_id then
    local poem = poems_data[best_id]
    return {
      id = best_id,
      similarity = best_score,
      title = poem.title or "Untitled",
      category = poem.category or "unknown"
    }
  end

  -- Fallback: only when the similarity row was completely empty. A row whose
  -- entries were all filtered out yields nil, as before.
  if candidates_checked == 0 then
    utils.log_warn("No similarity data found for poem " .. tostring(current_poem_id) .. ", using fallback selection")

    -- Choose the smallest unused id so the fallback is reproducible.
    local fallback_id, fallback_poem = nil, nil
    for poem_key, poem_data in pairs(poems_data) do
      local id = tonumber(poem_key)
      if id and not used_poems[id] and id ~= current_poem_id
        and (fallback_id == nil or id < fallback_id) then
        fallback_id, fallback_poem = id, poem_data
      end
    end

    if fallback_id then
      return {
        id = fallback_id,
        similarity = 0.0, -- no data available: treated as maximally diverse
        title = fallback_poem.title or "Untitled",
        category = fallback_poem.category or "unknown"
      }
    end
  end

  return nil
end
107-- }}}
108
109-- {{{ function M.generate_maximum_diversity_chain
-- Build a chain of poems in which each step moves to the least similar poem
-- that has not been visited yet.
--
-- Parameters:
--   starting_poem_id  first poem of the chain; coerced with tonumber()
--   poems_data        table indexed by numeric poem id
--   similarity_data   similarity table passed through to
--                     find_least_similar_unused_poem
--   config            optional settings; defaults to DiversityConfig:new().
--                     Reads `chain_length` and `debug_logging`.
--
-- Returns {} when the starting id is invalid (callers must check for a missing
-- `chain` field); otherwise {chain = {ids...}, metadata = {...}}.
function M.generate_maximum_diversity_chain(starting_poem_id, poems_data, similarity_data, config)
  config = config or DiversityConfig:new()
  starting_poem_id = tonumber(starting_poem_id)

  if not starting_poem_id or not poems_data[starting_poem_id] then
    utils.log_error("Invalid starting poem ID: " .. tostring(starting_poem_id))
    return {}
  end

  -- A hand-built config table may omit chain_length or carry it as a string;
  -- normalise it so the numeric for-loop below always gets a number.
  local target_length = tonumber(config.chain_length) or DiversityConfig.default_chain_length

  local chain = {starting_poem_id}
  local used_poems = {[starting_poem_id] = true}
  local current_poem_id = starting_poem_id
  local total_diversity = 0
  local step_count = 0

  -- NOTE(review): the leading glyph in this message looks mis-encoded in the
  -- source; it is kept as found because the original character is unknown.
  utils.log_info(string.format("๐ Building diversity chain starting from poem %d (%s)",
    starting_poem_id,
    poems_data[starting_poem_id].title or "Untitled"))

  for i = 2, target_length do
    local next_poem = find_least_similar_unused_poem(
      current_poem_id,
      poems_data,
      similarity_data,
      used_poems
    )

    if not next_poem then
      utils.log_warn(string.format("No more unused poems available at step %d", i))
      break
    end

    -- Diversity is the complement of similarity: lower similarity, higher diversity.
    local diversity = 1 - next_poem.similarity

    if config.debug_logging then
      utils.log_info(string.format(" ๐ Step %d: %d -> %d (similarity: %.3f, diversity: %.3f)",
        i - 1, current_poem_id, next_poem.id,
        next_poem.similarity,
        diversity))
    end

    chain[#chain + 1] = next_poem.id
    used_poems[next_poem.id] = true
    current_poem_id = next_poem.id
    step_count = step_count + 1
    total_diversity = total_diversity + diversity
  end

  local average_diversity = step_count > 0 and (total_diversity / step_count) or 0

  -- The source showed this literal split across two lines by a mis-decoded
  -- check-mark emoji; it is restored to a single valid string.
  utils.log_info(string.format("✅ Diversity chain complete: %d poems, avg diversity: %.3f",
    #chain, average_diversity))

  return {
    chain = chain,
    metadata = {
      starting_poem_id = starting_poem_id,
      chain_length = #chain,
      target_length = target_length,
      completion_rate = #chain / target_length,
      average_diversity = average_diversity,
      generated_at = os.date("%Y-%m-%d %H:%M:%S")
    }
  }
end
173-- }}}
174
175-- {{{ function M.generate_multiple_diversity_chains
-- Generate one diversity chain per starting poem and summarise the batch.
--
-- Parameters:
--   poem_ids         sequence of starting poem ids
--   poems_data       table indexed by numeric poem id
--   similarity_data  similarity table passed through to the chain generator
--   config           optional settings; defaults to DiversityConfig:new()
--
-- Returns {chains = {[poem_id] = chain_result}, metadata = {...}}. Only chains
-- with more than one poem count as successful.
function M.generate_multiple_diversity_chains(poem_ids, poems_data, similarity_data, config)
  config = config or DiversityConfig:new()
  local results = {}
  local successful_chains = 0
  local total_diversity = 0
  local total_requested = #poem_ids

  -- NOTE(review): the leading glyphs in the two "๐" messages below look
  -- mis-encoded in the source; kept as found because the originals are unknown.
  utils.log_info(string.format("๐ Generating diversity chains for %d starting poems", total_requested))

  for i, poem_id in ipairs(poem_ids) do
    -- Throttled progress update, issued whether or not this poem yields a
    -- chain. NOTE(review): mode 2 gets the coarser interval; what mode 2
    -- means is defined by progress-display - confirm there.
    local step = (progress.mode() == 2) and 100 or 25
    if i % step == 0 then progress.update(" ๐ Diversity chains", i, total_requested) end

    local chain_result = M.generate_maximum_diversity_chain(poem_id, poems_data, similarity_data, config)

    -- The generator returns {} (no `chain` field) for an invalid starting id,
    -- so the field must be checked before taking its length.
    if chain_result and chain_result.chain and #chain_result.chain > 1 then
      results[poem_id] = chain_result
      successful_chains = successful_chains + 1
      total_diversity = total_diversity + chain_result.metadata.average_diversity
    else
      utils.log_warn("Failed to generate chain for poem " .. tostring(poem_id))
    end
  end
  progress.finish()

  local overall_average_diversity = successful_chains > 0 and (total_diversity / successful_chains) or 0

  -- An empty request list would otherwise produce 0/0 (NaN).
  local success_rate = total_requested > 0 and (successful_chains / total_requested) or 0

  -- The source showed this literal split across two lines by a mis-decoded
  -- check-mark emoji; it is restored to a single valid string.
  utils.log_info(string.format("✅ Batch generation complete: %d/%d successful, avg diversity: %.3f",
    successful_chains, total_requested, overall_average_diversity))

  return {
    chains = results,
    metadata = {
      total_requested = total_requested,
      successful_chains = successful_chains,
      success_rate = success_rate,
      overall_average_diversity = overall_average_diversity,
      config = config,
      generated_at = os.date("%Y-%m-%d %H:%M:%S")
    }
  }
end
219-- }}}
220
221-- {{{ function M.analyze_chain_diversity
-- Compute per-step and summary diversity statistics for an existing chain.
--
-- Parameters:
--   chain_data       table with a `chain` sequence of at least two poem ids
--   similarity_data  either {similarities = {[id_string] = row}} or a table
--                    keyed directly by id string; row values are numbers or
--                    tables with a `similarity` field
--
-- Returns {error = "..."} for a missing or too-short chain. Otherwise returns
-- the statistics table, in which `diversities[i]` and `similarities[i]` both
-- describe the step chain[i] -> chain[i + 1]. A pair with no recorded score
-- counts as similarity 0 (diversity 1).
function M.analyze_chain_diversity(chain_data, similarity_data)
  if not chain_data or not chain_data.chain or #chain_data.chain < 2 then
    return {error = "Invalid or too short chain for analysis"}
  end

  local chain = chain_data.chain
  local nested = similarity_data.similarities
  local diversities = {}
  local similarities = {}
  local total_diversity = 0

  for i = 1, #chain - 1 do
    local current_id = tostring(chain[i])
    local next_id = tostring(chain[i + 1])

    local row = (nested and nested[current_id]) or similarity_data[current_id] or {}
    local entry = type(row) == "table" and row[next_id] or nil
    if type(entry) == "table" then
      entry = entry.similarity
    end

    -- Missing or non-numeric scores fall back to 0 instead of raising.
    local similarity_score = tonumber(entry) or 0
    local diversity = 1 - similarity_score

    diversities[i] = diversity
    similarities[i] = similarity_score
    total_diversity = total_diversity + diversity
  end

  -- The chain has at least two poems, so there is at least one step.
  local step_count = #diversities
  local average_diversity = total_diversity / step_count

  -- Order statistics are taken from a sorted copy so the returned per-step
  -- `diversities` array stays aligned with `similarities`.
  local sorted = {}
  for i = 1, step_count do
    sorted[i] = diversities[i]
  end
  table.sort(sorted)

  return {
    chain_length = #chain,
    step_count = step_count,
    average_diversity = average_diversity,
    -- Lower median: for an even number of steps this is the lower middle value.
    median_diversity = sorted[math.ceil(step_count / 2)],
    min_diversity = sorted[1],
    max_diversity = sorted[step_count],
    diversities = diversities,
    similarities = similarities,
    quality_score = average_diversity -- Higher is better for diversity chains
  }
end
274-- }}}
275
276-- {{{ function M.load_similarity_data
-- Read a similarity JSON file and log a short summary of its contents.
--
-- Parameters:
--   similarity_file  path handed to utils.file_exists / utils.read_json_file
--
-- Returns the parsed table unchanged, or nil when the file is missing or
-- cannot be parsed. The format detection below only affects the log line.
function M.load_similarity_data(similarity_file)
  if not utils.file_exists(similarity_file) then
    utils.log_error("Similarity file not found: " .. tostring(similarity_file))
    return nil
  end

  utils.log_info("Loading similarity data from: " .. similarity_file)
  local similarity_data = utils.read_json_file(similarity_file)

  if not similarity_data then
    utils.log_error("Failed to parse similarity data")
    return nil
  end

  local format = "unknown"
  local poem_count = 0
  local total_relationships = 0

  local entries = similarity_data.similarities
  if type(entries) == "table" then
    -- Entries carrying a `top_similar` list mark the sparse layout. They are
    -- detected here because the sparse layout also lives under
    -- `similarities`, so a separate branch for it could never see any entries.
    local saw_sparse_entry = false
    for _, relationships in pairs(entries) do
      poem_count = poem_count + 1
      if type(relationships) == "table" then
        if type(relationships.top_similar) == "table" then
          saw_sparse_entry = true
          total_relationships = total_relationships + #relationships.top_similar
        else
          for _ in pairs(relationships) do
            total_relationships = total_relationships + 1
          end
        end
      end
    end
    format = saw_sparse_entry and "sparse_matrix" or "full_matrix"
  elseif similarity_data.metadata then
    -- Metadata without a `similarities` table: keep the existing label;
    -- there is nothing to count.
    format = "sparse_matrix"
  end

  utils.log_info(string.format("Similarity data loaded: %s format, %d poems, %d relationships",
    format, poem_count, total_relationships))

  return similarity_data
end
319-- }}}
320
321-- {{{ function M.test_diversity_algorithm
-- Smoke-test the algorithm: load both data files, build one chain with debug
-- logging enabled, list it, and log its diversity analysis.
--
-- Parameters:
--   similarity_file  path to the similarity JSON file
--   poems_file       path to a JSON file whose `poems` field is indexed by poem id
--   test_poem_id     starting poem (default 1)
--   chain_length     requested chain length (default 10)
--
-- Returns the chain result table on success, false on any failure.
function M.test_diversity_algorithm(similarity_file, poems_file, test_poem_id, chain_length)
  test_poem_id = test_poem_id or 1
  chain_length = chain_length or 10

  -- The test-tube emoji is restored from its mis-decoded bytes.
  utils.log_info("🧪 Testing diversity algorithm with poem " .. tostring(test_poem_id))

  -- Load data
  local similarity_data = M.load_similarity_data(similarity_file)
  if not similarity_data then return false end

  local poems_data = utils.read_json_file(poems_file)
  if not poems_data or not poems_data.poems then
    utils.log_error("Failed to load poems data")
    return false
  end

  -- Configure test
  local config = DiversityConfig:new({
    chain_length = chain_length,
    debug_logging = true
  })

  -- Generate test chain
  local result = M.generate_maximum_diversity_chain(test_poem_id, poems_data.poems, similarity_data, config)

  -- An invalid starting id yields {} (no `chain` field), which is a failure.
  if not (result and result.chain) then
    -- NOTE(review): the leading glyph looks mis-encoded in the source; kept as
    -- found because the original character cannot be determined.
    utils.log_error("โ Test failed - no chain generated")
    return false
  end

  -- The source showed this literal split across two lines by a mis-decoded
  -- check-mark emoji; it is restored to a single valid string.
  utils.log_info("✅ Test successful! Chain generated:")
  for i, poem_id in ipairs(result.chain) do
    local poem = poems_data.poems[poem_id]
    local title = poem and poem.title or "Unknown"
    utils.log_info(string.format(" %d. Poem %d: %s", i, poem_id, title))
  end

  -- Analyze chain
  local analysis = M.analyze_chain_diversity(result, similarity_data)
  if analysis.error then
    utils.log_warn("Chain analysis failed: " .. analysis.error)
  else
    utils.log_info(string.format("Chain analysis: avg diversity: %.3f, quality: %.3f",
      analysis.average_diversity or 0, analysis.quality_score or 0))
  end

  return result
end
369-- }}}
370
371-- Export DiversityConfig as well for external access
372M.DiversityConfig = DiversityConfig
373
374return M