config.lua

-- {{{ config.lua
-- Issue 10-003: Single authoritative configuration for neocities-modernization
-- All settings are validated against actual script usage as of 2026-01-21.
-- Sections are organized with vimfolds for easy navigation.
--
-- For detailed field documentation, see: docs/config-reference.md (Issue 10-019)
--
-- Usage:
--   local config = require("config-loader")
--   local assets_root = config.asset_paths.assets_root
--   local colors = config.semantic_colors
-- }}}

-- {{{ asset_paths
-- Root directory for all generated assets: embeddings, caches, indexes.
-- Scripts use this to locate poem embeddings, similarity matrices, and other
-- computed data that persists between pipeline runs.
asset_paths = {
    assets_root = "/mnt/mtwo/programming/ai-stuff/neocities-modernization/assets",
},
-- }}}

-- {{{ layout
-- Controls the visual appearance of poem boxes in generated HTML.
-- These values are read by src/flat-html-generator.lua:load_layout_from_config()
-- Width values are in characters. Junction positions are character offsets.
layout = {
    regular_poem_width = 83,  -- Width of standard poem boxes
    golden_poem_width  = 85,  -- Width of golden poem boxes (1024 chars)
    text_content_width = 80,  -- Inner content area width
    left_box_width     = 11,  -- Left navigation box width
    right_box_width    = 13,  -- Right navigation box width
    gap_width          = 59,  -- Gap between left and right boxes
    left_junction_pos  = 5,   -- Position of left box junction point
    right_junction_pos = 6,   -- Position of right box junction point
},
-- }}}

-- {{{ sources
-- Unified input source configuration (Issue 10-015, extended 10-026).
-- Each source type supports multiple named directories.
-- Pipeline deduplicates by content ID across directories.
-- All extractors now use sources-loader.lua to read these paths.
--
-- Issue 10-026: External sync info is now embedded in each source:
--   - directories[].external.source = where to rsync from
--   - archives[] = ZIP files that extract to this source's directory
-- Use sources-loader.get_all_external_syncs() to collect all sync entries.
sources = {
    fediverse = {
        enabled = true,
        format = "activitypub",
        directories = {
            {
                name = "primary",
                path = "input/fediverse",
            },
        },
        -- Issue 10-026: Archive sources (ZIP files that extract to this source's directory)
        archives = {
            {
                name = "fediverse-zip",
                source = "/home/ritz/backups/fediverse/backups/most-recent-29.zip",
                extract_to = "input",  -- Extracts to input/ root (archive contains fediverse/ dir)
            },
        },
        media = {
            extract_attachments = true,
            output_path = "input/media_attachments/fediverse",
        },
    },
    messages = {
        enabled = true,
        format = "messages_export",
        directories = {
            {
                name = "primary",
                path = "input/messages",
            },
        },
        -- Issue 10-026: Archive sources
        archives = {
            {
                name = "messages-zip",
                source = "/home/ritz/backups/messages-to-myself/input-zip-file/queen-of-her-castle.zip",
                extract_to = "input",  -- Extracts to input/ root (archive contains messages/ dir)
            },
        },
    },
    notes = {
        enabled = true,
        format = "plaintext",
        directories = {
            {
                name = "primary",
                path = "input/notes",
                -- Issue 10-026: External source for rsync
                external = {
                    source = "/home/ritz/notes",
                },
            },
        },
    },
    bluesky = {
        enabled = true,
        format = "atproto",
        directories = {
            {
                name = "primary",
                path = "input/bluesky",
                -- Issue 10-026: External source for rsync
                external = {
                    source = "/home/ritz/backups/bluesky/input",
                },
            },
        },
    },
    images = {
        enabled = true,
        -- include_by_default (per source, default true):
        --   true  -> ship EVERYTHING from this source, minus the entries in
        --            excluded_images that name files in it (a blacklist).
        --   false -> ship NOTHING from this source EXCEPT the entries in
        --            excluded_images that name files in it (a whitelist) --
        --            i.e. the SAME list, but its lines now ADD rather than
        --            remove. Use false when a source is a big directory and
        --            you only want a handful of files out of it.
        -- This keeps excluded_images as one flat list; the flag decides
        -- whether a source's lines subtract from "all" or add to "none".
        directories = {
            {
                name = "fediverse-media",
                -- Bugfix: this pointed at input/images/files, which never
                -- exists, so 546 fediverse post attachments were silently
                -- skipped and never rendered inline. The ZIP extraction
                -- (scripts/update -> scripts/zip-extractor.lua) writes the
                -- deeply-nested Mastodon media to input/media_attachments/
                -- files/..., so the source points there now.
                path = "input/media_attachments/files",
                description = "Mastodon/ActivityPub media attachments (deeply nested)",
                -- No external: comes from ZIP extraction
            },
            {
                name = "my-art",
                include_by_default = true,
                path = "input/images/my-art",
                description = "artwork made in kolourpaint",
                -- Issue 10-026: External source for rsync
                external = {
                    source = "/home/ritz/pictures/my-art",
                },
            },
            {
                name = "things-I-almost-posted",
                include_by_default = true,
                path = "input/images/things-i-almost-posted",
                external = {
                    source = "/home/ritz/pictures/things-i-almost-posted",
                },
                randomize_order = false,
            },
            {
                name = "poem-pictures",
                include_by_default = true,
                path = "input/images/poem-pictures",
                external = {
                    source = "/home/ritz/pictures/poem-pictures",
                },
            },
            {
                name = "dnd-pictures-from-the-internet",
                include_by_default = true,
                path = "input/images/dnd-pictures",
                external = {
                    source = "/home/ritz/pictures/dnd-pictures",
                },
                -- Issue 10-030: Randomize position of these images in timeline
                -- (they don't have meaningful dates, scatter throughout)
                randomize_order = true,
            },
            {
                -- NOTE: external syncs to fediverse-stars, sources reads from here
                -- Path updated to match sync destination (was fediverse-backup)
                name = "fediverse-stars",
                include_by_default = true,
                path = "input/images/fediverse-stars",
                external = {
                    source = "/home/ritz/pictures/fediverse-backup",
                },
                -- Issue 10-030: Randomize position of these images in timeline
                -- (they don't have meaningful dates, scatter throughout)
                randomize_order = true,
            },
        },
        supported_formats = {"png", "jpg", "jpeg", "gif", "webp", "svg"},
        max_file_size_mb = 200,
        preserve_structure = true,
        overwrite_existing = false,
    },
},
-- }}}

-- {{{ external_files - DEPRECATED (Issue 10-026)
-- This section has been merged into the 'sources' section above.
-- External sync info is now stored as 'external' fields in each source's directories,
-- and as 'archives' arrays for ZIP files.
--
-- external-sync.lua now reads from sources-loader.get_all_external_syncs()
-- which collects external sync info from the unified sources configuration.
--
-- This empty array is kept for backward compatibility during the transition.
-- It can be removed after confirming all scripts use sources-loader.
external_files = {},
-- }}}

-- {{{ extraction
-- Controls which input sources are processed during extraction.
-- Disabling a source skips it entirely, useful for testing or partial rebuilds.
extraction = {
    enable_fediverse = true,
    enable_messages = true,
    enable_notes = true,
    enable_bluesky = true,
    -- Issue 7-003: ZIP files to ignore during archive scanning.
    -- These are ZIPs that appear in input/ but aren't content archives
    -- (e.g., site backups embedded in media_attachments from fediverse export).
    ignored_archives = {
        "neocities-ritz-menardi",  -- Neocities site backup, not content data
    },
},
-- }}}

-- {{{ randomization
-- Issue 10-058: One master seed governs every randomization site in a build.
-- Today that is the word-cloud word shuffle (src/wordcloud-generator.lua) and
-- the image-order randomization for any source that does not pin its own
-- per-source random_seed (src/image-manager.lua). Reproducibility needs two
-- things: (a) all randomness flowing from ONE known seed, and (b) that seed
-- recorded somewhere durable. run.sh resolves the seed -- precedence is the
-- --seed CLI flag > this config value > an auto-generated seed -- and records
-- the resolved value to output/generation-metadata.json and the run log, so a
-- build is always answerable to "which seed produced this?".
--
--   seed = nil  => run.sh invents a seed each build and RECORDS it, so even a
--                  build nobody thought to seed is reproducible after the fact.
--   seed = N    => a fixed non-negative integer pins the build: the same seed
--                  over the same inputs yields byte-identical shuffled output.
--
-- A `--seed N` on the run.sh command line overrides this value for one run.
randomization = {
    -- Assigning nil in a table constructor stores no key; the line is kept as
    -- the visible place to pin a seed.
    seed = nil,
},
-- }}}

-- {{{ excluded_poems
-- Issue 6-031: Poems to exclude from the collection during extraction.
-- Excluded poems leave gaps in the ID sequence (tombstoning) - they don't
-- shift other poem IDs down, preserving stable anchor links.
-- Read by: libs/exclusion-filter.lua
--
-- ID Formats by Category:
--   fediverse: Numeric post ID from ActivityPub (e.g., "113847291038475")
--   notes:     Filename without extension (e.g., "what-a-lame-movie")
--   messages:  Numeric message index (e.g., "42")
--   bluesky:   AT Protocol record key (e.g., "3k...abc")
--
-- Finding poem IDs:
--   Browse chronological.html, search poems.json, or grep generated HTML
excluded_poems = {
    fediverse = {
        -- Add fediverse post IDs here, e.g.: "113847291038475"
    },
    notes = {
        -- Add note filenames here (without extension), e.g.: "test-post-please-ignore"
        -- 0129.txt is a raw PDF file (its content starts with %PDF-1.5), not text.
        -- Extraction stored the PDF's binary bytes as the poem content, and because
        -- a PDF is maximally dissimilar to every text poem it became a diversity
        -- outlier -- landing on ~7,900 "different" pages and dumping ~14KB of binary
        -- (NUL bytes, PDF stream data) into each one. Excluding it tombstones the
        -- poem so it never enters poems.json, clearing the whole "different" section.
        -- (Takes effect on the next extraction run.)
        "0129",
    },
    messages = {
        -- Add message indices here, e.g.: "42"
    },
    bluesky = {
        -- Add bluesky record keys here
    },
},
-- }}}

-- {{{ excluded_images
-- Issue 10-053: Images to exclude, named RELATIVE TO input/images/ -- i.e.
-- "<source>/<path-within-source>", the same shape you'd see under
-- input/images/. The leading "input/images/" is implied, so it is no longer
-- repeated on every line (it carried no information and hid the part that
-- matters: which gallery, which file).
--
-- These are STRIPPED from input/ by scripts/strip-excluded after sync, so
-- they never get cataloged, embedded, flattened into output/media, rendered,
-- OR uploaded with input/. The originals stay safe in the /home/ritz/... rsync
-- sources (a later sync re-copies them; the strip removes them again).
--
-- VALIDATED at build start: strip-excluded resolves every entry back to its
-- rsync source and ERRORS if one points at no real file -- a wrong path
-- (e.g. forgetting a subdirectory like kooky-dookerie/) can no longer fail
-- silently and let the image ship anyway. Fix the path and re-run the phase.
--
-- Finding an image's path: copy it from the gallery/page that shows it, or
-- ls input/images/<source>/ then drop the "input/images/" prefix.
excluded_images = {
    -- "my-art/that-one-i-regret.png",
    "poem-pictures/stick-cubes-2.png",
    "my-art/sword-of-damocles-3.png",
    "my-art/help-me-obiwan-kenobi-3.png",
    "my-art/help-me-obiwan-kenobi-2.png",
    "my-art/help-me-obiwan-kenobi-1.png",
    "my-art/help-me-obiwan-kenobi.png",
    "my-art/legion-td-idea.png",
    "my-art/chat-application-with-arrows.png",
    "my-art/air-defence-drones-1.png",
    "my-art/air-defence-drones-2.png",
    "my-art/air-defence-drones-3.png",
    -- NOTE(review): air-defence-drones-4.png is not listed -- confirm that
    -- the gap in this series is intentional.
    "my-art/air-defence-drones-5.png",
    "my-art/greed.png",
    "my-art/continual-context.png",
    "my-art/continual-context-part-2.png",
    "my-art/about-face.png",
    "my-art/perspective-of-matter.png",

    -- usa-today is a sliced thread: the 18 numbered pieces (1..9, then
    -- 99..9999999999) are just the cut-up panels of the single stitched
    -- image usa-today.png, which is the only one we want in the gallery.
    -- The slices stay on disk under my-art/usa-today/ (and their .txt
    -- alt-text); only their input/ copies are stripped so they never
    -- catalog, render, or upload. preserve_structure=true keeps the
    -- usa-today/ subdir, so these paths carry it.
    "my-art/usa-today/1.png",
    "my-art/usa-today/2.png",
    "my-art/usa-today/3.png",
    "my-art/usa-today/4.png",
    "my-art/usa-today/5.png",
    "my-art/usa-today/6.png",
    "my-art/usa-today/7.png",
    "my-art/usa-today/8.png",
    "my-art/usa-today/9.png",
    "my-art/usa-today/99.png",
    "my-art/usa-today/999.png",
    "my-art/usa-today/9999.png",
    "my-art/usa-today/99999.png",
    "my-art/usa-today/999999.png",
    "my-art/usa-today/9999999.png",
    "my-art/usa-today/99999999.png",
    "my-art/usa-today/999999999.png",
    "my-art/usa-today/9999999999.png",

    -- 777-1.png lives in the kooky-dookerie/ subdir, NOT the poem-pictures
    -- root -- preserve_structure=true keeps that subdir, so the exclusion
    -- path must carry it. The old root-level path silently matched nothing.
    -- (The former poem-pictures/1-7.png entries were dropped after those
    -- 2560x1440 screenshots were deleted from disk.)
    "poem-pictures/kooky-dookerie/777-1.png",

    "fediverse-stars/ffdsfa90f670235.png",

    "dnd-pictures/flag.png",
    "dnd-pictures/flag6.png",
    "dnd-pictures/flag7.png",
    "dnd-pictures/flag8.png",
    "dnd-pictures/flag9.png",
},
-- }}}

-- {{{ privacy
-- Anonymization settings for public deployment. In "clean" mode, usernames
-- are replaced with sequential identifiers (user-1, user-2...) to prevent
-- identifying who you were talking to. The local_server_domain is your home
-- instance - local users are anonymized while you remain identifiable.
-- Available modes: "clean" (anonymize), "raw" (preserve original)
privacy = {
    mode = "clean",                   -- "clean" or "raw"
    anonymization_prefix = "user-",   -- Prefix for anonymized usernames
    -- Boosted/reblogged posts OFF by default (the name implies opt-in);
    -- pass --include-boosts to add them
    include_boosts = false,
    preserve_original_length = true,  -- Keep length hints for anonymized names
    store_anonymization_map = false,  -- Don't store mapping (privacy)
    local_server_domain = "tech.lgbt",  -- Your home instance domain
},
-- }}}

-- {{{ semantic_colors
-- Colors for the semantic clustering visualization. Each poem is assigned
-- a color based on its embedding cluster, creating a visual map of your
-- collection's thematic regions. Progress bars blend these colors.
-- Read by: src/semantic-color-calculator.lua
semantic_colors = {
    red    = { rgb = {220, 60, 60},   hex = "#dc3c3c", name = "red" },
    blue   = { rgb = {60, 120, 220},  hex = "#3c78dc", name = "blue" },
    green  = { rgb = {60, 180, 90},   hex = "#3cb45a", name = "green" },
    purple = { rgb = {140, 60, 200},  hex = "#8c3cc8", name = "purple" },
    orange = { rgb = {230, 140, 60},  hex = "#e68c3c", name = "orange" },
    yellow = { rgb = {200, 180, 40},  hex = "#c8b428", name = "yellow" },
    gray   = { rgb = {120, 120, 120}, hex = "#787878", name = "gray" },
},
-- Ordered list for deterministic iteration across pages
color_names = {"red", "blue", "green", "purple", "orange", "yellow", "gray"},

-- {{{ color_associations
-- Each color's "essence" as a list of associated words -- concrete things AND
-- abstract feelings/concepts. semantic-color-calculator embeds every word,
-- mean-combines them into one per-color centroid (the same recombination used
-- for long-poem chunks, Issue 10-050), and assigns each poem the color whose
-- centroid it sits most ABOVE-baseline for (z-scored, hubness-corrected).
--
-- Why a list of associations instead of the bare color word: the bare word
-- "red" embeds to a generic point that, by raw nearness, swallowed ~38% of
-- all poems. A list (fire, blood, passion, rage...) pulls the anchor into the
-- color's real semantic territory -- a poem about war or embers reads red even
-- if it never says "red". Keep each list COHERENT (every word genuinely of
-- that color); a coherent set averages to a clean anchor, an incoherent one
-- to mush. These are a starting point -- edit freely; re-run stage 6.5 after.
color_associations = {
    red = {"fire", "blood", "passion", "anger", "rose", "heat", "danger", "war", "rage", "embers", "desire", "love", "wound"},
    blue = {"sky", "ocean", "calm", "sadness", "cold", "melancholy", "depth", "distance", "ice", "serenity", "longing", "loneliness", "peace"},
    green = {"forest", "growth", "nature", "envy", "leaf", "spring", "life", "moss", "renewal", "jealousy", "fertility", "garden", "grass"},
    purple = {"royalty", "mystery", "magic", "twilight", "luxury", "grief", "wisdom", "orchid", "velvet", "dusk", "nobility", "dream", "spirituality"},
    orange = {"autumn", "warmth", "energy", "citrus", "sunset", "harvest", "enthusiasm", "pumpkin", "amber", "glow", "vitality", "spice", "zest"},
    yellow = {"sun", "joy", "warning", "gold", "happiness", "cowardice", "daffodil", "brightness", "caution", "summer", "lemon", "optimism", "light"},
    gray = {"fog", "ash", "stone", "age", "neutrality", "concrete", "rain", "dullness", "shadow", "winter", "steel", "silence", "gloom"},
},
-- }}}
-- }}}

-- {{{ similarity
-- Algorithm settings for computing poem-to-poem similarity scores.
-- Read by: src/similarity-calculator.lua
-- Available algorithms: "cosine", "euclidean", "manhattan", "angular", "pearson_correlation"
similarity = {
    default_algorithm = "cosine",  -- Cosine is standard for text embeddings
},
-- }}}

-- {{{ inference_servers
-- Issue 10-049: Inference-server configuration for embedding generation.
-- Originally written for Ollama under 10-017; renamed and re-shaped for
-- llama.cpp. Define multiple servers (local, remote GPU, etc.) and
-- switch between them via TUI selection or CLI flags.
-- Read by: libs/inference-server-config.lua
-- CLI overrides: --server NAME, --model NAME, --list-servers
--
-- Fields per server:
--   name:        Label shown in the TUI and used with the --server flag
--   description: Human-readable description
--   host:        Server hostname or IP
--   port:        Inference server's HTTP port
--   model:       Identifier sent in the OpenAI request body (informational;
--                llama-server serves whatever model it has loaded). Convention
--                is to use the GGUF basename without ".gguf".
--   model_path:  Path to the GGUF model file on disk, relative to the
--                project DIR. start-llamacpp-server.sh resolves this
--                into the absolute path it passes to llama-server -m.
--   available_models: (optional) List of models the host can serve
--   embedding_prompt_prefix: (optional) Prefix prepended to every input
--                (e.g. "clustering: " for nomic-embed-text v1.5)
inference_servers = {
    {
        name = "gpu-server",
        description = "Remote GPU server (CUDA)",
        host = "192.168.0.115",
        port = 10265,
        -- NOTE(review): no embedding_prompt_prefix is set on this entry, while
        -- the "local" entry below states that nomic-embed-text v1.5 requires a
        -- task prefix on every input -- confirm whether "clustering: " should
        -- be set here too.
        model = "nomic-embed-text-v1.5",
        model_path = "assets/models/nomic-embed-text-v1.5.Q8_0.gguf",
        -- NOTE(review): "mxbai-embed-large" is a plain-string entry, which per
        -- the note on the "local" entry uses the server-level model_path (the
        -- nomic GGUF) -- confirm this resolves to the intended model file.
        available_models = {
            "nomic-embed-text-v1.5",
            "mxbai-embed-large",
        },
    },
    {
        name = "gpu-server-alt",
        description = "Remote GPU server (alternate port)",
        host = "192.168.0.115",
        port = 11434,
        -- NOTE(review): as with "gpu-server", no embedding_prompt_prefix is
        -- set for this nomic model -- confirm that is intended.
        model = "nomic-embed-text-v1.5",
        model_path = "assets/models/nomic-embed-text-v1.5.Q8_0.gguf",
    },
    {
        name = "local",
        description = "Local llama.cpp instance (CUDA-enabled)",
        host = "192.168.1.100",
        port = 10265,
        -- nomic-embed-text v1.5 produces 768-dimensional vectors and
        -- requires a task-prefix on every input. For diversity ranking
        -- of poetry the right prefix is "clustering: ", which routes
        -- the model through its clustering-oriented internal weights.
        -- Switching models requires regenerating embeddings.json, the
        -- similarity caches, the diversity cache, etc.
        model = "nomic-embed-text-v1.5",
        model_path = "assets/models/nomic-embed-text-v1.5.Q8_0.gguf",
        embedding_prompt_prefix = "clustering: ",
        -- This one machine can serve several local GGUFs (one at a time:
        -- start-llamacpp-server.sh --server=local --model=NAME loads the
        -- chosen file). The default model above is nomic; the entries below
        -- add the others. A plain-string entry (or the default model itself)
        -- uses the server-level model_path/prefix above; a table entry brings
        -- its OWN GGUF and the prompt phrasing its makers intend for
        -- clustering/similarity, so each model is asked the same question
        -- the right way. Switching the served model needs a server restart
        -- (and regenerating the caches that depend on the embedding space).
        available_models = {
            "nomic-embed-text-v1.5",
            {
                model = "mxbai-embed-large-v1",
                model_path = "assets/models/mxbai-embed-large-v1.Q8_0.gguf",
                -- No task-prompt training; embed plain text for symmetric
                -- poem-to-poem similarity (the "Represent this sentence..."
                -- instruction is only for the query side of retrieval).
                embedding_prompt_prefix = nil,
            },
            {
                model = "embeddinggemma-300m",
                model_path = "assets/models/embeddinggemma-300M-Q8_0.gguf",
                -- Trained WITH task prompts; the clustering task uses this
                -- exact prefix per the model card, mirroring nomic's intent.
                embedding_prompt_prefix = "task: clustering | query: ",
            },
        },
    },
},
-- Default server name (must match a name above)
-- If not set, first server in list is used
default_inference_server = "local",
-- }}}

-- {{{ image_integration
-- Settings for including media attachments (images, GIFs) alongside poems.
-- Images from fediverse posts are copied to the output and displayed inline.
-- Read by: src/image-manager.lua (uses sources.images for directories)
image_integration = {
    enabled = true,
    -- NOTE: image directories now come from sources.images (Issue 10-015a)
    supported_formats = {"png", "jpg", "jpeg", "gif", "webp", "svg"},
    -- NOTE(review): sources.images.max_file_size_mb is 200 while this is 100 --
    -- confirm which limit each reader applies.
    max_file_size_mb = 100,                      -- Skip oversized files
    output_path = "assets/images",               -- Where to copy images
    catalog_file = "assets/image-catalog.json",  -- Index of all images
},
-- }}}

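-- {{{ image_sync - DEPRECATED (Issue 10-003b)
-- This section was first replaced by external_files, which has itself since
-- been deprecated and merged into the 'sources' section (Issue 10-026).
-- All external file syncing is now handled by libs/external-sync.lua
-- and scripts/sync-external-files.
--
-- To add a new image source, add an entry to sources.images.directories with
-- an external = { source = "..." } field. (The older instruction -- add an
-- external_files entry with destination = "media_attachments/your-source-name"
-- -- predates the merge into 'sources'.)
-- }}}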

-- {{{ pagination
-- Controls how poems are split across HTML pages. Large collections need
-- pagination to keep page load times reasonable.
-- Read by: src/flat-html-generator.lua:load_pagination_config()
-- CLI overrides: --poems-per-page, --chrono-per-page, --pages (via run.sh)
pagination = {
    poems_per_page = 200,                 -- Poems per similar/different page
                                          -- CLI: --poems-per-page N (run.sh default: 200)
    minimum_pages = 1,                    -- Minimum pages to generate
    -- max_pages_per_poem is intentionally NOT here: the per-poem page ceiling is
    -- COMPUTED each build from the storage quota (storage.limit_gb below) and the
    -- measured size of the last build's pages, by flat-html-generator's
    -- compute_storage_max_pages (Issue 10-057). A frozen 15 was an estimate that
    -- would have shipped ~66GB into a 45GB quota.
    page_number_padding = 2,              -- Zero-padding for page numbers (01, 02...)
    generate_txt_exports = true,          -- Generate .txt versions of poems
    generate_html_archives = false,       -- Disabled: redundant with paginated pages
    chronological_paginated = false,      -- Split chronological.html into pages
    chronological_poems_per_page = 1000,  -- Poems per chronological page (if paginated)
                                          -- CLI: --chrono-per-page N
},
-- }}}

-- {{{ storage
-- Budget planning for Neocities deployment. These values inform the
-- pagination system about storage constraints.
-- Read by: src/flat-html-generator.lua:load_pagination_config()
storage = {
    limit_gb = 45,                 -- Total available storage (Neocities supporter)
    reserved_for_maze_gb = 0.031,  -- Reserved for HTML Maze feature
    reserved_headroom_gb = 5,      -- Safety buffer
},
-- }}}

-- {{{ word_cloud
-- Word cloud page settings. Extracts vocabulary from all poems, filters
-- stop words (common words like "the", "and"), and displays the remaining
-- words sized by frequency. Each word links to poems containing it.
-- Read by: src/wordcloud-generator.lua
word_cloud = {
    enabled = true,
    output_file = "wordcloud.html",
    min_occurrences = 5,  -- Minimum times a word must appear
    max_words = 200,      -- Maximum words to display (0 = unlimited)
    min_word_length = 3,  -- Ignore words shorter than this
    font_size_min = 1,    -- HTML font tag: 1-7 scale
    font_size_max = 7,

    -- Stop words: common words to exclude from word cloud
    -- Organized by category for easy editing
    stop_words = {
        -- Anonymization artifacts (from privacy processing)
        "user", "users",
        -- Contraction fragments (from apostrophe removal)
        "don", "doesn", "didn", "isn", "aren", "wasn", "weren",
        "wouldn", "couldn", "shouldn", "haven", "hasn", "hadn", "won",
        -- URL/Technical artifacts
        "https", "http", "www", "com", "org", "net",
        -- Articles
        "a", "an", "the",
        -- Pronouns
        "i", "me", "my", "mine", "myself", "you", "your", "yours", "yourself",
        "he", "him", "his", "himself", "she", "her", "hers", "herself",
        "it", "its", "itself", "we", "us", "our", "ours", "ourselves",
        "they", "them", "their", "theirs", "themselves",
        "who", "whom", "whose", "which", "what", "that", "this", "these", "those",
        -- Prepositions
        "in", "on", "at", "to", "for", "of", "with", "by", "from", "up", "down",
        "out", "into", "over", "under", "through", "between", "among",
        "about", "after", "before", "during", "without", "within",
        -- Conjunctions
        "and", "or", "but", "nor", "so", "yet", "because", "although",
        "while", "if", "when", "where", "as", "than",
        -- Auxiliary verbs
        "is", "are", "was", "were", "be", "been", "being", "am",
        "have", "has", "had", "having", "do", "does", "did", "doing",
        "will", "would", "could", "should", "may", "might", "must", "shall", "can",
        -- Common verbs
        "get", "got", "go", "went", "gone", "come", "came", "make", "made",
        "take", "took", "taken", "see", "saw", "seen", "know", "knew", "known",
        "think", "thought", "say", "said", "give", "gave", "given",
        "find", "found", "tell", "told", "feel", "felt", "become", "became",
        "leave", "left", "put", "keep", "kept", "let", "begin", "began", "begun",
        "seem", "seemed", "help", "helped", "show", "showed", "shown",
        "hear", "heard", "turn", "turned", "start", "started", "run", "ran", "move", "moved",
        -- Common adverbs
        "very", "really", "just", "also", "too", "still", "even", "now", "then",
        "here", "there", "always", "never", "often", "sometimes", "already",
        "again", "ever", "soon", "only",
        -- Question words
        "how", "why",
        -- Other common words
        "all", "some", "any", "no", "not", "more", "most", "other", "such",
        "own", "same", "like", "well", "way", "back", "much", "many",
        "new", "good", "first", "last", "long", "great", "little", "old",
        "right", "big", "high", "different", "small", "large", "next", "early",
        "young", "important", "few", "public", "bad", "enough", "able", "sure",
        "thing", "things", "people", "time", "year", "years", "day", "days",
        "world", "life", "man", "woman", "men", "women", "child", "children",
        "something", "nothing", "everything", "someone", "anyone", "everyone",
    },
},
-- }}}

-- {{{ centroids
-- Mood-based exploration anchors. Each centroid defines a "semantic target"
-- using keywords and optional source files. The pipeline embeds these targets
-- and generates similarity pages showing which poems match each mood.
-- Read by: src/centroid-generator.lua
--
-- To add a new mood: copy an existing entry, change the name/slug/keywords.
-- Keywords can be single words or evocative phrases - the embedding model
-- will find poems that feel similar to the combined meaning.
centroids = {
    {
        name = "melancholy",
        description = "Sad, reflective, introspective moods - winter feelings and quiet grief",
        source_files = {},
        keywords = {
            "loneliness",
            "grief",
            "winter",
            "rain on windows",
            "empty rooms",
            "quiet sadness",
            "memory of someone gone",
            "the weight of silence",
        },
        output_slug = "melancholy",
    },
    {
        name = "wonder",
        description = "Awe, curiosity, the vastness of existence",
        source_files = {},
        keywords = {
            "stars",
            "infinity",
            "childhood wonder",
            "discovery",
            "the unknown",
            "first time seeing the ocean",
            "questions without answers",
            "the size of the universe",
        },
        output_slug = "wonder",
    },
    {
        name = "rage",
        description = "Anger, frustration, righteous fury",
        source_files = {},
        keywords = {
            "injustice",
            "betrayal",
            "fire",
            "screaming into the void",
            "broken promises",
            "systemic failure",
            "enough is enough",
        },
        output_slug = "rage",
    },
    {
        name = "tenderness",
        description = "Gentle love, care, softness between beings",
        source_files = {},
        keywords = {
            "holding hands",
            "soft voice",
            "caring for someone sick",
            "pet sleeping on your lap",
            "forgiveness",
            "vulnerability",
            "being seen",
        },
        output_slug = "tenderness",
    },
    {
        name = "absurdity",
        description = "The strange, surreal, and darkly comic",
        source_files = {},
        keywords = {
            "kafka",
            "bureaucracy",
            "meaninglessness that becomes funny",
            "the universe as joke",
            "recursive paradox",
            "waiting for something that never comes",
        },
        -- Slug is intentionally kept as written; it differs from the name.
        output_slug = "absurd",
    },
    {
        name = "hope",
        description = "Uplifting, encouraging, healing - poems for hope cards and difficult times",
        source_files = {},
        keywords = {
            "hope",
            "healing",
            "light at the end of the tunnel",
            "things will get better",
            "resilience after hardship",
            "growth through difficulty",
            "recovery and renewal",
            "new beginnings",
            "gentle encouragement",
            "you are not alone in this",
            "kindness in dark times",
            "compassion for yourself",
            "tomorrow is another day",
            "this too shall pass",
            "the relief after crying",
            "being held when you're scared",
            "winter turning to spring",
            "stars in the darkest night",
            "tired but still here",
            "scared but brave enough",
            "small victories matter",
            "rest is not giving up",
            "you did your best today",
            "permission to be imperfect",
        },
        output_slug = "hope",
    },
    {
        name = "fierce-hope",
        description = "Empowering, activist, revolutionary hope - strength and resistance",
        source_files = {},
        keywords = {
            "revolution",
            "resistance",
            "we will overcome",
            "rising up together",
            "collective power",
            "speaking truth to power",
            "no justice no peace",
            "solidarity",
            "the arc of justice",
            "they tried to bury us they didn't know we were seeds",
            "we are the ones we've been waiting for",
            "never give up never surrender",
            "fierce tenderness",
            "angry and hopeful",
            "burn it down and build anew",
        },
        output_slug = "fierce-hope",
    },
    {
        name = "quiet-comfort",
        description = "Cozy, gentle, safe spaces - poems for rest and sanctuary",
        source_files = {},
        keywords = {
            "rest",
            "safety",
            "warm blanket on cold night",
            "tea and quiet moments",
            "sanctuary from the storm",
            "soft lighting",
            "gentle rain on windows",
            "curled up with a book",
            "permission to do nothing",
            "the luxury of being alone",
            "home as refuge",
            "peace in small things",
            "the comfort of routine",
            "slow mornings",
            "everything can wait",
            "you are safe here",
        },
        -- Slug is intentionally kept as written; it differs from the name.
        output_slug = "comfort",
    },
},
-- }}}

-- {{{ html_theme
-- Dark mode theme colors applied via HTML body attributes (CSS-free).
-- Uses true black (#000000) for OLED power savings and maximum contrast.
-- These colors are applied to <body bgcolor="..." text="..." link="..." vlink="...">
html_theme = {
    background = "#000000",  -- True black background (OLED-friendly)
    text = "#FFFFFF",        -- White text for readability
    link = "#6699FF",        -- Blue for unvisited links
    vlink = "#9966FF",       -- Purple for visited links
},
-- }}}

-- {{{ Algorithm Reference (documentation only)
-- These algorithm descriptions are for reference only - not read by scripts.
-- The actual algorithm is selected via similarity.default_algorithm above.
--
-- Available algorithms:
--   cosine:              Angle between vectors, range [-1, 1], fast, best for text embeddings
--   euclidean:           Distance converted to similarity, range [0, 1], fast
--   manhattan:           L1 distance converted to similarity, range [0, 1], robust to outliers
--   angular:             Normalized angle, range [0, 1], good for directional data
--   pearson_correlation: Correlation coefficient, range [0, 1], for statistical analysis
--
-- Removed stale options (2026-01-21, Issue 10-003):
--   output_format: Only JSON is supported, no need for config
--   preserve_timestamps: Always preserved, not configurable
--   validation_settings: Over-engineering, not implemented
-- }}}