scripts/test-seed-reproducibility.sh
1#!/usr/bin/env bash
2# test-seed-reproducibility.sh
3#
4# Issue 10-058 validator. In one sentence for the general: it proves that the
5# word cloud's "random" word order is actually controlled by a single seed -- run
6# it twice with the same seed and you get the exact same page; change the seed and
7# the words rearrange. That is the whole promise of the reproducibility feature.
8#
9# WHAT IT CHECKS
10# 1. Same --seed twice -> byte-identical output/wordcloud.html (the strongest
11# possible statement of determinism).
12# 2. Different --seed -> a genuinely DIFFERENT word order. Compared with the
13# stamped seed-comment stripped out, so a pass means the
14# ORDER changed, not merely the printed seed number.
15# 3. The stamped seed in the page matches the --seed it was given (the "which
16# seed made this?" record travels with the artifact).
17#
18# HOW: runs src/wordcloud-generator.lua directly -- one cheap stage, no GPU, no
19# embeddings -- snapshotting output/wordcloud.html between runs into tmp/ (RAM).
20#
21# REQUIRES assets/poems.json (run the extract stage first). Missing data is a hard
22# error here, not a skipped check -- a test that silently passes on no data is worse
23# than no test.
24#
25# Usage: scripts/test-seed-reproducibility.sh [DIR]
26# DIR defaults to the hard-coded project path; pass a path to run from anywhere.
27
set -o nounset

# {{{ paths + preconditions
# Project root: the first CLI argument when given (and non-empty), otherwise
# the hard-coded checkout location. Every other path is derived from it.
DIR=${1:-/mnt/mtwo/programming/ai-stuff/neocities-modernization}
GENERATOR="${DIR}/src/wordcloud-generator.lua"
WORDCLOUD_HTML="${DIR}/output/wordcloud.html"
POEMS_JSON="${DIR}/assets/poems.json"
# Ephemeral snapshots go to the RAM-backed tmp/ symlink (project convention).
SNAP_DIR="${DIR}/tmp/seed-test"
37
# Report a failed check: write "FAIL: <all arguments joined by spaces>" to
# stderr, then terminate the script with exit status 1.
fail() {
    printf 'FAIL: %s\n' "$*" >&2
    exit 1
}
39
# Hard preconditions: the generator script and the extracted poem data must
# both exist. Missing data is an error, never a silently skipped check.
[ -f "$GENERATOR" ] || fail "generator not found at $GENERATOR"
[ -f "$POEMS_JSON" ] || fail "assets/poems.json not found -- run the extract stage first (this test needs real poems to shuffle)."
# The generator is launched through luajit. Check for the interpreter up front
# so its absence is reported as such, not as "generator exited non-zero".
command -v luajit >/dev/null 2>&1 || fail "luajit not found on PATH -- it is required to run $GENERATOR"

# tmp/ is a symlink into a RAM-backed dir that a reboot wipes, leaving the link
# dangling. Ensure its target exists before writing (project convention), via the
# canonical helper if present, else by creating the resolved target directly.
if [ -x "$DIR/scripts/ensure-tmp-symlink" ]; then
    # Best effort on purpose: the mkdir calls below are the real guarantee.
    "$DIR/scripts/ensure-tmp-symlink" "$DIR" >/dev/null 2>&1 || true
fi
# Resolve the symlink so mkdir creates the link's target; fall back to the
# literal path when readlink cannot resolve it.
TMP_TARGET="$(readlink -f "$DIR/tmp" 2>/dev/null || echo "$DIR/tmp")"
mkdir -p "$TMP_TARGET" || fail "could not create tmp target $TMP_TARGET"
mkdir -p "$SNAP_DIR" || fail "could not create snapshot dir $SNAP_DIR"
# }}}
53
# {{{ run_with_seed(seed, snapshot_name)
# Generate the word cloud with a fixed seed and copy the result aside.
#
# Arguments: $1 - seed passed to the generator as --seed
#            $2 - file name for the snapshot inside $SNAP_DIR
# Globals:   GENERATOR, DIR, WORDCLOUD_HTML, SNAP_DIR (read)
# Exits (via fail) when the generator returns non-zero, when it does not write
# the page, or when the snapshot copy fails.
#
# Any page already present is moved aside before the run, so a file found
# afterwards can only have been written by THIS run -- a stale page left by an
# earlier run can no longer satisfy the check. On failure the previous page is
# put back, so a failed test run does not destroy the existing artifact.
run_with_seed() {
    local seed="$1"
    local snapshot="$2"
    local backup="$SNAP_DIR/previous-wordcloud.html"
    local had_previous=0
    local problem=""

    if [ -e "$WORDCLOUD_HTML" ]; then
        mv -f -- "$WORDCLOUD_HTML" "$backup" \
            || fail "could not move aside the existing wordcloud.html before seed $seed"
        had_previous=1
    fi

    if ! luajit "$GENERATOR" "$DIR" --seed "$seed" >/dev/null; then
        problem="generator exited non-zero for seed $seed"
    elif [ ! -f "$WORDCLOUD_HTML" ]; then
        problem="generator produced no wordcloud.html for seed $seed"
    fi

    if [ -n "$problem" ]; then
        if [ "$had_previous" -eq 1 ]; then
            # Best effort: the failure reported below is the one that matters.
            mv -f -- "$backup" "$WORDCLOUD_HTML" 2>/dev/null || true
        fi
        fail "$problem"
    fi

    # The fresh page supersedes the one that was moved aside.
    [ "$had_previous" -eq 0 ] || rm -f -- "$backup"
    cp -- "$WORDCLOUD_HTML" "$SNAP_DIR/$snapshot" || fail "could not snapshot seed $seed"
}
# }}}
65
# {{{ strip_seed_comment(file) -> stdout
# Print FILE without the lines of the "<!-- Issue 10-058: ... -->" seed stamp,
# so the order comparison reflects the WORD ORDER only, not the printed seed
# number.
#
# Arguments: $1 - path of the HTML page to filter
# Outputs:   every line of the page that is not part of the stamp
# Returns:   grep's status -- 0 when at least one line was printed, 1 when
#            nothing was left to print, >1 on a read error
#
# The "--seed" continuation line is matched with any amount of leading
# whitespace (including none): if the stamp is ever re-indented, a pattern tied
# to an exact indent would let the seed number leak into the comparison and
# make "different seed -> different page" pass for the wrong reason.
strip_seed_comment() {
    grep -v \
        -e 'Issue 10-058: word order shuffled' \
        -e '^[[:space:]]*--seed' \
        -e 'output/generation-metadata.json. -->' \
        "$1"
}
# }}}
73
75
# {{{ check 1: same seed -> byte-identical
# Build the page twice with seed 12345; the two snapshots must not differ by a
# single byte. Anything other than "identical" from cmp is a failure.
run_with_seed 12345 a-seed12345.html
run_with_seed 12345 b-seed12345.html
cmp -s "$SNAP_DIR/a-seed12345.html" "$SNAP_DIR/b-seed12345.html" \
    || fail "same seed produced DIFFERENT output -- the shuffle is not deterministic"
echo "PASS: same seed (12345) twice -> byte-identical word cloud"
# }}}
85
# {{{ check 2: different seed -> different word order
# Build the page with seed 99999 and compare it, seed stamp removed, against
# the seed-12345 snapshot taken by check 1.
#
# Each stripping step is a hard failure of its own. Chaining them with && in
# front of cmp would send any stripping error (unreadable snapshot, unwritable
# body file, page with nothing left) into the "pages differ" branch and report
# PASS without having compared anything.
run_with_seed 99999 c-seed99999.html
strip_seed_comment "$SNAP_DIR/a-seed12345.html" > "$SNAP_DIR/a.body" \
    || fail "could not strip the seed stamp from a-seed12345.html (snapshot unreadable, or nothing left after stripping)"
strip_seed_comment "$SNAP_DIR/c-seed99999.html" > "$SNAP_DIR/c.body" \
    || fail "could not strip the seed stamp from c-seed99999.html (snapshot unreadable, or nothing left after stripping)"
# Two empty bodies would be "identical" for the wrong reason; refuse them.
[ -s "$SNAP_DIR/a.body" ] || fail "stripped page a.body is empty -- nothing to compare"
[ -s "$SNAP_DIR/c.body" ] || fail "stripped page c.body is empty -- nothing to compare"

# cmp: 0 = identical, 1 = different, >1 = could not compare.
cmp -s "$SNAP_DIR/a.body" "$SNAP_DIR/c.body"
case $? in
    0) fail "different seeds (12345 vs 99999) produced the SAME word order -- the seed does not govern the shuffle" ;;
    1) echo "PASS: different seed (99999) -> different word order" ;;
    *) fail "could not compare the stripped pages (cmp reported an error)" ;;
esac
# }}}
96
# {{{ check 3: stamped seed matches the seed given
# The page must carry "master seed <N>" for exactly the seed it was built
# with. The number must end at a non-digit or at end of line, so a longer
# number such as 999990 does not count as a match. Both snapshots are checked
# (12345 from check 1, 99999 from check 2): a stamp that is hard-coded rather
# than taken from --seed cannot match both.
if grep -Eq 'master seed 99999([^0-9]|$)' "$SNAP_DIR/c-seed99999.html" \
    && grep -Eq 'master seed 12345([^0-9]|$)' "$SNAP_DIR/a-seed12345.html"; then
    echo "PASS: page stamps the seed it was built with (99999)"
else
    fail "page does not record its own seed -- the 'which seed made this?' stamp is missing"
fi
# }}}

echo "All seed-reproducibility checks passed."
106