libs/inference-server-config.lua

479 lines

1-- {{{ inference-server-config.lua

2-- Issue 10-049: Inference-server configuration loader (originally written for

3-- Ollama under 10-017; renamed and reframed for llama.cpp). Reads server

4-- definitions from config.lua and provides an API for server selection. The

5-- public surface intentionally stays close to the pre-migration shape so

6-- existing call sites in the rest of the codebase keep their structure.

7--

8-- Usage:

9-- local inference = require("inference-server-config")

10-- inference.set_project_root("/path/to/project") -- Required before other calls

11--

12-- -- Get servers

13-- local servers = inference.get_servers()

14-- local server = inference.get_server_by_name("gpu-server")

15-- local default = inference.get_default_server()

16--

17-- -- Build URL

18-- local url = inference.build_host_url(server) -- "http://192.168.0.115:10265"

19--

20-- -- Validate connection

21-- local ok, msg = inference.validate_server(server)

22-- }}}

local M = {}

-- {{{ Module state
-- Private per-process state. None of it is exported; callers reach it only
-- through the M.set_* / M.get_* functions below.
local project_root = nil     -- directory config.lua is loaded from
local config = nil           -- cached config.lua result; nil means "not loaded yet"
local selected_server = nil  -- CLI override (server name)
local selected_model = nil   -- CLI override (model name)

-- Interactive-context flag. It starts off and can only be switched on by a
-- CLI driver calling set_interactive_mode(true) once it has seen -I on its
-- command line. It is intentionally NOT a config.lua key: the user-editable
-- config describes what the project IS, not how the operator is running it
-- at this particular moment.
--
-- The flag selects between the two halves of one policy that applies
-- whenever user input fails to resolve against the configured options
-- (a typo in --server, an unrecognized --model, a default that names a
-- nonexistent entry, and so on):
--   * non-interactive callers hard-error straight away, so the mistake
--     cannot be overlooked;
--   * interactive callers are asked whether to continue with a sensible
--     default or abort.
-- Quietly falling back to a default is never acceptable: warnings scroll
-- past in long log streams, and a wrong default can cost hours of work
-- against the wrong endpoint before anybody notices.
local interactive_mode = false
-- }}}

-- {{{ set_project_root
-- Point the module at a project directory. Must be called before any
-- function that needs config.lua.
--   path: directory that contains config.lua
-- Any previously cached config is dropped, so the next lookup re-reads
-- config.lua from the new root.
function M.set_project_root(path)
    project_root, config = path, nil
end
-- }}}

-- {{{ local function load_config
-- Load config.lua once and cache the result in the module-level `config`.
--
-- If set_project_root was never called, the root is guessed:
--   1. from the first package.path entry containing "/libs/?.lua"
--      (everything before "/libs" is taken as the root);
--   2. otherwise from a hard-coded development path.
--
-- Returns: the config table. The result is ALWAYS a table:
--   * if config.lua cannot be loaded (missing file, syntax error, runtime
--     error) an empty table is cached and returned -- the load error itself
--     is deliberately not surfaced here;
--   * if config.lua loads but does not return a table (e.g. it forgot its
--     final `return`), an empty table is used as well. Previously the raw
--     value was cached, which made every caller crash on `cfg.<field>` and,
--     when the value was nil, re-ran config.lua on every call.
local function load_config()
    if config then
        return config
    end

    if not project_root then
        -- Derive the root from package.path, else fall back to the default.
        project_root = package.path:match("([^;]+)/libs/%?%.lua")
            or "/mnt/mtwo/programming/ai-stuff/neocities-modernization"
    end

    local ok, result = pcall(dofile, project_root .. "/config.lua")
    if ok and type(result) == "table" then
        config = result
    else
        -- Config not available (or not a table): use empty.
        config = {}
    end

    return config
end
-- }}}

-- {{{ local function prompt_for_server_fallback
-- Interactive recovery for "--server=<name> did not resolve."
--
-- Shows the configured default server on stderr and asks the operator to
-- either use it or abort. Prompts go to stderr so they stay visible when a
-- caller captures stdout (such callers should not enable interactive mode
-- in the first place).
--
--   bad_name: the --server value that failed to resolve
--
-- Returns: the default server table when the operator answers "1". In that
--   case selected_server is also cleared, so later get_selected_server calls
--   go straight to the default instead of prompting again.
-- Raises: when no usable default exists, or when the operator answers
--   anything other than "1" (including end-of-input on stdin).
--
-- The reply is trimmed before comparison, so "1 " or a reply with a
-- trailing carriage return is accepted instead of being treated as abort.
local function prompt_for_server_fallback(bad_name)
    local default_name = load_config().default_inference_server
    local default_server = nil
    if default_name then
        default_server = M.get_server_by_name(default_name)
    end

    local out = io.stderr
    out:write(string.format(
        "\n[!] Inference server '%s' was not found in config.lua's inference_servers.\n", bad_name))

    if not default_server then
        out:write(" No usable default_inference_server is configured to fall back to.\n")
        error(string.format(
            "inference-server-config: --server=%s did not resolve and no default is available.", bad_name))
    end

    out:write("\nThe configured default is:\n")
    out:write(string.format(" name: %s\n", default_server.name))
    out:write(string.format(" host: %s\n", default_server.host or "(missing in config!)"))
    out:write(string.format(" port: %s\n", tostring(default_server.port or "(missing in config!)")))
    out:write(string.format(" model: %s\n", default_server.model or "(missing in config!)"))
    out:write("\n 1) Use the default\n")
    out:write(" 2) Error and exit\n")
    out:write("\nSelect 1 or 2: ")

    -- io.read returns nil at end-of-input; treat that like any non-"1" reply.
    local reply = io.read("*l")
    local choice = reply and reply:match("^%s*(.-)%s*$") or nil
    if choice == "1" then
        selected_server = nil -- so subsequent calls go straight to the default without re-prompting
        return default_server
    end

    error(string.format(
        "inference-server-config: aborted by user — '%s' did not resolve and user chose to exit.", bad_name))
end
-- }}}

130

-- {{{ function M.set_interactive_mode
-- Switch interactive mode on or off. Intended for the CLI driver only, and
-- only once it has confirmed that -I was given on the command line.
--   enabled: any value; it is coerced to a strict boolean
--            (nil and false turn the mode off, everything else turns it on)
function M.set_interactive_mode(enabled)
    interactive_mode = not not enabled
end
-- }}}

139

-- {{{ get_servers
-- List the inference servers defined in config.lua.
-- Returns: cfg.inference_servers when it is present and non-empty;
--   otherwise a one-element array holding the built-in fallback entry.
function M.get_servers()
    local configured = load_config().inference_servers

    if not (configured and #configured > 0) then
        -- Nothing configured: hand back the built-in fallback. Its host:port
        -- is the operator's LAN-accessible llama.cpp box
        -- (192.168.1.100:10265), so a bare-config run still resolves to the
        -- right endpoint.
        local fallback = {
            name = "local",
            description = "Local llama.cpp instance (fallback)",
            host = "192.168.1.100",
            port = 10265,
            model = "nomic-embed-text-v1.5"
        }
        return { fallback }
    end

    return configured
end
-- }}}

165

-- {{{ get_server_by_name
-- Look a server up by its `name` field.
--   name: server name to search for (nil/false short-circuits to nil)
-- Returns: the first entry of get_servers() whose name equals `name`,
--   or nil when there is no such entry.
function M.get_server_by_name(name)
    local found = nil

    if name then
        for _, candidate in ipairs(M.get_servers()) do
            if candidate.name == name then
                found = candidate
                break
            end
        end
    end

    return found
end
-- }}}

181

-- {{{ get_default_server
-- Resolve the server named by config.lua's default_inference_server.
--
-- This is the "must work" path: callers that need an endpoint depend on it,
-- so it fails loudly rather than guessing.
--
-- Returns: the matching server table.
-- Raises: when default_inference_server is unset, or when it names a server
--   that does not exist in inference_servers.
--
-- History: a silent fallback to servers[1] used to live here. It was removed
-- because it hid config drift -- renaming default_inference_server without
-- updating the entry it refers to made every consumer quietly talk to
-- whichever server happened to be listed first, yielding wrong results with
-- no error. Failing loudly means "endpoint resolution succeeded" is the same
-- as "your config said to use this server", never "we guessed."
function M.get_default_server()
    local default_name = load_config().default_inference_server

    if not default_name then
        error("inference-server-config: config.lua does not set default_inference_server. "
            .. "Set it to one of the names in inference_servers, or pass --server=<name> on the CLI.")
    end

    local resolved = M.get_server_by_name(default_name)
    if resolved then
        return resolved
    end

    error(string.format(
        "inference-server-config: default_inference_server is '%s' but no entry with that name exists in inference_servers. "
        .. "Fix the name in config.lua, or add a matching inference_servers entry.",
        default_name))
end
-- }}}

215

-- {{{ get_selected_server
-- Resolve the server the current run should talk to.
--
-- Resolution order:
--   1. No --server override recorded (set_selected_server never called, or
--      called with nil): delegate to get_default_server, which returns the
--      configured default or raises if it is missing/unresolvable.
--   2. An override is recorded and names an inference_servers entry:
--      return that entry.
--   3. An override is recorded but does not resolve:
--        interactive:     prompt the operator (use default, or exit);
--        non-interactive: raise, naming the offending --server=<name> and
--                         pointing at the fix.
--
-- There is deliberately no silent fallback. A mistyped --server once only
-- printed a stderr warning and carried on against the default; a busy
-- operator could miss that line in the log stream and burn hours of
-- pipeline time on the wrong endpoint.
function M.get_selected_server()
    local requested = selected_server
    if not requested then
        return M.get_default_server()
    end

    local match = M.get_server_by_name(requested)
    if match then
        return match
    end

    if interactive_mode then
        return prompt_for_server_fallback(requested)
    end

    error(string.format(
        "inference-server-config: --server=%s does not match any entry in inference_servers (config.lua).\n"
        .. "Fix the name on the CLI, add a matching entry to inference_servers, "
        .. "or pass -I to enable interactive selection.",
        requested))
end
-- }}}

256

-- {{{ set_selected_server
-- Record the server name given with the CLI --server flag.
--   name: server name, stored as-is and only resolved later by
--         get_selected_server; nil removes the override
function M.set_selected_server(name)
    selected_server = name
end
-- }}}

263

-- {{{ get_selected_model
-- Resolve the model identifier to send to the inference server.
--
-- --model=<name> is NOT validated against any local list. The inference
-- server is the authority on which models are loaded; the config can only
-- guess. A --model the server lacks produces a "model not found" error from
-- the server, and the pipeline stops there. Having two layers both claim
-- authority over model existence leads to drift (config listing models that
-- are gone, or missing ones that are installed).
--
-- The available_models field of an inference_servers entry remains useful
-- as operator documentation (and for --list-servers output); it is simply
-- not used as a gate here.
--
-- Resolution order:
--   1. Resolve the server first (get_selected_server raises or prompts if
--      --server=<name> did not resolve).
--   2. A model recorded via set_selected_model is returned verbatim.
--   3. Otherwise the "model" key of the runtime-overrides module is used if
--      it is set.
--   4. Otherwise server.model; if that field is missing, raise -- config.lua
--      is still the source of truth for the default model on a host.
function M.get_selected_model()
    local server = M.get_selected_server()

    if selected_model then
        return selected_model
    end

    -- Model-propagation fix: a --model passed to run.sh is written once, at
    -- startup, to the shared per-run notepad (tmp/run-overrides.lua). Reading
    -- it here lets every short-lived child process (the HTML, word-cloud and
    -- word-page stages, or embeddings_dir() with no argument) resolve the
    -- SAME model the embedding stage used rather than quietly reverting to
    -- server.model. A missing notepad or key yields nil, so a plain run
    -- (no --model) still ends up at config.lua as before. project_root has
    -- already been resolved by the get_selected_server call above.
    local overrides = require("runtime-overrides")
    overrides.set_project_root(project_root)
    local from_notepad = overrides.get("model")
    if from_notepad then
        return from_notepad
    end

    local configured = server.model
    if configured then
        return configured
    end

    error(string.format(
        "inference-server-config: server '%s' has no 'model' field in config.lua's inference_servers entry. "
        .. "Add a model = \"<name>\" field to that entry, or pass --model=<name> on the CLI.",
        server.name))
end
-- }}}

318

-- {{{ set_selected_model
-- Record the model name given with the CLI --model flag.
--   model: model identifier, stored as-is; get_selected_model returns it
--          verbatim without validation. nil removes the override.
function M.set_selected_model(model)
    selected_model = model
end
-- }}}

325

-- {{{ build_host_url
-- Build the base URL for a server, e.g. "http://192.168.0.115:10265".
--
--   server: server table; when omitted, get_selected_server() is used.
--
-- Returns: "http://<host>:<port>".
-- Raises: when host or port is missing, or when port is not a whole number
--   in 1..65535.
--
-- Both host and port must come from config.lua: an entry without them is a
-- config bug, not an invitation to guess. Earlier code substituted
-- "localhost" and 11434, so a forgotten host = silently sent every
-- embedding request nowhere.
--
-- The port is validated before formatting. "%d" on a non-numeric or
-- fractional value otherwise raises a string.format error that never
-- mentions config.lua on Lua 5.3+, or silently truncates on older versions.
function M.build_host_url(server)
    server = server or M.get_selected_server()

    local name = server.name or "(unnamed server)"
    if not server.host then
        error(string.format(
            "inference-server-config: server '%s' has no 'host' field in config.lua. "
            .. "Add a host = \"<hostname-or-ip>\" field to that inference_servers entry.", name))
    end
    if not server.port then
        error(string.format(
            "inference-server-config: server '%s' has no 'port' field in config.lua. "
            .. "Add a port = <number> field to that inference_servers entry.", name))
    end

    -- Accept numbers and numeric strings; reject everything else up front.
    local port = tonumber(server.port)
    if not port or port % 1 ~= 0 or port < 1 or port > 65535 then
        error(string.format(
            "inference-server-config: server '%s' has an invalid 'port' (%s) in config.lua. "
            .. "Set port to a whole number between 1 and 65535.", name, tostring(server.port)))
    end

    return string.format("http://%s:%d", server.host, port)
end
-- }}}

353

-- {{{ validate_server
-- Probe a server over HTTP with curl and report whether it answered.
--
--   server: server table; when omitted, get_selected_server() is used.
--
-- Returns: success (bool), message (string)
--   true,  "Server is reachable"            -- HTTP 200 from /v1/models
--   false, <reason>                         -- anything else
-- Raises: whatever build_host_url / get_selected_server raise for a broken
--   config; probe failures themselves are reported via the return values.
--
-- Fixes relative to the earlier version:
--   * io.popen failing no longer crashes on a nil handle;
--   * empty curl output (e.g. curl not installed) gets its own message
--     instead of "Server returned HTTP " with no code;
--   * the URL is single-quote-escaped before being placed in the shell
--     command, so a quote in a configured host cannot break the command.
function M.validate_server(server)
    server = server or M.get_selected_server()

    -- /v1/models is llama.cpp's OpenAI-compatible "what's loaded" endpoint.
    -- Was /api/tags under Ollama; migrated in 10-049 along with the rest
    -- of the API surface.
    local url = M.build_host_url(server) .. "/v1/models"

    -- POSIX shell quoting: close the quote, emit an escaped quote, reopen.
    local shell_safe_url = url:gsub("'", "'\\''")
    local cmd = string.format(
        "curl -s -o /dev/null -w '%%{http_code}' --max-time 3 '%s' 2>/dev/null", shell_safe_url)

    local handle, popen_err = io.popen(cmd)
    if not handle then
        return false, "Could not run curl: " .. tostring(popen_err)
    end
    local status = handle:read("*a") or ""
    handle:close()

    status = status:gsub("%s+", "") -- Trim whitespace

    if status == "200" then
        return true, "Server is reachable"
    elseif status == "000" then
        -- curl writes 000 when it never received an HTTP response.
        return false, "Connection timeout - server unreachable"
    elseif status == "" then
        return false, "curl produced no output - is curl installed?"
    end
    return false, "Server returned HTTP " .. status
end
-- }}}

383

-- {{{ list_servers
-- Print a formatted list of the available servers to stdout.
--
-- This is a diagnostic: it must keep working while the operator is
-- inspecting a broken config. For that reason it
--   * reads default_inference_server directly instead of calling
--     get_default_server (which raises when the default is unset or
--     misconfigured);
--   * wraps build_host_url in pcall, because that function raises on a
--     missing host/port -- previously one incomplete entry aborted the
--     whole listing;
--   * shows "(missing in config!)" for an absent model instead of a made-up
--     default name, since get_selected_model raises in that situation
--     rather than defaulting;
--   * tolerates available_models table entries that lack a `model` field
--     (the old `cond and entry.model or entry` expression then yielded the
--     table itself and table.concat raised).
function M.list_servers()
    local default_name = load_config().default_inference_server -- may be nil; that's fine here

    print("Available Inference servers:")
    print(string.rep("-", 70))

    for _, server in ipairs(M.get_servers()) do
        local default_marker = ""
        if default_name ~= nil and server.name == default_name then
            default_marker = " (default)"
        end

        local ok, url = pcall(M.build_host_url, server)
        if not ok then
            url = "(unavailable - host/port missing or invalid in config!)"
        end

        print(string.format(" %s%s", tostring(server.name or "(unnamed server)"), default_marker))
        print(string.format(" %s", tostring(server.description or "")))
        print(string.format(" URL: %s", url))
        print(string.format(" Model: %s", tostring(server.model or "(missing in config!)")))

        if server.available_models and #server.available_models > 0 then
            -- available_models entries may be plain strings or {model=...} tables
            -- (a table also carries its own GGUF + prompt); show just the names.
            local names = {}
            for _, entry in ipairs(server.available_models) do
                local model_name = entry
                if type(entry) == "table" then
                    model_name = entry.model
                end
                if model_name == nil then
                    model_name = "(unnamed model)"
                end
                names[#names + 1] = tostring(model_name)
            end
            print(string.format(" Available models: %s", table.concat(names, ", ")))
        end
        print("")
    end
end
-- }}}

422

-- {{{ format_embedding_prompt
-- Prepend the active model's embedding_prompt_prefix to a text payload.
--
--   text: the string to embed
-- Returns: prefix .. text when get_selected_model_config() supplies a
--   non-empty embedding_prompt_prefix; otherwise `text` unchanged.
--
-- Background: some embedding models (notably nomic-embed-text v1.5+) need a
-- task prefix on every input -- "clustering: ", "search_query: ",
-- "search_document: ", etc. -- which routes the model through different
-- internal weights. Models that need none (embeddinggemma,
-- qwen3-embedding) leave the field nil and this function does nothing.
--
-- Keeping the prefix logic here makes a model swap a single config edit
-- even when prefix requirements differ; callers never need to know which
-- model is active in order to embed text correctly.
function M.format_embedding_prompt(text)
    local model_cfg = M.get_selected_model_config()
    local prefix = model_cfg and model_cfg.embedding_prompt_prefix

    if not prefix or prefix == "" then
        return text
    end
    return prefix .. text
end
-- }}}

444

-- {{{ get_selected_model_config
-- Resolve { model, model_path, embedding_prompt_prefix } for the SELECTED
-- model on the selected server. This lets one server entry serve several
-- local GGUFs: an available_models entry may be a table with its own
-- model_path and prompt prefix, so `--server local --model X` loads X with
-- X's phrasing.
--
-- Resolution:
--   * The first TABLE entry in available_models whose `model` equals the
--     selected model wins. Its model_path is used, falling back to the
--     server's when the entry has none. Its embedding_prompt_prefix is used
--     as-is, with NO fallback to the server's prefix.
--   * Otherwise (plain-string entries, or a server whose available_models
--     is documentation-only like the remote gpu-server) the server's
--     top-level model_path / embedding_prompt_prefix apply -- the default
--     model. The common case (no --model) is therefore unchanged.
--
-- Returns: a fresh table with the three fields above.
function M.get_selected_model_config()
    local server = M.get_selected_server()
    local model = M.get_selected_model()

    -- Start from the server-level defaults; a matching entry overrides them.
    local model_path = server.model_path
    local prefix = server.embedding_prompt_prefix

    for _, entry in ipairs(server.available_models or {}) do
        if type(entry) == "table" and entry.model == model then
            model_path = entry.model_path or server.model_path
            prefix = entry.embedding_prompt_prefix
            break
        end
    end

    return {
        model = model,
        model_path = model_path,
        embedding_prompt_prefix = prefix,
    }
end
-- }}}

477

478return M

479