-- libs/markdown.lua
-- markdown.lua
-- A small, dependency-free Markdown -> HTML renderer (Issue 10-055, Feature B).
--
-- General description (for the reader skimming): the source browser used to dump
-- .md files as plain numbered text, so tables were ASCII soup and headings were
-- not headings. This turns the common Markdown the repo actually uses -- ATX
-- headings, bold/italic, inline + fenced code, ordered/unordered lists, GitHub
-- pipe tables, blockquotes, horizontal rules, and links -- into clean HTML. It
-- is intentionally a pragmatic subset, not a CommonMark engine: it must run
-- server-side in LuaJIT with no JavaScript on the page (the deploy platform
-- forbids script), and it must be readable. Anything it does not recognize falls
-- through as an escaped paragraph, so unknown input degrades to plain text
-- rather than breaking.
--
-- Public surface: M.render(markdown_text) -> html_string.

-- Module table: functions attached to M (M.render below) form the public
-- surface, and M itself is what the file returns at the end.
local M = {}

-- {{{ local function escape_html()
-- Escape the three characters that would otherwise be read as markup: & < >.
-- Applied to the RAW text before any tags are emitted, so the renderer's own
-- <em>/<a>/... are the only angle brackets that survive. The replacement is a
-- single pass over the character class, so an entity produced for one
-- character is never re-scanned and cannot be double-escaped.
--
-- @param s  string of raw text
-- @return   string with & < > replaced by &amp; &lt; &gt;
local HTML_ENTITIES = { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" }

local function escape_html(s)
    -- Outer parentheses drop gsub's second result (the match count).
    return (s:gsub("[&<>]", HTML_ENTITIES))
end
-- }}}

-- {{{ local function render_inline()
-- Turn one line of Markdown text into inline HTML. Order matters:
--   1. strip stray placeholder bytes, then escape, so input text can neither
--      inject tags nor forge a stash reference;
--   2. lift `code spans` into the stash, so the * and _ inside code are not
--      later mistaken for emphasis (the classic bug if you gsub naively);
--   3. images, then links. The generated tags go into the stash as well, so a
--      URL such as pkg/__init__.py is not rewritten by the emphasis passes;
--      the link label stays in the text and still receives emphasis;
--   4. bold (** or __), then italic (* or _);
--   5. drop every stashed fragment back in.
-- A stash reference is the byte \1, the decimal index, then the byte \2.
--
-- @param text  string: one line, table cell, or list item of Markdown
-- @return      string of inline HTML
local function render_inline(text)
    -- \1 and \2 delimit stash references below. Remove any that arrive in the
    -- input so a forged reference can never point at a missing stash slot.
    text = escape_html((text:gsub("[\1\2]", "")))

    local stash = {}

    -- Park a finished HTML fragment and return the reference that stands in
    -- for it while the emphasis passes run.
    local function protect(fragment)
        stash[#stash + 1] = fragment
        return "\1" .. #stash .. "\2"
    end

    -- Replace every stash reference in s with its fragment. Stashed fragments
    -- never contain references themselves, so one pass is enough. An unknown
    -- index returns nil, which makes gsub keep the original bytes.
    local function expand(s)
        return (s:gsub("\1(%d+)\2", function(index)
            return stash[tonumber(index)]
        end))
    end

    -- Prepare a value for a double-quoted attribute. escape_html leaves the
    -- quote character alone, so it is encoded here; angle brackets are encoded
    -- too so that an expanded fragment cannot reopen markup inside the value.
    local ATTR_ENTITIES = { ['"'] = "&quot;", ["<"] = "&lt;", [">"] = "&gt;" }
    local function attr(value)
        return (expand(value):gsub('[<>"]', ATTR_ENTITIES))
    end

    text = text:gsub("`([^`]+)`", function(code)
        return protect("<code>" .. code .. "</code>")
    end)

    -- Images first (![alt](src)) so the leading ! is consumed before links.
    text = text:gsub("!%[([^%]]*)%]%(([^%)%s]+)[^%)]*%)", function(alt, src)
        return protect(string.format('<img src="%s" alt="%s">', attr(src), attr(alt)))
    end)
    -- Links [text](url). Only the tags are stashed; the label stays inline.
    text = text:gsub("%[([^%]]*)%]%(([^%)%s]+)[^%)]*%)", function(label, url)
        return protect(string.format('<a href="%s">', attr(url)))
            .. label .. protect("</a>")
    end)

    -- Bold before italic, so **x** is not eaten by the single-* rule.
    text = text:gsub("%*%*(.-)%*%*", "<strong>%1</strong>")
    text = text:gsub("__(.-)__", "<strong>%1</strong>")
    text = text:gsub("%*([^%s][^%*]-)%*", "<em>%1</em>")
    -- Underscore italic only between word boundaries, so snake_case_names survive.
    text = text:gsub("%f[%w_]_([^_]+)_%f[^%w_]", "<em>%1</em>")

    return expand(text)
end
-- }}}

-- {{{ local function is_table_separator()
-- Decide whether a line is the delimiter row of a GitHub pipe table, i.e. the
-- second line, such as |:---|---:|. The line must contain a pipe, and every
-- cell must be a run of dashes with an optional colon on either side and
-- optional surrounding whitespace.
--
-- @param line  string, one source line
-- @return      true when the line is a delimiter row, false otherwise
local function is_table_separator(line)
    local has_pipe = line:find("|") ~= nil
    if has_pipe then
        -- Drop at most one outer pipe on each side before looking at cells.
        local inner = line:gsub("^%s*|?", "")
        inner = inner:gsub("|?%s*$", "")
        -- A trailing pipe lets one pattern pick up the last cell as well.
        local terminated = inner .. "|"
        for column in terminated:gmatch("(.-)|") do
            if column:match("^%s*:?%-+:?%s*$") == nil then
                return false
            end
        end
        return true
    end
    return false
end
-- }}}

-- {{{ local function split_row()
-- Split a table row into trimmed cell strings.
-- Cells are separated by unescaped pipes; a backslash-escaped pipe (\|) stays
-- inside its cell as a literal |. One leading and one trailing pipe are
-- optional in the source and do not produce cells of their own.
--
-- @param line  string, one table row
-- @return      array of cell strings (always at least one entry)
local function split_row(line)
    local cells, pieces = {}, {}

    -- Finish the cell being collected: join its pieces, trim, and store it.
    local function close_cell()
        local cell = table.concat(pieces)
        cells[#cells + 1] = (cell:gsub("^%s+", ""):gsub("%s+$", ""))
        pieces = {}
    end

    local pos = 1
    while true do
        local bar = line:find("|", pos, true)
        if not bar then break end
        -- "bar > pos" keeps the backslash test inside the current segment, so
        -- the pipe that ended the previous cell is never taken for an escape.
        if bar > pos and line:sub(bar - 1, bar - 1) == "\\" then
            pieces[#pieces + 1] = line:sub(pos, bar - 2) .. "|"
        else
            pieces[#pieces + 1] = line:sub(pos, bar - 1)
            close_cell()
        end
        pos = bar + 1
    end
    pieces[#pieces + 1] = line:sub(pos)
    close_cell()

    -- An empty first/last entry is the text outside an outer pipe, not a real
    -- cell. The "> 1" guards keep a lone "|" from losing its only cell.
    if #cells > 1 and cells[1] == "" then table.remove(cells, 1) end
    if #cells > 1 and cells[#cells] == "" then cells[#cells] = nil end
    return cells
end
-- }}}

-- {{{ M.render()
-- Block-level pass. Walk the lines once, and at each line decide which block it
-- starts (fence, table, rule, heading, quote, list) or whether it joins a
-- paragraph. Fenced code is emitted escaped with NO inline pass; everything
-- else goes through render_inline so emphasis/links/code spans work.
--
-- The patterns and predicates below are shared by the dispatcher and by the
-- paragraph gatherer, so the two always agree on what starts a block.
local FENCE_LINE = "^```"
local HEADING_LINE = "^#+%s"
local QUOTE_LINE = "^%s*>"
local BULLET_ITEM = "^%s*[%-%*%+]%s+"
local NUMBERED_ITEM = "^%s*%d+%.%s+"

-- Horizontal rule: a line of only ---, ***, or ___ (3+).
local function is_rule(line)
    return line:match("^%s*%-%-%-+%s*$") ~= nil
        or line:match("^%s*%*%*%*+%s*$") ~= nil
        or line:match("^%s*___+%s*$") ~= nil
end

-- Pipe table start: a line with a pipe whose NEXT line is a separator row.
local function starts_table(lines, i)
    local following = lines[i + 1]
    return lines[i]:find("|", 1, true) ~= nil
        and following ~= nil
        and is_table_separator(following)
end

-- True when lines[i] is blank or opens any block other than a paragraph.
local function ends_paragraph(lines, i)
    local line = lines[i]
    return line:match("^%s*$") ~= nil
        or line:match(HEADING_LINE) ~= nil
        or line:match(FENCE_LINE) ~= nil
        or line:match(QUOTE_LINE) ~= nil
        or line:match(BULLET_ITEM) ~= nil
        or line:match(NUMBERED_ITEM) ~= nil
        or is_rule(line)
        or starts_table(lines, i)
end

-- Render one table row, wrapping each cell in the given tag ("th" or "td").
local function table_row(cells, tag)
    local parts = {}
    for idx, cell in ipairs(cells) do
        parts[idx] = "<" .. tag .. ">" .. render_inline(cell) .. "</" .. tag .. ">"
    end
    return "<tr>" .. table.concat(parts) .. "</tr>"
end

-- Collect consecutive list items matching `marker`, starting at lines[i].
-- Returns the joined <li> elements and the index of the first unconsumed line.
local function list_items(lines, i, marker)
    local items = {}
    while lines[i] and lines[i]:match(marker) do
        items[#items + 1] = "<li>" .. render_inline((lines[i]:gsub(marker, ""))) .. "</li>"
        i = i + 1
    end
    return table.concat(items), i
end

-- Render Markdown text to HTML.
--
-- @param md  string of Markdown source
-- @return    string of HTML, one block per line
function M.render(md)
    -- Normalise CRLF and lone CR first, so no "\r" leaks into the output or
    -- sits in front of the end-anchored block patterns.
    local source = (md .. "\n"):gsub("\r\n?", "\n")
    local lines = {}
    for line in source:gmatch("(.-)\n") do lines[#lines + 1] = line end

    local out = {}
    local i, n = 1, #lines
    while i <= n do
        local line = lines[i]
        local fence = line:match("^```(.*)$")

        -- Fenced code block: ```lang ... ``` . Content is literal, escaped, no inline.
        if fence ~= nil then
            local lang = fence:match("^%s*(%S*)")
            local code = {}
            i = i + 1
            while i <= n and not lines[i]:match(FENCE_LINE) do
                code[#code + 1] = escape_html(lines[i])
                i = i + 1
            end
            i = i + 1 -- consume the closing fence (or step past EOF if unterminated)
            local cls = ""
            if lang ~= "" then
                -- The info string is author text going into a quoted attribute:
                -- escape it, including the quote escape_html leaves alone.
                local safe = escape_html(lang):gsub('"', "&quot;")
                cls = string.format(' class="lang-%s"', safe)
            end
            out[#out + 1] = string.format("<pre><code%s>%s</code></pre>", cls,
                table.concat(code, "\n"))

        -- Pipe table: header row, separator row, then body rows.
        elseif starts_table(lines, i) then
            local rows = { table_row(split_row(line), "th") }
            i = i + 2 -- skip header + separator
            while i <= n and lines[i]:find("|", 1, true) and lines[i]:match("%S") do
                rows[#rows + 1] = table_row(split_row(lines[i]), "td")
                i = i + 1
            end
            out[#out + 1] = "<table>" .. table.concat(rows) .. "</table>"

        elseif is_rule(line) then
            out[#out + 1] = "<hr>"
            i = i + 1

        -- ATX heading: #..###### then text. More than six hashes clamp to h6.
        elseif line:match(HEADING_LINE) then
            local hashes, text = line:match("^(#+)%s+(.*)$")
            local level = math.min(#hashes, 6)
            out[#out + 1] = string.format("<h%d>%s</h%d>", level, render_inline(text), level)
            i = i + 1

        -- Blockquote: consecutive lines starting with >.
        elseif line:match(QUOTE_LINE) then
            local quoted = {}
            while i <= n and lines[i]:match(QUOTE_LINE) do
                quoted[#quoted + 1] = render_inline((lines[i]:gsub("^%s*>%s?", "")))
                i = i + 1
            end
            out[#out + 1] = "<blockquote>" .. table.concat(quoted, "<br>") .. "</blockquote>"

        -- Unordered list: -, *, or + markers.
        elseif line:match(BULLET_ITEM) then
            local items
            items, i = list_items(lines, i, BULLET_ITEM)
            out[#out + 1] = "<ul>" .. items .. "</ul>"

        -- Ordered list: 1. 2. ...
        elseif line:match(NUMBERED_ITEM) then
            local items
            items, i = list_items(lines, i, NUMBERED_ITEM)
            out[#out + 1] = "<ol>" .. items .. "</ol>"

        -- Blank line: paragraph separator, nothing to emit.
        elseif line:match("^%s*$") then
            i = i + 1

        -- Otherwise a paragraph. The current line opens no other block (every
        -- branch above declined it), so take it unconditionally, then keep
        -- going until a blank line or a line that starts some other block.
        else
            local para = {}
            repeat
                para[#para + 1] = render_inline(lines[i])
                i = i + 1
            until i > n or ends_paragraph(lines, i)
            out[#out + 1] = "<p>" .. table.concat(para, "<br>") .. "</p>"
        end
    end

    return table.concat(out, "\n")
end
-- }}}

-- Return the module table as the result of loading this file.
return M