erlmarkdown.erl 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581
  1. %%% -*- mode: Erlang; fill-column: 80; comment-column: 75; -*-
  2. %%% Copyright 2012 Erlware, LLC. All Rights Reserved.
  3. %%%
  4. %%% This file is provided to you under the Apache License,
  5. %%% Version 2.0 (the "License"); you may not use this file
  6. %%% except in compliance with the License. You may obtain
  7. %%% a copy of the License at
  8. %%%
  9. %%% http://www.apache.org/licenses/LICENSE-2.0
  10. %%%
  11. %%% Unless required by applicable law or agreed to in writing,
  12. %%% software distributed under the License is distributed on an
  13. %%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. %%% KIND, either express or implied. See the License for the
  15. %%% specific language governing permissions and limitations
  16. %%% under the License.
  17. %%%---------------------------------------------------------------------------
  18. %%% @copyright (C) Erlware, LLC.
  19. %%% @copyright (C) 2009, Gordon Guthrie
  20. %%% @doc
-module(erlmarkdown).
%% Public API: convert a string, convert UTF-8 encoded input, convert a file.
-export([conv/1,
         conv_utf8/1,
         conv_file/2]).
%% Character codes for whitespace and line-ending characters.
-define(SPACE, 32).
-define(TAB, 9).
-define(LF, 10).
-define(CR, 13).
-define(NBSP, 160).
%% Character sequences spelling the HTML entities "&amp;" and "&copy;".
%% Each macro expands to comma-separated characters rather than to a single
%% term, so it is only meaningful inside a list or an argument sequence.
%% NOTE(review): no use of ?AMP / ?COPY is visible in this part of the file.
-define(AMP, $&, $a, $m, $p, $;).
-define(COPY, $&, $c, $o, $p, $y, $;).
  32. %%============================================================================
  33. %% API
  34. %%============================================================================
  35. %% the lexer first lexes the input
  36. %% make_lines does 2 passes:
  37. %% * it chops the lexed strings into lines which it represents as a
  38. %% list of lists
  39. %% * it then types the lines into the following:
  40. %% * normal lines
  41. %% * reference style links
  42. %% * reference style images
  43. %% * special line types
  44. %% - blank
  45. %% - SETEXT header lines
  46. %% - ATX header lines
  47. %% - blockquote
  48. %% - unordered lists
  49. %% - ordered lists
  50. %% - code blocks
  51. %% - horizontal rules
  52. %% the parser then does its magic interpolating the references as appropriate
  53. conv(String) ->
  54. Lex = lex(String),
  55. UntypedLines = make_lines(Lex),
  56. {TypedLines, Refs} = type_lines(UntypedLines),
  57. parse(TypedLines, Refs).
  58. -spec conv_utf8(list()) -> list().
  59. conv_utf8(Utf8) ->
  60. Str = xmerl_ucs:from_utf8(Utf8),
  61. Res = conv(Str),
  62. xmerl_ucs:to_utf8(Res).
  63. conv_file(FileIn, FileOut) ->
  64. case file:open(FileIn, [read]) of
  65. {ok, Device} ->
  66. Input = get_all_lines(Device,[]),
  67. Output = conv(Input),
  68. write(FileOut, Output);
  69. _ ->
  70. error
  71. end.
  72. %%============================================================================
  73. %% Internal Functions
  74. %%============================================================================
  75. get_all_lines(Device, Accum) ->
  76. case io:get_line(Device,"") of
  77. eof ->
  78. file:close(Device),
  79. Accum;
  80. Line ->
  81. get_all_lines(Device,Accum ++ Line)
  82. end.
  83. write(File, Text) ->
  84. _Return=filelib:ensure_dir(File),
  85. case file:open(File, [write]) of
  86. {ok, Id} ->
  87. io:fwrite(Id, "~s~n", [Text]),
  88. file:close(Id);
  89. _ ->
  90. error
  91. end.
  92. %%
  93. %% Parse the lines interpolating the references as appropriate
  94. %%
  95. parse(TypedLines, Refs) ->
  96. string:strip(p1(TypedLines, Refs, 0, []), both, $\n).
%% Walks the typed lines and renders them.
%% Variable 'R' contains the References and 'I' is the indent level.
%% 'Acc' collects rendered fragments in reverse order; the terminal clause
%% reverses and flattens it into the final string.
%% Clause order matters: the look-behind clauses (a line followed by a
%% setext underline) must come before the clauses that render each line
%% type on its own.
%% Terminal clause
p1([], _R, _I, Acc) ->
    lists:flatten(lists:reverse(Acc));
%% Tags have the highest precedence...
p1([{tag, Tag} | T], R, I, Acc) ->
    case T of
        [] ->
            %% a tag on the very last line is wrapped in <p>...</p>
            %% (pushed in reverse because Acc is reversed at the end)
            p1([], R, I, ["</p>", make_tag_str(Tag, R), "<p>" | Acc]);
        [{blank, _} | T2] ->
            %% the blank line after the tag is consumed with it
            p1(T2, R, I, [make_tag_str(Tag, R) | Acc]);
        _Other ->
            p1(T, R, I, [pad(I) ++ make_tag_str(Tag, R) | Acc])
    end;
%% an opening block tag grabs every line up to its matching close tag
p1([{blocktag, [{{{tag, open}, Type}, Tg}] = _Tag} | T], R, I, Acc) ->
    {Block, Rest} = grab_for_blockhtml(T, Type, []),
    Str = lists:flatten([Tg, "\n" | Block]),
    p1(Rest, R, I, [Str | Acc]);
%% blank lines/linefeeds are gobbled down
p1([{Type, _} | T], R, I, Acc)
  when Type == blank orelse Type == linefeed ->
    Rest = grab_empties(T),
    p1(Rest, R, I, [pad(I) ++ "\n" | Acc]);
%% two consecutive normal lines should be concatenated...
%% remembering to pad the second line with the indent...
p1([{normal, P1}, {normal, P2} | T], R, I, Acc) ->
    p1([{normal, merge(P1, pad(I), P2)} | T], R, I, Acc);
%% as should a normal and linefeed
%% setext h1 is a look behind and it overrides blockquote and code...
p1([{normal, P}, {setext_h1, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h1>" ++ make_string(snip(P), R)
                 ++ "</h1>\n\n" | Acc]);
p1([{blockquote, P}, {setext_h1, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h1>" ++ make_string(snip(P), R)
                 ++ "</h1>\n\n" | Acc]);
p1([{{codeblock, P}, _}, {setext_h1, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h1>" ++ make_string(snip(P), R)
                 ++ "</h1>\n\n" | Acc]);
%% the same look behind for a dashed underline after a blockquote or
%% codeblock: it is rendered as an h2
p1([{blockquote, P}, {h2_or_hr, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h2>" ++ make_string(snip(P), R)
                 ++ "</h2>\n\n" | Acc]);
p1([{{codeblock, P}, _}, {h2_or_hr, _} | T], R, I, Acc) ->
    p1(T, R, I, [pad(I) ++ "<h2>" ++ make_string(snip(P), R)
                 ++ "</h2>\n\n" | Acc]);
%% but a setext with no lookbehind is just rendered as a normal line,
%% so change its type and rethrow it
p1([{setext_h1, P} | T], R, I, Acc) ->
    p1([{normal, P} | T], R, I, Acc);
%% setext h2 might be a look behind
p1([{normal, P}, {h2_or_hr, _} | T], R, I, Acc) ->
    P2 = string:strip(make_string(snip(P), R), both, ?SPACE),
    p1(T, R, I, [pad(I) ++ "<h2>" ++ P2 ++ "</h2>\n\n" | Acc]);
%% blockquotes swallow each other
%% replace the first blockquote mark with a space...
p1([{blockquote, P1}, {blockquote, [_ | P2]} | T], R, I, Acc) ->
    p1([{blockquote, merge(P1, pad(I), [{{ws, sp}, " "} | P2])} | T], R, I, Acc);
%% blockquotes swallow normal
p1([{blockquote, P1}, {normal, P2} | T], R, I, Acc) ->
    p1([{blockquote, merge(P1, pad(I + 1), P2)} | T], R, I, Acc);
%% blockquote
p1([{blockquote, P} | T], R, I, Acc) ->
    %% drop the leading '>' token; this match crashes if the line does
    %% not start with one
    [{{md, gt}, _} | T1] = P,
    T2 = string:strip(make_string(T1, R)),
    p1(T, R, I,
       ["\n<blockquote>\n" ++ pad(I + 1) ++ "<p>" ++ T2 ++ "</p>\n</blockquote>" | Acc]);
%% one normal is just normal...
p1([{normal, P} | T], R, I, Acc) ->
    P2 = string:strip(make_string(snip(P), R), both, ?SPACE),
    p1(T, R, I, [pad(I) ++ "<p>" ++ P2 ++ "</p>\n" | Acc]);
%% atx headings (one clause per level, h1 to h6)
p1([{{h1, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_string(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h1>" ++ NewP ++ "</h1>\n\n" | Acc]);
p1([{{h2, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_string(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h2>" ++ NewP ++ "</h2>\n\n" | Acc]);
p1([{{h3, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_string(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h3>" ++ NewP ++ "</h3>\n\n" | Acc]);
p1([{{h4, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_string(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h4>" ++ NewP ++ "</h4>\n\n" | Acc]);
p1([{{h5, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_string(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h5>" ++ NewP ++ "</h5>\n\n" | Acc]);
p1([{{h6, P}, _} | T], R, I, Acc) ->
    NewP = string:strip(make_string(snip(P), R), right),
    p1(T, R, I, [pad(I) ++ "<h6>" ++ NewP ++ "</h6>\n\n" | Acc]);
%% unordered lists swallow normal and codeblock lines
%% NOTE(review): the first clause expects a normal line shaped as
%% {{normal, P}, S}, while the other clauses of p1 match {normal, P} -
%% confirm that the two-level shape is ever produced
p1([{{ul, P1}, S1}, {{normal, P2}, S2} | T], R, I , Acc) ->
    p1([{{ul, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ul, P1}, S1}, {{codeblock, P2}, S2} | T], R, I , Acc) ->
    p1([{{ul, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ul, _P}, _} | _T] = List, R, I, Acc) ->
    %% parse_list consumes the whole run of list items
    {Rest, NewAcc} = parse_list(ul, List, R, I, [], false),
    p1(Rest, R, I, [pad(I) ++ "<ul>\n" ++ NewAcc
                    ++ pad(I) ++ "</ul>\n" | Acc]);
%% ordered lists swallow normal and codeblock lines
p1([{{ol, P1}, S1}, {{normal, P2}, S2} | T], R, I , Acc) ->
    p1([{{ol, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ol, P1}, S1}, {{codeblock, P2}, S2} | T], R, I , Acc) ->
    p1([{{ol, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{ol, _P}, _} | _T] = List, R, I, Acc) ->
    {Rest, NewAcc} = parse_list(ol, List, R, I, [], false),
    p1(Rest, R, I, [pad(I) ++ "<ol>\n" ++ NewAcc
                    ++ pad(I) ++ "</ol>\n" | Acc]);
%% codeblock consumes any following empty lines
%% and other codeblocks
p1([{{codeblock, P1}, S1}, {{codeblock, P2}, S2} | T], R, I, Acc) ->
    p1([{{codeblock, merge(P1, pad(I), P2)}, S1 ++ S2} | T], R, I, Acc);
p1([{{codeblock, P}, _} | T], R, I, Acc) ->
    Rest = grab_empties(T),
    p1(Rest, R, I, ["<pre><code>" ++ make_string(snip(P), R)
                    ++ "\n</code></pre>\n\n" | Acc]);
%% horizontal rules
p1([{hr, _} | T], R, I, Acc) ->
    p1(T, R, I, ["<hr />" | Acc]);
%% h2_or_hr is greedy for normal lines
p1([{h2_or_hr, P1}, {normal, P2} | T], R, I, Acc) ->
    p1([{normal, lists:flatten([P1 | P2])} | T], R, I, Acc);
%% the clause with a normal before an 'h2_or_hr' has already been
%% handled further up the tree, so this is a bona fide 'hr'...
p1([{h2_or_hr, _} | T], R, I, Acc) ->
    p1(T, R, I, ["<hr />" | Acc]);
%% Now start pulling out inline refs etc, etc
%% (reference definition lines produce no output of their own)
p1([{inlineref, _P} | T], R, I, Acc) ->
    p1(T, R, I, Acc).
  225. grab_for_blockhtml([], Type, Acc) ->
  226. {lists:reverse(["</" ++ Type ++ ">" | Acc]), []};
  227. grab_for_blockhtml([{blocktag, [{{{tag, close}, Type}, Tg}]}
  228. | T], Type, Acc) ->
  229. {lists:reverse([Tg | Acc]), T};
  230. grab_for_blockhtml([{blocktag, [{{{tag, _}, GrabType}, Tg}]}
  231. | T], Type, Acc) when GrabType =/= Type ->
  232. %% blocktags grabbed in a blocktag need a line ending pushed
  233. grab_for_blockhtml(T, Type, ["\n", Tg | Acc]);
  234. grab_for_blockhtml([{tag, {{{tag, self_closing}, _Ty}, Tg}}
  235. | T], Type, Acc) ->
  236. grab_for_blockhtml(T, Type, [Tg | Acc]);
  237. grab_for_blockhtml([H | T], Type, Acc) ->
  238. {_Type, Content} = H,
  239. Str = make_plain_string(Content),
  240. grab_for_blockhtml(T, Type, [Str | Acc]).
  241. grab_empties([{linefeed, _} | T]) -> grab_empties(T);
  242. grab_empties([{blank, _} | T]) -> grab_empties(T);
  243. grab_empties(List) -> List.
  244. merge(P1, Pad, P2) ->
  245. NewP1 = make_br(P1),
  246. lists:flatten([NewP1, {string, Pad} | P2]).
  247. make_br(List) -> make_br1(lists:reverse(List)).
  248. make_br1([{{lf, _}, _},
  249. {{ws, comp}, _} | T]) -> lists:reverse([{tags, " <br />\n"} | T]);
  250. make_br1([{{lf, _}, _},
  251. {{ws, tab}, _} | T]) -> lists:reverse([{tags, " <br />\n"} | T]);
  252. make_br1(List) -> lists:reverse(List).
  253. pad(N) ->
  254. pad1(N, []).
  255. pad1(0, Acc) ->
  256. Acc;
  257. pad1(N, Acc)
  258. when N > 0 ->
  259. pad1(N - 1, [" " | Acc]).
  260. %% this is a bit messy because of the way that hard lines are treated...
  261. %% If your li's have a blank line between them the item gets wrapped in a para,
  262. %% if not, they don't
  263. %% BUT if one item is <p> wrapped then the next is too
  264. parse_list(_Type, [], _R, _I, A, _) ->
  265. {[], lists:reverse(A)};
  266. parse_list(Type, [{{Type, P}, _} | T], R, I, A, Wrap) ->
  267. {Rest, NewP, NewWrap} = grab(T, R, [], Wrap),
  268. Li =
  269. case NewWrap of
  270. false ->
  271. Ret = parse([{normal, P}], R),
  272. %% need to strip off the extra <p></p>'s
  273. Ret2 = string:left(Ret, length(Ret) - 4),
  274. Ret3 = string:right(Ret2, length(Ret2) -3),
  275. Ret3 ++ "\n" ++ NewP ++ pad(I);
  276. true ->
  277. string:strip(parse([{normal, P}], R), right, ?LF)
  278. ++ NewP ++ pad(I)
  279. end,
  280. NewWrap2 =
  281. case T of
  282. [] ->
  283. false; % doesnt matter
  284. [H2 | _T2] ->
  285. case H2 of
  286. {linefeed, _} ->
  287. true;
  288. _ ->
  289. false
  290. end
  291. end,
  292. parse_list(Type, Rest, R, I, [pad(I) ++ "<li>"
  293. ++ string:strip(Li, right, ?LF)
  294. ++ "</li>\n" | A], NewWrap2);
  295. parse_list(_Type, List, _R, _I, A, _) ->
  296. {List, lists:reverse(A)}.
  297. %% grab grabs normals, double codeblocks, linefeeds and blanks
  298. %% BUT stop grabbing if a normal if preceeded by a linefeed or blank
  299. %% UNLESS the normal starts with white space :(
  300. %% the third return parameter is 'true' if the 'li' should be
  301. %% wrapped in '<p></p>' and false if it shouldn't
  302. grab([{{codeblock, _}, S} | T] = List, R, Acc, W) ->
  303. case is_blockquote(S, T) of
  304. {{true, R1}, T2} ->
  305. grab(T2, R, ["</blockquote>",
  306. make_escape_string(R1, R),
  307. "<blockquote>" | Acc], W);
  308. {{esc_false, R1}, _T2} ->
  309. {R1, lists:reverse(Acc), false};
  310. {false, T2} ->
  311. case is_double_indent(S) of
  312. false ->
  313. {List, lists:reverse(Acc), false};
  314. {true, R2} ->
  315. %% if it is a double indent - delete 4 spaces
  316. %% no it makes not sense to me neither :(
  317. grab(T2, R, [" " ++ make_escape_string(R2, R) | Acc], W)
  318. end
  319. end;
  320. grab([{linefeed, _} | T], R, Acc, false) ->
  321. grab2(T, R, Acc, T, Acc, true);
  322. grab([{linefeed, _} | T], R, Acc, true) ->
  323. grab2(T, R, ["\n" | Acc], T, Acc, true);
  324. grab([{blank, _} | T], R, Acc, false) ->
  325. grab2(T, R, Acc, T, Acc, true);
  326. grab([{blank, _} | T], R, Acc, true) ->
  327. grab2(T, R, ["\n" | Acc], T, Acc, true);
  328. grab([{normal, P} | T], R, Acc, W) ->
  329. Li = case W of
  330. false ->
  331. make_escape_string(P, R);
  332. true ->
  333. "<p>"++ string:strip(make_escape_string(P, R), right, ?LF)
  334. ++ "</p>"
  335. end,
  336. grab(T, R, [Li | Acc], W);
  337. grab(List, _R, Acc, W) ->
  338. {List, lists:reverse(Acc), W}.
  339. %% the problem is knowing when to grab, if the list is followed by a long
  340. %% string of blank lines and linefeeds and a normal then the linefeeds aren't
  341. %% grabbed
  342. %% if the list if followed by blank lines and linefeeds and a normal with an
  343. %% initial whitespace it is grabbed...
  344. grab2([{normal, P2} | T], R, Acc, LO, AO, W) ->
  345. case P2 of
  346. [{{ws, _}, _} | T2] ->
  347. Li = case W of
  348. false ->
  349. make_escape_string(T2, R);
  350. true ->
  351. "<p>" ++
  352. string:strip(make_escape_string(T2, R), right, ?LF)
  353. ++ "</p>"
  354. end,
  355. grab(T, R, [Li | Acc], W);
  356. _ ->
  357. {LO, AO, false}
  358. end;
  359. grab2([{linefeed, _} | T], R, Acc, LO, AO, _W) ->
  360. grab2(T, R, ["\n" | Acc], LO, AO, true);
  361. grab2([{blank, _} | T], R, Acc, LO, AO, _W) ->
  362. grab2(T, R, ["\n" | Acc], LO, AO, true);
  363. %% We dont want to grab this stuff so return the old list and the old acc
  364. grab2(_List, _R, _Acc, LO, AO, _W) ->
  365. {LO, AO, true}.
  366. is_double_indent(List) ->
  367. is_double_indent1(List, 0).
  368. %% double indent is any combination of tabs and spaces that add
  369. %% up to 8
  370. is_double_indent1([], _N) ->
  371. false;
  372. is_double_indent1(Rest, N)
  373. when N > 7 ->
  374. {true, Rest};
  375. is_double_indent1([{{ws, sp}, _} | T], N) ->
  376. is_double_indent1(T, N + 1);
  377. is_double_indent1([{{ws, tab}, _} | T], N) ->
  378. is_double_indent1(T, N + 4);
  379. is_double_indent1(_List, _N) ->
  380. false.
  381. is_blockquote(List, T) ->
  382. case is_bq1(List, 0) of
  383. false ->
  384. {false, T};
  385. {esc_false, R} ->
  386. {{esc_false, R}, T};
  387. {true, R} ->
  388. {NewT, NewR} = grab2(T, R),
  389. {{true, NewR}, NewT}
  390. end.
  391. is_bq1([], _N) ->
  392. false;
  393. is_bq1([{{ws, sp}, _} | T], N) ->
  394. is_bq1(T, N + 1);
  395. is_bq1([{{ws, tab}, _} | T], N) ->
  396. is_bq1(T, N + 4);
  397. is_bq1([{{md, gt}, _}, {{ws, _}, _} | T], N)
  398. when N > 3 ->
  399. {true, T};
  400. is_bq1([{{punc, bslash}, _}, {{md, gt}, GT}, {{ws, _}, WS} | T], N)
  401. when N > 3 ->
  402. {esc_false, [GT, WS | T]};
  403. is_bq1(_List, _N) ->
  404. false.
  405. grab2(List, R) ->
  406. gb2(List, lists:reverse(R)).
  407. gb2([], Acc) ->
  408. {[], lists:flatten(lists:reverse(Acc))};
  409. gb2([{blank, _} | T], Acc) ->
  410. {T, lists:flatten(lists:reverse(Acc))};
  411. gb2([{_Type, P} | T], Acc) ->
  412. gb2(T, [P | Acc]).
  413. %%
  414. %% Make the lines from the raw tokens
  415. %%
  416. make_lines(Tokens) ->
  417. ml1(Tokens, [], []).
  418. ml1([], [], A2) ->
  419. lists:reverse(A2);
  420. ml1([], A1, A2) ->
  421. ml1([], [], [lists:reverse(A1) | A2]);
  422. ml1([{{lf, _}, _} = H | T], A1, A2) ->
  423. ml1(T, [], [ml2(H, A1) | A2]);
  424. ml1([H | T], A1, A2) ->
  425. ml1(T, [H | A1], A2).
  426. ml2(H, List) ->
  427. lists:reverse([H | List]).
  428. %%%
  429. %%% Process the lines and give each line a type. The valid types are:
  430. %%% * normal line
  431. %%% * reference style links
  432. %%% * reference style images
  433. %%% * special line types
  434. %%% - blank
  435. %%% - SETEXT header lines
  436. %%% - ATX header lines
  437. %%% - unordered lists (including code blocks)
  438. %%% - ordered lists (including code blocks)
  439. %%% - blockquotes
  440. %%% - code blocks
  441. %%% - horizontal rules
  442. %%%
  443. type_lines(Lines) ->
  444. {Refs, TypedLines} = type_lines1(Lines, [], []),
  445. {strip_lines(TypedLines), Refs}.
  446. type_lines1([], A1, A2) ->
  447. {A1, lists:reverse(A2)};
  448. type_lines1([[{{ws, sp}, _}, {{inline, open}, _} | T1] = H | T2], A1, A2) ->
  449. %% this clause extracts URL and Image refs
  450. %% (it is the only one that uses A1 and A2...
  451. %% inlines can have up to 3 spaces before it
  452. t_inline(H, T1, T2, A1, A2);
  453. type_lines1([[{{ws, tab}, _}, {{inline, open}, _} | T1] = H | T2], A1, A2) ->
  454. t_inline(H, T1, T2, A1, A2);
  455. type_lines1([[{{ws, comp}, W}, {{inline, open}, _} | T1] = H | T2], A1, A2) ->
  456. case gt(W, 3) of
  457. {true, _R} ->
  458. t_inline(H, T1, T2, A1, A2);
  459. false ->
  460. type_lines1(T1, A1, [{normal , H} | A2]) % same exit at the final clause!
  461. end,
  462. t_inline(H, T1, T2, A1, A2);
  463. type_lines1([[{{inline, open}, _} | T1] = H | T2], A1, A2) ->
  464. t_inline(H, T1, T2, A1, A2);
  465. type_lines1([[{{md, eq}, _} | _T] = H | T], A1, A2) ->
  466. %% types setext lines
  467. type_lines1(T, A1, [type_setext_h1(H) | A2]);
  468. type_lines1([[{{md, dash}, _} | _T] = H | T], A1, A2) ->
  469. %% NOTE 1: generates a ul as the default not a normal line
  470. %% NOTE 2: depending on the context this might generate an <h2> header
  471. %% or an <hr />
  472. %% NOTE 3: space - is typed to a bullet down in <ul> land...
  473. type_lines1(T, A1, [type_setext_h2(H) | A2]);
  474. type_lines1([[{{md, atx}, _} | _T] = H | T], A1, A2) ->
  475. %% types atx lines
  476. type_lines1(T, A1, [type_atx(H) | A2]);
  477. type_lines1([[{{md, gt}, _} | []] = H | T], A1, A2) ->
  478. %% types blockquotes
  479. %% a blockquote on its own or followed by a linefeed is
  480. %% displayed 'as is' by showdown
  481. type_lines1(T, A1, [{normal, H} | A2]);
  482. type_lines1([[{{md, gt}, _}, {{lf, _}, _} | []] = H | T], A1, A2) ->
  483. type_lines1(T, A1, [{normal, H} | A2]);
  484. %% one with anything after it starts a blockquote
  485. type_lines1([[{{md, gt}, _} | _T1] = H | T], A1, A2) ->
  486. type_lines1(T, A1, [{blockquote, H} | A2]);
  487. type_lines1([[{{ws, _}, _}, {{md, star}, _} = ST1, {{ws, _}, _} = WS1 | T1] = H | T], A1, A2) ->
  488. %% types unordered lists lines
  489. %% NOTE 1: the dashed version is generated in type_setext_h2
  490. %% NOTE 2: the asterix version also might generate a horizontal rule
  491. %% which is why it jumps to type_star2 <-- note the 2!!
  492. type_lines1(T, A1, [{type_star2([ST1, WS1 | T1]), H} | A2]);
  493. type_lines1([[{{md, star}, _}, {{ws, _}, _} | _T1] = H | T], A1, A2) ->
  494. type_lines1(T, A1, [{type_star2(H), H} | A2]);
  495. type_lines1([[{{ws, _}, _}, {{md, plus}, _}, {{ws, _}, _} = W | T1] = H | T], A1, A2) ->
  496. type_lines1(T, A1, [{{ul, make_list_str([W | T1])}, H} | A2]);
  497. type_lines1([[{{md, plus}, _}, {{ws, _}, _} = W | T1] = H | T], A1, A2) ->
  498. type_lines1(T, A1, [{{ul, make_list_str([W | T1])}, H} | A2]);
  499. %% UL based on dashes
  500. type_lines1([[{{ws, _}, _}, {{md, dash}, _}, {{ws, _}, _} = W | T1] = H | T], A1, A2) ->
  501. type_lines1(T, A1, [{{ul, make_list_str([W | T1])}, H} | A2]);
  502. type_lines1([[{{ws, _}, _}, {num, _} = N1| T1] | T], A1, A2) ->
  503. %% types ordered lists...
  504. type_lines1(T, A1, [type_ol([N1 | T1]) | A2]);
  505. type_lines1([[{num, _} | _T] = H | T], A1, A2) ->
  506. type_lines1(T, A1, [type_ol(H) | A2]);
  507. type_lines1([[{{md, underscore}, _} | _T1] = H | T], A1, A2) ->
  508. %% types horizontal rules for stars and underscores
  509. %% dashes and some stars are done elsewhere...
  510. type_lines1(T, A1, [type_underscore(H) | A2]);
  511. type_lines1([[{{md, star}, _} | _T1] = H | T], A1, A2) ->
  512. type_lines1(T, A1, [type_star(H) | A2]);
  513. type_lines1([[{{{tag, _Type}, Tag}, _ } = H | T1] = List | T], A1, A2) ->
  514. %% Block level tags - these are look ahead they must be
  515. %% on a single line (ie directly followed by a lf and nothing else
  516. case is_blank(T1) of
  517. false ->
  518. type_lines1(T, A1, [{normal , List} | A2]);
  519. true ->
  520. case is_block_tag(Tag) of
  521. true ->
  522. type_lines1(T, A1, [{blocktag , [H]} | A2]);
  523. false ->
  524. type_lines1(T, A1, [{tag, [H | T1]} | A2])
  525. end
  526. end;
  527. type_lines1([[{{lf, _}, _}| []] = H | T], A1, A2) ->
  528. %% types a blank line or a code block
  529. type_lines1(T, A1, [{linefeed, H} | A2]);
  530. type_lines1([[{{ws, _}, _} | _T1] = H | T], A1, A2) ->
  531. type_lines1(T, A1, [type_ws(H) | A2]);
  532. %% Final clause...
  533. type_lines1([H | T], A1, A2) ->
  534. type_lines1(T, A1, [{normal , H} | A2]).
  535. t_inline(H, T1, T2, A1, A2) ->
  536. case snip_ref(T1) of
  537. {Type, {Id, {Url, Title}}} ->
  538. type_lines1(T2, lists:flatten([{Id, {Url, Title}} | A1]),
  539. [{Type, H} | A2]);
  540. normal ->
  541. type_lines1(T2, A1, [{normal, H} | A2])
  542. end.
  543. %% strips blanks from the beginning and end
  544. strip_lines(List) ->
  545. lists:reverse(strip_lines1(lists:reverse(strip_lines1(List)))).
  546. strip_lines1([{linefeed, _} | T]) ->
  547. strip_lines1(T);
  548. strip_lines1([{blank, _} | T]) ->
  549. strip_lines1(T);
  550. strip_lines1(List) ->
  551. List.
  552. %%
  553. %% Loads of type rules...
  554. %%
  555. is_blank([]) ->
  556. true;
  557. is_blank([{{lf, _}, _} | []]) ->
  558. true;
  559. is_blank([{{ws, _}, _} | T]) ->
  560. is_blank(T);
  561. is_blank(_List) ->
  562. false.
  563. is_block_tag("address") ->
  564. true;
  565. is_block_tag("blockquote") ->
  566. true;
  567. is_block_tag("center") ->
  568. true;
  569. is_block_tag("dir") ->
  570. true;
  571. is_block_tag("div") ->
  572. true;
  573. is_block_tag("dl") ->
  574. true;
  575. is_block_tag("fieldset") ->
  576. true;
  577. is_block_tag("form") ->
  578. true;
  579. is_block_tag("h1") ->
  580. true;
  581. is_block_tag("h2") ->
  582. true;
  583. is_block_tag("h3") ->
  584. true;
  585. is_block_tag("h4") ->
  586. true;
  587. is_block_tag("h5") ->
  588. true;
  589. is_block_tag("h6") ->
  590. true;
  591. is_block_tag("hr") ->
  592. true;
  593. is_block_tag("isindex") ->
  594. true;
  595. is_block_tag("menu") ->
  596. true;
  597. is_block_tag("noframes") ->
  598. true;
  599. is_block_tag("noscript") ->
  600. true;
  601. is_block_tag("ol") ->
  602. true;
  603. is_block_tag("p") ->
  604. true;
  605. is_block_tag("pre") ->
  606. true;
  607. is_block_tag("table") ->
  608. true;
  609. is_block_tag("thead") ->
  610. true;
  611. is_block_tag("tbody") ->
  612. true;
  613. is_block_tag("tr") ->
  614. true;
  615. is_block_tag("td") ->
  616. true;
  617. is_block_tag("ul") ->
  618. true;
  619. is_block_tag(_Other) ->
  620. false.
  621. type_underscore(List) ->
  622. case type_underscore1(trim_right(List)) of
  623. hr ->
  624. {hr, List};
  625. maybe ->
  626. {type_underscore2(List), List}
  627. end.
  628. type_underscore1([]) ->
  629. hr;
  630. type_underscore1([{{md, underscore}, _} | T]) ->
  631. type_underscore1(T);
  632. type_underscore1(_List) ->
  633. maybe.
  634. type_underscore2(List) ->
  635. case trim_right(List) of % be permissive of trailing spaces
  636. [{{md, underscore}, _}, {{ws, _}, _},
  637. {{md, underscore}, _}, {{ws, _}, _},
  638. {{md, underscore}, _}] ->
  639. hr;
  640. _Other ->
  641. normal
  642. end.
  643. type_star(List) ->
  644. Trim = trim_right(List),
  645. case type_star1(Trim) of % be permssive of trailing spaces
  646. hr ->
  647. {hr, trim_right(Trim)};
  648. maybe ->
  649. Type = type_star2(List),
  650. %% if it is a normal line we prepend it with a special
  651. %% non-space filling white space character
  652. case Type of
  653. normal ->
  654. {normal, [{{ws, none}, none} | List]};
  655. _ ->
  656. {Type, List}
  657. end
  658. end.
  659. type_star1([]) ->
  660. hr;
  661. type_star1([{{md, star}, _} | T]) ->
  662. type_star1(T);
  663. type_star1(_List) ->
  664. maybe.
  665. type_star2(List) ->
  666. case trim_right(List) of
  667. [{{md, star}, _}, {{ws, _}, _},
  668. {{md, star}, _}, {{ws, _}, _},
  669. {{md, star}, _}] ->
  670. hr;
  671. _Other ->
  672. case List of
  673. [{{md, star}, _},
  674. {{ws, _}, _}= WS | T] ->
  675. {ul, make_list_str([WS | T])};
  676. _Other2 ->
  677. normal
  678. end
  679. end.
  %% Classify a line that starts with a number token.
  %%   {{ol, Item}, List} - digits followed by a full stop and whitespace
  %%   {normal, Unescaped} - digits followed by an escaped full stop; the
  %%                         backslash is dropped from the returned tokens
  %%   {normal, List}      - anything else
  type_ol(List) ->
      type_ol_result(type_ol1(List, []), List).

  type_ol_result(normal, Line) ->
      {normal, Line};
  type_ol_result({ol, _Item} = Ol, Line) ->
      {Ol, Line};
  type_ol_result({esc_normal, Unescaped}, _Line) ->
      {normal, Unescaped}.

  %% Walk over the leading digits. Digits are kept in Digits (newest first)
  %% so that the line can be rebuilt when the full stop turns out to be
  %% escaped.
  type_ol1([{num, _} = Num,
            {{punc, bslash}, _},
            {{punc, fullstop}, _} = Stop | Rest], Digits) ->
      %% number, backslash, full stop: rebuild the line without the backslash
      {esc_normal, lists:flatten([lists:reverse(Digits), [Num, Stop | Rest]])};
  type_ol1([{num, _} = Num | Rest], Digits) ->
      type_ol1(Rest, [Num | Digits]);
  type_ol1([{{punc, fullstop}, _}, {{ws, _}, _} | Item], _Digits) ->
      {ol, Item};
  type_ol1(_Tokens, _Digits) ->
      normal.
  %% You need to understand what this function is trying to do...
  %% '### blah' is fine
  %% '### blah ###' is reduced to '### blah' because trailing #'s are
  %% just for show but...
  %% '##' is like applying '#' to '#' <-- applying 1 less styling to a single #
  %% and '###' is like applying '##' to '#' etc, etc
  %% but after you hit 6 #'s you just get this for a single hash
  %% ie '#############' is like applying '######' to a single '#'
  %% but/and '######## blah' is like applying '######' to '## blah'
  %% trailing #'s are stripped as they are decorative only...
  %%
  %% Returns {Type, List} where Type is normal or {hN, Tokens}.
  type_atx(List) ->
      {Level, Rest} = get_atx_size(List),
      Hash = [{{md, atx}, "#"}],
      OnlyHashes = is_all_hashes(Rest),
      Type =
          if
              not OnlyHashes ->
                  %% real heading text: strip the decorative trailing hashes
                  {type_atx_tag(Level), strip_atx(Rest)};
              Level == 1 ->
                  normal;
              Level > 1, Level < 6 ->
                  %% the last hash is the heading text, one level down
                  {type_atx_tag(Level - 1), Hash};
              Level == 6 ->
                  type_atx_six(Rest, Hash)
          end,
      {Type, List}.

  %% Heading atom for a level, e.g. 3 -> h3.
  type_atx_tag(Level) ->
      list_to_atom("h" ++ integer_to_list(Level)).

  %% Six counted hashes and nothing but hashes after them: exactly six on
  %% the line (optionally followed by a line end) is an h5 of '#', more
  %% than six is an h6 of '#'.
  type_atx_six([], Hash) ->
      {h5, Hash};
  type_atx_six([{{lf, lf}, "\n"}], Hash) ->
      {h5, Hash};
  type_atx_six([{{lf, crlf}, "\r\n"}], Hash) ->
      {h5, Hash};
  type_atx_six(_More, Hash) ->
      {h6, Hash}.
  %% true when the tokens are atx hashes only, optionally followed by one
  %% final line-end token; false otherwise.
  is_all_hashes(Tokens) ->
      NotHash = lists:dropwhile(fun({{md, atx}, _}) -> true;
                                   (_Other) -> false
                                end, Tokens),
      case NotHash of
          [] -> true;
          [{{lf, _}, _}] -> true;
          _ -> false
      end.
  %% Count the leading atx hashes of a line, up to a maximum of 6.
  %% Returns {Count, Rest} where Rest is what follows the counted hashes.
  get_atx_size(List) ->
      g_atx_size1(List, 0).

  %% Whitespace met while counting is skipped without being counted, so
  %% whitespace to the left of the remainder is stripped as well.
  g_atx_size1([{{md, atx}, _} | _] = Tokens, 6) ->
      %% already at the cap: further hashes belong to the remainder
      {6, Tokens};
  g_atx_size1([{{md, atx}, _} | Rest], Count) ->
      g_atx_size1(Rest, Count + 1);
  g_atx_size1([{{ws, _}, _} | Rest], Count) ->
      g_atx_size1(Rest, Count);
  g_atx_size1(Tokens, Count) ->
      {Count, Tokens}.
  %% Remove the decorative atx hashes from the end of a line. A line-end
  %% token that is directly preceded by a hash is removed with it.
  strip_atx(List) ->
      Reversed = lists:reverse(List),
      lists:reverse(s_atx1(Reversed)).

  %% Works on the reversed line, i.e. from the end towards the start.
  s_atx1([{{md, atx}, _} | Rest]) ->
      s_atx1(Rest);
  s_atx1([{{lf, _}, _}, {{md, atx}, _} | Rest]) ->
      s_atx1(Rest);
  s_atx1(Remaining) ->
      Remaining.
  %% Classify a line that starts with '='.
  %% {setext_h1, Tokens} when the line holds only '=' tokens (empty-list
  %% elements are skipped) and optionally one final line end; otherwise
  %% {normal, Tokens}.
  type_setext_h1(List) ->
      type_s_h1_1(List, []).

  %% Seen collects the '=' tokens met so far, newest first.
  %% Terminates on running out of tokens or on a final line end.
  type_s_h1_1([], Seen) ->
      {setext_h1, lists:reverse(Seen)};
  type_s_h1_1([{{lf, _}, _} = LineEnd], Seen) ->
      {setext_h1, lists:reverse([LineEnd | Seen])};
  type_s_h1_1([[] | Rest], Seen) ->
      type_s_h1_1(Rest, Seen);
  type_s_h1_1([{{md, eq}, _} = Eq | Rest], Seen) ->
      type_s_h1_1(Rest, [Eq | Seen]);
  type_s_h1_1(Rest, Seen) ->
      %% something other than '=' turned up: it is an ordinary line
      {normal, lists:flatten([Seen, Rest])}.
  %% Classify a line that starts with '-'.
  %% Returns {Type, List} where Type is h2_or_hr when the line is made of
  %% dashes only, otherwise whatever type_s_h2_2/1 makes of the
  %% right-trimmed line (hr, {ul, _} or normal).
  type_setext_h2(List) ->
      Type = case type_s_h2_1(List) of
                 h2_or_hr -> h2_or_hr;
                 not_h2 -> type_s_h2_2(trim_right(List))
             end,
      {Type, List}.
  %% h2_or_hr when the line holds only dash tokens (empty-list elements are
  %% skipped), optionally followed by one final line end; not_h2 otherwise.
  type_s_h2_1(Tokens) ->
      Leftover = lists:dropwhile(fun({{md, dash}, _}) -> true;
                                    ([]) -> true;
                                    (_Other) -> false
                                 end, Tokens),
      case Leftover of
          [] -> h2_or_hr;
          [{{lf, _}, _}] -> h2_or_hr;
          _ -> not_h2
      end.
  %% Classify a dash-led line that is not made of dashes only:
  %%   hr         - exactly `- - -`
  %%   {ul, Item} - dash followed by whitespace
  %%   normal     - anything else
  type_s_h2_2(Tokens) ->
      case Tokens of
          [{{md, dash}, _}, {{ws, _}, _},
           {{md, dash}, _}, {{ws, _}, _},
           {{md, dash}, _}] ->
              hr;
          [{{md, dash}, _}, {{ws, _}, _} = Gap | Item] ->
              {ul, make_list_str([Gap | Item])};
          _Other ->
              normal
      end.
  %% Classify a line that starts with whitespace.
  %% Returns {blank, List}, {normal, List} or {{codeblock, Rest}, List}.
  type_ws(List) ->
      Type = case type_ws1(List) of
                 blank -> blank;
                 try_codeblock -> type_ws2(List)
             end,
      {Type, List}.
  %% blank when the line holds only whitespace tokens (empty-list elements
  %% are skipped), optionally followed by one final line end;
  %% try_codeblock otherwise.
  type_ws1(Tokens) ->
      Leftover = lists:dropwhile(fun({{ws, _}, _}) -> true;
                                    ([]) -> true;
                                    (_Other) -> false
                                 end, Tokens),
      case Leftover of
          [] -> blank;
          [{{lf, _}, _}] -> blank;
          _ -> try_codeblock
      end.
  %% Decide whether leading whitespace opens a code block.
  %% 4 or more spaces takes you over the limit (a tab is 4...)
  %%   leading tab token         -> {codeblock, Rest}
  %%   leading composite ws      -> {codeblock, [Leftover | Rest]} when it
  %%                                is at least 4 columns wide, else normal
  %%   leading single space      -> normal
  type_ws2([{{ws, tab}, _} | T]) ->
      {codeblock, T};
  type_ws2([{{ws, comp}, W} | T]) ->
      case gt(W, 4) of
          {true, R} ->
              {codeblock, [R | T]};
          false ->
              normal
      end;
  type_ws2([{{ws, sp}, _} | _T]) ->
      normal.

  %% gt(String, Len): is the whitespace String at least Len columns wide?
  %% Every tab is expanded to four spaces first. Returns false, or
  %% {true, {{ws, sp}, Leftover}} where Leftover is the expanded
  %% whitespace beyond the first Len columns.
  %%
  %% Previously only the first tab was replaced (no `global` option) and
  %% by a single space, so e.g. "\t " was measured as 2 columns.
  gt(String, Len) ->
      Expanded = re:replace(String, "\t", "    ", [global, {return, list}]),
      case length(Expanded) >= Len of
          true ->
              {true, {{ws, sp}, lists:nthtail(Len, Expanded)}};
          false ->
              false
      end.
  %% Turn a list of tokens into a list of strings, one entry per token:
  %% tag tokens contribute their original text verbatim, every other token
  %% is rendered through make_string/2 with the reference list R.
  make_tag_str(L, R) ->
      make_tag1(L, R, []).

  %% Done holds already rendered pieces, newest first.
  make_tag1(Tokens, Refs, Done) ->
      Pieces = [make_tag_piece(Tok, Refs) || Tok <- Tokens],
      lists:reverse(Done, Pieces).

  make_tag_piece({{{tag, _Type}, _Tag}, Original}, _Refs) ->
      Original;
  make_tag_piece(Tok, Refs) ->
      make_string([Tok], Refs).
  %% Replace every non-breaking space in a tag string with a plain space.
  esc_tag(String) ->
      esc_tag1(String, []).

  %% Acc holds already converted characters, newest first.
  esc_tag1(Chars, Acc) ->
      lists:reverse(Acc, [esc_tag_char(C) || C <- Chars]).

  esc_tag_char(?NBSP) -> ?SPACE; % non-breaking space to space
  esc_tag_char(Char) -> Char.
  %% Content of a list item. The list must start with a whitespace token.
  %% When is_double_indent/1 reports a double indent the content is wrapped
  %% in <pre><code> tags; otherwise the initial whitespace token is simply
  %% discarded.
  make_list_str([{{ws, _}, _} | Rest] = List) ->
      case is_double_indent(List) of
          {true, Code} ->
              lists:flatten([{tags, "<pre><code>"},
                             Code,
                             {tags, "</code></pre>\n\n"}]);
          false ->
              Rest
      end.
  %% All ref processing can ignore the original values 'cos those
  %% have already been captured at a higher level
  %%
  %% Try to read a reference definition (`id]: url "title"`) from List.
  %% Returns {inlineref, {Id, {Url, Title}}} with Url html-encoded, or
  %% normal when the line is not a reference definition.
  %%
  %% Only single-token ids are understood. get_id/1 can also hand back an
  %% id made of several tokens (or none); that used to fall through the
  %% case and crash with case_clause, now such a line is simply 'normal',
  %% in line with how get_inline/4 treats ids it cannot use.
  snip_ref(List) ->
      case get_id(List) of
          {[{_, Id}], Rest} ->
              {_Rest2, Ref, Title} = parse_inline(Rest),
              Ref2 = trim(Ref),
              Rs = htmlencode(make_plain_string(Ref2)),
              Ts = make_plain_string(Title),
              {inlineref, {Id, {Rs, Ts}}};
          _NotASimpleRef ->
              normal
      end.
  %% Split a token list at the first `]: ` sequence (inline close, colon,
  %% whitespace). Returns {IdTokens, Rest} or normal when there is no such
  %% sequence.
  get_id(List) ->
      g_id1(List, []).

  %% Before collects the tokens in front of the marker, newest first.
  g_id1([{{inline, close}, _}, {{punc, colon}, _}, {{ws, _}, _} | Rest], Before) ->
      {lists:reverse(Before), Rest};
  g_id1([Tok | Rest], Before) ->
      g_id1(Rest, [Tok | Before]);
  g_id1([], _Before) ->
      normal.
  %% Parse the `url "title")` part of an inline link/image or of a
  %% reference definition. Returns {Rest, UrlTokens, TitleTokens}.
  parse_inline(List) ->
      parse_inline1(List, []).

  %% Seen collects the url tokens, newest first.
  parse_inline1([], Seen) ->
      {[], lists:reverse(Seen), []};
  parse_inline1([{{lf, _}, _}], Seen) ->
      %% snip off the terminal linefeed (if there is one...)
      {[], lists:reverse(Seen), []};
  parse_inline1([{{punc, bslash}, _}, {Kind, _} = Escaped | Rest], Seen)
    when Kind =:= bra; Kind =:= ket;
         Kind =:= {punc, doubleq}; Kind =:= {punc, singleq} ->
      %% brackets and quotes can be escaped: keep them, drop the backslash
      parse_inline1(Rest, [Escaped | Seen]);
  parse_inline1([{{punc, Quote}, _} | Rest], Seen)
    when Quote =:= doubleq; Quote =:= singleq ->
      %% an unescaped quote starts the title...
      parse_inline2(Rest, lists:reverse(Seen), Quote, []);
  parse_inline1([{bra, _} | Rest], Seen) ->
      %% ...and so does an opening bracket
      parse_inline2(Rest, lists:reverse(Seen), brackets, []);
  parse_inline1([{ket, _} | Rest], Seen) ->
      %% closing bracket without a title
      {Rest, lists:reverse(Seen), []};
  parse_inline1([Tok | Rest], Seen) ->
      parse_inline1(Rest, [Tok | Seen]).
  %% This gets titles in single quotes, double quotes or brackets.
  %% parse_inline2(Tokens, Url, D, A):
  %%   Url - the url tokens already collected, passed through unchanged
  %%   D   - the delimiter that closes the title (doubleq | singleq |
  %%         brackets), or none once the title has been closed
  %%   A   - the title tokens collected so far, newest first
  %% Returns {Rest, Url, TitleTokens}.
  %%
  %% Once the title is closed, anything up to the closing bracket is
  %% discarded. That discard step used to recurse with [A], which nests the
  %% accumulator so the final lists:reverse/1 no longer undoes the
  %% accumulation order and a multi-token title came out back to front.
  parse_inline2([], Url, _D, A) ->
      {[], Url, lists:flatten(lists:reverse(A))};
  parse_inline2([{{punc, bslash}, _}, {Kind, _} = Escaped | T], Url, D, A)
    when Kind =:= bra; Kind =:= ket;
         Kind =:= {punc, doubleq}; Kind =:= {punc, singleq} ->
      %% brackets and quotes can be escaped: keep them, drop the backslash
      parse_inline2(T, Url, D, [Escaped | A]);
  parse_inline2([{{punc, doubleq}, _} | T], Url, doubleq, A) ->
      %% these clauses capture the end of the title and drop the delimiter...
      parse_inline2(T, Url, none, A);
  parse_inline2([{{punc, singleq}, _} | T], Url, singleq, A) ->
      parse_inline2(T, Url, none, A);
  parse_inline2([{ket, _} | T], Url, brackets, A) ->
      parse_inline2(T, Url, none, A);
  parse_inline2([{ket, _} | T], Url, none, A) ->
      %% terminator clause
      {T, Url, lists:flatten(lists:reverse(A))};
  parse_inline2([_H | T], Url, none, A) ->
      %% this clause silently discards stuff after the delimiter...
      parse_inline2(T, Url, none, A);
  parse_inline2([H | T], Url, D, A) ->
      parse_inline2(T, Url, D, [H | A]).
  %% Strip whitespace tokens and empty-list elements from both ends of a
  %% token list.
  trim(String) ->
      trim_left(trim_right(String)).

  %% Strip them from the end only.
  trim_right(String) ->
      Reversed = lists:reverse(String),
      lists:reverse(trim_left(Reversed)).

  %% Strip them from the start only.
  trim_left(Tokens) ->
      lists:dropwhile(fun({{ws, _}, _}) -> true;
                         ([]) -> true;
                         (_Other) -> false
                      end, Tokens).
  %% Drop a trailing line-end token, if the list ends in one.
  snip([]) ->
      [];
  snip(List) ->
      case lists:last(List) of
          {{lf, _}, _} ->
              lists:droplast(List);
          _NotALineEnd ->
              List
      end.
  975. %% end of ref processing
  976. %%
  977. %% Build the Lexed Token List
  978. %% This is a two part lexer, first it chunks the input and then on the second
  979. %% pass it gathers it into lines and types the lines
  980. %%
  981. %% NOTE that there are two different styles of processing lines:
  982. %% * markdown transformed
  983. %% * block
  984. %% inside block processing the whole text is dumped and just url encoded
  985. %% and the original text is always maintained during the lexing/parsing
  986. %% so that it can be recreated if the context requires it...
  987. %%
  %% Lex a string into a flat token list: chunk it with lex1/3, then fold
  %% runs of adjacent whitespace tokens together with merge_ws/1.
  lex(String) ->
      Chunks = lex1(String, [], []),
      merge_ws(Chunks).
  %% Merge every run of adjacent whitespace tokens into one composite
  %% {{ws, comp}, Text} token whose text is the concatenation of the run.
  %% A whitespace token without a whitespace neighbour is left as it is.
  merge_ws(List) ->
      merge_ws1(List, []).

  %% Acc holds already processed tokens, newest first; it is never merged
  %% with the tokens still to come.
  merge_ws1(Tokens, Acc) ->
      Merged = lists:foldl(fun merge_ws_step/2, [], Tokens),
      lists:reverse(Acc, lists:reverse(Merged)).

  %% Done is newest first, so its head is the token just before Tok.
  merge_ws_step({{ws, _}, Next}, [{{ws, _}, Prev} | Done]) ->
      [{{ws, comp}, Prev ++ Next} | Done];
  merge_ws_step(Tok, Done) ->
      [Tok | Done].
  %% lex1(Input, Frag, Toks) - first lexer pass.
  %%   Input - characters still to be read
  %%   Frag  - characters of the plain-text fragment being collected,
  %%           newest first
  %%   Toks  - tokens produced so far, newest first
  %% Every special character closes the current fragment (via lex2/1) and
  %% emits its own token. The result is the flattened token list.

  %% this is the terminal head which ends the parsing...
  lex1([], [], Toks) ->
      lists:flatten(lists:reverse(Toks));
  lex1([], Frag, Toks) ->
      lex1([], [], [lex2(Frag) | Toks]);
  %% these two heads capture opening and closing tags
  lex1([$<, $/ | T], Frag, Toks) ->
      {Tag, Rest} = closingdiv(T, []),
      lex_emit(Tag, Rest, Frag, Toks);
  lex1([$< | T], Frag, Toks) ->
      {Tag, Rest} = openingdiv(T),
      lex_emit(Tag, Rest, Frag, Toks);
  %% these clauses are the normal lexer clauses
  lex1([$= | T], Frag, Toks) ->
      lex_emit({{md, eq}, "="}, T, Frag, Toks);
  lex1([$- | T], Frag, Toks) ->
      lex_emit({{md, dash}, "-"}, T, Frag, Toks);
  lex1([$# | T], Frag, Toks) ->
      lex_emit({{md, atx}, "#"}, T, Frag, Toks);
  lex1([$> | T], Frag, Toks) ->
      lex_emit({{md, gt}, ">"}, T, Frag, Toks);
  lex1([$+ | T], Frag, Toks) ->
      lex_emit({{md, plus}, "+"}, T, Frag, Toks);
  lex1([$* | T], Frag, Toks) ->
      lex_emit({{md, star}, "*"}, T, Frag, Toks);
  lex1([$_ | T], Frag, Toks) ->
      lex_emit({{md, underscore}, "_"}, T, Frag, Toks);
  lex1([Digit | T], Frag, Toks)
    when is_integer(Digit), Digit >= $0, Digit =< $9 ->
      %% every decimal digit becomes its own {num, "d"} token
      lex_emit({num, [Digit]}, T, Frag, Toks);
  lex1([$. | T], Frag, Toks) ->
      lex_emit({{punc, fullstop}, "."}, T, Frag, Toks);
  lex1([$: | T], Frag, Toks) ->
      lex_emit({{punc, colon}, ":"}, T, Frag, Toks);
  lex1([$\' | T], Frag, Toks) ->
      lex_emit({{punc, singleq}, "'"}, T, Frag, Toks);
  lex1([$\" | T], Frag, Toks) ->
      lex_emit({{punc, doubleq}, "\""}, T, Frag, Toks);
  lex1([$` | T], Frag, Toks) ->
      lex_emit({{punc, backtick}, "`"}, T, Frag, Toks);
  lex1([$! | T], Frag, Toks) ->
      lex_emit({{punc, bang}, "!"}, T, Frag, Toks);
  lex1([$\\ | T], Frag, Toks) ->
      lex_emit({{punc, bslash}, "\\"}, T, Frag, Toks);
  lex1([$/ | T], Frag, Toks) ->
      lex_emit({{punc, fslash}, "/"}, T, Frag, Toks);
  lex1([$( | T], Frag, Toks) ->
      lex_emit({bra, "("}, T, Frag, Toks);
  lex1([$) | T], Frag, Toks) ->
      lex_emit({ket, ")"}, T, Frag, Toks);
  lex1([$[ | T], Frag, Toks) ->
      lex_emit({{inline, open}, "["}, T, Frag, Toks);
  lex1([$] | T], Frag, Toks) ->
      lex_emit({{inline, close}, "]"}, T, Frag, Toks);
  lex1([?SPACE | T], Frag, Toks) ->
      %% note there is a special 'whitespace' {{ws, none}, ""} which is used
      %% to generate non-space filling whitespace for cases like
      %% '*bob* is great' which needs a non-space filling whitespace
      %% prepended to trigger emphasis so it renders as
      %% "<em>bob</em> is great...
      %% that 'character' doesn't exist so isn't in the lexer but appears
      %% in the parser
      lex_emit({{ws, sp}, " "}, T, Frag, Toks);
  lex1([?TAB | T], Frag, Toks) ->
      lex_emit({{ws, tab}, "\t"}, T, Frag, Toks);
  lex1([?NBSP | T], Frag, Toks) ->
      lex_emit({{ws, sp}, "&nbsp"}, T, Frag, Toks);
  lex1([?CR, ?LF | T], Frag, Toks) ->
      lex_emit({{lf, crlf}, [?CR, ?LF]}, T, Frag, Toks);
  lex1([?LF | T], Frag, Toks) ->
      lex_emit({{lf, lf}, [?LF]}, T, Frag, Toks);
  lex1([Char | T], Frag, Toks) ->
      %% this final clause accumulates line fragments
      lex1(T, [Char | Frag], Toks).

  %% Close the current fragment, push Tok after it and carry on lexing Rest.
  lex_emit(Tok, Rest, Frag, Toks) ->
      lex1(Rest, [], [Tok, lex2(Frag) | Toks]).
  %% Turn a collected fragment (characters newest first) into a
  %% {string, Text} token; an empty fragment yields [] so that it vanishes
  %% when the token list is flattened.
  lex2(Fragment) ->
      case Fragment of
          [] ->
              [];
          _ ->
              {string, lists:flatten(lists:reverse(Fragment))}
      end.
  %% Lex whatever follows a '<'. Tried in order: an automatic url link,
  %% an automatic e-mail link, and finally an opening tag (openingdiv1/2).
  %% Returns {Token, Rest}.
  openingdiv(String) ->
      case get_url(String) of
          not_url ->
              openingdiv_not_url(String);
          {{url, _URL}, _Rest} = Url ->
              Url
      end.

  openingdiv_not_url(String) ->
      case get_email_addie(String) of
          not_email ->
              openingdiv1(String, []);
          {{email, _EM}, _Rest} = Email ->
              Email
      end.
  %% Read an opening or self-closing tag; the leading '<' has already been
  %% consumed. Acc collects the characters read so far, newest first.
  %% Returns {Token, Rest} where Token is
  %%   {{{tag, open}, Name}, Original}          for `<name ...>`
  %%   {{{tag, self_closing}, Name}, Original}  for `<name .../>`
  %% with Name the lower-cased first word and Original the text as written.
  %% When this is not a tag after all, Token is a plain token list instead.
  %%
  %% "Not a tag" covers running out of input, `<>`, and brackets holding
  %% nothing but spaces (`< >`, `< />`). The last case used to crash with
  %% badmatch because there is no first word to take as the tag name.
  openingdiv1([], Acc) ->
      %% dumps out a list if it is not an opening div
      {lists:flatten([{{punc, bra}, "<"} | lex(lists:reverse(Acc))]), []};
  openingdiv1([$/, $> | T], Acc) ->
      {openingdiv_token(self_closing, Acc,
                        "/>", [{{punc, fslash}, "/"}, {{punc, ket}, ">"}]), T};
  openingdiv1([$> | T], []) ->
      %% special for non-tags
      {[{{punc, bra}, "<"},
        {{punc, ket}, ">"}], T};
  openingdiv1([$> | T], Acc) ->
      {openingdiv_token(open, Acc, ">", [{{punc, ket}, ">"}]), T};
  openingdiv1([H | T], Acc) ->
      openingdiv1(T, [H | Acc]).

  %% Build the tag token, or fall back to plain tokens when the text
  %% between the brackets holds no tag name.
  openingdiv_token(Kind, Acc, Closer, CloserTokens) ->
      Body = lists:flatten(lists:reverse(Acc)),
      case string:tokens(string:to_lower(Body), " ") of
          [Tag | _] ->
              {{{tag, Kind}, Tag}, "<" ++ Body ++ Closer};
          [] ->
              [{{punc, bra}, "<"}] ++ lex(Body) ++ CloserTokens
      end.
  %% Read a closing tag; the leading '</' has already been consumed.
  %% Acc collects the characters read so far, newest first.
  %% Returns {Token, Rest} where Token is {{{tag, close}, Name}, Original}
  %% with Name the lower-cased first word and Original the text as written.
  %% When this is not a tag after all, Token is a plain token list instead.
  %%
  %% "Not a tag" covers running out of input and brackets holding nothing
  %% but spaces (`</>`, `</ >`). The latter used to crash with badmatch
  %% because there is no first word to take as the tag name.
  closingdiv([], Acc) ->
      %% dumps out a list if it is not a closing div
      {lists:flatten([{{punc, bra}, "<"}, {{punc, fslash}, "/"}
                      | lex(lists:reverse(Acc))]), []};
  closingdiv([$> | T], Acc) ->
      Acc2 = lists:flatten(lists:reverse(Acc)),
      case string:tokens(string:to_lower(Acc2), " ") of
          [Tag | _] ->
              {{{{tag, close}, Tag}, "</" ++ Acc2 ++ ">"}, T};
          [] ->
              {[{{punc, bra}, "<"}, {{punc, fslash}, "/"}]
               ++ lex(Acc2)
               ++ [{{punc, ket}, ">"}], T}
      end;
  closingdiv([H | T], Acc) ->
      closingdiv(T, [H | Acc]).
  %% Recognise an automatic link such as `<http://example.com>`; the
  %% leading '<' has already been consumed.
  %% Returns {{url, Url}, Rest} when String starts with http:// or https://
  %% (in any letter case), otherwise not_url.
  %%
  %% The scheme test used to be written with `(S|s)*`, which also accepted
  %% "httpss://", "httpsss://" and so on; the 's' is now optional, once.
  get_url(String) ->
      case re:run(String, "^https?://", [caseless]) of
          nomatch ->
              not_url;
          {match, _} ->
              get_url1(String, [])
      end.

  %% Collect the url up to the closing '>'. Acc is newest first.
  get_url1([], Acc) ->
      %% no closing '>': everything that is left is the url
      URL = lists:flatten(lists:reverse(Acc)),
      {{url, URL}, []};
  get_url1([$\\, $> | T], Acc) ->
      %% allow escaped kets (the backslash stays in the url)
      get_url1(T, [$>, $\\ | Acc]);
  get_url1([$> | T], Acc) ->
      URL = lists:flatten(lists:reverse(Acc)),
      {{url, URL}, T};
  get_url1([H | T], Acc) ->
      get_url1(T, [H | Acc]).
  %% Recognise an automatic e-mail link such as `<bob@example.com>`; the
  %% leading '<' has already been consumed.
  %% Returns {{email, Text}, Rest} where Text is everything up to the first
  %% '>' and Rest what follows it, or not_email.
  %%
  %% The dots in the pattern are now written "\\." so that the regex sees
  %% an escaped, literal dot. They used to be written "\.", which in an
  %% Erlang string is just "." and therefore matched any character, so
  %% that e.g. "a@bcde" passed as an address.
  %% NOTE(review): the pattern is not anchored, so Text is accepted as soon
  %% as it contains an address anywhere - confirm that this is intended.
  get_email_addie(String) ->
      Snip_regex = ">",
      case re:run(String, Snip_regex) of
          nomatch ->
              not_email;
          {match, [{N, _} | _T]} ->
              {Possible, [$> | T]} = lists:split(N, String),
              EMail_regex = "[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+"
                  ++ "(?:\\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*"
                  ++ "@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\\.)+"
                  ++ "(?:[a-zA-Z]{2}|com|org|net|gov|mil"
                  ++ "|biz|info|mobi|name|aero|jobs|museum)",
              case re:run(Possible, EMail_regex) of
                  nomatch ->
                      not_email;
                  {match, _} ->
                      {{email, Possible}, T}
              end
      end.
  %% Concatenate the original text of a list of {Type, Text} tokens into
  %% one flat list. The special non-space-filling whitespace token
  %% {{ws, none}, none} contributes a single space.
  make_plain_string(List) ->
      make_plain_string(List, []).

  %% Acc holds already extracted pieces, newest first.
  make_plain_string(List, Acc) ->
      Pieces = lists:map(fun({{ws, none}, none}) -> " ";
                            ({_Type, Str}) -> Str
                         end, List),
      lists:flatten([lists:reverse(Acc) | Pieces]).
  %% Render a token list to a flat list: {tags, _} tokens are kept as they
  %% are, every other token is rendered on its own through make_string/2
  %% with the reference list Refs.
  make_escape_string(List, Refs) ->
      make_escape_string(List, Refs, []).

  %% A holds already rendered pieces, newest first.
  make_escape_string(Tokens, Refs, A) ->
      Pieces = [make_escape_piece(Tok, Refs) || Tok <- Tokens],
      lists:flatten([lists:reverse(A) | Pieces]).

  make_escape_piece({tags, _Tag} = Tags, _Refs) ->
      Tags;
  make_escape_piece(Tok, Refs) ->
      make_string([Tok], Refs).
  %% Render a token list to html text.
  %%   List - lexed tokens
  %%   Refs - reference definitions, looked up by get_inline/4
  %% Inline images and links, automatic url/e-mail links and html tags are
  %% turned into markup here; the assembled text then goes through
  %% htmlchars/1 for character escaping and emphasis.
  make_string(List, Refs) ->
      make_string(List, Refs, []).

  %% A holds the output pieces produced so far, newest first.
  make_string([], _R, A) ->
      Flat = lists:flatten(lists:reverse(A)),
      htmlchars(Flat);
  make_string([{{punc, bang}, B}, {{inline, open}, O} | T], R, A) ->
      %% `![` may start an inline image
      case get_inline(T, R, [], img) of
          {Rest, {Url, Title, Acc}} ->
              Tag = [make_img_tag(Url, Acc, Title)],
              make_string(Rest, R, [Tag | A]);
          {Rest, Tag} ->
              %% not an image after all: put the `![` back as text
              make_string(Rest, R, [Tag, O, B | A])
      end;
  %% escape inline open's...
  make_string([{{punc, bslash}, _}, {{inline, open}, O} | T], R, A) ->
      make_string(T, R, [O | A]);
  make_string([{{inline, open}, O} | T], R, A) ->
      %% `[` may start an inline link
      %% NOTE(review): Url and Title go into the attributes unescaped.
      case get_inline(T, R, [], url) of
          {Rest, {Url, Title, Acc}} ->
              Tit = case Title of
                        [] ->
                            [];
                        _ ->
                            " title=\"" ++ Title ++ "\""
                    end,
              Tag = [{tags, "<a href=\"" ++ Url ++ "\""
                      ++ Tit ++ ">"}, Acc,
                     {tags, "</a>"}],
              make_string(Rest, R, [Tag | A]);
          {Rest, Tag} ->
              %% not a link after all: put the `[` back as text
              make_string(Rest, R, [Tag, O | A])
      end;
  make_string([{email, Addie} | T], R, A) ->
      %% <a href="mailto:ADDRESS">ADDRESS</a>, same shape as the url link
      %% below. This used to emit `<a href="mailto:ADDRESS" />`, which html
      %% treats as an anchor that is never closed and has no link text.
      make_string(T, R, [{tags, "</a>"}, Addie, {tags, "\">"}, Addie,
                         {tags, "<a href=\"mailto:"} | A]);
  make_string([{url, Url} | T], R, A) ->
      make_string(T, R, [{tags, "</a>"}, Url, {tags, "\">"}, Url,
                         {tags, "<a href=\""} | A]);
  make_string([{tags, _} = Tag | T], R, A) ->
      make_string(T, R, [Tag | A]);
  make_string([{{{tag, Type}, Tag}, _} | T], R, A) ->
      %% html tags met here are shown as escaped text, not passed through
      Tag2 = esc_tag(Tag),
      TagStr = case Type of
                   open ->
                       {tags, "&lt;" ++ Tag2 ++ "&gt;"};
                   close ->
                       {tags, "&lt;/" ++ Tag2 ++ "&gt;"};
                   self_closing ->
                       {tags, "&lt;" ++ Tag2 ++ " /&gt;"}
               end,
      make_string(T, R, [TagStr | A]);
  make_string([{_, Orig} | T], R, A) ->
      %% every other token contributes its original text
      make_string(T, R, [Orig | A]).
  %% get_inline(Tokens, Refs, A, Type) - read the remainder of an inline
  %% link (Type = url) or image (Type = img) once its `[` has been consumed.
  %%   Refs - reference definitions as {Id, {Url, Title}}
  %%   A    - tokens of the link text collected so far, newest first
  %% Returns {Rest, {Url, Title, Text}} on success. When the construct is
  %% not an inline after all it returns {Rest, PlainText}, the plain text
  %% of what was consumed.
  get_inline([], _R, A, _) ->
      %% if the inline doesn't terminate it's not an inline...
      {[], make_plain_string(lists:reverse(A))};
  get_inline([{{punc, bang}, B}, {{inline, open}, O} | T], R, A, url) ->
      %% a url can contain an image inline
      case get_inline(T, R, A, img) of
          {Rest, {Url, Title, Acc}} ->
              Tag = make_img_tag(Url, Acc, Title),
              %% We double tag the tag so that it can get through the
              %% lists:flatteners..
              get_inline(Rest, R, [{tags, Tag} | A], url);
          {Rest, Text} ->
              %% The nested image did not work out. The success pattern
              %% used to be asserted here, so input like
              %% `[![alt] text](url)` crashed with badmatch. Report the
              %% failure upwards instead. Text starts with the plain text
              %% of A (the nested call was seeded with A), so the consumed
              %% `![` is put back right behind that prefix.
              Prefix = make_plain_string(lists:reverse(A)),
              {Rest, Prefix ++ B ++ O ++ lists:nthtail(length(Prefix), Text)}
      end;
  get_inline([{{inline, close}, _}, {bra, _} | T], _R, A, _) ->
      %% `](` - url and optional title follow in brackets
      {Rest, Url, Title} = parse_inline(T),
      Tag = {string:strip(make_plain_string(Url)),
             make_plain_string(Title),
             make_plain_string(lists:reverse(A))},
      {Rest, Tag};
  get_inline([{{inline, close}, _}, {{ws, sp}, _}, {bra, _} | T], _R, A, img) ->
      %% for img's but not url's you need to allow a single space between
      %% them to be compatible with showdown :(
      {Rest, Url, Title} = parse_inline(T),
      Tag = {string:strip(make_plain_string(Url)),
             make_plain_string(Title),
             make_plain_string(lists:reverse(A))},
      {Rest, Tag};
  get_inline([{{inline, close}, _}, {{inline, open}, _} | T], R, A, _) ->
      %% this clause detects references to images/links...
      Text = make_plain_string(lists:reverse(A)),
      case get_id_diff(T) of
          normal ->
              {[], Text};
          {[{_, Id}], Rest} ->
              {Url, Title} = case lists:keyfind(Id, 1, R) of
                                 false ->
                                     {"", ""};
                                 {Id, {U, Tit}} ->
                                     {U, Tit}
                             end,
              {Rest, {Url, Title, Text}};
          _Other ->
              {[], Text} % random failing id's
      end;
  get_inline([{{inline, close}, _} = C, {{ws, _}, _},
              {{inline, open}, _} = O | T], R, A, Type) ->
      %% so does this one - just delete the space and rethrow it
      get_inline([C, O | T], R, A, Type);
  get_inline([{{inline, close}, _} | T], R, A, _) ->
      %% this is the markdown extension clause that takes an id in square
      %% brackets without any additional stuff as a valid id marker
      Id = make_plain_string(lists:reverse(A)),
      case lists:keyfind(Id, 1, R) of
          false ->
              {T, lists:flatten([Id, $]])};
          {Id, {Url, Title}} ->
              {T, {Url, Title, Id}}
      end;
  get_inline([H | T], R, A, Type) ->
      get_inline(T, R, [H | A], Type).
  %% Split a token list at the first inline-close token (`]`).
  %% Returns {Before, After} without the close token itself, or normal when
  %% the list holds no close token.
  get_id_diff(List) ->
      g_id_diff1(List, []).

  %% Acc holds tokens already known to precede the close, newest first.
  g_id_diff1(Tokens, Acc) ->
      NotClose = fun({{inline, close}, _}) -> false;
                    (_Other) -> true
                 end,
      case lists:splitwith(NotClose, Tokens) of
          {_All, []} ->
              normal;
          {Before, [_Close | After]} ->
              {lists:reverse(Acc, Before), After}
      end.
  %% Replace the characters &, <, > and the non-breaking space (160) with
  %% their html entities; everything else passes through unchanged.
  %% Returns a flat list.
  htmlencode(List) ->
      htmlencode(List, []).

  %% Acc holds already encoded pieces, newest first.
  htmlencode(Chars, Acc) ->
      Encoded = [htmlencode_char(C) || C <- Chars],
      lists:flatten([lists:reverse(Acc) | Encoded]).

  htmlencode_char($&) -> "&amp;";
  htmlencode_char($<) -> "&lt;";
  htmlencode_char($>) -> "&gt;";
  htmlencode_char(160) -> "&nbsp;";
  htmlencode_char(Else) -> Else.
  %% Final text pass: escape html-significant characters and turn runs of
  %% *, _ and ` into emphasis / strong / code markup. The input is a flat
  %% list of characters that may also hold {tags, Html} tuples and the
  %% atom none. Returns a flat list.
  htmlchars(List) ->
      htmlchars1(List, []).

  %% Out holds the output pieces produced so far, newest first.
  htmlchars1([], Out) ->
      lists:flatten(lists:reverse(Out));
  htmlchars1([{tags, Tag} | T], Out) ->
      %% tags are just wheeched out unescaped
      htmlchars1(T, [Tag | Out]);
  htmlchars1([?CR, ?LF | T], Out) ->
      %% a CR/LF pair becomes a single "\n"
      htmlchars1(T, ["\n" | Out]);
  htmlchars1([?LF | T], Out) ->
      htmlchars1(T, ["\n" | Out]);
  htmlchars1([?CR | T], Out) ->
      htmlchars1(T, ["\r" | Out]);
  %% a backslash in front of a run of 3, 2 or 1 stars/underscores, or in
  %% front of a backtick, makes the run literal: the backslash is dropped
  %% and the run is copied out as it is (longest run first)
  htmlchars1([$\\, D, D, D | T], Out) when D =:= $*; D =:= $_ ->
      htmlchars1(T, [D, D, D | Out]);
  htmlchars1([$\\, D, D | T], Out) when D =:= $*; D =:= $_ ->
      htmlchars1(T, [D, D | Out]);
  htmlchars1([$\\, D | T], Out) when D =:= $*; D =:= $_; D =:= $` ->
      htmlchars1(T, [D | Out]);
  %% unescaped runs switch markup on (again longest run first)
  htmlchars1([D, D, D | T], Out) when D =:= $*; D =:= $_ ->
      {Rest, Markup} = superstrong(T, D),
      htmlchars1(Rest, [Markup | Out]);
  htmlchars1([D, D | T], Out) when D =:= $*; D =:= $_ ->
      {Rest, Markup} = strong(T, D),
      htmlchars1(Rest, [Markup | Out]);
  htmlchars1([D | T], Out) when D =:= $*; D =:= $_ ->
      {Rest, Markup} = emphasis(T, D),
      htmlchars1(Rest, [Markup | Out]);
  htmlchars1([$`, $` | T], Out) ->
      {Rest, Markup} = dblcode(T),
      htmlchars1(Rest, [Markup | Out]);
  htmlchars1([$` | T], Out) ->
      {Rest, Markup} = code(T),
      htmlchars1(Rest, [Markup | Out]);
  %% character escapes
  htmlchars1([?COPY | T], Out) ->
      htmlchars1(T, ["&copy;" | Out]);
  htmlchars1([?AMP | T], Out) ->
      htmlchars1(T, ["&amp;" | Out]);
  htmlchars1([$& | T], Out) ->
      htmlchars1(T, ["&amp;" | Out]);
  htmlchars1([$< | T], Out) ->
      htmlchars1(T, ["&lt;" | Out]);
  htmlchars1([?NBSP | T], Out) ->
      htmlchars1(T, ["&nbsp;" | Out]);
  htmlchars1([?TAB | T], Out) ->
      htmlchars1(T, [" " | Out]);
  htmlchars1([none | T], Out) ->
      %% there is a non-space filling white space represented by the atom
      %% 'none' which is created in the parser (NOT IN THE LEXER!); it
      %% leaves no trace in the output
      htmlchars1(T, Out);
  htmlchars1([H | T], Out) ->
      htmlchars1(T, [H | Out]).
  %% Entry points for inline markup. Each takes the text that follows an
  %% opening delimiter and returns {Rest, Markup}: Markup is the rendered
  %% html when the closing delimiter is found, and Rest is what follows it.

  %% single delimiter: <em>
  emphasis(Text, Marker) ->
      interpolate(Text, Marker, "em", "", []).

  %% double delimiter: <strong>
  strong(Text, Marker) ->
      interpolate2(Text, Marker, "strong", "", []).

  %% triple delimiter: <strong><em>
  superstrong(Text, Marker) ->
      interpolate3(Text, Marker, "strong", "em", "", []).

  %% double backtick: <code>, with <pre> wrapped around the result
  dblcode(Text) ->
      {Rest, Markup} = interpolate2(Text, $`, "code", "", []),
      {Rest, lists:append(["<pre>", Markup, "</pre>"])}.

  %% single backtick: <code>
  code(Text) ->
      interpolateX(Text, $`, "code", "", []).
  %% pain in the arse - sometimes the closing tag should be preceded by
  %% a "\n" and sometimes not in showdown.js
  %%
  %% interpolateX/5 is the single-delimiter scanner used for `code`. Its
  %% clauses were a copy of interpolate/5, so it simply hands over to it.
  interpolateX(List, Delim, Tag, X, Acc) ->
      interpolate(List, Delim, Tag, X, Acc).
  %% interpolate is for single delimiters...
  %% Scan List for the closing Delim.
  %%   found     -> {After, "<Tag>" ++ Inner ++ X ++ "</Tag>"}
  %%   not found -> {[], [Delim | Inner]}, the delimiter put back as text
  %% Inner is the scanned text passed through htmlchars/1; Acc holds text
  %% already scanned, newest first.
  interpolate(List, Delim, Tag, X, Acc) ->
      case lists:splitwith(fun(C) -> C =/= Delim end, List) of
          {Before, [_Closing | After]} ->
              Inner = htmlchars(lists:reverse(Acc, Before)),
              {After, lists:append(["<", Tag, ">", Inner, X, "</", Tag, ">"])};
          {Before, []} ->
              {[], [Delim | htmlchars(lists:reverse(Acc, Before))]}
      end.
  %% interpolate two is for double delimiters...
  %% Scan for two consecutive Delim characters.
  %%   found     -> {After, "<Tag>" ++ Inner ++ X ++ "</Tag>"}
  %%   not found -> {[], [Delim, Delim | Inner]}, delimiters put back
  %% Inner is the scanned text passed through htmlchars/1; Seen holds the
  %% text scanned so far, newest first.
  interpolate2([Delim, Delim | After], Delim, Tag, X, Seen) ->
      Inner = htmlchars(lists:reverse(Seen)),
      {After, lists:append(["<", Tag, ">", Inner, X, "</", Tag, ">"])};
  interpolate2([Char | Rest], Delim, Tag, X, Seen) ->
      interpolate2(Rest, Delim, Tag, X, [Char | Seen]);
  interpolate2([], Delim, _Tag, _X, Seen) ->
      {[], [Delim, Delim | htmlchars(lists:reverse(Seen))]}.
  %% interpolate three is for triple delimiters...
  %% Scan for three consecutive D characters.
  %%   found     -> {After, "<Tag1><Tag2>" ++ Inner ++ "</Tag2></Tag1>"}
  %%   not found -> {[], "<Tag2>" ++ [D] ++ "</Tag2>" ++ Inner}
  %% Inner is the scanned text passed through htmlchars/1; Seen holds the
  %% text scanned so far, newest first. The X argument is not used in the
  %% output.
  interpolate3([D, D, D | After], D, Tag1, Tag2, _X, Seen) ->
      Inner = htmlchars(lists:reverse(Seen)),
      {After, lists:append(["<", Tag1, ">", "<", Tag2, ">",
                            Inner,
                            "</", Tag2, ">", "</", Tag1, ">"])};
  interpolate3([Char | Rest], D, Tag1, Tag2, X, Seen) ->
      interpolate3(Rest, D, Tag1, Tag2, X, [Char | Seen]);
  interpolate3([], D, _Tag1, Tag2, _X, Seen) ->
      {[], lists:append(["<", Tag2, ">", [D], "</", Tag2, ">",
                         htmlchars(lists:reverse(Seen))])}.
  %% Build the {tags, Html} token for an image: Url is the src attribute,
  %% Acc the alt text and Title the title attribute. The three values are
  %% inserted exactly as given.
  make_img_tag(Url, Acc, Title) ->
      Html = lists:append(["<img src=\"", Url, "\"",
                           " alt=\"", Acc, "\"",
                           " title=\"", Title, "\"",
                           " />"]),
      {tags, Html}.