erlydtl_scanner.slex 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. %%%-------------------------------------------------------------------
  2. %%% File: erlydtl_scanner.slex
  3. %%% @author Andreas Stenius <kaos@astekk.se>
  4. %%% @copyright 2013 Andreas Stenius
  5. %%% @doc
  6. %%% erlydtl scanner
  7. %%% @end
  8. %%%
  9. %%% The MIT License
  10. %%%
  11. %%% Copyright (c) 2013 Andreas Stenius
  12. %%%
  13. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  14. %%% of this software and associated documentation files (the "Software"), to deal
  15. %%% in the Software without restriction, including without limitation the rights
  16. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. %%% copies of the Software, and to permit persons to whom the Software is
  18. %%% furnished to do so, subject to the following conditions:
  19. %%%
  20. %%% The above copyright notice and this permission notice shall be included in
  21. %%% all copies or substantial portions of the Software.
  22. %%%
  23. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29. %%% THE SOFTWARE.
  30. %%%
  31. %%% @since 2013-11-05 by Andreas Stenius
  32. %%%
  33. %%% Rules based on the original erlydtl_scanner by Robert Saccon and Evan Miller.
  34. %%%-------------------------------------------------------------------
  %% slex header directives. The error tuples built further down use
  %% `erlydtl_scanner' as the module name and the forms call `scan/4', so
  %% these presumably name the generated module and its scanner function.
  35. -module erlydtl_scanner.
  36. -function scan.
  %% Scanning starts in the `in_text' state (also the default `state' of the
  %% scanner_state record).
  37. -init_state in_text.
  %% `form ... end' wraps Erlang code; presumably it is copied verbatim into
  %% the generated module -- confirm against the slex documentation.
  38. form -compile(nowarn_unused_vars) end.
  %% Besides the generated scanner function, the module exports resume/1 and
  %% format_error/1, both defined as forms in this file.
  39. form -export([resume/1, format_error/1]) end.
  %% Snapshot of a scan in progress. return_error/5 fills one in when a rule
  %% reports an error and resume/1 feeds it back into scan/4.
  %%   template - input that has not been consumed yet
  %%   scanned  - tokens produced so far
  %%   pos      - current position as {Row, Column}, starting at {1,1}
  %%   state    - scanner state to continue in
  40. form \
  41. -record(scanner_state, { \
  42. template=[], \
  43. scanned=[], \
  44. pos={1,1}, \
  45. state=in_text \
  46. }) \
  47. end.
  %% resume(ScannerState) continues scanning from a #scanner_state{} record,
  %% such as the one returned in the error tuple built by return_error/5.
  %% It unpacks the record and calls scan/4 with the stored template, token
  %% list, position and state; whatever scan/4 returns is returned as-is.
  48. form \
  49. resume(#scanner_state{ template=Template, scanned=Scanned, \
  50. pos=Pos, state=State }) -> \
  51. scan(Template, Scanned, Pos, State) \
  52. end.
  53. %% Rule syntax: Prio Prefix|any|- InState[-]|any[+|-] [, Guard] : {: Body}|{[Action...,] NewState [until Closer]}.
  54. %% `state-' means a state without a closer state.
  55. %% Where Guard and Body are one erlang expression (see it as a begin ... end block): expr <erlang code...> end
  56. %% Open tags
  %% While in the closer-less `in_text' state, a variable or tag opener
  %% (optionally preceded by the HTML comment opener `<!--') produces an
  %% open_var / open_tag token and switches to `in_code' until the matching
  %% closer listed after `until'.
  57. 10 {{ in_text-: open_var, in_code until }}.
  58. 10 {% in_text-: open_tag, in_code until %}.
  59. 10 <!--{{ in_text-: open_var, in_code until }}-->.
  60. 10 <!--{% in_text-: open_tag, in_code until %}-->.
  61. %% Comments
  %% A comment opener switches from `in_text' to `in_comment' until the
  %% matching `#}' (or `#}-->') closer; no token is listed for the opener.
  62. 20 {# in_text-: in_comment until #}.
  63. 20 <!--{# in_text-: in_comment until #}-->.
  64. %% `any+' will match the closer with the prefix..
  %% The comment closer itself is skipped and scanning returns to `in_text'.
  65. 30 #}--> any+: skip, in_text-.
  66. 30 #} any+: skip, in_text-.
  67. %% must come before the `space any' rule
  %% Every other character inside a comment goes to the comment_tag token.
  %% NOTE(review): the leading `+' presumably means "append to the current
  %% token" rather than "start a new one" -- confirm in the slex docs.
  68. 40 any in_comment: +comment_tag.
  69. %% end comment rules
  70. %% The rest is "just" text..
  %% Any character seen in `in_text' that no earlier rule claimed is added to
  %% the string token.
  71. 50 any in_text-: +string.
  72. %% Quoted strings
  %% Double-quoted literal: the opening quote in `in_code' starts a
  %% string_literal token and enters `in_double_quote'; the closing quote is
  %% added to the token and scanning returns to `in_code'.
  73. 60 \" in_code: string_literal, in_double_quote.
  74. 62 \" in_double_quote: +string_literal, in_code.
  %% A backslash is kept in the token and moves to the escape state, where the
  %% next character (whatever it is, including a quote) is taken as part of
  %% the literal before returning to `in_double_quote'.
  75. 64 \\ in_double_quote: +string_literal, in_double_quote_escape.
  76. 66 any in_double_quote: +string_literal.
  77. 68 any in_double_quote_escape: +string_literal, in_double_quote.
  %% Single-quoted literal: same state machine with the `in_single_quote'
  %% states.
  %% NOTE(review): the `-\"' suffix on the token name presumably records a
  %% double quote in the token instead of the matched single quote, so both
  %% quoting styles yield the same token text -- confirm in the slex docs.
  78. 60 \' in_code: string_literal-\", in_single_quote.
  79. 62 \' in_single_quote: +string_literal-\", in_code.
  80. 64 \\ in_single_quote: +string_literal, in_single_quote_escape.
  81. 66 any in_single_quote: +string_literal.
  82. 68 any in_single_quote_escape: +string_literal, in_single_quote.
  83. %% Close tags
  %% A closer (optionally followed by the HTML comment closer `-->') produces
  %% a close_var / close_tag token and returns to the closer-less `in_text'
  %% state. The `-->' variants have the lower priority number (70 vs 72).
  84. 70 }}--> any+: close_var, in_text-.
  85. 70 %}--> any+: close_tag, in_text-.
  86. 72 }} any+: close_var, in_text-.
  %% The plain tag closer has a hand-written body because it must detect the
  %% start of a verbatim block. In the body S is the token list (most recent
  %% token first), T the remaining input, P the current position and {R, C}
  %% its row and column; `C + 2' steps over the two characters of the closer.
  %%  * First clause: the tag consists of a single identifier whose raw text
  %%    is "mitabrev", i.e. "verbatim" still in reversed (unprocessed) form.
  %%    The open_tag and identifier tokens are replaced by an empty string
  %%    token and scanning continues in {in_verbatim, undefined}.
  %%  * Second clause: an already atomized `verbatim' identifier followed by
  %%    one more identifier; that identifier's raw text becomes the block's
  %%    name and scanning continues in {in_verbatim, Tag}.
  %%  * Otherwise: an ordinary tag end. A close_tag token is pushed on top of
  %%    post_process(S, close_tag) and scanning returns to `in_text'.
  87. 72 %} any+:
  88. expr \
  89. case S of \
  90. [{identifier,_,"mitabrev"}, {open_tag,_,'{%'}|Ss] -> \
  91. scan(T, [{string, {R, C + 2}, ""} | Ss], \
  92. {R, C + 2}, {in_verbatim, undefined}); \
  93. [{identifier,_,Tag}, {identifier,_,verbatim}, {open_tag,_,'{%'}|Ss] -> \
  94. scan(T, [{string, {R, C + 2}, ""} | Ss], \
  95. {R, C + 2}, {in_verbatim, Tag}); \
  96. _ -> scan(T, [{close_tag, P, "%}"} | post_process(S, close_tag)], \
  97. {R, C + 2}, in_text) \
  98. end \
  99. end.
  100. %% verbatim stuff
  %% Inside a verbatim block everything is copied into a string token until a
  %% matching endverbatim tag is found. In the bodies below E is the data
  %% carried by the current state, H the current character, T the remaining
  %% input, S the token list (most recent first), P the current position and
  %% {R, C} its row and column. Because a tag opener may turn out not to be
  %% an endverbatim tag, the text consumed while checking is kept in a
  %% "backtrack" list, stored in reverse order like the text of the string
  %% token itself, so it can be put back into the string token.
  %%
  %% Rule 80: a tag opener inside the verbatim text. Remember it in the
  %% backtrack list ("%{" is the opener reversed) and start inspecting the
  %% tag in `in_verbatim_code'; E here is the verbatim block's tag name.
  101. 80 {% in_verbatim:
  102. expr scan(T, S, {R, C + 2}, {in_verbatim_code, {E, "%{"}}) end.
  %% Rule 82: whitespace right after the opener is recorded as a space in the
  %% backtrack list.
  103. 82 \s in_verbatim_code:
  104. expr \
  105. {Tag, Backtrack} = E, \
  106. scan(T, S, {R, C + 1}, {in_verbatim_code, {Tag, [$\ |Backtrack]}}) \
  107. end.
  %% Rule 84: `endverbatim' immediately followed by the tag closer ends an
  %% unnamed block (guard: tag name is `undefined'). The 13 added to the
  %% column is the length of the matched prefix. Nothing is added to the
  %% tokens; scanning returns to `in_text'.
  108. 84 'endverbatim%}' in_verbatim_code,
  109. expr element(1, E) =:= undefined end:
  110. expr scan(T, S, {R, C + 13}, in_text) end.
  %% Rule 86: `endverbatim' followed by a space. Push the matched text onto
  %% the backtrack list (lists:reverse/2 prepends it reversed) and collect an
  %% optional end tag name, initially "", in `in_endverbatim_code'.
  111. 86 'endverbatim ' in_verbatim_code:
  112. expr \
  113. {Tag, Backtrack} = E, \
  114. scan(T, S, {R, C + 12}, \
  115. {in_endverbatim_code, \
  116. {Tag, lists:reverse("endverbatim ", Backtrack), ""}}) \
  117. end.
  %% Rule 88: further whitespace before any end tag character has been
  %% collected only goes to the backtrack list.
  118. 88 \s in_endverbatim_code,
  119. expr element(3, E) =:= "" end:
  120. expr \
  121. {Tag, Backtrack, EndTag} = E, \
  122. scan(T, S, {R, C + 1}, \
  123. {in_endverbatim_code, \
  124. {Tag, [$\ |Backtrack], EndTag}}) \
  125. end.
  %% Rule 90: lower-case letters, digits and `_' extend the end tag name; the
  %% character is pushed onto both the backtrack list and the (reversed) name.
  126. 90 any in_endverbatim_code,
  127. expr \
  128. H >= $a andalso H =< $z orelse \
  129. H >= $0 andalso H =< $9 orelse H =:= $_ end:
  130. expr \
  131. {Tag, Backtrack, EndTag} = E, \
  132. scan(T, S, {R, C + 1}, \
  133. {in_endverbatim_code, \
  134. {Tag, [H|Backtrack], [H|EndTag]}}) \
  135. end.
  %% Rule 92: whitespace after an end tag name equal to the block's tag name
  %% is accepted (and remembered in the backtrack list).
  136. 92 \s in_endverbatim_code,
  137. expr element(1, E) =:= element(3, E) end:
  138. expr \
  139. {Tag, Backtrack, Tag} = E, \
  140. scan(T, S, {R, C + 1}, \
  141. {in_endverbatim_code, \
  142. {Tag, [$\ |Backtrack], Tag}}) \
  143. end.
  %% Rule 94: the tag closer after a matching end tag name ends the block.
  144. 94 %} in_endverbatim_code,
  145. expr element(1, E) =:= element(3, E) end:
  146. expr scan(T, S, {R, C + 2}, (in_text)) end.
  %% Rule 96: the tag closer with no end tag name ends an unnamed block.
  147. 96 %} in_endverbatim_code,
  148. expr element(1, E) =:= undefined andalso \
  149. element(3, E) =:= "" end:
  150. expr scan(T, S, {R, C + 2}, in_text) end.
  %% Rule 98: anything else means this was not the end tag we are waiting
  %% for. The current character plus the whole backtrack list is put in front
  %% of the current string token's text (or a new string token is started at
  %% P), the position moves to the next column, or to column 1 of the next
  %% row on a newline, and scanning goes back to {in_verbatim, Tag}.
  151. 98 any in_endverbatim_code:
  152. expr \
  153. {Tag, Backtrack, _} = E, \
  154. scan(T, \
  155. case S of \
  156. [{string,_, L}=M|Ss] -> \
  157. [setelement(3, M, [H|Backtrack] ++ L)|Ss]; \
  158. _ -> [{string, P, [H|Backtrack]}|S] \
  159. end, \
  160. case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
  161. {in_verbatim, Tag}) \
  162. end.
  %% Rule 100: same fallback for a tag that did not start with `endverbatim'.
  163. 100 any in_verbatim_code:
  164. expr \
  165. {Tag, Backtrack} = E, \
  166. scan(T, \
  167. case S of \
  168. [{string,_, L}=M|Ss] -> \
  169. [setelement(3, M, [H|Backtrack] ++ L)|Ss]; \
  170. _ -> [{string, P, [H|Backtrack]}|S] \
  171. end, \
  172. case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
  173. {in_verbatim, Tag}) \
  174. end.
  %% Rule 102: ordinary verbatim content. The character is pushed onto the
  %% current string token (or starts one) and the position is advanced.
  175. 102 any in_verbatim:
  176. expr \
  177. scan(T, \
  178. case S of \
  179. [{string,_, L}=M|Ss] -> \
  180. [setelement(3, M, [H|L])|Ss]; \
  181. _ -> [{string, P, [H]}|S] \
  182. end, \
  183. case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
  184. {in_verbatim, E}) \
  185. end.
  186. %% Get back to `in_code' on these tokens:
  %% Each operator or punctuation prefix produces a token of the same name
  %% and puts the scanner (back) in `in_code'. The two-character operators
  %% are listed before their one-character prefixes.
  187. 110 == any: ==, in_code.
  188. 110 != any: !=, in_code.
  189. 110 >= any: >=, in_code.
  190. 110 <= any: <=, in_code.
  191. 110 > any: >, in_code.
  192. 110 < any: <, in_code.
  193. 110 ( any: (, in_code.
  194. 110 ) any: ), in_code.
  195. 110 \, any: \,, in_code.
  196. 110 | any: |, in_code.
  197. 110 = any: =, in_code.
  198. 110 \: any: \:, in_code.
  199. 110 \. any: \., in_code.
  %% `_(' lists two tokens, `_' and `('.
  200. 110 \_( any: \_ \(, in_code.
  201. %% Eat space (and get back to `in_code')
  202. %% Note that `any' here matches only states *with* a closer, i.e. not `in_text'.
  203. %% (`any-' would match any state without a closer.)
  204. 110 \s any: skip, in_code.
  %% Token starts inside a tag or variable. H is the current character.
  %% Rule 120: a letter or `_' starts an identifier token.
  205. 120 any in_code,
  206. expr \
  207. (H >= $a andalso H =< $z) orelse \
  208. (H >= $A andalso H =< $Z) orelse \
  209. H == $_ \
  210. end: identifier, in_identifier.
  %% Rule 122: a digit or `-' starts a number_literal token.
  %% NOTE(review): a lone `-' is accepted here and rule 130 only adds digits,
  %% so the list_to_integer post-processing step may receive "-" -- verify.
  211. 122 any in_code,
  212. expr \
  213. (H >= $0 andalso H =< $9) orelse H == $- \
  214. end: number_literal, in_number.
  %% Rule 124: any other character in `in_code' is an error. The offending
  %% character is put back in front of the remaining input and the current
  %% tokens and state (St) are handed to return_error/5.
  215. 124 any in_code: expr return_error({illegal_char, H}, P, [H|T], S, St) end.
  %% Rules 130/132: digits extend the number; anything else that reaches
  %% these rules while in `in_number' is reported as an illegal character.
  216. 130 any in_number, expr H >= $0 andalso H =< $9 end: +number_literal.
  217. 132 any in_number: expr return_error({illegal_char, H}, P, [H|T], S, St) end.
  %% Rules 140/142: letters, digits and `_' extend the identifier; anything
  %% else that reaches these rules while in `in_identifier' is an error.
  218. 140 any in_identifier,
  219. expr \
  220. (H >= $a andalso H =< $z) orelse \
  221. (H >= $A andalso H =< $Z) orelse \
  222. (H >= $0 andalso H =< $9) orelse \
  223. H == $_ \
  224. end: +identifier, in_identifier.
  225. 142 any in_identifier: expr return_error({illegal_char, H}, P, [H|T], S, St) end.
  %% Rules without a prefix; judging by their bodies (`eof') these
  %% presumably fire when the input is exhausted -- confirm in the slex docs.
  %% In `in_text' the scan succeeds: the remaining post-processing is run and
  %% the token list, which is built most-recent-first, is reversed.
  226. 200 : in_text- :
  227. expr \
  228. {ok, lists:reverse(post_process(S,eof))} \
  229. end.
  %% Ending inside a comment, or in any other state, is an error. These use
  %% return_error/2, so no scanner state is returned to resume from.
  230. 202 : in_comment : expr return_error({eof, in_comment}, P) end.
  231. 204 : any : expr return_error({eof, in_code}, P) end.
  232. %% Process tokens as we parse them
  %% Format: `TokenName: Step [, Step...].' -- presumably each step is applied
  %% to the text of a finished token of that name (confirm in the slex docs).
  %% Token text is accumulated in reverse, hence the `lists reverse' steps.
  233. string: lists reverse.
  234. string_literal: lists reverse.
  235. comment_tag: lists reverse.
  236. number_literal: lists reverse, list_to_integer.
  %% Tag and variable delimiters are turned into atoms with to_atom/1.
  237. open_var: to_atom.
  238. close_var: to_atom.
  239. open_tag: to_atom.
  240. close_tag: to_atom.
  %% Identifiers are classified by is_keyword/2. In these rules T is the
  %% token being processed (is_keyword/2 matches a 3-tuple). The keyword
  %% class depends on the identifier's neighbours: directly after an
  %% open_tag, directly before a close_tag, both, or neither.
  241. open_tag identifier, close_tag: expr is_keyword(all, T) end.
  242. open_tag identifier: expr is_keyword(open_tag, T) end.
  243. identifier, close_tag: expr is_keyword(close_tag, T) end.
  244. identifier: expr is_keyword(any, T) end.
  245. %% Utility functions
  %% return_error(Error, P, T, S, St) builds the error result for a failure
  %% that can be resumed from:
  %%   {error, {P, erlydtl_scanner, Error}, #scanner_state{}}
  %% P is the position, T the input still to be scanned, S the tokens so far
  %% (run through post_process(S, err) before being stored) and St the state
  %% the scanner was in. The record can be passed to resume/1.
  246. form return_error(Error, P, T, S, St) -> \
  247. {error, \
  248. {P, erlydtl_scanner, Error}, \
  249. #scanner_state{ template=T, \
  250. scanned=post_process(S, err), \
  251. pos=P, state=St } \
  252. } \
  253. end.
  %% return_error(Error, P) builds the two-element error result
  %% {error, {P, erlydtl_scanner, Error}} without any scanner state; it is
  %% used by the end-of-input rules.
  254. form return_error(Error, P) -> {error, {P, erlydtl_scanner, Error}} end.
  %% to_atom(L) converts a string (list) to an atom; any other argument fails
  %% with function_clause.
  %% NOTE(review): list_to_atom/1 creates new atoms and atoms are never
  %% garbage collected; atomize/2 below applies this to identifier text taken
  %% from the template, so untrusted templates could grow the atom table.
  255. form to_atom(L) when is_list(L) -> list_to_atom(L) end.
  %% to_keyword(L, P) builds the keyword token for the string L at position
  %% P: {'<L>_keyword', P, L}, e.g. {in_keyword, P, "in"}.
  256. form to_keyword(L, P) -> {to_atom(L ++ "_keyword"), P, L} end.
  %% atomize(L, T) returns token tuple T with its third element replaced by
  %% the atom for the string L.
  257. form atomize(L, T) -> setelement(3, T, to_atom(L)) end.
  %% is_keyword/2 serves two purposes, selected by the second argument.
  %%
  %% is_keyword(Class, Token), where Token is a 3-tuple whose third element
  %% is the identifier text in reversed order: reverses the text and returns
  %% a new token, either the keyword token from to_keyword/2 if the text is a
  %% keyword of Class, or the original token with its text atomized.
  %%
  %% is_keyword(Class, String) returns true or false. Class is one of:
  %%   any       - keywords recognised at every identifier position
  %%   open      - keywords recognised directly after an open tag
  %%   close     - keywords recognised directly before a close tag
  %%   open_tag  - shorthand for [any, open]
  %%   close_tag - shorthand for [any, close]
  %%   all       - shorthand for [any, open, close]
  %%   a list of classes - true if the string is a keyword in any of them
  %%     (the empty list falls through to the final clause and gives false)
  %%
  %% Clause order matters: the token clause and the list clause must stay
  %% ahead of the per-keyword clauses, and the catch-all must stay last.
  258. form \
  259. is_keyword(Class, {_, _, L} = T) -> \
  260. L1 = lists:reverse(L), \
  261. case is_keyword(Class, L1) of \
  262. true -> to_keyword(L1, element(2, T)); \
  263. false -> atomize(L1, T) \
  264. end; \
  265. is_keyword([C|Cs], L) -> \
  266. is_keyword(C, L) orelse \
  267. is_keyword(Cs, L); \
  268. is_keyword(all, L) -> is_keyword([any, open, close], L); \
  269. is_keyword(open_tag, L) -> is_keyword([any, open], L); \
  270. is_keyword(close_tag, L) -> is_keyword([any, close], L); \
  271. \
  272. is_keyword(any, "in") -> true; \
  273. is_keyword(any, "not") -> true; \
  274. is_keyword(any, "or") -> true; \
  275. is_keyword(any, "and") -> true; \
  276. is_keyword(any, "as") -> true; \
  277. is_keyword(any, "by") -> true; \
  278. is_keyword(any, "with") -> true; \
  279. is_keyword(any, "from") -> true; \
  280. is_keyword(any, "count") -> true; \
  281. is_keyword(any, "context") -> true; \
  282. \
  283. is_keyword(close, "only") -> true; \
  284. is_keyword(close, "parsed") -> true; \
  285. is_keyword(close, "noop") -> true; \
  286. is_keyword(close, "reversed") -> true; \
  287. is_keyword(close, "openblock") -> true; \
  288. is_keyword(close, "closeblock") -> true; \
  289. is_keyword(close, "openvariable") -> true; \
  290. is_keyword(close, "closevariable") -> true; \
  291. is_keyword(close, "openbrace") -> true; \
  292. is_keyword(close, "closebrace") -> true; \
  293. is_keyword(close, "opencomment") -> true; \
  294. is_keyword(close, "closecomment") -> true; \
  295. \
  296. is_keyword(open, "autoescape") -> true; \
  297. is_keyword(open, "endautoescape") -> true; \
  298. is_keyword(open, "block") -> true; \
  299. is_keyword(open, "endblock") -> true; \
  300. is_keyword(open, "comment") -> true; \
  301. is_keyword(open, "endcomment") -> true; \
  302. is_keyword(open, "cycle") -> true; \
  303. is_keyword(open, "extends") -> true; \
  304. is_keyword(open, "filter") -> true; \
  305. is_keyword(open, "endfilter") -> true; \
  306. is_keyword(open, "firstof") -> true; \
  307. is_keyword(open, "for") -> true; \
  308. is_keyword(open, "empty") -> true; \
  309. is_keyword(open, "endfor") -> true; \
  310. is_keyword(open, "if") -> true; \
  311. is_keyword(open, "elif") -> true; \
  312. is_keyword(open, "else") -> true; \
  313. is_keyword(open, "endif") -> true; \
  314. is_keyword(open, "ifchanged") -> true; \
  315. is_keyword(open, "endifchanged") -> true; \
  316. is_keyword(open, "ifequal") -> true; \
  317. is_keyword(open, "endifequal") -> true; \
  318. is_keyword(open, "ifnotequal") -> true; \
  319. is_keyword(open, "endifnotequal") -> true; \
  320. is_keyword(open, "include") -> true; \
  321. is_keyword(open, "now") -> true; \
  322. is_keyword(open, "regroup") -> true; \
  323. is_keyword(open, "endregroup") -> true; \
  324. is_keyword(open, "spaceless") -> true; \
  325. is_keyword(open, "endspaceless") -> true; \
  326. is_keyword(open, "ssi") -> true; \
  327. is_keyword(open, "templatetag") -> true; \
  328. is_keyword(open, "widthratio") -> true; \
  329. is_keyword(open, "call") -> true; \
  330. is_keyword(open, "endwith") -> true; \
  331. is_keyword(open, "trans") -> true; \
  332. is_keyword(open, "blocktrans") -> true; \
  333. is_keyword(open, "endblocktrans") -> true; \
  334. is_keyword(open, "load") -> true; \
  335. is_keyword(open, "plural") -> true; \
  336. is_keyword(_, _) -> false \
  337. end.
  %% format_error(Reason) turns an error reason produced by this scanner into
  %% printable text (the chardata returned by io_lib:format/2).
  %%   {illegal_char, C} - C is the offending character, as reported by the
  %%                       `return_error({illegal_char, H}, ...)' rules
  %%   {eof, Where}      - the input ended unexpectedly; Where is worded by
  %%                       format_where/1
  %% Any other reason fails with function_clause.
  %% The character is formatted with `~ts' rather than `~s': with `~s' a code
  %% point above 255 makes io_lib:format/2 fail with badarg, so reporting the
  %% error would itself crash. For characters up to 255 the output is the same.
  338. form format_error({illegal_char, C}) -> \
  339. io_lib:format("Illegal character '~ts'", [[C]]); \
  340. format_error({eof, Where}) -> \
  341. io_lib:format("Unexpected end of file ~s", [format_where(Where)]) \
  342. end.
  %% format_where(Where) gives the wording used by format_error/1 for the
  %% place where the input ended: `in_comment' or `in_code', the two values
  %% passed by the end-of-input rules. Any other value fails with
  %% function_clause.
  343. form format_where(in_comment) -> "in comment"; \
  344. format_where(in_code) -> "in code block" \
  345. end.