erlydtl_scanner.erl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. %%%-------------------------------------------------------------------
  2. %%% File: erlydtl_scanner.erl
  3. %%% @author Roberto Saccon <rsaccon@gmail.com> [http://rsaccon.com]
  4. %%% @author Evan Miller <emmiller@gmail.com>
  5. %%% @copyright 2008 Roberto Saccon, Evan Miller
  6. %%% @doc
  7. %%% Template language scanner
  8. %%% @end
  9. %%%
  10. %%% The MIT License
  11. %%%
  12. %%% Copyright (c) 2007 Roberto Saccon, Evan Miller
  13. %%%
  14. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  15. %%% of this software and associated documentation files (the "Software"), to deal
  16. %%% in the Software without restriction, including without limitation the rights
  17. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  18. %%% copies of the Software, and to permit persons to whom the Software is
  19. %%% furnished to do so, subject to the following conditions:
  20. %%%
  21. %%% The above copyright notice and this permission notice shall be included in
  22. %%% all copies or substantial portions of the Software.
  23. %%%
  24. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  25. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  26. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  27. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  28. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  29. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  30. %%% THE SOFTWARE.
  31. %%%
  32. %%% @since 2007-11-11 by Roberto Saccon, Evan Miller
  33. %%%-------------------------------------------------------------------
  34. -module(erlydtl_scanner).
  35. -author('rsaccon@gmail.com').
  36. -author('emmiller@gmail.com').
  37. -export([scan/1]).
  38. %%====================================================================
  39. %% API
  40. %%====================================================================
  41. %%--------------------------------------------------------------------
  42. %% @spec scan(T::template()) -> {ok, S::tokens()} | {error, Reason}
  43. %% @type template() = string() | binary(). Template to parse
  44. %% @type tokens() = [tuple()].
  45. %% @doc Scan the template string T and return a token list or
  46. %% an error.
  47. %% @end
  48. %%--------------------------------------------------------------------
  %% Entry point of the scanner. Starts scan/4 at row 1, column 1, in the
  %% plain-text state with an empty token accumulator.
  %%
  %% scan/4 only pattern-matches on lists, so a binary template is turned
  %% into a list of bytes first; without this a binary argument would fail
  %% with function_clause even though it is a documented input type.
  scan(Template) when is_binary(Template) ->
      scan(binary_to_list(Template));
  scan(Template) ->
      scan(Template, [], {1, 1}, in_text).
  %%--------------------------------------------------------------------
  %% scan(Input, Scanned, Pos, State)
  %%
  %% Character-level state machine behind scan/1.
  %%
  %%   Input   - remaining template characters (a list).
  %%   Scanned - tokens recognised so far, newest first. The text payload
  %%             of identifier, string, string_literal and number_literal
  %%             tokens is accumulated in reverse and put back in order by
  %%             finalize_token/1 once the input is exhausted.
  %%   Pos     - {Row, Column} of the next character to be consumed.
  %%   State   - in_text, or {Name, Closer} where Name is one of
  %%             in_code, in_identifier, in_number, in_comment,
  %%             in_double_quote, in_double_quote_slash, in_single_quote,
  %%             in_single_quote_slash and Closer is the delimiter string
  %%             that ends the construct currently being scanned.
  %%
  %% Returns {ok, Tokens} in source order, or {error, Reason}.
  %%
  %% Clause order is significant: longer delimiters must be tried before
  %% their shorter prefixes, and the state-specific clauses must come
  %% before the generic {_, Closer} operator clauses.
  %%--------------------------------------------------------------------

  %% ---- end of input ------------------------------------------------
  scan([], Scanned, _, in_text) ->
      %% Folding from the newest token while prepending yields the tokens
      %% in source order, so no separate lists:reverse/1 pass is needed.
      Finalize = fun(Token, Acc) -> [finalize_token(Token) | Acc] end,
      {ok, lists:foldl(Finalize, [], Scanned)};
  scan([], _Scanned, _, {in_comment, _}) ->
      {error, "Reached end of file inside a comment."};
  scan([], _Scanned, _, _) ->
      {error, "Reached end of file inside a code block."};

  %% ---- openers (only recognised in plain text) and comment closers --
  scan("<!--{{" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_var, {Row, Column}, '<!--{{'} | Scanned],
           {Row, Column + length("<!--{{")}, {in_code, "}}-->"});
  scan("{{" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_var, {Row, Column}, '{{'} | Scanned],
           {Row, Column + 2}, {in_code, "}}"});
  scan("<!--{#" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, Scanned, {Row, Column + length("<!--{#")}, {in_comment, "#}-->"});
  scan("{#" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, Scanned, {Row, Column + 2}, {in_comment, "#}"});
  scan("#}-->" ++ T, Scanned, {Row, Column}, {in_comment, "#}-->"}) ->
      scan(T, Scanned, {Row, Column + length("#}-->")}, in_text);
  scan("#}" ++ T, Scanned, {Row, Column}, {in_comment, "#}"}) ->
      scan(T, Scanned, {Row, Column + 2}, in_text);
  scan("<!--{%" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_tag, {Row, Column}, '<!--{%'} | Scanned],
           {Row, Column + length("<!--{%")}, {in_code, "%}-->"});
  scan("{%" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_tag, {Row, Column}, '{%'} | Scanned],
           {Row, Column + 2}, {in_code, "%}"});

  %% ---- comment body: characters are dropped, position still tracked -
  scan([H | T], Scanned, Pos, {in_comment, Closer}) ->
      %% advance/2 moves to the next row on a newline so that tokens after
      %% a multi-line comment are reported at the correct position.
      scan(T, Scanned, advance(H, Pos), {in_comment, Closer});

  %% ---- plain text ---------------------------------------------------
  scan([H | T], Scanned, Pos, in_text) ->
      scan(T, append_text_char(Scanned, Pos, H), advance(H, Pos), in_text);

  %% ---- start of a quoted literal ------------------------------------
  %% Single-quoted literals are normalised to double quotes, hence the
  %% "\"" seed in both cases.
  scan("\"" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_double_quote, Closer});
  scan("\"" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_double_quote, Closer});
  scan("\'" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_single_quote, Closer});
  scan("\'" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_single_quote, Closer});

  %% ---- inside a quoted literal: backslash escapes --------------------
  %% The backslash and the character following it are both kept verbatim;
  %% the *_slash state only prevents an escaped quote from ending the
  %% literal.
  scan([$\\ | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
      scan(T, append_char(Scanned, $\\), {Row, Column + 1},
           {in_double_quote_slash, Closer});
  scan([H | T], Scanned, Pos, {in_double_quote_slash, Closer}) ->
      scan(T, append_char(Scanned, H), advance(H, Pos),
           {in_double_quote, Closer});
  scan([$\\ | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
      scan(T, append_char(Scanned, $\\), {Row, Column + 1},
           {in_single_quote_slash, Closer});
  scan([H | T], Scanned, Pos, {in_single_quote_slash, Closer}) ->
      scan(T, append_char(Scanned, H), advance(H, Pos),
           {in_single_quote, Closer});

  %% ---- inside a quoted literal: closing quote and ordinary chars -----
  scan("\"" ++ T, Scanned, {Row, Column}, {in_double_quote, Closer}) ->
      scan(T, append_char(Scanned, $\"), {Row, Column + 1}, {in_code, Closer});
  %% A closing single quote is stored as a double quote as well.
  scan("\'" ++ T, Scanned, {Row, Column}, {in_single_quote, Closer}) ->
      scan(T, append_char(Scanned, $\"), {Row, Column + 1}, {in_code, Closer});
  scan([H | T], Scanned, Pos, {in_double_quote, Closer}) ->
      scan(T, append_char(Scanned, H), advance(H, Pos),
           {in_double_quote, Closer});
  scan([H | T], Scanned, Pos, {in_single_quote, Closer}) ->
      scan(T, append_char(Scanned, H), advance(H, Pos),
           {in_single_quote, Closer});

  %% ---- closers of variables and tags ---------------------------------
  %% The column advances by the full width of the delimiter, matching how
  %% the corresponding openers are handled.
  scan("}}-->" ++ T, Scanned, {Row, Column}, {_, "}}-->"}) ->
      scan(T, [{close_var, {Row, Column}, '}}-->'} | Scanned],
           {Row, Column + length("}}-->")}, in_text);
  scan("}}" ++ T, Scanned, {Row, Column}, {_, "}}"}) ->
      scan(T, [{close_var, {Row, Column}, '}}'} | Scanned],
           {Row, Column + 2}, in_text);
  scan("%}-->" ++ T, Scanned, {Row, Column}, {_, "%}-->"}) ->
      scan(T, [{close_tag, {Row, Column}, '%}-->'} | Scanned],
           {Row, Column + length("%}-->")}, in_text);
  scan("%}" ++ T, Scanned, {Row, Column}, {_, "%}"}) ->
      scan(T, [{close_tag, {Row, Column}, '%}'} | Scanned],
           {Row, Column + 2}, in_text);

  %% ---- operators and punctuation -------------------------------------
  %% These end any identifier or number in progress and return to in_code.
  %% Two-character operators are listed before their one-character
  %% prefixes.
  scan("==" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'==', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
  scan("!=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'!=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
  scan(">=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'>=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
  scan("<=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'<=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
  scan("<" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'<', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan(">" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'>', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("(" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'(', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan(")" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{')', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("," ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{',', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("|" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'|', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'=', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan(":" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{':', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("." ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{'.', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
  %% A space produces no token; it only separates the surrounding ones.
  scan(" " ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, Scanned, {Row, Column + 1}, {in_code, Closer});

  %% ---- identifiers and numbers ---------------------------------------
  scan([H | T], Scanned, {Row, Column}, {in_code, Closer}) ->
      case char_type(H) of
          letter_underscore ->
              scan(T, [{identifier, {Row, Column}, [H]} | Scanned],
                   {Row, Column + 1}, {in_identifier, Closer});
          digit ->
              scan(T, [{number_literal, {Row, Column}, [H]} | Scanned],
                   {Row, Column + 1}, {in_number, Closer});
          _ ->
              illegal_character(Row, Column)
      end;
  scan([H | T], Scanned, {Row, Column}, {in_number, Closer}) ->
      case char_type(H) of
          digit ->
              scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_number, Closer});
          _ ->
              illegal_character(Row, Column)
      end;
  scan([H | T], Scanned, {Row, Column}, {in_identifier, Closer}) ->
      case char_type(H) of
          letter_underscore ->
              scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
          digit ->
              scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
          _ ->
              illegal_character(Row, Column)
      end.

  %% Position of the character that follows Char: a newline moves to the
  %% first column of the next row, anything else moves one column right.
  advance($\n, {Row, _Column}) ->
      {Row + 1, 1};
  advance(_Char, {Row, Column}) ->
      {Row, Column + 1}.

  %% Error term returned when a character cannot start or continue a token.
  illegal_character(Row, Column) ->
      {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])}}.

  %% Turn an accumulated token into its final form: payloads are put back
  %% in source order, identifiers become atoms, and identifiers that are
  %% reserved words get the type '<word>_keyword'. Operator and delimiter
  %% tokens pass through unchanged.
  finalize_token({identifier, Pos, RevChars}) ->
      Name = lists:reverse(RevChars),
      Type = case lists:member(Name, reserved_words()) of
                 true -> list_to_atom(Name ++ "_keyword");
                 false -> identifier
             end,
      %% NOTE(review): atoms are created from template text here, so
      %% templates from untrusted sources can grow the atom table.
      {Type, Pos, list_to_atom(Name)};
  finalize_token({Category, Pos, RevChars}) when Category =:= string;
                                                 Category =:= string_literal;
                                                 Category =:= number_literal ->
      {Category, Pos, lists:reverse(RevChars)};
  finalize_token(Other) ->
      Other.

  %% Identifiers that are reported as keyword tokens.
  reserved_words() ->
      [
       "autoescape", "endautoescape",
       "block", "endblock",
       "comment", "endcomment",
       %TODO "csrf_token",
       "cycle",
       "extends",
       "filter", "endfilter",
       "firstof",
       "for", "in", "empty", "endfor",
       "if", "else", "endif", "not", "or", "and",
       %TODO "ifchanged",
       "ifequal", "endifequal",
       "ifnotequal", "endifnotequal",
       "include",
       "now",
       %TODO "regroup",
       "spaceless", "endspaceless",
       "ssi",
       "templatetag", "openblock", "closeblock", "openvariable", "closevariable",
       "openbrace", "closebrace", "opencomment", "closecomment",
       %TODO "url",
       "widthratio",
       "call", "with", "endwith",
       "trans", "noop"
      ].
  212. % internal functions
  %% Push Char onto the payload (third element) of the newest token.
  %% Scanned is the newest-first token list; the payload is kept in
  %% reverse, so the new character goes at its head. The remaining tokens
  %% are returned untouched.
  append_char(Scanned, Char) ->
      [Newest | Older] = Scanned,
      Payload = [Char | element(3, Newest)],
      [setelement(3, Newest, Payload) | Older].
  %% Add one plain-text character to the newest-first token list.
  %%
  %%   Scanned       - tokens so far, newest first.
  %%   {Row, Column} - position of Char in the template.
  %%   Char          - the text character to record.
  %%
  %% If the newest token is already a string token, Char is pushed onto
  %% its (reversed) payload and the token keeps its original start
  %% position. Otherwise a new string token is opened at {Row, Column},
  %% i.e. at the position of its own first character rather than that of
  %% the preceding token.
  %%
  %% The cases are selected by pattern matching on the head of the list,
  %% which avoids walking the whole token list for every character.
  append_text_char([{string, Start, RevChars} | Older], _Pos, Char) ->
      [{string, Start, [Char | RevChars]} | Older];
  append_text_char(Scanned, {Row, Column}, Char) ->
      [{string, {Row, Column}, [Char]} | Scanned].
  %% Classify a character for identifier/number scanning.
  %% Returns letter_underscore for a-z, A-Z and underscore, digit for
  %% 0-9, and undefined for everything else.
  char_type(C) when C >= $a, C =< $z;
                    C >= $A, C =< $Z;
                    C == $_ ->
      letter_underscore;
  char_type(C) when C >= $0, C =< $9 ->
      digit;
  char_type(_) ->
      undefined.