erlydtl_scanner.erl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. %%%-------------------------------------------------------------------
  2. %%% File: erlydtl_scanner.erl
  3. %%% @author Roberto Saccon <rsaccon@gmail.com> [http://rsaccon.com]
  4. %%% @author Evan Miller <emmiller@gmail.com>
  5. %%% @copyright 2008 Roberto Saccon, Evan Miller
  6. %%% @doc
  7. %%% Template language scanner
  8. %%% @end
  9. %%%
  10. %%% The MIT License
  11. %%%
  12. %%% Copyright (c) 2007 Roberto Saccon, Evan Miller
  13. %%%
  14. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  15. %%% of this software and associated documentation files (the "Software"), to deal
  16. %%% in the Software without restriction, including without limitation the rights
  17. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  18. %%% copies of the Software, and to permit persons to whom the Software is
  19. %%% furnished to do so, subject to the following conditions:
  20. %%%
  21. %%% The above copyright notice and this permission notice shall be included in
  22. %%% all copies or substantial portions of the Software.
  23. %%%
  24. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  25. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  26. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  27. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  28. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  29. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  30. %%% THE SOFTWARE.
  31. %%%
  32. %%% @since 2007-11-11 by Roberto Saccon, Evan Miller
  33. %%%-------------------------------------------------------------------
  34. -module(erlydtl_scanner).
  35. -author('rsaccon@gmail.com').
  36. -author('emmiller@gmail.com').
  37. -export([scan/1]).
  38. %%====================================================================
  39. %% API
  40. %%====================================================================
  %%--------------------------------------------------------------------
  %% @spec scan(T::template()) -> {ok, S::tokens()} | {error, Reason}
  %% @type template() = string() | binary(). Template to parse
  %% @type tokens() = [tuple()].
  %% @doc Scan the template T and return either a token list or an
  %% error.
  %%
  %% Each token is a `{Type, {Row, Column}, Chars}' tuple; scanning
  %% starts at row 1, column 1 in plain-text state.
  %%
  %% A binary template is converted with `binary_to_list/1' before
  %% scanning, i.e. it is scanned byte by byte without any character
  %% decoding.
  %% @end
  %%--------------------------------------------------------------------
  scan(Template) when is_binary(Template) ->
      %% The documented template() type includes binaries, but the
      %% scanner itself only pattern-matches on lists.
      scan(binary_to_list(Template));
  scan(Template) ->
      scan(Template, [], {1, 1}, in_text).
  %%--------------------------------------------------------------------
  %% scan(Template, Scanned, {Row, Column}, State)
  %%
  %% Character-driven state machine behind scan/1.
  %%
  %%   Template       - remaining input, a list of characters.
  %%   Scanned        - tokens found so far, most recent first. Each
  %%                    token is {Type, {Row, Column}, Chars} and Chars
  %%                    is kept in REVERSE order until end of input.
  %%   {Row, Column}  - position of the next character to be consumed.
  %%   State          - in_text, or {StateName, Closer} where Closer is
  %%                    the delimiter that ends the current construct
  %%                    and StateName is one of in_comment, in_code,
  %%                    in_identifier, in_number, in_double_quote,
  %%                    in_single_quote, in_double_quote_slash,
  %%                    in_single_quote_slash.
  %%
  %% Returns {ok, Tokens} (in source order, text un-reversed, keyword
  %% identifiers retagged) or {error, Message}.
  %%
  %% Clause order matters: longer delimiters are listed before their
  %% shorter prefixes, and the state-specific clauses come before the
  %% generic {_, Closer} ones.
  %%--------------------------------------------------------------------

  %% --- end of input -------------------------------------------------
  scan([], Scanned, _, in_text) ->
      {ok, lists:reverse([finalize_token(Token) || Token <- Scanned])};
  scan([], _Scanned, _, {in_comment, _}) ->
      {error, "Reached end of file inside a comment."};
  scan([], _Scanned, _, _) ->
      {error, "Reached end of file inside a code block."};

  %% --- openers seen in plain text, and comment handling -------------
  scan("<!--{{" ++ T, Scanned, {Row, Column}, in_text) ->
      %% Stored reversed like every other multi-character token, so the
      %% final reversal at end of input restores the original spelling.
      scan(T, [{open_var, {Row, Column}, lists:reverse("<!--{{")} | Scanned],
           {Row, Column + length("<!--{{")}, {in_code, "}}-->"});
  scan("{{" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_var, {Row, Column}, "{{"} | Scanned],
           {Row, Column + 2}, {in_code, "}}"});
  scan("<!--{#" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, Scanned, {Row, Column + length("<!--{#")}, {in_comment, "#}-->"});
  scan("{#" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, Scanned, {Row, Column + 2}, {in_comment, "#}"});
  scan("#}-->" ++ T, Scanned, {Row, Column}, {in_comment, "#}-->"}) ->
      scan(T, Scanned, {Row, Column + length("#}-->")}, in_text);
  scan("#}" ++ T, Scanned, {Row, Column}, {in_comment, "#}"}) ->
      scan(T, Scanned, {Row, Column + 2}, in_text);
  scan("<!--{%" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_tag, {Row, Column}, lists:reverse("<!--{%")} | Scanned],
           {Row, Column + length("<!--{%")}, {in_code, "%}-->"});
  scan("{%" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, [{open_tag, {Row, Column}, lists:reverse("{%")} | Scanned],
           {Row, Column + 2}, {in_code, "%}"});
  scan("\n" ++ T, Scanned, {Row, _Column}, {in_comment, Closer}) ->
      %% Comment content is dropped, but a line break must still move
      %% the position to the next row so later tokens are located
      %% correctly.
      scan(T, Scanned, {Row + 1, 1}, {in_comment, Closer});
  scan([_ | T], Scanned, {Row, Column}, {in_comment, Closer}) ->
      scan(T, Scanned, {Row, Column + 1}, {in_comment, Closer});

  %% --- plain text -----------------------------------------------------
  scan("\n" ++ T, Scanned, {Row, Column}, in_text) ->
      scan(T, append_text_char(Scanned, {Row, Column}, $\n), {Row + 1, 1}, in_text);
  scan([H | T], Scanned, {Row, Column}, in_text) ->
      scan(T, append_text_char(Scanned, {Row, Column}, H), {Row, Column + 1}, in_text);

  %% --- string literals ------------------------------------------------
  %% Single-quoted literals are recorded with double quotes, so both
  %% spellings produce the same string_literal text.
  scan("\"" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_double_quote, Closer});
  scan("\"" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_double_quote, Closer});
  scan("\'" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_single_quote, Closer});
  scan("\'" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
      scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
           {Row, Column + 1}, {in_single_quote, Closer});

  %% A backslash is kept in the literal; the *_slash state then accepts
  %% the following character verbatim, so an escaped quote does not end
  %% the literal.
  scan([$\\ | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
      scan(T, append_char(Scanned, $\\), {Row, Column + 1}, {in_double_quote_slash, Closer});
  scan([H | T], Scanned, {Row, Column}, {in_double_quote_slash, Closer}) ->
      scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_double_quote, Closer});
  scan([$\\ | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
      scan(T, append_char(Scanned, $\\), {Row, Column + 1}, {in_single_quote_slash, Closer});
  scan([H | T], Scanned, {Row, Column}, {in_single_quote_slash, Closer}) ->
      scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_single_quote, Closer});

  %% Closing quote: always appended as a double quote (character 34).
  scan("\"" ++ T, Scanned, {Row, Column}, {in_double_quote, Closer}) ->
      scan(T, append_char(Scanned, 34), {Row, Column + 1}, {in_code, Closer});
  scan("\'" ++ T, Scanned, {Row, Column}, {in_single_quote, Closer}) ->
      scan(T, append_char(Scanned, 34), {Row, Column + 1}, {in_code, Closer});
  scan([H | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
      scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_double_quote, Closer});
  scan([H | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
      scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_single_quote, Closer});

  %% --- punctuation and blanks inside code -----------------------------
  %% These end any identifier or number in progress and return to
  %% in_code. Quote and comment states never get here because their
  %% clauses above match first.
  scan("," ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{comma, {Row, Column}, ","} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("|" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{pipe, {Row, Column}, "|"} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{equal, {Row, Column}, "="} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan(":" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{colon, {Row, Column}, ":"} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan("." ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, [{dot, {Row, Column}, "."} | Scanned], {Row, Column + 1}, {in_code, Closer});
  scan(" " ++ T, Scanned, {Row, Column}, {_, Closer}) ->
      scan(T, Scanned, {Row, Column + 1}, {in_code, Closer});

  %% --- closers: only the one matching the opener is accepted ----------
  %% The column advances by the full width of the consumed delimiter.
  scan("}}-->" ++ T, Scanned, {Row, Column}, {_, "}}-->"}) ->
      scan(T, [{close_var, {Row, Column}, lists:reverse("}}-->")} | Scanned],
           {Row, Column + length("}}-->")}, in_text);
  scan("}}" ++ T, Scanned, {Row, Column}, {_, "}}"}) ->
      scan(T, [{close_var, {Row, Column}, "}}"} | Scanned], {Row, Column + 2}, in_text);
  scan("%}-->" ++ T, Scanned, {Row, Column}, {_, "%}-->"}) ->
      scan(T, [{close_tag, {Row, Column}, lists:reverse("%}-->")} | Scanned],
           {Row, Column + length("%}-->")}, in_text);
  scan("%}" ++ T, Scanned, {Row, Column}, {_, "%}"}) ->
      scan(T, [{close_tag, {Row, Column}, lists:reverse("%}")} | Scanned],
           {Row, Column + 2}, in_text);

  %% --- identifiers and numbers ----------------------------------------
  scan([H | T], Scanned, {Row, Column}, {in_code, Closer}) ->
      case char_type(H) of
          letter_underscore ->
              scan(T, [{identifier, {Row, Column}, [H]} | Scanned],
                   {Row, Column + 1}, {in_identifier, Closer});
          digit ->
              scan(T, [{number_literal, {Row, Column}, [H]} | Scanned],
                   {Row, Column + 1}, {in_number, Closer});
          _ ->
              illegal_character(Row, Column)
      end;
  scan([H | T], Scanned, {Row, Column}, {in_number, Closer}) ->
      case char_type(H) of
          digit ->
              scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_number, Closer});
          _ ->
              illegal_character(Row, Column)
      end;
  scan([H | T], Scanned, {Row, Column}, {in_identifier, Closer}) ->
      case char_type(H) of
          letter_underscore ->
              scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
          digit ->
              scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
          _ ->
              illegal_character(Row, Column)
      end.

  %% Restore a token's text to source order; an identifier whose name is
  %% a template keyword is retagged as '<name>_keyword'.
  finalize_token({identifier, Pos, String}) ->
      Name = lists:reverse(String),
      case lists:member(Name, keywords()) of
          true ->
              %% Only names from the fixed keywords() list reach
              %% list_to_atom/1, so the set of created atoms is bounded.
              {list_to_atom(Name ++ "_keyword"), Pos, Name};
          false ->
              {identifier, Pos, Name}
      end;
  finalize_token({Type, Pos, String}) ->
      {Type, Pos, lists:reverse(String)}.

  %% Identifier names that the scanner reports as keyword tokens.
  keywords() ->
      ["for", "endfor", "in", "include", "block", "endblock",
       "extends", "autoescape", "endautoescape", "if", "else", "endif",
       "not", "or", "and", "comment", "endcomment", "cycle", "firstof",
       "ifchanged", "ifequal", "endifequal", "ifnotequal", "endifnotequal",
       "now", "regroup", "spaceless", "endspaceless", "ssi", "templatetag",
       "load", "call", "with"].

  %% Error result for a character that is not allowed at Row/Column.
  illegal_character(Row, Column) ->
      {error, io_lib:format("Illegal character line ~p column ~p", [Row, Column])}.
  174. % internal functions
  %% Prepend Char to the text (third element) of the most recent token.
  %% Token text is held in reverse order while scanning, so prepending
  %% here corresponds to appending in the source. Scanned must be
  %% non-empty.
  append_char(Scanned, Char) ->
      [Current | Older] = Scanned,
      Chars = element(3, Current),
      Updated = setelement(3, Current, [Char | Chars]),
      [Updated | Older].
  %% Add one plain-text character to the token list.
  %%
  %%   Scanned        - tokens so far, most recent first.
  %%   {Row, Column}  - position of Char in the template.
  %%   Char           - the character to record.
  %%
  %% If the most recent token is a text token, Char is prepended to its
  %% (reversed) text and the token keeps its original start position.
  %% Otherwise a new text token is started at {Row, Column}, the
  %% position of Char itself rather than that of the preceding token.
  %%
  %% Matching on the list head avoids walking the whole token list with
  %% length/1 for every character.
  append_text_char([{text, Start, Chars} | Older], {_Row, _Column}, Char) ->
      [{text, Start, [Char | Chars]} | Older];
  append_text_char(Scanned, {Row, Column}, Char) ->
      [{text, {Row, Column}, [Char]} | Scanned].
  %% Classify a character for identifier and number scanning:
  %%   letter_underscore - a..z, A..Z or underscore
  %%   digit             - 0..9
  %%   undefined         - anything else
  char_type(C) when C >= $a, C =< $z -> letter_underscore;
  char_type(C) when C >= $A, C =< $Z -> letter_underscore;
  char_type(C) when C == $_ -> letter_underscore;
  char_type(C) when C >= $0, C =< $9 -> digit;
  char_type(_) -> undefined.