123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315 |
- %%%-------------------------------------------------------------------
- %%% File: erlydtl_scanner.erl
- %%% @author Roberto Saccon <rsaccon@gmail.com> [http://rsaccon.com]
- %%% @author Evan Miller <emmiller@gmail.com>
- %%% @copyright 2008 Roberto Saccon, Evan Miller
- %%% @doc
- %%% Template language scanner
- %%% @end
- %%%
- %%% The MIT License
- %%%
- %%% Copyright (c) 2007 Roberto Saccon, Evan Miller
- %%%
- %%% Permission is hereby granted, free of charge, to any person obtaining a copy
- %%% of this software and associated documentation files (the "Software"), to deal
- %%% in the Software without restriction, including without limitation the rights
- %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- %%% copies of the Software, and to permit persons to whom the Software is
- %%% furnished to do so, subject to the following conditions:
- %%%
- %%% The above copyright notice and this permission notice shall be included in
- %%% all copies or substantial portions of the Software.
- %%%
- %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- %%% THE SOFTWARE.
- %%%
- %%% @since 2007-11-11 by Roberto Saccon, Evan Miller
- %%%-------------------------------------------------------------------
- -module(erlydtl_scanner).
- -author('rsaccon@gmail.com').
- -author('emmiller@gmail.com').
- -export([scan/1]).
- %%====================================================================
- %% API
- %%====================================================================
%%--------------------------------------------------------------------
%% @spec scan(T::template()) -> {ok, S::tokens()} | {error, Reason}
%% @type template() = string() | binary(). Template to parse
%% @type tokens() = [tuple()].
%% @doc Scan the template T and return a token list or an error.
%% A binary template is first converted with binary_to_list/1, so it is
%% scanned byte by byte and reported columns count bytes.
%% @end
%%--------------------------------------------------------------------
scan(Template) when is_binary(Template) ->
    scan(binary_to_list(Template));
scan(Template) ->
    % Start in plain-text mode at row 1, column 1 with no tokens collected.
    scan(Template, [], {1, 1}, in_text).
%% scan(Input, Scanned, {Row, Column}, State) ->
%%     {ok, Tokens} | {error, Reason}
%%
%% Character-driven state machine that turns a template string into tokens.
%%
%%   Input          - the characters that remain to be scanned.
%%   Scanned        - tokens found so far, newest first. The text of string,
%%                    string_literal, number_literal and identifier tokens is
%%                    also accumulated in reverse; both are put in order by
%%                    the end-of-input clause.
%%   {Row, Column}  - position of the first character of Input.
%%   State          - in_text, or {StateName, Closer} where Closer is the
%%                    delimiter that ends the current comment/variable/tag and
%%                    StateName is one of in_comment, in_code, in_identifier,
%%                    in_number, in_double_quote, in_single_quote,
%%                    in_double_quote_slash, in_single_quote_slash.
%%
%% Clause order matters: clauses are tried top to bottom and several later
%% clauses use a wildcard state name, relying on earlier clauses to have
%% already consumed the comment and quoted-string states.

%% --- end of input ---------------------------------------------------
scan([], Scanned, _, in_text) ->
    {ok, lists:reverse(lists:map(fun finalize_token/1, Scanned))};
scan([], _Scanned, _, {in_comment, _}) ->
    {error, "Reached end of file inside a comment."};
scan([], _Scanned, _, _) ->
    {error, "Reached end of file inside a code block."};

%% --- openers and comment closers ------------------------------------
scan("<!--{{" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, [{open_var, {Row, Column}, '<!--{{'} | Scanned],
         {Row, Column + length("<!--{{")}, {in_code, "}}-->"});
scan("{{" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, [{open_var, {Row, Column}, '{{'} | Scanned],
         {Row, Column + 2}, {in_code, "}}"});
scan("<!--{#" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, Scanned, {Row, Column + length("<!--{#")}, {in_comment, "#}-->"});
scan("{#" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, Scanned, {Row, Column + 2}, {in_comment, "#}"});
scan("#}-->" ++ T, Scanned, {Row, Column}, {in_comment, "#}-->"}) ->
    scan(T, Scanned, {Row, Column + length("#}-->")}, in_text);
scan("#}" ++ T, Scanned, {Row, Column}, {in_comment, "#}"}) ->
    scan(T, Scanned, {Row, Column + 2}, in_text);
scan("<!--{%" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, [{open_tag, {Row, Column}, '<!--{%'} | Scanned],
         {Row, Column + length("<!--{%")}, {in_code, "%}-->"});
scan("{%" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, [{open_tag, {Row, Column}, '{%'} | Scanned],
         {Row, Column + 2}, {in_code, "%}"});

%% --- comment body: discarded, but positions must keep advancing ------
scan("\n" ++ T, Scanned, {Row, _Column}, {in_comment, Closer}) ->
    % A line break inside a comment starts a new row, exactly as in text.
    scan(T, Scanned, {Row + 1, 1}, {in_comment, Closer});
scan([_ | T], Scanned, {Row, Column}, {in_comment, Closer}) ->
    scan(T, Scanned, {Row, Column + 1}, {in_comment, Closer});

%% --- plain text -------------------------------------------------------
scan("\n" ++ T, Scanned, {Row, Column}, in_text) ->
    scan(T, append_text_char(Scanned, {Row, Column}, $\n), {Row + 1, 1}, in_text);
scan([H | T], Scanned, {Row, Column}, in_text) ->
    scan(T, append_text_char(Scanned, {Row, Column}, H), {Row, Column + 1}, in_text);

%% --- string literals --------------------------------------------------
% An opening quote starts a new string_literal token. Single-quoted
% literals are stored with double quotes, so both forms yield the same text.
scan("\"" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
         {Row, Column + 1}, {in_double_quote, Closer});
scan("\"" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
         {Row, Column + 1}, {in_double_quote, Closer});
scan("\'" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
         {Row, Column + 1}, {in_single_quote, Closer});
scan("\'" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned],
         {Row, Column + 1}, {in_single_quote, Closer});
% A backslash is kept in the literal and the character after it is taken
% verbatim, so an escaped quote does not end the literal.
scan([$\\ | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
    scan(T, append_char(Scanned, $\\), {Row, Column + 1}, {in_double_quote_slash, Closer});
scan([H | T], Scanned, {Row, Column}, {in_double_quote_slash, Closer}) ->
    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_double_quote, Closer});
scan([$\\ | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
    scan(T, append_char(Scanned, $\\), {Row, Column + 1}, {in_single_quote_slash, Closer});
scan([H | T], Scanned, {Row, Column}, {in_single_quote_slash, Closer}) ->
    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_single_quote, Closer});
% end quote
scan("\"" ++ T, Scanned, {Row, Column}, {in_double_quote, Closer}) ->
    scan(T, append_char(Scanned, 34), {Row, Column + 1}, {in_code, Closer});
% treat single quotes the same as double quotes (34 is $")
scan("\'" ++ T, Scanned, {Row, Column}, {in_single_quote, Closer}) ->
    scan(T, append_char(Scanned, 34), {Row, Column + 1}, {in_code, Closer});
scan([H | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_double_quote, Closer});
scan([H | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_single_quote, Closer});

%% --- closers ----------------------------------------------------------
% From here on the state name is a wildcard: comment and quote states were
% fully handled above, so only in_code, in_identifier and in_number remain.
% The column advances by the full width of the closer that was consumed.
scan("}}-->" ++ T, Scanned, {Row, Column}, {_, "}}-->"}) ->
    scan(T, [{close_var, {Row, Column}, '}}-->'} | Scanned],
         {Row, Column + length("}}-->")}, in_text);
scan("}}" ++ T, Scanned, {Row, Column}, {_, "}}"}) ->
    scan(T, [{close_var, {Row, Column}, '}}'} | Scanned],
         {Row, Column + 2}, in_text);
scan("%}-->" ++ T, Scanned, {Row, Column}, {_, "%}-->"}) ->
    scan(T, [{close_tag, {Row, Column}, '%}-->'} | Scanned],
         {Row, Column + length("%}-->")}, in_text);
scan("%}" ++ T, Scanned, {Row, Column}, {_, "%}"}) ->
    scan(T, [{close_tag, {Row, Column}, '%}'} | Scanned],
         {Row, Column + 2}, in_text);

%% --- operators and punctuation -----------------------------------------
% Two-character operators must be tried before their one-character prefixes.
scan("==" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'==', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
scan("!=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'!=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
scan(">=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'>=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
scan("<=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'<=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
scan("<" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'<', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan(">" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'>', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan("(" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'(', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan(")" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{')', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan("," ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{',', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan("|" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'|', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan("=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'=', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan(":" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{':', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
scan("." ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, [{'.', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
% A space produces no token; it only ends the current identifier or number.
scan(" " ++ T, Scanned, {Row, Column}, {_, Closer}) ->
    scan(T, Scanned, {Row, Column + 1}, {in_code, Closer});

%% --- identifiers and numbers -------------------------------------------
scan([H | T], Scanned, {Row, Column}, {in_code, Closer}) ->
    case char_type(H) of
        letter_underscore ->
            scan(T, [{identifier, {Row, Column}, [H]} | Scanned],
                 {Row, Column + 1}, {in_identifier, Closer});
        digit ->
            scan(T, [{number_literal, {Row, Column}, [H]} | Scanned],
                 {Row, Column + 1}, {in_number, Closer});
        _ ->
            {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])}}
    end;
scan([H | T], Scanned, {Row, Column}, {in_number, Closer}) ->
    case char_type(H) of
        digit ->
            scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_number, Closer});
        _ ->
            {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])}}
    end;
scan([H | T], Scanned, {Row, Column}, {in_identifier, Closer}) ->
    case char_type(H) of
        letter_underscore ->
            scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
        digit ->
            scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
        _ ->
            {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])}}
    end.

%% Put one accumulated token into its final form: un-reverse its text and
%% classify identifiers as keywords where applicable.
%%
%% NOTE: every identifier is converted with list_to_atom/1. Atoms are never
%% garbage collected, so templates should come from a trusted source.
finalize_token({identifier, Pos, String}) ->
    RevString = lists:reverse(String),
    Type = case lists:member(RevString, keywords()) of
               true -> list_to_atom(RevString ++ "_keyword");
               false -> identifier
           end,
    {Type, Pos, list_to_atom(RevString)};
finalize_token({Category, Pos, String}) when Category =:= string;
                                             Category =:= string_literal;
                                             Category =:= number_literal ->
    {Category, Pos, lists:reverse(String)};
finalize_token(Other) ->
    % Delimiter and operator tokens carry no accumulated text.
    Other.

%% Identifiers that are reported as '<name>_keyword' tokens.
keywords() ->
    [
     "autoescape", "endautoescape",
     "block", "endblock",
     "comment", "endcomment",
     %TODO "csrf_token",
     "cycle",
     "extends",
     "filter", "endfilter",
     "firstof",
     "for", "in", "empty", "endfor",
     "if", "else", "endif", "not", "or", "and",
     %TODO "ifchanged",
     "ifequal", "endifequal",
     "ifnotequal", "endifnotequal",
     "include",
     "now",
     %TODO "regroup",
     "spaceless", "endspaceless",
     "ssi",
     "templatetag", "openblock", "closeblock", "openvariable", "closevariable",
     "openbrace", "closebrace", "opencomment", "closecomment",
     %TODO "url",
     "widthratio",
     "call", "with", "endwith",
     "trans", "noop"
    ].
- % internal functions
%% Push Char onto the text of the newest token in Scanned.
%% The newest token is the head of Scanned and keeps its text in element 3,
%% stored in reverse, so the character is consed onto the front.
append_char(Scanned, Char) ->
    [Current | Older] = Scanned,
    Text = element(3, Current),
    Updated = setelement(3, Current, [Char | Text]),
    [Updated | Older].
%% Add one character of plain template text to the token list.
%%
%%   Scanned        - tokens so far, newest first.
%%   {Row, Column}  - position of Char in the template.
%%   Char           - the text character to record.
%%
%% If the newest token is already a string token, Char is consed onto its
%% (reversed) text and the token keeps the position where it started.
%% Otherwise a new string token is opened at {Row, Column}, the position of
%% its own first character.
%%
%% Matching on the head of the list keeps this constant-time per character.
append_text_char([{string, Pos, Text} | Rest], {_Row, _Column}, Char) ->
    [{string, Pos, [Char | Text]} | Rest];
append_text_char(Scanned, {Row, Column}, Char) ->
    [{string, {Row, Column}, [Char]} | Scanned].
%% Classify a character for the identifier/number scanner.
%% Returns letter_underscore for a-z, A-Z and '_', digit for 0-9,
%% and undefined for anything else.
char_type(C) when C >= $a, C =< $z;
                  C >= $A, C =< $Z;
                  C == $_ ->
    letter_underscore;
char_type(C) when C >= $0, C =< $9 ->
    digit;
char_type(_) ->
    undefined.
|