%%%-------------------------------------------------------------------
%%% File: erlydtl_scanner.slex
%%% @author Andreas Stenius
%%% @copyright 2013 Andreas Stenius
%%% @doc
%%% erlydtl scanner
%%% @end
%%%
%%% The MIT License
%%%
%%% Copyright (c) 2013 Andreas Stenius
%%%
%%% Permission is hereby granted, free of charge, to any person obtaining a copy
%%% of this software and associated documentation files (the "Software"), to deal
%%% in the Software without restriction, including without limitation the rights
%%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%%% copies of the Software, and to permit persons to whom the Software is
%%% furnished to do so, subject to the following conditions:
%%%
%%% The above copyright notice and this permission notice shall be included in
%%% all copies or substantial portions of the Software.
%%%
%%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%%% THE SOFTWARE.
%%%
%%% @since 2013-11-05 by Andreas Stenius
%%%
%%% Rules based on the original erlydtl_scanner by Robert Saccon and Evan Miller.
%%%-------------------------------------------------------------------

-module erlydtl_scanner.
-function scan.
-init_state in_text.

form -compile(nowarn_unused_vars) end.
form -export([resume/1, format_error/1]) end.

%% Snapshot of a scan: the remaining template, the tokens scanned so far,
%% the current {Row, Column} position and the scanner state.
form \
  -record(scanner_state, { \
            template=[], \
            scanned=[], \
            pos={1,1}, \
            state=in_text \
           }) \
end.

%% Continue scanning from a previously captured #scanner_state{} by handing
%% its four fields back to scan/4.
form \
  resume(#scanner_state{ state=St, pos=P, \
                         scanned=Tokens, template=Rest }) -> \
    scan(Rest, Tokens, P, St) \
end.
%% Rule syntax: Prio Prefix|any|- InState[-]|any[+|-] [, Guard] : {: Body}|{[Action...,] NewState [until Closer]}.
%% `state-' means a state without a closer state.
%% Where Guard and Body are one erlang expression (see it as a begin ... end block): expr <erlang code> end

%% Open tags
%% The plain openers and their HTML-comment-wrapped variants. Each wrapped
%% opener arms the matching `...-->' closer, which is what the `}}-->',
%% `%}-->' and `#}-->' rules further down consume.
10 {{ in_text-: open_var, in_code until }}.
10 {% in_text-: open_tag, in_code until %}.
10 <!--{{ in_text-: open_var, in_code until }}-->.
10 <!--{% in_text-: open_tag, in_code until %}-->.

%% Comments
20 {# in_text-: in_comment until #}.
20 <!--{# in_text-: in_comment until #}-->.

%% `any+' will match the closer with the prefix..
30 #}--> any+: skip, in_text-.
30 #} any+: skip, in_text-.

%% must come before the `space any' rule
40 any in_comment: +comment_tag.
%% end comment rules

%% The rest is "just" text..
50 any in_text-: +string.

%% Quoted strings
%% The escape states take the character following a backslash verbatim and
%% then return to the enclosing quote state.
60 \" in_code: string_literal, in_double_quote.
62 \" in_double_quote: +string_literal, in_code.
64 \\ in_double_quote: +string_literal, in_double_quote_escape.
66 any in_double_quote: +string_literal.
68 any in_double_quote_escape: +string_literal, in_double_quote.

60 \' in_code: string_literal-\", in_single_quote.
62 \' in_single_quote: +string_literal-\", in_code.
64 \\ in_single_quote: +string_literal, in_single_quote_escape.
66 any in_single_quote: +string_literal.
68 any in_single_quote_escape: +string_literal, in_single_quote.

%% Close tags
70 }}--> any+: close_var, in_text-.
70 %}--> any+: close_tag, in_text-.
72 }} any+: close_var, in_text-.
%% A `%}' that ends a `{% verbatim %}' or `{% verbatim Tag %}' tag switches to
%% verbatim mode instead of emitting a close_tag: the tag's tokens are dropped
%% and replaced by an empty string token. The identifier on top of the token
%% stack has not been post-processed yet, so its value is still the reversed
%% character list ("mitabrev"); in the two-identifier case the earlier
%% `verbatim' identifier has already been turned into an atom.
72 %} any+: expr \
    case S of \
      [{identifier,_,"mitabrev"}, {open_tag,_,'{%'}|Ss] -> \
        scan(T, [{string, {R, C + 2}, ""} | Ss], \
             {R, C + 2}, {in_verbatim, undefined}); \
      [{identifier,_,Tag}, {identifier,_,verbatim}, {open_tag,_,'{%'}|Ss] -> \
        scan(T, [{string, {R, C + 2}, ""} | Ss], \
             {R, C + 2}, {in_verbatim, Tag}); \
      _ -> scan(T, [{close_tag, P, "%}"} | post_process(S, close_tag)], \
                {R, C + 2}, in_text) \
    end \
  end.

%% verbatim stuff
%% `{%' inside a verbatim block may start the end tag: remember the consumed
%% characters (reversed, hence "%{") so they can be put back into the text.
80 {% in_verbatim: expr scan(T, S, {R, C + 2}, {in_verbatim_code, {E, "%{"}}) end.
%% In `in_verbatim_code', E is {Tag, Backtrack}: Tag is the verbatim tag (or
%% undefined) and Backtrack is the text consumed since the `{%', kept in
%% reverse order so it can be restored as plain text if no end tag follows.

%% Space after `{%': keep it in the backtrack buffer.
82 \s in_verbatim_code: expr \
    {Tag, Backtrack} = E, \
    scan(T, S, {R, C + 1}, {in_verbatim_code, {Tag, [$\ |Backtrack]}}) \
  end.

%% `endverbatim%}' closes an untagged verbatim block (13 characters consumed).
84 'endverbatim%}' in_verbatim_code, expr element(1, E) =:= undefined end: expr scan(T, S, {R, C + 13}, in_text) end.

%% `endverbatim ' followed by more input: move on to reading the end tag.
%% E becomes {Tag, Backtrack, EndTag} with an empty EndTag.
86 'endverbatim ' in_verbatim_code: expr \
    {Tag, Backtrack} = E, \
    scan(T, S, {R, C + 12}, \
         {in_endverbatim_code, \
          {Tag, lists:reverse("endverbatim ", Backtrack), ""}}) \
  end.

%% Extra spaces before any end tag character has been read.
88 \s in_endverbatim_code, expr element(3, E) =:= "" end: expr \
    {Tag, Backtrack, EndTag} = E, \
    scan(T, S, {R, C + 1}, \
         {in_endverbatim_code, \
          {Tag, [$\ |Backtrack], EndTag}}) \
  end.

%% End tag characters (a-z, 0-9, _) are pushed onto both Backtrack and EndTag.
90 any in_endverbatim_code, expr \
    H >= $a andalso H =< $z orelse \
    H >= $0 andalso H =< $9 orelse H =:= $_ end: expr \
    {Tag, Backtrack, EndTag} = E, \
    scan(T, S, {R, C + 1}, \
         {in_endverbatim_code, \
          {Tag, [H|Backtrack], [H|EndTag]}}) \
  end.

%% Spaces after an end tag that equals the opening tag.
92 \s in_endverbatim_code, expr element(1, E) =:= element(3, E) end: expr \
    {Tag, Backtrack, Tag} = E, \
    scan(T, S, {R, C + 1}, \
         {in_endverbatim_code, \
          {Tag, [$\ |Backtrack], Tag}}) \
  end.

%% `%}' with a matching end tag: the verbatim block is done, back to text.
94 %} in_endverbatim_code, expr element(1, E) =:= element(3, E) end: expr scan(T, S, {R, C + 2}, (in_text)) end.

%% `%}' for an untagged block where no end tag was given.
96 %} in_endverbatim_code, expr element(1, E) =:= undefined andalso \
    element(3, E) =:= "" end: expr scan(T, S, {R, C + 2}, in_text) end.

%% Anything else: this was not the end tag after all. Put the current
%% character and the backtrack buffer onto the string token on top of the
%% token stack (or start a new string token) and return to `in_verbatim'.
%% A newline advances the row and resets the column.
98 any in_endverbatim_code: expr \
    {Tag, Backtrack, _} = E, \
    scan(T, \
         case S of \
           [{string,_, L}=M|Ss] -> \
             [setelement(3, M, [H|Backtrack] ++ L)|Ss]; \
           _ -> [{string, P, [H|Backtrack]}|S] \
         end, \
         case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
         {in_verbatim, Tag}) \
  end.

%% Same fallback for `in_verbatim_code': the `{%' did not start an end tag.
100 any in_verbatim_code: expr \
    {Tag, Backtrack} = E, \
    scan(T, \
         case S of \
           [{string,_, L}=M|Ss] -> \
             [setelement(3, M, [H|Backtrack] ++ L)|Ss]; \
           _ -> [{string, P, [H|Backtrack]}|S] \
         end, \
         case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
         {in_verbatim, Tag}) \
  end.
%% Plain verbatim text: push the character onto the string token on top of
%% the token stack (or start a new string token). A newline advances the row
%% and resets the column; E (the verbatim tag) is carried along unchanged.
102 any in_verbatim: expr \
    scan(T, \
         case S of \
           [{string,_, L}=M|Ss] -> \
             [setelement(3, M, [H|L])|Ss]; \
           _ -> [{string, P, [H]}|S] \
         end, \
         case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
         {in_verbatim, E}) \
  end.

%% Get back to `in_code' on these tokens:
110 == any: ==, in_code.
110 != any: !=, in_code.
110 >= any: >=, in_code.
110 <= any: <=, in_code.
110 > any: >, in_code.
110 < any: <, in_code.
110 ( any: (, in_code.
110 ) any: ), in_code.
110 \, any: \,, in_code.
110 | any: |, in_code.
110 = any: =, in_code.
110 \: any: \:, in_code.
110 \. any: \., in_code.
110 \_( any: \_ \(, in_code.

%% Eat space (and get back to `in_code')
%% note that `any' here will match states *with* a closer, i.e. not `in_text'.
%% (`any-' would match any stateless state.)
110 \s any: skip, in_code.

%% An identifier starts with a letter or underscore.
120 any in_code, expr \
    (H >= $a andalso H =< $z) orelse \
    (H >= $A andalso H =< $Z) orelse \
    H == $_ \
  end: identifier, in_identifier.

%% A number starts with a digit or a minus sign.
%% NOTE(review): a `-' that is not followed by a digit would leave a
%% number_literal of just "-", which the `lists reverse, list_to_integer'
%% post-processing below presumably cannot convert -- confirm.
122 any in_code, expr \
    (H >= $0 andalso H =< $9) orelse H == $- \
  end: number_literal, in_number.

%% Any other character in code is an error; the scanner state is returned
%% along with it (see return_error/5).
124 any in_code: expr return_error({illegal_char, H}, P, [H|T], S, St) end.

%% Only digits may continue a number.
130 any in_number, expr H >= $0 andalso H =< $9 end: +number_literal.

132 any in_number: expr return_error({illegal_char, H}, P, [H|T], S, St) end.

%% Letters, digits and underscore continue an identifier.
140 any in_identifier, expr \
    (H >= $a andalso H =< $z) orelse \
    (H >= $A andalso H =< $Z) orelse \
    (H >= $0 andalso H =< $9) orelse \
    H == $_ \
  end: +identifier, in_identifier.

142 any in_identifier: expr return_error({illegal_char, H}, P, [H|T], S, St) end.

%% End of input: fine in plain text (the tokens are post-processed and
%% returned in scan order), an error inside a comment or any other state.
200 : in_text- : expr \
    {ok, lists:reverse(post_process(S,eof))} \
  end.
202 : in_comment : expr return_error({eof, in_comment}, P) end.
204 : any : expr return_error({eof, in_code}, P) end.

%% Process tokens as we parse them
string: lists reverse.
string_literal: lists reverse.
comment_tag: lists reverse.
number_literal: lists reverse, list_to_integer.
open_var: to_atom.
close_var: to_atom.
open_tag: to_atom.
close_tag: to_atom.
%% Identifier post-processing: pick the keyword class to test against.
%% (Presumably the token names around `identifier' describe its neighbouring
%% tokens -- confirm against the slex documentation.)
open_tag identifier, close_tag: expr is_keyword(all, T) end.
open_tag identifier: expr is_keyword(open_tag, T) end.
identifier, close_tag: expr is_keyword(close_tag, T) end.
identifier: expr is_keyword(any, T) end.

%% Utility functions

%% return_error/5: error result that also carries a #scanner_state{} with the
%% remaining template T, the post-processed tokens, the position and state.
form return_error(Error, P, T, S, St) -> \
  {error, \
   {P, erlydtl_scanner, Error}, \
   #scanner_state{ template=T, \
                   scanned=post_process(S, err), \
                   pos=P, state=St } \
  } \
end.

%% return_error/2: error result without scanner state.
form return_error(Error, P) -> {error, {P, erlydtl_scanner, Error}} end.

%% NOTE(review): to_atom/1 calls list_to_atom/1 on text taken from the
%% template, and atoms are never garbage collected. Confirm templates are
%% trusted input before exposing the scanner to arbitrary data.
form to_atom(L) when is_list(L) -> list_to_atom(L) end.

%% to_keyword/2: build a {'<name>_keyword', Pos, Name} token.
form to_keyword(L, P) -> {to_atom(L ++ "_keyword"), P, L} end.

%% atomize/2: replace the token value (element 3) with the atom for L.
form atomize(L, T) -> setelement(3, T, to_atom(L)) end.

%% is_keyword/2 has two roles:
%%  - given a token tuple, reverse its accumulated value and return either a
%%    keyword token (to_keyword/2) or the token with an atom value (atomize/2);
%%  - given a class (or list of classes) and a string, return whether the
%%    string is a keyword of that class. `all', `open_tag' and `close_tag'
%%    expand to lists of the base classes `any', `open' and `close'.
form \
  is_keyword(Class, {_, _, L} = T) -> \
    L1 = lists:reverse(L), \
    case is_keyword(Class, L1) of \
      true -> to_keyword(L1, element(2, T)); \
      false -> atomize(L1, T) \
    end; \
  is_keyword([C|Cs], L) -> \
    is_keyword(C, L) orelse \
      is_keyword(Cs, L); \
  is_keyword(all, L) -> is_keyword([any, open, close], L); \
  is_keyword(open_tag, L) -> is_keyword([any, open], L); \
  is_keyword(close_tag, L) -> is_keyword([any, close], L); \
  \
  is_keyword(any, "in") -> true; \
  is_keyword(any, "not") -> true; \
  is_keyword(any, "or") -> true; \
  is_keyword(any, "and") -> true; \
  is_keyword(any, "as") -> true; \
  is_keyword(any, "by") -> true; \
  is_keyword(any, "with") -> true; \
  is_keyword(any, "from") -> true; \
  is_keyword(any, "count") -> true; \
  is_keyword(any, "context") -> true; \
  \
  is_keyword(close, "only") -> true; \
  is_keyword(close, "parsed") -> true; \
  is_keyword(close, "noop") -> true; \
  is_keyword(close, "reversed") -> true; \
  is_keyword(close, "openblock") -> true; \
  is_keyword(close, "closeblock") -> true; \
  is_keyword(close, "openvariable") -> true; \
  is_keyword(close, "closevariable") -> true; \
  is_keyword(close, "openbrace") -> true; \
  is_keyword(close, "closebrace") -> true; \
  is_keyword(close, "opencomment") -> true; \
  is_keyword(close, "closecomment") -> true; \
  \
  is_keyword(open, "autoescape") -> true; \
  is_keyword(open, "endautoescape") -> true; \
  is_keyword(open, "block") -> true; \
  is_keyword(open, "endblock") -> true; \
  is_keyword(open, "comment") -> true; \
  is_keyword(open, "endcomment") -> true; \
  is_keyword(open, "cycle") -> true; \
  is_keyword(open, "extends") -> true; \
  is_keyword(open, "filter") -> true; \
  is_keyword(open, "endfilter") -> true; \
  is_keyword(open, "firstof") -> true; \
  is_keyword(open, "for") -> true; \
  is_keyword(open, "empty") -> true; \
  is_keyword(open, "endfor") -> true; \
  is_keyword(open, "if") -> true; \
  is_keyword(open, "elif") -> true; \
  is_keyword(open, "else") -> true; \
  is_keyword(open, "endif") -> true; \
  is_keyword(open, "ifchanged") -> true; \
  is_keyword(open, "endifchanged") -> true; \
  is_keyword(open, "ifequal") -> true; \
  is_keyword(open, "endifequal") -> true; \
  is_keyword(open, "ifnotequal") -> true; \
  is_keyword(open, "endifnotequal") -> true; \
  is_keyword(open, "include") -> true; \
  is_keyword(open, "now") -> true; \
  is_keyword(open, "regroup") -> true; \
  is_keyword(open, "endregroup") -> true; \
  is_keyword(open, "spaceless") -> true; \
  is_keyword(open, "endspaceless") -> true; \
  is_keyword(open, "ssi") -> true; \
  is_keyword(open, "templatetag") -> true; \
  is_keyword(open, "widthratio") -> true; \
  is_keyword(open, "call") -> true; \
  is_keyword(open, "endwith") -> true; \
  is_keyword(open, "trans") -> true; \
  is_keyword(open, "blocktrans") -> true; \
  is_keyword(open, "endblocktrans") -> true; \
  is_keyword(open, "load") -> true; \
  is_keyword(open, "plural") -> true; \
  is_keyword(_, _) -> false \
end.

%% format_error/1: human readable text for the errors produced above.
%% The character is formatted with `~ts' rather than `~s' so that an illegal
%% character with a code point above 255 does not make io_lib:format/2 fail.
form format_error({illegal_char, C}) -> \
  io_lib:format("Illegal character '~ts'", [[C]]); \
format_error({eof, Where}) -> \
  io_lib:format("Unexpected end of file ~s", [format_where(Where)]) \
end.

%% format_where/1: wording for the location of an unexpected end of file.
form format_where(in_comment) -> "in comment"; \
format_where(in_code) -> "in code block" \
end.