Browse Source

Switch to new slex based scanner.

Andreas Stenius 11 years ago
parent
commit
9af4b7d837
5 changed files with 893 additions and 420 deletions
  1. 2 0
      .gitignore
  2. 18 2
      Makefile
  3. 8 0
      rebar-slex.config
  4. 502 418
      src/erlydtl_scanner.erl
  5. 363 0
      src/erlydtl_scanner.slex

+ 2 - 0
.gitignore

@@ -7,3 +7,5 @@ src/erlydtl_parser.erl
 ebintest
 tests/src/erlydtl_extension_testparser.erl
 tests/output
+deps
+.emacs*

+ 18 - 2
Makefile

@@ -1,12 +1,16 @@
 ERL=erl
 ERLC=erlc
-REBAR=./rebar
+REBAR=./rebar $(REBAR_ARGS)
 
 all: compile
 
-compile:
+compile: check-slex
 	@$(REBAR) compile
 
+check-slex: src/erlydtl_scanner.erl
+src/erlydtl_scanner.erl: src/erlydtl_scanner.slex
+	@echo Notice: $@ is outdated by $<, consider running "'make slex'".
+
 compile_test:
 	-mkdir -p ebintest
 	$(ERLC) -o tests/src -I include/erlydtl_preparser.hrl tests/src/erlydtl_extension_testparser.yrl
@@ -25,3 +29,15 @@ clean:
 	rm -fv ebintest/*
 	rm -fv erl_crash.dump
 	rm -fv tests/output/*
+
+# rebuild any .slex files as well..  not included by default to avoid
+# the slex dependency, which is only needed in case the .slex file has
+# been modified locally.
+slex: REBAR_DEPS ?= get-deps update-deps
+slex: slex-compile
+
+slex-skip-deps: REBAR_DEPS:=
+slex-skip-deps: slex-compile
+
+slex-compile:
+	@$(REBAR) -C rebar-slex.config $(REBAR_DEPS) compile

+ 8 - 0
rebar-slex.config

@@ -0,0 +1,8 @@
+%% -*- mode: erlang -*-
+
+{deps,
+ [{slex, ".*", {git, "git://github.com/erlydtl/slex.git", {branch, "master"}}}
+ ]
+}.
+
+{plugins, [rebar_slex]}.

+ 502 - 418
src/erlydtl_scanner.erl

@@ -1,16 +1,14 @@
 %%%-------------------------------------------------------------------
-%%% File:      erlydtl_scanner.erl
-%%% @author    Roberto Saccon <rsaccon@gmail.com> [http://rsaccon.com]
-%%% @author    Evan Miller <emmiller@gmail.com>
+%%% File:      erlydtl_scanner.slex
 %%% @author    Andreas Stenius <kaos@astekk.se>
-%%% @copyright 2008 Roberto Saccon, Evan Miller
-%%% @doc 
-%%% Template language scanner
-%%% @end  
+%%% @copyright 2013 Andreas Stenius
+%%% @doc
+%%% erlydtl scanner
+%%% @end
 %%%
 %%% The MIT License
 %%%
-%%% Copyright (c) 2007 Roberto Saccon, Evan Miller
+%%% Copyright (c) 2013 Andreas Stenius
 %%%
 %%% Permission is hereby granted, free of charge, to any person obtaining a copy
 %%% of this software and associated documentation files (the "Software"), to deal
@@ -30,426 +28,512 @@
 %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 %%% THE SOFTWARE.
 %%%
-%%% @since 2007-11-11 by Roberto Saccon, Evan Miller
+%%% @since 2013-11-05 by Andreas Stenius
+%%%
+%%% Rules based on the original erlydtl_scanner by Robert Saccon and Evan Miller.
 %%%-------------------------------------------------------------------
 -module(erlydtl_scanner).
--author('rsaccon@gmail.com').
--author('emmiller@gmail.com').
--author('Andreas Stenius <kaos@astekk.se>').
-
--export([scan/1, resume/1]).
--include("erlydtl_ext.hrl").
-
-
-%%====================================================================
-%% API
-%%====================================================================
-%%--------------------------------------------------------------------
-%% @spec scan(T::template()) -> {ok, S::tokens()} | {error, Reason}
-%% @type template() = string() | binary(). Template to parse
-%% @type tokens() = [tuple()].
-%% @doc Scan the template string T and return the a token list or
-%% an error.
-%% @end
-%%--------------------------------------------------------------------
-scan(Template) ->
-    scan(Template, [], {1, 1}, in_text).    
-
-resume(#scanner_state{ template=Template, scanned=Scanned, 
-		     pos=Pos, state=State}) ->
-    scan(Template, Scanned, Pos, State).
-
-scan([], Scanned, _, in_text) ->
-    Tokens = lists:reverse(Scanned),
-    FixedTokens = reverse_strings(Tokens),
-    MarkedTokens = mark_keywords(FixedTokens),
-    AtomizedTokens = atomize_identifiers(MarkedTokens),
-    {ok, AtomizedTokens};
-
-scan([], _Scanned, _, {in_comment, _}) ->
-    {error, "Reached end of file inside a comment."};
-
-scan([], _Scanned, _, _) ->
-    {error, "Reached end of file inside a code block."};
-
-scan("<!--{{" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, [{open_var, {Row, Column}, '<!--{{'} | Scanned], {Row, Column + length("<!--{{")}, {in_code, "}}-->"});
-
-scan("{{" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, [{open_var, {Row, Column}, '{{'} | Scanned], {Row, Column + length("{{")}, {in_code, "}}"});
-
-scan("<!--{#" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, Scanned, {Row, Column + length("<!--{#")}, {in_comment, "#}-->"});
-
-scan("{#" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, Scanned, {Row, Column + length("{#")}, {in_comment, "#}"});
-
-scan("#}-->" ++ T, Scanned, {Row, Column}, {in_comment, "#}-->"}) ->
-    scan(T, Scanned, {Row, Column + length("#}-->")}, in_text);
-
-scan("#}" ++ T, Scanned, {Row, Column}, {in_comment, "#}"}) ->
-    scan(T, Scanned, {Row, Column + length("#}")}, in_text);
-
-scan("<!--{%" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, [{open_tag, {Row, Column}, '<!--{%'} | Scanned], 
-	 {Row, Column + length("<!--{%")}, {in_code, "%}-->"});
-
-scan("{%" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, [{open_tag, {Row, Column}, '{%'} | Scanned], 
-	 {Row, Column + length("{%")}, {in_code, "%}"});
-
-scan([_ | T], Scanned, {Row, Column}, {in_comment, Closer}) ->
-    scan(T, Scanned, {Row, Column + 1}, {in_comment, Closer});
-
-scan("\n" ++ T, Scanned, {Row, Column}, in_text) ->
-    scan(T, append_text_char(Scanned, {Row, Column}, $\n), {Row + 1, 1}, in_text);
-
-scan([H | T], Scanned, {Row, Column}, in_text) ->
-    scan(T, append_text_char(Scanned, {Row, Column}, H), {Row, Column + 1}, in_text);
-
-scan("\"" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
-    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned], {Row, Column + 1}, {in_double_quote, Closer});
-
-scan("\"" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
-    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned], {Row, Column + 1}, {in_double_quote, Closer});
-
-scan("\'" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
-    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned], {Row, Column + 1}, {in_single_quote, Closer});
-
-scan("\'" ++ T, Scanned, {Row, Column}, {in_identifier, Closer}) ->
-    scan(T, [{string_literal, {Row, Column}, "\""} | Scanned], {Row, Column + 1}, {in_single_quote, Closer});
-
-scan([$\\ | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
-    scan(T, append_char(Scanned, $\\), {Row, Column + 1}, {in_double_quote_slash, Closer});
-
-scan([H | T], Scanned, {Row, Column}, {in_double_quote_slash, Closer}) ->
-    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_double_quote, Closer});
-
-scan([$\\ | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
-    scan(T, append_char(Scanned, $\\), {Row, Column + 1}, {in_single_quote_slash, Closer});
-
-scan([H | T], Scanned, {Row, Column}, {in_single_quote_slash, Closer}) ->
-    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_single_quote, Closer});
-
-						% end quote
-scan("\"" ++ T, Scanned, {Row, Column}, {in_double_quote, Closer}) ->
-    scan(T, append_char(Scanned, 34), {Row, Column + 1}, {in_code, Closer});
-
-						% treat single quotes the same as double quotes
-scan("\'" ++ T, Scanned, {Row, Column}, {in_single_quote, Closer}) ->
-    scan(T, append_char(Scanned, 34), {Row, Column + 1}, {in_code, Closer});
-
-scan([H | T], Scanned, {Row, Column}, {in_double_quote, Closer}) ->
-    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_double_quote, Closer});
-
-scan([H | T], Scanned, {Row, Column}, {in_single_quote, Closer}) ->
-    scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_single_quote, Closer});
-
-
-scan("}}-->" ++ T, Scanned, {Row, Column}, {_, "}}-->"}) ->
-    scan(T, [{close_var, {Row, Column}, '}}-->'} | Scanned], 
-	 {Row, Column + length("}}-->")}, in_text);
-
-scan("}}" ++ T, Scanned, {Row, Column}, {_, "}}"}) ->
-    scan(T, [{close_var, {Row, Column}, '}}'} | Scanned], {Row, Column + 2}, in_text);
-
-scan("%}-->" ++ T, Scanned, {Row, Column}, {_, "%}-->"}) ->
-    scan(T, [{close_tag, {Row, Column}, '%}-->'} | Scanned], 
-	 {Row, Column + length("%}-->")}, in_text);
 
-scan("%}" ++ T, [{identifier, _, "mitabrev"}, {open_tag, _, '{%'}|Scanned], {Row, Column}, {_, "%}"}) ->
-    scan(T, [{string, {Row, Column + 2}, ""}|Scanned], {Row, Column + 2}, {in_verbatim, undefined});
+-export([scan/1, scan/4]).
 
-scan("%}" ++ T, [{identifier, _, ReversedTag}, {identifier, _, "mitabrev"}, {open_tag, _, '{%'}|Scanned], 
-     {Row, Column}, {_, "%}"}) ->
-    scan(T, [{string, {Row, Column + 2}, ""}|Scanned], {Row, Column + 2}, {in_verbatim, ReversedTag});
+-compile(nowarn_unused_vars).
 
-scan("%}" ++ T, Scanned, {Row, Column}, {_, "%}"}) ->
-    scan(T, [{close_tag, {Row, Column}, '%}'} | Scanned], 
-	 {Row, Column + 2}, in_text);
+-export([resume/1]).
 
-scan("{%" ++ T, Scanned, {Row, Column}, {in_verbatim, Tag}) ->
-    scan(T, Scanned, {Row, Column + 2}, {in_verbatim_code, lists:reverse("{%"), Tag});
+-record(scanner_state,
+	{template = [], scanned = [], pos = {1, 1},
+	 state = in_text}).
 
-scan(" " ++ T, Scanned, {Row, Column}, {in_verbatim_code, BackTrack, Tag}) ->
-    scan(T, Scanned, {Row, Column + 1}, {in_verbatim_code, [$\ |BackTrack], Tag});
-
-scan("endverbatim%}" ++ T, Scanned, {Row, Column}, {in_verbatim_code, _BackTrack, undefined}) ->
-    scan(T, Scanned, {Row, Column + length("endverbatim%}")}, in_text);
-
-scan("endverbatim " ++ T, Scanned, {Row, Column}, {in_verbatim_code, BackTrack, Tag}) ->
-    scan(T, Scanned, {Row, Column + length("endverbatim ")}, 
-	 {in_endverbatim_code, "", lists:reverse("endverbatim ", BackTrack), Tag});
-
-scan(" " ++ T, Scanned, {Row, Column}, {in_endverbatim_code, "", BackTrack, Tag}) ->
-    scan(T, Scanned, {Row, Column + 1}, {in_endverbatim_code, "", [$\ |BackTrack], Tag});
-
-scan([H|T], Scanned, {Row, Column}, {in_endverbatim_code, EndTag, BackTrack, Tag}) when H >= $a, H =< $z; H >= $0, H =< $9; H =:= $_  ->
-    scan(T, Scanned, {Row, Column + 1}, {in_endverbatim_code, [H|EndTag], [H|BackTrack], Tag});
-
-scan(" " ++ T, Scanned, {Row, Column}, {in_endverbatim_code, Tag, BackTrack, Tag}) ->
-    scan(T, Scanned, {Row, Column + 1}, {in_endverbatim_code, Tag, [$\ |BackTrack], Tag});
-
-scan("%}" ++ T, Scanned, {Row, Column}, {in_endverbatim_code, Tag, _BackTrack, Tag}) ->
-    scan(T, Scanned, {Row, Column + 2}, in_text);
-
-scan("%}" ++ T, Scanned, {Row, Column}, {in_endverbatim_code, "", _BackTrack, undefined}) ->
-    scan(T, Scanned, {Row, Column + 2}, in_text);
-
-scan([H|T], [{string, Pos, Data}|Scanned], {Row, Column}, {in_endverbatim_code, _, BackTrack, Tag}) ->
-    NewPos = case H of $\n -> {Row + 1, 1}; _ -> {Row, Column + 1} end,
-    scan(T, [{string, Pos, [H|BackTrack] ++ Data}|Scanned], NewPos, {in_verbatim, Tag});
-
-scan([H|T], [{string, Pos, Data}|Scanned], {Row, Column}, {in_verbatim_code, BackTrack, Tag}) ->
-    NewPos = case H of $\n -> {Row + 1, 1}; _ -> {Row, Column + 1} end,
-    scan(T, [{string, Pos, [H|BackTrack] ++ Data}|Scanned], NewPos, {in_verbatim, Tag});
-
-scan([H|T], [{string, Pos, Data}|Scanned], {Row, Column}, {in_verbatim, Tag}) ->
-    NewPos = case H of $\n -> {Row + 1, 1}; _ -> {Row, Column + 1} end,
-    scan(T, [{string, Pos, [H|Data]}|Scanned], NewPos, {in_verbatim, Tag});
-
-scan("==" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'==', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
-
-scan("!=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'!=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
-
-scan(">=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'>=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
-
-scan("<=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'<=', {Row, Column}} | Scanned], {Row, Column + 2}, {in_code, Closer});
-
-scan("<" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'<', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan(">" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'>', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan("("++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'(', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan(")" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{')', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan("," ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{',', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan("|" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'|', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan("=" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'=', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan(":" ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{':', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
-
-scan("." ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, [{'.', {Row, Column}} | Scanned], {Row, Column + 1}, {in_code, Closer});
+resume(#scanner_state{template = Template,
+		      scanned = Scanned, pos = Pos, state = State}) ->
+    scan(Template, Scanned, Pos, State).
 
-scan("_(" ++ T, Scanned, {Row, Column}, {in_code, Closer}) ->
-    scan(T, lists:reverse([{'_', {Row, Column}}, {'(', {Row, Column + 1}}], Scanned), {Row, Column + 2}, {in_code, Closer});
+to_atom(L) when is_list(L) -> list_to_atom(L).
 
-scan(" " ++ T, Scanned, {Row, Column}, {_, Closer}) ->
-    scan(T, Scanned, {Row, Column + 1}, {in_code, Closer});
+to_keyword(L, P) -> {to_atom(L ++ "_keyword"), P, L}.
 
+atomize(L, T) -> setelement(3, T, to_atom(L)).
 
-scan([H | T], Scanned, {Row, Column}, {in_code, Closer}) ->
-    case char_type(H) of
-        letter_underscore ->
-            scan(T, [{identifier, {Row, Column}, [H]} | Scanned], {Row, Column + 1}, {in_identifier, Closer});
-        hyphen_minus ->
-            scan(T, [{number_literal, {Row, Column}, [H]} | Scanned], {Row, Column + 1}, {in_number, Closer});
-        digit ->
-            scan(T, [{number_literal, {Row, Column}, [H]} | Scanned], {Row, Column + 1}, {in_number, Closer});
-        _ ->
-            {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])},
-	     #scanner_state{ template=[H|T], scanned=Scanned, pos={Row, Column}, state={in_code, Closer}}}
+is_keyword(Class, {_, _, L} = T) ->
+    L1 = lists:reverse(L),
+    case is_keyword(Class, L1) of
+      true -> to_keyword(L1, element(2, T));
+      false -> atomize(L1, T)
     end;
-
-scan([H | T], Scanned, {Row, Column}, {in_number, Closer}) ->
-    case char_type(H) of
-        digit ->
-            scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_number, Closer});
-        _ ->
-            {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])},
-	     #scanner_state{ template=[H|T], scanned=Scanned, pos={Row, Column}, state={in_code, Closer}}}
+is_keyword([C | Cs], L) ->
+    is_keyword(C, L) orelse is_keyword(Cs, L);
+is_keyword(all, L) -> is_keyword([any, open, close], L);
+is_keyword(open_tag, L) -> is_keyword([any, open], L);
+is_keyword(close_tag, L) -> is_keyword([any, close], L);
+is_keyword(any, "in") -> true;
+is_keyword(any, "not") -> true;
+is_keyword(any, "or") -> true;
+is_keyword(any, "and") -> true;
+is_keyword(any, "as") -> true;
+is_keyword(any, "by") -> true;
+is_keyword(any, "with") -> true;
+is_keyword(close, "only") -> true;
+is_keyword(close, "parsed") -> true;
+is_keyword(close, "noop") -> true;
+is_keyword(close, "reversed") -> true;
+is_keyword(close, "openblock") -> true;
+is_keyword(close, "closeblock") -> true;
+is_keyword(close, "openvariable") -> true;
+is_keyword(close, "closevariable") -> true;
+is_keyword(close, "openbrace") -> true;
+is_keyword(close, "closebrace") -> true;
+is_keyword(close, "opencomment") -> true;
+is_keyword(close, "closecomment") -> true;
+is_keyword(open, "autoescape") -> true;
+is_keyword(open, "endautoescape") -> true;
+is_keyword(open, "block") -> true;
+is_keyword(open, "endblock") -> true;
+is_keyword(open, "comment") -> true;
+is_keyword(open, "endcomment") -> true;
+is_keyword(open, "cycle") -> true;
+is_keyword(open, "extends") -> true;
+is_keyword(open, "filter") -> true;
+is_keyword(open, "endfilter") -> true;
+is_keyword(open, "firstof") -> true;
+is_keyword(open, "for") -> true;
+is_keyword(open, "empty") -> true;
+is_keyword(open, "endfor") -> true;
+is_keyword(open, "if") -> true;
+is_keyword(open, "elif") -> true;
+is_keyword(open, "else") -> true;
+is_keyword(open, "endif") -> true;
+is_keyword(open, "ifchanged") -> true;
+is_keyword(open, "endifchanged") -> true;
+is_keyword(open, "ifequal") -> true;
+is_keyword(open, "endifequal") -> true;
+is_keyword(open, "ifnotequal") -> true;
+is_keyword(open, "endifnotequal") -> true;
+is_keyword(open, "include") -> true;
+is_keyword(open, "now") -> true;
+is_keyword(open, "regroup") -> true;
+is_keyword(open, "endregroup") -> true;
+is_keyword(open, "spaceless") -> true;
+is_keyword(open, "endspaceless") -> true;
+is_keyword(open, "ssi") -> true;
+is_keyword(open, "templatetag") -> true;
+is_keyword(open, "widthratio") -> true;
+is_keyword(open, "call") -> true;
+is_keyword(open, "endwith") -> true;
+is_keyword(open, "trans") -> true;
+is_keyword(open, "blocktrans") -> true;
+is_keyword(open, "endblocktrans") -> true;
+is_keyword(_, _) -> false.
+
+scan(Template) when is_list(Template) ->
+    scan(Template, [], {1, 1}, in_text).
+
+scan("{{" ++ T, S, {R, C} = P, in_text) ->
+    scan(T,
+	 [{open_var, P, "{{"} | post_process(S, open_var)],
+	 {R, C + 2}, {in_code, "}}"});
+scan("{%" ++ T, S, {R, C} = P, in_text) ->
+    scan(T,
+	 [{open_tag, P, "{%"} | post_process(S, open_tag)],
+	 {R, C + 2}, {in_code, "%}"});
+scan("<!--{{" ++ T, S, {R, C} = P, in_text) ->
+    scan(T,
+	 [{open_var, P, "<!--{{"} | post_process(S, open_var)],
+	 {R, C + 6}, {in_code, "}}-->"});
+scan("<!--{%" ++ T, S, {R, C} = P, in_text) ->
+    scan(T,
+	 [{open_tag, P, "<!--{%"} | post_process(S, open_tag)],
+	 {R, C + 6}, {in_code, "%}-->"});
+scan("{#" ++ T, S, {R, C}, in_text) ->
+    scan(T, S, {R, C + 2}, {in_comment, "#}"});
+scan("<!--{#" ++ T, S, {R, C}, in_text) ->
+    scan(T, S, {R, C + 6}, {in_comment, "#}-->"});
+scan("#}-->" ++ T, S, {R, C}, {_, "#}-->"}) ->
+    scan(T, S, {R, C + 5}, in_text);
+scan("#}" ++ T, S, {R, C}, {_, "#}"}) ->
+    scan(T, S, {R, C + 2}, in_text);
+scan([H | T], S, {R, C}, {in_comment, E} = St) ->
+    scan(T, S,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 St);
+scan([H | T], S, {R, C} = P, in_text = St) ->
+    scan(T,
+	 case S of
+	   [{string, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ -> [{string, P, [H]} | post_process(S, string)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 St);
+scan("\"" ++ T, S, {R, C} = P, {in_code, E}) ->
+    scan(T,
+	 [{string_literal, P, "\""} | post_process(S,
+						   string_literal)],
+	 {R, C + 1}, {in_double_quote, E});
+scan("'" ++ T, S, {R, C} = P, {in_code, E}) ->
+    scan(T,
+	 [{string_literal, P, "\""} | post_process(S,
+						   string_literal)],
+	 {R, C + 1}, {in_single_quote, E});
+scan("\"" ++ T, S, {R, C} = P, {in_double_quote, E}) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, "\"" ++ L) | Ss];
+	   _ ->
+	       [{string_literal, P, "\""} | post_process(S,
+							 string_literal)]
+	 end,
+	 {R, C + 1}, {in_code, E});
+scan("'" ++ T, S, {R, C} = P, {in_single_quote, E}) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, "\"" ++ L) | Ss];
+	   _ ->
+	       [{string_literal, P, "\""} | post_process(S,
+							 string_literal)]
+	 end,
+	 {R, C + 1}, {in_code, E});
+scan("\\" ++ T, S, {R, C} = P, {in_double_quote, E}) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, "\\" ++ L) | Ss];
+	   _ ->
+	       [{string_literal, P, "\\"} | post_process(S,
+							 string_literal)]
+	 end,
+	 {R, C + 1}, {in_double_quote_escape, E});
+scan("\\" ++ T, S, {R, C} = P, {in_single_quote, E}) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, "\\" ++ L) | Ss];
+	   _ ->
+	       [{string_literal, P, "\\"} | post_process(S,
+							 string_literal)]
+	 end,
+	 {R, C + 1}, {in_single_quote_escape, E});
+scan([H | T], S, {R, C} = P,
+     {in_double_quote, E} = St) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ ->
+	       [{string_literal, P, [H]} | post_process(S,
+							string_literal)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 St);
+scan([H | T], S, {R, C} = P,
+     {in_single_quote, E} = St) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ ->
+	       [{string_literal, P, [H]} | post_process(S,
+							string_literal)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 St);
+scan([H | T], S, {R, C} = P,
+     {in_double_quote_escape, E}) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ ->
+	       [{string_literal, P, [H]} | post_process(S,
+							string_literal)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_double_quote, E});
+scan([H | T], S, {R, C} = P,
+     {in_single_quote_escape, E}) ->
+    scan(T,
+	 case S of
+	   [{string_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ ->
+	       [{string_literal, P, [H]} | post_process(S,
+							string_literal)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_single_quote, E});
+scan("}}-->" ++ T, S, {R, C} = P, {_, "}}-->"}) ->
+    scan(T,
+	 [{close_var, P, "}}-->"} | post_process(S, close_var)],
+	 {R, C + 5}, in_text);
+scan("%}-->" ++ T, S, {R, C} = P, {_, "%}-->"}) ->
+    scan(T,
+	 [{close_tag, P, "%}-->"} | post_process(S, close_tag)],
+	 {R, C + 5}, in_text);
+scan("}}" ++ T, S, {R, C} = P, {_, "}}"}) ->
+    scan(T,
+	 [{close_var, P, "}}"} | post_process(S, close_var)],
+	 {R, C + 2}, in_text);
+scan("%}" ++ T, S, {R, C} = P, {_, "%}"} = St) ->
+    case S of
+      [{identifier, _, "mitabrev"}, {open_tag, _, '{%'}
+       | Ss] ->
+	  scan(T, [{string, {R, C + 2}, ""} | Ss], {R, C + 2},
+	       {in_verbatim, undefined});
+      [{identifier, _, Tag}, {identifier, _, verbatim},
+       {open_tag, _, '{%'}
+       | Ss] ->
+	  scan(T, [{string, {R, C + 2}, ""} | Ss], {R, C + 2},
+	       {in_verbatim, Tag});
+      _ ->
+	  scan(T,
+	       [{close_tag, P, "%}"} | post_process(S, close_tag)],
+	       {R, C + 2}, in_text)
     end;
-
-scan([H | T], Scanned, {Row, Column}, {in_identifier, Closer}) ->
-    case char_type(H) of
-        letter_underscore ->
-            scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
-        digit ->
-            scan(T, append_char(Scanned, H), {Row, Column + 1}, {in_identifier, Closer});
-        _ ->
-            {error, {Row, ?MODULE, lists:concat(["Illegal character in column ", Column])},
-	     #scanner_state{ template=[H|T], scanned=Scanned, pos={Row, Column}, state={in_code, Closer}}}
-    end.
-
-						% internal functions
-
-append_char([{Type, Pos, Chars}|Scanned], Char) ->
-    [{Type, Pos, [Char | Chars]} | Scanned].
-
-append_text_char([], {Row, Column}, Char) ->
-    [{string, {Row, Column}, [Char]}];
-append_text_char([{string, StrPos, Chars} |Scanned1], _, Char) ->
-    [{string, StrPos, [Char | Chars]} | Scanned1];
-append_text_char(Scanned, {Row, Column}, Char) ->
-    [{string, {Row, Column}, [Char]} | Scanned].
-
-char_type(C) when ((C >= $a) andalso (C =< $z)) orelse ((C >= $A) andalso (C =< $Z)) orelse (C == $_) ->
-    letter_underscore;
-char_type(C) when ((C >= $0) andalso (C =< $9)) ->
-    digit;
-char_type($-) ->
-    hyphen_minus;
-char_type(_C) ->
-    undefined.
-
-reverse_strings(Tokens) ->
-    reverse_strings(Tokens, []).
-
-reverse_strings([], Acc) ->
-    lists:reverse(Acc);
-reverse_strings([{Category, Pos, String}|T], Acc) when Category =:= string; Category =:= identifier;
-                                                       Category =:= string_literal; Category =:= number_literal ->
-    reverse_strings(T, [{Category, Pos, lists:reverse(String)}|Acc]);
-reverse_strings([Other|T], Acc) ->
-    reverse_strings(T, [Other|Acc]).
-
-mark_keywords(Tokens) ->
-    mark_keywords(Tokens, []).
-
-mark_keywords([], Acc) ->
-    lists:reverse(Acc);
-mark_keywords([{identifier, Pos, "in" = String}|T], Acc) ->
-    mark_keywords(T, [{in_keyword, Pos, String}|Acc]);
-mark_keywords([{identifier, Pos, "not" = String}|T], Acc) ->
-    mark_keywords(T, [{not_keyword, Pos, String}|Acc]);
-mark_keywords([{identifier, Pos, "or" = String}|T], Acc) ->
-    mark_keywords(T, [{or_keyword, Pos, String}|Acc]);
-mark_keywords([{identifier, Pos, "and" = String}|T], Acc) ->
-    mark_keywords(T, [{and_keyword, Pos, String}|Acc]);
-mark_keywords([{identifier, Pos, "as" = String}|T], Acc) ->
-    mark_keywords(T, [{as_keyword, Pos, String}|Acc]);
-mark_keywords([{identifier, Pos, "by" = String}|T], Acc) ->
-    mark_keywords(T, [{by_keyword, Pos, String}|Acc]);
-mark_keywords([{identifier, Pos, "with" = String}|T], Acc) ->
-    mark_keywords(T, [{with_keyword, Pos, String}|Acc]);
-						% These must be succeeded by a close_tag
-mark_keywords([{identifier, Pos, "only" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{only_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "parsed" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{parsed_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "noop" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{noop_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "reversed" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{reversed_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "openblock" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{openblock_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "closeblock" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{closeblock_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "openvariable" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{openvariable_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "closevariable" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{closevariable_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "openbrace" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{openbrace_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "closebrace" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{closebrace_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "opencomment" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{opencomment_keyword, Pos, String}, CloseTag], Acc));
-mark_keywords([{identifier, Pos, "closecomment" = String}, {close_tag, _, _} = CloseTag|T], Acc) ->
-    mark_keywords(T, lists:reverse([{closecomment_keyword, Pos, String}, CloseTag], Acc));
-						% The rest must be preceded by an open_tag.
-						% This allows variables to have the same names as tags.
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "autoescape" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {autoescape_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endautoescape" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endautoescape_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "block" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {block_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endblock" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endblock_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "comment" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {comment_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endcomment" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endcomment_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "cycle" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {cycle_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "extends" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {extends_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "filter" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {filter_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endfilter" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endfilter_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "firstof" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {firstof_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "for" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {for_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "empty" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {empty_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endfor" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endfor_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "if" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {if_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "elif" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {elif_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "else" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {else_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endif" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endif_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "ifchanged" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {ifchanged_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endifchanged" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endifchanged_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "ifequal" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {ifequal_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endifequal" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endifequal_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "ifnotequal" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {ifnotequal_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endifnotequal" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endifnotequal_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "include" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {include_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "now" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {now_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "regroup" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {regroup_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endregroup" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endregroup_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "spaceless" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {spaceless_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endspaceless" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endspaceless_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "ssi" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {ssi_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "templatetag" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {templatetag_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "widthratio" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {widthratio_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "call" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {call_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endwith" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endwith_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "trans" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {trans_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "blocktrans" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {blocktrans_keyword, Pos, String}], Acc));
-mark_keywords([{open_tag, _, _} = OpenToken, {identifier, Pos, "endblocktrans" = String}|T], Acc) ->
-    mark_keywords(T, lists:reverse([OpenToken, {endblocktrans_keyword, Pos, String}], Acc));
-mark_keywords([H|T], Acc) ->
-    mark_keywords(T, [H|Acc]).
-
-atomize_identifiers(Tokens) ->
-    atomize_identifiers(Tokens, []).
-
-atomize_identifiers([], Acc) ->
-    lists:reverse(Acc);
-atomize_identifiers([{identifier, Pos, String}|T], Acc) ->
-    atomize_identifiers(T, [{identifier, Pos, list_to_atom(String)}|Acc]);
-atomize_identifiers([H|T], Acc) ->
-    atomize_identifiers(T, [H|Acc]).
+scan("{%" ++ T, S, {R, C} = P, {in_verbatim, E} = St) ->
+    scan(T, S, {R, C + 2}, {in_verbatim_code, {E, "%{"}});
+scan(" " ++ T, S, {R, C} = P,
+     {in_verbatim_code, E} = St) ->
+    {Tag, Backtrack} = E,
+    scan(T, S, {R, C + 1},
+	 {in_verbatim_code, {Tag, [$  | Backtrack]}});
+scan("endverbatim%}" ++ T, S, {R, C} = P,
+     {in_verbatim_code, E} = St)
+    when element(1, E) =:= undefined ->
+    scan(T, S, {R, C + 13}, in_text);
+scan("endverbatim " ++ T, S, {R, C} = P,
+     {in_verbatim_code, E} = St) ->
+    {Tag, Backtrack} = E,
+    scan(T, S, {R, C + 12},
+	 {in_endverbatim_code,
+	  {Tag, lists:reverse("endverbatim ", Backtrack), ""}});
+scan(" " ++ T, S, {R, C} = P,
+     {in_endverbatim_code, E} = St)
+    when element(3, E) =:= "" ->
+    {Tag, Backtrack, EndTag} = E,
+    scan(T, S, {R, C + 1},
+	 {in_endverbatim_code, {Tag, [$  | Backtrack], EndTag}});
+scan([H | T], S, {R, C} = P,
+     {in_endverbatim_code, E} = St)
+    when H >= $a andalso H =< $z orelse
+	   H >= $0 andalso H =< $9 orelse H =:= $_ ->
+    {Tag, Backtrack, EndTag} = E,
+    scan(T, S, {R, C + 1},
+	 {in_endverbatim_code,
+	  {Tag, [H | Backtrack], [H | EndTag]}});
+scan(" " ++ T, S, {R, C} = P,
+     {in_endverbatim_code, E} = St)
+    when element(1, E) =:= element(3, E) ->
+    {Tag, Backtrack, Tag} = E,
+    scan(T, S, {R, C + 1},
+	 {in_endverbatim_code, {Tag, [$  | Backtrack], Tag}});
+scan("%}" ++ T, S, {R, C} = P,
+     {in_endverbatim_code, E} = St)
+    when element(1, E) =:= element(3, E) ->
+    scan(T, S, {R, C + 2}, in_text);
+scan("%}" ++ T, S, {R, C} = P,
+     {in_endverbatim_code, E} = St)
+    when element(1, E) =:= undefined andalso
+	   element(3, E) =:= "" ->
+    scan(T, S, {R, C + 2}, in_text);
+scan([H | T], S, {R, C} = P,
+     {in_endverbatim_code, E} = St) ->
+    {Tag, Backtrack, _} = E,
+    scan(T,
+	 case S of
+	   [{string, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | Backtrack] ++ L) | Ss];
+	   _ -> [{string, P, [H | Backtrack]} | S]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_verbatim, Tag});
+scan([H | T], S, {R, C} = P,
+     {in_verbatim_code, E} = St) ->
+    {Tag, Backtrack} = E,
+    scan(T,
+	 case S of
+	   [{string, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | Backtrack] ++ L) | Ss];
+	   _ -> [{string, P, [H | Backtrack]} | S]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_verbatim, Tag});
+scan([H | T], S, {R, C} = P, {in_verbatim, E} = St) ->
+    scan(T,
+	 case S of
+	   [{string, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ -> [{string, P, [H]} | S]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_verbatim, E});
+scan("==" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'==', P} | post_process(S, '==')], {R, C + 2},
+	 {in_code, E});
+scan("!=" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'!=', P} | post_process(S, '!=')], {R, C + 2},
+	 {in_code, E});
+scan(">=" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'>=', P} | post_process(S, '>=')], {R, C + 2},
+	 {in_code, E});
+scan("<=" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'<=', P} | post_process(S, '<=')], {R, C + 2},
+	 {in_code, E});
+scan(">" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'>', P} | post_process(S, '>')], {R, C + 1},
+	 {in_code, E});
+scan("<" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'<', P} | post_process(S, '<')], {R, C + 1},
+	 {in_code, E});
+scan("(" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'(', P} | post_process(S, '(')], {R, C + 1},
+	 {in_code, E});
+scan(")" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{')', P} | post_process(S, ')')], {R, C + 1},
+	 {in_code, E});
+scan("," ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{',', P} | post_process(S, ',')], {R, C + 1},
+	 {in_code, E});
+scan("|" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'|', P} | post_process(S, '|')], {R, C + 1},
+	 {in_code, E});
+scan("=" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'=', P} | post_process(S, '=')], {R, C + 1},
+	 {in_code, E});
+scan(":" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{':', P} | post_process(S, ':')], {R, C + 1},
+	 {in_code, E});
+scan("." ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'.', P} | post_process(S, '.')], {R, C + 1},
+	 {in_code, E});
+scan("_(" ++ T, S, {R, C} = P, {_, E}) ->
+    scan(T, [{'(', P}, {'_', P} | post_process(S, '_')],
+	 {R, C + 2}, {in_code, E});
+scan(" " ++ T, S, {R, C}, {_, E}) ->
+    scan(T, S, {R, C + 1}, {in_code, E});
+scan([H | T], S, {R, C} = P, {in_code, E})
+    when H >= $a andalso H =< $z orelse
+	   H >= $A andalso H =< $Z orelse H == $_ ->
+    scan(T,
+	 [{identifier, P, [H]} | post_process(S, identifier)],
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_identifier, E});
+scan([H | T], S, {R, C} = P, {in_code, E})
+    when H >= $0 andalso H =< $9 orelse H == $- ->
+    scan(T,
+	 [{number_literal, P, [H]} | post_process(S,
+						  number_literal)],
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_number, E});
+scan([H | T], S, {R, C} = P, {in_code, E} = St) ->
+    {error,
+     {R, erlydtl_scanner,
+      lists:concat(["Illegal character in column ", C])},
+     #scanner_state{template = [H | T], scanned = S, pos = P,
+		    state = St}};
+scan([H | T], S, {R, C} = P, {in_number, E} = St)
+    when H >= $0 andalso H =< $9 ->
+    scan(T,
+	 case S of
+	   [{number_literal, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ ->
+	       [{number_literal, P, [H]} | post_process(S,
+							number_literal)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 St);
+scan([H | T], S, {R, C} = P, {in_number, E} = St) ->
+    {error,
+     {R, erlydtl_scanner,
+      lists:concat(["Illegal character in column ", C])},
+     #scanner_state{template = [H | T], scanned = S, pos = P,
+		    state = St}};
+scan([H | T], S, {R, C} = P, {in_identifier, E})
+    when H >= $a andalso H =< $z orelse
+	   H >= $A andalso H =< $Z orelse
+	     H >= $0 andalso H =< $9 orelse H == $_ ->
+    scan(T,
+	 case S of
+	   [{identifier, _, L} = M | Ss] ->
+	       [setelement(3, M, [H | L]) | Ss];
+	   _ ->
+	       [{identifier, P, [H]} | post_process(S, identifier)]
+	 end,
+	 case H of
+	   $\n -> {R + 1, 1};
+	   _ -> {R, C + 1}
+	 end,
+	 {in_identifier, E});
+scan([], S, {R, C} = P, in_text = St) ->
+    {ok, lists:reverse(post_process(S, eof))};
+scan([], S, {R, C} = P, {in_comment, E} = St) ->
+    {error, "Reached end of file inside a comment."};
+scan([], S, {R, C} = P, {_, E} = St) ->
+    {error, "Reached end of file inside a code block."}.
+
+post_process(_, {string, _, L} = T, _) ->
+    setelement(3, T, begin L1 = lists:reverse(L), L1 end);
+post_process(_, {string_literal, _, L} = T, _) ->
+    setelement(3, T, begin L1 = lists:reverse(L), L1 end);
+post_process(_, {number_literal, _, L} = T, _) ->
+    setelement(3, T, begin L1 = lists:reverse(L), L1 end);
+post_process(_, {open_var, _, L} = T, _) ->
+    setelement(3, T, begin L1 = to_atom(L), L1 end);
+post_process(_, {close_var, _, L} = T, _) ->
+    setelement(3, T, begin L1 = to_atom(L), L1 end);
+post_process(_, {open_tag, _, L} = T, _) ->
+    setelement(3, T, begin L1 = to_atom(L), L1 end);
+post_process(_, {close_tag, _, L} = T, _) ->
+    setelement(3, T, begin L1 = to_atom(L), L1 end);
+post_process([{open_tag, _, _} | _],
+	     {identifier, _, L} = T, close_tag) ->
+    is_keyword(all, T);
+post_process([{open_tag, _, _} | _],
+	     {identifier, _, L} = T, _) ->
+    is_keyword(open_tag, T);
+post_process(_, {identifier, _, L} = T, close_tag) ->
+    is_keyword(close_tag, T);
+post_process(_, {identifier, _, L} = T, _) ->
+    is_keyword(any, T);
+post_process(_, T, _) -> T.
+
+post_process([S | Ss], N) ->
+    [post_process(Ss, S, N) | Ss];
+post_process(T, N) -> post_process(undefined, T, N).

+ 363 - 0
src/erlydtl_scanner.slex

@@ -0,0 +1,363 @@
+%%%-------------------------------------------------------------------
+%%% File:      erlydtl_scanner.slex
+%%% @author    Andreas Stenius <kaos@astekk.se>
+%%% @copyright 2013 Andreas Stenius
+%%% @doc
+%%% erlydtl scanner
+%%% @end
+%%%
+%%% The MIT License
+%%%
+%%% Copyright (c) 2013 Andreas Stenius
+%%%
+%%% Permission is hereby granted, free of charge, to any person obtaining a copy
+%%% of this software and associated documentation files (the "Software"), to deal
+%%% in the Software without restriction, including without limitation the rights
+%%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+%%% copies of the Software, and to permit persons to whom the Software is
+%%% furnished to do so, subject to the following conditions:
+%%%
+%%% The above copyright notice and this permission notice shall be included in
+%%% all copies or substantial portions of the Software.
+%%%
+%%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+%%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+%%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+%%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+%%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+%%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+%%% THE SOFTWARE.
+%%%
+%%% @since 2013-11-05 by Andreas Stenius
+%%%
+%%% Rules based on the original erlydtl_scanner by Robert Saccon and Evan Miller.
+%%%-------------------------------------------------------------------
+
+-module erlydtl_scanner.
+-function scan.
+-init_state in_text.
+form -compile(nowarn_unused_vars) end.
+form -export([resume/1]) end.
+form \
+-record(scanner_state, { \
+          template=[], \
+          scanned=[], \
+          pos={1,1}, \
+          state=in_text \
+         }) \
+end.
+
+form \
+resume(#scanner_state{ template=Template, scanned=Scanned, \
+                       pos=Pos, state=State }) -> \
+    scan(Template, Scanned, Pos, State) \
+end.
+
+%% Rule syntax: Prio Prefix|any|- InState[-]|any[+|-] [, Guard] : {: Body}|{[Action...,] NewState [until Closer]}.
+%% `state-' means a state without a closer state.
+%% Where Guard and Body are one erlang expression (see it as a begin ... end block): expr <erlang code...> end
+
+%% Open tags
+10 {{ in_text-: open_var, in_code until }}.
+10 {% in_text-: open_tag, in_code until %}.
+10 <!--{{ in_text-: open_var, in_code until }}-->.
+10 <!--{% in_text-: open_tag, in_code until %}-->.
+
+%% Comments
+20 {# in_text-: in_comment until #}.
+20 <!--{# in_text-: in_comment until #}-->.
+
+%% `any+' will match the closer with the prefix..
+30 #}--> any+: skip, in_text-.
+30 #} any+: skip, in_text-.
+
+%% must come before the `space any' rule
+40 any in_comment: skip.
+%% end comment rules
+
+%% The rest is "just" text..
+50 any in_text-: +string.
+
+%% Quoted strings
+60 \" in_code: string_literal, in_double_quote.
+62 \" in_double_quote: +string_literal, in_code.
+64 \\ in_double_quote: +string_literal, in_double_quote_escape.
+66 any in_double_quote: +string_literal.
+68 any in_double_quote_escape: +string_literal, in_double_quote.
+
+60 \' in_code: string_literal-\", in_single_quote.
+62 \' in_single_quote: +string_literal-\", in_code.
+64 \\ in_single_quote: +string_literal, in_single_quote_escape.
+66 any in_single_quote: +string_literal.
+68 any in_single_quote_escape: +string_literal, in_single_quote.
+
+%% Close tags
+70 }}--> any+: close_var, in_text-.
+70 %}--> any+: close_tag, in_text-.
+72 }} any+: close_var, in_text-.
+72 %} any+::
+  expr \
+    case S of \
+      [{identifier,_,"mitabrev"}, {open_tag,_,'{%'}|Ss] -> \
+        scan(T, [{string, {R, C + 2}, ""} | Ss], \
+             {R, C + 2}, {in_verbatim, undefined}); \
+      [{identifier,_,Tag}, {identifier,_,verbatim}, {open_tag,_,'{%'}|Ss] -> \
+        scan(T, [{string, {R, C + 2}, ""} | Ss], \
+             {R, C + 2}, {in_verbatim, Tag}); \
+      _ -> scan(T, [{close_tag, P, "%}"} | post_process(S, close_tag)], \
+                {R, C + 2}, in_text) \
+    end \
+  end.  
+
+%% verbatim stuff
+80 {% in_verbatim::
+  expr scan(T, S, {R, C + 2}, {in_verbatim_code, {E, "%{"}}) end.
+82 \s in_verbatim_code::
+  expr \
+    {Tag, Backtrack} = E, \
+    scan(T, S, {R, C + 1}, {in_verbatim_code, {Tag, [$\ |Backtrack]}}) \
+  end.
+84 'endverbatim%}' in_verbatim_code,
+  expr element(1, E) =:= undefined end::
+  expr scan(T, S, {R, C + 13}, in_text) end.
+86 'endverbatim ' in_verbatim_code::
+  expr \
+    {Tag, Backtrack} = E, \
+    scan(T, S, {R, C + 12}, \
+         {in_endverbatim_code, \
+          {Tag, lists:reverse("endverbatim ", Backtrack), ""}}) \
+  end.
+88 \s in_endverbatim_code,
+  expr element(3, E) =:= "" end::
+  expr \
+    {Tag, Backtrack, EndTag} = E, \
+    scan(T, S, {R, C + 1}, \
+         {in_endverbatim_code, \
+          {Tag, [$\ |Backtrack], EndTag}}) \
+  end.
+90 any in_endverbatim_code,
+  expr \
+    H >= $a andalso H =< $z orelse \
+    H >= $0 andalso H =< $9 orelse H =:= $_ end::
+  expr \
+    {Tag, Backtrack, EndTag} = E, \
+    scan(T, S, {R, C + 1}, \
+         {in_endverbatim_code, \
+          {Tag, [H|Backtrack], [H|EndTag]}}) \
+  end.
+92 \s in_endverbatim_code,
+  expr element(1, E) =:= element(3, E) end::
+  expr \
+    {Tag, Backtrack, Tag} = E, \
+    scan(T, S, {R, C + 1}, \
+         {in_endverbatim_code, \
+          {Tag, [$\ |Backtrack], Tag}}) \
+  end.
+94 %} in_endverbatim_code,
+  expr element(1, E) =:= element(3, E) end::
+  expr scan(T, S, {R, C + 2}, (in_text)) end.
+96 %} in_endverbatim_code,
+  expr element(1, E) =:= undefined andalso \
+        element(3, E) =:= "" end::
+  expr scan(T, S, {R, C + 2}, in_text) end.
+98 any in_endverbatim_code::
+  expr \
+    {Tag, Backtrack, _} = E, \
+    scan(T, \
+         case S of \
+           [{string,_, L}=M|Ss] -> \
+             [setelement(3, M, [H|Backtrack] ++ L)|Ss]; \
+           _ -> [{string, P, [H|Backtrack]}|S] \
+         end, \
+         case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
+         {in_verbatim, Tag}) \
+  end.
+100 any in_verbatim_code::
+  expr \
+    {Tag, Backtrack} = E, \
+    scan(T, \
+         case S of \
+           [{string,_, L}=M|Ss] -> \
+             [setelement(3, M, [H|Backtrack] ++ L)|Ss]; \
+           _ -> [{string, P, [H|Backtrack]}|S] \
+         end, \
+         case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
+         {in_verbatim, Tag}) \
+  end.
+102 any in_verbatim::
+  expr \
+    scan(T, \
+         case S of \
+           [{string,_, L}=M|Ss] -> \
+             [setelement(3, M, [H|L])|Ss]; \
+           _ -> [{string, P, [H]}|S] \
+         end, \
+         case H of $\n -> {R + 1, 1}; _ -> {R, C + 1} end, \
+         {in_verbatim, E}) \
+  end.
+
+
+%% Get back to `in_code' on these tokens:
+110 == any: ==, in_code.
+110 != any: !=, in_code.
+110 >= any: >=, in_code.
+110 <= any: <=, in_code.
+110 > any: >, in_code.
+110 < any: <, in_code.
+110 ( any: (, in_code.
+110 ) any: ), in_code.
+110 \, any: \,, in_code.
+110 | any: |, in_code.
+110 = any: =, in_code.
+110 \: any: \:, in_code.
+110 \. any: \., in_code.
+110 \_( any: \_ \(, in_code.
+
+%% Eat space (and get back to `in_code')
+%% note that `any' here will match states *with* a closer, i.e. not `in_text'.
+%% (`any-' would match any stateless state.)
+110 \s any: skip, in_code.
+
+120 any in_code,
+  expr \
+    (H >= $a andalso H =< $z) orelse \
+    (H >= $A andalso H =< $Z) orelse \
+     H == $_ \
+  end: identifier, in_identifier.
+
+122 any in_code,
+  expr \
+    (H >= $0 andalso H =< $9) orelse H == $- \
+  end: number_literal, in_number.
+
+124 any in_code::
+  expr \
+    {error, {R, erlydtl_scanner, \
+             lists:concat(["Illegal character in column ", C])}, \
+     #scanner_state{ template=[H|T], scanned=S, pos=P, state=St } \
+    } \
+  end.
+
+130 any in_number, expr H >= $0 andalso H =< $9 end: +number_literal.
+132 any in_number::
+  expr \
+    {error, {R, erlydtl_scanner, \
+             lists:concat(["Illegal character in column ", C])}, \
+     #scanner_state{ template=[H|T], scanned=S, pos=P, state=St } \
+    } \
+  end.
+
+140 any in_identifier,
+  expr \
+    (H >= $a andalso H =< $z) orelse \
+    (H >= $A andalso H =< $Z) orelse \
+    (H >= $0 andalso H =< $9) orelse \
+     H == $_ \
+  end: +identifier, in_identifier.
+
+200 : in_text- ::
+  expr \
+    {ok, lists:reverse(post_process(S,eof))} \
+  end.
+
+202 : in_comment :: expr {error, "Reached end of file inside a comment."} end.
+204 : any :: expr {error, "Reached end of file inside a code block."} end.
+
+
+%% Process tokens as we parse them
+
+string: lists reverse.
+string_literal: lists reverse.
+number_literal: lists reverse.
+open_var: to_atom.
+close_var: to_atom.
+open_tag: to_atom.
+close_tag: to_atom.
+
+open_tag identifier, close_tag:: expr is_keyword(all, T) end.
+open_tag identifier:: expr is_keyword(open_tag, T) end.
+identifier, close_tag:: expr is_keyword(close_tag, T) end.
+identifier:: expr is_keyword(any, T) end.
+
+
+%% Utility functions
+
+form to_atom(L) when is_list(L) -> list_to_atom(L) end.
+form to_keyword(L, P) -> {to_atom(L ++ "_keyword"), P, L} end.
+form atomize(L, T) -> setelement(3, T, to_atom(L)) end.
+
+form \
+  is_keyword(Class, {_, _, L} = T) -> \
+    L1 = lists:reverse(L), \
+    case is_keyword(Class, L1) of \
+      true -> to_keyword(L1, element(2, T)); \
+      false -> atomize(L1, T) \
+    end; \
+  is_keyword([C|Cs], L) -> \
+    is_keyword(C, L) orelse \
+    is_keyword(Cs, L); \
+  is_keyword(all, L) -> is_keyword([any, open, close], L); \
+  is_keyword(open_tag, L) -> is_keyword([any, open], L); \
+  is_keyword(close_tag, L) -> is_keyword([any, close], L); \
+  \
+  is_keyword(any, "in") -> true; \
+  is_keyword(any, "not") -> true; \
+  is_keyword(any, "or") -> true; \
+  is_keyword(any, "and") -> true; \
+  is_keyword(any, "as") -> true; \
+  is_keyword(any, "by") -> true; \
+  is_keyword(any, "with") -> true; \
+  \
+  is_keyword(close, "only") -> true; \
+  is_keyword(close, "parsed") -> true; \
+  is_keyword(close, "noop") -> true; \
+  is_keyword(close, "reversed") -> true; \
+  is_keyword(close, "openblock") -> true; \
+  is_keyword(close, "closeblock") -> true; \
+  is_keyword(close, "openvariable") -> true; \
+  is_keyword(close, "closevariable") -> true; \
+  is_keyword(close, "openbrace") -> true; \
+  is_keyword(close, "closebrace") -> true; \
+  is_keyword(close, "opencomment") -> true; \
+  is_keyword(close, "closecomment") -> true; \
+  \
+  is_keyword(open, "autoescape") -> true; \
+  is_keyword(open, "endautoescape") -> true; \
+  is_keyword(open, "block") -> true; \
+  is_keyword(open, "endblock") -> true; \
+  is_keyword(open, "comment") -> true; \
+  is_keyword(open, "endcomment") -> true; \
+  is_keyword(open, "cycle") -> true; \
+  is_keyword(open, "extends") -> true; \
+  is_keyword(open, "filter") -> true; \
+  is_keyword(open, "endfilter") -> true; \
+  is_keyword(open, "firstof") -> true; \
+  is_keyword(open, "for") -> true; \
+  is_keyword(open, "empty") -> true; \
+  is_keyword(open, "endfor") -> true; \
+  is_keyword(open, "if") -> true; \
+  is_keyword(open, "elif") -> true; \
+  is_keyword(open, "else") -> true; \
+  is_keyword(open, "endif") -> true; \
+  is_keyword(open, "ifchanged") -> true; \
+  is_keyword(open, "endifchanged") -> true; \
+  is_keyword(open, "ifequal") -> true; \
+  is_keyword(open, "endifequal") -> true; \
+  is_keyword(open, "ifnotequal") -> true; \
+  is_keyword(open, "endifnotequal") -> true; \
+  is_keyword(open, "include") -> true; \
+  is_keyword(open, "now") -> true; \
+  is_keyword(open, "regroup") -> true; \
+  is_keyword(open, "endregroup") -> true; \
+  is_keyword(open, "spaceless") -> true; \
+  is_keyword(open, "endspaceless") -> true; \
+  is_keyword(open, "ssi") -> true; \
+  is_keyword(open, "templatetag") -> true; \
+  is_keyword(open, "widthratio") -> true; \
+  is_keyword(open, "call") -> true; \
+  is_keyword(open, "endwith") -> true; \
+  is_keyword(open, "trans") -> true; \
+  is_keyword(open, "blocktrans") -> true; \
+  is_keyword(open, "endblocktrans") -> true; \
+  is_keyword(_, _) -> false \
+end.