Browse Source

New api for sources parser: add translators comments extraction.

The old API is still available: the new API's responses are converted to
the old format.
There are phrase-information fields, such as 'msgid_plural' and 'context',
which we don't currently extract; they will be implemented later.
Сергей Прохоров 11 years ago
parent
commit
3b1fa97875
2 changed files with 163 additions and 26 deletions
  1. 139 26
      src/i18n/sources_parser.erl
  2. 24 0
      test/sources_parser_tests.erl

+ 139 - 26
src/i18n/sources_parser.erl

@@ -1,29 +1,104 @@
 %% Author: dave
 %% Author: dave
+%% Author: Sergey Prokhorov <me@seriyps.ru> (new/ext API)
 %% Created: Mar 1, 2010
 %% Created: Mar 1, 2010
-%% Description: Parses source files and extracts translation directives on templates
+%% @doc
+%% Parses source files and extracts translation directives on templates
+%% Examples:
+%% <pre>
+%% Tpl = <<"111"
+%%         "{#Translators: btrans comment #}{%blocktrans%}btrns{%endblocktrans%}"
+%%         "{%comment%}  TRANSLATORS: trans comment {%endcomment%}222{%trans 'trns'%}"
+%%         "333">>,
+%% Phrases = sources_parser:parse_content("filename.dtl", Tpl),
+%% Msgids = [sources_parser:phrase_info(msgid, P) || P <- Phrases].
+%% %% -> ["btrns", "trns"]
+%% InOldFormat = [begin
+%%                  [Str, File, Line, Col] = sources_parser:phrase_info([msgid, file, line, col], P),
+%%                  {Str, {File, Line, Col}}
+%%                end || P <- Phrases].
+%% %% -> [{"btrns", {"filename.dtl", 1, 47}}, {"trns", {"filename.dtl", 1, 135}}]
+%% </pre>
 -module(sources_parser).
 -module(sources_parser).
 
 
 %%
 %%
+%% Exported Functions
+%%
+
+%% New API
+-export([parse_pattern/1, parse_file/1, parse_content/2, phrase_info/2]).
+%% Deprecated API
+-export([parse/0, parse/1, process_content/2]).
+
+-export_type([phrase/0, compat_phrase/0, field/0]).
+
+%%
 %% Include files
 %% Include files
 %%
 %%
 
 
 -include("include/erlydtl_ext.hrl").
 -include("include/erlydtl_ext.hrl").
 
 
+-record(phrase, {msgid :: string(),
+                 msgid_plural :: string() | undefined, %TODO
+                 context :: string() | undefined,      %TODO
+                 comment :: string() | undefined,
+                 file :: string(),
+                 line :: non_neg_integer(),
+                 col :: non_neg_integer()}).
+-record(state, {acc=[], translators_comment}).
+
+-opaque phrase() :: #phrase{}.
+-type compat_phrase() :: {string(), {string(), non_neg_integer(), non_neg_integer()}}.
+-type field() :: msgid | msgid_plural | context | comment | file | line | col.
+
 -define(bail(Fmt, Args),
 -define(bail(Fmt, Args),
         throw(lists:flatten(io_lib:format(Fmt, Args)))).
         throw(lists:flatten(io_lib:format(Fmt, Args)))).
 
 
-%%
-%% Exported Functions
-%%
--export([parse/0,parse/1, process_content/2]).
+-define(GET_FIELD(Name), phrase_info(Name, P) -> P#phrase.Name).
 
 
 %%
 %%
 %% API Functions
 %% API Functions
 %%
 %%
+
+%% Old API
 parse() ->
 parse() ->
     Parsed_Files = parse(["./views/*/*.html"]),
     Parsed_Files = parse(["./views/*/*.html"]),
     io:format("Parsed files are ~p~n",[Parsed_Files]).
     io:format("Parsed files are ~p~n",[Parsed_Files]).
+
 parse(Pattern) ->
 parse(Pattern) ->
+    to_compat(parse_pattern(Pattern)).
+
+process_content(Path, Content) ->
+    to_compat(parse_content(Path, Content)).
+
+%% @doc convert new API output to old one.
+-spec to_compat([phrase()]) -> [compat_phrase()].
+to_compat(Phrases) ->
+    Convert = fun(#phrase{msgid=Str, file=File, line=Line, col=Col}) ->
+                      {Str, {File, Line, Col}}
+              end,
+    lists:map(Convert, Phrases).
+
+%% New API
+
+%% @doc extract info about phrase.
+%% See `field()' type for list of available info field names.
+-spec phrase_info([field()] | field(), phrase()) -> [Info] | Info
+                                                        when
+      Info :: non_neg_integer() | string() | undefined.
+?GET_FIELD(msgid);                                  %a little macro magic
+?GET_FIELD(msgid_plural);
+?GET_FIELD(context);
+?GET_FIELD(comment);
+?GET_FIELD(file);
+?GET_FIELD(line);
+?GET_FIELD(col);
+phrase_info(Fields, Phrase) when is_list(Fields) ->
+    %% you may pass list of fields
+    lists:map(fun(Field) -> phrase_info(Field, Phrase) end, Fields).
+
+%% @doc list files, using wildcard and extract phrases from them
+-spec parse_pattern([string()]) -> [phrase()].
+parse_pattern(Pattern) ->
     %%We assume a basedir
     %%We assume a basedir
     GetFiles = fun(Path,Acc) -> Acc ++ filelib:wildcard(Path) end,
     GetFiles = fun(Path,Acc) -> Acc ++ filelib:wildcard(Path) end,
     Files = lists:foldl(GetFiles,[],Pattern),
     Files = lists:foldl(GetFiles,[],Pattern),
@@ -31,18 +106,18 @@ parse(Pattern) ->
     ParsedFiles = lists:map(fun(File)-> parse_file(File) end, Files),
     ParsedFiles = lists:map(fun(File)-> parse_file(File) end, Files),
     lists:flatten(ParsedFiles).
     lists:flatten(ParsedFiles).
 
 
-%%
-%% Local Functions
-%%
+%% @doc extract phrases from single file
 parse_file(Path) ->
 parse_file(Path) ->
     case file:read_file((Path)) of
     case file:read_file((Path)) of
         {ok, Content} ->
         {ok, Content} ->
-            process_content(Path, Content);
+            parse_content(Path, Content);
         Error ->
         Error ->
             ?bail("Cannot read file ~s problem ~p~n", [Path, Error])
             ?bail("Cannot read file ~s problem ~p~n", [Path, Error])
     end.
     end.
 
 
-process_content(Path,Content)->
+%% @doc extract phrases from string / binary
+-spec parse_content(string(), binary()) -> [phrase()].
+parse_content(Path,Content)->
     case erlydtl_compiler:do_parse_template(Content, #dtl_context{}) of
     case erlydtl_compiler:do_parse_template(Content, #dtl_context{}) of
         {ok, Data} ->
         {ok, Data} ->
             {ok, Result} = process_ast(Path, Data),
             {ok, Result} = process_ast(Path, Data),
@@ -52,24 +127,43 @@ process_content(Path,Content)->
     end.
     end.
 
 
 
 
-process_ast(Fname, Tokens) -> {ok, process_ast(Fname, Tokens ,[]) }.
-process_ast(_Fname, [],Acc) -> Acc;
-process_ast(Fname,[Head|Tail], Acc) ->
-    NewAcc = process_token(Fname,Head,Acc),
-    process_ast(Fname, Tail, NewAcc).
+%%
+%% Local Functions
+%%
+
+process_ast(Fname, Tokens) ->
+    {ok, (process_ast(Fname, Tokens, #state{}))#state.acc }.
+process_ast(_Fname, [], St) -> St;
+process_ast(Fname,[Head|Tail], St) ->
+    NewSt = process_token(Fname,Head,St),
+    process_ast(Fname, Tail, NewSt).
 
 
 %%Blocks are recursively processed, trans directives are accumulated and other tags are ignored
 %%Blocks are recursively processed, trans directives are accumulated and other tags are ignored
-process_token(Fname, {block,{identifier,{_Line,_Col},_Identifier},Children}, Acc ) -> process_ast(Fname, Children, Acc);
-process_token(Fname, {trans,{string_literal,{Line,Col},String}}, Acc ) -> [{unescape(String), {Fname, Line, Col}} | Acc];
-process_token(_Fname, {apply_filter, _Value, _Filter}, Acc) -> Acc;
-process_token(_Fname, {date, now, _Filter}, Acc) -> Acc;
-process_token(Fname, {blocktrans, Args, Contents}, Acc) -> [{lists:flatten(erlydtl_unparser:unparse(Contents)),
-                                                             guess_blocktrans_lc(Fname, Args, Contents)} | Acc];
-process_token(Fname, {_Instr, _Cond, Children}, Acc) -> process_ast(Fname, Children, Acc);
-process_token(Fname, {_Instr, _Cond, Children, Children2}, Acc) ->
-    AccModified = process_ast(Fname, Children, Acc),
-    process_ast(Fname, Children2, AccModified);
-process_token(_,_AST,Acc) -> Acc.
+process_token(Fname, {block,{identifier,{_Line,_Col},_Identifier},Children}, St) -> process_ast(Fname, Children, St);
+process_token(Fname, {trans,{string_literal,{Line,Col},String}}, #state{acc=Acc, translators_comment=Comment}=St) ->
+    Phrase = #phrase{msgid=unescape(String),
+                     comment=Comment,
+                     file=Fname,
+                     line=Line,
+                     col=Col},
+    St#state{acc=[Phrase | Acc], translators_comment=undefined};
+process_token(_Fname, {apply_filter, _Value, _Filter}, St) -> St;
+process_token(_Fname, {date, now, _Filter}, St) -> St;
+process_token(Fname, {blocktrans, Args, Contents}, #state{acc=Acc, translators_comment=Comment}=St) ->
+    {Fname, Line, Col} = guess_blocktrans_lc(Fname, Args, Contents),
+    Phrase = #phrase{msgid=lists:flatten(erlydtl_unparser:unparse(Contents)),
+                     comment=Comment,
+                     file=Fname,
+                     line=Line,
+                     col=Col},
+    St#state{acc=[Phrase | Acc], translators_comment=undefined};
+process_token(_, {comment, Comment}, St) ->
+    St#state{translators_comment=maybe_translators_comment(Comment)};
+process_token(Fname, {_Instr, _Cond, Children}, St) -> process_ast(Fname, Children, St);
+process_token(Fname, {_Instr, _Cond, Children, Children2}, St) ->
+    StModified = process_ast(Fname, Children, St),
+    process_ast(Fname, Children2, StModified);
+process_token(_,_AST,St) -> St.
 
 
 unescape(String) ->string:sub_string(String, 2, string:len(String) -1).
 unescape(String) ->string:sub_string(String, 2, string:len(String) -1).
 
 
@@ -85,3 +179,22 @@ guess_blocktrans_lc(Fname, _, [{variable, {identifier, {L, C}, _}} | _]) ->
     {Fname, L, C - length("blocktrans %}")};
     {Fname, L, C - length("blocktrans %}")};
 guess_blocktrans_lc(Fname, _, _) ->
 guess_blocktrans_lc(Fname, _, _) ->
     {Fname, -1, -1}.
     {Fname, -1, -1}.
+
+
+maybe_translators_comment([{string, _Pos, S}]) ->
+    %% fast path
+    case is_translators(S) of
+        true -> S;
+        false -> undefined
+    end;
+maybe_translators_comment(Other) ->
+%% something like "{%comment%}Translators: Hey, {{var}} is variable substitution{%endcomment%}"
+    Unparsed = lists:flatten(erlydtl_unparser:unparse(Other)),
+    case is_translators(Unparsed) of
+        true -> Unparsed;
+        false -> undefined
+    end.
+
+is_translators(S) ->
+    Stripped = string:strip(S, left),
+    "translators:" == string:to_lower(string:substr(Stripped, 1, 12)).

+ 24 - 0
test/sources_parser_tests.erl

@@ -6,6 +6,10 @@ all_sources_parser_test_() ->
     [{Title, [test_fun(Test) || Test <- Tests]}
     [{Title, [test_fun(Test) || Test <- Tests]}
      || {Title, Tests} <- test_defs()].
      || {Title, Tests} <- test_defs()].
 
 
+all_sources_parser_ext_test_() ->
+    [test_ext_fun(Test) || Test <- ext_test_defs()].
+
+
 test_fun({Name, Content, Output}) ->
 test_fun({Name, Content, Output}) ->
     {Name, fun () ->
     {Name, fun () ->
                    Tokens = (catch sources_parser:process_content("dummy_path", Content)),
                    Tokens = (catch sources_parser:process_content("dummy_path", Content)),
@@ -34,3 +38,23 @@ test_defs() ->
         [ {"Hello inside an else",{"dummy_path",1,94}}, {"Hello inside an if",{"dummy_path",1,50}}]}
         [ {"Hello inside an else",{"dummy_path",1,94}}, {"Hello inside an if",{"dummy_path",1,50}}]}
       ]}
       ]}
     ].
     ].
+
+
+test_ext_fun({Name, Tpl, {Fields, Output}}) ->
+    {Name, fun() ->
+                   Tokens = [sources_parser:phrase_info(Fields, P)
+                             || P <- sources_parser:parse_content("dummy_path", Tpl)],
+                   ?assertEqual(Output, Tokens)
+           end}.
+
+ext_test_defs() ->
+    [{"trans with inline comments",
+      <<"{#TrAnSlATORs: hi!#}{%trans 'phrase'%}">>,
+      {[msgid, comment], [["phrase", "TrAnSlATORs: hi!"]]}},
+     {"trans with comments",
+      <<"{%comment%}translators: com{{me}}nt{%endcomment%}{%trans 'phrase'%}">>,
+      {[msgid, comment], [["phrase", "translators: com{{ me }}nt"]]}},
+     {"blocktrans with comments",
+      <<"{%comment%}translators: comment{%endcomment%}{%blocktrans with a=b%}B={{b}}{%endblocktrans%}">>,
+      {[msgid, comment], [["B={{ b }}", "translators: comment"]]}}
+    ].