Browse Source

Replace our po scanner with the one used by zotonic.

Fixes #23.

The example data given in issue #23 now parses OK:
```
1> po_scanner:parse_po_file("23.po").
[{header,"Project-Id-Version: Project\nReport-Msgid-Bugs-To: \nPOT-Creation-Date: 2012-02-10 17:50+0100\nPO-Revision-Date: 2012-02-10 17:50+0100\nLast-Translator: Thomas Schaaf <schaaf@komola.de>\nLanguage-Team: komola UG <team@komola.de>\nMIME-Version: 1.0\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\nX-Poedit-KeywordsList: _;gettext;gettext_noop;_t\nX-Poedit-Basepath: .\nX-Poedit-Language: German\nX-Poedit-Country: GERMANY\nX-Poedit-SourceCharset: utf-8\nX-Poedit-SearchPath-0: .\n"},
 {"PAGE_NOT_FOUND","Seite nicht gefunden!"}]
```
Andreas Stenius 11 years ago
parent
commit
1548261290
2 changed files with 150 additions and 84 deletions
  1. 1 1
      src/i18n/i18n_manager.erl
  2. 149 83
      src/i18n/po_scanner.erl

+ 1 - 1
src/i18n/i18n_manager.erl

@@ -32,7 +32,7 @@ generate_pos([Lang,Files])->
 						insert_tokens(ProcessedFiles),
 						insert_tokens(ProcessedFiles),
 						
 						
 						%%Recover already present translations
 						%%Recover already present translations
-						TranslationsForLanguage = po_scanner:scan(BaseDir ++ Language ++ "/gettext.po"),
+						TranslationsForLanguage = po_scanner:parse_po_file(BaseDir ++ Language ++ "/gettext.po"),
 						io:format("Updating translations~n"),
 						io:format("Updating translations~n"),
 						insert_translations(TranslationsForLanguage),
 						insert_translations(TranslationsForLanguage),
 						Data = dets_data(),
 						Data = dets_data(),

+ 149 - 83
src/i18n/po_scanner.erl

@@ -1,91 +1,157 @@
-%% Author: dave
-%% Created: Mar 1, 2010
-%% Description: TODO: Add description to po_scanner
+%% Copyright (c) 2006, Torbjorn Tornkvist, tobbe@tornkvist.org
+%% All rights reserved.
+%% 
+%% Redistribution and use in source and binary forms, with or without 
+%% modification, are permitted provided that the following conditions are met:
+%% 
+%%     * Redistributions of source code must retain the above copyright 
+%%       notice, this list of conditions and the following disclaimer.
+%%     * Redistributions in binary form must reproduce the above copyright 
+%%       notice, this list of conditions and the following disclaimer in the 
+%%       documentation and/or other materials provided with the distribution.
+%%     * Neither the name of "Torbjorn Tornkvist" nor the names of any other
+%%       contributors may be used to endorse or promote products derived from
+%%       this software without specific prior written permission.
+%% 
+%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+%% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+%% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+%% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+%% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+%% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+%% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+%% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+%% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+%% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+%% POSSIBILITY OF SUCH DAMAGE.
+
+%%%----------------------------------------------------------------------
+%%% Created:  27 Oct 2003 by tobbe@bluetail.com
+%%% Function: Tools for multi-lingual capabilities,
+%%%           similar to GNU gettext.
+%%%----------------------------------------------------------------------
+%%% Modified: 2010-05-18 by marc@worrell.nl
+%%%
+%%% Adaptations for Zotonic. 
+%%% Original code is at http://github.com/noss/erlang-gettext
+%%%----------------------------------------------------------------------
+%%% Adapted for erlydtl, 2014-01-18 by Andreas Stenius <git@astekk.se>
+%%% From: https://raw.github.com/zotonic/zotonic/4b2ec1486aaa67d758f43fbf38fd7da58e7cdfdc/src/i18n/z_gettext.erl
+%%%----------------------------------------------------------------------
 -module(po_scanner).
 -module(po_scanner).
-%%
-%% Include files
-%%
-
-%%
-%% Exported Functions
-%%
--export([scan/1]).
-
-%%
-%% API Functions
-%%
-scan(Path) -> 
-    case file:read_file(Path) of
-        {ok,File} ->
-			Str = re:replace(File, "\\\\n", "\\\n", [global, {return,list}]),
-            scan(Str, [], {1, 1}, [in_text]);
-        _Error ->
-            io:format("No po file found at path ~p~n",[Path]),
-            []
-    end.
-
-
-scan("#" ++ T, Scanned, {Row, Column}, Status = [in_text]) -> 
-    scan(T, Scanned, {Row, Column + 1}, lists:append([{in_comment, []}],Status));
-scan("\n" ++ T, Scanned, {Row, _Column}, [{in_comment, Comment}|Status]) -> 
-    scan(T, lists:append(Scanned, [{comment, Comment}]), {Row +1 , 1}, Status);
-scan([Head | T], Scanned, {Row, Column}, _Status = [{in_comment, Comment}|Stack]) ->
-    NewStatus = lists:append([{in_comment, lists:append(Comment,[Head])}],Stack),
-    scan(T, Scanned, {Row, Column + 1}, NewStatus);
-
-%%Msg id
-scan("msgid" ++ T, Scanned, {Row, Column}, Status = [in_text]) ->  
-    scan(T, Scanned, {Row, Column + 5}, lists:append([{in_message_id, []}],Status));
-
-%%scan("msgid" ++ T, Scanned, {Row, Column}, [{in_message_str, Body}|Stack]) ->  
-%%	scan(T, lists:append(Scanned , [{str, Body}]), {Row, Column + 5}, lists:append([{in_message_id, []}],Stack));
-
-scan("\n\n" ++ T, Scanned, {Row, _Column}, [{in_message_str, Body}|Stack]) ->  
-    scan(T, lists:append(Scanned , [{str, Body}]), {Row + 2, 1}, Stack);
-scan("\n", Scanned, {Row, _Column}, [{in_message_str, Body}|Stack]) ->
-    scan([], lists:append(Scanned , [{str, Body}]), {Row + 2, 1}, Stack);
 
 
-%%Msg str
-scan("msgstr" ++ T, Scanned, {Row, Column}, [{in_message_id, Body} | Stack]) ->
-    %%io:format("Id is ~s~n",[Body]),
-    scan(T, lists:append(Scanned ,[{id, Body}]), {Row, Column + 6}, lists:append([{in_message_str, []}],Stack));
+-export([parse_po_file/1, parse_po/1, test/0]).
 
 
 
 
-scan([$\\, C|T], Scanned, {Row, Column}, [{in_string_body, Body}|Stack]) ->
-    scan(T, Scanned, {Row, Column + 2}, [{in_string_body, lists:append(Body, [C])} | Stack]);
+-define(GETTEXT_HEADER_INFO, header).
 
 
-%%Start and end for a message body
-scan("\"" ++ T, Scanned, {Row, Column}, [{in_string_body, Body}|Stack]) ->
-    %%io:format("Ending string ~s ~p~n",[Body, Stack]),
-    end_of_string(Body, Stack, T, Scanned, Row, Column);
-scan("\"" ++ T, Scanned, {Row, Column}, Stack) ->
-  scan(T, Scanned, {Row, Column + 1}, lists:append([{in_string_body, []}], Stack));
 
 
-%%Carriage return are ignored
-%% scan("\n" ++ T, Scanned, {Row, _Column}, Status) ->
-%%     scan(T, Scanned, {Row + 1, 1}, Status);
+%%% --------------------------------------------------------------------
+%%% Parse a PO-file
+%%% --------------------------------------------------------------------
 
 
-%%Concat string body to already parsed
-scan([H | T] , Scanned, {Row, Column}, [{in_string_body, Body} | Stack]) ->
-    scan(T, Scanned, {Row, Column + 1}, [{in_string_body, lists:append(Body, [H])} | Stack]);
-
-%%Others characters are ignored
-scan([_H | T] , Scanned, {Row, Column}, Status) ->
-    scan(T, Scanned, {Row, Column + 1}, Status);
-
-%%EOF
-scan([], Scanned, {_Row, _Column}, _Stack) ->Scanned;
-scan(In, Scanned, {_Row, _Column}, _Status) ->
-    io:format("Cannot process ~p, scanned ~p ~n",[In, Scanned]).
-
-end_of_string(String, [{in_message_id, Body}|Stack] ,T, Scanned, Row, Column) ->
-    scan(T, Scanned, {Row, Column}, [{in_message_id, lists:append(Body ,String)} | Stack ]);
-end_of_string(String, [{in_message_str, Body}|Stack] , T, Scanned, Row, Column) ->
-    scan(T, Scanned, {Row, Column }, [{in_message_str, lists:append(Body,String)} |Stack ]).
-	
-	
-
-%%
-%% Local Functions
-%%
+%% Read the PO file Fname and return its parsed entries (see parse_po/1).
+%% When the file cannot be read, a message is printed with io:format/2
+%% and the empty list is returned instead of an error.
+parse_po_file(Fname) ->
+    handle_read(Fname, file:read_file(Fname)).
+
+%% Turn the outcome of file:read_file/1 into the parse result.
+handle_read(_Fname, {ok, Contents}) ->
+    parse_po(Contents);
+handle_read(Fname, {error, Reason}) ->
+    Message = file:format_error(Reason),
+    io:format("Error reading po file ~s: ~s~n", [Fname, Message]),
+    [].
 
 
+%% Parse PO data, given as a binary or as a character list, into a list
+%% of {MsgId, MsgStr} string pairs in file order.
+%%
+%% The entry with an empty msgid is returned as
+%% {?GETTEXT_HEADER_INFO, MsgStr}; any other entry whose msgstr is
+%% empty is left out of the result.
+parse_po(Bin) when is_binary(Bin) ->
+    parse_po(to_list(Bin));
+parse_po(List) when is_list(List) ->
+    [tag_header(Entry)
+     || Entry <- parse_po_list(List), not is_untranslated(Entry)].
+
+%% Swap the empty msgid of the header entry for the header tag.
+tag_header({"", Header}) -> {?GETTEXT_HEADER_INFO, Header};
+tag_header(Entry)        -> Entry.
+
+%% True for a non-header entry whose msgstr is empty.
+is_untranslated({"", _}) -> false;
+is_untranslated({_, ""}) -> true;
+is_untranslated(_)       -> false.
+
+%% Scan the text for "msgid" keywords and return one {MsgId, MsgStr}
+%% pair per occurrence, in order of appearance.  Any character that is
+%% not the start of a "msgid" keyword is skipped.
+parse_po_list(Text) ->
+    collect_entries(Text, []).
+
+%% Entries is built most recent first and reversed at end of input.
+collect_entries([], Entries) ->
+    lists:reverse(Entries);
+collect_entries([$m, $s, $g, $i, $d | AfterKeyword], Entries) ->
+    {MsgId, AfterId} = get_po_string(AfterKeyword),
+    {MsgStr, Remaining} = get_msgstr(AfterId),
+    collect_entries(Remaining, [{MsgId, MsgStr} | Entries]);
+collect_entries([_Skipped | Remaining], Entries) ->
+    collect_entries(Remaining, Entries).
+
+%% Skip ahead to the next "msgstr" keyword and read the PO string that
+%% follows it.  Returns {MsgStr, Rest}.  There is no clause for the
+%% empty list, so input that ends before a "msgstr" keyword fails with
+%% function_clause.
+get_msgstr([$m, $s, $g, $s, $t, $r | AfterKeyword]) ->
+    get_po_string(AfterKeyword);
+get_msgstr([_Skipped | Remaining]) ->
+    get_msgstr(Remaining).
+
+%%%
+%%% Read one PO string.  A PO string has the same syntax as a C
+%%% character string: adjacent quoted segments are joined into one value.
+%%% For example:
+%%%
+%%%   msgstr ""
+%%%     "Hello "
+%%%
+%%%     "\\World\n"
+%%%
+%%% Is parsed as: "Hello \World\n"
+%%%
+%%% Leading spaces, tabs and line breaks are skipped.  The first other
+%%% character must be the opening double quote; anything else (or the
+%%% end of input) fails with function_clause.  Returns {String, Rest}.
+%%%
+get_po_string([Blank | Rest])
+  when Blank =:= $\s; Blank =:= $\r; Blank =:= $\n; Blank =:= $\t ->
+    get_po_string(Rest);
+get_po_string([$" | Body]) ->
+    eat_string(Body).
+
+%% Read the body of a quoted PO string, starting right after the
+%% opening quote.  Returns {String, Rest}.
+eat_string(Body) ->
+    eat_string(Body, []).
+
+%% Acc holds the characters read so far, most recent first.
+%% Only the escapes \" \\ and \n are unescaped; a backslash in front of
+%% any other character is copied through unchanged.
+eat_string([$\\, Escaped | Rest], Acc) when Escaped =:= $"; Escaped =:= $\\ ->
+    eat_string(Rest, [Escaped | Acc]);
+eat_string([$\\, $n | Rest], Acc) ->
+    eat_string(Rest, [$\n | Acc]);
+eat_string([$" | Rest], Acc) ->
+    %% Closing quote: another quoted segment may still follow.
+    eat_more(Rest, Acc);
+eat_string([Char | Rest], Acc) ->
+    eat_string(Rest, [Char | Acc]).
+
+%% Called after a closing quote.  Skips whitespace; when another quoted
+%% segment follows, its contents are added to the same string,
+%% otherwise the string read so far is returned as {String, Rest}.
+eat_more([Blank | Rest], Acc)
+  when Blank =:= $\s; Blank =:= $\n; Blank =:= $\r; Blank =:= $\t ->
+    eat_more(Rest, Acc);
+eat_more([$" | Segment], Acc) ->
+    eat_string(Segment, Acc);
+eat_more(Rest, Acc) ->
+    {lists:reverse(Acc), Rest}.
+
+
+%% Convert an atom, integer, binary or list to a list; a list is
+%% returned unchanged.  Any other term fails with function_clause.
+to_list(Chars) when is_list(Chars)      -> Chars;
+to_list(Bytes) when is_binary(Bytes)    -> binary_to_list(Bytes);
+to_list(Number) when is_integer(Number) -> integer_to_list(Number);
+to_list(Name) when is_atom(Name)        -> atom_to_list(Name).
+
+%% Smoke test for parse_po/1.  Every expectation is an assertive match,
+%% so a failure raises badmatch; returns ok when all of them hold.
+test() ->
+    %% A lone entry with an empty msgid is reported as the header.
+    HeaderOnly = parse_po(<<"msgid \"\"\nmsgstr \"header value\"">>),
+    [{header, "header value"}] = HeaderOnly,
+
+    %% Header followed by one translated entry, in file order.
+    WithEntry = parse_po(<<"msgid \"\"\nmsgstr \"header value\"\n"
+                           "\n"
+                           "msgid \"en\"\nmsgstr \"nl\"\n">>),
+    [{header, "header value"}, {"en", "nl"}] = WithEntry,
+
+    %% parse_po/1 leaves out non-header entries whose msgstr is empty,
+    %% so the untranslated "empty trans" entry must not be in the result.
+    WithEmpty = parse_po(<<"msgid \"\"\nmsgstr \"header value\"\n"
+                           "\n"
+                           "msgid \"en\"\nmsgstr \"nl\"\n"
+                           "\n"
+                           "msgid \"empty trans\"\nmsgstr \"\"\n">>),
+    [{header, "header value"}, {"en", "nl"}] = WithEmpty,
+    ok.