Browse Source

エンコード処理を最適化

Takeru Ohta 11 years ago
parent
commit
236f334685
7 changed files with 111 additions and 90 deletions
  1. README.md (+2 -4)
  2. doc/jsone.md (+3 -3)
  3. src/jsone.app.src (+1 -1)
  4. src/jsone.erl (+2 -2)
  5. src/jsone_decode.erl (+1 -1)
  6. src/jsone_encode.erl (+80 -57)
  7. test/jsone_encode_tests.erl (+22 -22)

+ 2 - 4
README.md

@@ -1,4 +1,4 @@
-jsone (0.1.1)
+jsone (0.1.2)
 =============
 
 Erlangで実装されたJSONのエンコード/デコードライブラリ。
@@ -48,10 +48,8 @@ API
 
 %% エンコード
 > jsone:encode([1,2,3]).
-[91,[[<<"1">>,44,<<"2">>],44,<<"3">>],93]  % iodata()が返る
-
-> iolist_to_binary(jsone:encode([1,2,3])).
 <<"[1,2,3]">>
+
 ```
 
 参考

+ 3 - 3
doc/jsone.md

@@ -103,7 +103,7 @@ json_value() = <a href="#type-json_number">json_number()</a> | <a href="#type-js
 ## Function Index ##
 
 
-<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#decode-1">decode/1</a></td><td>JSONバイナリをデコードする.</td></tr><tr><td valign="top"><a href="#encode-1">encode/1</a></td><td>JSON値をiodata形式にエンコードする.</td></tr></table>
+<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#decode-1">decode/1</a></td><td>JSONバイナリをデコードする.</td></tr><tr><td valign="top"><a href="#encode-1">encode/1</a></td><td>JSON値をバイナリ形式にエンコードする.</td></tr></table>
 
 
 <a name="functions"></a>
@@ -133,14 +133,14 @@ JSONバイナリをデコードする.
 
 
 <pre><code>
-encode(JsonValue::<a href="#type-json_value">json_value()</a>) -&gt; iodata()
+encode(JsonValue::<a href="#type-json_value">json_value()</a>) -&gt; binary()
 </code></pre>
 
 <br></br>
 
 
 
-JSON値をiodata形式にエンコードする.
+JSON値をバイナリ形式にエンコードする.
 
 
 エンコードに失敗した場合はエラーが送出される

+ 1 - 1
src/jsone.app.src

@@ -2,7 +2,7 @@
 {application, jsone,
  [
   {description, "Erlang JSON Library"},
-  {vsn, "0.1.1"},
+  {vsn, "0.1.2"},
   {registered, []},
   {applications, [
                   kernel,

+ 2 - 2
src/jsone.erl

@@ -65,9 +65,9 @@
 decode(Json) ->
     jsone_decode:decode(Json).
 
-%% @doc JSON値をiodata形式にエンコードする.
+%% @doc JSON値をバイナリ形式にエンコードする.
 %%
 %% エンコードに失敗した場合はエラーが送出される
--spec encode(json_value()) -> iodata().
+-spec encode(json_value()) -> binary().
 encode(JsonValue) ->
     jsone_encode:encode(JsonValue).

+ 1 - 1
src/jsone_decode.erl

@@ -163,7 +163,7 @@ unicode_string(<<N:4/binary, Bin/binary>>, Start, Nexts, Buf) ->
 unicode_string(<<Bin/binary>>, _Acc, _Nexts, _Buf) ->
     ?ERROR(Bin).
 
--spec unicode_to_utf8(0..1114111, binary()) -> iolist().
+-spec unicode_to_utf8(0..1114111, binary()) -> binary().
 unicode_to_utf8(Code, Buf) when Code < 16#80 ->
     <<Buf/binary, Code>>;
 unicode_to_utf8(Code, Buf) when Code < 16#800 ->

+ 80 - 57
src/jsone_encode.erl

@@ -33,71 +33,94 @@
 -export([encode/1]).
 
 %%--------------------------------------------------------------------------------
+%% Macros
+%%--------------------------------------------------------------------------------
+-define(IS_REDUNDANT_UTF8(B1, B2, FirstBitN), (B1 =:= 0 andalso B2 < (1 bsl (FirstBitN + 1)))).
+-define(HEX(N, I), (binary:at(<<"0123456789abcdef">>, (N bsr (I * 4)) band 2#1111))).
+-define(UNICODE_TO_HEX(Code), ?HEX(Code, 3), ?HEX(Code, 2), ?HEX(Code, 1), ?HEX(Code, 0)).
+
+%%--------------------------------------------------------------------------------
 %% Exported Functions
 %%--------------------------------------------------------------------------------
-%% @doc JSON値をiodata形式にエンコードする.
--spec encode(jsone:json_value()) -> iodata().
-encode(null)                         -> <<"null">>;
-encode(false)                        -> <<"false">>;
-encode(true)                         -> <<"true">>;
-encode(Value) when is_integer(Value) -> integer_to_binary(Value);
-encode(Value) when is_float(Value)   -> float_to_binary(Value);
-encode(Value) when is_binary(Value)  -> string(Value);
-encode(Value) when is_list(Value)    -> array(Value);
-encode({object, _} = Value)          -> object(Value);
-encode(Value)                        -> error({invalid_json_value, Value}).
+%% @doc JSON値をバイナリ形式にエンコードする.
+-spec encode(jsone:json_value()) -> binary().
+encode(Value) ->
+    value(Value, <<"">>).
 
 %%--------------------------------------------------------------------------------
 %% Internal Functions
 %%--------------------------------------------------------------------------------
--spec string(jsone:json_string()) -> iodata().
-string(Str) ->
-    %% XXX: 手抜き実装 (一回変換を挟んで無駄)
-    UnicodeStr = unicode:characters_to_list(Str),
-    [$", escape_string(UnicodeStr, []), $"].
+-spec value(jsone:json_value(), binary()) -> binary().
+value(null, Buf)                         -> <<Buf/binary, "null">>;
+value(false, Buf)                        -> <<Buf/binary, "false">>;
+value(true, Buf)                         -> <<Buf/binary, "true">>;
+value(Value, Buf) when is_integer(Value) -> <<Buf/binary, (integer_to_binary(Value))/binary>>;
+value(Value, Buf) when is_float(Value)   -> <<Buf/binary, (float_to_binary(Value))/binary>>;
+value(Value, Buf) when is_binary(Value)  -> string(Value, Buf);
+value(Value, Buf) when is_list(Value)    -> array(Value, Buf);
+value({object, _} = Value, Buf)          -> object(Value, Buf);
+value(Value, Buf)                        -> error(badarg, [Value, Buf]).
+
+-spec string(jsone:json_string(), binary()) -> binary().
+string(<<Str/binary>>, Buf) ->
+    escape_string(Str, <<Buf/binary, $">>).
+
+-spec escape_string(binary(), binary()) -> binary().
+escape_string(<<"">>,                   Buf) -> <<Buf/binary, $">>;
+escape_string(<<$", Str/binary>>,       Buf) -> escape_string(Str, <<Buf/binary, $\\, $">>);
+escape_string(<<$\/, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $\/>>);
+escape_string(<<$\\, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $\\>>);
+escape_string(<<$\b, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $b>>);
+escape_string(<<$\f, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $f>>);
+escape_string(<<$\n, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $n>>);
+escape_string(<<$\r, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $r>>);
+escape_string(<<$\t, Str/binary>>,      Buf) -> escape_string(Str, <<Buf/binary, $\\, $t>>);
+escape_string(<<0:1, C:7, Str/binary>>, Buf) -> escape_string(Str, <<Buf/binary, C>>);
+escape_string(<<2#110:3, B1:5, 2#10:2, B2:6, Str/binary>>, Buf) when not ?IS_REDUNDANT_UTF8(B1, B2, 5) ->
+    Unicode = (B1 bsl 6) + B2,
+    escape_unicode_char(Str, Unicode, Buf);
+escape_string(<<2#1110:4, B1:4, 2#10:2, B2:6, 2#10:2, B3:6, Str/binary>>, Buf) when not ?IS_REDUNDANT_UTF8(B1, B2, 4) ->
+    Unicode = (B1 bsl 12) + (B2 bsl 6) + B3,
+    escape_unicode_char(Str, Unicode, Buf);
+escape_string(<<2#11110:5, B1:3, 2#10:2, B2:6, 2#10:2, B3:6, 2#10:2, B4:6, Str/binary>>, Buf) when not ?IS_REDUNDANT_UTF8(B1, B2, 3) ->
+    Unicode = (B1 bsl 18) + (B2 bsl 12) + (B3 bsl 6) + B4,
+    escape_unicode_char(Str, Unicode, Buf);
+escape_string(Str, Buf) ->
+    error(badarg, [Str, Buf]).
 
--spec escape_string(string(), iolist()) -> iodata().
-escape_string([], Acc)         -> lists:reverse(Acc);
-escape_string([$"  | Str], Acc) -> escape_string(Str, [$", $\\ | Acc]);
-escape_string([$\/ | Str], Acc) -> escape_string(Str, [$/, $\\ | Acc]); 
-escape_string([$\\ | Str], Acc) -> escape_string(Str, [$\\,$\\ | Acc]); 
-escape_string([$\b | Str], Acc) -> escape_string(Str, [$b, $\\ | Acc]);
-escape_string([$\f | Str], Acc) -> escape_string(Str, [$f, $\\ | Acc]); 
-escape_string([$\n | Str], Acc) -> escape_string(Str, [$n, $\\ | Acc]);
-escape_string([$\r | Str], Acc) -> escape_string(Str, [$r, $\\ | Acc]); 
-escape_string([$\t | Str], Acc) -> escape_string(Str, [$t, $\\ | Acc]); 
-escape_string([C   | Str], Acc) ->
-    case C < 16#80 of
-        true  -> escape_string(Str, [C | Acc]);
-        false -> escape_string(Str, [escape_unicode_char(C) | Acc])
-    end. 
+-spec escape_unicode_char(binary(), char(), binary()) -> binary().
+escape_unicode_char(<<Str/binary>>, Unicode, Buf) when Unicode =< 16#FFFF ->
+    escape_string(Str, <<Buf/binary, $\\, $u, ?UNICODE_TO_HEX(Unicode)>>);
+escape_unicode_char(<<Str/binary>>, Unicode, Buf) ->
+    %% サロゲートペア
+    <<High:10, Low:10>> = <<(Unicode - 16#10000):20>>, % 非効率
+    escape_string(Str, <<Buf/binary, $\\, $u, ?UNICODE_TO_HEX(High + 16#D800), $\\, $u, ?UNICODE_TO_HEX(Low + 16#DC00)>>).
 
-%% NOTE: `Unicode'の値が適切であることは`unicode'モジュールが保証してくれていることを期待
--spec escape_unicode_char(char()) -> iodata().
-escape_unicode_char(Unicode) when Unicode =< 16#FFFF ->
-    io_lib:format("\\u~4.16.0b", [Unicode]);
-escape_unicode_char(Unicode) ->
-    %% サロゲートペア (非効率実装)
-    <<High:10, Low:10>> = <<(Unicode - 16#10000):20>>,
-    io_lib:format("\\u~4.16.0b\\u~4.16.0b", [High + 16#D800, Low + 16#DC00]).
+-spec array(jsone:json_array(), binary()) -> binary().
+array(List, Buf) ->
+    array_values(List, <<Buf/binary, $[>>).
 
--spec array(jsone:json_array()) -> iodata().
-array(List) ->
-    [$[, iodata_join(lists:map(fun encode/1, List), $,), $]].
+-spec array_values(jsone:json_array(), binary()) -> binary().
+array_values([],       Buf) -> <<Buf/binary, $]>>;
+array_values([X | Xs], Buf) ->
+    Buf2 = value(X, Buf),
+    case Xs of
+        [] -> <<Buf2/binary, $]>>;
+        _  -> array_values(Xs, <<Buf2/binary, $,>>)
+    end.
 
--spec object(jsone:json_object()) -> iodata().
-object({object, Members} = Object) ->
-    [${,
-     iodata_join([case is_binary(Key) of
-                      false -> error({invalid_json_value, Object});
-                      true  -> [string(Key), $:, encode(Value)]
-                  end || {Key, Value} <- Members],
-                 $,),
-     $}].
+-spec object(jsone:json_object(), binary()) -> binary().
+object({object, Members}, Buf) ->
+    object_members(Members, <<Buf/binary, ${>>).
 
--spec iodata_join([iodata()], char()) -> iodata().
-iodata_join([], _Delimiter)           -> [];
-iodata_join([Head | Tail], Delimiter) ->
-    lists:foldl(fun (IoData, Acc) -> [Acc, Delimiter, IoData] end,
-                Head,
-                Tail).
+-spec object_members(jsone:json_object_members(), binary()) -> binary().
+object_members([],                             Buf) -> <<Buf/binary, $}>>;
+object_members([{<<Key/binary>>, Value} | Xs], Buf) ->
+    Buf2 = string(Key, Buf),
+    Buf3 = value(Value, <<Buf2/binary, $:>>),
+    case Xs of
+        [] -> <<Buf3/binary, $}>>;
+        _  -> object_members(Xs, <<Buf3/binary, $,>>)
+    end;
+object_members(Arg, Buf) ->
+    error(badarg, [Arg, Buf]).

+ 22 - 22
test/jsone_encode_tests.erl

@@ -8,72 +8,72 @@ encode_test_() ->
      %% シンボル系
      {"false がエンコード可能",
       fun () ->
-              ?assertEqual(<<"false">>, iolist_to_binary(jsone_encode:encode(false)))
+              ?assertEqual(<<"false">>, jsone_encode:encode(false))
       end},
      {"true がエンコード可能",
       fun () ->
-              ?assertEqual(<<"true">>, iolist_to_binary(jsone_encode:encode(true)))
+              ?assertEqual(<<"true">>, jsone_encode:encode(true))
       end},
      {"null がエンコード可能",
       fun () ->
-              ?assertEqual(<<"null">>, iolist_to_binary(jsone_encode:encode(null)))
+              ?assertEqual(<<"null">>, jsone_encode:encode(null))
       end},
 
      %% 数値系: 整数
      {"0がエンコード可能",
       fun () ->
-              ?assertEqual(<<"0">>, iolist_to_binary(jsone_encode:encode(0)))
+              ?assertEqual(<<"0">>, jsone_encode:encode(0))
       end},
      {"正の整数がエンコード可能",
       fun () ->
-              ?assertEqual(<<"1">>, iolist_to_binary(jsone_encode:encode(1)))
+              ?assertEqual(<<"1">>, jsone_encode:encode(1))
       end},
      {"負の整数がエンコード可能",
       fun () ->
-              ?assertEqual(<<"-1">>, iolist_to_binary(jsone_encode:encode(-1)))
+              ?assertEqual(<<"-1">>, jsone_encode:encode(-1))
       end},
      {"巨大な整数がエンコード可能",
       fun () ->
               ?assertEqual(<<"11111111111111111111111111111111111111111111111111111111111111111111111">>,
-                           iolist_to_binary(jsone_encode:encode(11111111111111111111111111111111111111111111111111111111111111111111111)))
+                           jsone_encode:encode(11111111111111111111111111111111111111111111111111111111111111111111111))
       end},
 
      %% 数値系: 小数
      {"小数がエンコード可能",
       fun () ->
               Input   = 1.234,
-              Encoded = iolist_to_binary(jsone_encode:encode(Input)),
+              Encoded = jsone_encode:encode(Input),
               ?assertEqual(Input, binary_to_float(Encoded))
       end},
 
      %% 文字列系
      {"文字列がエンコード可能",
       fun () ->
-              ?assertEqual(<<"\"abc\"">>, iolist_to_binary(jsone_encode:encode(<<"abc">>)))
+              ?assertEqual(<<"\"abc\"">>, jsone_encode:encode(<<"abc">>))
       end},
      {"各種エスケープ文字を含む文字列をエンコード可能",
       fun () ->
               Input    = <<"\"\/\\\b\f\n\r\t">>,
               Expected = list_to_binary([$", [[$\\, C] || C <- [$", $/, $\\, $b, $f, $n, $r, $t]], $"]),
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
      {"UTF-8形式のマルチバイト文字列がエンコード可能",
       fun () ->
               %% 日本語
               Input1    = <<"あいうえお">>,  % このファイルの文字エンコーディングがUTF-8であることが前提
               Expected1 = <<"\"\\u3042\\u3044\\u3046\\u3048\\u304a\"">>,
-              ?assertEqual(Expected1, iolist_to_binary(jsone_encode:encode(Input1))),
+              ?assertEqual(Expected1, jsone_encode:encode(Input1)),
 
               %% 日本語以外のマルチバイト文字
               Input2    = <<"۝۞ႮႯ">>,
               Expected2 = <<"\"\\u06dd\\u06de\\u10ae\\u10af\"">>,
-              ?assertEqual(Expected2, iolist_to_binary(jsone_encode:encode(Input2)))
+              ?assertEqual(Expected2, jsone_encode:encode(Input2))
       end},
      {"サロゲートペアを含む文字列がエンコード可能",
       fun () ->
               Input    = <<"𢁉𢂚𢃼">>,
               Expected = <<"\"\\ud848\\udc49\\ud848\\udc9a\\ud848\\udcfc\"">>,
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
 
      %% 配列系
@@ -81,13 +81,13 @@ encode_test_() ->
       fun () ->
               Input    = [1, 2, 3],
               Expected = <<"[1,2,3]">>,
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
      {"空配列がエンコード可能",
       fun () ->
               Input    = [],
               Expected = <<"[]">>,
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
 
      %% オブジェクト系
@@ -95,19 +95,19 @@ encode_test_() ->
       fun () ->
               Input    = {object, [{<<"key">>, <<"value">>}, {<<"1">>, 2}]},
               Expected = <<"{\"key\":\"value\",\"1\":2}">>,
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
      {"空オブジェクトがエンコード可能",
       fun () ->
               Input    = {object, []},
               Expected = <<"{}">>,
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
      {"オブジェクトのメンバのキーにはバイナリのみが使用可能",
       fun () ->
-              ?assertError({invalid_json_value, _}, jsone_encode:encode({object, [{1, 2}]})),
-              ?assertError({invalid_json_value, _}, jsone_encode:encode({object, [{"1", 2}]})),
-              ?assertError({invalid_json_value, _}, jsone_encode:encode({object, [{true, 2}]}))
+              ?assertError(badarg, jsone_encode:encode({object, [{1, 2}]})),
+              ?assertError(badarg, jsone_encode:encode({object, [{"1", 2}]})),
+              ?assertError(badarg, jsone_encode:encode({object, [{true, 2}]}))
       end},
 
      %% その他
@@ -115,10 +115,10 @@ encode_test_() ->
       fun () ->
               Input    = [true, {object, [{<<"1">>, 2}, {<<"array">>, [[[[1]]], {object, [{<<"ab">>, <<"cd">>}]}, false]}]}, null],
               Expected = <<"[true,{\"1\":2,\"array\":[[[[1]]],{\"ab\":\"cd\"},false]},null]">>,
-              ?assertEqual(Expected, iolist_to_binary(jsone_encode:encode(Input)))
+              ?assertEqual(Expected, jsone_encode:encode(Input))
       end},
      {"不正な値",
       fun () ->
-              ?assertError({invalid_json_value, _}, jsone_encode:encode(self()))
+              ?assertError(badarg, jsone_encode:encode(self()))
       end}
     ].