Browse Source

デコードモジュールのユニットテストを作成

Takeru Ohta 11 years ago
parent
commit
f39b8f0c9d
2 changed files with 189 additions and 34 deletions
  1. 46 27
      src/jsone_decode.erl
  2. 143 7
      test/jsone_decode_tests.erl

+ 46 - 27
src/jsone_decode.erl

@@ -1,11 +1,13 @@
 -module(jsone_decode).
- 
+
 -export([
          decode/1,
          decode/2
         ]).
 
-decode(Bin) ->
+-define(ERROR(Json), error({invalid_json, Json})). % Raises error:{invalid_json, Json}; the visible call sites pass the input binary at the point of failure.
+
+decode(<<Bin/binary>>) -> % The binary pattern makes this clause match binary arguments only.
     decode(Bin, []). % Delegates to decode/2 with an empty option list.
 
 decode(Bin, _Options) -> % TODO: delete
@@ -29,8 +31,10 @@ whitespace(<<Bin/binary>>,      Next, Nexts) ->
         value  -> value(Bin, Nexts);
         array  -> array(Bin, Nexts);
         object -> object(Bin, Nexts);
-        string -> <<$", Bin2/binary>> = Bin,
-                  string(Bin2, [], Nexts);
+        string -> case Bin of
+                      <<$", Bin2/binary>> -> string(Bin2, [], Nexts);
+                      _                   -> ?ERROR(Bin)
+                  end;
         {array_next, Values} -> array_next(Bin, Values, Nexts);
         {object_value, Key, Entries} -> object_value(Bin, Key, Entries, Nexts);
         {object_next, Key, Value, Entries} -> object_next(Bin, [{Key, Value} | Entries], Nexts)
@@ -48,18 +52,21 @@ array(<<$], Bin/binary>>, Nexts) -> next(Bin, [], Nexts);
 array(<<Bin/binary>>, Nexts)     -> whitespace(Bin, value, [{array_next, []} | Nexts]).
 
 %% Handles the byte met in the `{array_next, Values}` state dispatched by
 %% whitespace/3 (presumably the byte right after an array element -- confirm in next/3).
 %%   `]`  : reverses the accumulated Values and hands the list to next/3.
 %%   `,`  : goes back through whitespace/3 in the `value` state, keeping Values on the stack.
 %%   else : raises error:{invalid_json, Bin} with the remaining input.
 array_next(<<$], Bin/binary>>, Values, Nexts) -> next(Bin, lists:reverse(Values), Nexts);
-array_next(<<$,, Bin/binary>>, Values, Nexts) -> whitespace(Bin, value, [{array_next, Values} | Nexts]).
+array_next(<<$,, Bin/binary>>, Values, Nexts) -> whitespace(Bin, value, [{array_next, Values} | Nexts]);
+array_next(<<Bin/binary>>, _Values, _Nexts)   -> ?ERROR(Bin).
 
 %% Starts parsing an object body (presumably entered right after the opening `{` -- confirm at the call site).
 %%   `}`  : passes the empty object `{object, []}` to next/3.
 %%   else : goes through whitespace/3 in the `string` state (a member key is expected),
 %%          pushing `{object_value, []}` so parsing can continue with an empty entry list.
 object(<<$}, Bin/binary>>, Nexts) -> next(Bin, {object, []}, Nexts);
 object(<<Bin/binary>>, Nexts) -> whitespace(Bin, string, [{object_value, []} | Nexts]).
 
 %% Handles the byte met in the `{object_value, Key, Entries}` state dispatched by whitespace/3.
 %%   `:`  : goes through whitespace/3 in the `value` state, pushing
 %%          `{object_next, Key, Entries}` so the member can be completed afterwards.
 %%   else : raises error:{invalid_json, Bin} with the remaining input.
-object_value(<<$:, Bin/binary>>, Key, Entries, Nexts) -> whitespace(Bin, value, [{object_next, Key, Entries} | Nexts]).
+object_value(<<$:, Bin/binary>>, Key, Entries, Nexts) -> whitespace(Bin, value, [{object_next, Key, Entries} | Nexts]);
+object_value(<<Bin/binary>>, _Key, _Entries, _Nexts)  -> ?ERROR(Bin).
 
 %% Handles the byte that follows a complete object member; Entries holds the
 %% members read so far, newest first (whitespace/3 prepends `{Key, Value}` before calling).
 %%   `}`  : reverses Entries and hands `{object, Entries}` to next/3.
 %%   `,`  : goes through whitespace/3 in the `string` state to read the next key.
 %%   else : raises error:{invalid_json, Bin} with the remaining input.
 object_next(<<$}, Bin/binary>>, Entries, Nexts) -> next(Bin, {object, lists:reverse(Entries)}, Nexts);
-object_next(<<$,, Bin/binary>>, Entries, Nexts) -> whitespace(Bin, string, [{object_value, Entries} | Nexts]).
+object_next(<<$,, Bin/binary>>, Entries, Nexts) -> whitespace(Bin, string, [{object_value, Entries} | Nexts]);
+object_next(<<Bin/binary>>, _Entries, _Nexts)   -> ?ERROR(Bin).
 
-string(<<$", Bin/binary>>,      Acc, Nexts) -> next(Bin, list_to_binary(lists:reverse(Acc)), Nexts);
-string(<<$\\, B/binary>>, Acc, Nexts) ->
+string(<<$", Bin/binary>>, Acc, Nexts) -> next(Bin, list_to_binary(lists:reverse(Acc)), Nexts);
+string(<<$\\, B/binary>>,  Acc, Nexts) ->
     case B of
         <<$", Bin/binary>> -> string(Bin, [$" | Acc], Nexts);
         <<$/, Bin/binary>> -> string(Bin, [$/ | Acc], Nexts);
@@ -69,23 +76,33 @@ string(<<$\\, B/binary>>, Acc, Nexts) ->
         <<$n, Bin/binary>> -> string(Bin, [$\n | Acc], Nexts);
         <<$r, Bin/binary>> -> string(Bin, [$\r | Acc], Nexts);
         <<$t, Bin/binary>> -> string(Bin, [$\t | Acc], Nexts);
-        <<$u, Bin/binary>> -> unicode_string(Bin, Acc, Nexts)
+        <<$u, Bin/binary>> -> unicode_string(Bin, Acc, Nexts);
+        _                  -> ?ERROR(B)
     end;
 string(<<C, Bin/binary>>, Acc, Nexts) when 16#20 =< C ->
     string(Bin, [C | Acc], Nexts).
 
-unicode_string(<<N:4/binary, $\\, $u, N2:4/binary, Bin/binary>>, Acc, Nexts) ->
-    case {binary_to_integer(N, 16), binary_to_integer(N2, 16)} of
-        {High, Low} when 16#D800 =< High, High =< 16#DBFF,
-                         16#DC00 =< Low, Low =< 16#DFFF ->
-            Unicode = 16#10000 + (High - 16#D800) * 16#400 + (Low - 16#DC00),
-            string(Bin, unicode_to_utf8(Unicode, Acc), Nexts);
-        {A, B} ->
-            string(Bin, unicode_to_utf8(B, unicode_to_utf8(A, Acc)), Nexts)
-    end;
+%% Decodes the four hex digits that follow a `\u` escape inside a string.
+%%
+%% Bin   : input positioned right after `\u`.
+%% Acc   : bytes of the string decoded so far, newest first.
+%% Nexts : continuation stack, passed through untouched.
+%%
+%% Continues with string/3 on success. Raises error:{invalid_json, Rest} when
+%% fewer than four bytes remain, when the four bytes are not all hex digits,
+%% or when a surrogate code unit is not part of a well-formed high/low pair.
 unicode_string(<<N:4/binary, Bin/binary>>, Acc, Nexts) ->
-    Unicode = binary_to_integer(N, 16),
-    string(Bin, unicode_to_utf8(Unicode, Acc), Nexts).
+    case hex4_to_integer(N) of
+        error ->
+            %% binary_to_integer/2 alone would raise badarg here, or accept a sign such as "+041"
+            ?ERROR(<<N/binary, Bin/binary>>);
+        High when 16#D800 =< High, High =< 16#DBFF ->
+            %% high surrogate: a `\uXXXX` low surrogate must follow immediately
+            unicode_low_surrogate(Bin, High, Acc, Nexts);
+        Low when 16#DC00 =< Low, Low =< 16#DFFF ->
+            %% a low surrogate must never appear first
+            ?ERROR(<<N/binary, Bin/binary>>);
+        Unicode ->
+            string(Bin, unicode_to_utf8(Unicode, Acc), Nexts)
+    end;
+unicode_string(<<Bin/binary>>, _Acc, _Nexts) ->
+    ?ERROR(Bin).
+
+%% Reads the `\uXXXX` low surrogate expected after the high surrogate High and
+%% combines both halves into one code point. Errors carry the input that
+%% followed the high surrogate.
+unicode_low_surrogate(<<$\\, $u, N:4/binary, Bin2/binary>> = Bin, High, Acc, Nexts) ->
+    case hex4_to_integer(N) of
+        Low when is_integer(Low), 16#DC00 =< Low, Low =< 16#DFFF ->
+            Unicode = 16#10000 + (High - 16#D800) * 16#400 + (Low - 16#DC00),
+            string(Bin2, unicode_to_utf8(Unicode, Acc), Nexts);
+        _ ->
+            ?ERROR(Bin)
+    end;
+unicode_low_surrogate(<<Bin/binary>>, _High, _Acc, _Nexts) ->
+    ?ERROR(Bin).
+
+%% Converts exactly four hex digits to an integer; returns the atom `error`
+%% when any byte is not in 0-9, a-f or A-F.
+hex4_to_integer(<<A, B, C, D>> = Hex) ->
+    IsHexDigit = fun(X) ->
+                         ($0 =< X andalso X =< $9) orelse
+                         ($a =< X andalso X =< $f) orelse
+                         ($A =< X andalso X =< $F)
+                 end,
+    case lists:all(IsHexDigit, [A, B, C, D]) of
+        true  -> binary_to_integer(Hex, 16);
+        false -> error
+    end.
 
 unicode_to_utf8(Code, Acc) when Code < 16#80 ->
     [Code | Acc];
@@ -94,13 +111,11 @@ unicode_to_utf8(Code, Acc) when Code < 16#800 ->
     B = 2#10000000 bor (Code band 2#111111),
     [B, A | Acc];
 unicode_to_utf8(Code, Acc) when Code < 16#10000 ->
-    %% NOTE: Codeの値が 16#D800 から 16#DFFF の範囲内にないことは、呼び出し元が保証している
     A = 2#11100000 bor (Code bsr 12),
     B = 2#10000000 bor ((Code bsr 6) band 2#111111),
     C = 2#10000000 bor (Code band 2#111111),
     [C, B, A | Acc];
-unicode_to_utf8(Code, Acc) ->
-    %% NOTE: ユニコード範囲内云々
+unicode_to_utf8(Code, Acc) -> % NOTE: サロゲートペアの仕組み上、コード値が上限を越えることはないので、ここでの範囲チェックは不要
     A = 2#11110000 bor (Code bsr 18),
     B = 2#10000000 bor ((Code bsr 12) band 2#111111),
     C = 2#10000000 bor ((Code bsr  6) band 2#111111),
@@ -115,7 +130,7 @@ number_integer_part(<<$0, Bin/binary>>, Sign, Nexts) ->
 number_integer_part(<<C, Bin/binary>>, Sign, Nexts) when $1 =< C, C =< $9 ->
     number_integer_part_rest(Bin, C - $0, Sign, Nexts);
 number_integer_part(<<Bin/binary>>, _Sign, _Nexts) ->
-    error({invalid_json, number_integer_part, Bin}).
+    ?ERROR(Bin).
 
 number_integer_part_rest(<<C, Bin/binary>>, N, Sign, Nexts) when $0 =< C, C =< $9 ->
     number_integer_part_rest(Bin, N * 10 + C - $0, Sign, Nexts);
@@ -130,7 +145,9 @@ number_fraction_part(<<Bin/binary>>, Sign, Int, Nexts) ->
 %% Consumes the digits of a fraction part, one byte per call.
 %%   digit : folds it into N and increments DecimalOffset (digits read so far).
 %%   else, DecimalOffset > 0 : passes the signed value Sign * N and DecimalOffset
 %%          on to number_exponation_part/4.
 %%   else, no digit read     : raises error:{invalid_json, Bin} with the remaining input.
 number_fraction_part_rest(<<C, Bin/binary>>, Sign, N, DecimalOffset, Nexts) when $0 =< C, C =< $9 ->
     number_fraction_part_rest(Bin, Sign, N * 10 + C - $0, DecimalOffset + 1, Nexts);
 number_fraction_part_rest(<<Bin/binary>>, Sign, N, DecimalOffset, Nexts) when DecimalOffset > 0 ->
-    number_exponation_part(Bin, Sign * N, DecimalOffset, Nexts).
+    number_exponation_part(Bin, Sign * N, DecimalOffset, Nexts);
+number_fraction_part_rest(<<Bin/binary>>, _Sign, _N, _DecimalOffset, _Nexts) ->
+    ?ERROR(Bin).
 
 number_exponation_part(<<$e, $+, Bin/binary>>, N, DecimalOffset, Nexts) ->
     number_exponation_part(Bin, N, DecimalOffset, 1, 0, true, Nexts);
@@ -154,4 +171,6 @@ number_exponation_part(<<C, Bin/binary>>, N, DecimalOffset, ExpSign, Exp, _, Nex
     number_exponation_part(Bin, N, DecimalOffset, ExpSign, Exp * 10 + C - $0, false, Nexts);
 number_exponation_part(<<Bin/binary>>, N, DecimalOffset, ExpSign, Exp, false, Nexts) ->
     Pos = ExpSign * Exp - DecimalOffset,
-    next(Bin, N * math:pow(10, Pos), Nexts).
+    next(Bin, N * math:pow(10, Pos), Nexts);
+number_exponation_part(<<Bin/binary>>, _N, _DecimalOffset, _ExpSign, _Exp, _IsFirst, _Nexts) ->
+    ?ERROR(Bin).

+ 143 - 7
test/jsone_decode_tests.erl

@@ -5,6 +5,7 @@
 
 decode_test_() ->
     [
+     %% シンボル系
      {"'false'がデコード可能",
       fun () ->
               ?assertEqual({false, <<"">>}, jsone_decode:decode(<<"false">>))
@@ -21,6 +22,8 @@ decode_test_() ->
       fun () ->
               ?assertEqual({1, <<"">>}, jsone_decode:decode(<<"1">>))
       end},
+
+     %% 数値系: 整数
      {"0がデコード可能",
       fun () ->
               ?assertEqual({0, <<"">>}, jsone_decode:decode(<<"0">>))
@@ -42,8 +45,10 @@ decode_test_() ->
       end},
      {"正の整数の前の'+'記号は許可されない",
       fun () ->
-              ?assertError({invalid_json, number_integer_part, _}, jsone_decode:decode(<<"+1">>))
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"+1">>))
       end},
+
+     %% 数値系: 小数
      {"小数がデコード可能",
       fun () ->
               ?assertEqual({1.23, <<"">>}, jsone_decode:decode(<<"1.23">>))
@@ -53,12 +58,143 @@ decode_test_() ->
               ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"12345e-3">>)),
               ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"12345E-3">>)), % 'e'は大文字でも可
               ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"12345.0e-3">>)),
-              ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"0.12345e2">>)),
+              ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"0.12345E2">>)),
               ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"0.12345e+2">>)), % 指数部では'+'をつけても良い
+              ?assertEqual({12.345, <<"">>}, jsone_decode:decode(<<"0.12345E+2">>)), % 指数部では'+'をつけても良い
               ?assertEqual({-12.345, <<"">>}, jsone_decode:decode(<<"-0.012345e3">>))
-      end}%% ,
-     %% {"不正な形式の小数",
-     %%  fun () ->
-     %%          ?assertEqual(ok, jsone_decode:decode(<<"
-     %%  end}
+      end},
+     {"不正な形式の小数",
+      fun () ->
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<".123">>)),  % 整数部が省略されている
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"0.">>)),    % '.'の後ろに小数部が続かない
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"0.e+3">>)), % '.'の後ろに指数部が来る
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"0.1e">>)),    % 指数部が欠けている
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"0.1e-">>)),   % 指数部が欠けている
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"0.1ee-1">>)), % 'e'が複数ある
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"0.1e--1">>)), % 符号が複数ある
+              ?assertEqual({0.1, <<".2">>}, jsone_decode:decode(<<"0.1.2">>))  % '.'が複数ある => 別々のトークンと判断される
+      end},
+     
+     %% 文字列系
+     {"文字列がデコード可能",
+      fun () ->
+              ?assertEqual({<<"abc">>,  <<"">>}, jsone_decode:decode(<<"\"abc\"">>))
+      end},
+     {"各種エスケープ文字がデコード可能",
+      fun () ->
+              Input    = list_to_binary([$", [[$\\, C] || C <- [$", $/, $\\, $b, $f, $n, $r, $t]], $"]),
+              Expected = <<"\"\/\\\b\f\n\r\t">>,
+              ?assertEqual({Expected, <<"">>}, jsone_decode:decode(Input))
+      end},
+     {"エスケープされたUTF-16文字列がデコード可能",
+      fun () ->
+              %% 日本語
+              Input1    = <<"\"\\u3042\\u3044\\u3046\\u3048\\u304A\"">>,
+              Expected1 = <<"あいうえお">>,  % このファイルの文字エンコーディングがUTF-8であることが前提
+              ?assertEqual({Expected1, <<"">>}, jsone_decode:decode(Input1)),
+
+              %% ascii
+              Input2    = <<"\"\\u0061\\u0062\\u0063\"">>,
+              Expected2 = <<"abc">>,
+              ?assertEqual({Expected2, <<"">>}, jsone_decode:decode(Input2)),
+
+              %% 日本語以外のマルチバイト文字
+              Input3    = <<"\"\\u06DD\\u06DE\\u10AE\\u10AF\"">>,
+              Expected3 = <<"۝۞ႮႯ">>,
+              ?assertEqual({Expected3, <<"">>}, jsone_decode:decode(Input3))
+      end},
+     {"サロゲートペアを含む文字列がデコード可能",
+      fun () ->
+              Input    = <<"\"\\ud848\\udc49\\ud848\\udc9a\\ud848\\udcfc\"">>,
+              Expected = <<"𢁉𢂚𢃼">>,
+              ?assertEqual({Expected, <<"">>}, jsone_decode:decode(Input))
+      end},
+     {"不正なエスケープ文字",
+      fun () ->
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"\"\\z\"">>)),    % '\z' is not a defined escape sequence
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"\"\\uab\"">>)),  % too few hex digits after '\u'
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"\"\\ud848\"">>)), % a high surrogate appears on its own
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"\"\\udc49\"">>)), % a low surrogate appears on its own
+              ?assertError({invalid_json, _}, jsone_decode:decode(<<"\"\\ud848\\u0061\"">>)) % a high surrogate is not followed by a low surrogate
+      end},
+
+     %% 配列系
+     {"配列がデコード可能",
+      fun () ->
+              Input    = <<"[1, 2, \"abc\", null]">>,
+              Expected = [1, 2, <<"abc">>, null],
+              ?assertEqual({Expected, <<"">>}, jsone_decode:decode(Input))
+      end},
+     {"空配列がデコード可能",
+      fun () ->
+              ?assertEqual({[], <<"">>}, jsone_decode:decode(<<"[]">>)),
+              ?assertEqual({[], <<"">>}, jsone_decode:decode(<<"[ \t\r\n]">>))
+      end},
+     {"配列の末尾のカンマは許容されない",
+      fun () ->
+              Input = <<"[1, 2, \"abc\", null, ]">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"区切り文字のカンマが抜けているとエラーとなる",
+      fun () ->
+              Input = <<"[1 2, \"abc\", null]">>, % 1と2の間にカンマがない
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"配列が閉じていないとエラー",
+      fun () ->
+              Input = <<"[1, 2, \"abc\", null">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+
+     %% オブジェクト系
+     {"オブジェクトがデコード可能",
+      fun () ->
+              Input    = <<"{\"1\":2, \"key\":\"value\"}">>,
+              Expected = {object, [{<<"1">>, 2}, {<<"key">>, <<"value">>}]},
+              ?assertEqual({Expected, <<"">>}, jsone_decode:decode(Input))
+      end},
+     {"空オブジェクトがデコード可能",
+      fun () ->
+              ?assertEqual({{object, []}, <<"">>}, jsone_decode:decode(<<"{}">>)),
+              ?assertEqual({{object, []}, <<"">>}, jsone_decode:decode(<<"{ \t\r\n}">>))
+      end},
+     {"オブジェクトの末尾のカンマは許容されない",
+      fun () ->
+              %% A comma directly before the closing brace must be rejected.
+              Input = <<"{\"1\":2, \"key\":\"value\", }">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"区切り文字のカンマが抜けているとエラーとなる",
+      fun () ->
+              Input = <<"{\"1\":2 \"key\":\"value\"}">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"メンバのキーがない場合はエラー",
+      fun () ->
+              Input = <<"{:2, \"key\":\"value\"}">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"メンバのキーが文字列以外の場合はエラー",
+      fun () ->
+              Input = <<"{1:2, \"key\":\"value\"}">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"メンバの値がない場合はエラー",
+      fun () ->
+              Input = <<"{\"1\", \"key\":\"value\"}">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+     {"オブジェクトが閉じていないとエラー",
+      fun () ->
+              %% The members are well-formed and comma-separated; only the closing
+              %% brace is missing, so the error comes from the unterminated object.
+              Input = <<"{\"1\":2, \"key\":\"value\"">>,
+              ?assertError({invalid_json, _}, jsone_decode:decode(Input))
+      end},
+
+     %% その他
+     {"複雑なデータがデコード可能",
+      fun () ->
+              Input    = <<"  [true, {\"1\" : 2, \"array\":[[[[1]]], {\"ab\":\"cd\"}, false]}, null]   ">>,
+              Expected = [true, {object, [{<<"1">>, 2}, {<<"array">>, [[[[1]]], {object, [{<<"ab">>, <<"cd">>}]}, false]}]}, null],
+              ?assertEqual({Expected, <<"   ">>}, jsone_decode:decode(Input))
+      end}
     ].