Browse Source

Add encode_opt() to choose native or escaped utf8 code

UENISHI Kota 10 years ago
parent
commit
80c7fb518d
3 changed files with 125 additions and 71 deletions
  1. 17 3
      src/jsone.erl
  2. 103 67
      src/jsone_encode.erl
  3. 5 1
      test/jsone_encode_tests.erl

+ 17 - 3
src/jsone.erl

@@ -31,7 +31,8 @@
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 -export([
 -export([
          decode/1, try_decode/1,
          decode/1, try_decode/1,
-         encode/1, try_encode/1
+         encode/1, encode/2,
+         try_encode/1, try_encode/2
         ]).
         ]).
 
 
 -export_type([
 -export_type([
@@ -55,6 +56,11 @@
 -type json_object()         :: {json_object_members()}.
 -type json_object()         :: {json_object_members()}.
 -type json_object_members() :: [{json_string(), json_value()}].
 -type json_object_members() :: [{json_string(), json_value()}].
 
 
+%% An encoding option. `native_utf8' makes the encoder emit multi-byte UTF-8
+%% characters as-is instead of escaping them as `\uXXXX'. The tuple form is
+%% retained from the original declaration so specs written against it still hold.
+-type option()              :: native_utf8 | {atom(), atom()|boolean()}.
+-export_type([option/0]).
+
+-define(DEFAULT_OPTIONS, []).
+
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 %% Exported Functions
 %% Exported Functions
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
@@ -113,8 +119,12 @@ try_decode(Json) ->
 %% '''
 %% '''
 -spec encode(json_value()) -> binary().
 -spec encode(json_value()) -> binary().
 encode(JsonValue) ->
 encode(JsonValue) ->
+    encode(JsonValue, ?DEFAULT_OPTIONS).
+
+-spec encode(json_value(), [option()]) -> binary().
+encode(JsonValue, Options) ->
     try
     try
-        {ok, Binary} = try_encode(JsonValue),
+        {ok, Binary} = try_encode(JsonValue, Options),
         Binary
         Binary
     catch
     catch
         error:{badmatch, {error, {Reason, [StackItem]}}} ->
         error:{badmatch, {error, {Reason, [StackItem]}}} ->
@@ -134,4 +144,8 @@ encode(JsonValue) ->
 %% '''
 %% '''
 -spec try_encode(json_value()) -> {ok, binary()} | {error, {Reason::term(), [erlang:stack_item()]}}.
 -spec try_encode(json_value()) -> {ok, binary()} | {error, {Reason::term(), [erlang:stack_item()]}}.
 try_encode(JsonValue) ->
 try_encode(JsonValue) ->
-    jsone_encode:encode(JsonValue).
+    jsone_encode:encode(JsonValue, ?DEFAULT_OPTIONS).
+
+-spec try_encode(json_value(), [option()]) -> {ok, binary()} | {error, {Reason::term(), [erlang:stack_item()]}}.
+try_encode(JsonValue, Options) ->
+    jsone_encode:encode(JsonValue, Options).

+ 103 - 67
src/jsone_encode.erl

@@ -30,7 +30,7 @@
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 %% Exported API
 %% Exported API
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
--export([encode/1]).
+-export([encode/1, encode/2]).
 
 
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 %% Macros & Types
 %% Macros & Types
@@ -45,98 +45,134 @@
               | {object_value, jsone:json_value(), jsone:json_object_members()}
               | {object_value, jsone:json_value(), jsone:json_object_members()}
               | {object_members, jsone:json_object_members()}.
               | {object_members, jsone:json_object_members()}.
 
 
+-record(encode_opt_v1, { native_utf8 = false :: boolean() }).
+-define(ENCODE_OPT, #encode_opt_v1).
+-type encode_opt() :: #encode_opt_v1{}.
+
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 %% Exported Functions
 %% Exported Functions
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 %% @doc Encodes an erlang term into json text (a utf8 encoded binary)
 %% @doc Encodes an erlang term into json text (a utf8 encoded binary)
 -spec encode(jsone:json_value()) -> encode_result().
 -spec encode(jsone:json_value()) -> encode_result().
 encode(Value) ->
 encode(Value) ->
-    value(Value, [], <<"">>).
+    jsone:try_encode(Value).
+
+-spec encode(jsone:json_value(), [jsone:option()]) -> encode_result().
+encode(Value, Options) ->
+    Opt = parse_options(Options),
+    value(Value, [], <<"">>, Opt).
 
 
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
 %% Internal Functions
 %% Internal Functions
 %%--------------------------------------------------------------------------------
 %%--------------------------------------------------------------------------------
--spec next([next()], binary()) -> encode_result().
-next([], Buf)             -> {ok, Buf};
-next([Next | Nexts], Buf) ->
+-spec next([next()], binary(), encode_opt()) -> encode_result().
+next([], Buf, _)             -> {ok, Buf};
+next([Next | Nexts], Buf, Opt) ->
     case Next of
     case Next of
         {array_values, Values} ->
         {array_values, Values} ->
             case Values of
             case Values of
-                [] -> array_values(Values, Nexts, Buf);
-                _  -> array_values(Values, Nexts, <<Buf/binary, $,>>)
+                [] -> array_values(Values, Nexts, Buf, Opt);
+                _  -> array_values(Values, Nexts, <<Buf/binary, $,>>, Opt)
             end;
             end;
         {object_value, Value, Members} ->
         {object_value, Value, Members} ->
-            object_value(Value, Members, Nexts, Buf);
+            object_value(Value, Members, Nexts, Buf, Opt);
         {object_members, Members} ->
         {object_members, Members} ->
             case Members of
             case Members of
-                [] -> object_members(Members, Nexts, Buf);
-                _  -> object_members(Members, Nexts, <<Buf/binary, $,>>)
+                [] -> object_members(Members, Nexts, Buf, Opt);
+                _  -> object_members(Members, Nexts, <<Buf/binary, $,>>, Opt)
             end
             end
     end.
     end.
 
 
--spec value(jsone:json_value(), [next()], binary()) -> encode_result().
-value(null, Nexts, Buf)                         -> next(Nexts, <<Buf/binary, "null">>);
-value(false, Nexts, Buf)                        -> next(Nexts, <<Buf/binary, "false">>);
-value(true, Nexts, Buf)                         -> next(Nexts, <<Buf/binary, "true">>);
-value(Value, Nexts, Buf) when is_integer(Value) -> next(Nexts, <<Buf/binary, (integer_to_binary(Value))/binary>>);
-value(Value, Nexts, Buf) when is_float(Value)   -> next(Nexts, <<Buf/binary, (float_to_binary(Value))/binary>>);
-value(Value, Nexts, Buf) when is_binary(Value)  -> string(Value, Nexts, Buf);
-value(Value, Nexts, Buf) when is_list(Value)    -> array(Value, Nexts, Buf);
-value({_} = Value, Nexts, Buf)                  -> object(Value, Nexts, Buf);
-value(Value, Nexts, Buf)                        -> ?ERROR(value, [Value, Nexts, Buf]).
-
--spec string(jsone:json_string(), [next()], binary()) -> encode_result().
-string(<<Str/binary>>, Nexts, Buf) ->
-    escape_string(Str, Nexts, <<Buf/binary, $">>).
-
--spec escape_string(binary(), [next()], binary()) -> encode_result().
-escape_string(<<"">>,                   Nexts, Buf) -> next(Nexts, <<Buf/binary, $">>);
-escape_string(<<$", Str/binary>>,       Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $">>);
-escape_string(<<$\/, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $\/>>);
-escape_string(<<$\\, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $\\>>);
-escape_string(<<$\b, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $b>>);
-escape_string(<<$\f, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $f>>);
-escape_string(<<$\n, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $n>>);
-escape_string(<<$\r, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $r>>);
-escape_string(<<$\t, Str/binary>>,      Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $t>>);
-escape_string(<<0:1, C:7, Str/binary>>, Nexts, Buf) -> escape_string(Str, Nexts, <<Buf/binary, C>>);
-escape_string(<<2#110:3, B1:5, 2#10:2, B2:6, Str/binary>>, Nexts, Buf) when not ?IS_REDUNDANT_UTF8(B1, B2, 5) ->
-    Unicode = (B1 bsl 6) + B2,
-    escape_unicode_char(Str, Unicode, Nexts, Buf);
-escape_string(<<2#1110:4, B1:4, 2#10:2, B2:6, 2#10:2, B3:6, Str/binary>>, Nexts, Buf) when not ?IS_REDUNDANT_UTF8(B1, B2, 4) ->
-    Unicode = (B1 bsl 12) + (B2 bsl 6) + B3,
-    escape_unicode_char(Str, Unicode, Nexts, Buf);
-escape_string(<<2#11110:5, B1:3, 2#10:2, B2:6, 2#10:2, B3:6, 2#10:2, B4:6, Str/binary>>, Nexts, Buf) when not ?IS_REDUNDANT_UTF8(B1, B2, 3) ->
-    Unicode = (B1 bsl 18) + (B2 bsl 12) + (B3 bsl 6) + B4,
-    escape_unicode_char(Str, Unicode, Nexts, Buf);
-escape_string(Str, Nexts, Buf) ->
+-spec value(jsone:json_value(), [next()], binary(), encode_opt()) -> encode_result().
+value(null, Nexts, Buf, Opt)                         -> next(Nexts, <<Buf/binary, "null">>, Opt);
+value(false, Nexts, Buf, Opt)                        -> next(Nexts, <<Buf/binary, "false">>, Opt);
+value(true, Nexts, Buf, Opt)                         -> next(Nexts, <<Buf/binary, "true">>, Opt);
+value(Value, Nexts, Buf, Opt) when is_integer(Value) -> next(Nexts, <<Buf/binary, (integer_to_binary(Value))/binary>>, Opt);
+value(Value, Nexts, Buf, Opt) when is_float(Value)   -> next(Nexts, <<Buf/binary, (float_to_binary(Value))/binary>>, Opt);
+value(Value, Nexts, Buf, Opt) when is_binary(Value)  -> string(Value, Nexts, Buf, Opt);
+value(Value, Nexts, Buf, Opt) when is_list(Value)    -> array(Value, Nexts, Buf, Opt);
+value({_} = Value, Nexts, Buf, Opt)                  -> object(Value, Nexts, Buf, Opt);
+value(Value, Nexts, Buf, _)                        -> ?ERROR(value, [Value, Nexts, Buf]).
+
+-spec string(jsone:json_string(), [next()], binary(), encode_opt()) -> encode_result().
+string(<<Str/binary>>, Nexts, Buf, Opt) ->
+    escape_string(Str, Nexts, <<Buf/binary, $">>, Opt).
+
+-spec escape_string(binary(), [next()], binary(), encode_opt()) -> encode_result().
+escape_string(<<"">>,                   Nexts, Buf, Opt) -> next(Nexts, <<Buf/binary, $">>, Opt);
+escape_string(<<$", Str/binary>>,       Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $">>, Opt);
+escape_string(<<$\/, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $\/>>, Opt);
+escape_string(<<$\\, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $\\>>, Opt);
+escape_string(<<$\b, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $b>>, Opt);
+escape_string(<<$\f, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $f>>, Opt);
+escape_string(<<$\n, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $n>>, Opt);
+escape_string(<<$\r, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $r>>, Opt);
+escape_string(<<$\t, Str/binary>>,      Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, $\\, $t>>, Opt);
+escape_string(<<0:1, C:7, Str/binary>>, Nexts, Buf, Opt) -> escape_string(Str, Nexts, <<Buf/binary, C>>, Opt);
+escape_string(<<2#110:3, B1:5, 2#10:2, B2:6, Str/binary>>, Nexts, Buf, Opt) when not ?IS_REDUNDANT_UTF8(B1, B2, 5) ->
+    case Opt?ENCODE_OPT.native_utf8 of
+        false ->
+            Unicode = (B1 bsl 6) + B2,
+            escape_unicode_char(Str, Unicode, Nexts, Buf, Opt);
+        true ->
+            unicode_char(Str, <<2#110:3, B1:5, 2#10:2, B2:6>>, Nexts, Buf, Opt)
+    end;
+escape_string(<<2#1110:4, B1:4, 2#10:2, B2:6, 2#10:2, B3:6, Str/binary>>, Nexts, Buf, Opt) when not ?IS_REDUNDANT_UTF8(B1, B2, 4) ->
+    case Opt?ENCODE_OPT.native_utf8 of
+        false ->
+            Unicode = (B1 bsl 12) + (B2 bsl 6) + B3,
+            escape_unicode_char(Str, Unicode, Nexts, Buf, Opt);
+        true ->
+            unicode_char(Str, <<2#1110:4, B1:4, 2#10:2, B2:6, 2#10:2, B3:6>>, Nexts, Buf, Opt)
+    end;
+%% 4-byte UTF-8 sequence: either escape it (as a surrogate pair, via
+%% escape_unicode_char/5) or pass the original bytes through untouched.
+escape_string(<<2#11110:5, B1:3, 2#10:2, B2:6, 2#10:2, B3:6, 2#10:2, B4:6, Str/binary>>, Nexts, Buf, Opt) when not ?IS_REDUNDANT_UTF8(B1, B2, 3) ->
+    case Opt?ENCODE_OPT.native_utf8 of
+        false ->
+            Unicode = (B1 bsl 18) + (B2 bsl 12) + (B3 bsl 6) + B4,
+            escape_unicode_char(Str, Unicode, Nexts, Buf, Opt);
+        true ->
+            %% Rebuild the sequence exactly as matched in the clause head:
+            %% the lead byte of a 4-byte sequence is 2#11110xxx.
+            unicode_char(Str, <<2#11110:5, B1:3, 2#10:2, B2:6, 2#10:2, B3:6, 2#10:2, B4:6>>, Nexts, Buf, Opt)
+    end;
+escape_string(Str, Nexts, Buf, _) ->
     ?ERROR(escape_string, [Str, Nexts, Buf]).
     ?ERROR(escape_string, [Str, Nexts, Buf]).
 
 
--spec escape_unicode_char(binary(), char(), [next()], binary()) -> encode_result().
-escape_unicode_char(<<Str/binary>>, Unicode, Nexts, Buf) when Unicode =< 16#FFFF ->
-    escape_string(Str, Nexts, <<Buf/binary, $\\, $u, ?UNICODE_TO_HEX(Unicode)>>);
-escape_unicode_char(<<Str/binary>>, Unicode, Nexts, Buf) ->
+%% Appends an already UTF-8 encoded character (`Char', given as its raw bytes)
+%% to the output buffer without escaping, then continues with the rest of the string.
+-spec unicode_char(binary(), binary(), [next()], binary(), encode_opt()) -> encode_result().
+unicode_char(Str, Char, Nexts, Buf, Opt) ->
+    escape_string(Str, Nexts, <<Buf/binary, Char/binary>>, Opt).
+
+-spec escape_unicode_char(binary(), char(), [next()], binary(), encode_opt()) -> encode_result().
+escape_unicode_char(<<Str/binary>>, Unicode, Nexts, Buf, Opt) when Unicode =< 16#FFFF ->
+    escape_string(Str, Nexts, <<Buf/binary, $\\, $u, ?UNICODE_TO_HEX(Unicode)>>, Opt);
+escape_unicode_char(<<Str/binary>>, Unicode, Nexts, Buf, Opt) ->
     %% Surrogate Pair
     %% Surrogate Pair
     <<High:10, Low:10>> = <<(Unicode - 16#10000):20>>, % XXX: inefficient
     <<High:10, Low:10>> = <<(Unicode - 16#10000):20>>, % XXX: inefficient
-    escape_string(Str, Nexts, <<Buf/binary, $\\, $u, ?UNICODE_TO_HEX(High + 16#D800), $\\, $u, ?UNICODE_TO_HEX(Low + 16#DC00)>>).
+    escape_string(Str, Nexts, <<Buf/binary, $\\, $u, ?UNICODE_TO_HEX(High + 16#D800), $\\, $u, ?UNICODE_TO_HEX(Low + 16#DC00)>>, Opt).
+
+-spec array(jsone:json_array(), [next()], binary(), encode_opt()) -> encode_result().
+array(List, Nexts, Buf, Opt) ->
+    array_values(List, Nexts, <<Buf/binary, $[>>, Opt).
+
+-spec array_values(jsone:json_array(), [next()], binary(), encode_opt()) -> encode_result().
+array_values([],       Nexts, Buf, Opt) -> next(Nexts, <<Buf/binary, $]>>, Opt);
+array_values([X | Xs], Nexts, Buf, Opt) -> value(X, [{array_values, Xs} | Nexts], Buf, Opt).
+
+-spec object(jsone:json_object(), [next()], binary(), encode_opt()) -> encode_result().
+object({Members}, Nexts, Buf, Opt) ->
+    object_members(Members, Nexts, <<Buf/binary, ${>>, Opt).
 
 
--spec array(jsone:json_array(), [next()], binary()) -> encode_result().
-array(List, Nexts, Buf) ->
-    array_values(List, Nexts, <<Buf/binary, $[>>).
+-spec object_members(jsone:json_object_members(), [next()], binary(), encode_opt()) -> encode_result().
+object_members([],                             Nexts, Buf, Opt) -> next(Nexts, <<Buf/binary, $}>>, Opt);
+object_members([{<<Key/binary>>, Value} | Xs], Nexts, Buf, Opt) -> string(Key, [{object_value, Value, Xs} | Nexts], Buf, Opt);
+object_members(Arg, Nexts, Buf, _)                            -> ?ERROR(object_members, [Arg, Nexts, Buf]).
 
 
--spec array_values(jsone:json_array(), [next()], binary()) -> encode_result().
-array_values([],       Nexts, Buf) -> next(Nexts, <<Buf/binary, $]>>);
-array_values([X | Xs], Nexts, Buf) -> value(X, [{array_values, Xs} | Nexts], Buf).
+-spec object_value(jsone:json_value(), jsone:json_object_members(), [next()], binary(), encode_opt()) -> encode_result().
+object_value(Value, Members, Nexts, Buf, Opt) ->
+    value(Value, [{object_members, Members} | Nexts], <<Buf/binary, $:>>, Opt).
 
 
--spec object(jsone:json_object(), [next()], binary()) -> encode_result().
-object({Members}, Nexts, Buf) ->
-    object_members(Members, Nexts, <<Buf/binary, ${>>).
 
 
--spec object_members(jsone:json_object_members(), [next()], binary()) -> encode_result().
-object_members([],                             Nexts, Buf) -> next(Nexts, <<Buf/binary, $}>>);
-object_members([{<<Key/binary>>, Value} | Xs], Nexts, Buf) -> string(Key, [{object_value, Value, Xs} | Nexts], Buf);
-object_members(Arg, Nexts, Buf)                            -> ?ERROR(object_members, [Arg, Nexts, Buf]).
+-spec parse_options([jsone:option()]) -> encode_opt().
+parse_options(Options) ->
+    parse_option(Options, ?ENCODE_OPT{}).
 
 
--spec object_value(jsone:json_value(), jsone:json_object_members(), [next()], binary()) -> encode_result().
-object_value(Value, Members, Nexts, Buf) ->
-    value(Value, [{object_members, Members} | Nexts], <<Buf/binary, $:>>).
+%% Folds the option list into the options record, left to right (a later
+%% occurrence of an option overrides an earlier one).
+%% Accepts the bare atom `native_utf8' and the proplist-style
+%% `{native_utf8, boolean()}' form; any other element fails with function_clause.
+-spec parse_option([native_utf8 | {native_utf8, boolean()}], encode_opt()) -> encode_opt().
+parse_option([], Opt) -> Opt;
+parse_option([native_utf8|T], Opt) ->
+    parse_option(T, Opt?ENCODE_OPT{native_utf8=true});
+parse_option([{native_utf8, Bool}|T], Opt) when is_boolean(Bool) ->
+    parse_option(T, Opt?ENCODE_OPT{native_utf8=Bool}).

+ 5 - 1
test/jsone_encode_tests.erl

@@ -64,11 +64,15 @@ encode_test_() ->
               Input1    = <<"あいうえお">>,  % assumed that the encoding of this file is UTF-8
               Input1    = <<"あいうえお">>,  % assumed that the encoding of this file is UTF-8
               Expected1 = <<"\"\\u3042\\u3044\\u3046\\u3048\\u304a\"">>,
               Expected1 = <<"\"\\u3042\\u3044\\u3046\\u3048\\u304a\"">>,
               ?assertEqual({ok, Expected1}, jsone_encode:encode(Input1)),
               ?assertEqual({ok, Expected1}, jsone_encode:encode(Input1)),
+              Expected12 = <<$", Input1/binary, $">>,
+              ?assertEqual({ok, Expected12}, jsone_encode:encode(Input1, [native_utf8])),
 
 
               %% other multi-byte characters
               %% other multi-byte characters
               Input2    = <<"۝۞ႮႯ">>,
               Input2    = <<"۝۞ႮႯ">>,
               Expected2 = <<"\"\\u06dd\\u06de\\u10ae\\u10af\"">>,
               Expected2 = <<"\"\\u06dd\\u06de\\u10ae\\u10af\"">>,
-              ?assertEqual({ok, Expected2}, jsone_encode:encode(Input2))
+              ?assertEqual({ok, Expected2}, jsone_encode:encode(Input2)),
+              Expected22 = <<$", Input2/binary, $">>,
+              ?assertEqual({ok, Expected22}, jsone_encode:encode(Input2, [native_utf8]))
       end},
       end},
      {"string: containts surrogate pairs",
      {"string: containts surrogate pairs",
       fun () ->
       fun () ->