Browse Source

Add option to return last duplicate property

RFC 4627 says "The names within an object SHOULD be unique." That is
SHOULD, not MUST. Most JSON parsers pick either the first duplicate
instance or the last. I'm working between Rust and Erlang (via rustler)
and wanted the option to match serde_json's approach of picking the
last duplicate key in an object.

This change adds a new option `duplicate_map_keys` with the allowed
values `first` and `last`. `first` matches the current behaviour, and
is the default.
Russell Brown 5 years ago
parent
commit
b3cfac0658
3 changed files with 31 additions and 2 deletions
  1. 15 1
      src/jsone.erl
  2. 7 1
      src/jsone_decode.erl
  3. 9 0
      test/jsone_decode_tests.erl

+ 15 - 1
src/jsone.erl

@@ -234,6 +234,7 @@
                        | {allow_ctrl_chars, boolean()}
                        | {allow_ctrl_chars, boolean()}
                        | reject_invalid_utf8
                        | reject_invalid_utf8
                        | {'keys', 'binary' | 'atom' | 'existing_atom' | 'attempt_atom'}
                        | {'keys', 'binary' | 'atom' | 'existing_atom' | 'attempt_atom'}
+                       | {duplicate_map_keys, first | last}
                        | common_option().
                        | common_option().
 %% `object_format': <br />
 %% `object_format': <br />
 %% - Decoded JSON object format <br />
 %% - Decoded JSON object format <br />
@@ -260,7 +261,20 @@
 %% existing atom raises `badarg' exception. <br />
 %% existing atom raises `badarg' exception. <br />
 %% - `attempt_atom': Returns existing atom as `existing_atom' but returns a
 %% - `attempt_atom': Returns existing atom as `existing_atom' but returns a
 %% binary string if it fails to find one.
 %% binary string if it fails to find one.
-
+%%
+%% `duplicate_map_keys': <br />
+%% https://www.ietf.org/rfc/rfc4627.txt says that keys SHOULD be
+%% unique, but they don't have to be. Most JSON parsers will either
+%% give you the value of the first, or last duplicate property
+%% encountered. When `object_format' is `tuple' or `proplist' all
+%% duplicates are returned. When `object_format' is `map' by default
+%% the first instance of a duplicate is returned. Setting
+%% `duplicate_map_keys' to `last' will change this behaviour to return
+%% the last such instance.
+%% - If the value is `first' then the first duplicate key/value is returned. <br />
+%% - If the value is `last' then the last duplicate key/value is returned. <br />
+%% - default: `first'<br />
+%%
 
 
 -type stack_item() :: {Module :: module(),
 -type stack_item() :: {Module :: module(),
                        Function :: atom(),
                        Function :: atom(),

+ 7 - 1
src/jsone_decode.erl

@@ -69,7 +69,8 @@
           allow_ctrl_chars=false :: boolean(),
           allow_ctrl_chars=false :: boolean(),
           reject_invalid_utf8=false :: boolean(),
           reject_invalid_utf8=false :: boolean(),
           keys=binary :: 'binary' | 'atom' | 'existing_atom' | 'attempt_atom',
           keys=binary :: 'binary' | 'atom' | 'existing_atom' | 'attempt_atom',
-          undefined_as_null=false :: boolean()
+          undefined_as_null=false :: boolean(),
+          duplicate_map_keys=first :: first | last
         }).
         }).
 -define(OPT, #decode_opt_v2).
 -define(OPT, #decode_opt_v2).
 -type opt() :: #decode_opt_v2{}.
 -type opt() :: #decode_opt_v2{}.
@@ -292,6 +293,8 @@ number_exponation_part(Bin, N, DecimalOffset, ExpSign, Exp, IsFirst, Nexts, Buf,
 
 
 -spec make_object(jsone:json_object_members(), opt()) -> jsone:json_object().
 -spec make_object(jsone:json_object_members(), opt()) -> jsone:json_object().
 make_object(Members, ?OPT{object_format = tuple}) -> {lists:reverse(Members)};
 make_object(Members, ?OPT{object_format = tuple}) -> {lists:reverse(Members)};
+make_object(Members, ?OPT{object_format = map, duplicate_map_keys = last}) ->
+    ?LIST_TO_MAP(lists:reverse(Members));
 make_object(Members, ?OPT{object_format = map})   -> ?LIST_TO_MAP(Members);
 make_object(Members, ?OPT{object_format = map})   -> ?LIST_TO_MAP(Members);
 make_object([],      _)                           -> [{}];
 make_object([],      _)                           -> [{}];
 make_object(Members, _)                           -> lists:reverse(Members).
 make_object(Members, _)                           -> lists:reverse(Members).
@@ -313,5 +316,8 @@ parse_option([{keys, K}|T], Opt)
     parse_option(T, Opt?OPT{keys = K});
     parse_option(T, Opt?OPT{keys = K});
 parse_option([undefined_as_null|T], Opt) ->
 parse_option([undefined_as_null|T], Opt) ->
     parse_option(T, Opt?OPT{undefined_as_null = true});
     parse_option(T, Opt?OPT{undefined_as_null = true});
+parse_option([{duplicate_map_keys, V} | T], Opt)
+  when V =:= first; V =:= last ->
+    parse_option(T, Opt?OPT{duplicate_map_keys=V});
 parse_option(List, Opt) ->
 parse_option(List, Opt) ->
     error(badarg, [List, Opt]).
     error(badarg, [List, Opt]).

+ 9 - 0
test/jsone_decode_tests.erl

@@ -16,6 +16,7 @@
 -define(OBJ1(K, V), #{K => V}).
 -define(OBJ1(K, V), #{K => V}).
 -define(OBJ2(K1, V1, K2, V2), #{K1 => V1, K2 => V2}).
 -define(OBJ2(K1, V1, K2, V2), #{K1 => V1, K2 => V2}).
 -define(OBJ2_DUP_KEY(K1, V1, _K2, _V2), #{K1 => V1}). % the first (leftmost) value is used
 -define(OBJ2_DUP_KEY(K1, V1, _K2, _V2), #{K1 => V1}). % the first (leftmost) value is used
+-define(OBJ2_DUP_KEY_LAST(_K1, _V1, K2, V2), #{K2 => V2}). % the last value is used
 -endif.
 -endif.
 
 
 decode_test_() ->
 decode_test_() ->
@@ -232,6 +233,14 @@ decode_test_() ->
               Expected = ?OBJ2_DUP_KEY(<<"1">>, <<"first">>, <<"1">>, <<"second">>),
               Expected = ?OBJ2_DUP_KEY(<<"1">>, <<"first">>, <<"1">>, <<"second">>),
               ?assertEqual({ok, Expected, <<"">>}, jsone_decode:decode(Input, [{object_format, ?MAP_OBJECT_TYPE}]))
               ?assertEqual({ok, Expected, <<"">>}, jsone_decode:decode(Input, [{object_format, ?MAP_OBJECT_TYPE}]))
       end},
       end},
+     {"duplicated members last: map",
+      fun () ->
+              Input    = <<"{\"1\":\"first\",\"1\":\"second\"}">>,
+              Expected = ?OBJ2_DUP_KEY_LAST(<<"1">>, <<"first">>, <<"1">>, <<"second">>),
+              ?assertEqual({ok, Expected, <<"">>}, jsone_decode:decode(Input,
+                                                                       [{object_format, ?MAP_OBJECT_TYPE},
+                                                                        {duplicate_map_keys, last}]))
+      end},
      {"object: trailing comma is disallowed",
      {"object: trailing comma is disallowed",
       fun () ->
       fun () ->
               Input = <<"{\"1\":2, \"key\":\"value\", }">>,
               Input = <<"{\"1\":2, \"key\":\"value\", }">>,