jsone_decode.erl 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. %%% @doc JSON decoding module
  2. %%% @private
  3. %%% @end
  4. %%%
  5. %%% Copyright (c) 2013-2016, Takeru Ohta <phjgt308@gmail.com>
  6. %%%
  7. %%% The MIT License
  8. %%%
  9. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  10. %%% of this software and associated documentation files (the "Software"), to deal
  11. %%% in the Software without restriction, including without limitation the rights
  12. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13. %%% copies of the Software, and to permit persons to whom the Software is
  14. %%% furnished to do so, subject to the following conditions:
  15. %%%
  16. %%% The above copyright notice and this permission notice shall be included in
  17. %%% all copies or substantial portions of the Software.
  18. %%%
  19. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. %%% THE SOFTWARE.
  26. %%%
  27. %%%---------------------------------------------------------------------------------------
  %% Module declaration. Native (HiPE) compilation is only requested when the
  %% ENABLE_HIPE macro is defined at build time.
  -module(jsone_decode).

  -ifdef(ENABLE_HIPE).
  -compile([native, {hipe, [o3]}]).
  -endif.

  %%--------------------------------------------------------------------------------
  %% Exported API
  %%--------------------------------------------------------------------------------
  -export([
           decode/1,
           decode/2
          ]).
  36. %%--------------------------------------------------------------------------------
  37. %% Macros & Records & Types
  38. %%--------------------------------------------------------------------------------
  %% Expands to the error result returned for malformed input: a `badarg' reason
  %% paired with one stack-item-shaped tuple recording the module, the function
  %% name, its arguments and the source line where the macro was expanded.
  -define(ERROR(Function, Args), {error, {badarg, [{?MODULE, Function, Args, [{line, ?LINE}]}]}}).

  %% Default object representation. When NO_MAP_TYPE is defined the default is
  %% `tuple' and requesting a map raises an error; otherwise the default is `map'.
  -ifdef('NO_MAP_TYPE').
  -define(DEFAULT_OBJECT_FORMAT, tuple).
  -define(LIST_TO_MAP(X), error({this_erts_does_not_support_maps, X})).
  -else.
  -define(DEFAULT_OBJECT_FORMAT, map).
  -define(LIST_TO_MAP(X), maps:from_list(X)).
  -endif.

  %% Frames kept on the `Nexts' stack while a nested value is being decoded;
  %% next/5 pops one frame once that value is complete.
  -type next() ::
          {array_next, [jsone:json_value()]}
        | {object_value, jsone:json_object_members()}
        | {object_next, jsone:json_string(), jsone:json_object_members()}.

  %% What whitespace/5 dispatches to after leading whitespace has been skipped.
  -type whitespace_next() ::
          value
        | array
        | object
        | {array_next, [jsone:json_value()]}
        | {object_key, jsone:json_object_members()}
        | {object_value, jsone:json_string(), jsone:json_object_members()}
        | {object_next, jsone:json_object_members()}.

  %% Result of a decode: the value plus the undecoded remainder, or an error term.
  -type decode_result() ::
          {ok, jsone:json_value(), Rest::binary()}
        | {error, {Reason::term(), [erlang:stack_item()]}}.

  %% Decoder options after parsing the user-supplied option list.
  -record(decode_opt_v2,
          {
            object_format = ?DEFAULT_OBJECT_FORMAT :: tuple | proplist | map,
            allow_ctrl_chars = false :: boolean()
          }).

  %% Shorthand for the option record, usable for construction, update and matching.
  -define(OPT, #decode_opt_v2).

  -type opt() :: #decode_opt_v2{}.
  65. %%--------------------------------------------------------------------------------
  66. %% Exported Functions
  67. %%--------------------------------------------------------------------------------
  %% Decodes `Json' with an empty option list (all options at their defaults).
  -spec decode(binary()) -> decode_result().
  decode(Json) -> decode(Json, []).
  %% Decodes the first JSON value found in `Json'.
  %%
  %% The option list is parsed first (an unknown option raises `badarg'), then
  %% decoding starts in the "expect a value" state with an empty continuation
  %% stack and an empty string buffer.
  -spec decode(binary(), [jsone:decode_option()]) -> decode_result().
  decode(Json, Options) when is_binary(Json) ->
      whitespace(Json, value, [], <<>>, parse_options(Options)).
  75. %%--------------------------------------------------------------------------------
  76. %% Internal Functions
  77. %%--------------------------------------------------------------------------------
  %% Called whenever a complete value has been decoded.
  %%
  %% With an empty continuation stack the value is the final result and is
  %% returned together with the remaining input. Otherwise the top frame is
  %% popped, the value is folded into it, and decoding resumes (after skipping
  %% whitespace) in the state that frame calls for.
  -spec next(binary(), jsone:json_value(), [next()], binary(), opt()) -> decode_result().
  next(<<Rest/binary>>, Value, [], _Buf, _Opt) ->
      {ok, Value, Rest};
  next(<<Rest/binary>>, Value, [Frame | Stack], Buf, Opt) ->
      Resume =
          case Frame of
              {array_next, Values}        -> {array_next, [Value | Values]};
              {object_value, Members}     -> {object_value, Value, Members};
              {object_next, Key, Members} -> {object_next, [{Key, Value} | Members]}
          end,
      whitespace(Rest, Resume, Stack, Buf, Opt).
  %% Skips leading JSON whitespace (space, tab, carriage return, line feed) and
  %% then hands the input to the parser selected by `Next'.
  -spec whitespace(binary(), whitespace_next(), [next()], binary(), opt()) -> decode_result().
  whitespace(<<C, Rest/binary>>, Next, Nexts, Buf, Opt)
    when C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n ->
      whitespace(Rest, Next, Nexts, Buf, Opt);
  whitespace(<<Input/binary>>, Next, Nexts, Buf, Opt) ->
      case Next of
          value                        -> value(Input, Nexts, Buf, Opt);
          array                        -> array(Input, Nexts, Buf, Opt);
          object                       -> object(Input, Nexts, Buf, Opt);
          {array_next, Values}         -> array_next(Input, Values, Nexts, Buf, Opt);
          {object_key, Members}        -> object_key(Input, Members, Nexts, Buf, Opt);
          {object_value, Key, Members} -> object_value(Input, Key, Members, Nexts, Buf, Opt);
          {object_next, Members}       -> object_next(Input, Members, Nexts, Buf, Opt)
      end.
  %% Decodes one JSON value, choosing the parser from the leading bytes.
  %% Anything that is not a container, a string or a literal is tried as a number.
  -spec value(binary(), [next()], binary(), opt()) -> decode_result().
  %% Containers: whitespace is allowed right after the opening bracket.
  value(<<$[, Rest/binary>>, Nexts, Buf, Opt) ->
      whitespace(Rest, array, Nexts, Buf, Opt);
  value(<<${, Rest/binary>>, Nexts, Buf, Opt) ->
      whitespace(Rest, object, Nexts, Buf, Opt);
  %% Strings: decoded bytes of this string start at the current end of the buffer.
  value(<<$", Rest/binary>>, Nexts, Buf, Opt) ->
      string(Rest, byte_size(Buf), Nexts, Buf, Opt);
  %% Literals.
  value(<<$t, $r, $u, $e, Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, true, Nexts, Buf, Opt);
  value(<<$f, $a, $l, $s, $e, Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, false, Nexts, Buf, Opt);
  value(<<$n, $u, $l, $l, Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, null, Nexts, Buf, Opt);
  %% Everything else.
  value(<<Input/binary>>, Nexts, Buf, Opt) ->
      number(Input, Nexts, Buf, Opt).
  %% Entered just after `[' (and any whitespace). An immediate `]' yields the
  %% empty list; otherwise the first element is decoded with a fresh
  %% `array_next' frame on the stack to collect the elements.
  -spec array(binary(), [next()], binary(), opt()) -> decode_result().
  array(<<$], Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, [], Nexts, Buf, Opt);
  array(<<Input/binary>>, Nexts, Buf, Opt) ->
      Frame = {array_next, []},
      value(Input, [Frame | Nexts], Buf, Opt).
  %% Entered after an array element. `Values' holds the elements decoded so far,
  %% newest first. `]' finishes the array (restoring source order), `,' starts the
  %% next element, and any other input is reported as an error.
  -spec array_next(binary(), [jsone:json_value()], [next()], binary(), opt()) -> decode_result().
  array_next(Bin, Values, Nexts, Buf, Opt) ->
      case Bin of
          <<$], Rest/binary>> ->
              next(Rest, lists:reverse(Values), Nexts, Buf, Opt);
          <<$,, Rest/binary>> ->
              whitespace(Rest, value, [{array_next, Values} | Nexts], Buf, Opt);
          _ ->
              ?ERROR(array_next, [Bin, Values, Nexts, Buf, Opt])
      end.
  %% Entered just after `{' (and any whitespace). An immediate `}' yields an
  %% empty object in the configured format; otherwise the first key is expected.
  -spec object(binary(), [next()], binary(), opt()) -> decode_result().
  object(<<$}, Rest/binary>>, Nexts, Buf, Opt) ->
      EmptyObject = make_object([], Opt),
      next(Rest, EmptyObject, Nexts, Buf, Opt);
  object(<<Input/binary>>, Nexts, Buf, Opt) ->
      object_key(Input, [], Nexts, Buf, Opt).
  %% Expects an object key, which must be a string. The key is decoded by the
  %% string parser with an `object_value' frame pushed so that next/5 continues
  %% with the `:' separator. Any other input is reported as an error.
  -spec object_key(binary(), jsone:json_object_members(), [next()], binary(), opt()) -> decode_result().
  object_key(<<$", Rest/binary>>, Members, Nexts, Buf, Opt) ->
      Frame = {object_value, Members},
      string(Rest, byte_size(Buf), [Frame | Nexts], Buf, Opt);
  object_key(<<Input/binary>>, Members, Nexts, Buf, Opt) ->
      ?ERROR(object_key, [Input, Members, Nexts, Buf, Opt]).
  %% Entered after an object key. Requires the `:' separator, then decodes the
  %% member value with an `object_next' frame remembering the key. Any other
  %% input is reported as an error.
  -spec object_value(binary(), jsone:json_string(), jsone:json_object_members(), [next()], binary(), opt()) -> decode_result().
  object_value(Bin, Key, Members, Nexts, Buf, Opt) ->
      case Bin of
          <<$:, Rest/binary>> ->
              whitespace(Rest, value, [{object_next, Key, Members} | Nexts], Buf, Opt);
          _ ->
              ?ERROR(object_value, [Bin, Key, Members, Nexts, Buf, Opt])
      end.
  %% Entered after an object member. `Members' holds the members decoded so far,
  %% newest first. `}' finishes the object, `,' expects another key, and any
  %% other input is reported as an error.
  -spec object_next(binary(), jsone:json_object_members(), [next()], binary(), opt()) -> decode_result().
  object_next(Bin, Members, Nexts, Buf, Opt) ->
      case Bin of
          <<$}, Rest/binary>> ->
              next(Rest, make_object(Members, Opt), Nexts, Buf, Opt);
          <<$,, Rest/binary>> ->
              whitespace(Rest, {object_key, Members}, Nexts, Buf, Opt);
          _ ->
              ?ERROR(object_next, [Bin, Members, Nexts, Buf, Opt])
      end.
  %% Starts (or restarts, after an escape sequence) scanning string content.
  %% The current input doubles as the base from which string/6 later measures
  %% how many unescaped bytes it has walked over.
  -spec string(binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  string(Input, Start, Nexts, Buf, Opt) when is_binary(Input) ->
      string(Input, Input, Start, Nexts, Buf, Opt).
  %% Scans string content one byte at a time.
  %%
  %% `Base' is the input as it was when the current run of unescaped bytes
  %% began, so the run is the leading part of `Base' up to the byte being
  %% looked at. `Start' is the offset in `Buf' at which this string's decoded
  %% bytes begin; `Buf' only grows once an escape sequence has been seen.
  -spec string(binary(), binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  %% Closing quote: the string is complete.
  string(<<$", Rest/binary>>, Base, Start, Nexts, Buf, Opt) ->
      Literal = binary:part(Base, 0, byte_size(Base) - byte_size(Rest) - 1),
      if
          Start =:= byte_size(Buf) ->
              %% Nothing was buffered for this string (no escapes), so the run
              %% taken straight from the input is the whole value.
              next(Rest, Literal, Nexts, Buf, Opt);
          true ->
              Grown = <<Buf/binary, Literal/binary>>,
              next(Rest, binary:part(Grown, Start, byte_size(Grown) - Start), Nexts, Grown, Opt)
      end;
  %% Escape sequence: flush the pending run into the buffer, append the decoded
  %% character and restart scanning after the escape.
  string(<<$\\, Escaped/binary>> = Input, Base, Start, Nexts, Buf, Opt) ->
      Literal = binary:part(Base, 0, byte_size(Base) - byte_size(Escaped) - 1),
      Flushed = <<Buf/binary, Literal/binary>>,
      case Escaped of
          <<$", Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $">>, Opt);
          <<$/, Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $/>>, Opt);
          <<$\\, Rest/binary>> -> string(Rest, Start, Nexts, <<Flushed/binary, $\\>>, Opt);
          <<$b, Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $\b>>, Opt);
          <<$f, Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $\f>>, Opt);
          <<$n, Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $\n>>, Opt);
          <<$r, Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $\r>>, Opt);
          <<$t, Rest/binary>>  -> string(Rest, Start, Nexts, <<Flushed/binary, $\t>>, Opt);
          <<$u, Rest/binary>>  -> unicode_string(Rest, Start, Nexts, Flushed, Opt);
          _                    -> ?ERROR(string, [Input, Base, Start, Nexts, Buf, Opt])
      end;
  %% Ordinary byte: any byte is accepted when control characters are allowed,
  %% otherwise only bytes from 16#20 upwards.
  string(<<_, Rest/binary>>, Base, Start, Nexts, Buf, ?OPT{allow_ctrl_chars = true} = Opt) ->
      string(Rest, Base, Start, Nexts, Buf, Opt);
  string(<<Byte, Rest/binary>>, Base, Start, Nexts, Buf, Opt) when Byte >= 16#20 ->
      string(Rest, Base, Start, Nexts, Buf, Opt);
  %% Disallowed control character or end of input before the closing quote.
  string(Bin, Base, Start, Nexts, Buf, Opt) ->
      ?ERROR(string, [Bin, Base, Start, Nexts, Buf, Opt]).
  %% Decodes the four hex digits following `\u' and appends the code point to
  %% `Buf' as UTF-8, then resumes string scanning.
  %%
  %% A high surrogate (D800-DBFF) must be followed immediately by a `\uXXXX' low
  %% surrogate (DC00-DFFF); the pair is combined into one code point. A lone low
  %% surrogate, a malformed pair, fewer than four bytes of input, or any
  %% non-hex-digit byte yields an error result.
  %%
  %% The digits are validated explicitly rather than passed straight to
  %% binary_to_integer/2: that function raises `badarg' on non-hex input and
  %% also accepts a leading sign, so input such as `\uzzzz' or `\u-001' used to
  %% crash the decoder and `\u+041' was wrongly accepted.
  -spec unicode_string(binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  unicode_string(<<N:4/binary, Bin/binary>> = Input, Start, Nexts, Buf, Opt) ->
      case hex4_to_integer(N) of
          {ok, High} when 16#D800 =< High, High =< 16#DBFF ->
              unicode_low_surrogate(Bin, High, Input, Start, Nexts, Buf, Opt);
          {ok, Unicode} when 16#DC00 =< Unicode, Unicode =< 16#DFFF ->
              %% second part of surrogate pair (without first part)
              ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt]);
          {ok, Unicode} ->
              string(Bin, Start, Nexts, <<Buf/binary, Unicode/utf8>>, Opt);
          error ->
              ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt])
      end;
  unicode_string(Bin, Start, Nexts, Buf, Opt) ->
      ?ERROR(unicode_string, [Bin, Start, Nexts, Buf, Opt]).

  %% Completes a surrogate pair whose high half `High' has already been read.
  %% `Input' is the input as it was at the high half's digits; it is only used
  %% for error reporting.
  -spec unicode_low_surrogate(binary(), 16#D800..16#DBFF, binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  unicode_low_surrogate(<<$\\, $u, N2:4/binary, Bin2/binary>>, High, Input, Start, Nexts, Buf, Opt) ->
      case hex4_to_integer(N2) of
          {ok, Low} when 16#DC00 =< Low, Low =< 16#DFFF ->
              %% Reinterpret the two 16-bit halves as one UTF-16 encoded code point.
              <<Unicode/utf16>> = <<High:16, Low:16>>,
              string(Bin2, Start, Nexts, <<Buf/binary, Unicode/utf8>>, Opt);
          _ ->
              ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt])
      end;
  unicode_low_surrogate(_Bin, _High, Input, Start, Nexts, Buf, Opt) ->
      ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt]).

  %% Converts exactly four hexadecimal digits to an integer; returns `error' if
  %% any byte is not one of 0-9, a-f or A-F.
  -spec hex4_to_integer(<<_:32>>) -> {ok, 0..16#FFFF} | error.
  hex4_to_integer(<<A, B, C, D>> = Hex) ->
      AllHex = is_hex_digit(A) andalso is_hex_digit(B)
          andalso is_hex_digit(C) andalso is_hex_digit(D),
      case AllHex of
          true  -> {ok, binary_to_integer(Hex, 16)};
          false -> error
      end.

  %% True when the byte is an ASCII hexadecimal digit.
  -spec is_hex_digit(byte()) -> boolean().
  is_hex_digit(C) when $0 =< C, C =< $9 -> true;
  is_hex_digit(C) when $a =< C, C =< $f -> true;
  is_hex_digit(C) when $A =< C, C =< $F -> true;
  is_hex_digit(_) -> false.
  %% Entry point for numbers: strips an optional leading minus sign and passes
  %% the sign (1 or -1) on to the integer-part parser.
  -spec number(binary(), [next()], binary(), opt()) -> decode_result().
  number(<<Input/binary>>, Nexts, Buf, Opt) ->
      {Sign, Digits} =
          case Input of
              <<$-, Rest/binary>> -> {-1, Rest};
              _                   -> {1, Input}
          end,
      number_integer_part(Digits, Sign, Nexts, Buf, Opt).
  %% Reads the first digit of the integer part. A leading `0' ends the integer
  %% part immediately; `1'..`9' may be followed by further digits. Anything else
  %% is reported as an error.
  -spec number_integer_part(binary(), 1|-1, [next()], binary(), opt()) -> decode_result().
  number_integer_part(<<$0, Rest/binary>>, Sign, Nexts, Buf, Opt) ->
      number_fraction_part(Rest, Sign, 0, Nexts, Buf, Opt);
  number_integer_part(<<Digit, Rest/binary>>, Sign, Nexts, Buf, Opt)
    when Digit >= $1, Digit =< $9 ->
      FirstDigit = Digit - $0,
      number_integer_part_rest(Rest, FirstDigit, Sign, Nexts, Buf, Opt);
  number_integer_part(Bin, Sign, Nexts, Buf, Opt) ->
      ?ERROR(number_integer_part, [Bin, Sign, Nexts, Buf, Opt]).
  %% Accumulates the remaining digits of the integer part into `N'; the first
  %% non-digit hands over to the fraction-part parser.
  -spec number_integer_part_rest(binary(), non_neg_integer(), 1|-1, [next()], binary(), opt()) -> decode_result().
  number_integer_part_rest(<<Digit, Rest/binary>>, N, Sign, Nexts, Buf, Opt)
    when Digit >= $0, Digit =< $9 ->
      number_integer_part_rest(Rest, 10 * N + (Digit - $0), Sign, Nexts, Buf, Opt);
  number_integer_part_rest(<<Input/binary>>, N, Sign, Nexts, Buf, Opt) ->
      number_fraction_part(Input, Sign, N, Nexts, Buf, Opt).
  %% Entered after the integer part. A `.' starts the fraction digits; without
  %% one the signed integer goes to the exponent parser with a decimal offset
  %% of zero.
  -spec number_fraction_part(binary(), 1|-1, non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  number_fraction_part(<<$., Rest/binary>>, Sign, Int, Nexts, Buf, Opt) ->
      number_fraction_part_rest(Rest, Sign, Int, 0, Nexts, Buf, Opt);
  number_fraction_part(<<Input/binary>>, Sign, Int, Nexts, Buf, Opt) ->
      Signed = Sign * Int,
      number_exponation_part(Input, Signed, 0, Nexts, Buf, Opt).
  %% Accumulates fraction digits into the same integer `N' as the integer part,
  %% counting them in `DecimalOffset'. At least one digit is required after the
  %% `.'; otherwise an error is reported.
  -spec number_fraction_part_rest(binary(), 1|-1, non_neg_integer(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  number_fraction_part_rest(<<Digit, Rest/binary>>, Sign, N, DecimalOffset, Nexts, Buf, Opt)
    when Digit >= $0, Digit =< $9 ->
      number_fraction_part_rest(Rest, Sign, 10 * N + (Digit - $0), DecimalOffset + 1, Nexts, Buf, Opt);
  number_fraction_part_rest(<<Input/binary>>, Sign, N, DecimalOffset, Nexts, Buf, Opt)
    when DecimalOffset > 0 ->
      Signed = Sign * N,
      number_exponation_part(Input, Signed, DecimalOffset, Nexts, Buf, Opt);
  number_fraction_part_rest(Bin, Sign, N, DecimalOffset, Nexts, Buf, Opt) ->
      ?ERROR(number_fraction_part_rest, [Bin, Sign, N, DecimalOffset, Nexts, Buf, Opt]).
  %% Entered after the integer and optional fraction part. `N' is the signed
  %% integer formed by all digits read so far and `DecimalOffset' is how many of
  %% those digits came after the decimal point.
  %%
  %% An `e'/`E' (optionally followed by `+' or `-') hands over to the
  %% exponent-digit parser, number_exponation_part/9. Without an exponent the
  %% number is finished: an integer when there was no fraction, otherwise `N'
  %% divided by 10^DecimalOffset.
  %%
  %% The division is guarded: when the power of ten or the mantissa cannot be
  %% represented as a float (e.g. a fraction several hundred digits long) the
  %% arithmetic raises `badarith', which is reported as an error result instead
  %% of crashing the caller.
  -spec number_exponation_part(binary(), integer(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  number_exponation_part(<<$e, $+, Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      number_exponation_part(Bin, N, DecimalOffset, 1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<$E, $+, Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      number_exponation_part(Bin, N, DecimalOffset, 1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<$e, $-, Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      number_exponation_part(Bin, N, DecimalOffset, -1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<$E, $-, Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      number_exponation_part(Bin, N, DecimalOffset, -1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<$e, Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      number_exponation_part(Bin, N, DecimalOffset, 1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<$E, Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      number_exponation_part(Bin, N, DecimalOffset, 1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<Bin/binary>>, N, 0, Nexts, Buf, Opt) ->
      %% No fraction and no exponent: the value stays an integer.
      next(Bin, N, Nexts, Buf, Opt);
  number_exponation_part(<<Bin/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      %% Only the arithmetic is protected; the continuation runs outside the try.
      try N / math:pow(10, DecimalOffset) of
          Float -> next(Bin, Float, Nexts, Buf, Opt)
      catch
          error:badarith ->
              ?ERROR(number_exponation_part, [Bin, N, DecimalOffset, Nexts, Buf, Opt])
      end.
  %% Reads the exponent digits and produces the final float.
  %%
  %% `ExpSign' is 1 or -1, `Exp' accumulates the exponent magnitude, and the
  %% boolean is `true' until the first exponent digit has been read (an exponent
  %% marker with no digits is an error). The result is N * 10^Pos where
  %% Pos = ExpSign * Exp - DecimalOffset.
  %%
  %% Arithmetic that cannot be represented as a float (e.g. `1e999') raises
  %% `badarith'; that is reported as an error result instead of crashing the
  %% caller.
  -spec number_exponation_part(binary(), integer(), non_neg_integer(), 1|-1, non_neg_integer(), boolean(), [next()], binary(), opt()) -> decode_result().
  number_exponation_part(<<C, Bin/binary>>, N, DecimalOffset, ExpSign, Exp, _, Nexts, Buf, Opt) when $0 =< C, C =< $9 ->
      number_exponation_part(Bin, N, DecimalOffset, ExpSign, Exp * 10 + C - $0, false, Nexts, Buf, Opt);
  number_exponation_part(<<Bin/binary>>, N, DecimalOffset, ExpSign, Exp, false, Nexts, Buf, Opt) ->
      Pos = ExpSign * Exp - DecimalOffset,
      %% Only the arithmetic is protected; the continuation runs outside the try.
      try scale_by_power_of_ten(N, Pos) of
          Number -> next(Bin, Number, Nexts, Buf, Opt)
      catch
          error:badarith ->
              ?ERROR(number_exponation_part, [Bin, N, DecimalOffset, ExpSign, Exp, false, Nexts, Buf, Opt])
      end;
  number_exponation_part(Bin, N, DecimalOffset, ExpSign, Exp, IsFirst, Nexts, Buf, Opt) ->
      ?ERROR(number_exponation_part, [Bin, N, DecimalOffset, ExpSign, Exp, IsFirst, Nexts, Buf, Opt]).

  %% Computes N * 10^Pos as a float.
  %%
  %% For negative `Pos' the value is obtained by dividing by the positive power
  %% of ten: multiplying by an inexact negative power adds rounding error
  %% (11 * 0.1 =/= 1.1, whereas 11 / 10.0 =:= 1.1). The division is only used
  %% while 10^(-Pos) fits in a double (exponent magnitude up to 308); smaller
  %% exponents keep the original multiplication.
  -spec scale_by_power_of_ten(integer(), integer()) -> float().
  scale_by_power_of_ten(N, Pos) when Pos >= 0    -> N * math:pow(10, Pos);
  scale_by_power_of_ten(N, Pos) when Pos >= -308 -> N / math:pow(10, -Pos);
  scale_by_power_of_ten(N, Pos)                  -> N * math:pow(10, Pos).
  %% Builds the final object term from `Members' (accumulated newest first)
  %% according to the configured object format:
  %%   tuple    - the members in source order, wrapped in a 1-tuple
  %%   map      - the member list passed to ?LIST_TO_MAP as accumulated
  %%   proplist - `[{}]' for an empty object, otherwise the members in source order
  -spec make_object(jsone:json_object_members(), opt()) -> jsone:json_object().
  make_object(Members, Opt) ->
      case {Opt, Members} of
          {?OPT{object_format = tuple}, _} -> {lists:reverse(Members)};
          {?OPT{object_format = map}, _}   -> ?LIST_TO_MAP(Members);
          {_, []}                          -> [{}];
          {_, _}                           -> lists:reverse(Members)
      end.
  %% Turns the user-supplied option list into an option record, starting from
  %% the record's default field values.
  -spec parse_options([jsone:decode_option()]) -> opt().
  parse_options(Options) ->
      Defaults = ?OPT{},
      parse_option(Options, Defaults).
  %% Folds the option list into the option record, one option at a time.
  %% Recognised options are `{object_format, tuple | proplist | map}' and
  %% `{allow_ctrl_chars, boolean()}'; a later occurrence overrides an earlier
  %% one. Anything else raises `badarg' with the unparsed remainder and the
  %% record built so far as the reported arguments.
  -spec parse_option([jsone:decode_option()], opt()) -> opt().
  parse_option([], Opt) ->
      Opt;
  parse_option([{object_format, Format} | Rest], Opt)
    when Format =:= tuple orelse Format =:= proplist orelse Format =:= map ->
      parse_option(Rest, Opt?OPT{object_format = Format});
  parse_option([{allow_ctrl_chars, Allow} | Rest], Opt) when is_boolean(Allow) ->
      parse_option(Rest, Opt?OPT{allow_ctrl_chars = Allow});
  parse_option(Unparsed, Opt) ->
      error(badarg, [Unparsed, Opt]).