jsone_decode.erl 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. %%% @doc JSON decoding module
  2. %%% @private
  3. %%% @end
  4. %%%
  5. %%% Copyright (c) 2013-2015, Takeru Ohta <phjgt308@gmail.com>
  6. %%%
  7. %%% The MIT License
  8. %%%
  9. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  10. %%% of this software and associated documentation files (the "Software"), to deal
  11. %%% in the Software without restriction, including without limitation the rights
  12. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13. %%% copies of the Software, and to permit persons to whom the Software is
  14. %%% furnished to do so, subject to the following conditions:
  15. %%%
  16. %%% The above copyright notice and this permission notice shall be included in
  17. %%% all copies or substantial portions of the Software.
  18. %%%
  19. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. %%% THE SOFTWARE.
  26. %%%
  27. %%%---------------------------------------------------------------------------------------
  28. -module(jsone_decode).
  29. -compile([native, {hipe, [o3]}]).
  30. %%--------------------------------------------------------------------------------
  31. %% Exported API
  32. %%--------------------------------------------------------------------------------
  33. -export([decode/1, decode/2]).
  34. %%--------------------------------------------------------------------------------
  35. %% Macros & Records & Types
  36. %%--------------------------------------------------------------------------------
  %% Builds the error term returned for malformed input: a badarg reason paired
  %% with a one-element stack trace naming this module, the function that
  %% rejected the input, the arguments it was called with, and the source line
  %% where the macro was expanded.
  -define(ERROR(Function, Args), {error, {badarg, [{?MODULE, Function, Args, [{line, ?LINE}]}]}}).
  %% A frame on the continuation stack consumed by next/5 once a value has been
  %% decoded; next/5 merges the fresh value into the frame before resuming.
  -type next() :: {array_next, [jsone:json_value()]}
                | {object_value, jsone:json_object_members()}
                | {object_next, jsone:json_string(), jsone:json_object_members()}.
  %% The parsing step whitespace/5 dispatches to after skipping whitespace.
  -type whitespace_next() :: value
                           | array
                           | object
                           | {array_next, [jsone:json_value()]}
                           | {object_key, jsone:json_object_members()}
                           | {object_value, jsone:json_string(), jsone:json_object_members()}
                           | {object_next, jsone:json_object_members()}.
  %% Outcome of decode/1,2: the decoded value together with the unconsumed
  %% remainder of the input, or an error term shaped like the ERROR macro.
  -type decode_result() :: {ok, jsone:json_value(), Rest::binary()} | {error, {Reason::term(), [erlang:stack_item()]}}.
  %% Decoder options. object_format selects how make_object/2 represents JSON
  %% objects and defaults to tuple.
  -record(decode_opt_v1, { object_format=tuple :: tuple | proplist | map}).
  %% Shorthand for the option record name, usable in patterns, constructors
  %% and record updates.
  -define(OPT, #decode_opt_v1).
  -type opt() :: #decode_opt_v1{}.
  52. %%--------------------------------------------------------------------------------
  53. %% Exported Functions
  54. %%--------------------------------------------------------------------------------
  %% Decodes the first JSON value found in Json using the default options.
  %% Equivalent to decode(Json, []).
  -spec decode(binary()) -> decode_result().
  decode(Json) ->
      NoOptions = [],
      decode(Json, NoOptions).
  %% Decodes the first JSON value found in Json.
  %%
  %% Options is parsed into the option record first, then parsing starts in the
  %% whitespace state with an empty continuation stack and an empty string
  %% buffer. Only binaries are accepted as Json (the head matches a binary).
  -spec decode(binary(), [jsone:decode_option()]) -> decode_result().
  decode(<<Json/binary>>, Options) ->
      whitespace(Json, value, [], <<>>, parse_options(Options)).
  62. %%--------------------------------------------------------------------------------
  63. %% Internal Functions
  64. %%--------------------------------------------------------------------------------
  %% Continues after a complete value has been decoded.
  %%
  %% With an empty continuation stack the value is the final result and is
  %% returned together with the unconsumed input. Otherwise the top frame is
  %% popped, the value is folded into it, and parsing resumes (after optional
  %% whitespace) in the state that frame describes.
  -spec next(binary(), jsone:json_value(), [next()], binary(), opt()) -> decode_result().
  next(<<Rest/binary>>, Value, [], _Buf, _Opt) ->
      {ok, Value, Rest};
  next(<<Rest/binary>>, Value, [Frame | Stack], Buf, Opt) ->
      Resume =
          case Frame of
              %% Value is the latest array element.
              {array_next, Values}        -> {array_next, [Value | Values]};
              %% Value is an object key; its value comes next.
              {object_value, Members}     -> {object_value, Value, Members};
              %% Value completes the pending Key/Value member.
              {object_next, Key, Members} -> {object_next, [{Key, Value} | Members]}
          end,
      whitespace(Rest, Resume, Stack, Buf, Opt).
  %% Skips leading space, tab, carriage-return and line-feed bytes, then hands
  %% the remaining input to the parsing step named by Next.
  -spec whitespace(binary(), whitespace_next(), [next()], binary(), opt()) -> decode_result().
  whitespace(<<C, Rest/binary>>, Next, Nexts, Buf, Opt)
    when C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n ->
      whitespace(Rest, Next, Nexts, Buf, Opt);
  whitespace(<<Rest/binary>>, Next, Nexts, Buf, Opt) ->
      case Next of
          value ->
              value(Rest, Nexts, Buf, Opt);
          array ->
              array(Rest, Nexts, Buf, Opt);
          object ->
              object(Rest, Nexts, Buf, Opt);
          {object_key, Members} ->
              object_key(Rest, Members, Nexts, Buf, Opt);
          {array_next, Values} ->
              array_next(Rest, Values, Nexts, Buf, Opt);
          {object_value, Key, Members} ->
              object_value(Rest, Key, Members, Nexts, Buf, Opt);
          {object_next, Members} ->
              object_next(Rest, Members, Nexts, Buf, Opt)
      end.
  %% Parses one JSON value starting at the first byte of the input.
  %%
  %% Literals are matched directly; an opening bracket, brace or double quote
  %% switches to the array, object or string parser. Anything else is handed
  %% to the number parser, which reports the error if it is not a number.
  -spec value(binary(), [next()], binary(), opt()) -> decode_result().
  value(<<"false", Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, false, Nexts, Buf, Opt);
  value(<<"true", Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, true, Nexts, Buf, Opt);
  value(<<"null", Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, null, Nexts, Buf, Opt);
  value(<<$[, Rest/binary>>, Nexts, Buf, Opt) ->
      whitespace(Rest, array, Nexts, Buf, Opt);
  value(<<${, Rest/binary>>, Nexts, Buf, Opt) ->
      whitespace(Rest, object, Nexts, Buf, Opt);
  value(<<$", Rest/binary>>, Nexts, Buf, Opt) ->
      %% The string's unescaped bytes, if any, will be appended to Buf from here.
      Start = byte_size(Buf),
      string(Rest, Start, Nexts, Buf, Opt);
  value(<<Rest/binary>>, Nexts, Buf, Opt) ->
      number(Rest, Nexts, Buf, Opt).
  %% Parses the inside of an array just after its opening bracket (and any
  %% whitespace). A closing bracket yields the empty list; otherwise the first
  %% element is parsed with an array frame pushed on the continuation stack.
  -spec array(binary(), [next()], binary(), opt()) -> decode_result().
  array(<<$], Rest/binary>>, Nexts, Buf, Opt) ->
      next(Rest, [], Nexts, Buf, Opt);
  array(<<Rest/binary>>, Nexts, Buf, Opt) ->
      Stack = [{array_next, []} | Nexts],
      value(Rest, Stack, Buf, Opt).
  %% Handles what follows an array element. Values holds the elements decoded
  %% so far, most recent first.
  %%   closing bracket - the array is complete; elements are put back in order.
  %%   comma           - another element follows.
  %%   anything else   - error.
  -spec array_next(binary(), [jsone:json_value()], [next()], binary(), opt()) -> decode_result().
  array_next(<<$], Rest/binary>>, Values, Nexts, Buf, Opt) ->
      Array = lists:reverse(Values),
      next(Rest, Array, Nexts, Buf, Opt);
  array_next(<<$,, Rest/binary>>, Values, Nexts, Buf, Opt) ->
      Stack = [{array_next, Values} | Nexts],
      whitespace(Rest, value, Stack, Buf, Opt);
  array_next(Bin, Values, Nexts, Buf, Opt) ->
      ?ERROR(array_next, [Bin, Values, Nexts, Buf, Opt]).
  %% Parses the inside of an object just after its opening brace (and any
  %% whitespace). A closing brace yields the empty object in the configured
  %% format; otherwise the first member key is expected.
  -spec object(binary(), [next()], binary(), opt()) -> decode_result().
  object(<<$}, Rest/binary>>, Nexts, Buf, Opt) ->
      EmptyObject = make_object([], Opt),
      next(Rest, EmptyObject, Nexts, Buf, Opt);
  object(<<Rest/binary>>, Nexts, Buf, Opt) ->
      object_key(Rest, [], Nexts, Buf, Opt).
  %% Parses a member key, which must be a string. The key is decoded by the
  %% string parser with a frame pushed so that next/5 continues with the
  %% member's value. Any input not starting with a double quote is an error.
  -spec object_key(binary(), jsone:json_object_members(), [next()], binary(), opt()) -> decode_result().
  object_key(<<$", Rest/binary>>, Members, Nexts, Buf, Opt) ->
      Stack = [{object_value, Members} | Nexts],
      Start = byte_size(Buf),
      string(Rest, Start, Stack, Buf, Opt);
  object_key(<<Rest/binary>>, Members, Nexts, Buf, Opt) ->
      ?ERROR(object_key, [Rest, Members, Nexts, Buf, Opt]).
  %% Expects the colon separating a member key from its value, then parses the
  %% value with a frame that remembers Key and the members decoded so far.
  %% Anything other than a colon is an error.
  -spec object_value(binary(), jsone:json_string(), jsone:json_object_members(), [next()], binary(), opt()) -> decode_result().
  object_value(<<$:, Rest/binary>>, Key, Members, Nexts, Buf, Opt) ->
      Stack = [{object_next, Key, Members} | Nexts],
      whitespace(Rest, value, Stack, Buf, Opt);
  object_value(Bin, Key, Members, Nexts, Buf, Opt) ->
      ?ERROR(object_value, [Bin, Key, Members, Nexts, Buf, Opt]).
  %% Handles what follows an object member. Members holds the members decoded
  %% so far, most recent first.
  %%   closing brace - the object is complete and is built in the configured format.
  %%   comma         - another member follows, starting with its key.
  %%   anything else - error.
  -spec object_next(binary(), jsone:json_object_members(), [next()], binary(), opt()) -> decode_result().
  object_next(<<$}, Rest/binary>>, Members, Nexts, Buf, Opt) ->
      Object = make_object(Members, Opt),
      next(Rest, Object, Nexts, Buf, Opt);
  object_next(<<$,, Rest/binary>>, Members, Nexts, Buf, Opt) ->
      whitespace(Rest, {object_key, Members}, Nexts, Buf, Opt);
  object_next(Bin, Members, Nexts, Buf, Opt) ->
      ?ERROR(object_next, [Bin, Members, Nexts, Buf, Opt]).
  %% Starts (or, after an escape sequence, restarts) scanning a string body.
  %% The given input becomes both the scan cursor and the base from which the
  %% next run of plain bytes is sliced. Start is the offset in Buf at which this
  %% string's unescaped bytes begin.
  -spec string(binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  string(<<Segment/binary>>, Start, Nexts, Buf, Opt) ->
      string(Segment, Segment, Start, Nexts, Buf, Opt).
  %% Scans a string body byte by byte.
  %%
  %% Arguments:
  %%   1st   - input still to scan (the cursor).
  %%   Base  - input at the point the current run of plain bytes began; the run
  %%           is sliced from it once a quote or a backslash is reached.
  %%   Start - offset in Buf where this string's unescaped bytes begin.
  %%   Buf   - accumulator for unescaped bytes; it only grows when an escape
  %%           sequence is processed.
  %%
  %% Returns whatever next/5 returns once the closing quote is found, or an
  %% error term when the input ends before the closing quote, contains a raw
  %% control character (byte below 16#20), or has an unknown escape sequence.
  -spec string(binary(), binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  string(<<$", Bin/binary>>, Base, Start, Nexts, Buf, Opt) ->
      Prefix = binary:part(Base, 0, byte_size(Base) - byte_size(Bin) - 1),
      case Start =:= byte_size(Buf) of
          true ->
              %% Buf did not grow, so no escape was processed: the string is
              %% exactly the scanned slice of the input.
              next(Bin, Prefix, Nexts, Buf, Opt);
          false ->
              %% Escapes were processed: the string is everything appended to
              %% Buf since Start plus the final plain run.
              Buf2 = <<Buf/binary, Prefix/binary>>,
              next(Bin, binary:part(Buf2, Start, byte_size(Buf2) - Start), Nexts, Buf2, Opt)
      end;
  string(<<$\\, B/binary>>, Base, Start, Nexts, Buf, Opt) ->
      %% Flush the plain run before the backslash, append the unescaped byte,
      %% and restart scanning after the escape sequence.
      Prefix = binary:part(Base, 0, byte_size(Base) - byte_size(B) - 1),
      case B of
          <<$", Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $">>, Opt);
          <<$/, Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $/>>, Opt);
          <<$\\,Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $\\>>, Opt);
          <<$b, Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $\b>>, Opt);
          <<$f, Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $\f>>, Opt);
          <<$n, Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $\n>>, Opt);
          <<$r, Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $\r>>, Opt);
          <<$t, Bin/binary>> -> string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary, $\t>>, Opt);
          <<$u, Bin/binary>> -> unicode_string(Bin, Start, Nexts, <<Buf/binary, Prefix/binary>>, Opt);
          _ -> ?ERROR(string, [<<$\\, B/binary>>, Base, Start, Nexts, Buf, Opt])
      end;
  string(<<C, Bin/binary>>, Base, Start, Nexts, Buf, Opt) when 16#20 =< C ->
      string(Bin, Base, Start, Nexts, Buf, Opt);
  string(Bin, Base, Start, Nexts, Buf, Opt) ->
      %% End of input before the closing quote, or a raw control character.
      %% Report it like every other syntax error instead of crashing with
      %% function_clause.
      ?ERROR(string, [Bin, Base, Start, Nexts, Buf, Opt]).
  %% Decodes a \uXXXX escape whose backslash and u have already been consumed,
  %% appends the code point to Buf as UTF-8 and resumes string scanning.
  %%
  %% A high surrogate (16#D800..16#DBFF) must be followed immediately by a
  %% \uXXXX low surrogate (16#DC00..16#DFFF); the pair is combined into one
  %% code point. A lone low surrogate, a high surrogate without its partner,
  %% fewer than four characters, or any non-hexadecimal character yields an
  %% error term.
  -spec unicode_string(binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  unicode_string(<<N:4/binary, Bin/binary>> = Input, Start, Nexts, Buf, Opt) ->
      case hex4_to_integer(N) of
          {ok, High} when 16#D800 =< High, High =< 16#DBFF ->
              unicode_low_surrogate(Bin, High, Input, Start, Nexts, Buf, Opt);
          {ok, Code} when 16#DC00 =< Code, Code =< 16#DFFF ->
              %% Second half of a surrogate pair without the first half.
              ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt]);
          {ok, Code} ->
              string(Bin, Start, Nexts, unicode_to_utf8(Code, Buf), Opt);
          error ->
              ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt])
      end;
  unicode_string(Bin, Start, Nexts, Buf, Opt) ->
      ?ERROR(unicode_string, [Bin, Start, Nexts, Buf, Opt]).

  %% Parses the low surrogate that must follow the high surrogate High and
  %% continues with the combined code point. Input is the text following the
  %% first \u and is used only for error reporting.
  -spec unicode_low_surrogate(binary(), 16#D800..16#DBFF, binary(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  unicode_low_surrogate(<<$\\, $u, N:4/binary, Bin/binary>>, High, Input, Start, Nexts, Buf, Opt) ->
      case hex4_to_integer(N) of
          {ok, Low} when 16#DC00 =< Low, Low =< 16#DFFF ->
              Code = 16#10000 + (High - 16#D800) * 16#400 + (Low - 16#DC00),
              string(Bin, Start, Nexts, unicode_to_utf8(Code, Buf), Opt);
          _ ->
              ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt])
      end;
  unicode_low_surrogate(_Bin, _High, Input, Start, Nexts, Buf, Opt) ->
      ?ERROR(unicode_string, [Input, Start, Nexts, Buf, Opt]).

  %% Converts hexadecimal digits (0-9, a-f, A-F) to an integer. Unlike
  %% binary_to_integer/2 it never raises and rejects a leading sign, so input
  %% such as "+123" or "-001" is not mistaken for a code point.
  -spec hex4_to_integer(binary()) -> {ok, non_neg_integer()} | error.
  hex4_to_integer(Digits) ->
      hex4_to_integer(Digits, 0).

  -spec hex4_to_integer(binary(), non_neg_integer()) -> {ok, non_neg_integer()} | error.
  hex4_to_integer(<<>>, Acc) ->
      {ok, Acc};
  hex4_to_integer(<<C, Rest/binary>>, Acc) when $0 =< C, C =< $9 ->
      hex4_to_integer(Rest, Acc * 16 + (C - $0));
  hex4_to_integer(<<C, Rest/binary>>, Acc) when $a =< C, C =< $f ->
      hex4_to_integer(Rest, Acc * 16 + (C - $a + 10));
  hex4_to_integer(<<C, Rest/binary>>, Acc) when $A =< C, C =< $F ->
      hex4_to_integer(Rest, Acc * 16 + (C - $A + 10));
  hex4_to_integer(_, _) ->
      error.
  %% Appends the UTF-8 encoding of the code point Code to Buf.
  %%
  %% One byte is emitted below 16#80, two below 16#800, three below 16#10000
  %% and four otherwise. Each continuation byte is the marker bits 10 followed
  %% by six payload bits; an integer segment of size 6 keeps exactly the low six
  %% bits of its value.
  -spec unicode_to_utf8(0..1114111, binary()) -> binary().
  unicode_to_utf8(Code, Buf) when Code < 16#80 ->
      <<Buf/binary, Code>>;
  unicode_to_utf8(Code, Buf) when Code < 16#800 ->
      <<Buf/binary,
        2#110:3, (Code bsr 6):5,
        2#10:2, Code:6>>;
  unicode_to_utf8(Code, Buf) when Code < 16#10000 ->
      <<Buf/binary,
        2#1110:4, (Code bsr 12):4,
        2#10:2, (Code bsr 6):6,
        2#10:2, Code:6>>;
  unicode_to_utf8(Code, Buf) ->
      Lead = 2#11110000 bor (Code bsr 18),
      <<Buf/binary,
        Lead,
        2#10:2, (Code bsr 12):6,
        2#10:2, (Code bsr 6):6,
        2#10:2, Code:6>>.
  %% Entry point of the number parser: consumes an optional leading minus sign
  %% and passes the resulting sign (-1 or 1) on to the integer-part parser.
  -spec number(binary(), [next()], binary(), opt()) -> decode_result().
  number(<<$-, Digits/binary>>, Nexts, Buf, Opt) ->
      number_integer_part(Digits, -1, Nexts, Buf, Opt);
  number(<<Digits/binary>>, Nexts, Buf, Opt) ->
      number_integer_part(Digits, 1, Nexts, Buf, Opt).
  %% Parses the first digit of the integer part.
  %%
  %% A leading zero ends the integer part immediately (no further integer
  %% digits are read after it); a digit 1-9 starts a multi-digit integer.
  %% Anything else is an error.
  -spec number_integer_part(binary(), 1|-1, [next()], binary(), opt()) -> decode_result().
  number_integer_part(<<$0, Rest/binary>>, Sign, Nexts, Buf, Opt) ->
      number_fraction_part(Rest, Sign, 0, Nexts, Buf, Opt);
  number_integer_part(<<Digit, Rest/binary>>, Sign, Nexts, Buf, Opt) when $1 =< Digit, Digit =< $9 ->
      FirstDigit = Digit - $0,
      number_integer_part_rest(Rest, FirstDigit, Sign, Nexts, Buf, Opt);
  number_integer_part(Bin, Sign, Nexts, Buf, Opt) ->
      ?ERROR(number_integer_part, [Bin, Sign, Nexts, Buf, Opt]).
  %% Accumulates the remaining digits of the integer part into N, then moves
  %% on to the optional fraction part at the first non-digit byte.
  -spec number_integer_part_rest(binary(), non_neg_integer(), 1|-1, [next()], binary(), opt()) -> decode_result().
  number_integer_part_rest(<<Digit, Rest/binary>>, N, Sign, Nexts, Buf, Opt) when $0 =< Digit, Digit =< $9 ->
      Acc = N * 10 + Digit - $0,
      number_integer_part_rest(Rest, Acc, Sign, Nexts, Buf, Opt);
  number_integer_part_rest(<<Rest/binary>>, N, Sign, Nexts, Buf, Opt) ->
      number_fraction_part(Rest, Sign, N, Nexts, Buf, Opt).
  %% Decides whether a fraction follows the integer part Int.
  %%
  %% A decimal point starts fraction parsing with zero fraction digits read so
  %% far. Otherwise the sign is applied to Int and parsing continues with the
  %% optional exponent, with a decimal offset of 0.
  -spec number_fraction_part(binary(), 1|-1, non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  number_fraction_part(<<$., Rest/binary>>, Sign, Int, Nexts, Buf, Opt) ->
      number_fraction_part_rest(Rest, Sign, Int, 0, Nexts, Buf, Opt);
  number_fraction_part(<<Rest/binary>>, Sign, Int, Nexts, Buf, Opt) ->
      Signed = Sign * Int,
      number_exponation_part(Rest, Signed, 0, Nexts, Buf, Opt).
  %% Accumulates fraction digits.
  %%
  %% Each digit is appended to N as if the decimal point were absent, and
  %% DecimalOffset counts how many fraction digits were read. At the first
  %% non-digit the signed mantissa is passed on to the exponent parser; a
  %% decimal point followed by no digit at all is an error.
  -spec number_fraction_part_rest(binary(), 1|-1, non_neg_integer(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  number_fraction_part_rest(<<Digit, Rest/binary>>, Sign, N, DecimalOffset, Nexts, Buf, Opt) when $0 =< Digit, Digit =< $9 ->
      Mantissa = N * 10 + Digit - $0,
      number_fraction_part_rest(Rest, Sign, Mantissa, DecimalOffset + 1, Nexts, Buf, Opt);
  number_fraction_part_rest(<<Rest/binary>>, Sign, N, DecimalOffset, Nexts, Buf, Opt) when DecimalOffset > 0 ->
      Signed = Sign * N,
      number_exponation_part(Rest, Signed, DecimalOffset, Nexts, Buf, Opt);
  number_fraction_part_rest(Bin, Sign, N, DecimalOffset, Nexts, Buf, Opt) ->
      ?ERROR(number_fraction_part_rest, [Bin, Sign, N, DecimalOffset, Nexts, Buf, Opt]).
  %% Detects an optional exponent marker after the mantissa N.
  %%
  %% A lower- or upper-case e, optionally followed by a plus or minus sign,
  %% starts exponent-digit parsing with the matching exponent sign. Without a
  %% marker the number is complete: N is returned unchanged when there were no
  %% fraction digits, otherwise it is divided by 10 raised to DecimalOffset,
  %% which produces a float.
  -spec number_exponation_part(binary(), integer(), non_neg_integer(), [next()], binary(), opt()) -> decode_result().
  number_exponation_part(<<E, $+, Rest/binary>>, N, DecimalOffset, Nexts, Buf, Opt) when E =:= $e; E =:= $E ->
      number_exponation_part(Rest, N, DecimalOffset, 1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<E, $-, Rest/binary>>, N, DecimalOffset, Nexts, Buf, Opt) when E =:= $e; E =:= $E ->
      number_exponation_part(Rest, N, DecimalOffset, -1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<E, Rest/binary>>, N, DecimalOffset, Nexts, Buf, Opt) when E =:= $e; E =:= $E ->
      number_exponation_part(Rest, N, DecimalOffset, 1, 0, true, Nexts, Buf, Opt);
  number_exponation_part(<<Rest/binary>>, N, 0, Nexts, Buf, Opt) ->
      next(Rest, N, Nexts, Buf, Opt);
  number_exponation_part(<<Rest/binary>>, N, DecimalOffset, Nexts, Buf, Opt) ->
      Scaled = N / math:pow(10, DecimalOffset),
      next(Rest, Scaled, Nexts, Buf, Opt).
  %% Parses the exponent digits and produces the final float.
  %%
  %% Arguments of note:
  %%   N             - signed mantissa with the decimal point removed.
  %%   DecimalOffset - number of fraction digits folded into N.
  %%   ExpSign, Exp  - sign and accumulated magnitude of the exponent.
  %%   6th argument  - true until the first exponent digit has been read; an
  %%                   exponent marker followed by no digit is an error.
  %%
  %% The value is N scaled by 10 raised to (ExpSign * Exp - DecimalOffset).
  %% An error term is returned when the scaling cannot be represented as a
  %% float (arithmetic raises badarith, e.g. for "1e999").
  -spec number_exponation_part(binary(), integer(), non_neg_integer(), 1|-1, non_neg_integer(), boolean(), [next()], binary(), opt()) -> decode_result().
  number_exponation_part(<<C, Bin/binary>>, N, DecimalOffset, ExpSign, Exp, _, Nexts, Buf, Opt) when $0 =< C, C =< $9 ->
      number_exponation_part(Bin, N, DecimalOffset, ExpSign, Exp * 10 + C - $0, false, Nexts, Buf, Opt);
  number_exponation_part(<<Bin/binary>>, N, DecimalOffset, ExpSign, Exp, false, Nexts, Buf, Opt) ->
      Pos = ExpSign * Exp - DecimalOffset,
      try
          if
              Pos >= 0 ->
                  N * math:pow(10, Pos);
              Pos >= -308 ->
                  %% Divide by the positive power instead of multiplying by the
                  %% inexact negative power: 3 / 10.0 is 0.3, whereas 3 * 0.1 is
                  %% 0.30000000000000004. This also agrees with how a plain
                  %% fraction without exponent is scaled.
                  N / math:pow(10, -Pos);
              true ->
                  %% 10 raised to -Pos is not representable as a float here, so
                  %% fall back to multiplication.
                  N * math:pow(10, Pos)
          end
      of
          Number -> next(Bin, Number, Nexts, Buf, Opt)
      catch
          error:badarith ->
              %% The magnitude overflows a float; report it as a decode error
              %% rather than crashing the caller.
              ?ERROR(number_exponation_part, [Bin, N, DecimalOffset, ExpSign, Exp, false, Nexts, Buf, Opt])
      end;
  number_exponation_part(Bin, N, DecimalOffset, ExpSign, Exp, IsFirst, Nexts, Buf, Opt) ->
      ?ERROR(number_exponation_part, [Bin, N, DecimalOffset, ExpSign, Exp, IsFirst, Nexts, Buf, Opt]).
  %% Builds the Erlang representation of a JSON object from Members, which
  %% arrive most-recent-first.
  %%   tuple format   - a one-element tuple wrapping the members in input order.
  %%   map format     - a map built with maps:from_list/1 from the list as given.
  %%   anything else  - the members in input order as a list, with the empty
  %%                    object represented as a list holding one empty tuple.
  -spec make_object(jsone:json_object_members(), opt()) -> jsone:json_object().
  make_object(Members, Opt) ->
      case Opt of
          ?OPT{object_format = tuple} ->
              {lists:reverse(Members)};
          ?OPT{object_format = map} ->
              maps:from_list(Members);
          _ when Members =:= [] ->
              [{}];
          _ ->
              lists:reverse(Members)
      end.
  %% Converts the user-supplied option list into the option record, starting
  %% from the record's default field values.
  -spec parse_options([jsone:decode_option()]) -> opt().
  parse_options(Options) ->
      Defaults = ?OPT{},
      parse_option(Options, Defaults).
  %% Applies the options to the record one at a time, left to right, so a
  %% later occurrence of an option overrides an earlier one. Only the
  %% object_format option with the value tuple, proplist or map is recognised;
  %% no clause matches any other list element.
  -spec parse_option([jsone:decode_option()], opt()) -> opt().
  parse_option([], Opt) ->
      Opt;
  parse_option([{object_format, Format} | Rest], Opt)
    when Format =:= tuple; Format =:= proplist; Format =:= map ->
      Updated = Opt?OPT{object_format = Format},
      parse_option(Rest, Updated).