jsone.erl 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. %%% @doc JSON decoding/encoding module
  2. %%% @end
  3. %%%
  4. %%% Copyright (c) 2013-2015, Takeru Ohta <phjgt308@gmail.com>
  5. %%%
  6. %%% The MIT License
  7. %%%
  8. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  9. %%% of this software and associated documentation files (the "Software"), to deal
  10. %%% in the Software without restriction, including without limitation the rights
  11. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. %%% copies of the Software, and to permit persons to whom the Software is
  13. %%% furnished to do so, subject to the following conditions:
  14. %%%
  15. %%% The above copyright notice and this permission notice shall be included in
  16. %%% all copies or substantial portions of the Software.
  17. %%%
  18. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. %%% THE SOFTWARE.
  25. %%%
  26. %%%---------------------------------------------------------------------------------------
  27. -module(jsone).
  28. %%--------------------------------------------------------------------------------
  29. %% Exported API
  30. %%--------------------------------------------------------------------------------
  31. -export([decode/1, decode/2,
  32. try_decode/1, try_decode/2,
  33. encode/1, encode/2,
  34. try_encode/1, try_encode/2,
  35. term_to_json_string/1,
  36. ip_address_to_json_string/1]).
  37. -export_type([json_value/0,
  38. json_boolean/0,
  39. json_number/0,
  40. json_string/0,
  41. json_array/0,
  42. json_object/0,
  43. json_object_members/0,
  44. json_term/0,
  45. json_object_format_tuple/0,
  46. json_object_format_proplist/0,
  47. json_object_format_map/0,
  48. json_scalar/0,
  49. encode_option/0,
  50. decode_option/0,
  51. float_format_option/0,
  52. datetime_encode_format/0,
  53. datetime_format/0,
  54. timezone/0,
  55. utc_offset_seconds/0,
  56. stack_item/0]).
  57. %%--------------------------------------------------------------------------------
  58. %% Types & Macros
  59. %%--------------------------------------------------------------------------------
  60. -type json_value() :: json_number() |
  61. json_string() |
  62. json_array() |
  63. json_object() |
  64. json_boolean() |
  65. null |
  66. undefined |
  67. json_term().
  68. -type json_boolean() :: boolean().
  69. -type json_number() :: number().
  70. -type json_string() :: binary() | atom() | calendar:datetime(). % NOTE: `decode/1' always returns `binary()' value
  71. -type json_array() :: [json_value()].
  72. -type json_object() :: json_object_format_tuple() | json_object_format_proplist() | json_object_format_map().
  73. -type json_object_members() :: [{json_string(), json_value()}].
  74. -type json_term() :: {{json, iolist()}} | {{json_utf8, unicode:chardata()}}.
  75. %% `json_term()' allows inlining an already encoded JSON value. The `json' variant
  76. %% expects byte encoded UTF-8 data values as list members. `json_utf8' expects
  77. %% Unicode code points as list members. Binaries are copied "as is" in both
  78. %% variants, except that `json_utf8' will check if the binary contains valid `UTF-8'
  79. %% encoded data. In short, `json' uses `erlang:iolist_to_binary/1' and
  80. %% `json_utf8' uses `unicode:characters_to_binary/1' for encoding.
  81. %%
  82. %% A simple example is worth a thousand words.
  83. %%
  84. %% ```
  85. %% 1> S = "hélo".
  86. %% "hélo"
  87. %% 2> shell:strings(false).
  88. %% true
  89. %% 3> S.
  90. %% [104,233,108,111]
  91. %% 4> B = jsone:encode({{json, S}}). % invalid UTF-8
  92. %% <<104,233,108,111>>
  93. %% 5> B2 = jsone:encode({{json_utf8, S}}). % valid UTF-8
  94. %% <<104,195,169,108,111>>
  95. %% 6> jsone:encode({{json, B}}).
  96. %% <<104,233,108,111>>
  97. %% 7> jsone:encode({{json_utf8, B}}).
  98. %% ** exception error: {invalid_json_utf8,<<104>>,<<233,108,111>>}
  99. %% in function jsone_encode:value/4
  100. %% called as jsone_encode:value({json_utf8,<<104,233,108,111>>},
  101. %% [],<<>>,
  102. %% {encode_opt_v2,false,
  103. %% [{scientific,20}],
  104. %% {iso8601,0},
  105. %% string,0,0})
  106. %% in call from jsone:encode/2 (/home/hynek/work/altworx/jsone/_build/default/lib/jsone/src/jsone.erl, line 302)
  107. %% 8> jsone:encode({{json_utf8, B2}}).
  108. %% <<104,195,169,108,111>>
  109. %% 9> shell:strings(true).
  110. %% false
  111. %% 10> jsone:encode({{json_utf8, B2}}).
  112. %% <<"hélo"/utf8>>
  113. %% 11> jsone:encode({{json, binary_to_list(B2)}}). % UTF-8 encoded list leads to valid UTF-8
  114. %% <<"hélo"/utf8>>
  115. %% '''
  116. %%
  117. -type json_object_format_tuple() :: {json_object_members()}.
  118. -type json_object_format_proplist() :: [{}] | json_object_members().
  119. -ifdef('NO_MAP_TYPE').
  120. -opaque json_object_format_map() :: json_object_format_proplist().
  121. %% `maps' is not supported in this erts version
  122. -else.
  123. -type json_object_format_map() :: map().
  124. -endif.
  125. -type json_scalar() :: json_boolean() | json_number() | json_string().
  126. -type float_format_option() :: {scientific, Decimals :: 0..249} | {decimals, Decimals :: 0..253} | compact | short.
  127. %% `scientific': <br />
  128. %% - The float will be formatted using scientific notation with `Decimals' digits of precision. <br />
  129. %%
  130. %% `decimals': <br />
  131. %% - The encoded string will contain at most `Decimals' number of digits past the decimal point. <br />
  132. %% - If `compact' is provided the trailing zeros at the end of the string are truncated. <br />
  133. %% - If `short' is provided the float is formatted with the smallest number of digits. <br />
  134. %%
  135. %% For more details, see <a href="http://erlang.org/doc/man/erlang.html#float_to_list-2">erlang:float_to_list/2</a>.
  136. %%
  137. %% ```
  138. %% > jsone:encode(1.23).
  139. %% <<"1.22999999999999998224e+00">>
  140. %%
  141. %% > jsone:encode(1.23, [{float_format, [{scientific, 4}]}]).
  142. %% <<"1.2300e+00">>
  143. %%
  144. %% > jsone:encode(1.23, [{float_format, [{scientific, 1}]}]).
  145. %% <<"1.2e+00">>
  146. %%
  147. %% > jsone:encode(1.23, [{float_format, [{decimals, 4}]}]).
  148. %% <<"1.2300">>
  149. %%
  150. %% > jsone:encode(1.23, [{float_format, [{decimals, 4}, compact]}]).
  151. %% <<"1.23">>
  152. %% '''
  153. -type datetime_encode_format() :: Format :: datetime_format() | {Format :: datetime_format(), TimeZone :: timezone()}.
  154. %% Datetime encoding format.
  155. %%
  156. %% The default value of `TimeZone' is `utc'.
  157. %%
  158. %% ```
  159. %% %
  160. %% % Universal Time
  161. %% %
  162. %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, iso8601}]).
  163. %% <<"\"2000-03-10T10:03:58Z\"">>
  164. %%
  165. %% %
  166. %% % Local Time (JST)
  167. %% %
  168. %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, {iso8601, local}}]).
  169. %% <<"\"2000-03-10T10:03:58+09:00\"">>
  170. %%
  171. %% %
  172. %% % Explicit TimeZone Offset
  173. %% %
  174. %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, {iso8601, -2*60*60}}]).
  175. %% <<"\"2000-03-10T10:03:58-02:00\"">>
  176. %% '''
  177. -type datetime_format() :: iso8601.
  178. -type timezone() :: utc | local | utc_offset_seconds().
  179. -type utc_offset_seconds() :: -86399..86399.
  180. -type common_option() :: undefined_as_null.
  181. %%
  182. %% `undefined_as_null': <br />
  183. %% - Treats `undefined' in Erlang as the conversion target for `null' in JSON. This means that `undefined' will be encoded to `null' and `null' will be decoded to `undefined'<br />
  184. -type encode_option() :: native_utf8 |
  185. native_forward_slash |
  186. canonical_form |
  187. {float_format, [float_format_option()]} |
  188. {datetime_format, datetime_encode_format()} |
  189. {object_key_type, string | scalar | value} |
  190. {space, non_neg_integer()} |
  191. {indent, non_neg_integer()} |
  192. {map_unknown_value, undefined | fun((term()) -> {ok, json_value()} | error)} |
  193. skip_undefined |
  194. common_option().
  195. %% `native_utf8': <br />
  196. %% - Encodes non ASCII UTF-8 characters as a human-readable(non-escaped) string <br />
  197. %%
  198. %% `native_forward_slash': <br />
  199. %% - Prevents forward slashes in a JSON string from being escaped <br />
  200. %%
  201. %% `canonical_form': <br />
  202. %% - produce a canonical form of a JSON document <br />
  203. %%
  204. %% `{float_format, Options}':
  205. %% - Encodes a `float()' value in the format specified by `Options' <br />
  206. %% - default: `[{scientific, 20}]' <br />
  207. %%
  208. %% `{datetime_format, Format}':
  209. %% - Encodes a `calendar:datetime()' value in the format specified by `Format' <br />
  210. %% - default: `{iso8601, utc}' <br />
  211. %%
  212. %% `object_key_type':
  213. %% - Allowable object key type <br />
  214. %% - `string': Only string values are allowed (i.e. `json_string()' type) <br />
  215. %% - `scalar': In addition to `string', following values are allowed: nulls, booleans, numerics (i.e. `json_scalar()' type) <br />
  216. %% - `value': Any json compatible values are allowed (i.e. `json_value()' type) <br />
  217. %% - default: `string' <br />
  218. %% - NOTE: If `scalar' or `value' option is specified, non `json_string()' key will be automatically converted to a `binary()' value (e.g. `1' => `<<"1">>', `#{}' => `<<"{}">>') <br />
  219. %%
  220. %% `{space, N}': <br />
  221. %% - Inserts `N' spaces after every comma and colon <br />
  222. %% - default: `0' <br />
  223. %%
  224. %% `{indent, N}': <br />
  225. %% - Inserts a newline and `N' spaces for each level of indentation <br />
  226. %% - default: `0' <br />
  227. %%
  228. %% `skip_undefined': <br />
  229. %% - If specified, each entry of an object whose value is `undefined' is omitted from the resulting JSON <br />
  230. %%
  231. %% `{map_unknown_value, Fun}': <br />
  232. %% - If `Fun' is a function, unknown values encountered during an encoding process are converted to `json_value()' by applying `Fun'. <br />
  233. %% - If `Fun' is `undefined', the encoding results in an error if there are unknown values. <br />
  234. %% - default: `term_to_json_string/1' <br />
  235. -type decode_option() :: {object_format, tuple | proplist | map} |
  236. {allow_ctrl_chars, boolean()} |
  237. reject_invalid_utf8 |
  238. {'keys', 'binary' | 'atom' | 'existing_atom' | 'attempt_atom'} |
  239. {duplicate_map_keys, first | last} |
  240. common_option().
  241. %% `object_format': <br />
  242. %% - Decoded JSON object format <br />
  243. %% - `tuple': An object is decoded as `{[]}' if it is empty, otherwise `{[{Key, Value}]}'. <br />
  244. %% - `proplist': An object is decoded as `[{}]' if it is empty, otherwise `[{Key, Value}]'. <br />
  245. %% - `map': An object is decoded as `#{}' if it is empty, otherwise `#{Key => Value}'. <br />
  246. %% - default: `map' if OTP version is OTP-17 or more, `tuple' otherwise <br />
  247. %%
  248. %% `allow_ctrl_chars': <br />
  249. %% - If the value is `true', strings which contain unescaped control characters will be regarded as a legal JSON string <br />
  250. %% - default: `false'<br />
  251. %%
  252. %% `reject_invalid_utf8': <br />
  253. %% - Rejects JSON strings which contain invalid UTF-8 byte sequences <br />
  254. %%
  255. %% `keys': <br />
  256. %% Defines how object keys are decoded. The default value is `binary'.
  257. %% The option is compatible with `labels' option in `jsx'. <br />
  258. %% - `binary': The key is left as a string which is encoded as binary. It's default
  259. %% and backward compatible behaviour. <br />
  260. %% - `atom': The key is converted to an atom. Results in `badarg' if Key value
  261. %% regarded as UTF-8 is not a valid atom. <br />
  262. %% - `existing_atom': Returns existing atom. Any key value which is not
  263. %% existing atom raises `badarg' exception. <br />
  264. %% - `attempt_atom': Returns an existing atom like `existing_atom' does, but returns a
  265. %% binary string if it fails to find one.
  266. %%
  267. %% `duplicate_map_keys': <br />
  268. %% https://www.ietf.org/rfc/rfc4627.txt says that keys SHOULD be
  269. %% unique, but they don't have to be. Most JSON parsers will either
  270. %% give you the value of the first, or last duplicate property
  271. %% encountered. When `object_format' is `tuple' or `proplist' all
  272. %% duplicates are returned. When `object_format' is `map' by default
  273. %% the first instance of a duplicate is returned. Setting
  274. %% `duplicate_map_keys' to `last' will change this behaviour to return
  275. %% the last such instance.
  276. %% - If the value is `first' then the first duplicate key/value is returned. <br />
  277. %% - If the value is `last' then the last duplicate key/value is returned.
  278. %% - default: `first'<br />
  279. %%
  280. -type stack_item() :: {Module :: module(),
  281. Function :: atom(),
  282. Arity :: arity() | (Args :: [term()]),
  283. Location :: [{file, Filename :: string()} | {line, Line :: pos_integer()}]}.
  284. %% An item in a stack back-trace.
  285. %%
  286. %% Note that the `erlang' module already defines the same `stack_item/0' type,
  287. %% but it is not exported from the module.
  288. %% So, maybe as a temporary measure, we redefine this type for passing full dialyzer analysis.
  289. -ifdef('OTP_RELEASE').
  290. %% The 'OTP_RELEASE' macro introduced at OTP-21,
  291. %% so we can use it for detecting whether the Erlang compiler supports new try/catch syntax or not.
  292. -define(CAPTURE_STACKTRACE, :__StackTrace).
  293. -define(GET_STACKTRACE, __StackTrace).
  294. -else.
  295. -define(CAPTURE_STACKTRACE, ).
  296. -define(GET_STACKTRACE, erlang:get_stacktrace()).
  297. -endif.
  298. %%--------------------------------------------------------------------------------
  299. %% Exported Functions
  300. %%--------------------------------------------------------------------------------
  %% @equiv decode(Json, [])
  -spec decode(binary()) -> json_value().
  decode(Json) -> decode(Json, []).
  %% @doc Decodes JSON text (a UTF-8 encoded binary) into an Erlang term
  %%
  %% The work is delegated to `try_decode/2'. Whatever follows the decoded value
  %% must consist only of spaces, tabs, carriage returns and newlines; any other
  %% trailing byte raises a `badarg' error.
  %%
  %% Raises an error exception if the input is not valid JSON. The exception
  %% carries the reason reported by `try_decode/2', and the stack item reported
  %% there is placed on top of the current stack trace.
  %%
  %% ```
  %% > jsone:decode(<<"1">>, []).
  %% 1
  %%
  %% > jsone:decode(<<"wrong json">>, []).
  %% ** exception error: bad argument
  %% in function jsone_decode:number_integer_part/4
  %% called as jsone_decode:number_integer_part(<<"wrong json">>,1,[],<<>>)
  %% in call from jsone:decode/1 (src/jsone.erl, line 71)
  %% '''
  -spec decode(binary(), [decode_option()]) -> json_value().
  decode(Json, Options) ->
      try
          %% A failed decode surfaces here as a `badmatch', which the handler
          %% below converts into the original error.
          {ok, Decoded, Trailing} = try_decode(Json, Options),
          check_decode_remainings(Trailing),
          Decoded
      catch
          error:{badmatch, {error, {Why, [Frame]}}} ?CAPTURE_STACKTRACE ->
              erlang:raise(error, Why, [Frame | ?GET_STACKTRACE])
      end.
  %% @equiv try_decode(Json, [])
  -spec try_decode(binary()) ->
            {ok, json_value(), Remainings :: binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_decode(Json) -> try_decode(Json, []).
  %% @doc Decodes JSON text (a UTF-8 encoded binary) into an Erlang term, without raising
  %%
  %% On success the decoded value is returned together with the part of the
  %% input that follows it. On failure an `{error, {Reason, StackItems}}' tuple
  %% is returned instead of an exception. The call is forwarded unchanged to
  %% `jsone_decode:decode/2'.
  %%
  %% ```
  %% > jsone:try_decode(<<"[1,2,3] \"next value\"">>, []).
  %% {ok,[1,2,3],<<" \"next value\"">>}
  %%
  %% > jsone:try_decode(<<"wrong json">>, []).
  %% {error,{badarg,[{jsone_decode,number_integer_part,
  %% [<<"wrong json">>,1,[],<<>>],
  %% [{line,208}]}]}}
  %% '''
  -spec try_decode(binary(), [decode_option()]) ->
            {ok, json_value(), Remainings :: binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_decode(Json, Options) -> jsone_decode:decode(Json, Options).
  %% @equiv encode(JsonValue, [])
  -spec encode(json_value()) -> binary().
  encode(JsonValue) -> encode(JsonValue, []).
  %% @doc Encodes an Erlang term into JSON text (a UTF-8 encoded binary)
  %%
  %% The work is delegated to `try_encode/2'.
  %%
  %% Raises an error exception if the input is not an instance of type `json_value()'.
  %% The exception carries the reason reported by `try_encode/2', and the stack
  %% item reported there is placed on top of the current stack trace.
  %%
  %% ```
  %% > jsone:encode([1, null, 2]).
  %% <<"[1,null,2]">>
  %%
  %% > jsone:encode([1, self(), 2]). % A pid is not a json value
  %% ** exception error: bad argument
  %% in function jsone_encode:value/3
  %% called as jsone_encode:value(<0,34,0>,[{array_values,[2]}],<<"[1,">>)
  %% in call from jsone:encode/1 (src/jsone.erl, line 97)
  %% '''
  %%
  %% NOTE(review): the failing example above may be outdated. The `map_unknown_value'
  %% option is documented as defaulting to `term_to_json_string/1', in which case a
  %% pid would presumably be stringified rather than rejected -- confirm.
  -spec encode(json_value(), [encode_option()]) -> binary().
  encode(JsonValue, Options) ->
      try
          %% A failed encode surfaces here as a `badmatch', which the handler
          %% below converts into the original error.
          {ok, Encoded} = try_encode(JsonValue, Options),
          Encoded
      catch
          error:{badmatch, {error, {Why, [Frame]}}} ?CAPTURE_STACKTRACE ->
              erlang:raise(error, Why, [Frame | ?GET_STACKTRACE])
      end.
  %% @equiv try_encode(JsonValue, [])
  -spec try_encode(json_value()) ->
            {ok, binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_encode(JsonValue) -> try_encode(JsonValue, []).
  %% @doc Encodes an Erlang term into JSON text (a UTF-8 encoded binary), without raising
  %%
  %% Returns `{ok, Binary}' on success and `{error, {Reason, StackItems}}' on
  %% failure. The call is forwarded unchanged to `jsone_encode:encode/2'.
  %%
  %% ```
  %% > jsone:try_encode([1, null, 2]).
  %% {ok,<<"[1,null,2]">>}
  %%
  %% > jsone:try_encode([1, hoge, 2]). % 'hoge' atom is not a json value
  %% {error,{badarg,[{jsone_encode,value,
  %% [hoge,[{array_values,[2]}],<<"[1,">>],
  %% [{line,86}]}]}}
  %% '''
  %%
  %% NOTE(review): the failing example above may be outdated, since the
  %% `json_string()' type of this module includes `atom()' -- confirm.
  -spec try_encode(json_value(), [encode_option()]) ->
            {ok, binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_encode(JsonValue, Options) -> jsone_encode:encode(JsonValue, Options).
  %% @doc Converts the given term `X' to its string representation
  %% (i.e., the result of `io_lib:format("~0p", [X])', returned as a binary).
  %%
  %% This function always succeeds with `{ok, Binary}'; the `error' alternative
  %% in the spec is never returned by this implementation.
  -spec term_to_json_string(term()) -> {ok, json_string()} | error.
  term_to_json_string(X) ->
      Printed = io_lib:format("~0p", [X]),
      {ok, iolist_to_binary(Printed)}.
  %% @doc Converts an IP address into its text representation.
  %%
  %% This function can be specified as the value of the `map_unknown_value' encoding option.
  %% The conversion itself is performed by `jsone_inet:ip_address_to_json_string/1'.
  %%
  %% IPv6 addresses are formatted by following the recommendation defined in RFC 5952.
  %% Note that the trailing 32 bits of special IPv6 addresses such as IPv4-Compatible (::X.X.X.X),
  %% IPv4-Mapped (::ffff:X.X.X.X), IPv4-Translated (::ffff:0:X.X.X.X) and IPv4/IPv6 translation
  %% (64:ff9b::X.X.X.X and 64:ff9b:1::X.X.X.X ~ 64:ff9b:1:ffff:ffff:ffff:X.X.X.X) are formatted
  %% using the IPv4 format.
  %%
  %% ```
  %% > EncodeOpt = [{map_unknown_value, fun jsone:ip_address_to_json_string/1}].
  %%
  %% > jsone:encode(#{ip => {127, 0, 0, 1}}, EncodeOpt).
  %% <<"{\"ip\":\"127.0.0.1\"}">>
  %%
  %% > {ok, Addr} = inet:parse_address("2001:DB8:0000:0000:0001:0000:0000:0001").
  %% > jsone:encode(Addr, EncodeOpt).
  %% <<"\"2001:db8::1:0:0:1\"">>
  %%
  %% > jsone:encode([foo, {0, 0, 0, 0, 0, 16#FFFF, 16#7F00, 16#0001}], EncodeOpt).
  %% <<"[\"foo\",\"::ffff:127.0.0.1\"]">>
  %% '''
  -spec ip_address_to_json_string(inet:ip_address() | any()) -> {ok, json_string()} | error.
  ip_address_to_json_string(X) -> jsone_inet:ip_address_to_json_string(X).
  423. %%--------------------------------------------------------------------------------
  424. %% Internal Functions
  425. %%--------------------------------------------------------------------------------
  %% Verifies that the input left over after a decoded JSON value holds nothing
  %% but whitespace (space, tab, carriage return, newline).
  %%
  %% Returns `ok' once the whole binary has been consumed. On the first
  %% non-whitespace byte, raises `badarg' with the not-yet-consumed rest of the
  %% binary as the reported argument.
  -spec check_decode_remainings(binary()) -> ok.
  check_decode_remainings(<<>>) ->
      ok;
  check_decode_remainings(<<Char, Rest/binary>>)
    when Char =:= $\s; Char =:= $\t; Char =:= $\r; Char =:= $\n ->
      check_decode_remainings(Rest);
  check_decode_remainings(<<Garbage/binary>>) ->
      erlang:error(badarg, [Garbage]).