jsone.erl 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. %%% @doc JSON decoding/encoding module
  2. %%% @end
  3. %%%
  4. %%% Copyright (c) 2013-2015, Takeru Ohta <phjgt308@gmail.com>
  5. %%%
  6. %%% The MIT License
  7. %%%
  8. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  9. %%% of this software and associated documentation files (the "Software"), to deal
  10. %%% in the Software without restriction, including without limitation the rights
  11. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. %%% copies of the Software, and to permit persons to whom the Software is
  13. %%% furnished to do so, subject to the following conditions:
  14. %%%
  15. %%% The above copyright notice and this permission notice shall be included in
  16. %%% all copies or substantial portions of the Software.
  17. %%%
  18. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. %%% THE SOFTWARE.
  25. %%%
  26. %%%---------------------------------------------------------------------------------------
  27. -module(jsone).
  28. %%--------------------------------------------------------------------------------
  29. %% Exported API
  30. %%--------------------------------------------------------------------------------
  31. -export([decode/1, decode/2,
  32. try_decode/1, try_decode/2,
  33. encode/1, encode/2,
  34. try_encode/1, try_encode/2,
  35. term_to_json_string/1,
  36. ip_address_to_json_string/1]).
  37. -export_type([json_value/0,
  38. json_boolean/0,
  39. json_number/0,
  40. json_string/0,
  41. json_array/0,
  42. json_object/0,
  43. json_object_members/0,
  44. json_term/0,
  45. json_object_format_tuple/0,
  46. json_object_format_proplist/0,
  47. json_object_format_map/0,
  48. json_scalar/0,
  49. encode_option/0,
  50. decode_option/0,
  51. float_format_option/0,
  52. datetime_encode_format/0,
  53. datetime_format/0,
  54. timezone/0,
  55. utc_offset_seconds/0,
  56. stack_item/0]).
  57. %%--------------------------------------------------------------------------------
  58. %% Types & Macros
  59. %%--------------------------------------------------------------------------------
  60. -type json_value() :: json_number() |
  61. json_string() |
  62. json_array() |
  63. json_object() |
  64. json_boolean() |
  65. null |
  66. undefined |
  67. json_term().
  68. -type json_boolean() :: boolean().
  69. -type json_number() :: number().
  70. -type json_string() :: binary() | atom() | calendar:datetime(). % NOTE: `decode/1' always returns `binary()' value
  71. -type json_array() :: [json_value()].
  72. -type json_object() :: json_object_format_tuple() | json_object_format_proplist() | json_object_format_map().
  73. -type json_object_members() :: [{json_string(), json_value()}].
  74. -type json_term() :: {{json, iolist()}} | {{json_utf8, unicode:chardata()}}.
  75. %% `json_term()' allows inline already encoded JSON value. `json' variant
  76. %% expects byte encoded utf8 data values as list members. `json_utf8' expect
  77. %% Unicode code points as list members. Binaries are copied "as is" in both
  78. %% variants except `json_utf8' will check if binary contain valid `UTF-8'
  79. %% encoded data. In short, `json' uses `erlang:iolist_to_binary/1' and
  80. %% `json_utf8' uses `unicode:chardata_to_binary/1' for encoding.
  81. %%
  82. %% A simple example is worth a thousand words.
  83. %%
  84. %% ```
  85. %% 1> S = "hélo".
  86. %% "hélo"
  87. %% 2> shell:strings(false).
  88. %% true
  89. %% 3> S.
  90. %% [104,233,108,111]
  91. %% 4> B = jsone:encode({{json, S}}). % invalid UTF-8
  92. %% <<104,233,108,111>>
  93. %% 5> B2 = jsone:encode({{json_utf8, S}}). % valid UTF-8
  94. %% <<104,195,169,108,111>>
  95. %% 6> jsone:encode({{json, B}}).
  96. %% <<104,233,108,111>>
  97. %% 7> jsone:encode({{json_utf8, B}}).
  98. %% ** exception error: {invalid_json_utf8,<<104>>,<<233,108,111>>}
  99. %% in function jsone_encode:value/4
  100. %% called as jsone_encode:value({json_utf8,<<104,233,108,111>>},
  101. %% [],<<>>,
  102. %% {encode_opt_v2,false,
  103. %% [{scientific,20}],
  104. %% {iso8601,0},
  105. %% string,0,0})
  106. %% in call from jsone:encode/2 (/home/hynek/work/altworx/jsone/_build/default/lib/jsone/src/jsone.erl, line 302)
  107. %% 8> jsone:encode({{json_utf8, B2}}).
  108. %% <<104,195,169,108,111>>
  109. %% 9> shell:strings(true).
  110. %% false
  111. %% 10> jsone:encode({{json_utf8, B2}}).
  112. %% <<"hélo"/utf8>>
  113. %% 11> jsone:encode({{json, binary_to_list(B2)}}). % UTF-8 encoded list leads to valid UTF-8
  114. %% <<"hélo"/utf8>>
  115. %% '''
  116. %%
  117. -type json_object_format_tuple() :: {json_object_members()}.
  118. -type json_object_format_proplist() :: [{}] | json_object_members().
  119. -ifdef('NO_MAP_TYPE').
  120. -opaque json_object_format_map() :: json_object_format_proplist().
  121. %% `maps' is not supported in this erts version
  122. -else.
  123. -type json_object_format_map() :: map().
  124. -endif.
  125. -type json_scalar() :: json_boolean() | json_number() | json_string().
  126. -type float_format_option() :: {scientific, Decimals :: 0 .. 249} | {decimals, Decimals :: 0 .. 253} | compact.
  127. %% `scientific': <br />
  128. %% - The float will be formatted using scientific notation with `Decimals' digits of precision. <br />
  129. %%
  130. %% `decimals': <br />
  131. %% - The encoded string will contain at most `Decimals' number of digits past the decimal point. <br />
  132. %% - If `compact' is provided the trailing zeros at the end of the string are truncated. <br />
  133. %%
  134. %% For more details, see <a href="http://erlang.org/doc/man/erlang.html#float_to_list-2">erlang:float_to_list/2</a>.
  135. %%
  136. %% ```
  137. %% > jsone:encode(1.23).
  138. %% <<"1.22999999999999998224e+00">>
  139. %%
  140. %% > jsone:encode(1.23, [{float_format, [{scientific, 4}]}]).
  141. %% <<"1.2300e+00">>
  142. %%
  143. %% > jsone:encode(1.23, [{float_format, [{scientific, 1}]}]).
  144. %% <<"1.2e+00">>
  145. %%
  146. %% > jsone:encode(1.23, [{float_format, [{decimals, 4}]}]).
  147. %% <<"1.2300">>
  148. %%
  149. %% > jsone:encode(1.23, [{float_format, [{decimals, 4}, compact]}]).
  150. %% <<"1.23">>
  151. %% '''
  152. -type datetime_encode_format() :: Format :: datetime_format() | {Format :: datetime_format(), TimeZone :: timezone()}.
  153. %% Datetime encoding format.
  154. %%
  155. %% The default value of `TimeZone' is `utc'.
  156. %%
  157. %% ```
  158. %% %
  159. %% % Universal Time
  160. %% %
  161. %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, iso8601}]).
  162. %% <<"\"2000-03-10T10:03:58Z\"">>
  163. %%
  164. %% %
  165. %% % Local Time (JST)
  166. %% %
  167. %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, {iso8601, local}}]).
  168. %% <<"\"2000-03-10T10:03:58+09:00\"">>
  169. %%
  170. %% %
  171. %% % Explicit TimeZone Offset
  172. %% %
  173. %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, {iso8601, -2*60*60}}]).
  174. %% <<"\"2000-03-10T10:03:58-02:00\"">>
  175. %% '''
  176. -type datetime_format() :: iso8601.
  177. -type timezone() :: utc | local | utc_offset_seconds().
  178. -type utc_offset_seconds() :: -86399 .. 86399.
  179. -type common_option() :: undefined_as_null.
  180. %%
  181. %% `undefined_as_null': <br />
  182. %% - Treats `undefined' in Erlang as the conversion target for `null' in JSON. This means that `undefined' will be encoded to `null' and `null' will be decoded to `undefined'<br />
  183. -type encode_option() :: native_utf8 |
  184. native_forward_slash |
  185. canonical_form |
  186. {float_format, [float_format_option()]} |
  187. {datetime_format, datetime_encode_format()} |
  188. {object_key_type, string | scalar | value} |
  189. {space, non_neg_integer()} |
  190. {indent, non_neg_integer()} |
  191. {map_unknown_value, undefined | fun((term()) -> {ok, json_value()} | error)} |
  192. skip_undefined |
  193. common_option().
  194. %% `native_utf8': <br />
  195. %% - Encodes non ASCII UTF-8 characters as a human-readable(non-escaped) string <br />
  196. %%
  197. %% `native_forward_slash': <br />
  198. %% - Prevents forward slashes in a JSON string from being escaped <br />
  199. %%
  200. %% `canonical_form': <br />
  201. %% - produce a canonical form of a JSON document <br />
  202. %%
  203. %% `{float_format, Options}':
  204. %% - Encodes a `float()' value in the format specified by `Options' <br />
  205. %% - default: `[{scientific, 20}]' <br />
  206. %%
  207. %% `{datetime_format, Format}': <br />
  208. %% - Encodes a `calendar:datetime()' value in the format specified by `Format' <br />
  209. %% - default: `{iso8601, utc}' <br />
  210. %%
  211. %% `object_key_type':
  212. %% - Allowable object key type <br />
  213. %% - `string': Only string values are allowed (i.e. `json_string()' type) <br />
  214. %% - `scalar': In addition to `string', following values are allowed: nulls, booleans, numerics (i.e. `json_scalar()' type) <br />
  215. %% - `value': Any json compatible values are allowed (i.e. `json_value()' type) <br />
  216. %% - default: `string' <br />
  217. %% - NOTE: If `scalar' or `value' option is specified, non `json_string()' key will be automatically converted to a `binary()' value (e.g. `1' => `<<"1">>', `#{}' => `<<"{}">>') <br />
  218. %%
  219. %% `{space, N}': <br />
  220. %% - Inserts `N' spaces after every comma and colon <br />
  221. %% - default: `0' <br />
  222. %%
  223. %% `{indent, N}': <br />
  224. %% - Inserts a newline and `N' spaces for each level of indentation <br />
  225. %% - default: `0' <br />
  226. %%
  227. %% `skip_undefined': <br />
  228. %% - If specified, each entry having `undefined' value in a object isn't included in the result JSON <br />
  229. %%
  230. %% `{map_unknown_value, Fun}': <br />
  231. %% - If `Fun' is a function, unknown values encountered during an encoding process are converted to `json_value()' by applying `Fun'. <br />
  232. %% - If `Fun' is `undefined', the encoding results in an error if there are unknown values. <br />
  233. %% - default: `term_to_json_string/1' <br />
  234. -type decode_option() :: {object_format, tuple | proplist | map} |
  235. {allow_ctrl_chars, boolean()} |
  236. reject_invalid_utf8 |
  237. {'keys', 'binary' | 'atom' | 'existing_atom' | 'attempt_atom'} |
  238. {duplicate_map_keys, first | last} |
  239. common_option().
  240. %% `object_format': <br />
  241. %% - Decoded JSON object format <br />
  242. %% - `tuple': An object is decoded as `{[]}' if it is empty, otherwise `{[{Key, Value}]}'. <br />
  243. %% - `proplist': An object is decoded as `[{}]' if it is empty, otherwise `[{Key, Value}]'. <br />
  244. %% - `map': An object is decoded as `#{}' if it is empty, otherwise `#{Key => Value}'. <br />
  245. %% - default: `map' if OTP version is OTP-17 or more, `tuple' otherwise <br />
  246. %%
  247. %% `allow_ctrl_chars': <br />
  248. %% - If the value is `true', strings which contain unescaped control characters will be regarded as a legal JSON string <br />
  249. %% - default: `false'<br />
  250. %%
  251. %% `reject_invalid_utf8': <br />
  252. %% - Rejects JSON strings which contain invalid UTF-8 byte sequences <br />
  253. %%
  254. %% `keys': <br />
  255. %% Defines way how object keys are decoded. The default value is `binary'.
  256. %% The option is compatible with `labels' option in `jsx'. <br />
  257. %% - `binary': The key is left as a string which is encoded as binary. It's default
  258. %% and backward compatible behaviour. <br />
  259. %% - `atom': The key is converted to an atom. Results in `badarg' if Key value
  260. %% regarded as UTF-8 is not a valid atom. <br />
  261. %% - `existing_atom': Returns existing atom. Any key value which is not
  262. %% existing atom raises `badarg' exception. <br />
  263. %% - `attempt_atom': Returns the existing atom like `existing_atom', but returns a
  264. %% binary string if it fails to find one.
  265. %%
  266. %% `duplicate_map_keys': <br />
  267. %% https://www.ietf.org/rfc/rfc4627.txt says that keys SHOULD be
  268. %% unique, but they don't have to be. Most JSON parsers will either
  269. %% give you the value of the first, or last duplicate property
  270. %% encountered. When `object_format' is `tuple' or `proplist' all
  271. %% duplicates are returned. When `object_format' is `map' by default
  272. %% the first instance of a duplicate is returned. Setting
  273. %% `duplicate_map_keys' to `last' will change this behaviour to return
  274. %% the last such instance.
  275. %% - If the value is `first' then the first duplicate key/value is returned. <br />
  276. %% - If the value is `last' then the last duplicate key/value is returned.
  277. %% - default: `first'<br />
  278. %%
  279. -type stack_item() :: {Module :: module(),
  280. Function :: atom(),
  281. Arity :: arity() | (Args :: [term()]),
  282. Location :: [{file, Filename :: string()} | {line, Line :: pos_integer()}]}.
  283. %% An item in a stack back-trace.
  284. %%
  285. %% Note that the `erlang' module already defines the same `stack_item/0' type,
  286. %% but it is not exported from the module.
  287. %% So, maybe as a temporary measure, we redefine this type for passing full dialyzer analysis.
  288. -ifdef('OTP_RELEASE').
  289. %% The 'OTP_RELEASE' macro introduced at OTP-21,
  290. %% so we can use it for detecting whether the Erlang compiler supports new try/catch syntax or not.
  291. -define(CAPTURE_STACKTRACE, :__StackTrace).
  292. -define(GET_STACKTRACE, __StackTrace).
  293. -else.
  294. -define(CAPTURE_STACKTRACE, ).
  295. -define(GET_STACKTRACE, erlang:get_stacktrace()).
  296. -endif.
  297. %%--------------------------------------------------------------------------------
  298. %% Exported Functions
  299. %%--------------------------------------------------------------------------------
  %% @equiv decode(Json, [])
  -spec decode(binary()) -> json_value().
  decode(Json) ->
      %% Convenience arity: decode with an empty option list, i.e. every
      %% decode option is left at its default.
      DefaultOptions = [],
      decode(Json, DefaultOptions).
  %% @doc Decodes JSON text (a UTF-8 encoded binary) into an Erlang term.
  %%
  %% This is the raising counterpart of `try_decode/2': when `try_decode/2'
  %% returns `{error, {Reason, [StackItem]}}', an `error' exception with reason
  %% `Reason' is raised, with `StackItem' placed on top of the stack trace.
  %%
  %% After the decoded value only whitespace (space, tab, CR, LF) may remain in
  %% `Json'; any other trailing data raises `badarg'.
  %%
  %% ```
  %% > jsone:decode(<<"1">>, []).
  %% 1
  %%
  %% > jsone:decode(<<"wrong json">>, []).
  %% ** exception error: bad argument
  %% in function jsone_decode:number_integer_part/4
  %% called as jsone_decode:number_integer_part(<<"wrong json">>,1,[],<<>>)
  %% in call from jsone:decode/1 (src/jsone.erl, line 71)
  %% '''
  -spec decode(binary(), [decode_option()]) -> json_value().
  decode(Json, Options) ->
      try
          %% A non-`ok' result fails this match; the resulting `badmatch' is
          %% what the catch clause below unpacks.
          {ok, Decoded, Trailing} = try_decode(Json, Options),
          %% Raises `badarg' unless the leftover input is whitespace only.
          check_decode_remainings(Trailing),
          Decoded
      catch
          error:{badmatch, {error, {Reason, [StackItem]}}} ?CAPTURE_STACKTRACE ->
              %% Re-raise the decoder's own reason, with the decoder's stack
              %% item placed ahead of the stack captured here.
              erlang:raise(error, Reason, [StackItem | ?GET_STACKTRACE])
      end.
  %% @equiv try_decode(Json, [])
  -spec try_decode(binary()) ->
            {ok, json_value(), Remainings :: binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_decode(Json) ->
      %% Convenience arity: no decode options, so defaults apply.
      try_decode(Json, []).
  %% @doc Decodes JSON text (a UTF-8 encoded binary) into an Erlang term
  %% without raising on malformed input.
  %%
  %% On success the result also carries `Remainings', the part of `Json' that
  %% follows the decoded value. On failure `{error, {Reason, StackItems}}' is
  %% returned.
  %%
  %% ```
  %% > jsone:try_decode(<<"[1,2,3] \"next value\"">>, []).
  %% {ok,[1,2,3],<<" \"next value\"">>}
  %%
  %% > jsone:try_decode(<<"wrong json">>, []).
  %% {error,{badarg,[{jsone_decode,number_integer_part,
  %% [<<"wrong json">>,1,[],<<>>],
  %% [{line,208}]}]}}
  %% '''
  -spec try_decode(binary(), [decode_option()]) ->
            {ok, json_value(), Remainings :: binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_decode(Json, Options) ->
      %% Thin public entry point: the decoding work is done by jsone_decode.
      jsone_decode:decode(Json, Options).
  %% @equiv encode(JsonValue, [])
  -spec encode(json_value()) -> binary().
  encode(JsonValue) ->
      %% Convenience arity: encode with an empty option list, i.e. every
      %% encode option is left at its default.
      DefaultOptions = [],
      encode(JsonValue, DefaultOptions).
  %% @doc Encodes an Erlang term into JSON text (a UTF-8 encoded binary).
  %%
  %% This is the raising counterpart of `try_encode/2': when `try_encode/2'
  %% returns `{error, {Reason, [StackItem]}}', an `error' exception with reason
  %% `Reason' is raised, with `StackItem' placed on top of the stack trace.
  %%
  %% ```
  %% > jsone:encode([1, null, 2]).
  %% <<"[1,null,2]">>
  %%
  %% > jsone:encode([1, self(), 2]). % A pid is not a json value
  %% ** exception error: bad argument
  %% in function jsone_encode:value/3
  %% called as jsone_encode:value(<0,34,0>,[{array_values,[2]}],<<"[1,">>)
  %% in call from jsone:encode/1 (src/jsone.erl, line 97)
  %% '''
  -spec encode(json_value(), [encode_option()]) -> binary().
  encode(JsonValue, Options) ->
      %% NOTE(review): the pid example in the doc comment above may be stale.
      %% The `map_unknown_value' option is documented with a default of
      %% `term_to_json_string/1', which would encode the pid instead of
      %% raising. Confirm against jsone_encode.
      try
          %% A non-`ok' result fails this match; the resulting `badmatch' is
          %% what the catch clause below unpacks.
          {ok, Encoded} = try_encode(JsonValue, Options),
          Encoded
      catch
          error:{badmatch, {error, {Reason, [StackItem]}}} ?CAPTURE_STACKTRACE ->
              %% Re-raise the encoder's own reason, with the encoder's stack
              %% item placed ahead of the stack captured here.
              erlang:raise(error, Reason, [StackItem | ?GET_STACKTRACE])
      end.
  %% @equiv try_encode(JsonValue, [])
  -spec try_encode(json_value()) ->
            {ok, binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_encode(JsonValue) ->
      %% Convenience arity: no encode options, so defaults apply.
      try_encode(JsonValue, []).
  %% @doc Encodes an Erlang term into JSON text (a UTF-8 encoded binary)
  %% without raising on values that cannot be encoded.
  %%
  %% Returns `{ok, Binary}' on success and `{error, {Reason, StackItems}}'
  %% otherwise.
  %%
  %% ```
  %% > jsone:try_encode([1, null, 2]).
  %% {ok,<<"[1,null,2]">>}
  %%
  %% > jsone:try_encode([1, hoge, 2]). % 'hoge' atom is not a json value
  %% {error,{badarg,[{jsone_encode,value,
  %% [hoge,[{array_values,[2]}],<<"[1,">>],
  %% [{line,86}]}]}}
  %% '''
  -spec try_encode(json_value(), [encode_option()]) ->
            {ok, binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_encode(JsonValue, Options) ->
      %% NOTE(review): the `hoge' example in the doc comment above may be
      %% stale, since the `json_string()' type in this module includes
      %% `atom()'. Confirm against jsone_encode.
      %%
      %% Thin public entry point: the encoding work is done by jsone_encode.
      jsone_encode:encode(JsonValue, Options).
  %% @doc Converts the given term `X' to its string representation (i.e., the result of `io_lib:format("~p", [X])'),
  %% returned as a UTF-8 encoded binary.
  %%
  %% For output consisting only of ASCII characters the result is byte-for-byte
  %% the same as the formatted text. Characters in the range 128..255 that
  %% `~p' may emit (for example inside a printable Latin-1 string nested in a
  %% tuple) are encoded as UTF-8 rather than copied as raw Latin-1 bytes, so
  %% the returned binary is always valid UTF-8.
  %%
  %% This function always succeeds. The `error' alternative in the spec only
  %% mirrors the return type documented for the `map_unknown_value' callback.
  -spec term_to_json_string(term()) -> {ok, json_string()} | error.
  term_to_json_string(X) ->
      %% io_lib:format/2 returns a (possibly deep) list of code points, not
      %% bytes. list_to_binary/1 would write code points 128..255 as single
      %% bytes, which is not valid UTF-8; characters_to_binary/1 encodes them.
      Formatted = io_lib:format("~p", [X]),
      {ok, unicode:characters_to_binary(Formatted)}.
  %% @doc Converts an IP address into its textual representation.
  %%
  %% This function can be passed as the value of the `map_unknown_value' encoding option.
  %%
  %% IPv6 addresses are formatted following the recommendation defined in RFC 5952.
  %% Note that the trailing 32 bits of special IPv6 addresses such as IPv4-Compatible (::X.X.X.X),
  %% IPv4-Mapped (::ffff:X.X.X.X), IPv4-Translated (::ffff:0:X.X.X.X) and IPv4/IPv6 translation
  %% (64:ff9b::X.X.X.X and 64:ff9b:1::X.X.X.X ~ 64:ff9b:1:ffff:ffff:ffff:X.X.X.X) are formatted
  %% using the IPv4 format.
  %%
  %% ```
  %% > EncodeOpt = [{map_unknown_value, fun jsone:ip_address_to_json_string/1}].
  %%
  %% > jsone:encode(#{ip => {127, 0, 0, 1}}, EncodeOpt).
  %% <<"{\"ip\":\"127.0.0.1\"}">>
  %%
  %% > {ok, Addr} = inet:parse_address("2001:DB8:0000:0000:0001:0000:0000:0001").
  %% > jsone:encode(Addr, EncodeOpt).
  %% <<"\"2001:db8::1:0:0:1\"">>
  %%
  %% > jsone:encode([foo, {0, 0, 0, 0, 0, 16#FFFF, 16#7F00, 16#0001}], EncodeOpt).
  %% <<"[\"foo\",\"::ffff:127.0.0.1\"]">>
  %% '''
  -spec ip_address_to_json_string(inet:ip_address() | any()) ->
            {ok, json_string()} | error.
  ip_address_to_json_string(X) ->
      %% Public entry point only: the formatting is implemented in jsone_inet.
      jsone_inet:ip_address_to_json_string(X).
  422. %%--------------------------------------------------------------------------------
  423. %% Internal Functions
  424. %%--------------------------------------------------------------------------------
  %% Verifies that the input left over after a decoded value is empty or
  %% consists only of space, tab, carriage return and line feed characters.
  %% Returns `ok' in that case. Otherwise raises `badarg', with the binary
  %% starting at the first offending byte as the reported argument.
  -spec check_decode_remainings(binary()) -> ok.
  check_decode_remainings(<<>>) ->
      ok;
  check_decode_remainings(<<Char, Rest/binary>>)
    when Char =:= $\s; Char =:= $\t; Char =:= $\r; Char =:= $\n ->
      %% Skip one whitespace byte and keep scanning.
      check_decode_remainings(Rest);
  check_decode_remainings(Unexpected) when is_binary(Unexpected) ->
      %% Restricted to binaries so that any other input still fails with
      %% `function_clause'.
      erlang:error(badarg, [Unexpected]).