jsone.erl 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. %%% @doc JSON decoding/encoding module
  2. %%% @end
  3. %%%
  4. %%% Copyright (c) 2013-2015, Takeru Ohta <phjgt308@gmail.com>
  5. %%%
  6. %%% The MIT License
  7. %%%
  8. %%% Permission is hereby granted, free of charge, to any person obtaining a copy
  9. %%% of this software and associated documentation files (the "Software"), to deal
  10. %%% in the Software without restriction, including without limitation the rights
  11. %%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. %%% copies of the Software, and to permit persons to whom the Software is
  13. %%% furnished to do so, subject to the following conditions:
  14. %%%
  15. %%% The above copyright notice and this permission notice shall be included in
  16. %%% all copies or substantial portions of the Software.
  17. %%%
  18. %%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. %%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. %%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. %%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. %%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. %%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. %%% THE SOFTWARE.
  25. %%%
  26. %%%---------------------------------------------------------------------------------------
  -module(jsone).

  %%--------------------------------------------------------------------------------
  %% Exported API
  %%--------------------------------------------------------------------------------
  %% Every operation is exported in two arities; the shorter arity is declared
  %% (via `@equiv') as the longer one called with an empty option list.
  -export([decode/1,
           decode/2,
           try_decode/1,
           try_decode/2,
           encode/1,
           encode/2,
           try_encode/1,
           try_encode/2]).

  %% Types that other modules may refer to as `jsone:Type()'.
  -export_type([json_value/0,
                json_boolean/0,
                json_number/0,
                json_string/0,
                json_array/0,
                json_object/0,
                json_object_members/0,
                json_term/0,
                json_object_format_tuple/0,
                json_object_format_proplist/0,
                json_object_format_map/0,
                json_scalar/0,
                encode_option/0,
                decode_option/0,
                float_format_option/0,
                datetime_encode_format/0,
                datetime_format/0,
                timezone/0,
                utc_offset_seconds/0,
                stack_item/0]).
  %%--------------------------------------------------------------------------------
  %% Types & Macros
  %%--------------------------------------------------------------------------------
  -type json_value() :: json_number()
                      | json_string()
                      | json_array()
                      | json_object()
                      | json_boolean()
                      | null
                      | undefined
                      | json_term().
  %% Union of every term shape this module treats as a JSON value.

  -type json_boolean() :: boolean().
  %% JSON boolean, represented by the atoms `true' and `false'.

  -type json_number() :: number().
  %% JSON number, represented by an Erlang integer or float.

  -type json_string() :: binary() | atom() | calendar:datetime().
  %% JSON string.
  %%
  %% NOTE: `decode/1' always returns `binary()' value

  -type json_array() :: [json_value()].
  %% JSON array, represented by a list of values.

  -type json_object() :: json_object_format_tuple()
                       | json_object_format_proplist()
                       | json_object_format_map().
  %% JSON object, in any of the three supported representations.

  -type json_object_members() :: [{json_string(), json_value()}].
  %% Key/value pairs of a JSON object.
  -type json_term() :: {{json, iolist()}}
                     | {{json_utf8, unicode:chardata()}}.
  %% `json_term()' allows an already encoded JSON value to be inlined.
  %%
  %% - `json': list members are expected to be bytes of UTF-8 encoded data. <br />
  %% - `json_utf8': list members are expected to be Unicode code points. <br />
  %%
  %% Binaries are copied "as is" by both variants, except that `json_utf8'
  %% also checks that a binary contains valid `UTF-8' encoded data.
  %% In short, `json' uses `erlang:iolist_to_binary/1' and `json_utf8' uses
  %% `unicode:characters_to_binary/1' for encoding.
  %%
  %% A simple example is worth a thousand words.
  %%
  %% ```
  %% 1> S = "hélo".
  %% "hélo"
  %% 2> shell:strings(false).
  %% true
  %% 3> S.
  %% [104,233,108,111]
  %% 4> B = jsone:encode({{json, S}}).  % invalid UTF-8
  %% <<104,233,108,111>>
  %% 5> B2 = jsone:encode({{json_utf8, S}}).  % valid UTF-8
  %% <<104,195,169,108,111>>
  %% 6> jsone:encode({{json, B}}).
  %% <<104,233,108,111>>
  %% 7> jsone:encode({{json_utf8, B}}).
  %% ** exception error: {invalid_json_utf8,<<104>>,<<233,108,111>>}
  %%      in function  jsone_encode:value/4
  %%         called as jsone_encode:value({json_utf8,<<104,233,108,111>>},
  %%                                      [],<<>>,
  %%                                      {encode_opt_v2,false,
  %%                                                     [{scientific,20}],
  %%                                                     {iso8601,0},
  %%                                                     string,0,0})
  %%      in call from jsone:encode/2 (/home/hynek/work/altworx/jsone/_build/default/lib/jsone/src/jsone.erl, line 302)
  %% 8> jsone:encode({{json_utf8, B2}}).
  %% <<104,195,169,108,111>>
  %% 9> shell:strings(true).
  %% false
  %% 10> jsone:encode({{json_utf8, B2}}).
  %% <<"hélo"/utf8>>
  %% 11> jsone:encode({{json, binary_to_list(B2)}}).  % UTF-8 encoded list leads to valid UTF-8
  %% <<"hélo"/utf8>>
  %% '''
  %%
  -type json_object_format_tuple() :: {json_object_members()}.
  %% Object representation: the member list wrapped in a one-element tuple.

  -type json_object_format_proplist() :: [{}] | json_object_members().
  %% Object representation: a bare member list, or `[{}]'.

  %% When `NO_MAP_TYPE' is defined the map representation is declared as an
  %% opaque alias of the proplist representation; otherwise it is `map()'.
  -ifdef('NO_MAP_TYPE').
  -opaque json_object_format_map() :: json_object_format_proplist().
  %% `maps' is not supported in this erts version
  -else.
  -type json_object_format_map() :: map().
  %% Object representation: an Erlang map.
  -endif.

  -type json_scalar() :: json_boolean()
                       | json_number()
                       | json_string().
  %% Values that are neither arrays nor objects: booleans, numbers and strings.
  -type float_format_option() :: {scientific, Decimals :: 0..249}
                               | {decimals, Decimals :: 0..253}
                               | compact.
  %% `scientific': <br />
  %% - The float is formatted in scientific notation with `Decimals' digits of precision. <br />
  %%
  %% `decimals': <br />
  %% - The encoded string contains at most `Decimals' digits past the decimal point. <br />
  %% - If `compact' is also provided, trailing zeros at the end of the string are truncated. <br />
  %%
  %% For more details, see <a href="http://erlang.org/doc/man/erlang.html#float_to_list-2">erlang:float_to_list/2</a>.
  %%
  %% ```
  %% > jsone:encode(1.23).
  %% <<"1.22999999999999998224e+00">>
  %%
  %% > jsone:encode(1.23, [{float_format, [{scientific, 4}]}]).
  %% <<"1.2300e+00">>
  %%
  %% > jsone:encode(1.23, [{float_format, [{scientific, 1}]}]).
  %% <<"1.2e+00">>
  %%
  %% > jsone:encode(1.23, [{float_format, [{decimals, 4}]}]).
  %% <<"1.2300">>
  %%
  %% > jsone:encode(1.23, [{float_format, [{decimals, 4}, compact]}]).
  %% <<"1.23">>
  %% '''
  -type datetime_encode_format() :: Format :: datetime_format()
                                  | {Format :: datetime_format(), TimeZone :: timezone()}.
  %% Format used when encoding a datetime value.
  %%
  %% When only `Format' is given, `TimeZone' defaults to `utc'.
  %%
  %% ```
  %% %
  %% % Universal Time
  %% %
  %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, iso8601}]).
  %% <<"\"2000-03-10T10:03:58Z\"">>
  %%
  %% %
  %% % Local Time (JST)
  %% %
  %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, {iso8601, local}}]).
  %% <<"\"2000-03-10T10:03:58+09:00\"">>
  %%
  %% %
  %% % Explicit TimeZone Offset
  %% %
  %% > jsone:encode({{2000, 3, 10}, {10, 3, 58}}, [{datetime_format, {iso8601, -2*60*60}}]).
  %% <<"\"2000-03-10T10:03:58-02:00\"">>
  %% '''
  -type datetime_format() :: iso8601.
  %% Supported datetime notation; `iso8601' is the only one declared.

  -type timezone() :: utc
                    | local
                    | utc_offset_seconds().
  %% Time zone of an encoded datetime: UTC, the local zone, or an explicit offset.

  -type utc_offset_seconds() :: -86399..86399.
  %% Offset from UTC in seconds (strictly less than one day in either direction).

  -type common_option() :: undefined_as_null.
  %% Option accepted by both the encoder and the decoder.
  %%
  %% `undefined_as_null': <br />
  %% - Treats `undefined' in Erlang as the counterpart of `null' in JSON: `undefined' is encoded to `null', and `null' is decoded to `undefined' <br />
  -type encode_option() :: native_utf8
                         | native_forward_slash
                         | canonical_form
                         | {float_format, [float_format_option()]}
                         | {datetime_format, datetime_encode_format()}
                         | {object_key_type, string | scalar | value}
                         | {space, non_neg_integer()}
                         | {indent, non_neg_integer()}
                         | {map_unknown_value, fun((term()) -> {ok, json_value()} | error)}
                         | skip_undefined
                         | common_option().
  %% `native_utf8': <br />
  %% - Encodes non-ASCII UTF-8 characters as a human-readable (non-escaped) string <br />
  %%
  %% `native_forward_slash': <br />
  %% - Prevents forward slashes in a JSON string from being escaped <br />
  %%
  %% `canonical_form': <br />
  %% - Produces a canonical form of a JSON document <br />
  %%
  %% `{float_format, Options}': <br />
  %% - Encodes a `float()' value in the format specified by `Options' <br />
  %% - default: `[{scientific, 20}]' <br />
  %%
  %% `{datetime_format, Format}': <br />
  %% - Encodes a `calendar:datetime()' value in the format specified by `Format' <br />
  %% - default: `{iso8601, utc}' <br />
  %%
  %% `{object_key_type, Type}': <br />
  %% - Allowable object key type <br />
  %% - `string': Only string values are allowed (i.e. `json_string()' type) <br />
  %% - `scalar': In addition to `string', following values are allowed: nulls, booleans, numerics (i.e. `json_scalar()' type) <br />
  %% - `value': Any json compatible values are allowed (i.e. `json_value()' type) <br />
  %% - default: `string' <br />
  %% - NOTE: If `scalar' or `value' option is specified, non `json_string()' key will be automatically converted to a `binary()' value (e.g. `1' => `<<"1">>', `#{}' => `<<"{}">>') <br />
  %%
  %% `{space, N}': <br />
  %% - Inserts `N' spaces after every comma and colon <br />
  %% - default: `0' <br />
  %%
  %% `{indent, N}': <br />
  %% - Inserts a newline and `N' spaces for each level of indentation <br />
  %% - default: `0' <br />
  %%
  %% `skip_undefined': <br />
  %% - If specified, each entry having `undefined' value in an object isn't included in the result JSON <br />
  %%
  %% `{map_unknown_value, Fun}': <br />
  %% - If specified, unknown values encountered during an encoding process are converted to `json_value()' by applying `Fun'.
  -type decode_option() :: {object_format, tuple | proplist | map}
                         | {allow_ctrl_chars, boolean()}
                         | reject_invalid_utf8
                         | {keys, binary | atom | existing_atom | attempt_atom}
                         | {duplicate_map_keys, first | last}
                         | common_option().
  %% `object_format': <br />
  %% - Decoded JSON object format <br />
  %% - `tuple': An object is decoded as `{[]}' if it is empty, otherwise `{[{Key, Value}]}'. <br />
  %% - `proplist': An object is decoded as `[{}]' if it is empty, otherwise `[{Key, Value}]'. <br />
  %% - `map': An object is decoded as `#{}' if it is empty, otherwise `#{Key => Value}'. <br />
  %% - default: `map' if OTP version is OTP-17 or more, `tuple' otherwise <br />
  %%
  %% `allow_ctrl_chars': <br />
  %% - If the value is `true', strings which contain unescaped control characters will be regarded as a legal JSON string <br />
  %% - default: `false' <br />
  %%
  %% `reject_invalid_utf8': <br />
  %% - Rejects JSON strings which contain invalid UTF-8 byte sequences <br />
  %%
  %% `keys': <br />
  %% Defines how object keys are decoded. The default value is `binary'.
  %% The option is compatible with the `labels' option in `jsx'. <br />
  %% - `binary': The key is left as a string which is encoded as binary. It is the default
  %%   and backward compatible behaviour. <br />
  %% - `atom': The key is converted to an atom. Results in `badarg' if the key value
  %%   regarded as UTF-8 is not a valid atom. <br />
  %% - `existing_atom': Returns an existing atom. Any key value which is not
  %%   an existing atom raises a `badarg' exception. <br />
  %% - `attempt_atom': Returns an existing atom as `existing_atom' does, but returns a
  %%   binary string if it fails to find one.
  %%
  %% `duplicate_map_keys': <br />
  %% https://www.ietf.org/rfc/rfc4627.txt says that keys SHOULD be
  %% unique, but they don't have to be. Most JSON parsers will either
  %% give you the value of the first, or last duplicate property
  %% encountered. When `object_format' is `tuple' or `proplist' all
  %% duplicates are returned. When `object_format' is `map' by default
  %% the first instance of a duplicate is returned. Setting
  %% `duplicate_map_keys' to `last' will change this behaviour to return
  %% the last such instance.
  %% - If the value is `first' then the first duplicate key/value is returned. <br />
  %% - If the value is `last' then the last duplicate key/value is returned. <br />
  %% - default: `first' <br />
  %%
  -type stack_item() ::
          {Module :: module(),
           Function :: atom(),
           Arity :: arity() | (Args :: [term()]),
           Location :: [{file, Filename :: string()} | {line, Line :: pos_integer()}]}.
  %% One entry of a stack back-trace.
  %%
  %% The `erlang' module defines an identical `stack_item/0' type but does not
  %% export it, so the type is redefined here (possibly only as a temporary
  %% measure) to let a full dialyzer analysis pass.
  %% Stacktrace capture that compiles on both old and new compilers.
  %%
  %% The 'OTP_RELEASE' macro was introduced in OTP-21, so its presence is used to
  %% detect whether the compiler supports the `Class:Reason:Stacktrace' catch syntax.
  %% - ?CAPTURE_STACKTRACE is appended to a catch pattern, right before `->'.
  %% - ?GET_STACKTRACE yields the stacktrace inside that catch clause.
  -ifndef('OTP_RELEASE').
  %% No 'OTP_RELEASE': nothing is added to the pattern and the trace is fetched
  %% with erlang:get_stacktrace/0.
  -define(CAPTURE_STACKTRACE, ).
  -define(GET_STACKTRACE, erlang:get_stacktrace()).
  -else.
  %% 'OTP_RELEASE' defined: bind the trace directly in the catch pattern.
  -define(CAPTURE_STACKTRACE, :__StackTrace).
  -define(GET_STACKTRACE, __StackTrace).
  -endif.
  %%--------------------------------------------------------------------------------
  %% Exported Functions
  %%--------------------------------------------------------------------------------

  %% @equiv decode(Json, [])
  -spec decode(binary()) -> json_value().
  decode(Json) ->
      % Shorthand arity: decode with an empty option list.
      NoOptions = [],
      decode(Json, NoOptions).
  %% @doc Decodes JSON text (a UTF-8 encoded binary) into an Erlang term.
  %%
  %% This is the raising counterpart of {@link try_decode/2}: an error result is
  %% re-raised as an `error' exception carrying the reported reason.
  %% Any input left over after the first decoded value is discarded.
  %%
  %% ```
  %% > jsone:decode(<<"1">>, []).
  %% 1
  %%
  %% > jsone:decode(<<"wrong json">>, []).
  %% ** exception error: bad argument
  %%     in function  jsone_decode:number_integer_part/4
  %%        called as jsone_decode:number_integer_part(<<"wrong json">>,1,[],<<>>)
  %%     in call from jsone:decode/1 (src/jsone.erl, line 71)
  %% '''
  -spec decode(binary(), [decode_option()]) -> json_value().
  decode(Json, Options) ->
      try
          % The match fails with `badmatch' on an `{error, _}' result; that is
          % what routes the failure into the catch clause below.
          {ok, Decoded, _Remainings} = try_decode(Json, Options),
          Decoded
      catch
          error:{badmatch, {error, {Cause, [Frame]}}} ?CAPTURE_STACKTRACE ->
              % Put the frame reported by the decoder on top of the current trace.
              erlang:raise(error, Cause, [Frame | ?GET_STACKTRACE])
      end.
  %% @equiv try_decode(Json, [])
  -spec try_decode(binary()) ->
            {ok, json_value(), Remainings :: binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_decode(Json) ->
      % Shorthand arity: decode with an empty option list.
      NoOptions = [],
      try_decode(Json, NoOptions).
  %% @doc Decodes JSON text (a UTF-8 encoded binary) into an Erlang term, without raising.
  %%
  %% On success the not yet consumed part of the input is returned together
  %% with the decoded value; on failure the reason and a stack item are returned.
  %%
  %% ```
  %% > jsone:try_decode(<<"[1,2,3] \"next value\"">>, []).
  %% {ok,[1,2,3],<<" \"next value\"">>}
  %%
  %% > jsone:try_decode(<<"wrong json">>, []).
  %% {error,{badarg,[{jsone_decode,number_integer_part,
  %%                               [<<"wrong json">>,1,[],<<>>],
  %%                               [{line,208}]}]}}
  %% '''
  -spec try_decode(binary(), [decode_option()]) ->
            {ok, json_value(), Remainings :: binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_decode(Json, Options) ->
      % Thin facade: the result of `jsone_decode:decode/2' is returned untouched.
      jsone_decode:decode(Json, Options).
  %% @equiv encode(JsonValue, [])
  -spec encode(json_value()) -> binary().
  encode(JsonValue) ->
      % Shorthand arity: encode with an empty option list.
      NoOptions = [],
      encode(JsonValue, NoOptions).
  %% @doc Encodes an Erlang term into JSON text (a UTF-8 encoded binary).
  %%
  %% This is the raising counterpart of {@link try_encode/2}: an error result
  %% (e.g. the input is not an instance of type `json_value()') is re-raised as
  %% an `error' exception carrying the reported reason.
  %%
  %% ```
  %% > jsone:encode([1, null, 2]).
  %% <<"[1,null,2]">>
  %%
  %% > jsone:encode([1, self(), 2]).  % A pid is not a json value
  %% ** exception error: bad argument
  %%      in function  jsone_encode:value/3
  %%         called as jsone_encode:value(<0,34,0>,[{array_values,[2]}],<<"[1,">>)
  %%      in call from jsone:encode/1 (src/jsone.erl, line 97)
  %% '''
  -spec encode(json_value(), [encode_option()]) -> binary().
  encode(JsonValue, Options) ->
      try
          % The match fails with `badmatch' on an `{error, _}' result; that is
          % what routes the failure into the catch clause below.
          {ok, Encoded} = try_encode(JsonValue, Options),
          Encoded
      catch
          error:{badmatch, {error, {Cause, [Frame]}}} ?CAPTURE_STACKTRACE ->
              % Put the frame reported by the encoder on top of the current trace.
              erlang:raise(error, Cause, [Frame | ?GET_STACKTRACE])
      end.
  %% @equiv try_encode(JsonValue, [])
  -spec try_encode(json_value()) ->
            {ok, binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_encode(JsonValue) ->
      % Shorthand arity: encode with an empty option list.
      NoOptions = [],
      try_encode(JsonValue, NoOptions).
  %% @doc Encodes an Erlang term into JSON text (a UTF-8 encoded binary), without raising.
  %%
  %% On failure the reason and a stack item are returned instead of an exception.
  %%
  %% ```
  %% > jsone:try_encode([1, null, 2]).
  %% {ok,<<"[1,null,2]">>}
  %%
  %% > jsone:try_encode([1, hoge, 2]).  % 'hoge' atom is not a json value
  %% {error,{badarg,[{jsone_encode,value,
  %%                               [hoge,[{array_values,[2]}],<<"[1,">>],
  %%                               [{line,86}]}]}}
  %% '''
  -spec try_encode(json_value(), [encode_option()]) ->
            {ok, binary()} |
            {error, {Reason :: term(), [stack_item()]}}.
  try_encode(JsonValue, Options) ->
      % Thin facade: the result of `jsone_encode:encode/2' is returned untouched.
      jsone_encode:encode(JsonValue, Options).
  383. -spec try_encode(json_value(), [encode_option()]) -> {ok, binary()} | {error, {Reason::term(), [stack_item()]}}.
  384. try_encode(JsonValue, Options) ->
  385. jsone_encode:encode(JsonValue, Options).