cow_http_struct_hd.erl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. %% Copyright (c) 2019, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%% The mapping between Erlang and structured headers types is as follows:
  15. %%
  16. %% List: list()
  17. %% Dictionary: map()
  18. %% Bare item: one bare_item() that can be of type:
  19. %% Integer: integer()
  20. %% Float: float()
  21. %% String: {string, binary()}
  22. %% Token: {token, binary()}
  23. %% Byte sequence: {binary, binary()}
  24. %% Boolean: boolean()
  25. %% And finally:
  26. %% Type with Parameters: {with_params, Type, Parameters}
%% Parameters: #{binary() => bare_item() | undefined}
  28. -module(cow_http_struct_hd).
  29. -export([parse_dictionary/1]).
  30. -export([parse_item/1]).
  31. -export([parse_list/1]).
  32. -export([dictionary/1]).
  33. -export([item/1]).
  34. -export([list/1]).
  35. -include("cow_parse.hrl").
  36. -type sh_list() :: [sh_item() | sh_inner_list()].
  37. -type sh_inner_list() :: sh_with_params([sh_item()]).
  38. -type sh_params() :: #{binary() => sh_bare_item() | undefined}.
  39. -type sh_dictionary() :: {#{binary() => sh_item() | sh_inner_list()}, [binary()]}.
  40. -type sh_item() :: sh_with_params(sh_bare_item()).
  41. -type sh_bare_item() :: integer() | float() | boolean()
  42. | {string | token | binary, binary()}.
  43. -type sh_with_params(Type) :: {with_params, Type, sh_params()}.
  44. -define(IS_LC_ALPHA(C),
  45. (C =:= $a) or (C =:= $b) or (C =:= $c) or (C =:= $d) or (C =:= $e) or
  46. (C =:= $f) or (C =:= $g) or (C =:= $h) or (C =:= $i) or (C =:= $j) or
  47. (C =:= $k) or (C =:= $l) or (C =:= $m) or (C =:= $n) or (C =:= $o) or
  48. (C =:= $p) or (C =:= $q) or (C =:= $r) or (C =:= $s) or (C =:= $t) or
  49. (C =:= $u) or (C =:= $v) or (C =:= $w) or (C =:= $x) or (C =:= $y) or
  50. (C =:= $z)
  51. ).
  52. %% Parsing.
  53. -spec parse_dictionary(binary()) -> sh_dictionary().
  54. parse_dictionary(<<>>) ->
  55. {#{}, []};
  56. parse_dictionary(<<C,R/bits>>) when ?IS_LC_ALPHA(C) ->
  57. {Dict, Order, <<>>} = parse_dict_key(R, #{}, [], <<C>>),
  58. {Dict, Order}.
  59. parse_dict_key(<<$=,$(,R0/bits>>, Acc, Order, K) ->
  60. false = maps:is_key(K, Acc),
  61. {Item, R} = parse_inner_list(R0, []),
  62. parse_dict_before_sep(R, Acc#{K => Item}, [K|Order]);
  63. parse_dict_key(<<$=,R0/bits>>, Acc, Order, K) ->
  64. false = maps:is_key(K, Acc),
  65. {Item, R} = parse_item1(R0),
  66. parse_dict_before_sep(R, Acc#{K => Item}, [K|Order]);
  67. parse_dict_key(<<C,R/bits>>, Acc, Order, K)
  68. when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
  69. or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
  70. parse_dict_key(R, Acc, Order, <<K/binary,C>>).
  71. parse_dict_before_sep(<<C,R/bits>>, Acc, Order) when ?IS_WS(C) ->
  72. parse_dict_before_sep(R, Acc, Order);
  73. parse_dict_before_sep(<<C,R/bits>>, Acc, Order) when C =:= $, ->
  74. parse_dict_before_member(R, Acc, Order);
  75. parse_dict_before_sep(<<>>, Acc, Order) ->
  76. {Acc, lists:reverse(Order), <<>>}.
  77. parse_dict_before_member(<<C,R/bits>>, Acc, Order) when ?IS_WS(C) ->
  78. parse_dict_before_member(R, Acc, Order);
  79. parse_dict_before_member(<<C,R/bits>>, Acc, Order) when ?IS_LC_ALPHA(C) ->
  80. parse_dict_key(R, Acc, Order, <<C>>).
  81. -spec parse_item(binary()) -> sh_item().
  82. parse_item(Bin) ->
  83. {Item, <<>>} = parse_item1(Bin),
  84. Item.
  85. parse_item1(Bin) ->
  86. case parse_bare_item(Bin) of
  87. {Item, <<$;,R/bits>>} ->
  88. {Params, Rest} = parse_before_param(R, #{}),
  89. {{with_params, Item, Params}, Rest};
  90. {Item, Rest} ->
  91. {{with_params, Item, #{}}, Rest}
  92. end.
  93. -spec parse_list(binary()) -> sh_list().
  94. parse_list(<<>>) ->
  95. [];
  96. parse_list(Bin) ->
  97. parse_list_before_member(Bin, []).
  98. parse_list_member(<<$(,R0/bits>>, Acc) ->
  99. {Item, R} = parse_inner_list(R0, []),
  100. parse_list_before_sep(R, [Item|Acc]);
  101. parse_list_member(R0, Acc) ->
  102. {Item, R} = parse_item1(R0),
  103. parse_list_before_sep(R, [Item|Acc]).
  104. parse_list_before_sep(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
  105. parse_list_before_sep(R, Acc);
  106. parse_list_before_sep(<<$,,R/bits>>, Acc) ->
  107. parse_list_before_member(R, Acc);
  108. parse_list_before_sep(<<>>, Acc) ->
  109. lists:reverse(Acc).
  110. parse_list_before_member(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
  111. parse_list_before_member(R, Acc);
  112. parse_list_before_member(R, Acc) ->
  113. parse_list_member(R, Acc).
  114. %% Internal.
  115. parse_inner_list(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
  116. parse_inner_list(R, Acc);
  117. parse_inner_list(<<$),$;,R0/bits>>, Acc) ->
  118. {Params, R} = parse_before_param(R0, #{}),
  119. {{with_params, lists:reverse(Acc), Params}, R};
  120. parse_inner_list(<<$),R/bits>>, Acc) ->
  121. {{with_params, lists:reverse(Acc), #{}}, R};
  122. parse_inner_list(R0, Acc) ->
  123. {Item, R = <<C,_/bits>>} = parse_item1(R0),
  124. true = (C =:= $\s) orelse (C =:= $)),
  125. parse_inner_list(R, [Item|Acc]).
  126. parse_before_param(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
  127. parse_before_param(R, Acc);
  128. parse_before_param(<<C,R/bits>>, Acc) when ?IS_LC_ALPHA(C) ->
  129. parse_param(R, Acc, <<C>>).
  130. parse_param(<<$;,R/bits>>, Acc, K) ->
  131. parse_before_param(R, Acc#{K => undefined});
  132. parse_param(<<$=,R0/bits>>, Acc, K) ->
  133. case parse_bare_item(R0) of
  134. {Item, <<$;,R/bits>>} ->
  135. false = maps:is_key(K, Acc),
  136. parse_before_param(R, Acc#{K => Item});
  137. {Item, R} ->
  138. false = maps:is_key(K, Acc),
  139. {Acc#{K => Item}, R}
  140. end;
  141. parse_param(<<C,R/bits>>, Acc, K)
  142. when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
  143. or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
  144. parse_param(R, Acc, <<K/binary,C>>);
  145. parse_param(R, Acc, K) ->
  146. false = maps:is_key(K, Acc),
  147. {Acc#{K => undefined}, R}.
  148. %% Integer or float.
  149. parse_bare_item(<<$-,R/bits>>) -> parse_number(R, 0, <<$->>);
  150. parse_bare_item(<<C,R/bits>>) when ?IS_DIGIT(C) -> parse_number(R, 1, <<C>>);
  151. %% String.
  152. parse_bare_item(<<$",R/bits>>) -> parse_string(R, <<>>);
  153. %% Token.
  154. parse_bare_item(<<C,R/bits>>) when ?IS_ALPHA(C) -> parse_token(R, <<C>>);
  155. %% Byte sequence.
  156. parse_bare_item(<<$*,R/bits>>) -> parse_binary(R, <<>>);
  157. %% Boolean.
  158. parse_bare_item(<<"?0",R/bits>>) -> {false, R};
  159. parse_bare_item(<<"?1",R/bits>>) -> {true, R}.
  160. parse_number(<<C,R/bits>>, L, Acc) when ?IS_DIGIT(C) ->
  161. parse_number(R, L+1, <<Acc/binary,C>>);
  162. parse_number(<<C,R/bits>>, L, Acc) when C =:= $. ->
  163. parse_float(R, L, 0, <<Acc/binary,C>>);
  164. parse_number(R, L, Acc) when L =< 15 ->
  165. {binary_to_integer(Acc), R}.
  166. parse_float(<<C,R/bits>>, L1, L2, Acc) when ?IS_DIGIT(C) ->
  167. parse_float(R, L1, L2+1, <<Acc/binary,C>>);
  168. parse_float(R, L1, L2, Acc) when
  169. L1 =< 9, L2 =< 6;
  170. L1 =< 10, L2 =< 5;
  171. L1 =< 11, L2 =< 4;
  172. L1 =< 12, L2 =< 3;
  173. L1 =< 13, L2 =< 2;
  174. L1 =< 14, L2 =< 1 ->
  175. {binary_to_float(Acc), R}.
  176. parse_string(<<$\\,$",R/bits>>, Acc) ->
  177. parse_string(R, <<Acc/binary,$">>);
  178. parse_string(<<$\\,$\\,R/bits>>, Acc) ->
  179. parse_string(R, <<Acc/binary,$\\>>);
  180. parse_string(<<$",R/bits>>, Acc) ->
  181. {{string, Acc}, R};
  182. parse_string(<<C,R/bits>>, Acc) when
  183. C >= 16#20, C =< 16#21;
  184. C >= 16#23, C =< 16#5b;
  185. C >= 16#5d, C =< 16#7e ->
  186. parse_string(R, <<Acc/binary,C>>).
  187. parse_token(<<C,R/bits>>, Acc) when ?IS_TOKEN(C) or (C =:= $:) or (C =:= $/) ->
  188. parse_token(R, <<Acc/binary,C>>);
  189. parse_token(R, Acc) ->
  190. {{token, Acc}, R}.
  191. parse_binary(<<$*,R/bits>>, Acc) ->
  192. {{binary, base64:decode(Acc)}, R};
  193. parse_binary(<<C,R/bits>>, Acc) when ?IS_ALPHANUM(C) or (C =:= $+) or (C =:= $/) or (C =:= $=) ->
  194. parse_binary(R, <<Acc/binary,C>>).
  195. -ifdef(TEST).
  196. parse_struct_hd_test_() ->
  197. Files = filelib:wildcard("deps/structured-header-tests/*.json"),
  198. lists:flatten([begin
  199. {ok, JSON} = file:read_file(File),
  200. Tests = jsx:decode(JSON, [return_maps]),
  201. [
  202. {iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() ->
  203. %% The implementation is strict. We fail whenever we can.
  204. CanFail = maps:get(<<"can_fail">>, Test, false),
  205. MustFail = maps:get(<<"must_fail">>, Test, false),
  206. Expected = case MustFail of
  207. true -> undefined;
  208. false -> expected_to_term(maps:get(<<"expected">>, Test))
  209. end,
  210. Raw = raw_to_binary(Raw0),
  211. case HeaderType of
  212. <<"dictionary">> when MustFail; CanFail ->
  213. {'EXIT', _} = (catch parse_dictionary(Raw));
  214. %% The test "binary.json: non-zero pad bits" does not fail
  215. %% due to our reliance on Erlang/OTP's base64 module.
  216. <<"item">> when CanFail ->
  217. case (catch parse_item(Raw)) of
  218. {'EXIT', _} -> ok;
  219. Expected -> ok
  220. end;
  221. <<"item">> when MustFail ->
  222. {'EXIT', _} = (catch parse_item(Raw));
  223. <<"list">> when MustFail; CanFail ->
  224. {'EXIT', _} = (catch parse_list(Raw));
  225. <<"dictionary">> ->
  226. {Expected, _Order} = (catch parse_dictionary(Raw));
  227. <<"item">> ->
  228. Expected = (catch parse_item(Raw));
  229. <<"list">> ->
  230. Expected = (catch parse_list(Raw))
  231. end
  232. end}
  233. || Test=#{
  234. <<"name">> := Name,
  235. <<"header_type">> := HeaderType,
  236. <<"raw">> := Raw0
  237. } <- Tests]
  238. end || File <- Files]).
  239. %% Item.
  240. expected_to_term(E=[_, Params]) when is_map(Params) ->
  241. e2t(E);
  242. %% Outer list.
  243. expected_to_term(Expected) when is_list(Expected) ->
  244. [e2t(E) || E <- Expected];
  245. expected_to_term(Expected) ->
  246. e2t(Expected).
  247. %% Dictionary.
  248. e2t(Dict) when is_map(Dict) ->
  249. maps:map(fun(_, V) -> e2t(V) end, Dict);
  250. %% Inner list.
  251. e2t([List, Params]) when is_list(List) ->
  252. {with_params, [e2t(E) || E <- List],
  253. maps:map(fun(_, P) -> e2tb(P) end, Params)};
  254. %% Item.
  255. e2t([Bare, Params]) ->
  256. {with_params, e2tb(Bare),
  257. maps:map(fun(_, P) -> e2tb(P) end, Params)}.
  258. %% Bare item.
  259. e2tb(#{<<"__type">> := <<"token">>, <<"value">> := V}) ->
  260. {token, V};
  261. e2tb(#{<<"__type">> := <<"binary">>, <<"value">> := V}) ->
  262. {binary, base32:decode(V)};
  263. e2tb(V) when is_binary(V) ->
  264. {string, V};
  265. e2tb(null) ->
  266. undefined;
  267. e2tb(V) ->
  268. V.
  269. %% The Cowlib parsers currently do not support resuming parsing
  270. %% in the case of multiple headers. To make tests work we modify
  271. %% the raw value the same way Cowboy does when encountering
  272. %% multiple headers: by adding a comma and space in between.
  273. %%
  274. %% Similarly, the Cowlib parsers expect the leading and trailing
  275. %% whitespace to be removed before calling the parser.
  276. raw_to_binary(RawList) ->
  277. trim_ws(iolist_to_binary(lists:join(<<", ">>, RawList))).
  278. trim_ws(<<C,R/bits>>) when ?IS_WS(C) -> trim_ws(R);
  279. trim_ws(R) -> trim_ws_end(R, byte_size(R) - 1).
  280. trim_ws_end(_, -1) ->
  281. <<>>;
  282. trim_ws_end(Value, N) ->
  283. case binary:at(Value, N) of
  284. $\s -> trim_ws_end(Value, N - 1);
  285. $\t -> trim_ws_end(Value, N - 1);
  286. _ ->
  287. S = N + 1,
  288. << Value2:S/binary, _/bits >> = Value,
  289. Value2
  290. end.
  291. -endif.
  292. %% Building.
  293. -spec dictionary(#{binary() => sh_item() | sh_inner_list()}
  294. | [{binary(), sh_item() | sh_inner_list()}])
  295. -> iolist().
  296. %% @todo Also accept this? dictionary({Map, Order}) ->
  297. dictionary(Map) when is_map(Map) ->
  298. dictionary(maps:to_list(Map));
  299. dictionary(KVList) when is_list(KVList) ->
  300. lists:join(<<", ">>, [
  301. [Key, $=, item_or_inner_list(Value)]
  302. || {Key, Value} <- KVList]).
  303. -spec item(sh_item()) -> iolist().
  304. item({with_params, BareItem, Params}) ->
  305. [bare_item(BareItem), params(Params)].
  306. -spec list(sh_list()) -> iolist().
  307. list(List) ->
  308. lists:join(<<", ">>, [item_or_inner_list(Value) || Value <- List]).
  309. item_or_inner_list(Value={with_params, List, _}) when is_list(List) ->
  310. inner_list(Value);
  311. item_or_inner_list(Value) ->
  312. item(Value).
  313. inner_list({with_params, List, Params}) ->
  314. [$(, lists:join($\s, [item(Value) || Value <- List]), $), params(Params)].
  315. bare_item({string, String}) ->
  316. [$", escape_string(String, <<>>), $"];
  317. bare_item({token, Token}) ->
  318. Token;
  319. bare_item({binary, Binary}) ->
  320. [$*, base64:encode(Binary), $*];
  321. bare_item(Integer) when is_integer(Integer) ->
  322. integer_to_binary(Integer);
  323. %% In order to properly reproduce the float as a string we
  324. %% must first determine how many decimals we want in the
  325. %% fractional component, otherwise rounding errors may occur.
  326. bare_item(Float) when is_float(Float) ->
  327. Decimals = case trunc(Float) of
  328. I when I >= 10000000000000 -> 1;
  329. I when I >= 1000000000000 -> 2;
  330. I when I >= 100000000000 -> 3;
  331. I when I >= 10000000000 -> 4;
  332. I when I >= 1000000000 -> 5;
  333. _ -> 6
  334. end,
  335. float_to_binary(Float, [{decimals, Decimals}, compact]);
  336. bare_item(true) ->
  337. <<"?1">>;
  338. bare_item(false) ->
  339. <<"?0">>.
  340. escape_string(<<>>, Acc) -> Acc;
  341. escape_string(<<$\\,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$\\>>);
  342. escape_string(<<$",R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$">>);
  343. escape_string(<<C,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,C>>).
  344. params(Params) ->
  345. maps:fold(fun
  346. (Key, undefined, Acc) ->
  347. [[$;, Key]|Acc];
  348. (Key, Value, Acc) ->
  349. [[$;, Key, $=, bare_item(Value)]|Acc]
  350. end, [], Params).
  351. -ifdef(TEST).
  352. struct_hd_identity_test_() ->
  353. Files = filelib:wildcard("deps/structured-header-tests/*.json"),
  354. lists:flatten([begin
  355. {ok, JSON} = file:read_file(File),
  356. Tests = jsx:decode(JSON, [return_maps]),
  357. [
  358. {iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() ->
  359. Expected = expected_to_term(Expected0),
  360. case HeaderType of
  361. <<"dictionary">> ->
  362. {Expected, _Order} = parse_dictionary(iolist_to_binary(dictionary(Expected)));
  363. <<"item">> ->
  364. Expected = parse_item(iolist_to_binary(item(Expected)));
  365. <<"list">> ->
  366. Expected = parse_list(iolist_to_binary(list(Expected)))
  367. end
  368. end}
  369. || #{
  370. <<"name">> := Name,
  371. <<"header_type">> := HeaderType,
  372. %% We only run tests that must not fail.
  373. <<"expected">> := Expected0
  374. } <- Tests]
  375. end || File <- Files]).
  376. -endif.