cow_http_hd.erl 118 KB


  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_accept_encoding/1]).
  18. -export([parse_accept_language/1]).
  19. -export([parse_accept_ranges/1]).
  20. -export([parse_age/1]).
  21. -export([parse_allow/1]).
  22. -export([parse_authorization/1]).
  23. -export([parse_cache_control/1]).
  24. -export([parse_connection/1]).
  25. -export([parse_content_encoding/1]).
  26. -export([parse_content_language/1]).
  27. -export([parse_content_length/1]).
  28. -export([parse_content_range/1]).
  29. -export([parse_content_type/1]).
  30. -export([parse_date/1]).
  31. -export([parse_etag/1]).
  32. -export([parse_expect/1]).
  33. -export([parse_expires/1]).
  34. -export([parse_host/1]).
  35. -export([parse_if_match/1]).
  36. -export([parse_if_modified_since/1]).
  37. -export([parse_if_none_match/1]).
  38. -export([parse_if_range/1]).
  39. -export([parse_if_unmodified_since/1]).
  40. -export([parse_last_modified/1]).
  41. -export([parse_max_forwards/1]).
  42. -export([parse_pragma/1]).
  43. -export([parse_proxy_authorization/1]).
  44. -export([parse_range/1]).
  45. -export([parse_retry_after/1]).
  46. -export([parse_sec_websocket_accept/1]).
  47. -export([parse_sec_websocket_extensions/1]).
  48. -export([parse_sec_websocket_key/1]).
  49. -export([parse_sec_websocket_protocol_req/1]).
  50. -export([parse_sec_websocket_protocol_resp/1]).
  51. -export([parse_sec_websocket_version_req/1]).
  52. -export([parse_sec_websocket_version_resp/1]).
  53. -export([parse_te/1]).
  54. -export([parse_trailer/1]).
  55. -export([parse_transfer_encoding/1]).
  56. -export([parse_upgrade/1]).
  57. -export([parse_vary/1]).
  58. -export([parse_x_forwarded_for/1]).
%% Entity tag: a weak or strong marker paired with the tag value.
-type etag() :: {weak | strong, binary()}.
-export_type([etag/0]).

%% Media type as {Type, SubType, Parameters}. The Accept parser in this
%% module lowercases the type, the subtype and the parameter names.
-type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
-export_type([media_type/0]).

%% Quality value scaled to an integer: "q=0.5" is 500 and "q=1" is 1000.
-type qvalue() :: 0..1000.
-export_type([qvalue/0]).

%% NOTE(review): presumably the value carried by Sec-WebSocket-Version;
%% confirm against the parse_sec_websocket_version_* functions.
-type websocket_version() :: 0..255.
-export_type([websocket_version/0]).
  67. -include("cow_inline.hrl").
  68. -ifdef(TEST).
  69. -include_lib("triq/include/triq.hrl").
%% Generator for a list of Dom values whose length is drawn from Min..Max.
vector(Min, Max, Dom) -> ?LET(N, choose(Min, Max), vector(N, Dom)).
%% Generator for a list of 0 to 10 Dom values.
small_list(Dom) -> vector(0, 10, Dom).
%% Generator for a list of 1 to 10 Dom values.
small_non_empty_list(Dom) -> vector(1, 10, Dom).
%% Character sets the generators below draw from.
alpha_chars() -> "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
alphanum_chars() -> "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
digit_chars() -> "0123456789".
%% Optional whitespace: a list made only of spaces and tabs.
ows() -> list(elements([$\s, $\t])).
%% Single-character generators picking from the sets above.
alpha() -> elements(alpha_chars()).
alphanum() -> elements(alphanum_chars()).
digit() -> elements(digit_chars()).
%% Generator for one token character. Alphanumerics are weighted 99 to 1
%% against the listed punctuation characters.
tchar() ->
    frequency([
        {1, elements([$!, $#, $$, $%, $&, $', $*, $+, $-, $., $^, $_, $`, $|, $~])},
        {99, elements(alphanum_chars())}
    ]).
%% Generator for a non-empty token, produced as a binary.
token() ->
    ?LET(T,
        non_empty(list(tchar())),
        list_to_binary(T)).
%% Generator for a byte in 1..127.
abnf_char() ->
    int(1, 127).
%% Generator for a byte in 33..126 (visible ASCII characters).
vchar() ->
    int(33, 126).
%% Generator for a byte in 128..255.
obs_text() ->
    int(128, 255).
%% Generator for a character allowed unescaped inside a quoted string.
%% The listed set contains neither the double quote nor the backslash;
%% obs-text bytes are weighted 1 against 99 for the listed characters.
qdtext() ->
    frequency([
        {99, elements("\t\s!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~")},
        {1, obs_text()}
    ]).
%% Generator for a backslash-escaped character, as the two-element list
%% [$\\, Char]. Here the double quote and the backslash are allowed.
quoted_pair() ->
    [$\\, frequency([
        {99, elements("\t\s!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")},
        {1, obs_text()}
    ])].
%% Generator for a quoted string, as the three-element list
%% [$", Items, $"] where each item is a character or a quoted pair.
%% unquote/1 relies on this exact shape.
quoted_string() ->
    [$", list(frequency([{100, qdtext()}, {1, quoted_pair()}])), $"].
  107. %% Helper function for ( token / quoted-string ) values.
  108. unquote([$", V, $"]) -> unquote(V, <<>>);
  109. unquote(V) -> V.
  110. unquote([], Acc) -> Acc;
  111. unquote([[$\\, C]|Tail], Acc) -> unquote(Tail, << Acc/binary, C >>);
  112. unquote([C|Tail], Acc) -> unquote(Tail, << Acc/binary, C >>).
%% Generator for a parameter as {Key, Value, OWS1, OWS2}. The key is a
%% token that is never exactly <<"q">>, because the Accept parser treats
%% "q=" as the weight. The value is a token or a quoted string.
parameter() ->
    ?SUCHTHAT({K, _, _, _},
        {token(), oneof([token(), quoted_string()]), ows(), ows()},
        K =/= <<"q">>).
%% Generator for a weight: an integer in 0..1000 (weighted 90), or the
%% atom undefined (weighted 10) when no weight is to be sent.
weight() ->
    frequency([
        {90, int(0, 1000)},
        {10, undefined}
    ]).
  122. %% Helper function for weight's qvalue formatting.
  123. qvalue_to_iodata(0) -> <<"0">>;
  124. qvalue_to_iodata(Q) when Q < 10 -> [<<"0.00">>, integer_to_binary(Q)];
  125. qvalue_to_iodata(Q) when Q < 100 -> [<<"0.0">>, integer_to_binary(Q)];
  126. qvalue_to_iodata(Q) when Q < 1000 -> [<<"0.">>, integer_to_binary(Q)];
  127. qvalue_to_iodata(1000) -> <<"1">>.
  128. -endif.
  129. %% @doc Parse the Accept header.
  130. -spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
  131. parse_accept(<<"*/*">>) ->
  132. [{{<<"*">>, <<"*">>, []}, 1000, []}];
  133. parse_accept(Accept) ->
  134. media_range_list(Accept, []).
  135. media_range_list(<<>>, Acc) -> lists:reverse(Acc);
  136. media_range_list(<< $\s, R/bits >>, Acc) -> media_range_list(R, Acc);
  137. media_range_list(<< $\t, R/bits >>, Acc) -> media_range_list(R, Acc);
  138. media_range_list(<< $,, R/bits >>, Acc) -> media_range_list(R, Acc);
  139. media_range_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  140. case C of
  141. ?INLINE_LOWERCASE(media_range_type, R, Acc, <<>>)
  142. end.
  143. media_range_type(<< $/, R/bits >>, Acc, T) -> media_range_subtype(R, Acc, T, <<>>);
  144. %% Special clause for badly behaving user agents that send * instead of */*.
  145. media_range_type(<< $;, R/bits >>, Acc, <<"*">>) -> media_range_before_param(R, Acc, <<"*">>, <<"*">>, []);
  146. media_range_type(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  147. case C of
  148. ?INLINE_LOWERCASE(media_range_type, R, Acc, T)
  149. end.
  150. media_range_subtype(<<>>, Acc, T, S) when S =/= <<>> -> lists:reverse([{{T, S, []}, 1000, []}|Acc]);
  151. media_range_subtype(<< $,, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_list(R, [{{T, S, []}, 1000, []}|Acc]);
  152. media_range_subtype(<< $;, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_param(R, Acc, T, S, []);
  153. media_range_subtype(<< $\s, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_semicolon(R, Acc, T, S, []);
  154. media_range_subtype(<< $\t, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_semicolon(R, Acc, T, S, []);
  155. media_range_subtype(<< C, R/bits >>, Acc, T, S) when ?IS_TOKEN(C) ->
  156. case C of
  157. ?INLINE_LOWERCASE(media_range_subtype, R, Acc, T, S)
  158. end.
  159. media_range_before_semicolon(<<>>, Acc, T, S, P) -> lists:reverse([{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
  160. media_range_before_semicolon(<< $,, R/bits >>, Acc, T, S, P) -> media_range_list(R, [{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
  161. media_range_before_semicolon(<< $;, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  162. media_range_before_semicolon(<< $\s, R/bits >>, Acc, T, S, P) -> media_range_before_semicolon(R, Acc, T, S, P);
  163. media_range_before_semicolon(<< $\t, R/bits >>, Acc, T, S, P) -> media_range_before_semicolon(R, Acc, T, S, P).
  164. media_range_before_param(<< $\s, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  165. media_range_before_param(<< $\t, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  166. %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  167. media_range_before_param(<< $q, $=, $., R/bits >>, Acc, T, S, P) -> media_range_broken_weight(R, Acc, T, S, P);
  168. media_range_before_param(<< $q, $=, R/bits >>, Acc, T, S, P) -> media_range_weight(R, Acc, T, S, P);
  169. media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_TOKEN(C) ->
  170. case C of
  171. ?INLINE_LOWERCASE(media_range_param, R, Acc, T, S, P, <<>>)
  172. end.
  173. media_range_param(<< $=, $", R/bits >>, Acc, T, S, P, K) -> media_range_quoted(R, Acc, T, S, P, K, <<>>);
  174. media_range_param(<< $=, R/bits >>, Acc, T, S, P, K) -> media_range_value(R, Acc, T, S, P, K, <<>>);
  175. media_range_param(<< C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) ->
  176. case C of
  177. ?INLINE_LOWERCASE(media_range_param, R, Acc, T, S, P, K)
  178. end.
  179. media_range_quoted(<< $", R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  180. media_range_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>);
  181. media_range_quoted(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>).
  182. media_range_value(<<>>, Acc, T, S, P, K, V) -> lists:reverse([{{T, S, lists:reverse([{K, V}|P])}, 1000, []}|Acc]);
  183. media_range_value(<< $,, R/bits >>, Acc, T, S, P, K, V) -> media_range_list(R, [{{T, S, lists:reverse([{K, V}|P])}, 1000, []}|Acc]);
  184. media_range_value(<< $;, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_param(R, Acc, T, S, [{K, V}|P]);
  185. media_range_value(<< $\s, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  186. media_range_value(<< $\t, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  187. media_range_value(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_TOKEN(C) -> media_range_value(R, Acc, T, S, P, K, << V/binary, C >>).
  188. %% Special function for badly behaving user agents that send .123 instead of 0.123.
  189. media_range_broken_weight(<< A, B, C, R/bits >>, Acc, T, S, P)
  190. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  191. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
  192. media_range_broken_weight(<< A, B, R/bits >>, Acc, T, S, P)
  193. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  194. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
  195. media_range_broken_weight(<< A, R/bits >>, Acc, T, S, P)
  196. when A >= $0, A =< $9 ->
  197. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100, []).
  198. media_range_weight(<< "1.000", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  199. media_range_weight(<< "1.00", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  200. media_range_weight(<< "1.0", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  201. media_range_weight(<< "1.", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  202. media_range_weight(<< "1", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  203. media_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T, S, P)
  204. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  205. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
  206. media_range_weight(<< "0.", A, B, R/bits >>, Acc, T, S, P)
  207. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  208. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
  209. media_range_weight(<< "0.", A, R/bits >>, Acc, T, S, P)
  210. when A >= $0, A =< $9 ->
  211. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100, []);
  212. media_range_weight(<< "0.", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 0, []);
  213. media_range_weight(<< "0", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 0, []).
  214. accept_before_semicolon(<<>>, Acc, T, S, P, Q, E) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
  215. accept_before_semicolon(<< $,, R/bits >>, Acc, T, S, P, Q, E) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
  216. accept_before_semicolon(<< $;, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  217. accept_before_semicolon(<< $\s, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_semicolon(R, Acc, T, S, P, Q, E);
  218. accept_before_semicolon(<< $\t, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_semicolon(R, Acc, T, S, P, Q, E).
  219. accept_before_ext(<< $\s, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  220. accept_before_ext(<< $\t, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  221. accept_before_ext(<< C, R/bits >>, Acc, T, S, P, Q, E) when ?IS_TOKEN(C) ->
  222. case C of
  223. ?INLINE_LOWERCASE(accept_ext, R, Acc, T, S, P, Q, E, <<>>)
  224. end.
  225. accept_ext(<<>>, Acc, T, S, P, Q, E, K) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse([K|E])}|Acc]);
  226. accept_ext(<< $,, R/bits >>, Acc, T, S, P, Q, E, K) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse([K|E])}|Acc]);
  227. accept_ext(<< $;, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_ext(R, Acc, T, S, P, Q, [K|E]);
  228. accept_ext(<< $\s, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_semicolon(R, Acc, T, S, P, Q, [K|E]);
  229. accept_ext(<< $\t, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_semicolon(R, Acc, T, S, P, Q, [K|E]);
  230. accept_ext(<< $=, $", R/bits >>, Acc, T, S, P, Q, E, K) -> accept_quoted(R, Acc, T, S, P, Q, E, K, <<>>);
  231. accept_ext(<< $=, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_value(R, Acc, T, S, P, Q, E, K, <<>>);
  232. accept_ext(<< C, R/bits >>, Acc, T, S, P, Q, E, K) when ?IS_TOKEN(C) ->
  233. case C of
  234. ?INLINE_LOWERCASE(accept_ext, R, Acc, T, S, P, Q, E, K)
  235. end.
  236. accept_quoted(<< $", R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  237. accept_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR_OBS(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>);
  238. accept_quoted(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR_OBS(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).
  239. accept_value(<<>>, Acc, T, S, P, Q, E, K, V) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse([{K, V}|E])}|Acc]);
  240. accept_value(<< $,, R/bits >>, Acc, T, S, P, Q, E, K, V) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse([{K, V}|E])}|Acc]);
  241. accept_value(<< $;, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_ext(R, Acc, T, S, P, Q, [{K, V}|E]);
  242. accept_value(<< $\s, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  243. accept_value(<< $\t, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  244. accept_value(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_TOKEN(C) -> accept_value(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).
  245. -ifdef(TEST).
%% Generator for one accept extension: a bare token, or a parameter
%% tuple as produced by parameter/0.
accept_ext() ->
    oneof([token(), parameter()]).
%% Generator for the extension list: empty with weight 90, otherwise a
%% list of up to 10 extensions.
accept_params() ->
    frequency([
        {90, []},
        {10, small_list(accept_ext())}
    ]).
%% Generator for one media range. Produces {Type, SubType, Params,
%% Weight, Exts, Binary} where Binary is the header text built from the
%% other five elements.
accept() ->
    ?LET({T, S, P, W, E},
        {token(), token(), small_list(parameter()), weight(), accept_params()},
        {T, S, P, W, E, iolist_to_binary([T, $/, S,
            [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P],
            %% Extensions can only follow the weight, so they are left
            %% out of the text when no weight was generated.
            case W of
                undefined -> [];
                _ -> [
                    [<<";q=">>, qvalue_to_iodata(W)],
                    [case Ext of
                        {K, V, OWS1, OWS2} -> [OWS1, $;, OWS2, K, $=, V];
                        K -> [$;, K]
                    end || Ext <- E]]
            end])}
    ).
%% Property: parsing a comma-separated list of 1 to 50 generated media
%% ranges yields, for each of them, the lowercased type, subtype and
%% names, the unquoted values and the expected weight.
prop_parse_accept() ->
    ?FORALL(L,
        vector(1, 50, accept()),
        begin
            %% Every range is prefixed with a comma; the match drops the
            %% first one.
            << _, Accept/binary >> = iolist_to_binary([[$,, A] || {_, _, _, _, _, A} <- L]),
            ResL = parse_accept(Accept),
            CheckedL = [begin
                ExpectedP = [{?INLINE_LOWERCASE_BC(K), unquote(V)} || {K, V, _, _} <- P],
                ExpectedE = [case Ext of
                    {K, V, _, _} -> {?INLINE_LOWERCASE_BC(K), unquote(V)};
                    K -> ?INLINE_LOWERCASE_BC(K)
                end || Ext <- E],
                ResT =:= ?INLINE_LOWERCASE_BC(T)
                    andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
                    andalso ResP =:= ExpectedP
                    %% A missing weight must be reported as 1000.
                    andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
                    %% The generator emits no extensions without a weight.
                    andalso ((W =:= undefined andalso ResE =:= []) orelse (W =/= undefined andalso ResE =:= ExpectedE))
            end || {{T, S, P, W, E, _}, {{ResT, ResS, ResP}, ResW, ResE}} <- lists:zip(L, ResL)],
            %% Passes only when every element checked out.
            [true] =:= lists:usort(CheckedL)
        end
    ).
%% Test generator: each {Header, Expected} pair becomes one test titled
%% with the header, asserting the exact result of parse_accept/1.
parse_accept_test_() ->
    Tests = [
        %% Empty and whitespace-only values give an empty list.
        {<<>>, []},
        {<<" ">>, []},
        {<<"audio/*; q=0.2, audio/basic">>, [
            {{<<"audio">>, <<"*">>, []}, 200, []},
            {{<<"audio">>, <<"basic">>, []}, 1000, []}
        ]},
        {<<"text/plain; q=0.5, text/html, "
            "text/x-dvi; q=0.8, text/x-c">>, [
            {{<<"text">>, <<"plain">>, []}, 500, []},
            {{<<"text">>, <<"html">>, []}, 1000, []},
            {{<<"text">>, <<"x-dvi">>, []}, 800, []},
            {{<<"text">>, <<"x-c">>, []}, 1000, []}
        ]},
        {<<"text/*, text/html, text/html;level=1, */*">>, [
            {{<<"text">>, <<"*">>, []}, 1000, []},
            {{<<"text">>, <<"html">>, []}, 1000, []},
            {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
            {{<<"*">>, <<"*">>, []}, 1000, []}
        ]},
        {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
            "text/html;level=2;q=0.4, */*;q=0.5">>, [
            {{<<"text">>, <<"*">>, []}, 300, []},
            {{<<"text">>, <<"html">>, []}, 700, []},
            {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
            {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
            {{<<"*">>, <<"*">>, []}, 500, []}
        ]},
        %% Parameters before the weight, extensions after it.
        {<<"text/html;level=1;quoted=\"hi hi hi\";"
            "q=0.123;standalone;complex=gits, text/plain">>, [
            {{<<"text">>, <<"html">>,
                [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
                [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
            {{<<"text">>, <<"plain">>, []}, 1000, []}
        ]},
        %% Leniency cases: "*" in place of "*/*" and ".2" in place of "0.2".
        {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
            {{<<"text">>, <<"html">>, []}, 1000, []},
            {{<<"image">>, <<"gif">>, []}, 1000, []},
            {{<<"image">>, <<"jpeg">>, []}, 1000, []},
            {{<<"*">>, <<"*">>, []}, 200, []},
            {{<<"*">>, <<"*">>, []}, 200, []}
        ]}
    ],
    [{V, fun() -> R = parse_accept(V) end} || {V, R} <- Tests].
  334. parse_accept_error_test_() ->
  335. Tests = [
  336. <<"audio/basic, */;q=0.5">>,
  337. <<"audio/, audio/basic">>,
  338. <<"aud\tio/basic">>,
  339. <<"audio/basic;t=\"zero \\", 0, " woo\"">>
  340. ],
  341. [{V, fun() -> {'EXIT', _} = (catch parse_accept(V)) end} || V <- Tests].
  342. -endif.
  343. -ifdef(PERF).
%% Performance test (compiled only when PERF is defined): hands a
%% representative Accept value to horse:repeat/2 with a count of 20000.
horse_parse_accept() ->
    horse:repeat(20000,
        parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
            "text/html;level=2;q=0.4, */*;q=0.5">>)
    ).
  349. -endif.
  350. %% @doc Parse the Accept-Charset header.
  351. -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  352. parse_accept_charset(Charset) ->
  353. nonempty(conneg_list(Charset, [])).
  354. conneg_list(<<>>, Acc) -> lists:reverse(Acc);
  355. conneg_list(<< $\s, R/bits >>, Acc) -> conneg_list(R, Acc);
  356. conneg_list(<< $\t, R/bits >>, Acc) -> conneg_list(R, Acc);
  357. conneg_list(<< $\,, R/bits >>, Acc) -> conneg_list(R, Acc);
  358. conneg_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  359. case C of
  360. ?INLINE_LOWERCASE(conneg, R, Acc, <<>>)
  361. end.
  362. conneg(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  363. conneg(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
  364. conneg(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  365. conneg(<< $\s, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  366. conneg(<< $\t, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  367. conneg(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  368. case C of
  369. ?INLINE_LOWERCASE(conneg, R, Acc, T)
  370. end.
  371. conneg_before_semicolon(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  372. conneg_before_semicolon(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
  373. conneg_before_semicolon(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  374. conneg_before_semicolon(<< $\s, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  375. conneg_before_semicolon(<< $\t, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T).
  376. conneg_before_weight(<< $\s, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  377. conneg_before_weight(<< $\t, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  378. conneg_before_weight(<< $q, $=, R/bits >>, Acc, T) -> conneg_weight(R, Acc, T);
  379. %% Special clause for broken user agents that confuse ; and , separators.
  380. conneg_before_weight(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  381. case C of
  382. ?INLINE_LOWERCASE(conneg, R, [{T, 1000}|Acc], <<>>)
  383. end.
  384. conneg_weight(<< "1.000", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  385. conneg_weight(<< "1.00", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  386. conneg_weight(<< "1.0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  387. conneg_weight(<< "1.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  388. conneg_weight(<< "1", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  389. conneg_weight(<< "0.", A, B, C, R/bits >>, Acc, T)
  390. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  391. conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
  392. conneg_weight(<< "0.", A, B, R/bits >>, Acc, T)
  393. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  394. conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
  395. conneg_weight(<< "0.", A, R/bits >>, Acc, T)
  396. when A >= $0, A =< $9 ->
  397. conneg_list_sep(R, [{T, (A - $0) * 100}|Acc]);
  398. conneg_weight(<< "0.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]);
  399. conneg_weight(<< "0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]).
  400. conneg_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  401. conneg_list_sep(<< $\s, R/bits >>, Acc) -> conneg_list_sep(R, Acc);
  402. conneg_list_sep(<< $\t, R/bits >>, Acc) -> conneg_list_sep(R, Acc);
  403. conneg_list_sep(<< $,, R/bits >>, Acc) -> conneg_list(R, Acc).
  404. -ifdef(TEST).
%% Generator for one Accept-Charset item. Produces {Charset, Weight,
%% Binary} where Binary is the charset followed by ";q=" and the weight,
%% or the charset alone when the weight is undefined.
accept_charset() ->
    ?LET({C, W},
        {token(), weight()},
        {C, W, iolist_to_binary([C, case W of
            undefined -> [];
            _ -> [<<";q=">>, qvalue_to_iodata(W)]
        end])}
    ).
%% Property: parsing a comma-separated list of generated items yields
%% the lowercased charset and the expected weight for each of them.
prop_parse_accept_charset() ->
    ?FORALL(L,
        non_empty(list(accept_charset())),
        begin
            %% Every item is prefixed with a comma; the match drops the
            %% first one.
            << _, AcceptCharset/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
            ResL = parse_accept_charset(AcceptCharset),
            CheckedL = [begin
                ResC =:= ?INLINE_LOWERCASE_BC(Ch)
                    %% A missing weight must be reported as 1000.
                    andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
            end || {{Ch, W, _}, {ResC, ResW}} <- lists:zip(L, ResL)],
            [true] =:= lists:usort(CheckedL)
        end).
%% Test generator: each {Header, Expected} pair becomes one test
%% asserting the exact result of parse_accept_charset/1.
parse_accept_charset_test_() ->
    Tests = [
        {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
            {<<"iso-8859-5">>, 1000},
            {<<"unicode-1-1">>, 800}
        ]},
        %% Some user agents send this invalid value for the Accept-Charset header
        {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
            {<<"iso-8859-1">>, 1000},
            {<<"utf-8">>, 700},
            {<<"*">>, 700}
        ]}
    ],
    [{V, fun() -> R = parse_accept_charset(V) end} || {V, R} <- Tests].
  439. parse_accept_charset_error_test_() ->
  440. Tests = [
  441. <<>>
  442. ],
  443. [{V, fun() -> {'EXIT', _} = (catch parse_accept_charset(V)) end} || V <- Tests].
  444. -endif.
  445. -ifdef(PERF).
%% Performance test (compiled only when PERF is defined): hands a
%% representative Accept-Charset value to horse:repeat/2 with a count
%% of 20000.
horse_parse_accept_charset() ->
    horse:repeat(20000,
        parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
    ).
  450. -endif.
%% @doc Parse the Accept-Encoding header.
%%
%% Returns the lowercased codings with their weight, in input order; the
%% weight is 1000 when none is given. Unlike Accept-Charset the result
%% is not checked for emptiness: an empty value gives an empty list.
-spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
parse_accept_encoding(Encoding) ->
    conneg_list(Encoding, []).
  455. -ifdef(TEST).
%% Generator for one Accept-Encoding item. Produces {Coding, Weight,
%% Binary} where Binary is the coding followed by ";q=" and the weight,
%% or the coding alone when the weight is undefined.
accept_encoding() ->
    ?LET({E, W},
        {token(), weight()},
        {E, W, iolist_to_binary([E, case W of
            undefined -> [];
            _ -> [<<";q=">>, qvalue_to_iodata(W)]
        end])}
    ).
%% Property: parsing a comma-separated list of generated items yields
%% the lowercased coding and the expected weight for each of them.
prop_parse_accept_encoding() ->
    ?FORALL(L,
        non_empty(list(accept_encoding())),
        begin
            %% Every item is prefixed with a comma; the match drops the
            %% first one.
            << _, AcceptEncoding/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
            ResL = parse_accept_encoding(AcceptEncoding),
            CheckedL = [begin
                ResE =:= ?INLINE_LOWERCASE_BC(E)
                    %% A missing weight must be reported as 1000.
                    andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
            end || {{E, W, _}, {ResE, ResW}} <- lists:zip(L, ResL)],
            [true] =:= lists:usort(CheckedL)
        end).
%% Test generator: each {Header, Expected} pair becomes one test
%% asserting the exact result of parse_accept_encoding/1.
parse_accept_encoding_test_() ->
    Tests = [
        %% An empty value is accepted for this header.
        {<<>>, []},
        {<<"*">>, [{<<"*">>, 1000}]},
        {<<"compress, gzip">>, [
            {<<"compress">>, 1000},
            {<<"gzip">>, 1000}
        ]},
        {<<"compress;q=0.5, gzip;q=1.0">>, [
            {<<"compress">>, 500},
            {<<"gzip">>, 1000}
        ]},
        {<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
            {<<"gzip">>, 1000},
            {<<"identity">>, 500},
            {<<"*">>, 0}
        ]}
    ],
    [{V, fun() -> R = parse_accept_encoding(V) end} || {V, R} <- Tests].
  495. -endif.
  496. -ifdef(PERF).
%% Performance test (compiled only when PERF is defined): hands a
%% representative Accept-Encoding value to horse:repeat/2 with a count
%% of 20000.
horse_parse_accept_encoding() ->
    horse:repeat(20000,
        parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
    ).
  501. -endif.
  502. %% @doc Parse the Accept-Language header.
  503. -spec parse_accept_language(binary()) -> [{binary(), qvalue()}].
  504. parse_accept_language(LanguageRange) ->
  505. nonempty(language_range_list(LanguageRange, [])).
  506. language_range_list(<<>>, Acc) -> lists:reverse(Acc);
  507. language_range_list(<< $\s, R/bits >>, Acc) -> language_range_list(R, Acc);
  508. language_range_list(<< $\t, R/bits >>, Acc) -> language_range_list(R, Acc);
  509. language_range_list(<< $\,, R/bits >>, Acc) -> language_range_list(R, Acc);
  510. language_range_list(<< $*, R/bits >>, Acc) -> language_range_before_semicolon(R, Acc, <<"*">>);
  511. language_range_list(<< C, R/bits >>, Acc) when ?IS_ALPHA(C) ->
  512. case C of
  513. ?INLINE_LOWERCASE(language_range, R, Acc, 1, <<>>)
  514. end.
  515. language_range(<<>>, Acc, _, T) -> lists:reverse([{T, 1000}|Acc]);
  516. language_range(<< $,, R/bits >>, Acc, _, T) -> language_range_list(R, [{T, 1000}|Acc]);
  517. language_range(<< $;, R/bits >>, Acc, _, T) -> language_range_before_weight(R, Acc, T);
  518. language_range(<< $\s, R/bits >>, Acc, _, T) -> language_range_before_semicolon(R, Acc, T);
  519. language_range(<< $\t, R/bits >>, Acc, _, T) -> language_range_before_semicolon(R, Acc, T);
  520. language_range(<< $-, R/bits >>, Acc, _, T) -> language_range_sub(R, Acc, 0, << T/binary, $- >>);
  521. language_range(<< _, _/bits >>, _, 8, _) -> error(badarg);
  522. language_range(<< C, R/bits >>, Acc, N, T) when ?IS_ALPHA(C) ->
  523. case C of
  524. ?INLINE_LOWERCASE(language_range, R, Acc, N + 1, T)
  525. end.
  526. language_range_sub(<<>>, Acc, N, T) when N > 0 -> lists:reverse([{T, 1000}|Acc]);
  527. language_range_sub(<< $,, R/bits >>, Acc, N, T) when N > 0 -> language_range_list(R, [{T, 1000}|Acc]);
  528. language_range_sub(<< $;, R/bits >>, Acc, N, T) when N > 0 -> language_range_before_weight(R, Acc, T);
  529. language_range_sub(<< $\s, R/bits >>, Acc, N, T) when N > 0 -> language_range_before_semicolon(R, Acc, T);
  530. language_range_sub(<< $\t, R/bits >>, Acc, N, T) when N > 0 -> language_range_before_semicolon(R, Acc, T);
  531. language_range_sub(<< $-, R/bits >>, Acc, N, T) when N > 0 -> language_range_sub(R, Acc, 0, << T/binary, $- >>);
  532. language_range_sub(<< _, _/bits >>, _, 8, _) -> error(badarg);
  533. language_range_sub(<< C, R/bits >>, Acc, N, T) when ?IS_ALPHA(C); ?IS_DIGIT(C) ->
  534. case C of
  535. ?INLINE_LOWERCASE(language_range_sub, R, Acc, N + 1, T)
  536. end.
  537. language_range_before_semicolon(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  538. language_range_before_semicolon(<< $,, R/bits >>, Acc, T) -> language_range_list(R, [{T, 1000}|Acc]);
  539. language_range_before_semicolon(<< $;, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
  540. language_range_before_semicolon(<< $\s, R/bits >>, Acc, T) -> language_range_before_semicolon(R, Acc, T);
  541. language_range_before_semicolon(<< $\t, R/bits >>, Acc, T) -> language_range_before_semicolon(R, Acc, T).
  542. language_range_before_weight(<< $\s, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
  543. language_range_before_weight(<< $\t, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
  544. language_range_before_weight(<< $q, $=, R/bits >>, Acc, T) -> language_range_weight(R, Acc, T);
  545. %% Special clause for broken user agents that confuse ; and , separators.
  546. language_range_before_weight(<< C, R/bits >>, Acc, T) when ?IS_ALPHA(C) ->
  547. case C of
  548. ?INLINE_LOWERCASE(language_range, R, [{T, 1000}|Acc], 1, <<>>)
  549. end.
  550. language_range_weight(<< "1.000", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  551. language_range_weight(<< "1.00", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  552. language_range_weight(<< "1.0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  553. language_range_weight(<< "1.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  554. language_range_weight(<< "1", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  555. language_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T)
  556. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  557. language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
  558. language_range_weight(<< "0.", A, B, R/bits >>, Acc, T)
  559. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  560. language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
  561. language_range_weight(<< "0.", A, R/bits >>, Acc, T)
  562. when A >= $0, A =< $9 ->
  563. language_range_list_sep(R, [{T, (A - $0) * 100}|Acc]);
  564. language_range_weight(<< "0.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]);
  565. language_range_weight(<< "0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]).
  566. language_range_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  567. language_range_list_sep(<< $\s, R/bits >>, Acc) -> language_range_list_sep(R, Acc);
  568. language_range_list_sep(<< $\t, R/bits >>, Acc) -> language_range_list_sep(R, Acc);
  569. language_range_list_sep(<< $,, R/bits >>, Acc) -> language_range_list(R, Acc).
  570. -ifdef(TEST).
%% Generator for a primary tag: 1 to 8 letters.
language_range_tag() ->
    vector(1, 8, alpha()).
%% Generator for a subtag: "-" followed by 1 to 8 letters or digits.
language_range_subtag() ->
    [$-, vector(1, 8, alphanum())].
%% Generator for a language range: a primary tag followed by up to 10
%% subtags, as nested lists.
language_range() ->
    [language_range_tag(), small_list(language_range_subtag())].
%% Generator for one Accept-Language item. Produces {Range, Weight,
%% Binary} where Range is the language range as a binary and Binary is
%% the range followed by ";q=" and the weight, or the range alone when
%% the weight is undefined.
accept_language() ->
    ?LET({R, W},
        {language_range(), weight()},
        {iolist_to_binary(R), W, iolist_to_binary([R, case W of
            undefined -> [];
            _ -> [<<";q=">>, qvalue_to_iodata(W)]
        end])}
    ).
  %% Property: every generated element parses to its lowercased range
  %% with the generated weight, an undefined weight parsing as 1000.
  prop_parse_accept_language() ->
      ?FORALL(Generated,
          non_empty(list(accept_language())),
          begin
              %% Join the elements with commas, dropping the leading one.
              <<_, Header/binary>> = iolist_to_binary(
                  [[$,, Element] || {_, _, Element} <- Generated]),
              Parsed = parse_accept_language(Header),
              Checks = [begin
                  ParsedRange =:= ?INLINE_LOWERCASE_BC(Range)
                      andalso (ParsedWeight =:= Weight
                          orelse (Weight =:= undefined andalso ParsedWeight =:= 1000))
              end || {{Range, Weight, _}, {ParsedRange, ParsedWeight}}
                  <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Checks)
          end).
  %% EUnit generator: each header value must parse to the listed
  %% {Range, Weight} pairs, in order.
  parse_accept_language_test_() ->
      Cases = [
          {<<"da, en-gb;q=0.8, en;q=0.7">>, [
              {<<"da">>, 1000},
              {<<"en-gb">>, 800},
              {<<"en">>, 700}
          ]},
          {<<"en, en-US, en-cockney, i-cherokee, x-pig-latin, es-419">>, [
              {<<"en">>, 1000},
              {<<"en-us">>, 1000},
              {<<"en-cockney">>, 1000},
              {<<"i-cherokee">>, 1000},
              {<<"x-pig-latin">>, 1000},
              {<<"es-419">>, 1000}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_language(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: each of these header values must make the parser
  %% crash.
  parse_accept_language_error_test_() ->
      Invalid = [
          <<>>,
          <<"loooooong">>,
          <<"en-us-loooooong">>,
          <<"419-en-us">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept_language(Header)) end}
          || Header <- Invalid].
  622. -endif.
  623. -ifdef(PERF).
  %% Benchmark: parse a three-element Accept-Language value 20000 times.
  horse_parse_accept_language() ->
      horse:repeat(
          20000,
          parse_accept_language(<<"da, en-gb;q=0.8, en;q=0.7">>)
      ).
  628. -endif.
  %% @doc Parse the Accept-Ranges header.
  %%
  %% The exact values "none" and "bytes" are answered directly. Any
  %% other value is run through token_ci_list/2 and the result is
  %% passed to nonempty/1.
  -spec parse_accept_ranges(binary()) -> [binary()].
  parse_accept_ranges(<<"none">>) ->
      [];
  parse_accept_ranges(<<"bytes">>) ->
      [<<"bytes">>];
  parse_accept_ranges(Value) ->
      Units = token_ci_list(Value, []),
      nonempty(Units).
  635. -ifdef(TEST).
  %% EUnit generator: header values and the unit lists they must yield.
  parse_accept_ranges_test_() ->
      Cases = [
          {<<"bytes">>, [<<"bytes">>]},
          {<<"none">>, []},
          {<<"bytes, pages, kilos">>, [<<"bytes">>, <<"pages">>, <<"kilos">>]}
      ],
      [{Header, fun() -> Expected = parse_accept_ranges(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: an empty header value must make the parser crash.
  parse_accept_ranges_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept_ranges(Header)) end}
          || Header <- Invalid].
  648. -endif.
  649. -ifdef(PERF).
  %% Benchmark: the "none" value, 200000 iterations.
  horse_parse_accept_ranges_none() ->
      horse:repeat(
          200000,
          parse_accept_ranges(<<"none">>)
      ).

  %% Benchmark: the "bytes" value, 200000 iterations.
  horse_parse_accept_ranges_bytes() ->
      horse:repeat(
          200000,
          parse_accept_ranges(<<"bytes">>)
      ).

  %% Benchmark: a multi-unit value, 200000 iterations.
  horse_parse_accept_ranges_other() ->
      horse:repeat(
          200000,
          parse_accept_ranges(<<"bytes, pages, kilos">>)
      ).
  662. -endif.
  %% @doc Parse the Age header.
  %%
  %% The value must consist of one or more decimal digits and nothing
  %% else. A leading sign is rejected, so values such as <<"+42">> or
  %% <<"-0">> (which binary_to_integer/1 would accept) crash like any
  %% other malformed input. Malformed values raise function_clause.
  -spec parse_age(binary()) -> non_neg_integer().
  parse_age(<<Digit, Rest/bits>>) when Digit >= $0, Digit =< $9 ->
      parse_age_digits(Rest, Digit - $0).

  %% Fold the remaining decimal digits into the accumulated value and
  %% return it once the input is exhausted.
  parse_age_digits(<<>>, Age) ->
      Age;
  parse_age_digits(<<Digit, Rest/bits>>, Age) when Digit >= $0, Digit =< $9 ->
      parse_age_digits(Rest, Age * 10 + (Digit - $0)).
  669. -ifdef(TEST).
  %% EUnit generator: Age values and the integers they must parse to.
  parse_age_test_() ->
      Cases = [
          {<<"0">>, 0},
          {<<"42">>, 42},
          {<<"69">>, 69},
          {<<"1337">>, 1337},
          {<<"3495">>, 3495},
          {<<"1234567890">>, 1234567890}
      ],
      [{Header, fun() -> Expected = parse_age(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: values that are not a single integer must crash.
  parse_age_error_test_() ->
      Invalid = [
          <<>>,
          <<"123, 123">>,
          <<"4.17">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_age(Header)) end}
          || Header <- Invalid].
  687. -endif.
  %% @doc Parse the Allow header.
  %%
  %% The value is handed to token_list/2 with an empty accumulator.
  %% An empty header value is valid and gives an empty list.
  -spec parse_allow(binary()) -> [binary()].
  parse_allow(Value) ->
      token_list(Value, []).
  692. -ifdef(TEST).
  %% Generator: {Methods, HeaderValue}. Each method is rendered as
  %% OWS "," OWS Method and the first byte of the joined result is
  %% dropped. An empty method list gives an empty header value.
  allow() ->
      ?LET(Entries,
          list({ows(), ows(), token()}),
          case Entries of
              [] ->
                  {[], <<>>};
              _ ->
                  Rendered = [[Before, $,, After, Method]
                      || {Before, After, Method} <- Entries],
                  <<_, Header/binary>> = iolist_to_binary(Rendered),
                  {[Method || {_, _, Method} <- Entries], Header}
          end).
  %% Property: parsing a generated header value gives back exactly the
  %% generated method list.
  prop_parse_allow() ->
      ?FORALL({Methods, Header},
          allow(),
          Methods =:= parse_allow(Header)).
  %% EUnit generator: Allow values and the method lists they must yield.
  parse_allow_test_() ->
      Cases = [
          {<<>>, []},
          {<<"GET, HEAD, PUT">>, [<<"GET">>, <<"HEAD">>, <<"PUT">>]}
      ],
      [{Header, fun() -> Expected = parse_allow(Header) end}
          || {Header, Expected} <- Cases].
  712. -endif.
  713. -ifdef(PERF).
  %% Benchmark: parse a three-method Allow value 200000 times.
  horse_parse_allow() ->
      horse:repeat(
          200000,
          parse_allow(<<"GET, HEAD, PUT">>)
      ).
  718. -endif.
  %% @doc Parse the Authorization header.
  %%
  %% We support Basic, Digest and Bearer schemes only. The scheme name
  %% is matched case-insensitively ("basic", "BASIC" and "Basic" are
  %% all accepted) and must be followed by a single space.
  %%
  %% In the Digest case we do not validate that the mandatory
  %% fields are present. When parsing auth-params, we do not
  %% accept BWS characters around the "=".
  %%
  %% Any other scheme, or an empty Bearer token, crashes with
  %% function_clause.
  -spec parse_authorization(binary())
      -> {basic, binary(), binary()}
      | {bearer, binary()}
      | {digest, [{binary(), binary()}]}.
  parse_authorization(<<B, A, S, I, C, " ", R/bits>>)
          when (B =:= $B orelse B =:= $b), (A =:= $A orelse A =:= $a),
              (S =:= $S orelse S =:= $s), (I =:= $I orelse I =:= $i),
              (C =:= $C orelse C =:= $c) ->
      %% The credentials are the base64 encoding of "userid:password".
      auth_basic(base64:decode(R), <<>>);
  parse_authorization(<<B, E1, A, R1, E2, R2, " ", R/bits>>)
          when R =/= <<>>,
              (B =:= $B orelse B =:= $b), (E1 =:= $E orelse E1 =:= $e),
              (A =:= $A orelse A =:= $a), (R1 =:= $R orelse R1 =:= $r),
              (E2 =:= $E orelse E2 =:= $e), (R2 =:= $R orelse R2 =:= $r) ->
      %% The token is returned untouched once its syntax is validated.
      validate_auth_bearer(R),
      {bearer, R};
  parse_authorization(<<D, I, G, E, S, T, " ", R/bits>>)
          when (D =:= $D orelse D =:= $d), (I =:= $I orelse I =:= $i),
              (G =:= $G orelse G =:= $g), (E =:= $E orelse E =:= $e),
              (S =:= $S orelse S =:= $s), (T =:= $T orelse T =:= $t) ->
      {digest, nonempty(auth_digest_list(R, []))}.
  %% Split decoded Basic credentials at the first colon. Bytes before
  %% it are appended to UserID one at a time; everything after it is
  %% the password. Input without a colon crashes with function_clause.
  auth_basic(<<$:, Password/bits>>, UserID) ->
      {basic, UserID, Password};
  auth_basic(<<Byte, Rest/bits>>, UserID) ->
      auth_basic(Rest, <<UserID/binary, Byte>>).
  %% Check a Bearer token: a run of ?IS_TOKEN68 characters, optionally
  %% followed by "=" padding (checked by validate_auth_bearer_eq/1).
  %% Returns ok, or crashes with function_clause on any other byte.
  validate_auth_bearer(<<Char, Rest/bits>>) when ?IS_TOKEN68(Char) ->
      validate_auth_bearer(Rest);
  validate_auth_bearer(<<$=, Rest/bits>>) ->
      validate_auth_bearer_eq(Rest);
  validate_auth_bearer(<<>>) ->
      ok.
  %% Check the trailing padding of a Bearer token: only "=" bytes may
  %% remain until the end of the input.
  validate_auth_bearer_eq(<<$=, Rest/bits>>) ->
      validate_auth_bearer_eq(Rest);
  validate_auth_bearer_eq(<<>>) ->
      ok.
  %% Parse the start of a Digest auth-param.
  %%
  %% Leading spaces, tabs and commas are skipped. Well-known parameter
  %% names written in lowercase are matched directly, each with the
  %% value form (token, quoted string or eight lowercase hex digits for
  %% "nc") this parser expects for it. Any other name is lowercased byte
  %% by byte through auth_digest_param/3. Parameters are accumulated in
  %% reverse and reversed once the input is exhausted.
  auth_digest_list(<<>>, Params) ->
      lists:reverse(Params);
  auth_digest_list(<<Sep, Rest/bits>>, Params)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      auth_digest_list(Rest, Params);
  auth_digest_list(<<"algorithm=", C, Rest/bits>>, Params) when ?IS_TOKEN(C) ->
      auth_digest_token(Rest, Params, <<"algorithm">>, << C >>);
  auth_digest_list(<<"cnonce=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"cnonce">>, <<>>);
  auth_digest_list(<<"qop=", C, Rest/bits>>, Params) when ?IS_TOKEN(C) ->
      auth_digest_token(Rest, Params, <<"qop">>, << C >>);
  auth_digest_list(<<"nc=", N1, N2, N3, N4, N5, N6, N7, N8, Rest/bits>>, Params)
          when ?IS_LHEX(N1), ?IS_LHEX(N2), ?IS_LHEX(N3), ?IS_LHEX(N4),
              ?IS_LHEX(N5), ?IS_LHEX(N6), ?IS_LHEX(N7), ?IS_LHEX(N8) ->
      Count = << N1, N2, N3, N4, N5, N6, N7, N8 >>,
      auth_digest_list_sep(Rest, [{<<"nc">>, Count} | Params]);
  auth_digest_list(<<"nonce=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"nonce">>, <<>>);
  auth_digest_list(<<"opaque=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"opaque">>, <<>>);
  auth_digest_list(<<"realm=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"realm">>, <<>>);
  auth_digest_list(<<"response=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"response">>, <<>>);
  auth_digest_list(<<"uri=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"uri">>, <<>>);
  auth_digest_list(<<"username=\"", Rest/bits>>, Params) ->
      auth_digest_quoted(Rest, Params, <<"username">>, <<>>);
  auth_digest_list(<<C, Rest/bits>>, Params) when ?IS_TOKEN(C) ->
      %% Generic parameter: start collecting its lowercased name.
      case C of
          ?INLINE_LOWERCASE(auth_digest_param, Rest, Params, <<>>)
      end.
  %% Collect the lowercased name of a generic auth-param into Name.
  %% At "=" the value is parsed as a quoted string when it opens with
  %% a double quote, as a token otherwise.
  auth_digest_param(<<$=, $", Rest/bits>>, Params, Name) ->
      auth_digest_quoted(Rest, Params, Name, <<>>);
  auth_digest_param(<<$=, C, Rest/bits>>, Params, Name) when ?IS_TOKEN(C) ->
      auth_digest_token(Rest, Params, Name, << C >>);
  auth_digest_param(<<C, Rest/bits>>, Params, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(auth_digest_param, Rest, Params, Name)
      end.
  %% Collect a token value for the parameter Name. The value ends at
  %% the end of the input, at a comma, or at whitespace (after which
  %% only a separator may follow).
  auth_digest_token(<<>>, Params, Name, Value) ->
      lists:reverse([{Name, Value} | Params]);
  auth_digest_token(<<$,, Rest/bits>>, Params, Name, Value) ->
      auth_digest_list(Rest, [{Name, Value} | Params]);
  auth_digest_token(<<WS, Rest/bits>>, Params, Name, Value)
          when WS =:= $\s; WS =:= $\t ->
      auth_digest_list_sep(Rest, [{Name, Value} | Params]);
  auth_digest_token(<<Char, Rest/bits>>, Params, Name, Value) when ?IS_TOKEN(Char) ->
      auth_digest_token(Rest, Params, Name, <<Value/binary, Char>>).
  %% Collect a quoted-string value for the parameter Name. A closing
  %% double quote ends the value; a backslash makes the following
  %% character part of the value verbatim, without the backslash.
  auth_digest_quoted(<<$", Rest/bits>>, Params, Name, Value) ->
      auth_digest_list_sep(Rest, [{Name, Value} | Params]);
  auth_digest_quoted(<<$\\, Escaped, Rest/bits>>, Params, Name, Value)
          when ?IS_VCHAR_OBS(Escaped) ->
      auth_digest_quoted(Rest, Params, Name, <<Value/binary, Escaped>>);
  auth_digest_quoted(<<Char, Rest/bits>>, Params, Name, Value)
          when ?IS_VCHAR_OBS(Char) ->
      auth_digest_quoted(Rest, Params, Name, <<Value/binary, Char>>).
  %% After a complete auth-param: skip spaces and tabs, then either
  %% finish at the end of the input or move on to the next parameter
  %% after a comma.
  auth_digest_list_sep(<<>>, Params) ->
      lists:reverse(Params);
  auth_digest_list_sep(<<$,, Rest/bits>>, Params) ->
      auth_digest_list(Rest, Params);
  auth_digest_list_sep(<<WS, Rest/bits>>, Params) when WS =:= $\s; WS =:= $\t ->
      auth_digest_list_sep(Rest, Params).
  783. -ifdef(TEST).
  %% EUnit generator: one example per supported scheme, with the term
  %% each header value must parse to.
  parse_authorization_test_() ->
      Cases = [
          {<<"Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==">>,
              {basic, <<"Aladdin">>, <<"open sesame">>}},
          {<<"Bearer mF_9.B5f-4.1JqM">>,
              {bearer, <<"mF_9.B5f-4.1JqM">>}},
          {<<"Digest username=\"Mufasa\","
                  "realm=\"testrealm@host.com\","
                  "nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\","
                  "uri=\"/dir/index.html\","
                  "qop=auth,"
                  "nc=00000001,"
                  "cnonce=\"0a4f113b\","
                  "response=\"6629fae49393a05397450978507c4ef1\","
                  "opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"">>,
              {digest, [
                  {<<"username">>, <<"Mufasa">>},
                  {<<"realm">>, <<"testrealm@host.com">>},
                  {<<"nonce">>, <<"dcd98b7102dd2f0e8b11d0f600bfb0c093">>},
                  {<<"uri">>, <<"/dir/index.html">>},
                  {<<"qop">>, <<"auth">>},
                  {<<"nc">>, <<"00000001">>},
                  {<<"cnonce">>, <<"0a4f113b">>},
                  {<<"response">>, <<"6629fae49393a05397450978507c4ef1">>},
                  {<<"opaque">>, <<"5ccc069c403ebaf9f0171e9517f40e41">>}]}}
      ],
      [{Header, fun() -> Expected = parse_authorization(Header) end}
          || {Header, Expected} <- Cases].
  809. -endif.
  810. -ifdef(PERF).
  %% Benchmark: Basic credentials, 20000 iterations.
  horse_parse_authorization_basic() ->
      horse:repeat(
          20000,
          parse_authorization(<<"Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==">>)
      ).

  %% Benchmark: Bearer token, 20000 iterations.
  horse_parse_authorization_bearer() ->
      horse:repeat(
          20000,
          parse_authorization(<<"Bearer mF_9.B5f-4.1JqM">>)
      ).

  %% Benchmark: full Digest credentials, 20000 iterations.
  horse_parse_authorization_digest() ->
      horse:repeat(
          20000,
          parse_authorization(
              <<"Digest username=\"Mufasa\","
                  "realm=\"testrealm@host.com\","
                  "nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\","
                  "uri=\"/dir/index.html\","
                  "qop=auth,"
                  "nc=00000001,"
                  "cnonce=\"0a4f113b\","
                  "response=\"6629fae49393a05397450978507c4ef1\","
                  "opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"">>)
      ).
  832. -endif.
  %% @doc Parse the Cache-Control header.
  %%
  %% In the fields list case, we do not support escaping, which
  %% shouldn't be needed anyway.
  %%
  %% The exact values "no-cache" and "max-age=0" are answered directly;
  %% everything else goes through the directive list parser and its
  %% result is passed to nonempty/1.
  -spec parse_cache_control(binary())
      -> [binary()
          | {binary(), binary()}
          | {binary(), non_neg_integer()}
          | {binary(), [binary()]}].
  parse_cache_control(<<"no-cache">>) ->
      [<<"no-cache">>];
  parse_cache_control(<<"max-age=0">>) ->
      [{<<"max-age">>, 0}];
  parse_cache_control(Value) ->
      Directives = cache_directive_list(Value, []),
      nonempty(Directives).
  %% Start of a cache directive: skip spaces, tabs and commas, then
  %% begin collecting the lowercased directive name. At the end of the
  %% input the directives are returned in their original order.
  cache_directive_list(<<>>, Directives) ->
      lists:reverse(Directives);
  cache_directive_list(<<Sep, Rest/bits>>, Directives)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      cache_directive_list(Rest, Directives);
  cache_directive_list(<<C, Rest/bits>>, Directives) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(cache_directive, Rest, Directives, <<>>)
      end.
  %% Collect a lowercased directive name into Name and decide how its
  %% argument, if any, is parsed:
  %%   - no argument: the bare name is stored;
  %%   - a quoted argument of "no-cache" or "private": a field list;
  %%   - any other quoted argument: a quoted string;
  %%   - "max-age", "max-stale", "min-fresh" or "s-maxage" with an
  %%     argument starting with a digit: an integer;
  %%   - any other argument starting with a token character: a token.
  cache_directive(<<>>, Directives, Name) ->
      lists:reverse([Name | Directives]);
  cache_directive(<<WS, Rest/bits>>, Directives, Name)
          when WS =:= $\s; WS =:= $\t ->
      cache_directive_list_sep(Rest, [Name | Directives]);
  cache_directive(<<$,, Rest/bits>>, Directives, Name) ->
      cache_directive_list(Rest, [Name | Directives]);
  cache_directive(<<$=, $", Rest/bits>>, Directives, Name)
          when Name =:= <<"no-cache">>; Name =:= <<"private">> ->
      cache_directive_fields_list(Rest, Directives, Name, []);
  cache_directive(<<$=, $", Rest/bits>>, Directives, Name) ->
      cache_directive_quoted_string(Rest, Directives, Name, <<>>);
  cache_directive(<<$=, C, Rest/bits>>, Directives, Name)
          when ?IS_DIGIT(C),
              (Name =:= <<"max-age">> orelse Name =:= <<"max-stale">>
                  orelse Name =:= <<"min-fresh">> orelse Name =:= <<"s-maxage">>) ->
      cache_directive_delta(Rest, Directives, Name, (C - $0));
  cache_directive(<<$=, C, Rest/bits>>, Directives, Name) when ?IS_TOKEN(C) ->
      cache_directive_token(Rest, Directives, Name, << C >>);
  cache_directive(<<C, Rest/bits>>, Directives, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(cache_directive, Rest, Directives, Name)
      end.
  %% Accumulate the decimal digits of an integer argument for the
  %% directive Name. The number ends at the end of the input, at
  %% whitespace or at a comma.
  cache_directive_delta(<<>>, Directives, Name, Seconds) ->
      lists:reverse([{Name, Seconds} | Directives]);
  cache_directive_delta(<<WS, Rest/bits>>, Directives, Name, Seconds)
          when WS =:= $\s; WS =:= $\t ->
      cache_directive_list_sep(Rest, [{Name, Seconds} | Directives]);
  cache_directive_delta(<<$,, Rest/bits>>, Directives, Name, Seconds) ->
      cache_directive_list(Rest, [{Name, Seconds} | Directives]);
  cache_directive_delta(<<Digit, Rest/bits>>, Directives, Name, Seconds)
          when ?IS_DIGIT(Digit) ->
      cache_directive_delta(Rest, Directives, Name, Seconds * 10 + (Digit - $0)).
  %% Inside the quoted field list of the directive Name, before a field
  %% name: skip spaces, tabs and commas. A closing double quote ends
  %% the list; a token character starts a new lowercased field name.
  cache_directive_fields_list(<<Sep, Rest/bits>>, Directives, Name, Fields)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      cache_directive_fields_list(Rest, Directives, Name, Fields);
  cache_directive_fields_list(<<$", Rest/bits>>, Directives, Name, Fields) ->
      cache_directive_list_sep(Rest, [{Name, lists:reverse(Fields)} | Directives]);
  cache_directive_fields_list(<<C, Rest/bits>>, Directives, Name, Fields)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(cache_directive_field, Rest, Directives, Name, Fields, <<>>)
      end.
  %% Collect one lowercased field name (Field) inside a quoted field
  %% list. The name ends at whitespace, at a comma, or at the closing
  %% double quote, which also ends the whole list.
  cache_directive_field(<<WS, Rest/bits>>, Directives, Name, Fields, Field)
          when WS =:= $\s; WS =:= $\t ->
      cache_directive_fields_list_sep(Rest, Directives, Name, [Field | Fields]);
  cache_directive_field(<<$,, Rest/bits>>, Directives, Name, Fields, Field) ->
      cache_directive_fields_list(Rest, Directives, Name, [Field | Fields]);
  cache_directive_field(<<$", Rest/bits>>, Directives, Name, Fields, Field) ->
      Complete = lists:reverse([Field | Fields]),
      cache_directive_list_sep(Rest, [{Name, Complete} | Directives]);
  cache_directive_field(<<C, Rest/bits>>, Directives, Name, Fields, Field)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(cache_directive_field, Rest, Directives, Name, Fields, Field)
      end.
  %% Inside a quoted field list, after a field name followed by
  %% whitespace: skip further whitespace, then expect either a comma
  %% (next field) or the closing double quote (end of the list).
  cache_directive_fields_list_sep(<<WS, Rest/bits>>, Directives, Name, Fields)
          when WS =:= $\s; WS =:= $\t ->
      cache_directive_fields_list_sep(Rest, Directives, Name, Fields);
  cache_directive_fields_list_sep(<<$,, Rest/bits>>, Directives, Name, Fields) ->
      cache_directive_fields_list(Rest, Directives, Name, Fields);
  cache_directive_fields_list_sep(<<$", Rest/bits>>, Directives, Name, Fields) ->
      cache_directive_list_sep(Rest, [{Name, lists:reverse(Fields)} | Directives]).
  %% Collect a token argument for the directive Name. The token is
  %% kept as written and ends at the end of the input, at whitespace
  %% or at a comma.
  cache_directive_token(<<>>, Directives, Name, Value) ->
      lists:reverse([{Name, Value} | Directives]);
  cache_directive_token(<<WS, Rest/bits>>, Directives, Name, Value)
          when WS =:= $\s; WS =:= $\t ->
      cache_directive_list_sep(Rest, [{Name, Value} | Directives]);
  cache_directive_token(<<$,, Rest/bits>>, Directives, Name, Value) ->
      cache_directive_list(Rest, [{Name, Value} | Directives]);
  cache_directive_token(<<Char, Rest/bits>>, Directives, Name, Value)
          when ?IS_TOKEN(Char) ->
      cache_directive_token(Rest, Directives, Name, <<Value/binary, Char>>).
  %% Collect a quoted-string argument for the directive Name. The
  %% closing double quote ends it; a backslash makes the following
  %% character part of the value verbatim, without the backslash.
  cache_directive_quoted_string(<<$", Rest/bits>>, Directives, Name, Value) ->
      cache_directive_list_sep(Rest, [{Name, Value} | Directives]);
  cache_directive_quoted_string(<<$\\, Escaped, Rest/bits>>, Directives, Name, Value)
          when ?IS_VCHAR_OBS(Escaped) ->
      cache_directive_quoted_string(Rest, Directives, Name, <<Value/binary, Escaped>>);
  cache_directive_quoted_string(<<Char, Rest/bits>>, Directives, Name, Value)
          when ?IS_VCHAR_OBS(Char) ->
      cache_directive_quoted_string(Rest, Directives, Name, <<Value/binary, Char>>).
  %% After a complete directive: skip spaces and tabs, then either
  %% finish at the end of the input or continue with the next
  %% directive after a comma.
  cache_directive_list_sep(<<>>, Directives) ->
      lists:reverse(Directives);
  cache_directive_list_sep(<<WS, Rest/bits>>, Directives)
          when WS =:= $\s; WS =:= $\t ->
      cache_directive_list_sep(Rest, Directives);
  cache_directive_list_sep(<<$,, Rest/bits>>, Directives) ->
      cache_directive_list(Rest, Directives).
  907. -ifdef(TEST).
  %% Generator: a token that is not one of the directive names the
  %% parser treats specially (compared exactly, case included).
  cache_directive_unreserved_token() ->
      Reserved = [
          <<"max-age">>, <<"max-stale">>, <<"min-fresh">>,
          <<"s-maxage">>, <<"no-cache">>, <<"private">>
      ],
      ?SUCHTHAT(Token,
          token(),
          not lists:member(Token, Reserved)).
  %% Generator: one cache directive in any of the supported shapes --
  %% bare token, token or quoted-string argument, integer argument, or
  %% a field list (tagged with the atom fields).
  cache_directive() ->
      DeltaNames = [<<"max-age">>, <<"max-stale">>, <<"min-fresh">>, <<"s-maxage">>],
      FieldNames = [<<"no-cache">>, <<"private">>],
      oneof([
          token(),
          {cache_directive_unreserved_token(), token()},
          {cache_directive_unreserved_token(), quoted_string()},
          {elements(DeltaNames), non_neg_integer()},
          {fields, elements(FieldNames), small_list(token())}
      ]).
  %% Generator: {Directives, HeaderValue}, where HeaderValue is the
  %% comma-joined rendering of a non-empty list of generated directives.
  cache_control() ->
      ?LET(Directives,
          non_empty(list(cache_directive())),
          begin
              %% Render one generated directive as iodata.
              Render = fun
                  ({fields, Name, Fields}) ->
                      [Name, $=, $", [[Field, $,] || Field <- Fields], $"];
                  ({Name, Value}) when is_integer(Value) ->
                      [Name, $=, integer_to_binary(Value)];
                  ({Name, Value}) ->
                      [Name, $=, Value];
                  (Name) ->
                      Name
              end,
              %% Every directive is prefixed with a comma; drop the first.
              <<_, CacheControl/binary>> = iolist_to_binary(
                  [[$,, Render(Directive)] || Directive <- Directives]),
              {Directives, CacheControl}
          end).
  %% Property: each generated directive parses to its expected form --
  %% lowercased names and field names, arguments passed through unquote/1.
  prop_parse_cache_control() ->
      ?FORALL({Directives, CacheControl},
          cache_control(),
          begin
              Parsed = parse_cache_control(CacheControl),
              Checks = [begin
                  Expected = case Directive of
                      {fields, Name, Fields} ->
                          {?INLINE_LOWERCASE_BC(Name),
                              [?INLINE_LOWERCASE_BC(Field) || Field <- Fields]};
                      {Name, Value} ->
                          {?INLINE_LOWERCASE_BC(Name), unquote(Value)};
                      Name ->
                          ?INLINE_LOWERCASE_BC(Name)
                  end,
                  Expected =:= Result
              end || {Directive, Result} <- lists:zip(Directives, Parsed)],
              [true] =:= lists:usort(Checks)
          end).
  %% EUnit generator: Cache-Control values and the directive lists
  %% they must parse to.
  parse_cache_control_test_() ->
      Cases = [
          {<<"no-cache">>, [<<"no-cache">>]},
          {<<"no-store">>, [<<"no-store">>]},
          {<<"max-age=0">>, [{<<"max-age">>, 0}]},
          {<<"max-age=30">>, [{<<"max-age">>, 30}]},
          {<<"private, community=\"UCI\"">>,
              [<<"private">>, {<<"community">>, <<"UCI">>}]},
          {<<"private=\"Content-Type, Content-Encoding, Content-Language\"">>,
              [{<<"private">>,
                  [<<"content-type">>, <<"content-encoding">>, <<"content-language">>]}]}
      ],
      [{Header, fun() -> Expected = parse_cache_control(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: an empty header value must make the parser crash.
  parse_cache_control_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_cache_control(Header)) end}
          || Header <- Invalid].
  965. -endif.
  966. -ifdef(PERF).
  %% Benchmark: the "no-cache" value, 200000 iterations.
  horse_parse_cache_control_no_cache() ->
      horse:repeat(
          200000,
          parse_cache_control(<<"no-cache">>)
      ).

  %% Benchmark: the "max-age=0" value, 200000 iterations.
  horse_parse_cache_control_max_age_0() ->
      horse:repeat(
          200000,
          parse_cache_control(<<"max-age=0">>)
      ).

  %% Benchmark: the "max-age=30" value, 200000 iterations.
  horse_parse_cache_control_max_age_30() ->
      horse:repeat(
          200000,
          parse_cache_control(<<"max-age=30">>)
      ).

  %% Benchmark: a bare directive plus a custom quoted one, 200000
  %% iterations.
  horse_parse_cache_control_custom() ->
      horse:repeat(
          200000,
          parse_cache_control(<<"private, community=\"UCI\"">>)
      ).

  %% Benchmark: a "private" directive with a field list, 200000
  %% iterations.
  horse_parse_cache_control_fields() ->
      horse:repeat(
          200000,
          parse_cache_control(<<"private=\"Content-Type, Content-Encoding, Content-Language\"">>)
      ).
  987. -endif.
  %% @doc Parse the Connection header.
  %%
  %% The exact lowercase values "close" and "keep-alive" are answered
  %% directly. Any other value is run through token_ci_list/2 and the
  %% result is passed to nonempty/1.
  -spec parse_connection(binary()) -> [binary()].
  parse_connection(<<"close">>) ->
      [<<"close">>];
  parse_connection(<<"keep-alive">>) ->
      [<<"keep-alive">>];
  parse_connection(Value) ->
      Options = token_ci_list(Value, []),
      nonempty(Options).
  996. -ifdef(TEST).
  %% Property: a comma-joined list of generated tokens parses to the
  %% same tokens, lowercased, in the same order.
  prop_parse_connection() ->
      ?FORALL(Tokens,
          non_empty(list(token())),
          begin
              %% Every token is prefixed with a comma; drop the first.
              <<_, Connection/binary>> = iolist_to_binary(
                  [[$,, Tok] || Tok <- Tokens]),
              Parsed = parse_connection(Connection),
              Checks = [?INLINE_LOWERCASE_BC(Tok) =:= Result
                  || {Tok, Result} <- lists:zip(Tokens, Parsed)],
              [true] =:= lists:usort(Checks)
          end).
  %% EUnit generator: Connection values, in various letter cases, and
  %% the lowercased option lists they must parse to.
  parse_connection_test_() ->
      Cases = [
          {<<"close">>, [<<"close">>]},
          {<<"ClOsE">>, [<<"close">>]},
          {<<"Keep-Alive">>, [<<"keep-alive">>]},
          {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
      ],
      [{Header, fun() -> Expected = parse_connection(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: an empty header value must make the parser crash.
  parse_connection_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_connection(Header)) end}
          || Header <- Invalid].
  1019. -endif.
  1020. -ifdef(PERF).
  %% Benchmark: the "close" value, 200000 iterations.
  horse_parse_connection_close() ->
      horse:repeat(
          200000,
          parse_connection(<<"close">>)
      ).

  %% Benchmark: the "keep-alive" value, 200000 iterations.
  horse_parse_connection_keepalive() ->
      horse:repeat(
          200000,
          parse_connection(<<"keep-alive">>)
      ).

  %% Benchmark: a two-option value, 200000 iterations.
  horse_parse_connection_keepalive_upgrade() ->
      horse:repeat(
          200000,
          parse_connection(<<"keep-alive, upgrade">>)
      ).
  1033. -endif.
  %% @doc Parse the Content-Encoding header.
  %%
  %% The value is run through token_ci_list/2 and the result is passed
  %% to nonempty/1.
  -spec parse_content_encoding(binary()) -> [binary()].
  parse_content_encoding(Value) ->
      Codings = token_ci_list(Value, []),
      nonempty(Codings).
  1038. -ifdef(TEST).
  %% EUnit generator: Content-Encoding values and the coding lists
  %% they must parse to.
  parse_content_encoding_test_() ->
      Cases = [
          {<<"gzip">>, [<<"gzip">>]}
      ],
      [{Header, fun() -> Expected = parse_content_encoding(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: an empty header value must make the parser crash.
  parse_content_encoding_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_content_encoding(Header)) end}
          || Header <- Invalid].
  1049. -endif.
  1050. -ifdef(PERF).
  %% Benchmark: the "gzip" value, 200000 iterations.
  horse_parse_content_encoding() ->
      horse:repeat(
          200000,
          parse_content_encoding(<<"gzip">>)
      ).
  1055. -endif.
  %% @doc Parse the Content-Language header.
  %%
  %% We do not support irregular deprecated tags that do not match
  %% the ABNF.
  %%
  %% The value is run through langtag_list/2 and the result is passed
  %% to nonempty/1.
  -spec parse_content_language(binary()) -> [binary()].
  parse_content_language(Value) ->
      Tags = langtag_list(Value, []),
      nonempty(Tags).
  %% Start of a language tag: skip spaces, tabs and commas, then read
  %% the primary language subtag (three or two letters, lowercased) or
  %% the "x" that opens a private-use tag. At the end of the input the
  %% tags are returned in their original order.
  langtag_list(<<>>, Tags) ->
      lists:reverse(Tags);
  langtag_list(<<Sep, Rest/bits>>, Tags)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      langtag_list(Rest, Tags);
  langtag_list(<<L1, L2, L3, Rest/bits>>, Tags)
          when ?IS_ALPHA(L1), ?IS_ALPHA(L2), ?IS_ALPHA(L3) ->
      langtag_extlang(Rest, Tags, << ?LC(L1), ?LC(L2), ?LC(L3) >>, 0);
  langtag_list(<<L1, L2, Rest/bits>>, Tags)
          when ?IS_ALPHA(L1), ?IS_ALPHA(L2) ->
      langtag_extlang(Rest, Tags, << ?LC(L1), ?LC(L2) >>, 0);
  langtag_list(<<X, Rest/bits>>, Tags) when X =:= $x; X =:= $X ->
      langtag_privateuse_sub(Rest, Tags, << $x >>, 0).
  %% After the primary language subtag. Tag is the lowercased tag built
  %% so far and Count the number of three-letter subtags already
  %% appended by this function.
  %%
  %% The next "-" subtag is classified by length, longest first:
  %%   - 5 to 8 alphanumerics: appended, continue in langtag_variant/3;
  %%   - 4 letters: appended, continue in langtag_region/3;
  %%   - 3 letters: appended; continue in langtag_script/3 when Count
  %%     is 2, otherwise stay here with Count + 1;
  %%   - anything else: handed to langtag_region/3 unchanged.
  langtag_extlang(<<>>, Tags, Tag, _) ->
      lists:reverse([Tag | Tags]);
  langtag_extlang(<<$,, Rest/bits>>, Tags, Tag, _) ->
      langtag_list(Rest, [Tag | Tags]);
  langtag_extlang(<<WS, Rest/bits>>, Tags, Tag, _) when WS =:= $\s; WS =:= $\t ->
      langtag_list_sep(Rest, [Tag | Tags]);
  langtag_extlang(<<$-, S1, S2, S3, S4, S5, S6, S7, S8, Rest/bits>>, Tags, Tag, _)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7), ?IS_ALPHANUM(S8) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7), ?LC(S8) >>);
  langtag_extlang(<<$-, S1, S2, S3, S4, S5, S6, S7, Rest/bits>>, Tags, Tag, _)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7) >>);
  langtag_extlang(<<$-, S1, S2, S3, S4, S5, S6, Rest/bits>>, Tags, Tag, _)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5), ?LC(S6) >>);
  langtag_extlang(<<$-, S1, S2, S3, S4, S5, Rest/bits>>, Tags, Tag, _)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5) >>);
  langtag_extlang(<<$-, S1, S2, S3, S4, Rest/bits>>, Tags, Tag, _)
          when ?IS_ALPHA(S1), ?IS_ALPHA(S2), ?IS_ALPHA(S3), ?IS_ALPHA(S4) ->
      langtag_region(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4) >>);
  langtag_extlang(<<$-, S1, S2, S3, Rest/bits>>, Tags, Tag, Count)
          when ?IS_ALPHA(S1), ?IS_ALPHA(S2), ?IS_ALPHA(S3) ->
      Extended = << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3) >>,
      case Count of
          2 -> langtag_script(Rest, Tags, Extended);
          _ -> langtag_extlang(Rest, Tags, Extended, Count + 1)
      end;
  langtag_extlang(Rest, Tags, Tag, _) ->
      langtag_region(Rest, Tags, Tag).
  %% Reached from langtag_extlang/4 once three three-letter subtags
  %% have been appended. The next "-" subtag is classified by length:
  %%   - 5 to 8 alphanumerics: appended, continue in langtag_variant/3;
  %%   - 4 letters: appended, continue in langtag_region/3;
  %%   - anything else: handed to langtag_region/3 unchanged.
  langtag_script(<<>>, Tags, Tag) ->
      lists:reverse([Tag | Tags]);
  langtag_script(<<$,, Rest/bits>>, Tags, Tag) ->
      langtag_list(Rest, [Tag | Tags]);
  langtag_script(<<WS, Rest/bits>>, Tags, Tag) when WS =:= $\s; WS =:= $\t ->
      langtag_list_sep(Rest, [Tag | Tags]);
  langtag_script(<<$-, S1, S2, S3, S4, S5, S6, S7, S8, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7), ?IS_ALPHANUM(S8) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7), ?LC(S8) >>);
  langtag_script(<<$-, S1, S2, S3, S4, S5, S6, S7, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7) >>);
  langtag_script(<<$-, S1, S2, S3, S4, S5, S6, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5), ?LC(S6) >>);
  langtag_script(<<$-, S1, S2, S3, S4, S5, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5) >>);
  langtag_script(<<$-, S1, S2, S3, S4, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHA(S1), ?IS_ALPHA(S2), ?IS_ALPHA(S3), ?IS_ALPHA(S4) ->
      langtag_region(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4) >>);
  langtag_script(Rest, Tags, Tag) ->
      langtag_region(Rest, Tags, Tag).
  %% Classify the next "-" subtag by length, longest first:
  %%   - 5 to 8 alphanumerics, or a digit followed by 3 alphanumerics:
  %%     appended, continue in langtag_variant/3;
  %%   - 2 letters or 3 digits: appended, continue in langtag_variant/3;
  %%   - anything else: handed to langtag_variant/3 unchanged.
  %% Letters are lowercased; digits are copied as they are.
  langtag_region(<<>>, Tags, Tag) ->
      lists:reverse([Tag | Tags]);
  langtag_region(<<$,, Rest/bits>>, Tags, Tag) ->
      langtag_list(Rest, [Tag | Tags]);
  langtag_region(<<WS, Rest/bits>>, Tags, Tag) when WS =:= $\s; WS =:= $\t ->
      langtag_list_sep(Rest, [Tag | Tags]);
  langtag_region(<<$-, S1, S2, S3, S4, S5, S6, S7, S8, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7), ?IS_ALPHANUM(S8) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7), ?LC(S8) >>);
  langtag_region(<<$-, S1, S2, S3, S4, S5, S6, S7, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7) >>);
  langtag_region(<<$-, S1, S2, S3, S4, S5, S6, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5), ?LC(S6) >>);
  langtag_region(<<$-, S1, S2, S3, S4, S5, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5) >>);
  langtag_region(<<$-, D1, S2, S3, S4, Rest/bits>>, Tags, Tag)
          when ?IS_DIGIT(D1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, D1, ?LC(S2), ?LC(S3), ?LC(S4) >>);
  langtag_region(<<$-, S1, S2, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHA(S1), ?IS_ALPHA(S2) ->
      langtag_variant(Rest, Tags, << Tag/binary, $-, ?LC(S1), ?LC(S2) >>);
  langtag_region(<<$-, D1, D2, D3, Rest/bits>>, Tags, Tag)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2), ?IS_DIGIT(D3) ->
      langtag_variant(Rest, Tags, << Tag/binary, $-, D1, D2, D3 >>);
  langtag_region(Rest, Tags, Tag) ->
      langtag_variant(Rest, Tags, Tag).
  %% Append any number of further "-" subtags that are either 5 to 8
  %% alphanumerics or a digit followed by 3 alphanumerics, staying in
  %% this function after each one. Anything else is handed to
  %% langtag_extension/3 unchanged. Letters are lowercased.
  langtag_variant(<<>>, Tags, Tag) ->
      lists:reverse([Tag | Tags]);
  langtag_variant(<<$,, Rest/bits>>, Tags, Tag) ->
      langtag_list(Rest, [Tag | Tags]);
  langtag_variant(<<WS, Rest/bits>>, Tags, Tag) when WS =:= $\s; WS =:= $\t ->
      langtag_list_sep(Rest, [Tag | Tags]);
  langtag_variant(<<$-, S1, S2, S3, S4, S5, S6, S7, S8, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7), ?IS_ALPHANUM(S8) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7), ?LC(S8) >>);
  langtag_variant(<<$-, S1, S2, S3, S4, S5, S6, S7, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6), ?IS_ALPHANUM(S7) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4),
              ?LC(S5), ?LC(S6), ?LC(S7) >>);
  langtag_variant(<<$-, S1, S2, S3, S4, S5, S6, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5), ?IS_ALPHANUM(S6) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5), ?LC(S6) >>);
  langtag_variant(<<$-, S1, S2, S3, S4, S5, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(S1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4),
              ?IS_ALPHANUM(S5) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, ?LC(S1), ?LC(S2), ?LC(S3), ?LC(S4), ?LC(S5) >>);
  langtag_variant(<<$-, D1, S2, S3, S4, Rest/bits>>, Tags, Tag)
          when ?IS_DIGIT(D1), ?IS_ALPHANUM(S2), ?IS_ALPHANUM(S3), ?IS_ALPHANUM(S4) ->
      langtag_variant(Rest, Tags,
          << Tag/binary, $-, D1, ?LC(S2), ?LC(S3), ?LC(S4) >>);
  langtag_variant(Rest, Tags, Tag) ->
      langtag_extension(Rest, Tags, Tag).
  %% Expect the end of the tag or a one-character subtag: "-x" / "-X"
  %% opens a private-use section (always stored as lowercase "x"), any
  %% other alphanumeric opens an extension whose subtags are read by
  %% langtag_extension_sub/4. Other input crashes with function_clause.
  langtag_extension(<<>>, Tags, Tag) ->
      lists:reverse([Tag | Tags]);
  langtag_extension(<<$,, Rest/bits>>, Tags, Tag) ->
      langtag_list(Rest, [Tag | Tags]);
  langtag_extension(<<WS, Rest/bits>>, Tags, Tag) when WS =:= $\s; WS =:= $\t ->
      langtag_list_sep(Rest, [Tag | Tags]);
  langtag_extension(<<$-, X, Rest/bits>>, Tags, Tag) when X =:= $x; X =:= $X ->
      langtag_privateuse_sub(Rest, Tags, << Tag/binary, $-, $x >>, 0);
  langtag_extension(<<$-, Singleton, Rest/bits>>, Tags, Tag)
          when ?IS_ALPHANUM(Singleton) ->
      langtag_extension_sub(Rest, Tags, << Tag/binary, $-, ?LC(Singleton) >>, 0).
  1182. langtag_extension_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
  1183. langtag_extension_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
  1184. langtag_extension_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  1185. langtag_extension_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  1186. langtag_extension_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
  1187. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  1188. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  1189. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
  1190. langtag_extension_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
  1191. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  1192. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  1193. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
  1194. langtag_extension_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
  1195. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  1196. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  1197. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
  1198. langtag_extension_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
  1199. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  1200. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
  1201. langtag_extension_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
  1202. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  1203. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
  1204. langtag_extension_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
  1205. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
  1206. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
  1207. langtag_extension_sub(<< $-, A, B, R/bits >>, Acc, T, N)
  1208. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
  1209. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
  1210. langtag_extension_sub(R, Acc, T, N) when N > 0 ->
  1211. langtag_extension(R, Acc, T).
  1212. langtag_privateuse_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
  1213. langtag_privateuse_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
  1214. langtag_privateuse_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  1215. langtag_privateuse_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  1216. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
  1217. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  1218. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  1219. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
  1220. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
  1221. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  1222. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  1223. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
  1224. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
  1225. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  1226. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  1227. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
  1228. langtag_privateuse_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
  1229. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  1230. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
  1231. langtag_privateuse_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
  1232. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  1233. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
  1234. langtag_privateuse_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
  1235. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
  1236. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
  1237. langtag_privateuse_sub(<< $-, A, B, R/bits >>, Acc, T, N)
  1238. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
  1239. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
  1240. langtag_privateuse_sub(<< $-, A, R/bits >>, Acc, T, N)
  1241. when ?IS_ALPHANUM(A) ->
  1242. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A) >>, N + 1).
  1243. langtag_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  1244. langtag_list_sep(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
  1245. langtag_list_sep(<< $\s, R/bits >>, Acc) -> langtag_list_sep(R, Acc);
  1246. langtag_list_sep(<< $\t, R/bits >>, Acc) -> langtag_list_sep(R, Acc).
  1247. -ifdef(TEST).
  1248. langtag_language() -> vector(2, 3, alpha()).
  1249. langtag_extlang() -> vector(0, 3, [$-, alpha(), alpha(), alpha()]).
  1250. langtag_script() -> oneof([[], [$-, alpha(), alpha(), alpha(), alpha()]]).
  1251. langtag_region() -> oneof([[], [$-, alpha(), alpha()], [$-, digit(), digit(), digit()]]).
  1252. langtag_variant() ->
  1253. small_list(frequency([
  1254. {4, [$-, vector(5, 8, alphanum())]},
  1255. {1, [$-, digit(), alphanum(), alphanum(), alphanum()]}
  1256. ])).
  1257. langtag_extension() ->
  1258. small_list([$-, ?SUCHTHAT(S, alphanum(), S =/= $x andalso S =/= $X),
  1259. small_non_empty_list([$-, vector(2, 8, alphanum())])
  1260. ]).
  1261. langtag_privateuse() -> oneof([[], [$-, langtag_privateuse_nodash()]]).
  1262. langtag_privateuse_nodash() -> [elements([$x, $X]), small_non_empty_list([$-, vector(1, 8, alphanum())])].
  1263. private_language_tag() -> ?LET(T, langtag_privateuse_nodash(), iolist_to_binary(T)).
  1264. language_tag() ->
  1265. ?LET(IoList,
  1266. [langtag_language(), langtag_extlang(), langtag_script(), langtag_region(),
  1267. langtag_variant(), langtag_extension(), langtag_privateuse()],
  1268. iolist_to_binary(IoList)).
  1269. content_language() ->
  1270. ?LET(L,
  1271. non_empty(list(frequency([
  1272. {90, language_tag()},
  1273. {10, private_language_tag()}
  1274. ]))),
  1275. begin
  1276. << _, ContentLanguage/binary >> = iolist_to_binary([[$,, T] || T <- L]),
  1277. {L, ContentLanguage}
  1278. end).
  1279. prop_parse_content_language() ->
  1280. ?FORALL({L, ContentLanguage},
  1281. content_language(),
  1282. begin
  1283. ResL = parse_content_language(ContentLanguage),
  1284. CheckedL = [?INLINE_LOWERCASE_BC(T) =:= ResT || {T, ResT} <- lists:zip(L, ResL)],
  1285. [true] =:= lists:usort(CheckedL)
  1286. end).
  1287. parse_content_language_test_() ->
  1288. Tests = [
  1289. {<<"de">>, [<<"de">>]},
  1290. {<<"fr">>, [<<"fr">>]},
  1291. {<<"ja">>, [<<"ja">>]},
  1292. {<<"zh-Hant">>, [<<"zh-hant">>]},
  1293. {<<"zh-Hans">>, [<<"zh-hans">>]},
  1294. {<<"sr-Cyrl">>, [<<"sr-cyrl">>]},
  1295. {<<"sr-Latn">>, [<<"sr-latn">>]},
  1296. {<<"zh-cmn-Hans-CN">>, [<<"zh-cmn-hans-cn">>]},
  1297. {<<"cmn-Hans-CN">>, [<<"cmn-hans-cn">>]},
  1298. {<<"zh-yue-HK">>, [<<"zh-yue-hk">>]},
  1299. {<<"yue-HK">>, [<<"yue-hk">>]},
  1300. {<<"zh-Hans-CN">>, [<<"zh-hans-cn">>]},
  1301. {<<"sr-Latn-RS">>, [<<"sr-latn-rs">>]},
  1302. {<<"sl-rozaj">>, [<<"sl-rozaj">>]},
  1303. {<<"sl-rozaj-biske">>, [<<"sl-rozaj-biske">>]},
  1304. {<<"sl-nedis">>, [<<"sl-nedis">>]},
  1305. {<<"de-CH-1901">>, [<<"de-ch-1901">>]},
  1306. {<<"sl-IT-nedis">>, [<<"sl-it-nedis">>]},
  1307. {<<"hy-Latn-IT-arevela">>, [<<"hy-latn-it-arevela">>]},
  1308. {<<"de-DE">>, [<<"de-de">>]},
  1309. {<<"en-US">>, [<<"en-us">>]},
  1310. {<<"es-419">>, [<<"es-419">>]},
  1311. {<<"de-CH-x-phonebk">>, [<<"de-ch-x-phonebk">>]},
  1312. {<<"az-Arab-x-AZE-derbend">>, [<<"az-arab-x-aze-derbend">>]},
  1313. {<<"x-whatever">>, [<<"x-whatever">>]},
  1314. {<<"qaa-Qaaa-QM-x-southern">>, [<<"qaa-qaaa-qm-x-southern">>]},
  1315. {<<"de-Qaaa">>, [<<"de-qaaa">>]},
  1316. {<<"sr-Latn-QM">>, [<<"sr-latn-qm">>]},
  1317. {<<"sr-Qaaa-RS">>, [<<"sr-qaaa-rs">>]},
  1318. {<<"en-US-u-islamcal">>, [<<"en-us-u-islamcal">>]},
  1319. {<<"zh-CN-a-myext-x-private">>, [<<"zh-cn-a-myext-x-private">>]},
  1320. {<<"en-a-myext-b-another">>, [<<"en-a-myext-b-another">>]},
  1321. {<<"mn-Cyrl-MN">>, [<<"mn-cyrl-mn">>]},
  1322. {<<"MN-cYRL-mn">>, [<<"mn-cyrl-mn">>]},
  1323. {<<"mN-cYrL-Mn">>, [<<"mn-cyrl-mn">>]},
  1324. {<<"az-Arab-IR">>, [<<"az-arab-ir">>]},
  1325. {<<"zh-gan">>, [<<"zh-gan">>]},
  1326. {<<"zh-yue">>, [<<"zh-yue">>]},
  1327. {<<"zh-cmn">>, [<<"zh-cmn">>]},
  1328. {<<"de-AT">>, [<<"de-at">>]},
  1329. {<<"de-CH-1996">>, [<<"de-ch-1996">>]},
  1330. {<<"en-Latn-GB-boont-r-extended-sequence-x-private">>,
  1331. [<<"en-latn-gb-boont-r-extended-sequence-x-private">>]},
  1332. {<<"el-x-koine">>, [<<"el-x-koine">>]},
  1333. {<<"el-x-attic">>, [<<"el-x-attic">>]},
  1334. {<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>,
  1335. [<<"fr">>, <<"en-us">>, <<"es-419">>, <<"az-arab">>, <<"x-pig-latin">>, <<"man-nkoo-gn">>]},
  1336. {<<"da">>, [<<"da">>]},
  1337. {<<"mi, en">>, [<<"mi">>, <<"en">>]}
  1338. ],
  1339. [{V, fun() -> R = parse_content_language(V) end} || {V, R} <- Tests].
  1340. parse_content_language_error_test_() ->
  1341. Tests = [
  1342. <<>>
  1343. ],
  1344. [{V, fun() -> {'EXIT', _} = (catch parse_content_language(V)) end} || V <- Tests].
  1345. -endif.
  1346. -ifdef(PERF).
  1347. horse_parse_content_language() ->
  1348. horse:repeat(100000,
  1349. parse_content_language(<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>)
  1350. ).
  1351. -endif.
  1352. %% @doc Parse the Content-Length header.
  1353. %%
  1354. %% The value has at least one digit, and may be followed by whitespace.
  1355. -spec parse_content_length(binary()) -> non_neg_integer().
  1356. parse_content_length(ContentLength) ->
  1357. I = binary_to_integer(ContentLength),
  1358. true = I >= 0,
  1359. I.
  1360. -ifdef(TEST).
  1361. prop_parse_content_length() ->
  1362. ?FORALL(
  1363. X,
  1364. non_neg_integer(),
  1365. X =:= parse_content_length(integer_to_binary(X))
  1366. ).
  1367. parse_content_length_test_() ->
  1368. Tests = [
  1369. {<<"0">>, 0},
  1370. {<<"42">>, 42},
  1371. {<<"69">>, 69},
  1372. {<<"1337">>, 1337},
  1373. {<<"3495">>, 3495},
  1374. {<<"1234567890">>, 1234567890}
  1375. ],
  1376. [{V, fun() -> R = parse_content_length(V) end} || {V, R} <- Tests].
  1377. parse_content_length_error_test_() ->
  1378. Tests = [
  1379. <<>>,
  1380. <<"-1">>,
  1381. <<"123, 123">>,
  1382. <<"4.17">>
  1383. ],
  1384. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  1385. -endif.
  1386. -ifdef(PERF).
  1387. horse_parse_content_length_zero() ->
  1388. horse:repeat(100000,
  1389. parse_content_length(<<"0">>)
  1390. ).
  1391. horse_parse_content_length_giga() ->
  1392. horse:repeat(100000,
  1393. parse_content_length(<<"1234567890">>)
  1394. ).
  1395. -endif.
  1396. %% @doc Parse the Content-Range header.
  1397. -spec parse_content_range(binary())
  1398. -> {bytes, non_neg_integer(), non_neg_integer(), non_neg_integer() | '*'}
  1399. | {bytes, '*', non_neg_integer()} | {binary(), binary()}.
  1400. parse_content_range(<<"bytes */", C, R/bits >>) when ?IS_DIGIT(C) -> unsatisfied_range(R, C - $0);
  1401. parse_content_range(<<"bytes ", C, R/bits >>) when ?IS_DIGIT(C) -> byte_range_first(R, C - $0);
  1402. parse_content_range(<< C, R/bits >>) when ?IS_TOKEN(C) ->
  1403. case C of
  1404. ?INLINE_LOWERCASE(other_content_range_unit, R, <<>>)
  1405. end.
  1406. byte_range_first(<< $-, C, R/bits >>, First) when ?IS_DIGIT(C) -> byte_range_last(R, First, C - $0);
  1407. byte_range_first(<< C, R/bits >>, First) when ?IS_DIGIT(C) -> byte_range_first(R, First * 10 + C - $0).
  1408. byte_range_last(<<"/*">>, First, Last) -> {bytes, First, Last, '*'};
  1409. byte_range_last(<< $/, C, R/bits >>, First, Last) when ?IS_DIGIT(C) -> byte_range_complete(R, First, Last, C - $0);
  1410. byte_range_last(<< C, R/bits >>, First, Last) when ?IS_DIGIT(C) -> byte_range_last(R, First, Last * 10 + C - $0).
  1411. byte_range_complete(<<>>, First, Last, Complete) -> {bytes, First, Last, Complete};
  1412. byte_range_complete(<< C, R/bits >>, First, Last, Complete) when ?IS_DIGIT(C) ->
  1413. byte_range_complete(R, First, Last, Complete * 10 + C - $0).
  1414. unsatisfied_range(<<>>, Complete) -> {bytes, '*', Complete};
  1415. unsatisfied_range(<< C, R/bits >>, Complete) when ?IS_DIGIT(C) -> unsatisfied_range(R, Complete * 10 + C - $0).
  1416. other_content_range_unit(<< $\s, R/bits >>, Unit) -> other_content_range_resp(R, Unit, <<>>);
  1417. other_content_range_unit(<< C, R/bits >>, Unit) when ?IS_TOKEN(C) ->
  1418. case C of
  1419. ?INLINE_LOWERCASE(other_content_range_unit, R, Unit)
  1420. end.
  1421. other_content_range_resp(<<>>, Unit, Resp) -> {Unit, Resp};
  1422. other_content_range_resp(<< C, R/bits >>, Unit, Resp) when ?IS_CHAR(C) -> other_content_range_resp(R, Unit, << Resp/binary, C >>).
  1423. -ifdef(TEST).
  1424. content_range() ->
  1425. ?LET(ContentRange,
  1426. oneof([
  1427. ?SUCHTHAT({bytes, First, Last, Complete},
  1428. {bytes, non_neg_integer(), non_neg_integer(), non_neg_integer()},
  1429. First =< Last andalso Last < Complete),
  1430. ?SUCHTHAT({bytes, First, Last, '*'},
  1431. {bytes, non_neg_integer(), non_neg_integer(), '*'},
  1432. First =< Last),
  1433. {bytes, '*', non_neg_integer()},
  1434. {token(), ?LET(L, list(abnf_char()), list_to_binary(L))}
  1435. ]),
  1436. {case ContentRange of
  1437. {Unit, Resp} when is_binary(Unit) -> {?INLINE_LOWERCASE_BC(Unit), Resp};
  1438. _ -> ContentRange
  1439. end, case ContentRange of
  1440. {bytes, First, Last, '*'} ->
  1441. << "bytes ", (integer_to_binary(First))/binary, "-",
  1442. (integer_to_binary(Last))/binary, "/*">>;
  1443. {bytes, First, Last, Complete} ->
  1444. << "bytes ", (integer_to_binary(First))/binary, "-",
  1445. (integer_to_binary(Last))/binary, "/", (integer_to_binary(Complete))/binary >>;
  1446. {bytes, '*', Complete} ->
  1447. << "bytes */", (integer_to_binary(Complete))/binary >>;
  1448. {Unit, Resp} ->
  1449. << Unit/binary, $\s, Resp/binary >>
  1450. end}).
  1451. prop_parse_content_range() ->
  1452. ?FORALL({Res, ContentRange},
  1453. content_range(),
  1454. Res =:= parse_content_range(ContentRange)).
  1455. parse_content_range_test_() ->
  1456. Tests = [
  1457. {<<"bytes 21010-47021/47022">>, {bytes, 21010, 47021, 47022}},
  1458. {<<"bytes 500-999/8000">>, {bytes, 500, 999, 8000}},
  1459. {<<"bytes 7000-7999/8000">>, {bytes, 7000, 7999, 8000}},
  1460. {<<"bytes 42-1233/1234">>, {bytes, 42, 1233, 1234}},
  1461. {<<"bytes 42-1233/*">>, {bytes, 42, 1233, '*'}},
  1462. {<<"bytes */1234">>, {bytes, '*', 1234}},
  1463. {<<"bytes 0-499/1234">>, {bytes, 0, 499, 1234}},
  1464. {<<"bytes 500-999/1234">>, {bytes, 500, 999, 1234}},
  1465. {<<"bytes 500-1233/1234">>, {bytes, 500, 1233, 1234}},
  1466. {<<"bytes 734-1233/1234">>, {bytes, 734, 1233, 1234}},
  1467. {<<"bytes */47022">>, {bytes, '*', 47022}},
  1468. {<<"exampleunit 1.2-4.3/25">>, {<<"exampleunit">>, <<"1.2-4.3/25">>}},
  1469. {<<"exampleunit 11.2-14.3/25">>, {<<"exampleunit">>, <<"11.2-14.3/25">>}}
  1470. ],
  1471. [{V, fun() -> R = parse_content_range(V) end} || {V, R} <- Tests].
  1472. parse_content_range_error_test_() ->
  1473. Tests = [
  1474. <<>>
  1475. ],
  1476. [{V, fun() -> {'EXIT', _} = (catch parse_content_range(V)) end} || V <- Tests].
  1477. -endif.
  1478. -ifdef(PERF).
  1479. horse_parse_content_range_bytes() ->
  1480. horse:repeat(200000,
  1481. parse_content_range(<<"bytes 21010-47021/47022">>)
  1482. ).
  1483. horse_parse_content_range_other() ->
  1484. horse:repeat(200000,
  1485. parse_content_range(<<"exampleunit 11.2-14.3/25">>)
  1486. ).
  1487. -endif.
  1488. %% @doc Parse the Content-Type header.
  1489. -spec parse_content_type(binary()) -> media_type().
  1490. parse_content_type(<< C, R/bits >>) when ?IS_TOKEN(C) ->
  1491. case C of
  1492. ?INLINE_LOWERCASE(media_type, R, <<>>)
  1493. end.
  1494. media_type(<< $/, C, R/bits >>, T) when ?IS_TOKEN(C) ->
  1495. case C of
  1496. ?INLINE_LOWERCASE(media_subtype, R, T, <<>>)
  1497. end;
  1498. media_type(<< C, R/bits >>, T) when ?IS_TOKEN(C) ->
  1499. case C of
  1500. ?INLINE_LOWERCASE(media_type, R, T)
  1501. end.
  1502. media_subtype(<<>>, T, S) -> {T, S, []};
  1503. media_subtype(<< $;, R/bits >>, T, S) -> media_before_param(R, T, S, []);
  1504. media_subtype(<< $\s, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  1505. media_subtype(<< $\t, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  1506. media_subtype(<< C, R/bits >>, T, S) when ?IS_TOKEN(C) ->
  1507. case C of
  1508. ?INLINE_LOWERCASE(media_subtype, R, T, S)
  1509. end.
  1510. media_before_semicolon(<<>>, T, S, P) -> {T, S, lists:reverse(P)};
  1511. media_before_semicolon(<< $;, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1512. media_before_semicolon(<< $\s, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P);
  1513. media_before_semicolon(<< $\t, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P).
  1514. media_before_param(<< $\s, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1515. media_before_param(<< $\t, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1516. media_before_param(<< "charset=", $", R/bits >>, T, S, P) -> media_charset_quoted(R, T, S, P, <<>>);
  1517. media_before_param(<< "charset=", R/bits >>, T, S, P) -> media_charset(R, T, S, P, <<>>);
  1518. media_before_param(<< C, R/bits >>, T, S, P) when ?IS_TOKEN(C) ->
  1519. case C of
  1520. ?INLINE_LOWERCASE(media_param, R, T, S, P, <<>>)
  1521. end.
  1522. media_charset_quoted(<< $", R/bits >>, T, S, P, V) ->
  1523. media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1524. media_charset_quoted(<< $\\, C, R/bits >>, T, S, P, V) when ?IS_VCHAR_OBS(C) ->
  1525. case C of
  1526. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  1527. end;
  1528. media_charset_quoted(<< C, R/bits >>, T, S, P, V) when ?IS_VCHAR_OBS(C) ->
  1529. case C of
  1530. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  1531. end.
  1532. media_charset(<<>>, T, S, P, V) -> {T, S, lists:reverse([{<<"charset">>, V}|P])};
  1533. media_charset(<< $;, R/bits >>, T, S, P, V) -> media_before_param(R, T, S, [{<<"charset">>, V}|P]);
  1534. media_charset(<< $\s, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1535. media_charset(<< $\t, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1536. media_charset(<< C, R/bits >>, T, S, P, V) when ?IS_TOKEN(C) ->
  1537. case C of
  1538. ?INLINE_LOWERCASE(media_charset, R, T, S, P, V)
  1539. end.
  1540. media_param(<< $=, $", R/bits >>, T, S, P, K) -> media_quoted(R, T, S, P, K, <<>>);
  1541. media_param(<< $=, R/bits >>, T, S, P, K) -> media_value(R, T, S, P, K, <<>>);
  1542. media_param(<< C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) ->
  1543. case C of
  1544. ?INLINE_LOWERCASE(media_param, R, T, S, P, K)
  1545. end.
  1546. media_quoted(<< $", R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1547. media_quoted(<< $\\, C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>);
  1548. media_quoted(<< C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>).
  1549. media_value(<<>>, T, S, P, K, V) -> {T, S, lists:reverse([{K, V}|P])};
  1550. media_value(<< $;, R/bits >>, T, S, P, K, V) -> media_before_param(R, T, S, [{K, V}|P]);
  1551. media_value(<< $\s, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1552. media_value(<< $\t, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1553. media_value(<< C, R/bits >>, T, S, P, K, V) when ?IS_TOKEN(C) -> media_value(R, T, S, P, K, << V/binary, C >>).
  1554. -ifdef(TEST).
  1555. media_type_parameter() ->
  1556. frequency([
  1557. {90, parameter()},
  1558. {10, {<<"charset">>, oneof([token(), quoted_string()]), <<>>, <<>>}}
  1559. ]).
  1560. media_type() ->
  1561. ?LET({T, S, P},
  1562. {token(), token(), small_list(media_type_parameter())},
  1563. {T, S, P, iolist_to_binary([T, $/, S, [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P]])}
  1564. ).
  1565. prop_parse_content_type() ->
  1566. ?FORALL({T, S, P, MediaType},
  1567. media_type(),
  1568. begin
  1569. {ResT, ResS, ResP} = parse_content_type(MediaType),
  1570. ExpectedP = [case ?INLINE_LOWERCASE_BC(K) of
  1571. <<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(V))};
  1572. LowK -> {LowK, unquote(V)}
  1573. end || {K, V, _, _} <- P],
  1574. ResT =:= ?INLINE_LOWERCASE_BC(T)
  1575. andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
  1576. andalso ResP =:= ExpectedP
  1577. end
  1578. ).
  1579. parse_content_type_test_() ->
  1580. Tests = [
  1581. {<<"text/html;charset=utf-8">>,
  1582. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1583. {<<"text/html;charset=UTF-8">>,
  1584. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1585. {<<"Text/HTML;Charset=\"utf-8\"">>,
  1586. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1587. {<<"text/html; charset=\"utf-8\"">>,
  1588. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1589. {<<"text/html; charset=ISO-8859-4">>,
  1590. {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  1591. {<<"text/plain; charset=iso-8859-4">>,
  1592. {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  1593. {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
  1594. {<<"multipart">>, <<"form-data">>, [
  1595. {<<"boundary">>, <<"MultipartIsUgly">>}
  1596. ]}},
  1597. {<<"foo/bar; one=FirstParam; two=SecondParam">>,
  1598. {<<"foo">>, <<"bar">>, [
  1599. {<<"one">>, <<"FirstParam">>},
  1600. {<<"two">>, <<"SecondParam">>}
  1601. ]}}
  1602. ],
  1603. [{V, fun() -> R = parse_content_type(V) end} || {V, R} <- Tests].
  1604. -endif.
  1605. -ifdef(PERF).
  1606. horse_parse_content_type() ->
  1607. horse:repeat(200000,
  1608. parse_content_type(<<"text/html;charset=utf-8">>)
  1609. ).
  1610. -endif.
  1611. %% @doc Parse the Date header.
  1612. -spec parse_date(binary()) -> calendar:datetime().
  1613. parse_date(Date) ->
  1614. cow_date:parse_date(Date).
  1615. -ifdef(TEST).
  1616. parse_date_test_() ->
  1617. Tests = [
  1618. {<<"Tue, 15 Nov 1994 08:12:31 GMT">>, {{1994, 11, 15}, {8, 12, 31}}}
  1619. ],
  1620. [{V, fun() -> R = parse_date(V) end} || {V, R} <- Tests].
  1621. -endif.
  1622. %% @doc Parse the ETag header.
  1623. -spec parse_etag(binary()) -> etag().
  1624. parse_etag(<< $W, $/, $", R/bits >>) ->
  1625. etag(R, weak, <<>>);
  1626. parse_etag(<< $", R/bits >>) ->
  1627. etag(R, strong, <<>>).
  1628. etag(<< $" >>, Strength, Tag) ->
  1629. {Strength, Tag};
  1630. etag(<< C, R/bits >>, Strength, Tag) when ?IS_ETAGC(C) ->
  1631. etag(R, Strength, << Tag/binary, C >>).
  1632. -ifdef(TEST).
  1633. etagc() ->
  1634. ?SUCHTHAT(C, int(16#21, 16#ff), C =/= 16#22 andalso C =/= 16#7f).
  1635. etag() ->
  1636. ?LET({Strength, Tag},
  1637. {elements([weak, strong]), list(etagc())},
  1638. begin
  1639. TagBin = list_to_binary(Tag),
  1640. {{Strength, TagBin},
  1641. case Strength of
  1642. weak -> << $W, $/, $", TagBin/binary, $" >>;
  1643. strong -> << $", TagBin/binary, $" >>
  1644. end}
  1645. end).
  1646. prop_parse_etag() ->
  1647. ?FORALL({Tag, TagBin},
  1648. etag(),
  1649. Tag =:= parse_etag(TagBin)).
  1650. parse_etag_test_() ->
  1651. Tests = [
  1652. {<<"\"xyzzy\"">>, {strong, <<"xyzzy">>}},
  1653. {<<"W/\"xyzzy\"">>, {weak, <<"xyzzy">>}},
  1654. {<<"\"\"">>, {strong, <<>>}}
  1655. ],
  1656. [{V, fun() -> R = parse_etag(V) end} || {V, R} <- Tests].
  1657. parse_etag_error_test_() ->
  1658. Tests = [
  1659. <<>>,
  1660. <<"\"">>,
  1661. <<"W">>,
  1662. <<"W/">>
  1663. ],
  1664. [{V, fun() -> {'EXIT', _} = (catch parse_etag(V)) end} || V <- Tests].
  1665. -endif.
  1666. -ifdef(PERF).
  1667. horse_parse_etag() ->
  1668. horse:repeat(200000,
  1669. parse_etag(<<"W/\"xyzzy\"">>)
  1670. ).
  1671. -endif.
  1672. %% @doc Parse the Expect header.
  1673. -spec parse_expect(binary()) -> continue.
  1674. parse_expect(<<"100-continue">>) ->
  1675. continue;
  1676. parse_expect(<<"100-", C, O, N, T, I, M, U, E >>)
  1677. when C =:= $C orelse C =:= $c, O =:= $O orelse O =:= $o,
  1678. N =:= $N orelse N =:= $n, T =:= $T orelse T =:= $t,
  1679. I =:= $I orelse I =:= $i, M =:= $N orelse M =:= $n,
  1680. U =:= $U orelse U =:= $u, E =:= $E orelse E =:= $e ->
  1681. continue.
  1682. -ifdef(TEST).
  1683. expect() ->
  1684. ?LET(E,
  1685. [$1, $0, $0, $-,
  1686. elements([$c, $C]), elements([$o, $O]), elements([$n, $N]),
  1687. elements([$t, $T]), elements([$i, $I]), elements([$n, $N]),
  1688. elements([$u, $U]), elements([$e, $E])],
  1689. list_to_binary(E)).
  1690. prop_parse_expect() ->
  1691. ?FORALL(E, expect(), continue =:= parse_expect(E)).
  1692. parse_expect_test_() ->
  1693. Tests = [
  1694. <<"100-continue">>,
  1695. <<"100-CONTINUE">>,
  1696. <<"100-Continue">>,
  1697. <<"100-CoNtInUe">>
  1698. ],
  1699. [{V, fun() -> continue = parse_expect(V) end} || V <- Tests].
  1700. parse_expect_error_test_() ->
  1701. Tests = [
  1702. <<>>,
  1703. <<" ">>,
  1704. <<"200-OK">>,
  1705. <<"Cookies">>
  1706. ],
  1707. [{V, fun() -> {'EXIT', _} = (catch parse_expect(V)) end} || V <- Tests].
  1708. -endif.
  1709. -ifdef(PERF).
  1710. horse_parse_expect() ->
  1711. horse:repeat(200000,
  1712. parse_expect(<<"100-continue">>)
  1713. ).
  1714. -endif.
  1715. %% @doc Parse the Expires header.
  1716. %%
  1717. %% Recipients must interpret invalid date formats as a date
  1718. %% in the past. The value "0" is commonly used.
  1719. -spec parse_expires(binary()) -> calendar:datetime().
  1720. parse_expires(<<"0">>) ->
  1721. {{1, 1, 1}, {0, 0, 0}};
  1722. parse_expires(Expires) ->
  1723. try
  1724. cow_date:parse_date(Expires)
  1725. catch _:_ ->
  1726. {{1, 1, 1}, {0, 0, 0}}
  1727. end.
  1728. -ifdef(TEST).
  1729. parse_expires_test_() ->
  1730. Tests = [
  1731. {<<"0">>, {{1, 1, 1}, {0, 0, 0}}},
  1732. {<<"Thu, 01 Dec 1994 nope invalid">>, {{1, 1, 1}, {0, 0, 0}}},
  1733. {<<"Thu, 01 Dec 1994 16:00:00 GMT">>, {{1994, 12, 1}, {16, 0, 0}}}
  1734. ],
  1735. [{V, fun() -> R = parse_expires(V) end} || {V, R} <- Tests].
  1736. -endif.
  1737. -ifdef(PERF).
  1738. horse_parse_expires_0() ->
  1739. horse:repeat(200000,
  1740. parse_expires(<<"0">>)
  1741. ).
  1742. horse_parse_expires_invalid() ->
  1743. horse:repeat(200000,
  1744. parse_expires(<<"Thu, 01 Dec 1994 nope invalid">>)
  1745. ).
  1746. -endif.
  1747. %% @doc Parse the Host header.
  1748. %%
  1749. %% We only seek to have legal characters and separate the
  1750. %% host and port values. The number of segments in the host
  1751. %% or the size of each segment is not checked.
  1752. %%
  1753. %% There is no way to distinguish IPv4 addresses from regular
  1754. %% names until the last segment is reached therefore we do not
  1755. %% differentiate them.
  1756. %%
  1757. %% The following valid hosts are currently rejected: IPv6
  1758. %% addresses with a zone identifier; IPvFuture addresses;
  1759. %% and percent-encoded addresses.
  1760. -spec parse_host(binary()) -> {binary(), 0..65535 | undefined}.
  1761. parse_host(<< $[, R/bits >>) ->
  1762. ipv6_address(R, << $[ >>);
  1763. parse_host(Host) ->
  1764. reg_name(Host, <<>>).
  1765. ipv6_address(<< $] >>, IP) -> {<< IP/binary, $] >>, undefined};
  1766. ipv6_address(<< $], $:, Port/bits >>, IP) -> {<< IP/binary, $] >>, binary_to_integer(Port)};
  1767. ipv6_address(<< C, R/bits >>, IP) when ?IS_HEX(C) orelse C =:= $: orelse C =:= $. ->
  1768. case C of
  1769. ?INLINE_LOWERCASE(ipv6_address, R, IP)
  1770. end.
  1771. reg_name(<<>>, Name) -> {Name, undefined};
  1772. reg_name(<< $:, Port/bits >>, Name) -> {Name, binary_to_integer(Port)};
  1773. reg_name(<< C, R/bits >>, Name) when ?IS_URI_UNRESERVED(C) orelse ?IS_URI_SUB_DELIMS(C) ->
  1774. case C of
  1775. ?INLINE_LOWERCASE(reg_name, R, Name)
  1776. end.
  1777. -ifdef(TEST).
  1778. host_chars() -> "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".
  1779. host() -> vector(1, 255, elements(host_chars())).
  1780. host_port() ->
  1781. ?LET({Host, Port},
  1782. {host(), oneof([undefined, int(1, 65535)])},
  1783. begin
  1784. HostBin = list_to_binary(Host),
  1785. {{?INLINE_LOWERCASE_BC(HostBin), Port},
  1786. case Port of
  1787. undefined -> HostBin;
  1788. _ -> << HostBin/binary, $:, (integer_to_binary(Port))/binary >>
  1789. end}
  1790. end).
  1791. prop_parse_host() ->
  1792. ?FORALL({Res, Host}, host_port(), Res =:= parse_host(Host)).
  1793. parse_host_test_() ->
  1794. Tests = [
  1795. {<<>>, {<<>>, undefined}},
  1796. {<<"www.example.org:8080">>, {<<"www.example.org">>, 8080}},
  1797. {<<"www.example.org">>, {<<"www.example.org">>, undefined}},
  1798. {<<"192.0.2.1:8080">>, {<<"192.0.2.1">>, 8080}},
  1799. {<<"192.0.2.1">>, {<<"192.0.2.1">>, undefined}},
  1800. {<<"[2001:db8::1]:8080">>, {<<"[2001:db8::1]">>, 8080}},
  1801. {<<"[2001:db8::1]">>, {<<"[2001:db8::1]">>, undefined}},
  1802. {<<"[::ffff:192.0.2.1]:8080">>, {<<"[::ffff:192.0.2.1]">>, 8080}},
  1803. {<<"[::ffff:192.0.2.1]">>, {<<"[::ffff:192.0.2.1]">>, undefined}}
  1804. ],
  1805. [{V, fun() -> R = parse_host(V) end} || {V, R} <- Tests].
  1806. -endif.
  1807. -ifdef(PERF).
  1808. horse_parse_host_blue_example_org() ->
  1809. horse:repeat(200000,
  1810. parse_host(<<"blue.example.org:8080">>)
  1811. ).
  1812. horse_parse_host_ipv4() ->
  1813. horse:repeat(200000,
  1814. parse_host(<<"192.0.2.1:8080">>)
  1815. ).
  1816. horse_parse_host_ipv6() ->
  1817. horse:repeat(200000,
  1818. parse_host(<<"[2001:db8::1]:8080">>)
  1819. ).
  1820. horse_parse_host_ipv6_v4() ->
  1821. horse:repeat(200000,
  1822. parse_host(<<"[::ffff:192.0.2.1]:8080">>)
  1823. ).
  1824. -endif.
  %% @doc Parse the If-Match header.
  %%
  %% A value of exactly "*" yields the atom '*'. Any other value is read as
  %% a comma-separated list of entity tags, each returned as
  %% {weak | strong, Tag}. The list then goes through nonempty/1 (defined
  %% elsewhere in this module); the tests in this file expect an empty
  %% header to crash. Input matching no clause crashes as well.
  -spec parse_if_match(binary()) -> '*' | [etag()].
  parse_if_match(<<"*">>) -> '*';
  parse_if_match(Header) -> nonempty(etag_list(Header, [])).

  %% Before an entity tag: spaces, tabs and commas are skipped, then the
  %% opening W/" (weak) or " (strong) is required.
  etag_list(<<>>, Tags) ->
      lists:reverse(Tags);
  etag_list(<< Skip, Rest/bits >>, Tags) when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      etag_list(Rest, Tags);
  etag_list(<< $W, $/, $", Rest/bits >>, Tags) ->
      etag(Rest, Tags, weak, <<>>);
  etag_list(<< $", Rest/bits >>, Tags) ->
      etag(Rest, Tags, strong, <<>>).

  %% Inside the quotes: collect bytes accepted by ?IS_ETAGC until the
  %% closing quote, then store the {Strength, Tag} pair.
  etag(<< $", Rest/bits >>, Tags, Strength, Tag) ->
      etag_list_sep(Rest, [{Strength, Tag}|Tags]);
  etag(<< C, Rest/bits >>, Tags, Strength, Tag) when ?IS_ETAGC(C) ->
      etag(Rest, Tags, Strength, << Tag/binary, C >>).

  %% After a closing quote: only whitespace, a comma or the end of input
  %% may follow.
  etag_list_sep(<<>>, Tags) ->
      lists:reverse(Tags);
  etag_list_sep(<< WS, Rest/bits >>, Tags) when WS =:= $\s; WS =:= $\t ->
      etag_list_sep(Rest, Tags);
  etag_list_sep(<< $,, Rest/bits >>, Tags) ->
      etag_list(Rest, Tags).
  -ifdef(TEST).
  %% Property: each generated element is a pair whose second element is the
  %% text placed in the header and whose first element is what the parser
  %% must return for it, in the same order.
  prop_parse_if_match() ->
      ?FORALL(ETags,
          non_empty(list(etag())),
          begin
              Joined = iolist_to_binary([[$,, Text] || {_, Text} <- ETags]),
              %% Drop the comma that precedes the first element.
              << _, Header/binary >> = Joined,
              Parsed = parse_if_match(Header),
              Checks = [Expected =:= Actual
                  || {{Expected, _}, Actual} <- lists:zip(ETags, Parsed)],
              [true] =:= lists:usort(Checks)
          end).

  %% Fixed examples: a single tag, a list of tags and the wildcard.
  parse_if_match_test_() ->
      Cases = [
          {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
          {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
              [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
          {<<"*">>, '*'}
      ],
      [{Input, fun() -> Expected = parse_if_match(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_if_match_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_if_match(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmark: parse a list of three strong entity tags.
  horse_parse_if_match() ->
      horse:repeat(200000, parse_if_match(<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>)).
  -endif.
  %% @doc Parse the If-Modified-Since header.
  %%
  %% The whole value is handed to cow_date:parse_date/1 and its result is
  %% returned unchanged.
  -spec parse_if_modified_since(binary()) -> calendar:datetime().
  parse_if_modified_since(Date) ->
      cow_date:parse_date(Date).
  -ifdef(TEST).
  %% Fixed example for parse_if_modified_since/1.
  parse_if_modified_since_test_() ->
      Cases = [
          {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
      ],
      [{Input, fun() -> Expected = parse_if_modified_since(Input) end}
          || {Input, Expected} <- Cases].
  -endif.
  %% @doc Parse the If-None-Match header.
  %%
  %% A value of exactly "*" yields the atom '*'. Any other value is parsed
  %% by etag_list/2, the same list parser used for If-Match, and the result
  %% goes through nonempty/1.
  -spec parse_if_none_match(binary()) -> '*' | [etag()].
  parse_if_none_match(<<"*">>) -> '*';
  parse_if_none_match(Header) -> nonempty(etag_list(Header, [])).
  -ifdef(TEST).
  %% Fixed examples: strong and weak tags, alone and in lists, plus the
  %% wildcard.
  parse_if_none_match_test_() ->
      Cases = [
          {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
          {<<"W/\"xyzzy\"">>, [{weak, <<"xyzzy">>}]},
          {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
              [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
          {<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>,
              [{weak, <<"xyzzy">>}, {weak, <<"r2d2xxxx">>}, {weak, <<"c3piozzzz">>}]},
          {<<"*">>, '*'}
      ],
      [{Input, fun() -> Expected = parse_if_none_match(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_if_none_match_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_if_none_match(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmark: parse a list of three weak entity tags.
  horse_parse_if_none_match() ->
      horse:repeat(200000,
          parse_if_none_match(<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>)).
  -endif.
  %% @doc Parse the If-Range header.
  %%
  %% A value opening with a double quote or with W/" is passed to etag/3
  %% (defined elsewhere in this module) as a strong or weak entity tag.
  %% Anything else is handed to cow_date:parse_date/1.
  -spec parse_if_range(binary()) -> etag() | calendar:datetime().
  parse_if_range(<< $", Rest/bits >>) ->
      etag(Rest, strong, <<>>);
  parse_if_range(<< $W, $/, $", Rest/bits >>) ->
      etag(Rest, weak, <<>>);
  parse_if_range(Date) ->
      cow_date:parse_date(Date).
  -ifdef(TEST).
  %% Fixed examples: weak tag, strong tag and HTTP date.
  parse_if_range_test_() ->
      Cases = [
          {<<"W/\"xyzzy\"">>, {weak, <<"xyzzy">>}},
          {<<"\"xyzzy\"">>, {strong, <<"xyzzy">>}},
          {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
      ],
      [{Input, fun() -> Expected = parse_if_range(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_if_range_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_if_range(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmarks for parse_if_range/1: entity tag form and date form.
  horse_parse_if_range_etag() ->
      horse:repeat(200000, parse_if_range(<<"\"xyzzy\"">>)).

  horse_parse_if_range_date() ->
      horse:repeat(200000, parse_if_range(<<"Sat, 29 Oct 1994 19:43:31 GMT">>)).
  -endif.
  %% @doc Parse the If-Unmodified-Since header.
  %%
  %% The whole value is handed to cow_date:parse_date/1 and its result is
  %% returned unchanged.
  -spec parse_if_unmodified_since(binary()) -> calendar:datetime().
  parse_if_unmodified_since(IfUnmodifiedSince) ->
      cow_date:parse_date(IfUnmodifiedSince).
  -ifdef(TEST).
  %% Fixed example for parse_if_unmodified_since/1.
  parse_if_unmodified_since_test_() ->
      Cases = [
          {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
      ],
      [{Input, fun() -> Expected = parse_if_unmodified_since(Input) end}
          || {Input, Expected} <- Cases].
  -endif.
  %% @doc Parse the Last-Modified header.
  %%
  %% The whole value is handed to cow_date:parse_date/1 and its result is
  %% returned unchanged.
  -spec parse_last_modified(binary()) -> calendar:datetime().
  parse_last_modified(Date) ->
      cow_date:parse_date(Date).
  -ifdef(TEST).
  %% Fixed example for parse_last_modified/1.
  parse_last_modified_test_() ->
      Cases = [
          {<<"Tue, 15 Nov 1994 12:45:26 GMT">>, {{1994, 11, 15}, {12, 45, 26}}}
      ],
      [{Input, fun() -> Expected = parse_last_modified(Input) end}
          || {Input, Expected} <- Cases].
  -endif.
  %% @doc Parse the Max-Forwards header.
  %%
  %% The value must be made of decimal digits only. binary_to_integer/1 on
  %% its own also accepts a leading "+" or "-" sign, so values such as "+42"
  %% or "-0" used to be accepted. Requiring the first byte to be a digit
  %% rejects them; binary_to_integer/1 then rejects any non-digit byte after
  %% the first. With no sign possible the result is always non-negative.
  %%
  %% Invalid input (including the empty binary) crashes.
  -spec parse_max_forwards(binary()) -> non_neg_integer().
  parse_max_forwards(MaxForwards = << D, _/bits >>) when D >= $0, D =< $9 ->
      binary_to_integer(MaxForwards).
  -ifdef(TEST).
  %% Property: the decimal text of any non-negative integer parses back to
  %% that integer.
  prop_parse_max_forwards() ->
      ?FORALL(N,
          non_neg_integer(),
          N =:= parse_max_forwards(integer_to_binary(N))).

  %% Fixed examples.
  parse_max_forwards_test_() ->
      Cases = [
          {<<"0">>, 0},
          {<<"42">>, 42},
          {<<"69">>, 69},
          {<<"1337">>, 1337},
          {<<"1234567890">>, 1234567890}
      ],
      [{Input, fun() -> Expected = parse_max_forwards(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_max_forwards_error_test_() ->
      Invalid = [
          <<>>,
          <<"123, 123">>,
          <<"4.17">>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_max_forwards(Input)) end}
          || Input <- Invalid].
  -endif.
  %% @doc Parse the Pragma header.
  %%
  %% Pragma is a legacy header kept for backward compatibility with
  %% HTTP/1.0 caches. The only directive ever specified is "no-cache",
  %% and only for request messages.
  %%
  %% Parsing is deliberately a shortcut: the value must be exactly
  %% "no-cache" to yield no_cache; every other value yields cache.
  -spec parse_pragma(binary()) -> cache | no_cache.
  parse_pragma(Pragma) when Pragma =:= <<"no-cache">> ->
      no_cache;
  parse_pragma(_Other) ->
      cache.
  %% @doc Parse the Proxy-Authorization header.
  %%
  %% The syntax is identical to that of the Authorization header, so this
  %% simply delegates to parse_authorization/1.
  -spec parse_proxy_authorization(binary())
      -> {basic, binary(), binary()}
      | {bearer, binary()}
      | {digest, [{binary(), binary()}]}.
  parse_proxy_authorization(Header) ->
      parse_authorization(Header).
  %% @doc Parse the Range header.
  %%
  %% A value starting with "bytes=" yields {bytes, Specs}, where each spec
  %% is {First, Last}, {First, infinity} for an open-ended range, or a
  %% negative integer for a suffix range ("-500" gives -500). Any other
  %% value must start with a token byte and yields {Unit, Set}: the unit is
  %% built through ?INLINE_LOWERCASE and the set is the text following "=".
  %% Input that matches no clause crashes.
  -spec parse_range(binary())
      -> {bytes, [{non_neg_integer(), non_neg_integer() | infinity} | neg_integer()]}
      | {binary(), binary()}.
  parse_range(<<"bytes=", Rest/bits >>) ->
      bytes_range_set(Rest, []);
  parse_range(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(other_range_unit, Rest, <<>>)
      end.

  %% Start of a bytes spec: skip whitespace and commas, then expect either
  %% "-" followed by a digit (suffix range) or a digit (first byte position).
  bytes_range_set(<<>>, Specs) ->
      {bytes, lists:reverse(Specs)};
  bytes_range_set(<< Skip, Rest/bits >>, Specs) when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      bytes_range_set(Rest, Specs);
  bytes_range_set(<< $-, C, Rest/bits >>, Specs) when ?IS_DIGIT(C) ->
      bytes_range_suffix_spec(Rest, Specs, C - $0);
  bytes_range_set(<< C, Rest/bits >>, Specs) when ?IS_DIGIT(C) ->
      bytes_range_spec(Rest, Specs, C - $0).

  %% First byte position: accumulate digits until "-". A digit right after
  %% the dash starts the last byte position; otherwise the range is
  %% open-ended.
  bytes_range_spec(<< $-, C, Rest/bits >>, Specs, From) when ?IS_DIGIT(C) ->
      bytes_range_spec_last(Rest, Specs, From, C - $0);
  bytes_range_spec(<< $-, Rest/bits >>, Specs, From) ->
      bytes_range_set_sep(Rest, [{From, infinity}|Specs]);
  bytes_range_spec(<< C, Rest/bits >>, Specs, From) when ?IS_DIGIT(C) ->
      bytes_range_spec(Rest, Specs, From * 10 + C - $0).

  %% Last byte position: accumulate digits, then store {From, To}.
  bytes_range_spec_last(<< C, Rest/bits >>, Specs, From, To) when ?IS_DIGIT(C) ->
      bytes_range_spec_last(Rest, Specs, From, To * 10 + C - $0);
  bytes_range_spec_last(Rest, Specs, From, To) ->
      bytes_range_set_sep(Rest, [{From, To}|Specs]).

  %% Suffix length: accumulate digits, then store the negated value.
  bytes_range_suffix_spec(<< C, Rest/bits >>, Specs, Len) when ?IS_DIGIT(C) ->
      bytes_range_suffix_spec(Rest, Specs, Len * 10 + C - $0);
  bytes_range_suffix_spec(Rest, Specs, Len) ->
      bytes_range_set_sep(Rest, [-Len|Specs]).

  %% After a spec: only whitespace, a comma or the end of input may follow.
  bytes_range_set_sep(<<>>, Specs) ->
      {bytes, lists:reverse(Specs)};
  bytes_range_set_sep(<< WS, Rest/bits >>, Specs) when WS =:= $\s; WS =:= $\t ->
      bytes_range_set_sep(Rest, Specs);
  bytes_range_set_sep(<< $,, Rest/bits >>, Specs) ->
      bytes_range_set(Rest, Specs).

  %% Unit of a non-bytes range: accumulate token bytes until "=", which
  %% must be followed by at least one ?IS_VCHAR byte.
  other_range_unit(<< $=, C, Rest/bits >>, Unit) when ?IS_VCHAR(C) ->
      other_range_set(Rest, Unit, << C >>);
  other_range_unit(<< C, Rest/bits >>, Unit) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(other_range_unit, Rest, Unit)
      end.

  %% Range set of a non-bytes range: every remaining byte must satisfy
  %% ?IS_VCHAR and is kept as-is.
  other_range_set(<<>>, Unit, Set) ->
      {Unit, Set};
  other_range_set(<< C, Rest/bits >>, Unit, Set) when ?IS_VCHAR(C) ->
      other_range_set(Rest, Unit, << Set/binary, C >>).
  -ifdef(TEST).
  %% Generator of {Expected, HeaderValue} pairs for the bytes unit. The
  %% specs are closed ranges, open-ended ranges and negative suffix lengths.
  bytes_range() ->
      ?LET(Specs,
          non_empty(list(oneof([
              ?SUCHTHAT({From, To}, {pos_integer(), pos_integer()}, From =< To),
              {pos_integer(), infinity},
              ?LET(N, pos_integer(), -N)
          ]))),
          begin
              %% Each spec is prefixed with a comma; the first comma is then
              %% dropped. A suffix is already negative, so its text carries
              %% the leading "-".
              << _, Set/bits >> = iolist_to_binary([
                  case Spec of
                      {From, infinity} -> [$,, integer_to_binary(From), $-];
                      {From, To} -> [$,, integer_to_binary(From), $-, integer_to_binary(To)];
                      Suffix -> [$,, integer_to_binary(Suffix)]
                  end || Spec <- Specs]),
              {{bytes, Specs}, <<"bytes=", Set/binary >>}
          end).

  %% Generator of {Expected, HeaderValue} pairs for an arbitrary unit.
  other_range() ->
      ?LET(Expected = {Unit, Set},
          {token(), ?LET(Chars, non_empty(list(vchar())), list_to_binary(Chars))},
          {Expected, << Unit/binary, $=, Set/binary >>}).

  range() ->
      oneof([
          bytes_range(),
          other_range()
      ]).

  %% Property: a generated header value parses to the expected term, with
  %% the unit of non-bytes ranges lowercased.
  prop_parse_range() ->
      ?FORALL({Range, Header},
          range(),
          begin
              Expected = case Range of
                  {bytes, _} -> Range;
                  {Unit, Set} -> {?INLINE_LOWERCASE_BC(Unit), Set}
              end,
              Expected =:= parse_range(Header)
          end).

  %% Fixed examples.
  parse_range_test_() ->
      Cases = [
          {<<"bytes=0-499">>, {bytes, [{0, 499}]}},
          {<<"bytes=500-999">>, {bytes, [{500, 999}]}},
          {<<"bytes=-500">>, {bytes, [-500]}},
          {<<"bytes=9500-">>, {bytes, [{9500, infinity}]}},
          {<<"bytes=0-0,-1">>, {bytes, [{0, 0}, -1]}},
          {<<"bytes=500-600,601-999">>, {bytes, [{500, 600}, {601, 999}]}},
          {<<"bytes=500-700,601-999">>, {bytes, [{500, 700}, {601, 999}]}},
          {<<"books=I-III,V-IX">>, {<<"books">>, <<"I-III,V-IX">>}}
      ],
      [{Input, fun() -> Expected = parse_range(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_range_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_range(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmarks for parse_range/1, one per range form.
  horse_parse_range_first_last() ->
      horse:repeat(200000, parse_range(<<"bytes=500-999">>)).

  horse_parse_range_infinity() ->
      horse:repeat(200000, parse_range(<<"bytes=9500-">>)).

  horse_parse_range_suffix() ->
      horse:repeat(200000, parse_range(<<"bytes=-500">>)).

  horse_parse_range_two() ->
      horse:repeat(200000, parse_range(<<"bytes=500-700,601-999">>)).

  horse_parse_range_other() ->
      horse:repeat(200000, parse_range(<<"books=I-III,V-IX">>)).
  -endif.
  %% @doc Parse the Retry-After header.
  %%
  %% A value whose first byte is a digit is converted with
  %% binary_to_integer/1 and returned as a number of seconds. Any other
  %% value is handed to cow_date:parse_date/1.
  -spec parse_retry_after(binary()) -> non_neg_integer() | calendar:datetime().
  parse_retry_after(Value = << Digit, _/bits >>) when ?IS_DIGIT(Digit) ->
      Seconds = binary_to_integer(Value),
      true = Seconds >= 0,
      Seconds;
  parse_retry_after(Value) ->
      cow_date:parse_date(Value).
  -ifdef(TEST).
  %% Fixed examples: date form and delay-seconds form.
  parse_retry_after_test_() ->
      Cases = [
          {<<"Fri, 31 Dec 1999 23:59:59 GMT">>, {{1999, 12, 31}, {23, 59, 59}}},
          {<<"120">>, 120}
      ],
      [{Input, fun() -> Expected = parse_retry_after(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_retry_after_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_retry_after(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmarks for parse_retry_after/1: date form and delay-seconds form.
  horse_parse_retry_after_date() ->
      horse:repeat(200000, parse_retry_after(<<"Fri, 31 Dec 1999 23:59:59 GMT">>)).

  horse_parse_retry_after_delay_seconds() ->
      horse:repeat(200000, parse_retry_after(<<"120">>)).
  -endif.
  %% @doc Dummy parsing function for the Sec-WebSocket-Accept header.
  %%
  %% The value is returned exactly as given. The client is expected to
  %% compare it directly against the value it computed, so there is
  %% nothing to parse.
  -spec parse_sec_websocket_accept(binary()) -> binary().
  parse_sec_websocket_accept(Accept) ->
      Accept.
  %% @doc Parse the Sec-WebSocket-Extensions request header.
  %%
  %% Returns one {Name, Params} tuple per extension, in header order. Each
  %% parameter is either a bare key or a {Key, Value} pair; a quoted value
  %% is returned without its quotes and backslashes. The list goes through
  %% nonempty/1 (defined elsewhere in this module). Input that matches no
  %% clause crashes.
  -spec parse_sec_websocket_extensions(binary()) -> [{binary(), [binary() | {binary(), binary()}]}].
  parse_sec_websocket_extensions(Header) ->
      nonempty(ws_extension_list(Header, [])).

  %% Before an extension name: skip whitespace and commas, then expect a
  %% token byte.
  ws_extension_list(<<>>, Exts) ->
      lists:reverse(Exts);
  ws_extension_list(<< Skip, Rest/bits >>, Exts) when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      ws_extension_list(Rest, Exts);
  ws_extension_list(<< C, Rest/bits >>, Exts) when ?IS_TOKEN(C) ->
      ws_extension(Rest, Exts, << C >>).

  %% Inside an extension name.
  ws_extension(<<>>, Exts, Name) ->
      lists:reverse([{Name, []}|Exts]);
  ws_extension(<< $,, Rest/bits >>, Exts, Name) ->
      ws_extension_list(Rest, [{Name, []}|Exts]);
  ws_extension(<< $;, Rest/bits >>, Exts, Name) ->
      ws_extension_before_param(Rest, Exts, Name, []);
  ws_extension(<< WS, Rest/bits >>, Exts, Name) when WS =:= $\s; WS =:= $\t ->
      ws_extension_before_semicolon(Rest, Exts, Name, []);
  ws_extension(<< C, Rest/bits >>, Exts, Name) when ?IS_TOKEN(C) ->
      ws_extension(Rest, Exts, << Name/binary, C >>).

  %% After a name, key or value, waiting for ";", "," or the end of input.
  %% Params is kept in reverse order until the extension is complete.
  ws_extension_before_semicolon(<<>>, Exts, Name, Params) ->
      lists:reverse([{Name, lists:reverse(Params)}|Exts]);
  ws_extension_before_semicolon(<< $,, Rest/bits >>, Exts, Name, Params) ->
      ws_extension_list(Rest, [{Name, lists:reverse(Params)}|Exts]);
  ws_extension_before_semicolon(<< $;, Rest/bits >>, Exts, Name, Params) ->
      ws_extension_before_param(Rest, Exts, Name, Params);
  ws_extension_before_semicolon(<< WS, Rest/bits >>, Exts, Name, Params)
          when WS =:= $\s; WS =:= $\t ->
      ws_extension_before_semicolon(Rest, Exts, Name, Params).

  %% After ";": skip whitespace, then a parameter key must start.
  ws_extension_before_param(<< WS, Rest/bits >>, Exts, Name, Params)
          when WS =:= $\s; WS =:= $\t ->
      ws_extension_before_param(Rest, Exts, Name, Params);
  ws_extension_before_param(<< C, Rest/bits >>, Exts, Name, Params) when ?IS_TOKEN(C) ->
      ws_extension_param(Rest, Exts, Name, Params, << C >>).

  %% Inside a parameter key. A key not followed by "=" is stored bare.
  ws_extension_param(<<>>, Exts, Name, Params, Key) ->
      lists:reverse([{Name, lists:reverse([Key|Params])}|Exts]);
  ws_extension_param(<< WS, Rest/bits >>, Exts, Name, Params, Key)
          when WS =:= $\s; WS =:= $\t ->
      ws_extension_before_semicolon(Rest, Exts, Name, [Key|Params]);
  ws_extension_param(<< $,, Rest/bits >>, Exts, Name, Params, Key) ->
      ws_extension_list(Rest, [{Name, lists:reverse([Key|Params])}|Exts]);
  ws_extension_param(<< $;, Rest/bits >>, Exts, Name, Params, Key) ->
      ws_extension_before_param(Rest, Exts, Name, [Key|Params]);
  ws_extension_param(<< $=, $", Rest/bits >>, Exts, Name, Params, Key) ->
      ws_extension_quoted(Rest, Exts, Name, Params, Key, <<>>);
  ws_extension_param(<< $=, C, Rest/bits >>, Exts, Name, Params, Key) when ?IS_TOKEN(C) ->
      ws_extension_value(Rest, Exts, Name, Params, Key, << C >>);
  ws_extension_param(<< C, Rest/bits >>, Exts, Name, Params, Key) when ?IS_TOKEN(C) ->
      ws_extension_param(Rest, Exts, Name, Params, << Key/binary, C >>).

  %% Inside a quoted value: only token bytes are accepted, optionally
  %% preceded by a backslash which is dropped.
  ws_extension_quoted(<< $", Rest/bits >>, Exts, Name, Params, Key, Val) ->
      ws_extension_before_semicolon(Rest, Exts, Name, [{Key, Val}|Params]);
  ws_extension_quoted(<< $\\, C, Rest/bits >>, Exts, Name, Params, Key, Val) when ?IS_TOKEN(C) ->
      ws_extension_quoted(Rest, Exts, Name, Params, Key, << Val/binary, C >>);
  ws_extension_quoted(<< C, Rest/bits >>, Exts, Name, Params, Key, Val) when ?IS_TOKEN(C) ->
      ws_extension_quoted(Rest, Exts, Name, Params, Key, << Val/binary, C >>).

  %% Inside an unquoted value.
  ws_extension_value(<<>>, Exts, Name, Params, Key, Val) ->
      lists:reverse([{Name, lists:reverse([{Key, Val}|Params])}|Exts]);
  ws_extension_value(<< WS, Rest/bits >>, Exts, Name, Params, Key, Val)
          when WS =:= $\s; WS =:= $\t ->
      ws_extension_before_semicolon(Rest, Exts, Name, [{Key, Val}|Params]);
  ws_extension_value(<< $,, Rest/bits >>, Exts, Name, Params, Key, Val) ->
      ws_extension_list(Rest, [{Name, lists:reverse([{Key, Val}|Params])}|Exts]);
  ws_extension_value(<< $;, Rest/bits >>, Exts, Name, Params, Key, Val) ->
      ws_extension_before_param(Rest, Exts, Name, [{Key, Val}|Params]);
  ws_extension_value(<< C, Rest/bits >>, Exts, Name, Params, Key, Val) when ?IS_TOKEN(C) ->
      ws_extension_value(Rest, Exts, Name, Params, Key, << Val/binary, C >>).
  -ifdef(TEST).
  %% Generator: a quoted string of token characters in which roughly one
  %% character in a hundred is preceded by a backslash.
  quoted_token() ->
      ?LET(Chars,
          non_empty(list(frequency([
              {99, tchar()},
              {1, [$\\, tchar()]}
          ]))),
          [$", Chars, $"]).

  %% Generator: {Name, Params, Text} for one extension. Each parameter
  %% carries the optional whitespace written before and after its ";".
  ws_extension() ->
      ?LET({Name, Params},
          {token(), small_list({ows(), ows(), oneof([token(), {token(), oneof([token(), quoted_token()])}])})},
          begin
              ParamsText = [case Param of
                  {Before, After, {Key, Val}} -> [Before, $;, After, Key, $=, Val];
                  {Before, After, Key} -> [Before, $;, After, Key]
              end || Param <- Params],
              {Name, Params, iolist_to_binary([Name, ParamsText])}
          end).

  %% Property: between 1 and 50 generated extensions, joined with commas,
  %% parse back to the same names and parameters, with quoted values
  %% compared after unquote/1.
  prop_parse_sec_websocket_extensions() ->
      ?FORALL(Exts,
          vector(1, 50, ws_extension()),
          begin
              << _, Header/binary >> = iolist_to_binary([[$,, Text] || {_, _, Text} <- Exts]),
              Parsed = parse_sec_websocket_extensions(Header),
              Checks = [begin
                  WantParams = [case Param of
                      {_, _, {Key, Val}} -> {Key, unquote(Val)};
                      {_, _, Key} -> Key
                  end || Param <- Params],
                  Name =:= GotName andalso WantParams =:= GotParams
              end || {{Name, Params, _}, {GotName, GotParams}} <- lists:zip(Exts, Parsed)],
              [true] =:= lists:usort(Checks)
          end).

  %% Fixed examples.
  parse_sec_websocket_extensions_test_() ->
      Cases = [
          {<<"foo">>, [{<<"foo">>, []}]},
          {<<"bar; baz=2">>, [{<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
          {<<"foo, bar; baz=2">>, [{<<"foo">>, []}, {<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
          {<<"deflate-stream">>, [{<<"deflate-stream">>, []}]},
          {<<"mux; max-channels=4; flow-control, deflate-stream">>,
              [{<<"mux">>, [{<<"max-channels">>, <<"4">>}, <<"flow-control">>]}, {<<"deflate-stream">>, []}]},
          {<<"private-extension">>, [{<<"private-extension">>, []}]}
      ],
      [{Input, fun() -> Expected = parse_sec_websocket_extensions(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_sec_websocket_extensions_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_extensions(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmark: two extensions, the first with two parameters.
  horse_parse_sec_websocket_extensions() ->
      horse:repeat(200000,
          parse_sec_websocket_extensions(<<"mux; max-channels=4; flow-control, deflate-stream">>)).
  -endif.
  %% @doc Dummy parsing function for the Sec-WebSocket-Key header.
  %%
  %% The value is returned exactly as given. It is only ever prepended to
  %% a static value and hashed to produce the base64 value sent back in
  %% Sec-WebSocket-Accept, so there is nothing to parse.
  -spec parse_sec_websocket_key(binary()) -> binary().
  parse_sec_websocket_key(Key) ->
      Key.
  %% @doc Parse the Sec-WebSocket-Protocol request header.
  %%
  %% The value is parsed by token_ci_list/2 and the result goes through
  %% nonempty/1 (both defined elsewhere in this module).
  -spec parse_sec_websocket_protocol_req(binary()) -> [binary()].
  parse_sec_websocket_protocol_req(Header) ->
      nonempty(token_ci_list(Header, [])).
  -ifdef(TEST).
  %% Fixed example: two subprotocols.
  parse_sec_websocket_protocol_req_test_() ->
      Cases = [
          {<<"chat, superchat">>, [<<"chat">>, <<"superchat">>]}
      ],
      [{Input, fun() -> Expected = parse_sec_websocket_protocol_req(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_sec_websocket_protocol_req_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_protocol_req(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmark: two subprotocols.
  horse_parse_sec_websocket_protocol_req() ->
      horse:repeat(200000, parse_sec_websocket_protocol_req(<<"chat, superchat">>)).
  -endif.
  %% @doc Parse the Sec-WebSocket-Protocol response header.
  %%
  %% The value must be a single non-empty token; it is returned lowercased
  %% (see the tests in this file). Anything else crashes.
  -spec parse_sec_websocket_protocol_resp(binary()) -> binary().
  parse_sec_websocket_protocol_resp(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci, Rest, <<>>)
      end.

  %% Accumulate the remaining token bytes through ?INLINE_LOWERCASE until
  %% the end of input.
  token_ci(<<>>, Token) ->
      Token;
  token_ci(<< C, Rest/bits >>, Token) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci, Rest, Token)
      end.
  -ifdef(TEST).
  %% Property: any generated token parses to its lowercased form.
  prop_parse_sec_websocket_protocol_resp() ->
      ?FORALL(Token,
          token(),
          ?INLINE_LOWERCASE_BC(Token) =:= parse_sec_websocket_protocol_resp(Token)).

  %% Fixed examples: lowercase and uppercase input.
  parse_sec_websocket_protocol_resp_test_() ->
      Cases = [
          {<<"chat">>, <<"chat">>},
          {<<"CHAT">>, <<"chat">>}
      ],
      [{Input, fun() -> Expected = parse_sec_websocket_protocol_resp(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_sec_websocket_protocol_resp_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_protocol_resp(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmark: a single subprotocol.
  horse_parse_sec_websocket_protocol_resp() ->
      horse:repeat(200000, parse_sec_websocket_protocol_resp(<<"chat">>)).
  -endif.
  %% @doc Parse the Sec-WebSocket-Version request header.
  %%
  %% The value must be one to three decimal digits denoting a number in the
  %% range 0..255. binary_to_integer/1 on its own also accepts a leading
  %% "+" or "-" sign, so values such as "+13" or "-0" used to be accepted.
  %% Requiring the first byte to be a digit rejects them;
  %% binary_to_integer/1 then rejects any non-digit byte after the first.
  %%
  %% Invalid input (including the empty binary) crashes.
  -spec parse_sec_websocket_version_req(binary()) -> websocket_version().
  parse_sec_websocket_version_req(SecWebSocketVersion = << D, _/bits >>)
          when byte_size(SecWebSocketVersion) < 4, D >= $0, D =< $9 ->
      Version = binary_to_integer(SecWebSocketVersion),
      %% No sign is possible here, so only the upper bound needs checking.
      true = Version =< 255,
      Version.
  -ifdef(TEST).
  %% Property: the decimal text of any integer in 0..255 parses back to
  %% that integer.
  prop_parse_sec_websocket_version_req() ->
      ?FORALL(N,
          int(0, 255),
          N =:= parse_sec_websocket_version_req(integer_to_binary(N))).

  %% Fixed examples.
  parse_sec_websocket_version_req_test_() ->
      Cases = [
          {<<"13">>, 13},
          {<<"25">>, 25}
      ],
      [{Input, fun() -> Expected = parse_sec_websocket_version_req(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_sec_websocket_version_req_error_test_() ->
      Invalid = [
          <<>>,
          <<" ">>,
          <<"7, 8, 13">>,
          <<"invalid">>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_version_req(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmarks for parse_sec_websocket_version_req/1: a two-digit and a
  %% three-digit version.
  horse_parse_sec_websocket_version_req_13() ->
      horse:repeat(200000, parse_sec_websocket_version_req(<<"13">>)).

  horse_parse_sec_websocket_version_req_255() ->
      horse:repeat(200000, parse_sec_websocket_version_req(<<"255">>)).
  -endif.
  %% @doc Parse the Sec-WebSocket-Version response header.
  %%
  %% Returns the comma-separated versions in header order. The list goes
  %% through nonempty/1 (defined elsewhere in this module).
  %%
  %% Each version is limited to 255, the same upper bound that
  %% parse_sec_websocket_version_req/1 enforces for the same return type.
  %% Previously any digit string was accepted, so "999" was returned as
  %% 999. Invalid input crashes.
  -spec parse_sec_websocket_version_resp(binary()) -> [websocket_version()].
  parse_sec_websocket_version_resp(SecWebSocketVersion) ->
      nonempty(ws_version_list(SecWebSocketVersion, [])).

  %% Before a version: skip whitespace and commas, then expect a digit.
  ws_version_list(<<>>, Acc) ->
      lists:reverse(Acc);
  ws_version_list(<< $\s, R/bits >>, Acc) ->
      ws_version_list(R, Acc);
  ws_version_list(<< $\t, R/bits >>, Acc) ->
      ws_version_list(R, Acc);
  ws_version_list(<< $,, R/bits >>, Acc) ->
      ws_version_list(R, Acc);
  ws_version_list(<< C, R/bits >>, Acc) when ?IS_DIGIT(C) ->
      ws_version(R, Acc, C - $0).

  %% Inside a version: accumulate digits until whitespace, a comma or the
  %% end of input.
  ws_version(<<>>, Acc, V) ->
      lists:reverse([V|Acc]);
  ws_version(<< $\s, R/bits >>, Acc, V) ->
      ws_version_list_sep(R, [V|Acc]);
  ws_version(<< $\t, R/bits >>, Acc, V) ->
      ws_version_list_sep(R, [V|Acc]);
  ws_version(<< $,, R/bits >>, Acc, V) ->
      ws_version_list(R, [V|Acc]);
  ws_version(<< C, R/bits >>, Acc, V) when ?IS_DIGIT(C) ->
      Next = V * 10 + C - $0,
      %% Checked on every digit so that an out-of-range value crashes as
      %% soon as it is seen; a single digit is always within range.
      true = Next =< 255,
      ws_version(R, Acc, Next).

  %% After a version followed by whitespace: only more whitespace, a comma
  %% or the end of input may follow.
  ws_version_list_sep(<<>>, Acc) ->
      lists:reverse(Acc);
  ws_version_list_sep(<< $\s, R/bits >>, Acc) ->
      ws_version_list_sep(R, Acc);
  ws_version_list_sep(<< $\t, R/bits >>, Acc) ->
      ws_version_list_sep(R, Acc);
  ws_version_list_sep(<< $,, R/bits >>, Acc) ->
      ws_version_list(R, Acc).
  -ifdef(TEST).
  %% Generator: {Versions, HeaderValue}. Each version in 0..255 is written
  %% as optional whitespace, a comma, optional whitespace and the number;
  %% the first byte of the joined text is then dropped.
  sec_websocket_version_resp() ->
      ?LET(Items,
          non_empty(list({ows(), ows(), int(0, 255)})),
          begin
              Joined = iolist_to_binary(
                  [[Before, $,, After, integer_to_binary(N)] || {Before, After, N} <- Items]),
              << _, Header/binary >> = Joined,
              {[N || {_, _, N} <- Items], Header}
          end).

  %% Property: the generated header parses back to the generated versions.
  prop_parse_sec_websocket_version_resp() ->
      ?FORALL({Versions, Header},
          sec_websocket_version_resp(),
          Versions =:= parse_sec_websocket_version_resp(Header)).

  %% Fixed example.
  parse_sec_websocket_version_resp_test_() ->
      Cases = [
          {<<"13, 8, 7">>, [13, 8, 7]}
      ],
      [{Input, fun() -> Expected = parse_sec_websocket_version_resp(Input) end}
          || {Input, Expected} <- Cases].

  %% Inputs that must make the parser crash.
  parse_sec_websocket_version_resp_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_version_resp(Input)) end}
          || Input <- Invalid].
  -endif.
  -ifdef(PERF).
  %% Benchmark: a list of three versions.
  horse_parse_sec_websocket_version_resp() ->
      horse:repeat(200000, parse_sec_websocket_version_resp(<<"13, 8, 7">>)).
  -endif.
  %% @doc Parse the TE header.
  %%
  %% Returns {Trail, Codings}. Trail is trailers when the "trailers" member
  %% is present and no_trailers otherwise; "trailers" itself is never
  %% listed in Codings. Each coding is {Name, Weight} with the weight
  %% expressed in thousandths (1000 when no q parameter is given). An
  %% empty header yields {no_trailers, []}.
  %%
  %% This function does not support parsing of transfer-parameter.
  -spec parse_te(binary()) -> {trailers | no_trailers, [{binary(), qvalue()}]}.
  parse_te(Header) ->
      te_list(Header, no_trailers, []).

  %% Before a coding: skip whitespace and commas. The common lowercase
  %% names are matched whole; anything else goes byte by byte through
  %% ?INLINE_LOWERCASE.
  te_list(<<>>, Trail, Codings) ->
      {Trail, lists:reverse(Codings)};
  te_list(<< Skip, Rest/bits >>, Trail, Codings) when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      te_list(Rest, Trail, Codings);
  te_list(<< "trailers", Rest/bits >>, Trail, Codings) ->
      te(Rest, Trail, Codings, <<"trailers">>);
  te_list(<< "compress", Rest/bits >>, Trail, Codings) ->
      te(Rest, Trail, Codings, <<"compress">>);
  te_list(<< "deflate", Rest/bits >>, Trail, Codings) ->
      te(Rest, Trail, Codings, <<"deflate">>);
  te_list(<< "gzip", Rest/bits >>, Trail, Codings) ->
      te(Rest, Trail, Codings, <<"gzip">>);
  te_list(<< C, Rest/bits >>, Trail, Codings) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(te, Rest, Trail, Codings, <<>>)
      end.

  %% Inside a coding name. A name equal to "trailers" flips the flag rather
  %% than being stored, and has no clause accepting ";".
  te(<<>>, _, Codings, <<"trailers">>) ->
      {trailers, lists:reverse(Codings)};
  te(<<>>, Trail, Codings, Name) ->
      {Trail, lists:reverse([{Name, 1000}|Codings])};
  te(<< $,, Rest/bits >>, _, Codings, <<"trailers">>) ->
      te_list(Rest, trailers, Codings);
  te(<< $,, Rest/bits >>, Trail, Codings, Name) ->
      te_list(Rest, Trail, [{Name, 1000}|Codings]);
  te(<< $;, Rest/bits >>, Trail, Codings, Name) when Name =/= <<"trailers">> ->
      te_before_weight(Rest, Trail, Codings, Name);
  te(<< WS, Rest/bits >>, _, Codings, <<"trailers">>) when WS =:= $\s; WS =:= $\t ->
      te_list_sep(Rest, trailers, Codings);
  te(<< WS, Rest/bits >>, Trail, Codings, Name) when WS =:= $\s; WS =:= $\t ->
      te_before_semicolon(Rest, Trail, Codings, Name);
  te(<< C, Rest/bits >>, Trail, Codings, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(te, Rest, Trail, Codings, Name)
      end.

  %% After a coding name followed by whitespace.
  te_before_semicolon(<<>>, Trail, Codings, Name) ->
      {Trail, lists:reverse([{Name, 1000}|Codings])};
  te_before_semicolon(<< $,, Rest/bits >>, Trail, Codings, Name) ->
      te_list(Rest, Trail, [{Name, 1000}|Codings]);
  te_before_semicolon(<< $;, Rest/bits >>, Trail, Codings, Name) ->
      te_before_weight(Rest, Trail, Codings, Name);
  te_before_semicolon(<< WS, Rest/bits >>, Trail, Codings, Name) when WS =:= $\s; WS =:= $\t ->
      te_before_semicolon(Rest, Trail, Codings, Name).

  %% After ";": skip whitespace, then "q=" is the only accepted parameter.
  te_before_weight(<< WS, Rest/bits >>, Trail, Codings, Name) when WS =:= $\s; WS =:= $\t ->
      te_before_weight(Rest, Trail, Codings, Name);
  te_before_weight(<< $q, $=, Rest/bits >>, Trail, Codings, Name) ->
      te_weight(Rest, Trail, Codings, Name).

  %% The weight itself. Longer literals are tried first so that "1.000" is
  %% not consumed as "1" followed by leftover bytes.
  te_weight(<< "1.000", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 1000}|Codings]);
  te_weight(<< "1.00", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 1000}|Codings]);
  te_weight(<< "1.0", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 1000}|Codings]);
  te_weight(<< "1.", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 1000}|Codings]);
  te_weight(<< "1", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 1000}|Codings]);
  te_weight(<< "0.", D1, D2, D3, Rest/bits >>, Trail, Codings, Name)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      te_list_sep(Rest, Trail, [{Name, (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0)}|Codings]);
  te_weight(<< "0.", D1, D2, Rest/bits >>, Trail, Codings, Name)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      te_list_sep(Rest, Trail, [{Name, (D1 - $0) * 100 + (D2 - $0) * 10}|Codings]);
  te_weight(<< "0.", D1, Rest/bits >>, Trail, Codings, Name)
          when D1 >= $0, D1 =< $9 ->
      te_list_sep(Rest, Trail, [{Name, (D1 - $0) * 100}|Codings]);
  te_weight(<< "0.", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 0}|Codings]);
  te_weight(<< "0", Rest/bits >>, Trail, Codings, Name) ->
      te_list_sep(Rest, Trail, [{Name, 0}|Codings]).

  %% After a complete member: only whitespace, a comma or the end of input
  %% may follow.
  te_list_sep(<<>>, Trail, Codings) ->
      {Trail, lists:reverse(Codings)};
  te_list_sep(<< WS, Rest/bits >>, Trail, Codings) when WS =:= $\s; WS =:= $\t ->
      te_list_sep(Rest, Trail, Codings);
  te_list_sep(<< $,, Rest/bits >>, Trail, Codings) ->
      te_list(Rest, Trail, Codings).
  2478. -ifdef(TEST).
  %% Generator for TE header values.
  %%
  %% Produces {Trail, L, TE} where L is a non-empty list of {Token, Weight}
  %% pairs (never the token "trailers"), Trail says whether a bare
  %% "trailers" entry was added, and TE is the comma-joined header binary.
  te() ->
      ?LET({Trail, L},
          {elements([trailers, no_trailers]),
              small_non_empty_list({?SUCHTHAT(T, token(), T =/= <<"trailers">>), weight()})},
          {Trail, L, begin
              L2 = case Trail of
                  no_trailers -> L;
                  trailers ->
                      %% Insert "trailers" at a random position, from before
                      %% the first element to after the last one.
                      %% rand seeds itself on first use in a process, so no
                      %% explicit seeding is needed (unlike the deprecated
                      %% random module).
                      Rand = rand:uniform(length(L) + 1) - 1,
                      {Before, After} = lists:split(Rand, L),
                      Before ++ [{<<"trailers">>, undefined}|After]
              end,
              %% Every element is emitted with a leading comma; the first
              %% comma is then dropped by the binary match.
              << _, TE/binary >> = iolist_to_binary([case W of
                  undefined -> [$,, T];
                  _ -> [$,, T, <<";q=">>, qvalue_to_iodata(W)]
              end || {T, W} <- L2]),
              TE
          end}
      ).

  %% Property: parsing a generated TE value returns the same trailers flag
  %% and the same tokens (lowercased) with the same weights. A generated
  %% weight of undefined must come back as 1000.
  prop_parse_te() ->
      ?FORALL({Trail, L, TE},
          te(),
          begin
              {ResTrail, ResL} = parse_te(TE),
              CheckedL = [begin
                  ResT =:= ?INLINE_LOWERCASE_BC(T)
                      andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
              end || {{T, W}, {ResT, ResW}} <- lists:zip(L, ResL)],
              ResTrail =:= Trail andalso [true] =:= lists:usort(CheckedL)
          end).
  %% Fixed examples for parse_te/1.
  %%
  %% Returns one {Header, Fun} pair per case; each fun asserts by pattern
  %% match that parsing Header gives the expected result.
  parse_te_test_() ->
      Cases = [
          {<<"deflate">>, {no_trailers, [{<<"deflate">>, 1000}]}},
          {<<>>, {no_trailers, []}},
          {<<"trailers, deflate;q=0.5">>, {trailers, [{<<"deflate">>, 500}]}}
      ],
      lists:map(fun({Header, Expected}) ->
          {Header, fun() -> Expected = parse_te(Header) end}
      end, Cases).
  2517. -endif.
  2518. -ifdef(PERF).
  %% Benchmark entry: passes horse:repeat/2 a count of 200000 and the
  %% parse_te/1 call on a fixed header value.
  horse_parse_te() ->
      Header = <<"trailers, deflate;q=0.5">>,
      horse:repeat(200000, parse_te(Header)).
  2523. -endif.
  %% @doc Parse the Trailer header.
  %%
  %% The value is read as a comma-separated list of case insensitive
  %% tokens, returned lowercased. A value that yields no token at all
  %% is rejected: the call crashes.
  -spec parse_trailer(binary()) -> [binary()].
  parse_trailer(Trailer) ->
      FieldNames = token_ci_list(Trailer, []),
      nonempty(FieldNames).
  2528. -ifdef(TEST).
  %% Fixed examples for parse_trailer/1: one {Header, Fun} pair per case,
  %% each fun matching the parse result against the expected list.
  parse_trailer_test_() ->
      Cases = [
          {<<"Date, Content-MD5">>, [<<"date">>, <<"content-md5">>]}
      ],
      lists:map(fun({Header, Expected}) ->
          {Header, fun() -> Expected = parse_trailer(Header) end}
      end, Cases).
  %% Inputs that parse_trailer/1 must reject: each generated fun expects
  %% the call to end in an 'EXIT' tuple.
  parse_trailer_error_test_() ->
      Invalid = [
          <<>>
      ],
      lists:map(fun(Header) ->
          {Header, fun() -> {'EXIT', _} = (catch parse_trailer(Header)) end}
      end, Invalid).
  2539. -endif.
  2540. -ifdef(PERF).
  %% Benchmark entry: passes horse:repeat/2 a count of 200000 and the
  %% parse_trailer/1 call on a fixed header value.
  horse_parse_trailer() ->
      Header = <<"Date, Content-MD5">>,
      horse:repeat(200000, parse_trailer(Header)).
  2545. -endif.
  %% @doc Parse the Transfer-Encoding header.
  %%
  %% The value is read as a comma-separated list of case insensitive
  %% tokens, returned lowercased. The exact value "chunked" is answered
  %% directly without going through the list parser. A value that yields
  %% no token is rejected: the call crashes.
  %%
  %% Transfer-parameter syntax is not supported by this function.
  -spec parse_transfer_encoding(binary()) -> [binary()].
  parse_transfer_encoding(<<"chunked">> = Chunked) ->
      [Chunked];
  parse_transfer_encoding(TransferEncoding) ->
      Codings = token_ci_list(TransferEncoding, []),
      nonempty(Codings).
  2554. -ifdef(TEST).
  %% Property: a comma-joined, non-empty list of generated tokens parses
  %% back to the same tokens, lowercased and in the same order.
  prop_parse_transfer_encoding() ->
      ?FORALL(Tokens,
          non_empty(list(token())),
          begin
              %% Each token is emitted with a leading comma; the first
              %% comma is dropped by the binary match.
              << _, Header/binary >> = iolist_to_binary([[$,, Item] || Item <- Tokens]),
              Parsed = parse_transfer_encoding(Header),
              Matches = lists:zipwith(
                  fun(Tok, Res) -> ?INLINE_LOWERCASE_BC(Tok) =:= Res end,
                  Tokens, Parsed),
              [true] =:= lists:usort(Matches)
          end).
  %% Fixed examples for parse_transfer_encoding/1, including values with
  %% empty list elements and surrounding whitespace.
  %% Returns one {Header, Fun} pair per case.
  parse_transfer_encoding_test_() ->
      Cases = [
          {<<"a , , , ">>, [<<"a">>]},
          {<<" , , , a">>, [<<"a">>]},
          {<<"a , , b">>, [<<"a">>, <<"b">>]},
          {<<"chunked">>, [<<"chunked">>]},
          {<<"chunked, something">>, [<<"chunked">>, <<"something">>]},
          {<<"gzip, chunked">>, [<<"gzip">>, <<"chunked">>]}
      ],
      lists:map(fun({Header, Expected}) ->
          {Header, fun() -> Expected = parse_transfer_encoding(Header) end}
      end, Cases).
  %% Inputs that parse_transfer_encoding/1 must reject: each generated fun
  %% expects the call to end in an 'EXIT' tuple.
  parse_transfer_encoding_error_test_() ->
      Invalid = [
          <<>>,
          <<" ">>,
          <<" , ">>,
          <<",,,">>,
          <<"a b">>
      ],
      lists:map(fun(Header) ->
          {Header, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(Header)) end}
      end, Invalid).
  2584. -endif.
  2585. -ifdef(PERF).
  %% Benchmark entry for the exact "chunked" value: passes horse:repeat/2
  %% a count of 200000 and the parse_transfer_encoding/1 call.
  horse_parse_transfer_encoding_chunked() ->
      Header = <<"chunked">>,
      horse:repeat(200000, parse_transfer_encoding(Header)).
  %% Benchmark entry for a two-element value: passes horse:repeat/2 a
  %% count of 200000 and the parse_transfer_encoding/1 call.
  horse_parse_transfer_encoding_custom() ->
      Header = <<"chunked, something">>,
      horse:repeat(200000, parse_transfer_encoding(Header)).
  2594. -endif.
  %% @doc Parse the Upgrade header.
  %%
  %% The RFC does not make it clear whether protocol values are case
  %% sensitive. They are treated as case insensitive here, following
  %% the Websocket RFC which describes them that way, and are returned
  %% lowercased.
  %%
  %% A value that yields no protocol is rejected: the call crashes.
  -spec parse_upgrade(binary()) -> [binary()].
  parse_upgrade(Upgrade) ->
      Protocols = protocol_list(Upgrade, []),
      nonempty(Protocols).
  %% Look for the start of the next protocol in an Upgrade value.
  %%
  %% Spaces, tabs and commas are skipped. The first token character is
  %% lowercased and starts a new protocol name. End of input returns the
  %% protocols collected so far, in their original order.
  protocol_list(<<>>, Protocols) ->
      lists:reverse(Protocols);
  protocol_list(<< Skip, Rest/bits >>, Protocols)
          when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      protocol_list(Rest, Protocols);
  protocol_list(<< C, Rest/bits >>, Protocols) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(protocol_name, Rest, Protocols, <<>>)
      end.
  %% Accumulate a protocol name into P, one byte at a time, lowercasing
  %% through ?INLINE_LOWERCASE.
  %%
  %% End of input, a space, a tab or a comma finish the current protocol.
  %% A "/" switches to the protocol version. Any byte that matches no
  %% clause makes the parse crash with function_clause.
  protocol_name(<<>>, Acc, P) -> lists:reverse([P|Acc]);
  protocol_name(<< $\s, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  protocol_name(<< $\t, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  protocol_name(<< $,, R/bits >>, Acc, P) -> protocol_list(R, [P|Acc]);
  %% The protocol version is a token, so the byte after "/" must be a
  %% token character. Without this guard any byte (space, comma, control
  %% character) was accepted as the first character of the version.
  protocol_name(<< $/, C, R/bits >>, Acc, P) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(protocol_version, R, Acc, << P/binary, $/ >>)
      end;
  protocol_name(<< C, R/bits >>, Acc, P) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(protocol_name, R, Acc, P)
      end.
  %% Accumulate the version part of a protocol (the bytes after "/"),
  %% lowercasing through ?INLINE_LOWERCASE.
  %%
  %% End of input, a space, a tab or a comma finish the current protocol,
  %% which is then pushed onto the list of protocols.
  protocol_version(<<>>, Protocols, Current) ->
      lists:reverse([Current|Protocols]);
  protocol_version(<< $,, Rest/bits >>, Protocols, Current) ->
      protocol_list(Rest, [Current|Protocols]);
  protocol_version(<< WS, Rest/bits >>, Protocols, Current)
          when WS =:= $\s; WS =:= $\t ->
      protocol_list_sep(Rest, [Current|Protocols]);
  protocol_version(<< C, Rest/bits >>, Protocols, Current) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(protocol_version, Rest, Protocols, Current)
      end.
  %% Consume what follows a protocol that was ended by whitespace.
  %%
  %% Further spaces and tabs are skipped, a comma goes back to
  %% protocol_list/2, and end of input returns the protocols in their
  %% original order.
  protocol_list_sep(<<>>, Protocols) ->
      lists:reverse(Protocols);
  protocol_list_sep(<< $,, Rest/bits >>, Protocols) ->
      protocol_list(Rest, Protocols);
  protocol_list_sep(<< WS, Rest/bits >>, Protocols) when WS =:= $\s; WS =:= $\t ->
      protocol_list_sep(Rest, Protocols).
  2637. -ifdef(TEST).
  %% Generator for a single Upgrade protocol: either a lone token or
  %% token "/" token, flattened into one binary.
  protocols() ->
      Shapes = oneof([token(), [token(), $/, token()]]),
      ?LET(Parts, Shapes, iolist_to_binary(Parts)).
  %% Property: a comma-joined, non-empty list of generated protocols parses
  %% back to the same protocols, lowercased and in the same order.
  prop_parse_upgrade() ->
      ?FORALL(Protocols,
          non_empty(list(protocols())),
          begin
              %% Each protocol is emitted with a leading comma; the first
              %% comma is dropped by the binary match.
              << _, Header/binary >> = iolist_to_binary([[$,, Item] || Item <- Protocols]),
              Parsed = parse_upgrade(Header),
              Matches = lists:zipwith(
                  fun(Proto, Res) -> ?INLINE_LOWERCASE_BC(Proto) =:= Res end,
                  Protocols, Parsed),
              [true] =:= lists:usort(Matches)
          end).
  %% Fixed examples for parse_upgrade/1: one {Header, Fun} pair per case,
  %% each fun matching the parse result against the expected list.
  parse_upgrade_test_() ->
      Cases = [
          {<<"HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11">>,
              [<<"http/2.0">>, <<"shttp/1.3">>, <<"irc/6.9">>, <<"rta/x11">>]},
          {<<"HTTP/2.0">>, [<<"http/2.0">>]}
      ],
      lists:map(fun({Header, Expected}) ->
          {Header, fun() -> Expected = parse_upgrade(Header) end}
      end, Cases).
  %% Inputs that parse_upgrade/1 must reject: each generated fun expects
  %% the call to end in an 'EXIT' tuple.
  parse_upgrade_error_test_() ->
      Invalid = [
          <<>>
      ],
      lists:map(fun(Header) ->
          {Header, fun() -> {'EXIT', _} = (catch parse_upgrade(Header)) end}
      end, Invalid).
  2664. -endif.
  %% @doc Parse the Vary header.
  %%
  %% The exact value "*" yields the atom '*'. Any other value is read as
  %% a comma-separated list of case insensitive tokens, returned
  %% lowercased. A value that yields no token is rejected: the call
  %% crashes.
  -spec parse_vary(binary()) -> '*' | [binary()].
  parse_vary(<<"*">>) ->
      '*';
  parse_vary(Vary) ->
      FieldNames = token_ci_list(Vary, []),
      nonempty(FieldNames).
  2671. -ifdef(TEST).
  %% Fixed examples for parse_vary/1: one {Header, Fun} pair per case,
  %% each fun matching the parse result against the expected value.
  parse_vary_test_() ->
      Cases = [
          {<<"*">>, '*'},
          {<<"Accept-Encoding">>, [<<"accept-encoding">>]},
          {<<"accept-encoding, accept-language">>, [<<"accept-encoding">>, <<"accept-language">>]}
      ],
      lists:map(fun({Header, Expected}) ->
          {Header, fun() -> Expected = parse_vary(Header) end}
      end, Cases).
  %% Inputs that parse_vary/1 must reject: each generated fun expects the
  %% call to end in an 'EXIT' tuple.
  parse_vary_error_test_() ->
      Invalid = [
          <<>>
      ],
      lists:map(fun(Header) ->
          {Header, fun() -> {'EXIT', _} = (catch parse_vary(Header)) end}
      end, Invalid).
  2684. -endif.
  %% @doc Parse the X-Forwarded-For header.
  %%
  %% There is no specification for this header; its value appears to be
  %% a comma-separated list of tokens and is parsed as such. Tokens are
  %% returned with their case unchanged. A value that yields no token is
  %% rejected: the call crashes.
  %%
  %% The header is deprecated; the Forwarded header replaces it.
  -spec parse_x_forwarded_for(binary()) -> [binary()].
  parse_x_forwarded_for(XForwardedFor) ->
      Nodes = token_list(XForwardedFor, []),
      nonempty(Nodes).
  2694. -ifdef(TEST).
  %% Fixed examples for parse_x_forwarded_for/1: one {Header, Fun} pair
  %% per case, each fun matching the parse result against the expected
  %% list.
  parse_x_forwarded_for_test_() ->
      Cases = [
          {<<"client, proxy1, proxy2">>, [<<"client">>, <<"proxy1">>, <<"proxy2">>]},
          {<<"128.138.243.150, unknown, 192.52.106.30">>, [<<"128.138.243.150">>, <<"unknown">>, <<"192.52.106.30">>]}
      ],
      lists:map(fun({Header, Expected}) ->
          {Header, fun() -> Expected = parse_x_forwarded_for(Header) end}
      end, Cases).
  %% Inputs that parse_x_forwarded_for/1 must reject: each generated fun
  %% expects the call to end in an 'EXIT' tuple.
  parse_x_forwarded_for_error_test_() ->
      Invalid = [
          <<>>
      ],
      lists:map(fun(Header) ->
          {Header, fun() -> {'EXIT', _} = (catch parse_x_forwarded_for(Header)) end}
      end, Invalid).
  2706. -endif.
  2707. %% Internal.
  %% Return the argument unchanged unless it is the empty list, in which
  %% case no clause matches and the call crashes with function_clause.
  nonempty(List) when not (List =:= []) ->
      List.
  %% Parse a list of case sensitive tokens.
  %%
  %% Looks for the start of the next token: spaces, tabs and commas are
  %% skipped, and the first token character opens a new token. End of
  %% input returns the tokens collected so far, in their original order.
  token_list(<<>>, Tokens) ->
      lists:reverse(Tokens);
  token_list(<< Skip, Rest/bits >>, Tokens)
          when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      token_list(Rest, Tokens);
  token_list(<< Char, Rest/bits >>, Tokens) when ?IS_TOKEN(Char) ->
      token(Rest, Tokens, << Char >>).
  %% Accumulate one case sensitive token, byte by byte.
  %%
  %% End of input, a space, a tab or a comma finish the token, which is
  %% then pushed onto the list of tokens. A byte that is neither one of
  %% those nor a token character matches no clause and crashes the parse.
  token(<<>>, Tokens, Current) ->
      lists:reverse([Current|Tokens]);
  token(<< $,, Rest/bits >>, Tokens, Current) ->
      token_list(Rest, [Current|Tokens]);
  token(<< WS, Rest/bits >>, Tokens, Current) when WS =:= $\s; WS =:= $\t ->
      token_list_sep(Rest, [Current|Tokens]);
  token(<< Char, Rest/bits >>, Tokens, Current) when ?IS_TOKEN(Char) ->
      token(Rest, Tokens, << Current/binary, Char >>).
  %% Consume what follows a token that was ended by whitespace.
  %%
  %% Further spaces and tabs are skipped, a comma goes back to
  %% token_list/2, and end of input returns the tokens in their original
  %% order.
  token_list_sep(<<>>, Tokens) ->
      lists:reverse(Tokens);
  token_list_sep(<< $,, Rest/bits >>, Tokens) ->
      token_list(Rest, Tokens);
  token_list_sep(<< WS, Rest/bits >>, Tokens) when WS =:= $\s; WS =:= $\t ->
      token_list_sep(Rest, Tokens).
  %% Parse a list of case insensitive tokens.
  %%
  %% Looks for the start of the next token: spaces, tabs and commas are
  %% skipped, and the first token character is lowercased and opens a new
  %% token. End of input returns the tokens collected so far, in their
  %% original order.
  token_ci_list(<<>>, Tokens) ->
      lists:reverse(Tokens);
  token_ci_list(<< Skip, Rest/bits >>, Tokens)
          when Skip =:= $\s; Skip =:= $\t; Skip =:= $, ->
      token_ci_list(Rest, Tokens);
  token_ci_list(<< C, Rest/bits >>, Tokens) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci, Rest, Tokens, <<>>)
      end.
  %% Accumulate one case insensitive token, byte by byte, lowercasing
  %% through ?INLINE_LOWERCASE.
  %%
  %% End of input, a space, a tab or a comma finish the token, which is
  %% then pushed onto the list of tokens. A byte that is neither one of
  %% those nor a token character matches no clause and crashes the parse.
  token_ci(<<>>, Tokens, Current) ->
      lists:reverse([Current|Tokens]);
  token_ci(<< $,, Rest/bits >>, Tokens, Current) ->
      token_ci_list(Rest, [Current|Tokens]);
  token_ci(<< WS, Rest/bits >>, Tokens, Current) when WS =:= $\s; WS =:= $\t ->
      token_ci_list_sep(Rest, [Current|Tokens]);
  token_ci(<< C, Rest/bits >>, Tokens, Current) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci, Rest, Tokens, Current)
      end.
  %% Consume what follows a case insensitive token that was ended by
  %% whitespace.
  %%
  %% Further spaces and tabs are skipped, a comma goes back to
  %% token_ci_list/2, and end of input returns the tokens in their
  %% original order.
  token_ci_list_sep(<<>>, Tokens) ->
      lists:reverse(Tokens);
  token_ci_list_sep(<< $,, Rest/bits >>, Tokens) ->
      token_ci_list(Rest, Tokens);
  token_ci_list_sep(<< WS, Rest/bits >>, Tokens) when WS =:= $\s; WS =:= $\t ->
      token_ci_list_sep(Rest, Tokens).