cow_http_hd.erl 78 KB


  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_accept_encoding/1]).
  18. -export([parse_accept_language/1]).
  19. -export([parse_connection/1]).
  20. -export([parse_content_encoding/1]).
  21. -export([parse_content_language/1]).
  22. -export([parse_content_length/1]).
  23. -export([parse_content_type/1]).
  24. -export([parse_date/1]).
  25. -export([parse_etag/1]).
  26. -export([parse_expect/1]).
  27. -export([parse_expires/1]).
  28. -export([parse_if_match/1]).
  29. -export([parse_if_modified_since/1]).
  30. -export([parse_if_none_match/1]).
  31. -export([parse_if_unmodified_since/1]).
  32. -export([parse_last_modified/1]).
  33. -export([parse_max_forwards/1]).
  34. -export([parse_sec_websocket_extensions/1]).
  35. -export([parse_sec_websocket_protocol_client/1]).
  36. -export([parse_sec_websocket_version_client/1]).
  37. -export([parse_trailer/1]).
  38. -export([parse_transfer_encoding/1]).
  39. -export([parse_upgrade/1]).
  40. -type etag() :: {weak | strong, binary()}.
  41. -export_type([etag/0]).
  42. -type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
  43. -export_type([media_type/0]).
  44. -type qvalue() :: 0..1000.
  45. -export_type([qvalue/0]).
  46. -include("cow_inline.hrl").
  47. -ifdef(TEST).
  48. -include_lib("triq/include/triq.hrl").
  %% Generator for a vector of Dom whose length is drawn with choose(Min, Max).
  vector(Min, Max, Dom) ->
      ?LET(Len, choose(Min, Max), vector(Len, Dom)).

  %% Generator for a vector of Dom with a length drawn between 0 and 10.
  small_list(Dom) ->
      vector(0, 10, Dom).

  %% Generator for a vector of Dom with a length drawn between 1 and 10.
  small_non_empty_list(Dom) ->
      vector(1, 10, Dom).
  %% Character sets (as strings) used by the generators below.

  %% Lowercase then uppercase ASCII letters.
  alpha_chars() ->
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".

  %% ASCII digits followed by the letters above.
  alphanum_chars() ->
      "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".

  %% ASCII digits only.
  digit_chars() ->
      "0123456789".
  %% Optional whitespace: a list of spaces and tabs.
  ows() ->
      list(elements([$\s, $\t])).

  %% A single letter.
  alpha() ->
      elements(alpha_chars()).

  %% A single letter or digit.
  alphanum() ->
      elements(alphanum_chars()).

  %% A single digit.
  digit() ->
      elements(digit_chars()).
  %% A single token character. Alphanumerics are weighted 99 to 1
  %% against the punctuation characters.
  tchar() ->
      Punctuation = "!#$%&'*+-.^_`|~",
      frequency([
          {1, elements(Punctuation)},
          {99, elements(alphanum_chars())}
      ]).

  %% A non-empty token, produced as a binary.
  token() ->
      ?LET(Chars, non_empty(list(tchar())), list_to_binary(Chars)).
  %% A byte in the 128..255 range.
  obs_text() ->
      choose(128, 255).

  %% A character allowed unescaped inside a quoted string.
  %% The set contains neither the double quote nor the backslash.
  qdtext() ->
      Plain = "\t\s!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~",
      frequency([
          {99, elements(Plain)},
          {1, obs_text()}
      ]).

  %% A backslash followed by the character it escapes, as a two-element list.
  quoted_pair() ->
      Escapable = "\t\s!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
      [$\\, frequency([
          {99, elements(Escapable)},
          {1, obs_text()}
      ])].

  %% A quoted string as [$", Content, $"], where Content mixes qdtext
  %% characters and (rarely) quoted pairs.
  quoted_string() ->
      Content = list(frequency([
          {100, qdtext()},
          {1, quoted_pair()}
      ])),
      [$", Content, $"].
  %% Helper for ( token / quoted-string ) values.
  %%
  %% A generated quoted string has the shape [$", Content, $"] and is
  %% turned into the binary it denotes. Anything else is returned as is.
  unquote([$", Content, $"]) ->
      unquote(Content, <<>>);
  unquote(Token) ->
      Token.

  %% Walk the content, dropping the backslash of each quoted pair.
  unquote([], Unquoted) ->
      Unquoted;
  unquote([[$\\, Escaped]|More], Unquoted) ->
      unquote(More, << Unquoted/binary, Escaped >>);
  unquote([Char|More], Unquoted) ->
      unquote(More, << Unquoted/binary, Char >>).
  %% A parameter as {Key, Value, OWS, OWS}. Value is a token or a quoted
  %% string. The key <<"q">> is excluded.
  parameter() ->
      ?SUCHTHAT({Key, _, _, _},
          {token(), oneof([token(), quoted_string()]), ows(), ows()},
          Key =/= <<"q">>).

  %% A weight: usually an integer from int(0, 1000), sometimes undefined.
  weight() ->
      frequency([
          {90, int(0, 1000)},
          {10, undefined}
      ]).
  %% Helper for formatting a weight (0..1000) as a qvalue.
  %%
  %% 0 and 1000 are written as "0" and "1". Everything in between is
  %% written as "0." followed by the value left-padded with zeroes to
  %% three digits.
  qvalue_to_iodata(0) ->
      <<"0">>;
  qvalue_to_iodata(1000) ->
      <<"1">>;
  qvalue_to_iodata(Q) when Q < 1000 ->
      Prefix = if
          Q < 10 -> <<"0.00">>;
          Q < 100 -> <<"0.0">>;
          true -> <<"0.">>
      end,
      [Prefix, integer_to_binary(Q)].
  103. -endif.
  %% @doc Parse the Accept header.
  %%
  %% Each element of the result is {MediaType, Quality, Extensions}.
  %% The quality is 1000 when the media range carries no weight.
  -spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
  parse_accept(<<"*/*">>) ->
      %% This exact value is answered directly, without running the parser.
      [{{<<"*">>, <<"*">>, []}, 1000, []}];
  parse_accept(Header) ->
      media_range_list(Header, []).
  %% Between media ranges: skip spaces, tabs and commas, then start
  %% reading a type at the first token character.
  media_range_list(<<>>, Ranges) ->
      lists:reverse(Ranges);
  media_range_list(<< Sep, Rest/bits >>, Ranges) when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      media_range_list(Rest, Ranges);
  media_range_list(<< C, Rest/bits >>, Ranges) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, <<>>)
      end.
  %% Read the type, up to the slash that introduces the subtype.
  media_range_type(<< $/, Rest/bits >>, Ranges, Type) ->
      media_range_subtype(Rest, Ranges, Type, <<>>);
  %% Special clause for badly behaving user agents that send * instead of */*.
  media_range_type(<< $;, Rest/bits >>, Ranges, <<"*">>) ->
      media_range_before_param(Rest, Ranges, <<"*">>, <<"*">>, []);
  media_range_type(<< C, Rest/bits >>, Ranges, Type) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, Type)
      end.
  %% Read the subtype. Every terminating clause requires a non-empty
  %% subtype, so an input such as "audio/" matches no clause.
  media_range_subtype(<<>>, Ranges, Type, SubType) when SubType =/= <<>> ->
      lists:reverse([{{Type, SubType, []}, 1000, []}|Ranges]);
  media_range_subtype(<< $,, Rest/bits >>, Ranges, Type, SubType) when SubType =/= <<>> ->
      media_range_list(Rest, [{{Type, SubType, []}, 1000, []}|Ranges]);
  media_range_subtype(<< $;, Rest/bits >>, Ranges, Type, SubType) when SubType =/= <<>> ->
      media_range_before_param(Rest, Ranges, Type, SubType, []);
  media_range_subtype(<< WS, Rest/bits >>, Ranges, Type, SubType)
          when SubType =/= <<>>, (WS =:= $\s orelse WS =:= $\t) ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, []);
  media_range_subtype(<< C, Rest/bits >>, Ranges, Type, SubType) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_subtype, Rest, Ranges, Type, SubType)
      end.
  %% After a media type or parameter value, before any weight: expect end
  %% of input, a comma, a semicolon, or more whitespace.
  %% Params is accumulated in reverse order.
  media_range_before_semicolon(<<>>, Ranges, Type, SubType, Params) ->
      Range = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      lists:reverse([Range|Ranges]);
  media_range_before_semicolon(<< $,, Rest/bits >>, Ranges, Type, SubType, Params) ->
      Range = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      media_range_list(Rest, [Range|Ranges]);
  media_range_before_semicolon(<< $;, Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_before_param(Rest, Ranges, Type, SubType, Params);
  media_range_before_semicolon(<< WS, Rest/bits >>, Ranges, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, Params).
  %% After a semicolon: skip whitespace, then read either the weight
  %% (introduced by a lowercase "q=") or a media type parameter name.
  media_range_before_param(<< WS, Rest/bits >>, Ranges, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_param(Rest, Ranges, Type, SubType, Params);
  %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  media_range_before_param(<< $q, $=, $., Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_broken_weight(Rest, Ranges, Type, SubType, Params);
  media_range_before_param(<< $q, $=, Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_weight(Rest, Ranges, Type, SubType, Params);
  media_range_before_param(<< C, Rest/bits >>, Ranges, Type, SubType, Params) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Ranges, Type, SubType, Params, <<>>)
      end.
  %% Read a parameter name up to the equal sign. What follows the sign
  %% decides whether the value is a quoted string or a token.
  media_range_param(<< $=, $", Rest/bits >>, Ranges, Type, SubType, Params, Key) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, <<>>);
  media_range_param(<< $=, Rest/bits >>, Ranges, Type, SubType, Params, Key) ->
      media_range_value(Rest, Ranges, Type, SubType, Params, Key, <<>>);
  media_range_param(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Ranges, Type, SubType, Params, Key)
      end.
  %% Read a quoted parameter value up to the closing double quote.
  %% A backslash is dropped and the character after it is kept.
  media_range_quoted(<< $", Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, [{Key, Value}|Params]);
  media_range_quoted(<< $\\, Escaped, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(Escaped) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, Escaped >>);
  media_range_quoted(<< Char, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(Char) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, Char >>).
  %% Read an unquoted parameter value. The value is kept as received:
  %% unlike names, it is not run through the lowercase macro.
  media_range_value(<<>>, Ranges, Type, SubType, Params, Key, Value) ->
      Range = {{Type, SubType, lists:reverse([{Key, Value}|Params])}, 1000, []},
      lists:reverse([Range|Ranges]);
  media_range_value(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      Range = {{Type, SubType, lists:reverse([{Key, Value}|Params])}, 1000, []},
      media_range_list(Rest, [Range|Ranges]);
  media_range_value(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      media_range_before_param(Rest, Ranges, Type, SubType, [{Key, Value}|Params]);
  media_range_value(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, [{Key, Value}|Params]);
  media_range_value(<< Char, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_TOKEN(Char) ->
      media_range_value(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, Char >>).
  %% Special function for badly behaving user agents that send .123 instead of 0.123.
  %%
  %% The caller has already consumed "q=.". One to three digits follow;
  %% clauses go from most to fewest digits and missing digits count as zero.
  media_range_broken_weight(<< D1, D2, D3, Rest/bits >>, Ranges, Type, SubType, Params)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9, $0 =< D3, D3 =< $9 ->
      Weight = 100 * (D1 - $0) + 10 * (D2 - $0) + (D3 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_broken_weight(<< D1, D2, Rest/bits >>, Ranges, Type, SubType, Params)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9 ->
      Weight = 100 * (D1 - $0) + 10 * (D2 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_broken_weight(<< D1, Rest/bits >>, Ranges, Type, SubType, Params)
          when $0 =< D1, D1 =< $9 ->
      Weight = 100 * (D1 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []).
  %% Read the weight that follows "q=" and convert it to an integer
  %% from 0 to 1000. Longer forms are tried before shorter ones so that
  %% the longest valid prefix is consumed.
  media_range_weight(<< "1.000", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.00", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.0", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "0.", D1, D2, D3, Rest/bits >>, Ranges, Type, SubType, Params)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9, $0 =< D3, D3 =< $9 ->
      Weight = 100 * (D1 - $0) + 10 * (D2 - $0) + (D3 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", D1, D2, Rest/bits >>, Ranges, Type, SubType, Params)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9 ->
      Weight = 100 * (D1 - $0) + 10 * (D2 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", D1, Rest/bits >>, Ranges, Type, SubType, Params)
          when $0 =< D1, D1 =< $9 ->
      Weight = 100 * (D1 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 0, []);
  media_range_weight(<< "0", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 0, []).
  %% After the weight or an extension: expect end of input, a comma,
  %% a semicolon introducing another extension, or more whitespace.
  %% Params and Exts are both accumulated in reverse order.
  accept_before_semicolon(<<>>, Ranges, Type, SubType, Params, Weight, Exts) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse(Exts)},
      lists:reverse([Range|Ranges]);
  accept_before_semicolon(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse(Exts)},
      media_range_list(Rest, [Range|Ranges]);
  accept_before_semicolon(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, Exts);
  accept_before_semicolon(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, Exts).
  %% After the semicolon that follows the weight: skip whitespace, then
  %% start reading an extension name.
  accept_before_ext(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, Exts);
  accept_before_ext(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Ranges, Type, SubType, Params, Weight, Exts, <<>>)
      end.
  %% Read an extension name. An extension that ends without an equal sign
  %% is stored as the bare name; otherwise its value is read next and the
  %% extension is stored as a {Name, Value} pair.
  accept_ext(<<>>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([Key|Exts])},
      lists:reverse([Range|Ranges]);
  accept_ext(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([Key|Exts])},
      media_range_list(Rest, [Range|Ranges]);
  accept_ext(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, [Key|Exts]);
  accept_ext(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [Key|Exts]);
  accept_ext(<< $=, $", Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, <<>>);
  accept_ext(<< $=, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_value(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, <<>>);
  accept_ext(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Ranges, Type, SubType, Params, Weight, Exts, Key)
      end.
  %% Read a quoted extension value up to the closing double quote.
  %% A backslash is dropped and the character after it is kept.
  accept_quoted(<< $", Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value}|Exts]);
  accept_quoted(<< $\\, Escaped, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_VCHAR(Escaped) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, Escaped >>);
  accept_quoted(<< Char, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_VCHAR(Char) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, Char >>).
  %% Read an unquoted extension value. The value is kept as received.
  accept_value(<<>>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([{Key, Value}|Exts])},
      lists:reverse([Range|Ranges]);
  accept_value(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([{Key, Value}|Exts])},
      media_range_list(Rest, [Range|Ranges]);
  accept_value(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value}|Exts]);
  accept_value(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value}|Exts]);
  accept_value(<< Char, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_TOKEN(Char) ->
      accept_value(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, Char >>).
  220. -ifdef(TEST).
  %% An Accept extension: either a bare token or a full parameter.
  accept_ext() ->
      oneof([
          token(),
          parameter()
      ]).

  %% The extensions of one Accept element; most of the time there are none.
  accept_params() ->
      frequency([
          {90, []},
          {10, small_list(accept_ext())}
      ]).
  %% One Accept element: the generated parts followed by their wire form,
  %% as {Type, SubType, Params, Weight, Exts, Binary}.
  %% Extensions are only written out when a weight is present.
  accept() ->
      ?LET({Type, SubType, Params, Weight, Exts},
          {token(), token(), small_list(parameter()), weight(), accept_params()},
          begin
              ParamsData = [[OWS1, $;, OWS2, Key, $=, Val] || {Key, Val, OWS1, OWS2} <- Params],
              TailData = case Weight of
                  undefined ->
                      [];
                  _ ->
                      ExtsData = [case Ext of
                          {Key, Val, OWS1, OWS2} -> [OWS1, $;, OWS2, Key, $=, Val];
                          Key -> [$;, Key]
                      end || Ext <- Exts],
                      [[<<";q=">>, qvalue_to_iodata(Weight)], ExtsData]
              end,
              {Type, SubType, Params, Weight, Exts,
                  iolist_to_binary([Type, $/, SubType, ParamsData, TailData])}
          end
      ).
  %% Property: a comma-separated list of generated Accept elements parses
  %% back to the generated parts, with names lowercased and quoted values
  %% unquoted. Without a weight, 1000 and no extensions are expected.
  prop_parse_accept() ->
      ?FORALL(Generated,
          non_empty(list(accept())),
          begin
              %% Every element is prefixed with a comma; drop the first one.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Bin] || {_, _, _, _, _, Bin} <- Generated]),
              Parsed = parse_accept(Header),
              Outcomes = [begin
                  WantParams = [{?INLINE_LOWERCASE_BC(Key), unquote(Val)}
                      || {Key, Val, _, _} <- Params],
                  WantExts = [case Ext of
                      {Key, Val, _, _} -> {?INLINE_LOWERCASE_BC(Key), unquote(Val)};
                      Key -> ?INLINE_LOWERCASE_BC(Key)
                  end || Ext <- Exts],
                  GotType =:= ?INLINE_LOWERCASE_BC(Type)
                      andalso GotSubType =:= ?INLINE_LOWERCASE_BC(SubType)
                      andalso GotParams =:= WantParams
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
                      andalso ((Weight =:= undefined andalso GotExts =:= [])
                          orelse (Weight =/= undefined andalso GotExts =:= WantExts))
              end || {{Type, SubType, Params, Weight, Exts, _},
                      {{GotType, GotSubType, GotParams}, GotWeight, GotExts}}
                  <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end
      ).
  %% Table of {Input, Expected} pairs for parse_accept/1. Each generated
  %% test is labelled with its input.
  parse_accept_test_() ->
      Cases = [
          {<<>>, []},
          {<<" ">>, []},
          {<<"audio/*; q=0.2, audio/basic">>, [
              {{<<"audio">>, <<"*">>, []}, 200, []},
              {{<<"audio">>, <<"basic">>, []}, 1000, []}
          ]},
          {<<"text/plain; q=0.5, text/html, "
              "text/x-dvi; q=0.8, text/x-c">>, [
              {{<<"text">>, <<"plain">>, []}, 500, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"x-dvi">>, []}, 800, []},
              {{<<"text">>, <<"x-c">>, []}, 1000, []}
          ]},
          {<<"text/*, text/html, text/html;level=1, */*">>, [
              {{<<"text">>, <<"*">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 1000, []}
          ]},
          {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>, [
              {{<<"text">>, <<"*">>, []}, 300, []},
              {{<<"text">>, <<"html">>, []}, 700, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
              {{<<"*">>, <<"*">>, []}, 500, []}
          ]},
          {<<"text/html;level=1;quoted=\"hi hi hi\";"
              "q=0.123;standalone;complex=gits, text/plain">>, [
              {{<<"text">>, <<"html">>,
                  [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
                  [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
              {{<<"text">>, <<"plain">>, []}, 1000, []}
          ]},
          %% Broken forms: a lone * as the media range and a weight without leading zero.
          {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"image">>, <<"gif">>, []}, 1000, []},
              {{<<"image">>, <<"jpeg">>, []}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 200, []},
              {{<<"*">>, <<"*">>, []}, 200, []}
          ]}
      ],
      [{Input, fun() -> Expected = parse_accept(Input) end}
          || {Input, Expected} <- Cases].
  %% Inputs that parse_accept/1 must reject by crashing.
  parse_accept_error_test_() ->
      BadInputs = [
          <<"audio/basic, */;q=0.5">>,
          <<"audio/, audio/basic">>,
          <<"aud\tio/basic">>,
          <<"audio/basic;t=\"zero \\", 0, " woo\"">>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_accept(Input)) end}
          || Input <- BadInputs].
  317. -endif.
  318. -ifdef(PERF).
  %% Benchmark: parse_accept/1 on a header with parameters and weights.
  horse_parse_accept() ->
      horse:repeat(
          20000,
          parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>)
      ).
  324. -endif.
  %% @doc Parse the Accept-Charset header.
  %%
  %% Returns {Charset, Quality} pairs; the parsed list is passed through
  %% nonempty/1 before being returned.
  -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  parse_accept_charset(Header) ->
      Parsed = conneg_list(Header, []),
      nonempty(Parsed).
  %% Between elements: skip spaces, tabs and commas, then start reading
  %% a name at the first token character.
  conneg_list(<<>>, Items) ->
      lists:reverse(Items);
  conneg_list(<< Sep, Rest/bits >>, Items) when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      conneg_list(Rest, Items);
  conneg_list(<< C, Rest/bits >>, Items) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Items, <<>>)
      end.
  %% Read a name. A name that ends without a weight gets quality 1000.
  conneg(<<>>, Items, Name) ->
      lists:reverse([{Name, 1000}|Items]);
  conneg(<< $,, Rest/bits >>, Items, Name) ->
      conneg_list(Rest, [{Name, 1000}|Items]);
  conneg(<< $;, Rest/bits >>, Items, Name) ->
      conneg_before_weight(Rest, Items, Name);
  conneg(<< WS, Rest/bits >>, Items, Name) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Items, Name);
  conneg(<< C, Rest/bits >>, Items, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Items, Name)
      end.
  %% After a name followed by whitespace: expect end of input, a comma,
  %% a semicolon, or more whitespace.
  conneg_before_semicolon(<<>>, Items, Name) ->
      lists:reverse([{Name, 1000}|Items]);
  conneg_before_semicolon(<< $,, Rest/bits >>, Items, Name) ->
      conneg_list(Rest, [{Name, 1000}|Items]);
  conneg_before_semicolon(<< $;, Rest/bits >>, Items, Name) ->
      conneg_before_weight(Rest, Items, Name);
  conneg_before_semicolon(<< WS, Rest/bits >>, Items, Name) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Items, Name).
  %% After a semicolon: skip whitespace, then expect "q=" and the weight.
  conneg_before_weight(<< WS, Rest/bits >>, Items, Name) when WS =:= $\s; WS =:= $\t ->
      conneg_before_weight(Rest, Items, Name);
  conneg_before_weight(<< $q, $=, Rest/bits >>, Items, Name) ->
      conneg_weight(Rest, Items, Name);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending name is stored with quality 1000 and a new name begins.
  conneg_before_weight(<< C, Rest/bits >>, Items, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, [{Name, 1000}|Items], <<>>)
      end.
  %% Read the weight that follows "q=" and store the name with a quality
  %% from 0 to 1000. Longer forms are tried before shorter ones.
  conneg_weight(<< "1.000", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 1000}|Items]);
  conneg_weight(<< "1.00", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 1000}|Items]);
  conneg_weight(<< "1.0", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 1000}|Items]);
  conneg_weight(<< "1.", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 1000}|Items]);
  conneg_weight(<< "1", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 1000}|Items]);
  conneg_weight(<< "0.", D1, D2, D3, Rest/bits >>, Items, Name)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9, $0 =< D3, D3 =< $9 ->
      Quality = 100 * (D1 - $0) + 10 * (D2 - $0) + (D3 - $0),
      conneg_list_sep(Rest, [{Name, Quality}|Items]);
  conneg_weight(<< "0.", D1, D2, Rest/bits >>, Items, Name)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9 ->
      Quality = 100 * (D1 - $0) + 10 * (D2 - $0),
      conneg_list_sep(Rest, [{Name, Quality}|Items]);
  conneg_weight(<< "0.", D1, Rest/bits >>, Items, Name)
          when $0 =< D1, D1 =< $9 ->
      Quality = 100 * (D1 - $0),
      conneg_list_sep(Rest, [{Name, Quality}|Items]);
  conneg_weight(<< "0.", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 0}|Items]);
  conneg_weight(<< "0", Rest/bits >>, Items, Name) ->
      conneg_list_sep(Rest, [{Name, 0}|Items]).
  %% After a weight: only whitespace, a comma or the end of input may follow.
  conneg_list_sep(<<>>, Items) ->
      lists:reverse(Items);
  conneg_list_sep(<< WS, Rest/bits >>, Items) when WS =:= $\s; WS =:= $\t ->
      conneg_list_sep(Rest, Items);
  conneg_list_sep(<< $,, Rest/bits >>, Items) ->
      conneg_list(Rest, Items).
  379. -ifdef(TEST).
  %% One Accept-Charset element as {Charset, Weight, Binary}, where Binary
  %% is the wire form. No weight is written when Weight is undefined.
  accept_charset() ->
      ?LET({Charset, Weight},
          {token(), weight()},
          begin
              WeightData = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {Charset, Weight, iolist_to_binary([Charset, WeightData])}
          end
      ).
  %% Property: generated charsets parse back lowercased with their weight,
  %% or with 1000 when no weight was generated.
  prop_parse_accept_charset() ->
      ?FORALL(Generated,
          non_empty(list(accept_charset())),
          begin
              %% Every element is prefixed with a comma; drop the first one.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Bin] || {_, _, Bin} <- Generated]),
              Parsed = parse_accept_charset(Header),
              Outcomes = [begin
                  GotName =:= ?INLINE_LOWERCASE_BC(Name)
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
              end || {{Name, Weight, _}, {GotName, GotWeight}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end).
  %% Table of {Input, Expected} pairs for parse_accept_charset/1.
  parse_accept_charset_test_() ->
      Cases = [
          {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
              {<<"iso-8859-5">>, 1000},
              {<<"unicode-1-1">>, 800}
          ]},
          %% Some user agents send this invalid value for the Accept-Charset header
          {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
              {<<"iso-8859-1">>, 1000},
              {<<"utf-8">>, 700},
              {<<"*">>, 700}
          ]}
      ],
      [{Input, fun() -> Expected = parse_accept_charset(Input) end}
          || {Input, Expected} <- Cases].
  %% Inputs that parse_accept_charset/1 must reject by crashing.
  parse_accept_charset_error_test_() ->
      BadInputs = [
          <<>>
      ],
      [{Input, fun() -> {'EXIT', _} = (catch parse_accept_charset(Input)) end}
          || Input <- BadInputs].
  419. -endif.
  420. -ifdef(PERF).
  %% Benchmark: parse_accept_charset/1 on a two-element header.
  horse_parse_accept_charset() ->
      horse:repeat(
          20000,
          parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
      ).
  425. -endif.
  %% @doc Parse the Accept-Encoding header.
  %%
  %% Returns {Encoding, Quality} pairs. The result is not passed through
  %% nonempty/1, so an empty header yields an empty list.
  -spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
  parse_accept_encoding(Header) ->
      conneg_list(Header, []).
  430. -ifdef(TEST).
  %% One Accept-Encoding element as {Encoding, Weight, Binary}, where Binary
  %% is the wire form. No weight is written when Weight is undefined.
  accept_encoding() ->
      ?LET({Encoding, Weight},
          {token(), weight()},
          begin
              WeightData = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {Encoding, Weight, iolist_to_binary([Encoding, WeightData])}
          end
      ).
  %% Property: generated encodings parse back lowercased with their weight,
  %% or with 1000 when no weight was generated.
  prop_parse_accept_encoding() ->
      ?FORALL(Generated,
          non_empty(list(accept_encoding())),
          begin
              %% Every element is prefixed with a comma; drop the first one.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Bin] || {_, _, Bin} <- Generated]),
              Parsed = parse_accept_encoding(Header),
              Outcomes = [begin
                  GotName =:= ?INLINE_LOWERCASE_BC(Name)
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
              end || {{Name, Weight, _}, {GotName, GotWeight}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end).
  %% Table of {Input, Expected} pairs for parse_accept_encoding/1.
  parse_accept_encoding_test_() ->
      Cases = [
          {<<>>, []},
          {<<"*">>, [{<<"*">>, 1000}]},
          {<<"compress, gzip">>, [
              {<<"compress">>, 1000},
              {<<"gzip">>, 1000}
          ]},
          {<<"compress;q=0.5, gzip;q=1.0">>, [
              {<<"compress">>, 500},
              {<<"gzip">>, 1000}
          ]},
          {<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
              {<<"gzip">>, 1000},
              {<<"identity">>, 500},
              {<<"*">>, 0}
          ]}
      ],
      [{Input, fun() -> Expected = parse_accept_encoding(Input) end}
          || {Input, Expected} <- Cases].
  470. -endif.
  471. -ifdef(PERF).
  %% Benchmark: parse_accept_encoding/1 on a three-element header.
  horse_parse_accept_encoding() ->
      horse:repeat(
          20000,
          parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
      ).
  476. -endif.
  %% @doc Parse the Accept-Language header.
  %%
  %% Returns {LanguageRange, Quality} pairs; the parsed list is passed
  %% through nonempty/1 before being returned.
  -spec parse_accept_language(binary()) -> [{binary(), qvalue()}].
  parse_accept_language(Header) ->
      Parsed = language_range_list(Header, []),
      nonempty(Parsed).
  %% Between elements: skip spaces, tabs and commas. A range is either
  %% a lone * or starts with a letter.
  language_range_list(<<>>, Items) ->
      lists:reverse(Items);
  language_range_list(<< Sep, Rest/bits >>, Items) when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      language_range_list(Rest, Items);
  language_range_list(<< $*, Rest/bits >>, Items) ->
      language_range_before_semicolon(Rest, Items, <<"*">>);
  language_range_list(<< C, Rest/bits >>, Items) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, Items, 1, <<>>)
      end.
  %% Read the first tag of a language range. It is made of letters only;
  %% Len counts the characters read so far in this tag.
  language_range(<<>>, Items, _, Tag) ->
      lists:reverse([{Tag, 1000}|Items]);
  language_range(<< $,, Rest/bits >>, Items, _, Tag) ->
      language_range_list(Rest, [{Tag, 1000}|Items]);
  language_range(<< $;, Rest/bits >>, Items, _, Tag) ->
      language_range_before_weight(Rest, Items, Tag);
  language_range(<< WS, Rest/bits >>, Items, _, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_semicolon(Rest, Items, Tag);
  language_range(<< $-, Rest/bits >>, Items, _, Tag) ->
      language_range_sub(Rest, Items, 0, << Tag/binary, $- >>);
  %% Eight characters were already read: a ninth one is an error.
  language_range(<< _, _/bits >>, _, 8, _) ->
      error(badarg);
  language_range(<< C, Rest/bits >>, Items, Len, Tag) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, Items, Len + 1, Tag)
      end.
  %% Read a subtag, made of letters and digits. Len counts the characters
  %% read so far in this subtag; a subtag may only end once Len is above 0,
  %% so an empty subtag matches no clause.
  language_range_sub(<<>>, Items, Len, Tag) when Len > 0 ->
      lists:reverse([{Tag, 1000}|Items]);
  language_range_sub(<< $,, Rest/bits >>, Items, Len, Tag) when Len > 0 ->
      language_range_list(Rest, [{Tag, 1000}|Items]);
  language_range_sub(<< $;, Rest/bits >>, Items, Len, Tag) when Len > 0 ->
      language_range_before_weight(Rest, Items, Tag);
  language_range_sub(<< WS, Rest/bits >>, Items, Len, Tag)
          when Len > 0, (WS =:= $\s orelse WS =:= $\t) ->
      language_range_before_semicolon(Rest, Items, Tag);
  language_range_sub(<< $-, Rest/bits >>, Items, Len, Tag) when Len > 0 ->
      language_range_sub(Rest, Items, 0, << Tag/binary, $- >>);
  %% Eight characters were already read: a ninth one is an error.
  language_range_sub(<< _, _/bits >>, _, 8, _) ->
      error(badarg);
  language_range_sub(<< C, Rest/bits >>, Items, Len, Tag) when ?IS_ALPHA(C); ?IS_DIGIT(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range_sub, Rest, Items, Len + 1, Tag)
      end.
  %% After a complete range: expect end of input, a comma, a semicolon,
  %% or more whitespace.
  language_range_before_semicolon(<<>>, Items, Tag) ->
      lists:reverse([{Tag, 1000}|Items]);
  language_range_before_semicolon(<< $,, Rest/bits >>, Items, Tag) ->
      language_range_list(Rest, [{Tag, 1000}|Items]);
  language_range_before_semicolon(<< $;, Rest/bits >>, Items, Tag) ->
      language_range_before_weight(Rest, Items, Tag);
  language_range_before_semicolon(<< WS, Rest/bits >>, Items, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_semicolon(Rest, Items, Tag).
  %% After a semicolon: skip whitespace, then expect "q=" and the weight.
  language_range_before_weight(<< WS, Rest/bits >>, Items, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_weight(Rest, Items, Tag);
  language_range_before_weight(<< $q, $=, Rest/bits >>, Items, Tag) ->
      language_range_weight(Rest, Items, Tag);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending range is stored with quality 1000 and a new range begins.
  language_range_before_weight(<< C, Rest/bits >>, Items, Tag) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, [{Tag, 1000}|Items], 1, <<>>)
      end.
  %% Read the weight that follows "q=" and store the range with a quality
  %% from 0 to 1000. Longer forms are tried before shorter ones.
  language_range_weight(<< "1.000", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000}|Items]);
  language_range_weight(<< "1.00", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000}|Items]);
  language_range_weight(<< "1.0", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000}|Items]);
  language_range_weight(<< "1.", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000}|Items]);
  language_range_weight(<< "1", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000}|Items]);
  language_range_weight(<< "0.", D1, D2, D3, Rest/bits >>, Items, Tag)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9, $0 =< D3, D3 =< $9 ->
      Quality = 100 * (D1 - $0) + 10 * (D2 - $0) + (D3 - $0),
      language_range_list_sep(Rest, [{Tag, Quality}|Items]);
  language_range_weight(<< "0.", D1, D2, Rest/bits >>, Items, Tag)
          when $0 =< D1, D1 =< $9, $0 =< D2, D2 =< $9 ->
      Quality = 100 * (D1 - $0) + 10 * (D2 - $0),
      language_range_list_sep(Rest, [{Tag, Quality}|Items]);
  language_range_weight(<< "0.", D1, Rest/bits >>, Items, Tag)
          when $0 =< D1, D1 =< $9 ->
      Quality = 100 * (D1 - $0),
      language_range_list_sep(Rest, [{Tag, Quality}|Items]);
  language_range_weight(<< "0.", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 0}|Items]);
  language_range_weight(<< "0", Rest/bits >>, Items, Tag) ->
      language_range_list_sep(Rest, [{Tag, 0}|Items]).
  %% After a weight: only whitespace, a comma or the end of input may follow.
  language_range_list_sep(<<>>, Items) ->
      lists:reverse(Items);
  language_range_list_sep(<< WS, Rest/bits >>, Items) when WS =:= $\s; WS =:= $\t ->
      language_range_list_sep(Rest, Items);
  language_range_list_sep(<< $,, Rest/bits >>, Items) ->
      language_range_list(Rest, Items).
  545. -ifdef(TEST).
  %% First tag of a language range: 1 to 8 letters.
  language_range_tag() ->
      vector(1, 8, alpha()).

  %% A subtag: a dash followed by 1 to 8 letters or digits.
  language_range_subtag() ->
      [$-, vector(1, 8, alphanum())].

  %% A full language range: a tag followed by a small list of subtags.
  language_range() ->
      [
          language_range_tag(),
          small_list(language_range_subtag())
      ].
  %% One Accept-Language element as {RangeBinary, Weight, Binary}, where
  %% Binary is the wire form. No weight is written when Weight is undefined.
  accept_language() ->
      ?LET({Range, Weight},
          {language_range(), weight()},
          begin
              WeightData = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {iolist_to_binary(Range), Weight, iolist_to_binary([Range, WeightData])}
          end
      ).
  %% Property: generated language ranges parse back lowercased with their
  %% weight, or with 1000 when no weight was generated.
  prop_parse_accept_language() ->
      ?FORALL(Generated,
          non_empty(list(accept_language())),
          begin
              %% Every element is prefixed with a comma; drop the first one.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Bin] || {_, _, Bin} <- Generated]),
              Parsed = parse_accept_language(Header),
              Outcomes = [begin
                  GotRange =:= ?INLINE_LOWERCASE_BC(Range)
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
              end || {{Range, Weight, _}, {GotRange, GotWeight}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end).
  %% Table of {Input, Expected} pairs for parse_accept_language/1.
  parse_accept_language_test_() ->
      Cases = [
          {<<"da, en-gb;q=0.8, en;q=0.7">>, [
              {<<"da">>, 1000},
              {<<"en-gb">>, 800},
              {<<"en">>, 700}
          ]},
          {<<"en, en-US, en-cockney, i-cherokee, x-pig-latin, es-419">>, [
              {<<"en">>, 1000},
              {<<"en-us">>, 1000},
              {<<"en-cockney">>, 1000},
              {<<"i-cherokee">>, 1000},
              {<<"x-pig-latin">>, 1000},
              {<<"es-419">>, 1000}
          ]}
      ],
      [{Input, fun() -> Expected = parse_accept_language(Input) end}
          || {Input, Expected} <- Cases].
  589. parse_accept_language_error_test_() ->
  590. Tests = [
  591. <<>>,
  592. <<"loooooong">>,
  593. <<"en-us-loooooong">>,
  594. <<"419-en-us">>
  595. ],
  596. [{V, fun() -> {'EXIT', _} = (catch parse_accept_language(V)) end} || V <- Tests].
  597. -endif.
  598. -ifdef(PERF).
  599. horse_parse_accept_language() ->
  600. horse:repeat(20000,
  601. parse_accept_language(<<"da, en-gb;q=0.8, en;q=0.7">>)
  602. ).
  603. -endif.
  604. %% @doc Parse the Connection header.
  605. -spec parse_connection(binary()) -> [binary()].
  606. parse_connection(<<"close">>) ->
  607. [<<"close">>];
  608. parse_connection(<<"keep-alive">>) ->
  609. [<<"keep-alive">>];
  610. parse_connection(Connection) ->
  611. nonempty(token_ci_list(Connection, [])).
  612. -ifdef(TEST).
  613. prop_parse_connection() ->
  614. ?FORALL(L,
  615. non_empty(list(token())),
  616. begin
  617. << _, Connection/binary >> = iolist_to_binary([[$,, C] || C <- L]),
  618. ResL = parse_connection(Connection),
  619. CheckedL = [?INLINE_LOWERCASE_BC(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)],
  620. [true] =:= lists:usort(CheckedL)
  621. end).
  622. parse_connection_test_() ->
  623. Tests = [
  624. {<<"close">>, [<<"close">>]},
  625. {<<"ClOsE">>, [<<"close">>]},
  626. {<<"Keep-Alive">>, [<<"keep-alive">>]},
  627. {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
  628. ],
  629. [{V, fun() -> R = parse_connection(V) end} || {V, R} <- Tests].
  630. parse_connection_error_test_() ->
  631. Tests = [
  632. <<>>
  633. ],
  634. [{V, fun() -> {'EXIT', _} = (catch parse_connection(V)) end} || V <- Tests].
  635. -endif.
  636. -ifdef(PERF).
  637. horse_parse_connection_close() ->
  638. horse:repeat(200000,
  639. parse_connection(<<"close">>)
  640. ).
  641. horse_parse_connection_keepalive() ->
  642. horse:repeat(200000,
  643. parse_connection(<<"keep-alive">>)
  644. ).
  645. horse_parse_connection_keepalive_upgrade() ->
  646. horse:repeat(200000,
  647. parse_connection(<<"keep-alive, upgrade">>)
  648. ).
  649. -endif.
  650. %% @doc Parse the Content-Encoding header.
  651. -spec parse_content_encoding(binary()) -> [binary()].
  652. parse_content_encoding(ContentEncoding) ->
  653. nonempty(token_ci_list(ContentEncoding, [])).
  654. -ifdef(TEST).
  655. parse_content_encoding_test_() ->
  656. Tests = [
  657. {<<"gzip">>, [<<"gzip">>]}
  658. ],
  659. [{V, fun() -> R = parse_content_encoding(V) end} || {V, R} <- Tests].
  660. parse_content_encoding_error_test_() ->
  661. Tests = [
  662. <<>>
  663. ],
  664. [{V, fun() -> {'EXIT', _} = (catch parse_content_encoding(V)) end} || V <- Tests].
  665. -endif.
  666. -ifdef(PERF).
  667. horse_parse_content_encoding() ->
  668. horse:repeat(200000,
  669. parse_content_encoding(<<"gzip">>)
  670. ).
  671. -endif.
  672. %% @doc Parse the Content-Language header.
  673. %%
  674. %% We do not support irregular deprecated tags that do not match the ABNF.
  675. -spec parse_content_language(binary()) -> [binary()].
  676. parse_content_language(ContentLanguage) ->
  677. nonempty(langtag_list(ContentLanguage, [])).
  678. langtag_list(<<>>, Acc) -> lists:reverse(Acc);
  679. langtag_list(<< $\s, R/bits >>, Acc) -> langtag_list(R, Acc);
  680. langtag_list(<< $\t, R/bits >>, Acc) -> langtag_list(R, Acc);
  681. langtag_list(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
  682. langtag_list(<< A, B, C, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) ->
  683. langtag_extlang(R, Acc, << ?LC(A), ?LC(B), ?LC(C) >>, 0);
  684. langtag_list(<< A, B, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B) ->
  685. langtag_extlang(R, Acc, << ?LC(A), ?LC(B) >>, 0);
  686. langtag_list(<< X, R/bits >>, Acc) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << $x >>, 0).
  687. langtag_extlang(<<>>, Acc, T, _) -> lists:reverse([T|Acc]);
  688. langtag_extlang(<< $,, R/bits >>, Acc, T, _) -> langtag_list(R, [T|Acc]);
  689. langtag_extlang(<< $\s, R/bits >>, Acc, T, _) -> langtag_list_sep(R, [T|Acc]);
  690. langtag_extlang(<< $\t, R/bits >>, Acc, T, _) -> langtag_list_sep(R, [T|Acc]);
  691. langtag_extlang(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, _)
  692. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  693. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  694. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  695. langtag_extlang(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, _)
  696. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  697. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  698. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  699. langtag_extlang(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, _)
  700. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  701. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  702. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  703. langtag_extlang(<< $-, A, B, C, D, E, R/bits >>, Acc, T, _)
  704. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  705. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  706. langtag_extlang(<< $-, A, B, C, D, R/bits >>, Acc, T, _)
  707. when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) ->
  708. langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>);
  709. langtag_extlang(<< $-, A, B, C, R/bits >>, Acc, T, N)
  710. when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) ->
  711. case N of
  712. 2 -> langtag_script(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>);
  713. _ -> langtag_extlang(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1)
  714. end;
  715. langtag_extlang(R, Acc, T, _) -> langtag_region(R, Acc, T).
  716. langtag_script(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  717. langtag_script(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  718. langtag_script(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  719. langtag_script(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  720. langtag_script(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
  721. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  722. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  723. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  724. langtag_script(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
  725. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  726. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  727. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  728. langtag_script(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
  729. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  730. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  731. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  732. langtag_script(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
  733. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  734. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  735. langtag_script(<< $-, A, B, C, D, R/bits >>, Acc, T)
  736. when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) ->
  737. langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>);
  738. langtag_script(R, Acc, T) ->
  739. langtag_region(R, Acc, T).
  740. langtag_region(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  741. langtag_region(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  742. langtag_region(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  743. langtag_region(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  744. langtag_region(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
  745. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  746. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  747. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  748. langtag_region(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
  749. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  750. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  751. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  752. langtag_region(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
  753. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  754. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  755. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  756. langtag_region(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
  757. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  758. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  759. langtag_region(<< $-, A, B, C, D, R/bits >>, Acc, T)
  760. when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  761. langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>);
  762. langtag_region(<< $-, A, B, R/bits >>, Acc, T) when ?IS_ALPHA(A), ?IS_ALPHA(B) ->
  763. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>);
  764. langtag_region(<< $-, A, B, C, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
  765. langtag_variant(R, Acc, << T/binary, $-, A, B, C >>);
  766. langtag_region(R, Acc, T) ->
  767. langtag_variant(R, Acc, T).
  768. langtag_variant(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  769. langtag_variant(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  770. langtag_variant(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  771. langtag_variant(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  772. langtag_variant(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
  773. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  774. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  775. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  776. langtag_variant(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
  777. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  778. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  779. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  780. langtag_variant(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
  781. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  782. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  783. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  784. langtag_variant(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
  785. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  786. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  787. langtag_variant(<< $-, A, B, C, D, R/bits >>, Acc, T)
  788. when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  789. langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>);
  790. langtag_variant(R, Acc, T) ->
  791. langtag_extension(R, Acc, T).
  792. langtag_extension(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  793. langtag_extension(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  794. langtag_extension(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  795. langtag_extension(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  796. langtag_extension(<< $-, X, R/bits >>, Acc, T) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << T/binary, $-, $x >>, 0);
  797. langtag_extension(<< $-, S, R/bits >>, Acc, T) when ?IS_ALPHANUM(S) -> langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(S) >>, 0).
  798. langtag_extension_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
  799. langtag_extension_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
  800. langtag_extension_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  801. langtag_extension_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  802. langtag_extension_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
  803. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  804. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  805. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
  806. langtag_extension_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
  807. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  808. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  809. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
  810. langtag_extension_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
  811. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  812. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  813. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
  814. langtag_extension_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
  815. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  816. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
  817. langtag_extension_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
  818. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  819. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
  820. langtag_extension_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
  821. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
  822. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
  823. langtag_extension_sub(<< $-, A, B, R/bits >>, Acc, T, N)
  824. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
  825. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
  826. langtag_extension_sub(R, Acc, T, N) when N > 0 ->
  827. langtag_extension(R, Acc, T).
  828. langtag_privateuse_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
  829. langtag_privateuse_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
  830. langtag_privateuse_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  831. langtag_privateuse_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  832. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
  833. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  834. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  835. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
  836. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
  837. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  838. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  839. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
  840. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
  841. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  842. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  843. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
  844. langtag_privateuse_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
  845. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  846. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
  847. langtag_privateuse_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
  848. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  849. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
  850. langtag_privateuse_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
  851. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
  852. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
  853. langtag_privateuse_sub(<< $-, A, B, R/bits >>, Acc, T, N)
  854. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
  855. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
  856. langtag_privateuse_sub(<< $-, A, R/bits >>, Acc, T, N)
  857. when ?IS_ALPHANUM(A) ->
  858. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A) >>, N + 1).
  859. langtag_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  860. langtag_list_sep(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
  861. langtag_list_sep(<< $\s, R/bits >>, Acc) -> langtag_list_sep(R, Acc);
  862. langtag_list_sep(<< $\t, R/bits >>, Acc) -> langtag_list_sep(R, Acc).
  863. -ifdef(TEST).
  864. langtag_language() -> vector(2, 3, alpha()).
  865. langtag_extlang() -> vector(0, 3, [$-, alpha(), alpha(), alpha()]).
  866. langtag_script() -> oneof([[], [$-, alpha(), alpha(), alpha(), alpha()]]).
  867. langtag_region() -> oneof([[], [$-, alpha(), alpha()], [$-, digit(), digit(), digit()]]).
  868. langtag_variant() ->
  869. small_list(frequency([
  870. {4, [$-, vector(5, 8, alphanum())]},
  871. {1, [$-, digit(), alphanum(), alphanum(), alphanum()]}
  872. ])).
  873. langtag_extension() ->
  874. small_list([$-, ?SUCHTHAT(S, alphanum(), S =/= $x andalso S =/= $X),
  875. small_non_empty_list([$-, vector(2, 8, alphanum())])
  876. ]).
  877. langtag_privateuse() -> oneof([[], [$-, langtag_privateuse_nodash()]]).
  878. langtag_privateuse_nodash() -> [elements([$x, $X]), small_non_empty_list([$-, vector(1, 8, alphanum())])].
  879. private_language_tag() -> ?LET(T, langtag_privateuse_nodash(), iolist_to_binary(T)).
  880. language_tag() ->
  881. ?LET(IoList,
  882. [langtag_language(), langtag_extlang(), langtag_script(), langtag_region(),
  883. langtag_variant(), langtag_extension(), langtag_privateuse()],
  884. iolist_to_binary(IoList)).
  885. content_language() ->
  886. ?LET(L,
  887. non_empty(list(frequency([
  888. {90, language_tag()},
  889. {10, private_language_tag()}
  890. ]))),
  891. begin
  892. << _, ContentLanguage/binary >> = iolist_to_binary([[$,, T] || T <- L]),
  893. {L, ContentLanguage}
  894. end).
  895. prop_parse_content_language() ->
  896. ?FORALL({L, ContentLanguage},
  897. content_language(),
  898. begin
  899. ResL = parse_content_language(ContentLanguage),
  900. CheckedL = [?INLINE_LOWERCASE_BC(T) =:= ResT || {T, ResT} <- lists:zip(L, ResL)],
  901. [true] =:= lists:usort(CheckedL)
  902. end).
  903. parse_content_language_test_() ->
  904. Tests = [
  905. {<<"de">>, [<<"de">>]},
  906. {<<"fr">>, [<<"fr">>]},
  907. {<<"ja">>, [<<"ja">>]},
  908. {<<"zh-Hant">>, [<<"zh-hant">>]},
  909. {<<"zh-Hans">>, [<<"zh-hans">>]},
  910. {<<"sr-Cyrl">>, [<<"sr-cyrl">>]},
  911. {<<"sr-Latn">>, [<<"sr-latn">>]},
  912. {<<"zh-cmn-Hans-CN">>, [<<"zh-cmn-hans-cn">>]},
  913. {<<"cmn-Hans-CN">>, [<<"cmn-hans-cn">>]},
  914. {<<"zh-yue-HK">>, [<<"zh-yue-hk">>]},
  915. {<<"yue-HK">>, [<<"yue-hk">>]},
  916. {<<"zh-Hans-CN">>, [<<"zh-hans-cn">>]},
  917. {<<"sr-Latn-RS">>, [<<"sr-latn-rs">>]},
  918. {<<"sl-rozaj">>, [<<"sl-rozaj">>]},
  919. {<<"sl-rozaj-biske">>, [<<"sl-rozaj-biske">>]},
  920. {<<"sl-nedis">>, [<<"sl-nedis">>]},
  921. {<<"de-CH-1901">>, [<<"de-ch-1901">>]},
  922. {<<"sl-IT-nedis">>, [<<"sl-it-nedis">>]},
  923. {<<"hy-Latn-IT-arevela">>, [<<"hy-latn-it-arevela">>]},
  924. {<<"de-DE">>, [<<"de-de">>]},
  925. {<<"en-US">>, [<<"en-us">>]},
  926. {<<"es-419">>, [<<"es-419">>]},
  927. {<<"de-CH-x-phonebk">>, [<<"de-ch-x-phonebk">>]},
  928. {<<"az-Arab-x-AZE-derbend">>, [<<"az-arab-x-aze-derbend">>]},
  929. {<<"x-whatever">>, [<<"x-whatever">>]},
  930. {<<"qaa-Qaaa-QM-x-southern">>, [<<"qaa-qaaa-qm-x-southern">>]},
  931. {<<"de-Qaaa">>, [<<"de-qaaa">>]},
  932. {<<"sr-Latn-QM">>, [<<"sr-latn-qm">>]},
  933. {<<"sr-Qaaa-RS">>, [<<"sr-qaaa-rs">>]},
  934. {<<"en-US-u-islamcal">>, [<<"en-us-u-islamcal">>]},
  935. {<<"zh-CN-a-myext-x-private">>, [<<"zh-cn-a-myext-x-private">>]},
  936. {<<"en-a-myext-b-another">>, [<<"en-a-myext-b-another">>]},
  937. {<<"mn-Cyrl-MN">>, [<<"mn-cyrl-mn">>]},
  938. {<<"MN-cYRL-mn">>, [<<"mn-cyrl-mn">>]},
  939. {<<"mN-cYrL-Mn">>, [<<"mn-cyrl-mn">>]},
  940. {<<"az-Arab-IR">>, [<<"az-arab-ir">>]},
  941. {<<"zh-gan">>, [<<"zh-gan">>]},
  942. {<<"zh-yue">>, [<<"zh-yue">>]},
  943. {<<"zh-cmn">>, [<<"zh-cmn">>]},
  944. {<<"de-AT">>, [<<"de-at">>]},
  945. {<<"de-CH-1996">>, [<<"de-ch-1996">>]},
  946. {<<"en-Latn-GB-boont-r-extended-sequence-x-private">>,
  947. [<<"en-latn-gb-boont-r-extended-sequence-x-private">>]},
  948. {<<"el-x-koine">>, [<<"el-x-koine">>]},
  949. {<<"el-x-attic">>, [<<"el-x-attic">>]},
  950. {<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>,
  951. [<<"fr">>, <<"en-us">>, <<"es-419">>, <<"az-arab">>, <<"x-pig-latin">>, <<"man-nkoo-gn">>]},
  952. {<<"da">>, [<<"da">>]},
  953. {<<"mi, en">>, [<<"mi">>, <<"en">>]}
  954. ],
  955. [{V, fun() -> R = parse_content_language(V) end} || {V, R} <- Tests].
  956. parse_content_language_error_test_() ->
  957. Tests = [
  958. <<>>
  959. ],
  960. [{V, fun() -> {'EXIT', _} = (catch parse_content_language(V)) end} || V <- Tests].
  961. -endif.
  962. -ifdef(PERF).
  963. horse_parse_content_language() ->
  964. horse:repeat(100000,
  965. parse_content_language(<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>)
  966. ).
  967. -endif.
  968. %% @doc Parse the Content-Length header.
  969. %%
  970. %% The value has at least one digit, and may be followed by whitespace.
  971. -spec parse_content_length(binary()) -> non_neg_integer().
  972. parse_content_length(<< $0 >>) -> 0;
  973. parse_content_length(<< $0, R/bits >>) -> number(R, 0);
  974. parse_content_length(<< $1, R/bits >>) -> number(R, 1);
  975. parse_content_length(<< $2, R/bits >>) -> number(R, 2);
  976. parse_content_length(<< $3, R/bits >>) -> number(R, 3);
  977. parse_content_length(<< $4, R/bits >>) -> number(R, 4);
  978. parse_content_length(<< $5, R/bits >>) -> number(R, 5);
  979. parse_content_length(<< $6, R/bits >>) -> number(R, 6);
  980. parse_content_length(<< $7, R/bits >>) -> number(R, 7);
  981. parse_content_length(<< $8, R/bits >>) -> number(R, 8);
  982. parse_content_length(<< $9, R/bits >>) -> number(R, 9).
  983. -ifdef(TEST).
  984. prop_parse_content_length() ->
  985. ?FORALL(
  986. X,
  987. non_neg_integer(),
  988. X =:= parse_content_length(integer_to_binary(X))
  989. ).
  990. parse_content_length_test_() ->
  991. Tests = [
  992. {<<"0">>, 0},
  993. {<<"42 ">>, 42},
  994. {<<"69\t">>, 69},
  995. {<<"1337">>, 1337},
  996. {<<"3495">>, 3495},
  997. {<<"1234567890">>, 1234567890},
  998. {<<"1234567890 ">>, 1234567890}
  999. ],
  1000. [{V, fun() -> R = parse_content_length(V) end} || {V, R} <- Tests].
  1001. parse_content_length_error_test_() ->
  1002. Tests = [
  1003. <<>>,
  1004. <<"123, 123">>,
  1005. <<"4.17">>
  1006. ],
  1007. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  1008. -endif.
  1009. -ifdef(PERF).
  1010. horse_parse_content_length_zero() ->
  1011. horse:repeat(100000,
  1012. parse_content_length(<<"0">>)
  1013. ).
  1014. horse_parse_content_length_giga() ->
  1015. horse:repeat(100000,
  1016. parse_content_length(<<"1234567890">>)
  1017. ).
  1018. -endif.
  1019. %% @doc Parse the Content-Type header.
  1020. -spec parse_content_type(binary()) -> media_type().
  1021. parse_content_type(<< C, R/bits >>) when ?IS_TOKEN(C) ->
  1022. case C of
  1023. ?INLINE_LOWERCASE(media_type, R, <<>>)
  1024. end.
  1025. media_type(<< $/, C, R/bits >>, T) when ?IS_TOKEN(C) ->
  1026. case C of
  1027. ?INLINE_LOWERCASE(media_subtype, R, T, <<>>)
  1028. end;
  1029. media_type(<< C, R/bits >>, T) when ?IS_TOKEN(C) ->
  1030. case C of
  1031. ?INLINE_LOWERCASE(media_type, R, T)
  1032. end.
  1033. media_subtype(<<>>, T, S) -> {T, S, []};
  1034. media_subtype(<< $;, R/bits >>, T, S) -> media_before_param(R, T, S, []);
  1035. media_subtype(<< $\s, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  1036. media_subtype(<< $\t, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  1037. media_subtype(<< C, R/bits >>, T, S) when ?IS_TOKEN(C) ->
  1038. case C of
  1039. ?INLINE_LOWERCASE(media_subtype, R, T, S)
  1040. end.
  1041. media_before_semicolon(<<>>, T, S, P) -> {T, S, lists:reverse(P)};
  1042. media_before_semicolon(<< $;, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1043. media_before_semicolon(<< $\s, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P);
  1044. media_before_semicolon(<< $\t, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P).
  1045. media_before_param(<< $\s, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1046. media_before_param(<< $\t, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1047. media_before_param(<< "charset=", $", R/bits >>, T, S, P) -> media_charset_quoted(R, T, S, P, <<>>);
  1048. media_before_param(<< "charset=", R/bits >>, T, S, P) -> media_charset(R, T, S, P, <<>>);
  1049. media_before_param(<< C, R/bits >>, T, S, P) when ?IS_TOKEN(C) ->
  1050. case C of
  1051. ?INLINE_LOWERCASE(media_param, R, T, S, P, <<>>)
  1052. end.
  1053. media_charset_quoted(<< $", R/bits >>, T, S, P, V) ->
  1054. media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1055. media_charset_quoted(<< $\\, C, R/bits >>, T, S, P, V) when ?IS_VCHAR(C) ->
  1056. case C of
  1057. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  1058. end;
  1059. media_charset_quoted(<< C, R/bits >>, T, S, P, V) when ?IS_VCHAR(C) ->
  1060. case C of
  1061. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  1062. end.
  1063. media_charset(<<>>, T, S, P, V) -> {T, S, lists:reverse([{<<"charset">>, V}|P])};
  1064. media_charset(<< $;, R/bits >>, T, S, P, V) -> media_before_param(R, T, S, [{<<"charset">>, V}|P]);
  1065. media_charset(<< $\s, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1066. media_charset(<< $\t, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1067. media_charset(<< C, R/bits >>, T, S, P, V) when ?IS_TOKEN(C) ->
  1068. case C of
  1069. ?INLINE_LOWERCASE(media_charset, R, T, S, P, V)
  1070. end.
  1071. media_param(<< $=, $", R/bits >>, T, S, P, K) -> media_quoted(R, T, S, P, K, <<>>);
  1072. media_param(<< $=, R/bits >>, T, S, P, K) -> media_value(R, T, S, P, K, <<>>);
  1073. media_param(<< C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) ->
  1074. case C of
  1075. ?INLINE_LOWERCASE(media_param, R, T, S, P, K)
  1076. end.
  1077. media_quoted(<< $", R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1078. media_quoted(<< $\\, C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>);
  1079. media_quoted(<< C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>).
  1080. media_value(<<>>, T, S, P, K, V) -> {T, S, lists:reverse([{K, V}|P])};
  1081. media_value(<< $;, R/bits >>, T, S, P, K, V) -> media_before_param(R, T, S, [{K, V}|P]);
  1082. media_value(<< $\s, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1083. media_value(<< $\t, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1084. media_value(<< C, R/bits >>, T, S, P, K, V) when ?IS_TOKEN(C) -> media_value(R, T, S, P, K, << V/binary, C >>).
  1085. -ifdef(TEST).
  1086. media_type_parameter() ->
  1087. frequency([
  1088. {90, parameter()},
  1089. {10, {<<"charset">>, oneof([token(), quoted_string()]), <<>>, <<>>}}
  1090. ]).
  1091. media_type() ->
  1092. ?LET({T, S, P},
  1093. {token(), token(), small_list(media_type_parameter())},
  1094. {T, S, P, iolist_to_binary([T, $/, S, [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P]])}
  1095. ).
  1096. prop_parse_content_type() ->
  1097. ?FORALL({T, S, P, MediaType},
  1098. media_type(),
  1099. begin
  1100. {ResT, ResS, ResP} = parse_content_type(MediaType),
  1101. ExpectedP = [case ?INLINE_LOWERCASE_BC(K) of
  1102. <<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(V))};
  1103. LowK -> {LowK, unquote(V)}
  1104. end || {K, V, _, _} <- P],
  1105. ResT =:= ?INLINE_LOWERCASE_BC(T)
  1106. andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
  1107. andalso ResP =:= ExpectedP
  1108. end
  1109. ).
  1110. parse_content_type_test_() ->
  1111. Tests = [
  1112. {<<"text/html;charset=utf-8">>,
  1113. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1114. {<<"text/html;charset=UTF-8">>,
  1115. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1116. {<<"Text/HTML;Charset=\"utf-8\"">>,
  1117. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1118. {<<"text/html; charset=\"utf-8\"">>,
  1119. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1120. {<<"text/html; charset=ISO-8859-4">>,
  1121. {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  1122. {<<"text/plain; charset=iso-8859-4">>,
  1123. {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  1124. {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
  1125. {<<"multipart">>, <<"form-data">>, [
  1126. {<<"boundary">>, <<"MultipartIsUgly">>}
  1127. ]}},
  1128. {<<"foo/bar; one=FirstParam; two=SecondParam">>,
  1129. {<<"foo">>, <<"bar">>, [
  1130. {<<"one">>, <<"FirstParam">>},
  1131. {<<"two">>, <<"SecondParam">>}
  1132. ]}}
  1133. ],
  1134. [{V, fun() -> R = parse_content_type(V) end} || {V, R} <- Tests].
  1135. -endif.
  1136. -ifdef(PERF).
  1137. horse_parse_content_type() ->
  1138. horse:repeat(200000,
  1139. parse_content_type(<<"text/html;charset=utf-8">>)
  1140. ).
  1141. -endif.
  1142. %% @doc Parse the Date header.
  1143. -spec parse_date(binary()) -> calendar:datetime().
  1144. parse_date(Date) ->
  1145. cow_date:parse_date(Date).
  1146. -ifdef(TEST).
  1147. parse_date_test_() ->
  1148. Tests = [
  1149. {<<"Tue, 15 Nov 1994 08:12:31 GMT">>, {{1994, 11, 15}, {8, 12, 31}}}
  1150. ],
  1151. [{V, fun() -> R = parse_date(V) end} || {V, R} <- Tests].
  1152. -endif.
  1153. %% @doc Parse the ETag header.
  1154. -spec parse_etag(binary()) -> etag().
  1155. parse_etag(<< $W, $/, $", R/bits >>) ->
  1156. etag(R, weak, <<>>);
  1157. parse_etag(<< $", R/bits >>) ->
  1158. etag(R, strong, <<>>).
  1159. etag(<< $", R/bits >>, Strength, Tag) ->
  1160. ws_end(R),
  1161. {Strength, Tag};
  1162. etag(<< C, R/bits >>, Strength, Tag) when ?IS_ETAGC(C) ->
  1163. etag(R, Strength, << Tag/binary, C >>).
  1164. -ifdef(TEST).
  1165. etagc() ->
  1166. ?SUCHTHAT(C, int(16#21, 16#ff), C =/= 16#22 andalso C =/= 16#7f).
  1167. etag() ->
  1168. ?LET({Strength, Tag},
  1169. {elements([weak, strong]), list(etagc())},
  1170. begin
  1171. TagBin = list_to_binary(Tag),
  1172. {{Strength, TagBin},
  1173. case Strength of
  1174. weak -> << $W, $/, $", TagBin/binary, $" >>;
  1175. strong -> << $", TagBin/binary, $" >>
  1176. end}
  1177. end).
  1178. prop_parse_etag() ->
  1179. ?FORALL({Tag, TagBin},
  1180. etag(),
  1181. Tag =:= parse_etag(TagBin)).
  1182. parse_etag_test_() ->
  1183. Tests = [
  1184. {<<"\"xyzzy\"">>, {strong, <<"xyzzy">>}},
  1185. {<<"W/\"xyzzy\"">>, {weak, <<"xyzzy">>}},
  1186. {<<"\"\"">>, {strong, <<>>}}
  1187. ],
  1188. [{V, fun() -> R = parse_etag(V) end} || {V, R} <- Tests].
  1189. parse_etag_error_test_() ->
  1190. Tests = [
  1191. <<>>,
  1192. <<"\"">>,
  1193. <<"W">>,
  1194. <<"W/">>
  1195. ],
  1196. [{V, fun() -> {'EXIT', _} = (catch parse_etag(V)) end} || V <- Tests].
  1197. -endif.
  1198. -ifdef(PERF).
  1199. horse_parse_etag() ->
  1200. horse:repeat(200000,
  1201. parse_etag(<<"W/\"xyzzy\"">>)
  1202. ).
  1203. -endif.
  1204. %% @doc Parse the Expect header.
  1205. -spec parse_expect(binary()) -> continue.
  1206. parse_expect(<<"100-continue", Rest/bits >>) ->
  1207. ws_end(Rest),
  1208. continue;
  1209. parse_expect(<<"100-", C, O, N, T, I, M, U, E, Rest/bits >>)
  1210. when C =:= $C orelse C =:= $c, O =:= $O orelse O =:= $o,
  1211. N =:= $N orelse N =:= $n, T =:= $T orelse T =:= $t,
  1212. I =:= $I orelse I =:= $i, M =:= $N orelse M =:= $n,
  1213. U =:= $U orelse U =:= $u, E =:= $E orelse E =:= $e ->
  1214. ws_end(Rest),
  1215. continue.
  1216. -ifdef(TEST).
  1217. expect() ->
  1218. ?LET(E,
  1219. [$1, $0, $0, $-,
  1220. elements([$c, $C]), elements([$o, $O]), elements([$n, $N]),
  1221. elements([$t, $T]), elements([$i, $I]), elements([$n, $N]),
  1222. elements([$u, $U]), elements([$e, $E])],
  1223. list_to_binary(E)).
  1224. prop_parse_expect() ->
  1225. ?FORALL(E, expect(), continue =:= parse_expect(E)).
  1226. parse_expect_test_() ->
  1227. Tests = [
  1228. <<"100-continue">>,
  1229. <<"100-CONTINUE">>,
  1230. <<"100-Continue">>,
  1231. <<"100-CoNtInUe">>,
  1232. <<"100-continue ">>
  1233. ],
  1234. [{V, fun() -> continue = parse_expect(V) end} || V <- Tests].
  1235. parse_expect_error_test_() ->
  1236. Tests = [
  1237. <<>>,
  1238. <<" ">>,
  1239. <<"200-OK">>,
  1240. <<"Cookies">>
  1241. ],
  1242. [{V, fun() -> {'EXIT', _} = (catch parse_expect(V)) end} || V <- Tests].
  1243. -endif.
  1244. -ifdef(PERF).
  1245. horse_parse_expect() ->
  1246. horse:repeat(200000,
  1247. parse_expect(<<"100-continue">>)
  1248. ).
  1249. -endif.
  1250. %% @doc Parse the Expires header.
  1251. %%
  1252. %% Recipients must interpret invalid date formats as a date
  1253. %% in the past. The value "0" is commonly used.
  1254. -spec parse_expires(binary()) -> calendar:datetime().
  1255. parse_expires(<<"0">>) ->
  1256. {{1, 1, 1}, {0, 0, 0}};
  1257. parse_expires(Expires) ->
  1258. try
  1259. cow_date:parse_date(Expires)
  1260. catch _:_ ->
  1261. {{1, 1, 1}, {0, 0, 0}}
  1262. end.
  1263. -ifdef(TEST).
  1264. parse_expires_test_() ->
  1265. Tests = [
  1266. {<<"0">>, {{1, 1, 1}, {0, 0, 0}}},
  1267. {<<"Thu, 01 Dec 1994 nope invalid">>, {{1, 1, 1}, {0, 0, 0}}},
  1268. {<<"Thu, 01 Dec 1994 16:00:00 GMT">>, {{1994, 12, 1}, {16, 0, 0}}}
  1269. ],
  1270. [{V, fun() -> R = parse_expires(V) end} || {V, R} <- Tests].
  1271. -endif.
  1272. -ifdef(PERF).
  1273. horse_parse_expires_0() ->
  1274. horse:repeat(200000,
  1275. parse_expires(<<"0">>)
  1276. ).
  1277. horse_parse_expires_invalid() ->
  1278. horse:repeat(200000,
  1279. parse_expires(<<"Thu, 01 Dec 1994 nope invalid">>)
  1280. ).
  1281. -endif.
  1282. %% @doc Parse the If-Match header.
  1283. -spec parse_if_match(binary()) -> '*' | [etag()].
  1284. parse_if_match(<<"*">>) ->
  1285. '*';
  1286. parse_if_match(IfMatch) ->
  1287. nonempty(etag_list(IfMatch, [])).
  1288. etag_list(<<>>, Acc) -> lists:reverse(Acc);
  1289. etag_list(<< $\s, R/bits >>, Acc) -> etag_list(R, Acc);
  1290. etag_list(<< $\t, R/bits >>, Acc) -> etag_list(R, Acc);
  1291. etag_list(<< $,, R/bits >>, Acc) -> etag_list(R, Acc);
  1292. etag_list(<< $W, $/, $", R/bits >>, Acc) -> etag(R, Acc, weak, <<>>);
  1293. etag_list(<< $", R/bits >>, Acc) -> etag(R, Acc, strong, <<>>).
  1294. etag(<< $", R/bits >>, Acc, Strength, Tag) -> etag_list_sep(R, [{Strength, Tag}|Acc]);
  1295. etag(<< C, R/bits >>, Acc, Strength, Tag) when ?IS_ETAGC(C) -> etag(R, Acc, Strength, << Tag/binary, C >>).
  1296. etag_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  1297. etag_list_sep(<< $\s, R/bits >>, Acc) -> etag_list_sep(R, Acc);
  1298. etag_list_sep(<< $\t, R/bits >>, Acc) -> etag_list_sep(R, Acc);
  1299. etag_list_sep(<< $,, R/bits >>, Acc) -> etag_list(R, Acc).
  1300. -ifdef(TEST).
  1301. prop_parse_if_match() ->
  1302. ?FORALL(L,
  1303. non_empty(list(etag())),
  1304. begin
  1305. << _, IfMatch/binary >> = iolist_to_binary([[$,, T] || {_, T} <- L]),
  1306. ResL = parse_if_match(IfMatch),
  1307. CheckedL = [T =:= ResT || {{T, _}, ResT} <- lists:zip(L, ResL)],
  1308. [true] =:= lists:usort(CheckedL)
  1309. end).
  1310. parse_if_match_test_() ->
  1311. Tests = [
  1312. {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
  1313. {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
  1314. [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
  1315. {<<"*">>, '*'}
  1316. ],
  1317. [{V, fun() -> R = parse_if_match(V) end} || {V, R} <- Tests].
  1318. parse_if_match_error_test_() ->
  1319. Tests = [
  1320. <<>>
  1321. ],
  1322. [{V, fun() -> {'EXIT', _} = (catch parse_if_match(V)) end} || V <- Tests].
  1323. -endif.
  1324. -ifdef(PERF).
  1325. horse_parse_if_match() ->
  1326. horse:repeat(200000,
  1327. parse_if_match(<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>)
  1328. ).
  1329. -endif.
  1330. %% @doc Parse the If-Modified-Since header.
  1331. -spec parse_if_modified_since(binary()) -> calendar:datetime().
  1332. parse_if_modified_since(IfModifiedSince) ->
  1333. cow_date:parse_date(IfModifiedSince).
  1334. -ifdef(TEST).
  1335. parse_if_modified_since_test_() ->
  1336. Tests = [
  1337. {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
  1338. ],
  1339. [{V, fun() -> R = parse_if_modified_since(V) end} || {V, R} <- Tests].
  1340. -endif.
  1341. %% @doc Parse the If-None-Match header.
  1342. -spec parse_if_none_match(binary()) -> '*' | [etag()].
  1343. parse_if_none_match(<<"*">>) ->
  1344. '*';
  1345. parse_if_none_match(IfNoneMatch) ->
  1346. nonempty(etag_list(IfNoneMatch, [])).
  1347. -ifdef(TEST).
  1348. parse_if_none_match_test_() ->
  1349. Tests = [
  1350. {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
  1351. {<<"W/\"xyzzy\"">>, [{weak, <<"xyzzy">>}]},
  1352. {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
  1353. [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
  1354. {<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>,
  1355. [{weak, <<"xyzzy">>}, {weak, <<"r2d2xxxx">>}, {weak, <<"c3piozzzz">>}]},
  1356. {<<"*">>, '*'}
  1357. ],
  1358. [{V, fun() -> R = parse_if_none_match(V) end} || {V, R} <- Tests].
  1359. parse_if_none_match_error_test_() ->
  1360. Tests = [
  1361. <<>>
  1362. ],
  1363. [{V, fun() -> {'EXIT', _} = (catch parse_if_none_match(V)) end} || V <- Tests].
  1364. -endif.
  1365. -ifdef(PERF).
  1366. horse_parse_if_none_match() ->
  1367. horse:repeat(200000,
  1368. parse_if_none_match(<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>)
  1369. ).
  1370. -endif.
  1371. %% @doc Parse the If-Unmodified-Since header.
  1372. -spec parse_if_unmodified_since(binary()) -> calendar:datetime().
  1373. parse_if_unmodified_since(IfModifiedSince) ->
  1374. cow_date:parse_date(IfModifiedSince).
  1375. -ifdef(TEST).
  1376. parse_if_unmodified_since_test_() ->
  1377. Tests = [
  1378. {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
  1379. ],
  1380. [{V, fun() -> R = parse_if_unmodified_since(V) end} || {V, R} <- Tests].
  1381. -endif.
  1382. %% @doc Parse the Last-Modified header.
  1383. -spec parse_last_modified(binary()) -> calendar:datetime().
  1384. parse_last_modified(LastModified) ->
  1385. cow_date:parse_date(LastModified).
  1386. -ifdef(TEST).
  1387. parse_last_modified_test_() ->
  1388. Tests = [
  1389. {<<"Tue, 15 Nov 1994 12:45:26 GMT">>, {{1994, 11, 15}, {12, 45, 26}}}
  1390. ],
  1391. [{V, fun() -> R = parse_last_modified(V) end} || {V, R} <- Tests].
  1392. -endif.
  1393. %% @doc Parse the Max-Forwards header.
  1394. -spec parse_max_forwards(binary()) -> non_neg_integer().
  1395. parse_max_forwards(<< $0, R/bits >>) -> number(R, 0);
  1396. parse_max_forwards(<< $1, R/bits >>) -> number(R, 1);
  1397. parse_max_forwards(<< $2, R/bits >>) -> number(R, 2);
  1398. parse_max_forwards(<< $3, R/bits >>) -> number(R, 3);
  1399. parse_max_forwards(<< $4, R/bits >>) -> number(R, 4);
  1400. parse_max_forwards(<< $5, R/bits >>) -> number(R, 5);
  1401. parse_max_forwards(<< $6, R/bits >>) -> number(R, 6);
  1402. parse_max_forwards(<< $7, R/bits >>) -> number(R, 7);
  1403. parse_max_forwards(<< $8, R/bits >>) -> number(R, 8);
  1404. parse_max_forwards(<< $9, R/bits >>) -> number(R, 9).
  1405. -ifdef(TEST).
  1406. prop_parse_max_forwards() ->
  1407. ?FORALL(
  1408. X,
  1409. non_neg_integer(),
  1410. X =:= parse_max_forwards(integer_to_binary(X))
  1411. ).
  1412. parse_max_forwards_test_() ->
  1413. Tests = [
  1414. {<<"0">>, 0},
  1415. {<<"42 ">>, 42},
  1416. {<<"69\t">>, 69},
  1417. {<<"1337">>, 1337},
  1418. {<<"1234567890">>, 1234567890},
  1419. {<<"1234567890 ">>, 1234567890}
  1420. ],
  1421. [{V, fun() -> R = parse_max_forwards(V) end} || {V, R} <- Tests].
  1422. parse_max_forwards_error_test_() ->
  1423. Tests = [
  1424. <<>>,
  1425. <<"123, 123">>,
  1426. <<"4.17">>
  1427. ],
  1428. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  1429. -endif.
  1430. %% @doc Parse the Sec-WebSocket-Extensions request header.
  1431. -spec parse_sec_websocket_extensions(binary()) -> [{binary(), [binary() | {binary(), binary()}]}].
  1432. parse_sec_websocket_extensions(SecWebSocketExtensions) ->
  1433. nonempty(ws_extension_list(SecWebSocketExtensions, [])).
  1434. ws_extension_list(<<>>, Acc) -> lists:reverse(Acc);
  1435. ws_extension_list(<< $\s, R/bits >>, Acc) -> ws_extension_list(R, Acc);
  1436. ws_extension_list(<< $\t, R/bits >>, Acc) -> ws_extension_list(R, Acc);
  1437. ws_extension_list(<< $,, R/bits >>, Acc) -> ws_extension_list(R, Acc);
  1438. ws_extension_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) -> ws_extension(R, Acc, << C >>).
  1439. ws_extension(<<>>, Acc, E) -> lists:reverse([{E, []}|Acc]);
  1440. ws_extension(<< $,, R/bits >>, Acc, E) -> ws_extension_list(R, [{E, []}|Acc]);
  1441. ws_extension(<< $;, R/bits >>, Acc, E) -> ws_extension_before_param(R, Acc, E, []);
  1442. ws_extension(<< $\s, R/bits >>, Acc, E) -> ws_extension_before_semicolon(R, Acc, E, []);
  1443. ws_extension(<< $\t, R/bits >>, Acc, E) -> ws_extension_before_semicolon(R, Acc, E, []);
  1444. ws_extension(<< C, R/bits >>, Acc, E) when ?IS_TOKEN(C) -> ws_extension(R, Acc, << E/binary, C >>).
  1445. ws_extension_before_semicolon(<<>>, Acc, E, P) -> lists:reverse([{E, lists:reverse(P)}|Acc]);
  1446. ws_extension_before_semicolon(<< $,, R/bits >>, Acc, E, P) -> ws_extension_list(R, [{E, lists:reverse(P)}|Acc]);
  1447. ws_extension_before_semicolon(<< $;, R/bits >>, Acc, E, P) -> ws_extension_before_param(R, Acc, E, P);
  1448. ws_extension_before_semicolon(<< $\s, R/bits >>, Acc, E, P) -> ws_extension_before_semicolon(R, Acc, E, P);
  1449. ws_extension_before_semicolon(<< $\t, R/bits >>, Acc, E, P) -> ws_extension_before_semicolon(R, Acc, E, P).
  1450. ws_extension_before_param(<< $\s, R/bits >>, Acc, E, P) -> ws_extension_before_param(R, Acc, E, P);
  1451. ws_extension_before_param(<< $\t, R/bits >>, Acc, E, P) -> ws_extension_before_param(R, Acc, E, P);
  1452. ws_extension_before_param(<< C, R/bits >>, Acc, E, P) when ?IS_TOKEN(C) -> ws_extension_param(R, Acc, E, P, << C >>).
  1453. ws_extension_param(<<>>, Acc, E, P, K) -> lists:reverse([{E, lists:reverse([K|P])}|Acc]);
  1454. ws_extension_param(<< $\s, R/bits >>, Acc, E, P, K) -> ws_extension_before_semicolon(R, Acc, E, [K|P]);
  1455. ws_extension_param(<< $\t, R/bits >>, Acc, E, P, K) -> ws_extension_before_semicolon(R, Acc, E, [K|P]);
  1456. ws_extension_param(<< $,, R/bits >>, Acc, E, P, K) -> ws_extension_list(R, [{E, lists:reverse([K|P])}|Acc]);
  1457. ws_extension_param(<< $;, R/bits >>, Acc, E, P, K) -> ws_extension_before_param(R, Acc, E, [K|P]);
  1458. ws_extension_param(<< $=, $", R/bits >>, Acc, E, P, K) -> ws_extension_quoted(R, Acc, E, P, K, <<>>);
  1459. ws_extension_param(<< $=, C, R/bits >>, Acc, E, P, K) when ?IS_TOKEN(C) -> ws_extension_value(R, Acc, E, P, K, << C >>);
  1460. ws_extension_param(<< C, R/bits >>, Acc, E, P, K) when ?IS_TOKEN(C) -> ws_extension_param(R, Acc, E, P, << K/binary, C >>).
  1461. ws_extension_quoted(<< $", R/bits >>, Acc, E, P, K, V) -> ws_extension_before_semicolon(R, Acc, E, [{K, V}|P]);
  1462. ws_extension_quoted(<< $\\, C, R/bits >>, Acc, E, P, K, V) when ?IS_TOKEN(C) -> ws_extension_quoted(R, Acc, E, P, K, << V/binary, C >>);
  1463. ws_extension_quoted(<< C, R/bits >>, Acc, E, P, K, V) when ?IS_TOKEN(C) -> ws_extension_quoted(R, Acc, E, P, K, << V/binary, C >>).
  1464. ws_extension_value(<<>>, Acc, E, P, K, V) -> lists:reverse([{E, lists:reverse([{K, V}|P])}|Acc]);
  1465. ws_extension_value(<< $\s, R/bits >>, Acc, E, P, K, V) -> ws_extension_before_semicolon(R, Acc, E, [{K, V}|P]);
  1466. ws_extension_value(<< $\t, R/bits >>, Acc, E, P, K, V) -> ws_extension_before_semicolon(R, Acc, E, [{K, V}|P]);
  1467. ws_extension_value(<< $,, R/bits >>, Acc, E, P, K, V) -> ws_extension_list(R, [{E, lists:reverse([{K, V}|P])}|Acc]);
  1468. ws_extension_value(<< $;, R/bits >>, Acc, E, P, K, V) -> ws_extension_before_param(R, Acc, E, [{K, V}|P]);
  1469. ws_extension_value(<< C, R/bits >>, Acc, E, P, K, V) when ?IS_TOKEN(C) -> ws_extension_value(R, Acc, E, P, K, << V/binary, C >>).
  1470. -ifdef(TEST).
  1471. quoted_token() ->
  1472. ?LET(T,
  1473. non_empty(list(frequency([
  1474. {99, tchar()},
  1475. {1, [$\\, tchar()]}
  1476. ]))),
  1477. [$", T, $"]).
  1478. ws_extension() ->
  1479. ?LET({E, PL},
  1480. {token(), small_list({ows(), ows(), oneof([token(), {token(), oneof([token(), quoted_token()])}])})},
  1481. {E, PL, iolist_to_binary([E,
  1482. [case P of
  1483. {OWS1, OWS2, {K, V}} -> [OWS1, $;, OWS2, K, $=, V];
  1484. {OWS1, OWS2, K} -> [OWS1, $;, OWS2, K]
  1485. end || P <- PL]
  1486. ])}).
  1487. prop_parse_sec_websocket_extensions() ->
  1488. ?FORALL(L,
  1489. non_empty(list(ws_extension())),
  1490. begin
  1491. << _, SecWebsocketExtensions/binary >> = iolist_to_binary([[$,, E] || {_, _, E} <- L]),
  1492. ResL = parse_sec_websocket_extensions(SecWebsocketExtensions),
  1493. CheckedL = [begin
  1494. ExpectedPL = [case P of
  1495. {_, _, {K, V}} -> {K, unquote(V)};
  1496. {_, _, K} -> K
  1497. end || P <- PL],
  1498. E =:= ResE andalso ExpectedPL =:= ResPL
  1499. end || {{E, PL, _}, {ResE, ResPL}} <- lists:zip(L, ResL)],
  1500. [true] =:= lists:usort(CheckedL)
  1501. end).
  1502. parse_sec_websocket_extensions_test_() ->
  1503. Tests = [
  1504. {<<"foo">>, [{<<"foo">>, []}]},
  1505. {<<"bar; baz=2">>, [{<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
  1506. {<<"foo, bar; baz=2">>, [{<<"foo">>, []}, {<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
  1507. {<<"deflate-stream">>, [{<<"deflate-stream">>, []}]},
  1508. {<<"mux; max-channels=4; flow-control, deflate-stream">>,
  1509. [{<<"mux">>, [{<<"max-channels">>, <<"4">>}, <<"flow-control">>]}, {<<"deflate-stream">>, []}]},
  1510. {<<"private-extension">>, [{<<"private-extension">>, []}]}
  1511. ],
  1512. [{V, fun() -> R = parse_sec_websocket_extensions(V) end} || {V, R} <- Tests].
  1513. parse_sec_websocket_extensions_error_test_() ->
  1514. Tests = [
  1515. <<>>
  1516. ],
  1517. [{V, fun() -> {'EXIT', _} = (catch parse_sec_websocket_extensions(V)) end}
  1518. || V <- Tests].
  1519. -endif.
  1520. -ifdef(PERF).
  1521. horse_parse_sec_websocket_extensions() ->
  1522. horse:repeat(200000,
  1523. parse_sec_websocket_extensions(<<"mux; max-channels=4; flow-control, deflate-stream">>)
  1524. ).
  1525. -endif.
  1526. %% @doc Parse the Sec-WebSocket-Protocol request header.
  1527. -spec parse_sec_websocket_protocol_client(binary()) -> [binary()].
  1528. parse_sec_websocket_protocol_client(SecWebSocketProtocol) ->
  1529. nonempty(token_ci_list(SecWebSocketProtocol, [])).
  1530. -ifdef(TEST).
  1531. parse_sec_websocket_protocol_client_test_() ->
  1532. Tests = [
  1533. {<<"chat, superchat">>, [<<"chat">>, <<"superchat">>]}
  1534. ],
  1535. [{V, fun() -> R = parse_sec_websocket_protocol_client(V) end} || {V, R} <- Tests].
  1536. parse_sec_websocket_protocol_client_error_test_() ->
  1537. Tests = [
  1538. <<>>
  1539. ],
  1540. [{V, fun() -> {'EXIT', _} = (catch parse_sec_websocket_protocol_client(V)) end}
  1541. || V <- Tests].
  1542. -endif.
  1543. -ifdef(PERF).
  1544. horse_parse_sec_websocket_protocol_client() ->
  1545. horse:repeat(200000,
  1546. parse_sec_websocket_protocol_client(<<"chat, superchat">>)
  1547. ).
  1548. -endif.
  1549. %% @doc Parse the Sec-WebSocket-Version request header.
  1550. -spec parse_sec_websocket_version_client(binary()) -> 0..255.
  1551. parse_sec_websocket_version_client(SecWebSocketVersion) when byte_size(SecWebSocketVersion) < 4 ->
  1552. Version = binary_to_integer(SecWebSocketVersion),
  1553. true = Version >= 0 andalso Version =< 255,
  1554. Version.
  1555. -ifdef(TEST).
  1556. prop_parse_sec_websocket_version_client() ->
  1557. ?FORALL(Version,
  1558. int(0, 255),
  1559. Version =:= parse_sec_websocket_version_client(integer_to_binary(Version))).
  1560. parse_sec_websocket_version_client_test_() ->
  1561. Tests = [
  1562. {<<"13">>, 13},
  1563. {<<"25">>, 25}
  1564. ],
  1565. [{V, fun() -> R = parse_sec_websocket_version_client(V) end} || {V, R} <- Tests].
  1566. parse_sec_websocket_version_client_error_test_() ->
  1567. Tests = [
  1568. <<>>,
  1569. <<" ">>,
  1570. <<"7, 8, 13">>,
  1571. <<"invalid">>
  1572. ],
  1573. [{V, fun() -> {'EXIT', _} = (catch parse_sec_websocket_version_client(V)) end}
  1574. || V <- Tests].
  1575. -endif.
  1576. -ifdef(PERF).
  1577. horse_parse_sec_websocket_version_client_13() ->
  1578. horse:repeat(200000,
  1579. parse_sec_websocket_version_client(<<"13">>)
  1580. ).
  1581. horse_parse_sec_websocket_version_client_255() ->
  1582. horse:repeat(200000,
  1583. parse_sec_websocket_version_client(<<"255">>)
  1584. ).
  1585. -endif.
  1586. %% @doc Parse the Trailer header.
  1587. -spec parse_trailer(binary()) -> [binary()].
  1588. parse_trailer(Trailer) ->
  1589. nonempty(token_ci_list(Trailer, [])).
  1590. -ifdef(TEST).
  1591. parse_trailer_test_() ->
  1592. Tests = [
  1593. {<<"Date, Content-MD5">>, [<<"date">>, <<"content-md5">>]}
  1594. ],
  1595. [{V, fun() -> R = parse_trailer(V) end} || {V, R} <- Tests].
  1596. parse_trailer_error_test_() ->
  1597. Tests = [
  1598. <<>>
  1599. ],
  1600. [{V, fun() -> {'EXIT', _} = (catch parse_trailer(V)) end} || V <- Tests].
  1601. -endif.
  1602. -ifdef(PERF).
  1603. horse_parse_trailer() ->
  1604. horse:repeat(200000,
  1605. parse_trailer(<<"Date, Content-MD5">>)
  1606. ).
  1607. -endif.
  1608. %% @doc Parse the Transfer-Encoding header.
  1609. %%
  1610. %% @todo This function does not support parsing of transfer-parameter.
  1611. -spec parse_transfer_encoding(binary()) -> [binary()].
  1612. parse_transfer_encoding(<<"chunked">>) ->
  1613. [<<"chunked">>];
  1614. parse_transfer_encoding(TransferEncoding) ->
  1615. nonempty(token_ci_list(TransferEncoding, [])).
  1616. -ifdef(TEST).
  1617. prop_parse_transfer_encoding() ->
  1618. ?FORALL(L,
  1619. non_empty(list(token())),
  1620. begin
  1621. << _, TransferEncoding/binary >> = iolist_to_binary([[$,, C] || C <- L]),
  1622. ResL = parse_transfer_encoding(TransferEncoding),
  1623. CheckedL = [?INLINE_LOWERCASE_BC(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)],
  1624. [true] =:= lists:usort(CheckedL)
  1625. end).
  1626. parse_transfer_encoding_test_() ->
  1627. Tests = [
  1628. {<<"a , , , ">>, [<<"a">>]},
  1629. {<<" , , , a">>, [<<"a">>]},
  1630. {<<"a , , b">>, [<<"a">>, <<"b">>]},
  1631. {<<"chunked">>, [<<"chunked">>]},
  1632. {<<"chunked, something">>, [<<"chunked">>, <<"something">>]},
  1633. {<<"gzip, chunked">>, [<<"gzip">>, <<"chunked">>]}
  1634. ],
  1635. [{V, fun() -> R = parse_transfer_encoding(V) end} || {V, R} <- Tests].
  1636. parse_transfer_encoding_error_test_() ->
  1637. Tests = [
  1638. <<>>,
  1639. <<" ">>,
  1640. <<" , ">>,
  1641. <<",,,">>,
  1642. <<"a b">>
  1643. ],
  1644. [{V, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(V)) end}
  1645. || V <- Tests].
  1646. -endif.
  1647. -ifdef(PERF).
  1648. horse_parse_transfer_encoding_chunked() ->
  1649. horse:repeat(200000,
  1650. parse_transfer_encoding(<<"chunked">>)
  1651. ).
  1652. horse_parse_transfer_encoding_custom() ->
  1653. horse:repeat(200000,
  1654. parse_transfer_encoding(<<"chunked, something">>)
  1655. ).
  1656. -endif.
  1657. %% @doc Parse the Upgrade header.
  1658. %%
  1659. %% It is unclear from the RFC whether the values here are
  1660. %% case sensitive.
  1661. %%
  1662. %% We handle them in a case insensitive manner because they
  1663. %% are described as case insensitive in the Websocket RFC.
  1664. -spec parse_upgrade(binary()) -> [binary()].
  1665. parse_upgrade(Upgrade) ->
  1666. nonempty(protocol_list(Upgrade, [])).
  1667. protocol_list(<<>>, Acc) -> lists:reverse(Acc);
  1668. protocol_list(<< $\s, R/bits >>, Acc) -> protocol_list(R, Acc);
  1669. protocol_list(<< $\t, R/bits >>, Acc) -> protocol_list(R, Acc);
  1670. protocol_list(<< $,, R/bits >>, Acc) -> protocol_list(R, Acc);
  1671. protocol_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  1672. case C of
  1673. ?INLINE_LOWERCASE(protocol_name, R, Acc, <<>>)
  1674. end.
  1675. protocol_name(<<>>, Acc, P) -> lists:reverse([P|Acc]);
  1676. protocol_name(<< $\s, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1677. protocol_name(<< $\t, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1678. protocol_name(<< $,, R/bits >>, Acc, P) -> protocol_list(R, [P|Acc]);
  1679. protocol_name(<< $/, C, R/bits >>, Acc, P) ->
  1680. case C of
  1681. ?INLINE_LOWERCASE(protocol_version, R, Acc, << P/binary, $/ >>)
  1682. end;
  1683. protocol_name(<< C, R/bits >>, Acc, P) when ?IS_TOKEN(C) ->
  1684. case C of
  1685. ?INLINE_LOWERCASE(protocol_name, R, Acc, P)
  1686. end.
  1687. protocol_version(<<>>, Acc, P) -> lists:reverse([P|Acc]);
  1688. protocol_version(<< $\s, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1689. protocol_version(<< $\t, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1690. protocol_version(<< $,, R/bits >>, Acc, P) -> protocol_list(R, [P|Acc]);
  1691. protocol_version(<< C, R/bits >>, Acc, P) when ?IS_TOKEN(C) ->
  1692. case C of
  1693. ?INLINE_LOWERCASE(protocol_version, R, Acc, P)
  1694. end.
  1695. protocol_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  1696. protocol_list_sep(<< $\s, R/bits >>, Acc) -> protocol_list_sep(R, Acc);
  1697. protocol_list_sep(<< $\t, R/bits >>, Acc) -> protocol_list_sep(R, Acc);
  1698. protocol_list_sep(<< $,, R/bits >>, Acc) -> protocol_list(R, Acc).
  1699. -ifdef(TEST).
  1700. protocols() ->
  1701. ?LET(P,
  1702. oneof([token(), [token(), $/, token()]]),
  1703. iolist_to_binary(P)).
  1704. prop_parse_upgrade() ->
  1705. ?FORALL(L,
  1706. non_empty(list(protocols())),
  1707. begin
  1708. << _, Upgrade/binary >> = iolist_to_binary([[$,, P] || P <- L]),
  1709. ResL = parse_upgrade(Upgrade),
  1710. CheckedL = [?INLINE_LOWERCASE_BC(P) =:= ResP || {P, ResP} <- lists:zip(L, ResL)],
  1711. [true] =:= lists:usort(CheckedL)
  1712. end).
  1713. parse_upgrade_test_() ->
  1714. Tests = [
  1715. {<<"HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11">>,
  1716. [<<"http/2.0">>, <<"shttp/1.3">>, <<"irc/6.9">>, <<"rta/x11">>]},
  1717. {<<"HTTP/2.0">>, [<<"http/2.0">>]}
  1718. ],
  1719. [{V, fun() -> R = parse_transfer_encoding(V) end} || {V, R} <- Tests].
  1720. parse_upgrade_error_test_() ->
  1721. Tests = [
  1722. <<>>
  1723. ],
  1724. [{V, fun() -> {'EXIT', _} = (catch parse_upgrade(V)) end}
  1725. || V <- Tests].
  1726. -endif.
  1727. %% Internal.
  1728. %% Only return if the list is not empty.
  1729. nonempty(L) when L =/= [] -> L.
  1730. %% Parse a number optionally followed by whitespace.
  1731. number(<< $0, R/bits >>, Acc) -> number(R, Acc * 10);
  1732. number(<< $1, R/bits >>, Acc) -> number(R, Acc * 10 + 1);
  1733. number(<< $2, R/bits >>, Acc) -> number(R, Acc * 10 + 2);
  1734. number(<< $3, R/bits >>, Acc) -> number(R, Acc * 10 + 3);
  1735. number(<< $4, R/bits >>, Acc) -> number(R, Acc * 10 + 4);
  1736. number(<< $5, R/bits >>, Acc) -> number(R, Acc * 10 + 5);
  1737. number(<< $6, R/bits >>, Acc) -> number(R, Acc * 10 + 6);
  1738. number(<< $7, R/bits >>, Acc) -> number(R, Acc * 10 + 7);
  1739. number(<< $8, R/bits >>, Acc) -> number(R, Acc * 10 + 8);
  1740. number(<< $9, R/bits >>, Acc) -> number(R, Acc * 10 + 9);
  1741. number(<< $\s, R/bits >>, Acc) -> ws_end(R), Acc;
  1742. number(<< $\t, R/bits >>, Acc) -> ws_end(R), Acc;
  1743. number(<<>>, Acc) -> Acc.
  1744. ws_end(<< $\s, R/bits >>) -> ws_end(R);
  1745. ws_end(<< $\t, R/bits >>) -> ws_end(R);
  1746. ws_end(<<>>) -> ok.
  1747. %% Parse a list of case insensitive tokens.
  1748. token_ci_list(<<>>, Acc) -> lists:reverse(Acc);
  1749. token_ci_list(<< $\s, R/bits >>, Acc) -> token_ci_list(R, Acc);
  1750. token_ci_list(<< $\t, R/bits >>, Acc) -> token_ci_list(R, Acc);
  1751. token_ci_list(<< $,, R/bits >>, Acc) -> token_ci_list(R, Acc);
  1752. token_ci_list(<< C, R/bits >>, Acc) ->
  1753. case C of
  1754. ?INLINE_LOWERCASE(token_ci_list, R, Acc, <<>>)
  1755. end.
  1756. token_ci_list(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  1757. token_ci_list(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1758. token_ci_list(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1759. token_ci_list(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]);
  1760. token_ci_list(<< C, R/bits >>, Acc, T) ->
  1761. case C of
  1762. ?INLINE_LOWERCASE(token_ci_list, R, Acc, T)
  1763. end.
  1764. token_ci_list_sep(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  1765. token_ci_list_sep(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1766. token_ci_list_sep(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1767. token_ci_list_sep(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]).