cow_http_hd.erl 78 KB


  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_accept_encoding/1]).
  18. -export([parse_accept_language/1]).
  19. -export([parse_connection/1]).
  20. -export([parse_content_encoding/1]).
  21. -export([parse_content_language/1]).
  22. -export([parse_content_length/1]).
  23. -export([parse_content_type/1]).
  24. -export([parse_date/1]).
  25. -export([parse_etag/1]).
  26. -export([parse_expect/1]).
  27. -export([parse_expires/1]).
  28. -export([parse_if_match/1]).
  29. -export([parse_if_modified_since/1]).
  30. -export([parse_if_none_match/1]).
  31. -export([parse_if_unmodified_since/1]).
  32. -export([parse_last_modified/1]).
  33. -export([parse_max_forwards/1]).
  34. -export([parse_sec_websocket_extensions/1]).
  35. -export([parse_sec_websocket_protocol_client/1]).
  36. -export([parse_sec_websocket_version_client/1]).
  37. -export([parse_transfer_encoding/1]).
  38. -export([parse_upgrade/1]).
  39. -type etag() :: {weak | strong, binary()}.
  40. -export_type([etag/0]).
  41. -type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
  42. -export_type([media_type/0]).
  43. -type qvalue() :: 0..1000.
  44. -export_type([qvalue/0]).
  45. -include("cow_inline.hrl").
  46. -ifdef(TEST).
  47. -include_lib("triq/include/triq.hrl").
  48. vector(Min, Max, Dom) -> ?LET(N, choose(Min, Max), vector(N, Dom)).
  49. small_list(Dom) -> vector(0, 10, Dom).
  50. small_non_empty_list(Dom) -> vector(1, 10, Dom).
  51. alpha_chars() -> "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
  52. alphanum_chars() -> "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
  53. digit_chars() -> "0123456789".
  54. ows() -> list(oneof([$\s, $\t])).
  55. alpha() -> oneof(alpha_chars()).
  56. alphanum() -> oneof(alphanum_chars()).
  57. digit() -> oneof(digit_chars()).
  58. tchar() ->
  59. frequency([
  60. {1, oneof([$!, $#, $$, $%, $&, $', $*, $+, $-, $., $^, $_, $`, $|, $~])},
  61. {99, oneof(alphanum_chars())}
  62. ]).
  63. token() ->
  64. ?LET(T,
  65. non_empty(list(tchar())),
  66. list_to_binary(T)).
  67. obs_text() ->
  68. oneof([128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,
  69. 146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,
  70. 164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,
  71. 182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,
  72. 200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,
  73. 218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,
  74. 236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,
  75. 254,255]).
  76. qdtext() ->
  77. frequency([
  78. {99, oneof("\t\s!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~")},
  79. {1, obs_text()}
  80. ]).
  81. quoted_pair() ->
  82. [$\\, frequency([
  83. {99, oneof("\t\s!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")},
  84. {1, obs_text()}
  85. ])].
  86. quoted_string() ->
  87. [$", list(frequency([{100, qdtext()}, {1, quoted_pair()}])), $"].
  88. %% Helper function for ( token / quoted-string ) values.
  89. unquote([$", V, $"]) -> unquote(V, <<>>);
  90. unquote(V) -> V.
  91. unquote([], Acc) -> Acc;
  92. unquote([[$\\, C]|Tail], Acc) -> unquote(Tail, << Acc/binary, C >>);
  93. unquote([C|Tail], Acc) -> unquote(Tail, << Acc/binary, C >>).
  94. parameter() ->
  95. ?SUCHTHAT({K, _, _, _},
  96. {token(), oneof([token(), quoted_string()]), ows(), ows()},
  97. K =/= <<"q">>).
  98. weight() ->
  99. frequency([
  100. {90, int(0, 1000)},
  101. {10, undefined}
  102. ]).
  103. %% Helper function for weight's qvalue formatting.
  104. qvalue_to_iodata(0) -> <<"0">>;
  105. qvalue_to_iodata(Q) when Q < 10 -> [<<"0.00">>, integer_to_binary(Q)];
  106. qvalue_to_iodata(Q) when Q < 100 -> [<<"0.0">>, integer_to_binary(Q)];
  107. qvalue_to_iodata(Q) when Q < 1000 -> [<<"0.">>, integer_to_binary(Q)];
  108. qvalue_to_iodata(1000) -> <<"1">>.
  109. -endif.
  110. %% @doc Parse the Accept header.
  111. -spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
  112. parse_accept(<<"*/*">>) ->
  113. [{{<<"*">>, <<"*">>, []}, 1000, []}];
  114. parse_accept(Accept) ->
  115. media_range_list(Accept, []).
  116. media_range_list(<<>>, Acc) -> lists:reverse(Acc);
  117. media_range_list(<< $\s, R/bits >>, Acc) -> media_range_list(R, Acc);
  118. media_range_list(<< $\t, R/bits >>, Acc) -> media_range_list(R, Acc);
  119. media_range_list(<< $,, R/bits >>, Acc) -> media_range_list(R, Acc);
  120. media_range_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  121. case C of
  122. ?INLINE_LOWERCASE(media_range_type, R, Acc, <<>>)
  123. end.
  124. media_range_type(<< $/, R/bits >>, Acc, T) -> media_range_subtype(R, Acc, T, <<>>);
  125. %% Special clause for badly behaving user agents that send * instead of */*.
  126. media_range_type(<< $;, R/bits >>, Acc, <<"*">>) -> media_range_before_param(R, Acc, <<"*">>, <<"*">>, []);
  127. media_range_type(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  128. case C of
  129. ?INLINE_LOWERCASE(media_range_type, R, Acc, T)
  130. end.
  131. media_range_subtype(<<>>, Acc, T, S) when S =/= <<>> -> lists:reverse([{{T, S, []}, 1000, []}|Acc]);
  132. media_range_subtype(<< $,, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_list(R, [{{T, S, []}, 1000, []}|Acc]);
  133. media_range_subtype(<< $;, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_param(R, Acc, T, S, []);
  134. media_range_subtype(<< $\s, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_semicolon(R, Acc, T, S, []);
  135. media_range_subtype(<< $\t, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_semicolon(R, Acc, T, S, []);
  136. media_range_subtype(<< C, R/bits >>, Acc, T, S) when ?IS_TOKEN(C) ->
  137. case C of
  138. ?INLINE_LOWERCASE(media_range_subtype, R, Acc, T, S)
  139. end.
  140. media_range_before_semicolon(<<>>, Acc, T, S, P) -> lists:reverse([{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
  141. media_range_before_semicolon(<< $,, R/bits >>, Acc, T, S, P) -> media_range_list(R, [{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
  142. media_range_before_semicolon(<< $;, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  143. media_range_before_semicolon(<< $\s, R/bits >>, Acc, T, S, P) -> media_range_before_semicolon(R, Acc, T, S, P);
  144. media_range_before_semicolon(<< $\t, R/bits >>, Acc, T, S, P) -> media_range_before_semicolon(R, Acc, T, S, P).
  145. media_range_before_param(<< $\s, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  146. media_range_before_param(<< $\t, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  147. %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  148. media_range_before_param(<< $q, $=, $., R/bits >>, Acc, T, S, P) -> media_range_broken_weight(R, Acc, T, S, P);
  149. media_range_before_param(<< $q, $=, R/bits >>, Acc, T, S, P) -> media_range_weight(R, Acc, T, S, P);
  150. media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_TOKEN(C) ->
  151. case C of
  152. ?INLINE_LOWERCASE(media_range_param, R, Acc, T, S, P, <<>>)
  153. end.
  154. media_range_param(<< $=, $", R/bits >>, Acc, T, S, P, K) -> media_range_quoted(R, Acc, T, S, P, K, <<>>);
  155. media_range_param(<< $=, R/bits >>, Acc, T, S, P, K) -> media_range_value(R, Acc, T, S, P, K, <<>>);
  156. media_range_param(<< C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) ->
  157. case C of
  158. ?INLINE_LOWERCASE(media_range_param, R, Acc, T, S, P, K)
  159. end.
  160. media_range_quoted(<< $", R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  161. media_range_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>);
  162. media_range_quoted(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>).
  163. media_range_value(<<>>, Acc, T, S, P, K, V) -> lists:reverse([{{T, S, lists:reverse([{K, V}|P])}, 1000, []}|Acc]);
  164. media_range_value(<< $,, R/bits >>, Acc, T, S, P, K, V) -> media_range_list(R, [{{T, S, lists:reverse([{K, V}|P])}, 1000, []}|Acc]);
  165. media_range_value(<< $;, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_param(R, Acc, T, S, [{K, V}|P]);
  166. media_range_value(<< $\s, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  167. media_range_value(<< $\t, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  168. media_range_value(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_TOKEN(C) -> media_range_value(R, Acc, T, S, P, K, << V/binary, C >>).
  169. %% Special function for badly behaving user agents that send .123 instead of 0.123.
  170. media_range_broken_weight(<< A, B, C, R/bits >>, Acc, T, S, P)
  171. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  172. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
  173. media_range_broken_weight(<< A, B, R/bits >>, Acc, T, S, P)
  174. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  175. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
  176. media_range_broken_weight(<< A, R/bits >>, Acc, T, S, P)
  177. when A >= $0, A =< $9 ->
  178. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100, []).
  179. media_range_weight(<< "1.000", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  180. media_range_weight(<< "1.00", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  181. media_range_weight(<< "1.0", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  182. media_range_weight(<< "1.", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  183. media_range_weight(<< "1", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  184. media_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T, S, P)
  185. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  186. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
  187. media_range_weight(<< "0.", A, B, R/bits >>, Acc, T, S, P)
  188. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  189. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
  190. media_range_weight(<< "0.", A, R/bits >>, Acc, T, S, P)
  191. when A >= $0, A =< $9 ->
  192. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100, []);
  193. media_range_weight(<< "0.", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 0, []);
  194. media_range_weight(<< "0", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 0, []).
  195. accept_before_semicolon(<<>>, Acc, T, S, P, Q, E) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
  196. accept_before_semicolon(<< $,, R/bits >>, Acc, T, S, P, Q, E) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
  197. accept_before_semicolon(<< $;, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  198. accept_before_semicolon(<< $\s, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_semicolon(R, Acc, T, S, P, Q, E);
  199. accept_before_semicolon(<< $\t, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_semicolon(R, Acc, T, S, P, Q, E).
  200. accept_before_ext(<< $\s, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  201. accept_before_ext(<< $\t, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  202. accept_before_ext(<< C, R/bits >>, Acc, T, S, P, Q, E) when ?IS_TOKEN(C) ->
  203. case C of
  204. ?INLINE_LOWERCASE(accept_ext, R, Acc, T, S, P, Q, E, <<>>)
  205. end.
  206. accept_ext(<<>>, Acc, T, S, P, Q, E, K) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse([K|E])}|Acc]);
  207. accept_ext(<< $,, R/bits >>, Acc, T, S, P, Q, E, K) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse([K|E])}|Acc]);
  208. accept_ext(<< $;, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_ext(R, Acc, T, S, P, Q, [K|E]);
  209. accept_ext(<< $\s, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_semicolon(R, Acc, T, S, P, Q, [K|E]);
  210. accept_ext(<< $\t, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_semicolon(R, Acc, T, S, P, Q, [K|E]);
  211. accept_ext(<< $=, $", R/bits >>, Acc, T, S, P, Q, E, K) -> accept_quoted(R, Acc, T, S, P, Q, E, K, <<>>);
  212. accept_ext(<< $=, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_value(R, Acc, T, S, P, Q, E, K, <<>>);
  213. accept_ext(<< C, R/bits >>, Acc, T, S, P, Q, E, K) when ?IS_TOKEN(C) ->
  214. case C of
  215. ?INLINE_LOWERCASE(accept_ext, R, Acc, T, S, P, Q, E, K)
  216. end.
  217. accept_quoted(<< $", R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  218. accept_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>);
  219. accept_quoted(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).
  220. accept_value(<<>>, Acc, T, S, P, Q, E, K, V) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse([{K, V}|E])}|Acc]);
  221. accept_value(<< $,, R/bits >>, Acc, T, S, P, Q, E, K, V) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse([{K, V}|E])}|Acc]);
  222. accept_value(<< $;, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_ext(R, Acc, T, S, P, Q, [{K, V}|E]);
  223. accept_value(<< $\s, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  224. accept_value(<< $\t, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  225. accept_value(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_TOKEN(C) -> accept_value(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).
  226. -ifdef(TEST).
  227. accept_ext() ->
  228. oneof([token(), parameter()]).
  229. accept_params() ->
  230. frequency([
  231. {90, []},
  232. {10, small_list(accept_ext())}
  233. ]).
  234. accept() ->
  235. ?LET({T, S, P, W, E},
  236. {token(), token(), small_list(parameter()), weight(), accept_params()},
  237. {T, S, P, W, E, iolist_to_binary([T, $/, S,
  238. [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P],
  239. case W of
  240. undefined -> [];
  241. _ -> [
  242. [<<";q=">>, qvalue_to_iodata(W)],
  243. [case Ext of
  244. {K, V, OWS1, OWS2} -> [OWS1, $;, OWS2, K, $=, V];
  245. K -> [$;, K]
  246. end || Ext <- E]]
  247. end])}
  248. ).
  249. prop_parse_accept() ->
  250. ?FORALL(L,
  251. non_empty(list(accept())),
  252. begin
  253. << _, Accept/binary >> = iolist_to_binary([[$,, A] || {_, _, _, _, _, A} <- L]),
  254. ResL = parse_accept(Accept),
  255. CheckedL = [begin
  256. ExpectedP = [{?INLINE_LOWERCASE_BC(K), unquote(V)} || {K, V, _, _} <- P],
  257. ExpectedE = [case Ext of
  258. {K, V, _, _} -> {?INLINE_LOWERCASE_BC(K), unquote(V)};
  259. K -> ?INLINE_LOWERCASE_BC(K)
  260. end || Ext <- E],
  261. ResT =:= ?INLINE_LOWERCASE_BC(T)
  262. andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
  263. andalso ResP =:= ExpectedP
  264. andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
  265. andalso ((W =:= undefined andalso ResE =:= []) orelse (W =/= undefined andalso ResE =:= ExpectedE))
  266. end || {{T, S, P, W, E, _}, {{ResT, ResS, ResP}, ResW, ResE}} <- lists:zip(L, ResL)],
  267. [true] =:= lists:usort(CheckedL)
  268. end
  269. ).
  270. parse_accept_test_() ->
  271. Tests = [
  272. {<<>>, []},
  273. {<<" ">>, []},
  274. {<<"audio/*; q=0.2, audio/basic">>, [
  275. {{<<"audio">>, <<"*">>, []}, 200, []},
  276. {{<<"audio">>, <<"basic">>, []}, 1000, []}
  277. ]},
  278. {<<"text/plain; q=0.5, text/html, "
  279. "text/x-dvi; q=0.8, text/x-c">>, [
  280. {{<<"text">>, <<"plain">>, []}, 500, []},
  281. {{<<"text">>, <<"html">>, []}, 1000, []},
  282. {{<<"text">>, <<"x-dvi">>, []}, 800, []},
  283. {{<<"text">>, <<"x-c">>, []}, 1000, []}
  284. ]},
  285. {<<"text/*, text/html, text/html;level=1, */*">>, [
  286. {{<<"text">>, <<"*">>, []}, 1000, []},
  287. {{<<"text">>, <<"html">>, []}, 1000, []},
  288. {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
  289. {{<<"*">>, <<"*">>, []}, 1000, []}
  290. ]},
  291. {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
  292. "text/html;level=2;q=0.4, */*;q=0.5">>, [
  293. {{<<"text">>, <<"*">>, []}, 300, []},
  294. {{<<"text">>, <<"html">>, []}, 700, []},
  295. {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
  296. {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
  297. {{<<"*">>, <<"*">>, []}, 500, []}
  298. ]},
  299. {<<"text/html;level=1;quoted=\"hi hi hi\";"
  300. "q=0.123;standalone;complex=gits, text/plain">>, [
  301. {{<<"text">>, <<"html">>,
  302. [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
  303. [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
  304. {{<<"text">>, <<"plain">>, []}, 1000, []}
  305. ]},
  306. {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
  307. {{<<"text">>, <<"html">>, []}, 1000, []},
  308. {{<<"image">>, <<"gif">>, []}, 1000, []},
  309. {{<<"image">>, <<"jpeg">>, []}, 1000, []},
  310. {{<<"*">>, <<"*">>, []}, 200, []},
  311. {{<<"*">>, <<"*">>, []}, 200, []}
  312. ]}
  313. ],
  314. [{V, fun() -> R = parse_accept(V) end} || {V, R} <- Tests].
  315. parse_accept_error_test_() ->
  316. Tests = [
  317. <<"audio/basic, */;q=0.5">>,
  318. <<"audio/, audio/basic">>,
  319. <<"aud\tio/basic">>,
  320. <<"audio/basic;t=\"zero \\", 0, " woo\"">>
  321. ],
  322. [{V, fun() -> {'EXIT', _} = (catch parse_accept(V)) end} || V <- Tests].
  323. -endif.
  324. -ifdef(PERF).
%% Benchmark entry for parse_accept/1, compiled only when PERF is defined.
%% NOTE(review): horse:repeat/2 is not defined in this file; presumably it
%% evaluates the given expression 20000 times -- confirm against the horse
%% tool before restructuring the call.
horse_parse_accept() ->
    horse:repeat(20000,
        parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
            "text/html;level=2;q=0.4, */*;q=0.5">>)
    ).
  330. -endif.
  331. %% @doc Parse the Accept-Charset header.
  332. -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  333. parse_accept_charset(Charset) ->
  334. nonempty(conneg_list(Charset, [])).
  335. conneg_list(<<>>, Acc) -> lists:reverse(Acc);
  336. conneg_list(<< $\s, R/bits >>, Acc) -> conneg_list(R, Acc);
  337. conneg_list(<< $\t, R/bits >>, Acc) -> conneg_list(R, Acc);
  338. conneg_list(<< $\,, R/bits >>, Acc) -> conneg_list(R, Acc);
  339. conneg_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  340. case C of
  341. ?INLINE_LOWERCASE(conneg, R, Acc, <<>>)
  342. end.
  343. conneg(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  344. conneg(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
  345. conneg(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  346. conneg(<< $\s, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  347. conneg(<< $\t, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  348. conneg(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  349. case C of
  350. ?INLINE_LOWERCASE(conneg, R, Acc, T)
  351. end.
  352. conneg_before_semicolon(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  353. conneg_before_semicolon(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
  354. conneg_before_semicolon(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  355. conneg_before_semicolon(<< $\s, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  356. conneg_before_semicolon(<< $\t, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T).
  357. conneg_before_weight(<< $\s, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  358. conneg_before_weight(<< $\t, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  359. conneg_before_weight(<< $q, $=, R/bits >>, Acc, T) -> conneg_weight(R, Acc, T);
  360. %% Special clause for broken user agents that confuse ; and , separators.
  361. conneg_before_weight(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  362. case C of
  363. ?INLINE_LOWERCASE(conneg, R, [{T, 1000}|Acc], <<>>)
  364. end.
  365. conneg_weight(<< "1.000", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  366. conneg_weight(<< "1.00", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  367. conneg_weight(<< "1.0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  368. conneg_weight(<< "1.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  369. conneg_weight(<< "1", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  370. conneg_weight(<< "0.", A, B, C, R/bits >>, Acc, T)
  371. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  372. conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
  373. conneg_weight(<< "0.", A, B, R/bits >>, Acc, T)
  374. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  375. conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
  376. conneg_weight(<< "0.", A, R/bits >>, Acc, T)
  377. when A >= $0, A =< $9 ->
  378. conneg_list_sep(R, [{T, (A - $0) * 100}|Acc]);
  379. conneg_weight(<< "0.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]);
  380. conneg_weight(<< "0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]).
  381. conneg_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  382. conneg_list_sep(<< $\s, R/bits >>, Acc) -> conneg_list_sep(R, Acc);
  383. conneg_list_sep(<< $\t, R/bits >>, Acc) -> conneg_list_sep(R, Acc);
  384. conneg_list_sep(<< $,, R/bits >>, Acc) -> conneg_list(R, Acc).
  385. -ifdef(TEST).
  386. accept_charset() ->
  387. ?LET({C, W},
  388. {token(), weight()},
  389. {C, W, iolist_to_binary([C, case W of
  390. undefined -> [];
  391. _ -> [<<";q=">>, qvalue_to_iodata(W)]
  392. end])}
  393. ).
  394. prop_parse_accept_charset() ->
  395. ?FORALL(L,
  396. non_empty(list(accept_charset())),
  397. begin
  398. << _, AcceptCharset/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
  399. ResL = parse_accept_charset(AcceptCharset),
  400. CheckedL = [begin
  401. ResC =:= ?INLINE_LOWERCASE_BC(Ch)
  402. andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
  403. end || {{Ch, W, _}, {ResC, ResW}} <- lists:zip(L, ResL)],
  404. [true] =:= lists:usort(CheckedL)
  405. end).
  406. parse_accept_charset_test_() ->
  407. Tests = [
  408. {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
  409. {<<"iso-8859-5">>, 1000},
  410. {<<"unicode-1-1">>, 800}
  411. ]},
  412. %% Some user agents send this invalid value for the Accept-Charset header
  413. {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
  414. {<<"iso-8859-1">>, 1000},
  415. {<<"utf-8">>, 700},
  416. {<<"*">>, 700}
  417. ]}
  418. ],
  419. [{V, fun() -> R = parse_accept_charset(V) end} || {V, R} <- Tests].
  420. parse_accept_charset_error_test_() ->
  421. Tests = [
  422. <<>>
  423. ],
  424. [{V, fun() -> {'EXIT', _} = (catch parse_accept_charset(V)) end} || V <- Tests].
  425. -endif.
  426. -ifdef(PERF).
%% Benchmark entry for parse_accept_charset/1, compiled only when PERF is
%% defined.
%% NOTE(review): horse:repeat/2 is not defined in this file; presumably it
%% evaluates the given expression 20000 times -- confirm against the horse
%% tool before restructuring the call.
horse_parse_accept_charset() ->
    horse:repeat(20000,
        parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
    ).
  431. -endif.
  432. %% @doc Parse the Accept-Encoding header.
  433. -spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
  434. parse_accept_encoding(Encoding) ->
  435. conneg_list(Encoding, []).
  436. -ifdef(TEST).
  437. accept_encoding() ->
  438. ?LET({E, W},
  439. {token(), weight()},
  440. {E, W, iolist_to_binary([E, case W of
  441. undefined -> [];
  442. _ -> [<<";q=">>, qvalue_to_iodata(W)]
  443. end])}
  444. ).
  445. prop_parse_accept_encoding() ->
  446. ?FORALL(L,
  447. non_empty(list(accept_encoding())),
  448. begin
  449. << _, AcceptEncoding/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
  450. ResL = parse_accept_encoding(AcceptEncoding),
  451. CheckedL = [begin
  452. ResE =:= ?INLINE_LOWERCASE_BC(E)
  453. andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
  454. end || {{E, W, _}, {ResE, ResW}} <- lists:zip(L, ResL)],
  455. [true] =:= lists:usort(CheckedL)
  456. end).
  457. parse_accept_encoding_test_() ->
  458. Tests = [
  459. {<<>>, []},
  460. {<<"*">>, [{<<"*">>, 1000}]},
  461. {<<"compress, gzip">>, [
  462. {<<"compress">>, 1000},
  463. {<<"gzip">>, 1000}
  464. ]},
  465. {<<"compress;q=0.5, gzip;q=1.0">>, [
  466. {<<"compress">>, 500},
  467. {<<"gzip">>, 1000}
  468. ]},
  469. {<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
  470. {<<"gzip">>, 1000},
  471. {<<"identity">>, 500},
  472. {<<"*">>, 0}
  473. ]}
  474. ],
  475. [{V, fun() -> R = parse_accept_encoding(V) end} || {V, R} <- Tests].
  476. -endif.
  477. -ifdef(PERF).
%% Benchmark entry for parse_accept_encoding/1, compiled only when PERF is
%% defined.
%% NOTE(review): horse:repeat/2 is not defined in this file; presumably it
%% evaluates the given expression 20000 times -- confirm against the horse
%% tool before restructuring the call.
horse_parse_accept_encoding() ->
    horse:repeat(20000,
        parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
    ).
  482. -endif.
  483. %% @doc Parse the Accept-Language header.
  484. -spec parse_accept_language(binary()) -> [{binary(), qvalue()}].
  485. parse_accept_language(LanguageRange) ->
  486. nonempty(language_range_list(LanguageRange, [])).
  487. language_range_list(<<>>, Acc) -> lists:reverse(Acc);
  488. language_range_list(<< $\s, R/bits >>, Acc) -> language_range_list(R, Acc);
  489. language_range_list(<< $\t, R/bits >>, Acc) -> language_range_list(R, Acc);
  490. language_range_list(<< $\,, R/bits >>, Acc) -> language_range_list(R, Acc);
  491. language_range_list(<< $*, R/bits >>, Acc) -> language_range_before_semicolon(R, Acc, <<"*">>);
  492. language_range_list(<< C, R/bits >>, Acc) when ?IS_ALPHA(C) ->
  493. case C of
  494. ?INLINE_LOWERCASE(language_range, R, Acc, 1, <<>>)
  495. end.
  496. language_range(<<>>, Acc, _, T) -> lists:reverse([{T, 1000}|Acc]);
  497. language_range(<< $,, R/bits >>, Acc, _, T) -> language_range_list(R, [{T, 1000}|Acc]);
  498. language_range(<< $;, R/bits >>, Acc, _, T) -> language_range_before_weight(R, Acc, T);
  499. language_range(<< $\s, R/bits >>, Acc, _, T) -> language_range_before_semicolon(R, Acc, T);
  500. language_range(<< $\t, R/bits >>, Acc, _, T) -> language_range_before_semicolon(R, Acc, T);
  501. language_range(<< $-, R/bits >>, Acc, _, T) -> language_range_sub(R, Acc, 0, << T/binary, $- >>);
  502. language_range(<< _, _/bits >>, _, 8, _) -> error(badarg);
  503. language_range(<< C, R/bits >>, Acc, N, T) when ?IS_ALPHA(C) ->
  504. case C of
  505. ?INLINE_LOWERCASE(language_range, R, Acc, N + 1, T)
  506. end.
  507. language_range_sub(<<>>, Acc, N, T) when N > 0 -> lists:reverse([{T, 1000}|Acc]);
  508. language_range_sub(<< $,, R/bits >>, Acc, N, T) when N > 0 -> language_range_list(R, [{T, 1000}|Acc]);
  509. language_range_sub(<< $;, R/bits >>, Acc, N, T) when N > 0 -> language_range_before_weight(R, Acc, T);
  510. language_range_sub(<< $\s, R/bits >>, Acc, N, T) when N > 0 -> language_range_before_semicolon(R, Acc, T);
  511. language_range_sub(<< $\t, R/bits >>, Acc, N, T) when N > 0 -> language_range_before_semicolon(R, Acc, T);
  512. language_range_sub(<< $-, R/bits >>, Acc, N, T) when N > 0 -> language_range_sub(R, Acc, 0, << T/binary, $- >>);
  513. language_range_sub(<< _, _/bits >>, _, 8, _) -> error(badarg);
  514. language_range_sub(<< C, R/bits >>, Acc, N, T) when ?IS_ALPHA(C); ?IS_DIGIT(C) ->
  515. case C of
  516. ?INLINE_LOWERCASE(language_range_sub, R, Acc, N + 1, T)
  517. end.
  518. language_range_before_semicolon(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  519. language_range_before_semicolon(<< $,, R/bits >>, Acc, T) -> language_range_list(R, [{T, 1000}|Acc]);
  520. language_range_before_semicolon(<< $;, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
  521. language_range_before_semicolon(<< $\s, R/bits >>, Acc, T) -> language_range_before_semicolon(R, Acc, T);
  522. language_range_before_semicolon(<< $\t, R/bits >>, Acc, T) -> language_range_before_semicolon(R, Acc, T).
  523. language_range_before_weight(<< $\s, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
  524. language_range_before_weight(<< $\t, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
  525. language_range_before_weight(<< $q, $=, R/bits >>, Acc, T) -> language_range_weight(R, Acc, T);
  526. %% Special clause for broken user agents that confuse ; and , separators.
  527. language_range_before_weight(<< C, R/bits >>, Acc, T) when ?IS_ALPHA(C) ->
  528. case C of
  529. ?INLINE_LOWERCASE(language_range, R, [{T, 1000}|Acc], 1, <<>>)
  530. end.
  531. language_range_weight(<< "1.000", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  532. language_range_weight(<< "1.00", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  533. language_range_weight(<< "1.0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  534. language_range_weight(<< "1.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  535. language_range_weight(<< "1", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  536. language_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T)
  537. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  538. language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
  539. language_range_weight(<< "0.", A, B, R/bits >>, Acc, T)
  540. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  541. language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
  542. language_range_weight(<< "0.", A, R/bits >>, Acc, T)
  543. when A >= $0, A =< $9 ->
  544. language_range_list_sep(R, [{T, (A - $0) * 100}|Acc]);
  545. language_range_weight(<< "0.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]);
  546. language_range_weight(<< "0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]).
  547. language_range_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  548. language_range_list_sep(<< $\s, R/bits >>, Acc) -> language_range_list_sep(R, Acc);
  549. language_range_list_sep(<< $\t, R/bits >>, Acc) -> language_range_list_sep(R, Acc);
  550. language_range_list_sep(<< $,, R/bits >>, Acc) -> language_range_list(R, Acc).
  551. -ifdef(TEST).
  552. language_range_tag() ->
  553. vector(1, 8, alpha()).
  554. language_range_subtag() ->
  555. [$-, vector(1, 8, alphanum())].
  556. language_range() ->
  557. [language_range_tag(), small_list(language_range_subtag())].
  558. accept_language() ->
  559. ?LET({R, W},
  560. {language_range(), weight()},
  561. {iolist_to_binary(R), W, iolist_to_binary([R, case W of
  562. undefined -> [];
  563. _ -> [<<";q=">>, qvalue_to_iodata(W)]
  564. end])}
  565. ).
  566. prop_parse_accept_language() ->
  567. ?FORALL(L,
  568. non_empty(list(accept_language())),
  569. begin
  570. << _, AcceptLanguage/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
  571. ResL = parse_accept_language(AcceptLanguage),
  572. CheckedL = [begin
  573. ResR =:= ?INLINE_LOWERCASE_BC(R)
  574. andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
  575. end || {{R, W, _}, {ResR, ResW}} <- lists:zip(L, ResL)],
  576. [true] =:= lists:usort(CheckedL)
  577. end).
  578. parse_accept_language_test_() ->
  579. Tests = [
  580. {<<"da, en-gb;q=0.8, en;q=0.7">>, [
  581. {<<"da">>, 1000},
  582. {<<"en-gb">>, 800},
  583. {<<"en">>, 700}
  584. ]},
  585. {<<"en, en-US, en-cockney, i-cherokee, x-pig-latin, es-419">>, [
  586. {<<"en">>, 1000},
  587. {<<"en-us">>, 1000},
  588. {<<"en-cockney">>, 1000},
  589. {<<"i-cherokee">>, 1000},
  590. {<<"x-pig-latin">>, 1000},
  591. {<<"es-419">>, 1000}
  592. ]}
  593. ],
  594. [{V, fun() -> R = parse_accept_language(V) end} || {V, R} <- Tests].
  595. parse_accept_language_error_test_() ->
  596. Tests = [
  597. <<>>,
  598. <<"loooooong">>,
  599. <<"en-us-loooooong">>,
  600. <<"419-en-us">>
  601. ],
  602. [{V, fun() -> {'EXIT', _} = (catch parse_accept_language(V)) end} || V <- Tests].
  603. -endif.
  604. -ifdef(PERF).
%% Benchmark entry point for parse_accept_language/1 on a fixed
%% three-element header, compiled only when PERF is defined.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 20000 times — confirm against the horse tool.
horse_parse_accept_language() ->
    horse:repeat(20000,
        parse_accept_language(<<"da, en-gb;q=0.8, en;q=0.7">>)
    ).
  609. -endif.
  610. %% @doc Parse the Connection header.
  611. -spec parse_connection(binary()) -> [binary()].
  612. parse_connection(<<"close">>) ->
  613. [<<"close">>];
  614. parse_connection(<<"keep-alive">>) ->
  615. [<<"keep-alive">>];
  616. parse_connection(Connection) ->
  617. nonempty(token_ci_list(Connection, [])).
  618. -ifdef(TEST).
  619. prop_parse_connection() ->
  620. ?FORALL(L,
  621. non_empty(list(token())),
  622. begin
  623. << _, Connection/binary >> = iolist_to_binary([[$,, C] || C <- L]),
  624. ResL = parse_connection(Connection),
  625. CheckedL = [?INLINE_LOWERCASE_BC(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)],
  626. [true] =:= lists:usort(CheckedL)
  627. end).
  628. parse_connection_test_() ->
  629. Tests = [
  630. {<<"close">>, [<<"close">>]},
  631. {<<"ClOsE">>, [<<"close">>]},
  632. {<<"Keep-Alive">>, [<<"keep-alive">>]},
  633. {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
  634. ],
  635. [{V, fun() -> R = parse_connection(V) end} || {V, R} <- Tests].
  636. parse_connection_error_test_() ->
  637. Tests = [
  638. <<>>
  639. ],
  640. [{V, fun() -> {'EXIT', _} = (catch parse_connection(V)) end} || V <- Tests].
  641. -endif.
  642. -ifdef(PERF).
%% Benchmark entry point for parse_connection/1 on the literal value
%% "close", which is handled by the first clause of parse_connection/1.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 200000 times — confirm against the horse tool.
horse_parse_connection_close() ->
    horse:repeat(200000,
        parse_connection(<<"close">>)
    ).
%% Benchmark entry point for parse_connection/1 on the literal value
%% "keep-alive", which is handled by the second clause of
%% parse_connection/1.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 200000 times — confirm against the horse tool.
horse_parse_connection_keepalive() ->
    horse:repeat(200000,
        parse_connection(<<"keep-alive">>)
    ).
%% Benchmark entry point for parse_connection/1 on a two-token value,
%% which is handled by the generic (third) clause of parse_connection/1.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 200000 times — confirm against the horse tool.
horse_parse_connection_keepalive_upgrade() ->
    horse:repeat(200000,
        parse_connection(<<"keep-alive, upgrade">>)
    ).
  655. -endif.
  656. %% @doc Parse the Content-Encoding header.
  657. -spec parse_content_encoding(binary()) -> [binary()].
  658. parse_content_encoding(ContentEncoding) ->
  659. nonempty(token_ci_list(ContentEncoding, [])).
  660. -ifdef(TEST).
  661. parse_content_encoding_test_() ->
  662. Tests = [
  663. {<<"gzip">>, [<<"gzip">>]}
  664. ],
  665. [{V, fun() -> R = parse_content_encoding(V) end} || {V, R} <- Tests].
  666. parse_content_encoding_error_test_() ->
  667. Tests = [
  668. <<>>
  669. ],
  670. [{V, fun() -> {'EXIT', _} = (catch parse_content_encoding(V)) end} || V <- Tests].
  671. -endif.
  672. -ifdef(PERF).
%% Benchmark entry point for parse_content_encoding/1 on the value "gzip".
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 200000 times — confirm against the horse tool.
horse_parse_content_encoding() ->
    horse:repeat(200000,
        parse_content_encoding(<<"gzip">>)
    ).
  677. -endif.
  678. %% @doc Parse the Content-Language header.
  679. %%
  680. %% We do not support irregular deprecated tags that do not match the ABNF.
  681. -spec parse_content_language(binary()) -> [binary()].
  682. parse_content_language(ContentLanguage) ->
  683. nonempty(langtag_list(ContentLanguage, [])).
  684. langtag_list(<<>>, Acc) -> lists:reverse(Acc);
  685. langtag_list(<< $\s, R/bits >>, Acc) -> langtag_list(R, Acc);
  686. langtag_list(<< $\t, R/bits >>, Acc) -> langtag_list(R, Acc);
  687. langtag_list(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
  688. langtag_list(<< A, B, C, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) ->
  689. langtag_extlang(R, Acc, << ?LC(A), ?LC(B), ?LC(C) >>, 0);
  690. langtag_list(<< A, B, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B) ->
  691. langtag_extlang(R, Acc, << ?LC(A), ?LC(B) >>, 0);
  692. langtag_list(<< X, R/bits >>, Acc) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << $x >>, 0).
  693. langtag_extlang(<<>>, Acc, T, _) -> lists:reverse([T|Acc]);
  694. langtag_extlang(<< $,, R/bits >>, Acc, T, _) -> langtag_list(R, [T|Acc]);
  695. langtag_extlang(<< $\s, R/bits >>, Acc, T, _) -> langtag_list_sep(R, [T|Acc]);
  696. langtag_extlang(<< $\t, R/bits >>, Acc, T, _) -> langtag_list_sep(R, [T|Acc]);
  697. langtag_extlang(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, _)
  698. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  699. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  700. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  701. langtag_extlang(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, _)
  702. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  703. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  704. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  705. langtag_extlang(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, _)
  706. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  707. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  708. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  709. langtag_extlang(<< $-, A, B, C, D, E, R/bits >>, Acc, T, _)
  710. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  711. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  712. langtag_extlang(<< $-, A, B, C, D, R/bits >>, Acc, T, _)
  713. when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) ->
  714. langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>);
  715. langtag_extlang(<< $-, A, B, C, R/bits >>, Acc, T, N)
  716. when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) ->
  717. case N of
  718. 2 -> langtag_script(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>);
  719. _ -> langtag_extlang(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1)
  720. end;
  721. langtag_extlang(R, Acc, T, _) -> langtag_region(R, Acc, T).
  722. langtag_script(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  723. langtag_script(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  724. langtag_script(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  725. langtag_script(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  726. langtag_script(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
  727. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  728. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  729. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  730. langtag_script(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
  731. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  732. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  733. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  734. langtag_script(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
  735. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  736. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  737. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  738. langtag_script(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
  739. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  740. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  741. langtag_script(<< $-, A, B, C, D, R/bits >>, Acc, T)
  742. when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) ->
  743. langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>);
  744. langtag_script(R, Acc, T) ->
  745. langtag_region(R, Acc, T).
  746. langtag_region(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  747. langtag_region(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  748. langtag_region(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  749. langtag_region(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  750. langtag_region(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
  751. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  752. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  753. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  754. langtag_region(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
  755. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  756. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  757. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  758. langtag_region(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
  759. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  760. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  761. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  762. langtag_region(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
  763. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  764. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  765. langtag_region(<< $-, A, B, C, D, R/bits >>, Acc, T)
  766. when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  767. langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>);
  768. langtag_region(<< $-, A, B, R/bits >>, Acc, T) when ?IS_ALPHA(A), ?IS_ALPHA(B) ->
  769. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>);
  770. langtag_region(<< $-, A, B, C, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
  771. langtag_variant(R, Acc, << T/binary, $-, A, B, C >>);
  772. langtag_region(R, Acc, T) ->
  773. langtag_variant(R, Acc, T).
  774. langtag_variant(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  775. langtag_variant(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  776. langtag_variant(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  777. langtag_variant(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  778. langtag_variant(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T)
  779. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  780. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  781. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>);
  782. langtag_variant(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T)
  783. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  784. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  785. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>);
  786. langtag_variant(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T)
  787. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  788. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  789. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>);
  790. langtag_variant(<< $-, A, B, C, D, E, R/bits >>, Acc, T)
  791. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  792. langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>);
  793. langtag_variant(<< $-, A, B, C, D, R/bits >>, Acc, T)
  794. when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  795. langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>);
  796. langtag_variant(R, Acc, T) ->
  797. langtag_extension(R, Acc, T).
  798. langtag_extension(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  799. langtag_extension(<< $,, R/bits >>, Acc, T) -> langtag_list(R, [T|Acc]);
  800. langtag_extension(<< $\s, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  801. langtag_extension(<< $\t, R/bits >>, Acc, T) -> langtag_list_sep(R, [T|Acc]);
  802. langtag_extension(<< $-, X, R/bits >>, Acc, T) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << T/binary, $-, $x >>, 0);
  803. langtag_extension(<< $-, S, R/bits >>, Acc, T) when ?IS_ALPHANUM(S) -> langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(S) >>, 0).
  804. langtag_extension_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
  805. langtag_extension_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
  806. langtag_extension_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  807. langtag_extension_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  808. langtag_extension_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
  809. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  810. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  811. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
  812. langtag_extension_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
  813. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  814. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  815. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
  816. langtag_extension_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
  817. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  818. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  819. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
  820. langtag_extension_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
  821. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  822. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
  823. langtag_extension_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
  824. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  825. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
  826. langtag_extension_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
  827. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
  828. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
  829. langtag_extension_sub(<< $-, A, B, R/bits >>, Acc, T, N)
  830. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
  831. langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
  832. langtag_extension_sub(R, Acc, T, N) when N > 0 ->
  833. langtag_extension(R, Acc, T).
  834. langtag_privateuse_sub(<<>>, Acc, T, N) when N > 0 -> lists:reverse([T|Acc]);
  835. langtag_privateuse_sub(<< $,, R/bits >>, Acc, T, N) when N > 0 -> langtag_list(R, [T|Acc]);
  836. langtag_privateuse_sub(<< $\s, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  837. langtag_privateuse_sub(<< $\t, R/bits >>, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]);
  838. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N)
  839. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  840. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) ->
  841. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1);
  842. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N)
  843. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  844. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) ->
  845. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1);
  846. langtag_privateuse_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N)
  847. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D),
  848. ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) ->
  849. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1);
  850. langtag_privateuse_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N)
  851. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) ->
  852. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1);
  853. langtag_privateuse_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N)
  854. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) ->
  855. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1);
  856. langtag_privateuse_sub(<< $-, A, B, C, R/bits >>, Acc, T, N)
  857. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) ->
  858. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1);
  859. langtag_privateuse_sub(<< $-, A, B, R/bits >>, Acc, T, N)
  860. when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) ->
  861. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1);
  862. langtag_privateuse_sub(<< $-, A, R/bits >>, Acc, T, N)
  863. when ?IS_ALPHANUM(A) ->
  864. langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A) >>, N + 1).
  865. langtag_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  866. langtag_list_sep(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc);
  867. langtag_list_sep(<< $\s, R/bits >>, Acc) -> langtag_list_sep(R, Acc);
  868. langtag_list_sep(<< $\t, R/bits >>, Acc) -> langtag_list_sep(R, Acc).
  869. -ifdef(TEST).
  870. langtag_language() -> vector(2, 3, alpha()).
  871. langtag_extlang() -> vector(0, 3, [$-, alpha(), alpha(), alpha()]).
  872. langtag_script() -> oneof([[], [$-, alpha(), alpha(), alpha(), alpha()]]).
  873. langtag_region() -> oneof([[], [$-, alpha(), alpha()], [$-, digit(), digit(), digit()]]).
  874. langtag_variant() ->
  875. small_list(frequency([
  876. {4, [$-, vector(5, 8, alphanum())]},
  877. {1, [$-, digit(), alphanum(), alphanum(), alphanum()]}
  878. ])).
  879. langtag_extension() ->
  880. small_list([$-, ?SUCHTHAT(S, alphanum(), S =/= $x andalso S =/= $X),
  881. small_non_empty_list([$-, vector(2, 8, alphanum())])
  882. ]).
  883. langtag_privateuse() -> oneof([[], [$-, langtag_privateuse_nodash()]]).
  884. langtag_privateuse_nodash() -> [oneof([$x, $X]), small_non_empty_list([$-, vector(1, 8, alphanum())])].
  885. private_language_tag() -> ?LET(T, langtag_privateuse_nodash(), iolist_to_binary(T)).
  886. language_tag() ->
  887. ?LET(IoList,
  888. [langtag_language(), langtag_extlang(), langtag_script(), langtag_region(),
  889. langtag_variant(), langtag_extension(), langtag_privateuse()],
  890. iolist_to_binary(IoList)).
  891. content_language() ->
  892. ?LET(L,
  893. non_empty(list(frequency([
  894. {90, language_tag()},
  895. {10, private_language_tag()}
  896. ]))),
  897. begin
  898. << _, ContentLanguage/binary >> = iolist_to_binary([[$,, T] || T <- L]),
  899. {L, ContentLanguage}
  900. end).
  901. prop_parse_content_language() ->
  902. ?FORALL({L, ContentLanguage},
  903. content_language(),
  904. begin
  905. ResL = parse_content_language(ContentLanguage),
  906. CheckedL = [?INLINE_LOWERCASE_BC(T) =:= ResT || {T, ResT} <- lists:zip(L, ResL)],
  907. [true] =:= lists:usort(CheckedL)
  908. end).
  909. parse_content_language_test_() ->
  910. Tests = [
  911. {<<"de">>, [<<"de">>]},
  912. {<<"fr">>, [<<"fr">>]},
  913. {<<"ja">>, [<<"ja">>]},
  914. {<<"zh-Hant">>, [<<"zh-hant">>]},
  915. {<<"zh-Hans">>, [<<"zh-hans">>]},
  916. {<<"sr-Cyrl">>, [<<"sr-cyrl">>]},
  917. {<<"sr-Latn">>, [<<"sr-latn">>]},
  918. {<<"zh-cmn-Hans-CN">>, [<<"zh-cmn-hans-cn">>]},
  919. {<<"cmn-Hans-CN">>, [<<"cmn-hans-cn">>]},
  920. {<<"zh-yue-HK">>, [<<"zh-yue-hk">>]},
  921. {<<"yue-HK">>, [<<"yue-hk">>]},
  922. {<<"zh-Hans-CN">>, [<<"zh-hans-cn">>]},
  923. {<<"sr-Latn-RS">>, [<<"sr-latn-rs">>]},
  924. {<<"sl-rozaj">>, [<<"sl-rozaj">>]},
  925. {<<"sl-rozaj-biske">>, [<<"sl-rozaj-biske">>]},
  926. {<<"sl-nedis">>, [<<"sl-nedis">>]},
  927. {<<"de-CH-1901">>, [<<"de-ch-1901">>]},
  928. {<<"sl-IT-nedis">>, [<<"sl-it-nedis">>]},
  929. {<<"hy-Latn-IT-arevela">>, [<<"hy-latn-it-arevela">>]},
  930. {<<"de-DE">>, [<<"de-de">>]},
  931. {<<"en-US">>, [<<"en-us">>]},
  932. {<<"es-419">>, [<<"es-419">>]},
  933. {<<"de-CH-x-phonebk">>, [<<"de-ch-x-phonebk">>]},
  934. {<<"az-Arab-x-AZE-derbend">>, [<<"az-arab-x-aze-derbend">>]},
  935. {<<"x-whatever">>, [<<"x-whatever">>]},
  936. {<<"qaa-Qaaa-QM-x-southern">>, [<<"qaa-qaaa-qm-x-southern">>]},
  937. {<<"de-Qaaa">>, [<<"de-qaaa">>]},
  938. {<<"sr-Latn-QM">>, [<<"sr-latn-qm">>]},
  939. {<<"sr-Qaaa-RS">>, [<<"sr-qaaa-rs">>]},
  940. {<<"en-US-u-islamcal">>, [<<"en-us-u-islamcal">>]},
  941. {<<"zh-CN-a-myext-x-private">>, [<<"zh-cn-a-myext-x-private">>]},
  942. {<<"en-a-myext-b-another">>, [<<"en-a-myext-b-another">>]},
  943. {<<"mn-Cyrl-MN">>, [<<"mn-cyrl-mn">>]},
  944. {<<"MN-cYRL-mn">>, [<<"mn-cyrl-mn">>]},
  945. {<<"mN-cYrL-Mn">>, [<<"mn-cyrl-mn">>]},
  946. {<<"az-Arab-IR">>, [<<"az-arab-ir">>]},
  947. {<<"zh-gan">>, [<<"zh-gan">>]},
  948. {<<"zh-yue">>, [<<"zh-yue">>]},
  949. {<<"zh-cmn">>, [<<"zh-cmn">>]},
  950. {<<"de-AT">>, [<<"de-at">>]},
  951. {<<"de-CH-1996">>, [<<"de-ch-1996">>]},
  952. {<<"en-Latn-GB-boont-r-extended-sequence-x-private">>,
  953. [<<"en-latn-gb-boont-r-extended-sequence-x-private">>]},
  954. {<<"el-x-koine">>, [<<"el-x-koine">>]},
  955. {<<"el-x-attic">>, [<<"el-x-attic">>]},
  956. {<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>,
  957. [<<"fr">>, <<"en-us">>, <<"es-419">>, <<"az-arab">>, <<"x-pig-latin">>, <<"man-nkoo-gn">>]},
  958. {<<"da">>, [<<"da">>]},
  959. {<<"mi, en">>, [<<"mi">>, <<"en">>]}
  960. ],
  961. [{V, fun() -> R = parse_content_language(V) end} || {V, R} <- Tests].
  962. parse_content_language_error_test_() ->
  963. Tests = [
  964. <<>>
  965. ],
  966. [{V, fun() -> {'EXIT', _} = (catch parse_content_language(V)) end} || V <- Tests].
  967. -endif.
  968. -ifdef(PERF).
%% Benchmark entry point for parse_content_language/1 on a fixed
%% six-tag header.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 100000 times — confirm against the horse tool.
horse_parse_content_language() ->
    horse:repeat(100000,
        parse_content_language(<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>)
    ).
  973. -endif.
  974. %% @doc Parse the Content-Length header.
  975. %%
  976. %% The value has at least one digit, and may be followed by whitespace.
  977. -spec parse_content_length(binary()) -> non_neg_integer().
  978. parse_content_length(<< $0 >>) -> 0;
  979. parse_content_length(<< $0, R/bits >>) -> number(R, 0);
  980. parse_content_length(<< $1, R/bits >>) -> number(R, 1);
  981. parse_content_length(<< $2, R/bits >>) -> number(R, 2);
  982. parse_content_length(<< $3, R/bits >>) -> number(R, 3);
  983. parse_content_length(<< $4, R/bits >>) -> number(R, 4);
  984. parse_content_length(<< $5, R/bits >>) -> number(R, 5);
  985. parse_content_length(<< $6, R/bits >>) -> number(R, 6);
  986. parse_content_length(<< $7, R/bits >>) -> number(R, 7);
  987. parse_content_length(<< $8, R/bits >>) -> number(R, 8);
  988. parse_content_length(<< $9, R/bits >>) -> number(R, 9).
  989. -ifdef(TEST).
  990. prop_parse_content_length() ->
  991. ?FORALL(
  992. X,
  993. non_neg_integer(),
  994. X =:= parse_content_length(integer_to_binary(X))
  995. ).
  996. parse_content_length_test_() ->
  997. Tests = [
  998. {<<"0">>, 0},
  999. {<<"42 ">>, 42},
  1000. {<<"69\t">>, 69},
  1001. {<<"1337">>, 1337},
  1002. {<<"3495">>, 3495},
  1003. {<<"1234567890">>, 1234567890},
  1004. {<<"1234567890 ">>, 1234567890}
  1005. ],
  1006. [{V, fun() -> R = parse_content_length(V) end} || {V, R} <- Tests].
  1007. parse_content_length_error_test_() ->
  1008. Tests = [
  1009. <<>>,
  1010. <<"123, 123">>,
  1011. <<"4.17">>
  1012. ],
  1013. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  1014. -endif.
  1015. -ifdef(PERF).
%% Benchmark entry point for parse_content_length/1 on the value "0",
%% which is handled by the first clause of parse_content_length/1.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 100000 times — confirm against the horse tool.
horse_parse_content_length_zero() ->
    horse:repeat(100000,
        parse_content_length(<<"0">>)
    ).
%% Benchmark entry point for parse_content_length/1 on a ten-digit value.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 100000 times — confirm against the horse tool.
horse_parse_content_length_giga() ->
    horse:repeat(100000,
        parse_content_length(<<"1234567890">>)
    ).
  1024. -endif.
  1025. %% @doc Parse the Content-Type header.
  1026. -spec parse_content_type(binary()) -> media_type().
  1027. parse_content_type(<< C, R/bits >>) when ?IS_TOKEN(C) ->
  1028. case C of
  1029. ?INLINE_LOWERCASE(media_type, R, <<>>)
  1030. end.
  1031. media_type(<< $/, C, R/bits >>, T) when ?IS_TOKEN(C) ->
  1032. case C of
  1033. ?INLINE_LOWERCASE(media_subtype, R, T, <<>>)
  1034. end;
  1035. media_type(<< C, R/bits >>, T) when ?IS_TOKEN(C) ->
  1036. case C of
  1037. ?INLINE_LOWERCASE(media_type, R, T)
  1038. end.
  1039. media_subtype(<<>>, T, S) -> {T, S, []};
  1040. media_subtype(<< $;, R/bits >>, T, S) -> media_before_param(R, T, S, []);
  1041. media_subtype(<< $\s, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  1042. media_subtype(<< $\t, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  1043. media_subtype(<< C, R/bits >>, T, S) when ?IS_TOKEN(C) ->
  1044. case C of
  1045. ?INLINE_LOWERCASE(media_subtype, R, T, S)
  1046. end.
  1047. media_before_semicolon(<<>>, T, S, P) -> {T, S, lists:reverse(P)};
  1048. media_before_semicolon(<< $;, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1049. media_before_semicolon(<< $\s, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P);
  1050. media_before_semicolon(<< $\t, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P).
  1051. media_before_param(<< $\s, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1052. media_before_param(<< $\t, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  1053. media_before_param(<< "charset=", $", R/bits >>, T, S, P) -> media_charset_quoted(R, T, S, P, <<>>);
  1054. media_before_param(<< "charset=", R/bits >>, T, S, P) -> media_charset(R, T, S, P, <<>>);
  1055. media_before_param(<< C, R/bits >>, T, S, P) when ?IS_TOKEN(C) ->
  1056. case C of
  1057. ?INLINE_LOWERCASE(media_param, R, T, S, P, <<>>)
  1058. end.
  1059. media_charset_quoted(<< $", R/bits >>, T, S, P, V) ->
  1060. media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1061. media_charset_quoted(<< $\\, C, R/bits >>, T, S, P, V) when ?IS_VCHAR(C) ->
  1062. case C of
  1063. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  1064. end;
  1065. media_charset_quoted(<< C, R/bits >>, T, S, P, V) when ?IS_VCHAR(C) ->
  1066. case C of
  1067. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  1068. end.
  1069. media_charset(<<>>, T, S, P, V) -> {T, S, lists:reverse([{<<"charset">>, V}|P])};
  1070. media_charset(<< $;, R/bits >>, T, S, P, V) -> media_before_param(R, T, S, [{<<"charset">>, V}|P]);
  1071. media_charset(<< $\s, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1072. media_charset(<< $\t, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  1073. media_charset(<< C, R/bits >>, T, S, P, V) when ?IS_TOKEN(C) ->
  1074. case C of
  1075. ?INLINE_LOWERCASE(media_charset, R, T, S, P, V)
  1076. end.
  1077. media_param(<< $=, $", R/bits >>, T, S, P, K) -> media_quoted(R, T, S, P, K, <<>>);
  1078. media_param(<< $=, R/bits >>, T, S, P, K) -> media_value(R, T, S, P, K, <<>>);
  1079. media_param(<< C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) ->
  1080. case C of
  1081. ?INLINE_LOWERCASE(media_param, R, T, S, P, K)
  1082. end.
  1083. media_quoted(<< $", R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1084. media_quoted(<< $\\, C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>);
  1085. media_quoted(<< C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>).
  1086. media_value(<<>>, T, S, P, K, V) -> {T, S, lists:reverse([{K, V}|P])};
  1087. media_value(<< $;, R/bits >>, T, S, P, K, V) -> media_before_param(R, T, S, [{K, V}|P]);
  1088. media_value(<< $\s, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1089. media_value(<< $\t, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  1090. media_value(<< C, R/bits >>, T, S, P, K, V) when ?IS_TOKEN(C) -> media_value(R, T, S, P, K, << V/binary, C >>).
  1091. -ifdef(TEST).
  1092. media_type_parameter() ->
  1093. frequency([
  1094. {90, parameter()},
  1095. {10, {<<"charset">>, oneof([token(), quoted_string()]), <<>>, <<>>}}
  1096. ]).
  1097. media_type() ->
  1098. ?LET({T, S, P},
  1099. {token(), token(), small_list(media_type_parameter())},
  1100. {T, S, P, iolist_to_binary([T, $/, S, [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P]])}
  1101. ).
  1102. prop_parse_content_type() ->
  1103. ?FORALL({T, S, P, MediaType},
  1104. media_type(),
  1105. begin
  1106. {ResT, ResS, ResP} = parse_content_type(MediaType),
  1107. ExpectedP = [case ?INLINE_LOWERCASE_BC(K) of
  1108. <<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(V))};
  1109. LowK -> {LowK, unquote(V)}
  1110. end || {K, V, _, _} <- P],
  1111. ResT =:= ?INLINE_LOWERCASE_BC(T)
  1112. andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
  1113. andalso ResP =:= ExpectedP
  1114. end
  1115. ).
  1116. parse_content_type_test_() ->
  1117. Tests = [
  1118. {<<"text/html;charset=utf-8">>,
  1119. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1120. {<<"text/html;charset=UTF-8">>,
  1121. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1122. {<<"Text/HTML;Charset=\"utf-8\"">>,
  1123. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1124. {<<"text/html; charset=\"utf-8\"">>,
  1125. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  1126. {<<"text/html; charset=ISO-8859-4">>,
  1127. {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  1128. {<<"text/plain; charset=iso-8859-4">>,
  1129. {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  1130. {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
  1131. {<<"multipart">>, <<"form-data">>, [
  1132. {<<"boundary">>, <<"MultipartIsUgly">>}
  1133. ]}},
  1134. {<<"foo/bar; one=FirstParam; two=SecondParam">>,
  1135. {<<"foo">>, <<"bar">>, [
  1136. {<<"one">>, <<"FirstParam">>},
  1137. {<<"two">>, <<"SecondParam">>}
  1138. ]}}
  1139. ],
  1140. [{V, fun() -> R = parse_content_type(V) end} || {V, R} <- Tests].
  1141. -endif.
  1142. -ifdef(PERF).
%% Benchmark entry point for parse_content_type/1 on a media type with a
%% lowercase charset parameter.
%% NOTE(review): horse:repeat/2 presumably evaluates the expression
%% 200000 times — confirm against the horse tool.
horse_parse_content_type() ->
    horse:repeat(200000,
        parse_content_type(<<"text/html;charset=utf-8">>)
    ).
  1147. -endif.
  1148. %% @doc Parse the Date header.
  1149. -spec parse_date(binary()) -> calendar:datetime().
  1150. parse_date(Date) ->
  1151. cow_date:parse_date(Date).
  1152. -ifdef(TEST).
  1153. parse_date_test_() ->
  1154. Tests = [
  1155. {<<"Tue, 15 Nov 1994 08:12:31 GMT">>, {{1994, 11, 15}, {8, 12, 31}}}
  1156. ],
  1157. [{V, fun() -> R = parse_date(V) end} || {V, R} <- Tests].
  1158. -endif.
  1159. %% @doc Parse the ETag header.
  1160. -spec parse_etag(binary()) -> etag().
  1161. parse_etag(<< $W, $/, $", R/bits >>) ->
  1162. etag(R, weak, <<>>);
  1163. parse_etag(<< $", R/bits >>) ->
  1164. etag(R, strong, <<>>).
  1165. etag(<< $", R/bits >>, Strength, Tag) ->
  1166. ws_end(R),
  1167. {Strength, Tag};
  1168. etag(<< C, R/bits >>, Strength, Tag) when ?IS_ETAGC(C) ->
  1169. etag(R, Strength, << Tag/binary, C >>).
  1170. -ifdef(TEST).
  1171. etagc() ->
  1172. ?SUCHTHAT(C, int(16#21, 16#ff), C =/= 16#22 andalso C =/= 16#7f).
  1173. etag() ->
  1174. ?LET({Strength, Tag},
  1175. {oneof([weak, strong]), list(etagc())},
  1176. begin
  1177. TagBin = list_to_binary(Tag),
  1178. {{Strength, TagBin},
  1179. case Strength of
  1180. weak -> << $W, $/, $", TagBin/binary, $" >>;
  1181. strong -> << $", TagBin/binary, $" >>
  1182. end}
  1183. end).
  1184. prop_parse_etag() ->
  1185. ?FORALL({Tag, TagBin},
  1186. etag(),
  1187. Tag =:= parse_etag(TagBin)).
  1188. parse_etag_test_() ->
  1189. Tests = [
  1190. {<<"\"xyzzy\"">>, {strong, <<"xyzzy">>}},
  1191. {<<"W/\"xyzzy\"">>, {weak, <<"xyzzy">>}},
  1192. {<<"\"\"">>, {strong, <<>>}}
  1193. ],
  1194. [{V, fun() -> R = parse_etag(V) end} || {V, R} <- Tests].
  1195. parse_etag_error_test_() ->
  1196. Tests = [
  1197. <<>>,
  1198. <<"\"">>,
  1199. <<"W">>,
  1200. <<"W/">>
  1201. ],
  1202. [{V, fun() -> {'EXIT', _} = (catch parse_etag(V)) end} || V <- Tests].
  1203. -endif.
  1204. -ifdef(PERF).
  1205. horse_parse_etag() ->
  1206. horse:repeat(200000,
  1207. parse_etag(<<"W/\"xyzzy\"">>)
  1208. ).
  1209. -endif.
  1210. %% @doc Parse the Expect header.
  1211. -spec parse_expect(binary()) -> continue.
  1212. parse_expect(<<"100-continue", Rest/bits >>) ->
  1213. ws_end(Rest),
  1214. continue;
  1215. parse_expect(<<"100-", C, O, N, T, I, M, U, E, Rest/bits >>)
  1216. when C =:= $C orelse C =:= $c, O =:= $O orelse O =:= $o,
  1217. N =:= $N orelse N =:= $n, T =:= $T orelse T =:= $t,
  1218. I =:= $I orelse I =:= $i, M =:= $N orelse M =:= $n,
  1219. U =:= $U orelse U =:= $u, E =:= $E orelse E =:= $e ->
  1220. ws_end(Rest),
  1221. continue.
  1222. -ifdef(TEST).
  1223. expect() ->
  1224. ?LET(E,
  1225. [$1, $0, $0, $-,
  1226. oneof([$c, $C]), oneof([$o, $O]), oneof([$n, $N]),
  1227. oneof([$t, $T]), oneof([$i, $I]), oneof([$n, $N]),
  1228. oneof([$u, $U]), oneof([$e, $E])],
  1229. list_to_binary(E)).
  1230. prop_parse_expect() ->
  1231. ?FORALL(E, expect(), continue =:= parse_expect(E)).
  1232. parse_expect_test_() ->
  1233. Tests = [
  1234. <<"100-continue">>,
  1235. <<"100-CONTINUE">>,
  1236. <<"100-Continue">>,
  1237. <<"100-CoNtInUe">>,
  1238. <<"100-continue ">>
  1239. ],
  1240. [{V, fun() -> continue = parse_expect(V) end} || V <- Tests].
  1241. parse_expect_error_test_() ->
  1242. Tests = [
  1243. <<>>,
  1244. <<" ">>,
  1245. <<"200-OK">>,
  1246. <<"Cookies">>
  1247. ],
  1248. [{V, fun() -> {'EXIT', _} = (catch parse_expect(V)) end} || V <- Tests].
  1249. -endif.
  1250. -ifdef(PERF).
  1251. horse_parse_expect() ->
  1252. horse:repeat(200000,
  1253. parse_expect(<<"100-continue">>)
  1254. ).
  1255. -endif.
  1256. %% @doc Parse the Expires header.
  1257. %%
  1258. %% Recipients must interpret invalid date formats as a date
  1259. %% in the past. The value "0" is commonly used.
  1260. -spec parse_expires(binary()) -> calendar:datetime().
  1261. parse_expires(<<"0">>) ->
  1262. {{1, 1, 1}, {0, 0, 0}};
  1263. parse_expires(Expires) ->
  1264. try
  1265. cow_date:parse_date(Expires)
  1266. catch _:_ ->
  1267. {{1, 1, 1}, {0, 0, 0}}
  1268. end.
  1269. -ifdef(TEST).
  1270. parse_expires_test_() ->
  1271. Tests = [
  1272. {<<"0">>, {{1, 1, 1}, {0, 0, 0}}},
  1273. {<<"Thu, 01 Dec 1994 nope invalid">>, {{1, 1, 1}, {0, 0, 0}}},
  1274. {<<"Thu, 01 Dec 1994 16:00:00 GMT">>, {{1994, 12, 1}, {16, 0, 0}}}
  1275. ],
  1276. [{V, fun() -> R = parse_expires(V) end} || {V, R} <- Tests].
  1277. -endif.
  1278. -ifdef(PERF).
  1279. horse_parse_expires_0() ->
  1280. horse:repeat(200000,
  1281. parse_expires(<<"0">>)
  1282. ).
  1283. horse_parse_expires_invalid() ->
  1284. horse:repeat(200000,
  1285. parse_expires(<<"Thu, 01 Dec 1994 nope invalid">>)
  1286. ).
  1287. -endif.
  1288. %% @doc Parse the If-Match header.
  1289. -spec parse_if_match(binary()) -> '*' | [etag()].
  1290. parse_if_match(<<"*">>) ->
  1291. '*';
  1292. parse_if_match(IfMatch) ->
  1293. nonempty(etag_list(IfMatch, [])).
  1294. etag_list(<<>>, Acc) -> lists:reverse(Acc);
  1295. etag_list(<< $\s, R/bits >>, Acc) -> etag_list(R, Acc);
  1296. etag_list(<< $\t, R/bits >>, Acc) -> etag_list(R, Acc);
  1297. etag_list(<< $,, R/bits >>, Acc) -> etag_list(R, Acc);
  1298. etag_list(<< $W, $/, $", R/bits >>, Acc) -> etag(R, Acc, weak, <<>>);
  1299. etag_list(<< $", R/bits >>, Acc) -> etag(R, Acc, strong, <<>>).
  1300. etag(<< $", R/bits >>, Acc, Strength, Tag) -> etag_list_sep(R, [{Strength, Tag}|Acc]);
  1301. etag(<< C, R/bits >>, Acc, Strength, Tag) when ?IS_ETAGC(C) -> etag(R, Acc, Strength, << Tag/binary, C >>).
  1302. etag_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  1303. etag_list_sep(<< $\s, R/bits >>, Acc) -> etag_list_sep(R, Acc);
  1304. etag_list_sep(<< $\t, R/bits >>, Acc) -> etag_list_sep(R, Acc);
  1305. etag_list_sep(<< $,, R/bits >>, Acc) -> etag_list(R, Acc).
  1306. -ifdef(TEST).
  1307. prop_parse_if_match() ->
  1308. ?FORALL(L,
  1309. non_empty(list(etag())),
  1310. begin
  1311. << _, IfMatch/binary >> = iolist_to_binary([[$,, T] || {_, T} <- L]),
  1312. ResL = parse_if_match(IfMatch),
  1313. CheckedL = [T =:= ResT || {{T, _}, ResT} <- lists:zip(L, ResL)],
  1314. [true] =:= lists:usort(CheckedL)
  1315. end).
  1316. parse_if_match_test_() ->
  1317. Tests = [
  1318. {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
  1319. {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
  1320. [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
  1321. {<<"*">>, '*'}
  1322. ],
  1323. [{V, fun() -> R = parse_if_match(V) end} || {V, R} <- Tests].
  1324. parse_if_match_error_test_() ->
  1325. Tests = [
  1326. <<>>
  1327. ],
  1328. [{V, fun() -> {'EXIT', _} = (catch parse_if_match(V)) end} || V <- Tests].
  1329. -endif.
  1330. -ifdef(PERF).
  1331. horse_parse_if_match() ->
  1332. horse:repeat(200000,
  1333. parse_if_match(<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>)
  1334. ).
  1335. -endif.
  1336. %% @doc Parse the If-Modified-Since header.
  1337. -spec parse_if_modified_since(binary()) -> calendar:datetime().
  1338. parse_if_modified_since(IfModifiedSince) ->
  1339. cow_date:parse_date(IfModifiedSince).
  1340. -ifdef(TEST).
  1341. parse_if_modified_since_test_() ->
  1342. Tests = [
  1343. {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
  1344. ],
  1345. [{V, fun() -> R = parse_if_modified_since(V) end} || {V, R} <- Tests].
  1346. -endif.
  1347. %% @doc Parse the If-None-Match header.
  1348. -spec parse_if_none_match(binary()) -> '*' | [etag()].
  1349. parse_if_none_match(<<"*">>) ->
  1350. '*';
  1351. parse_if_none_match(IfNoneMatch) ->
  1352. nonempty(etag_list(IfNoneMatch, [])).
  1353. -ifdef(TEST).
  1354. parse_if_none_match_test_() ->
  1355. Tests = [
  1356. {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
  1357. {<<"W/\"xyzzy\"">>, [{weak, <<"xyzzy">>}]},
  1358. {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
  1359. [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
  1360. {<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>,
  1361. [{weak, <<"xyzzy">>}, {weak, <<"r2d2xxxx">>}, {weak, <<"c3piozzzz">>}]},
  1362. {<<"*">>, '*'}
  1363. ],
  1364. [{V, fun() -> R = parse_if_none_match(V) end} || {V, R} <- Tests].
  1365. parse_if_none_match_error_test_() ->
  1366. Tests = [
  1367. <<>>
  1368. ],
  1369. [{V, fun() -> {'EXIT', _} = (catch parse_if_none_match(V)) end} || V <- Tests].
  1370. -endif.
  1371. -ifdef(PERF).
  1372. horse_parse_if_none_match() ->
  1373. horse:repeat(200000,
  1374. parse_if_none_match(<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>)
  1375. ).
  1376. -endif.
  1377. %% @doc Parse the If-Unmodified-Since header.
  1378. -spec parse_if_unmodified_since(binary()) -> calendar:datetime().
  1379. parse_if_unmodified_since(IfModifiedSince) ->
  1380. cow_date:parse_date(IfModifiedSince).
  1381. -ifdef(TEST).
  1382. parse_if_unmodified_since_test_() ->
  1383. Tests = [
  1384. {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
  1385. ],
  1386. [{V, fun() -> R = parse_if_unmodified_since(V) end} || {V, R} <- Tests].
  1387. -endif.
  1388. %% @doc Parse the Last-Modified header.
  1389. -spec parse_last_modified(binary()) -> calendar:datetime().
  1390. parse_last_modified(LastModified) ->
  1391. cow_date:parse_date(LastModified).
  1392. -ifdef(TEST).
  1393. parse_last_modified_test_() ->
  1394. Tests = [
  1395. {<<"Tue, 15 Nov 1994 12:45:26 GMT">>, {{1994, 11, 15}, {12, 45, 26}}}
  1396. ],
  1397. [{V, fun() -> R = parse_last_modified(V) end} || {V, R} <- Tests].
  1398. -endif.
  1399. %% @doc Parse the Max-Forwards header.
  1400. -spec parse_max_forwards(binary()) -> non_neg_integer().
  1401. parse_max_forwards(<< $0, R/bits >>) -> number(R, 0);
  1402. parse_max_forwards(<< $1, R/bits >>) -> number(R, 1);
  1403. parse_max_forwards(<< $2, R/bits >>) -> number(R, 2);
  1404. parse_max_forwards(<< $3, R/bits >>) -> number(R, 3);
  1405. parse_max_forwards(<< $4, R/bits >>) -> number(R, 4);
  1406. parse_max_forwards(<< $5, R/bits >>) -> number(R, 5);
  1407. parse_max_forwards(<< $6, R/bits >>) -> number(R, 6);
  1408. parse_max_forwards(<< $7, R/bits >>) -> number(R, 7);
  1409. parse_max_forwards(<< $8, R/bits >>) -> number(R, 8);
  1410. parse_max_forwards(<< $9, R/bits >>) -> number(R, 9).
  1411. -ifdef(TEST).
  1412. prop_parse_max_forwards() ->
  1413. ?FORALL(
  1414. X,
  1415. non_neg_integer(),
  1416. X =:= parse_max_forwards(integer_to_binary(X))
  1417. ).
  1418. parse_max_forwards_test_() ->
  1419. Tests = [
  1420. {<<"0">>, 0},
  1421. {<<"42 ">>, 42},
  1422. {<<"69\t">>, 69},
  1423. {<<"1337">>, 1337},
  1424. {<<"1234567890">>, 1234567890},
  1425. {<<"1234567890 ">>, 1234567890}
  1426. ],
  1427. [{V, fun() -> R = parse_max_forwards(V) end} || {V, R} <- Tests].
  1428. parse_max_forwards_error_test_() ->
  1429. Tests = [
  1430. <<>>,
  1431. <<"123, 123">>,
  1432. <<"4.17">>
  1433. ],
  1434. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  1435. -endif.
  1436. %% @doc Parse the Sec-WebSocket-Extensions request header.
  1437. -spec parse_sec_websocket_extensions(binary()) -> [{binary(), [binary() | {binary(), binary()}]}].
  1438. parse_sec_websocket_extensions(SecWebSocketExtensions) ->
  1439. nonempty(ws_extension_list(SecWebSocketExtensions, [])).
  1440. ws_extension_list(<<>>, Acc) -> lists:reverse(Acc);
  1441. ws_extension_list(<< $\s, R/bits >>, Acc) -> ws_extension_list(R, Acc);
  1442. ws_extension_list(<< $\t, R/bits >>, Acc) -> ws_extension_list(R, Acc);
  1443. ws_extension_list(<< $,, R/bits >>, Acc) -> ws_extension_list(R, Acc);
  1444. ws_extension_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) -> ws_extension(R, Acc, << C >>).
  1445. ws_extension(<<>>, Acc, E) -> lists:reverse([{E, []}|Acc]);
  1446. ws_extension(<< $,, R/bits >>, Acc, E) -> ws_extension_list(R, [{E, []}|Acc]);
  1447. ws_extension(<< $;, R/bits >>, Acc, E) -> ws_extension_before_param(R, Acc, E, []);
  1448. ws_extension(<< $\s, R/bits >>, Acc, E) -> ws_extension_before_semicolon(R, Acc, E, []);
  1449. ws_extension(<< $\t, R/bits >>, Acc, E) -> ws_extension_before_semicolon(R, Acc, E, []);
  1450. ws_extension(<< C, R/bits >>, Acc, E) when ?IS_TOKEN(C) -> ws_extension(R, Acc, << E/binary, C >>).
  1451. ws_extension_before_semicolon(<<>>, Acc, E, P) -> lists:reverse([{E, lists:reverse(P)}|Acc]);
  1452. ws_extension_before_semicolon(<< $,, R/bits >>, Acc, E, P) -> ws_extension_list(R, [{E, lists:reverse(P)}|Acc]);
  1453. ws_extension_before_semicolon(<< $;, R/bits >>, Acc, E, P) -> ws_extension_before_param(R, Acc, E, P);
  1454. ws_extension_before_semicolon(<< $\s, R/bits >>, Acc, E, P) -> ws_extension_before_semicolon(R, Acc, E, P);
  1455. ws_extension_before_semicolon(<< $\t, R/bits >>, Acc, E, P) -> ws_extension_before_semicolon(R, Acc, E, P).
  1456. ws_extension_before_param(<< $\s, R/bits >>, Acc, E, P) -> ws_extension_before_param(R, Acc, E, P);
  1457. ws_extension_before_param(<< $\t, R/bits >>, Acc, E, P) -> ws_extension_before_param(R, Acc, E, P);
  1458. ws_extension_before_param(<< C, R/bits >>, Acc, E, P) when ?IS_TOKEN(C) -> ws_extension_param(R, Acc, E, P, << C >>).
  1459. ws_extension_param(<<>>, Acc, E, P, K) -> lists:reverse([{E, lists:reverse([K|P])}|Acc]);
  1460. ws_extension_param(<< $\s, R/bits >>, Acc, E, P, K) -> ws_extension_before_semicolon(R, Acc, E, [K|P]);
  1461. ws_extension_param(<< $\t, R/bits >>, Acc, E, P, K) -> ws_extension_before_semicolon(R, Acc, E, [K|P]);
  1462. ws_extension_param(<< $,, R/bits >>, Acc, E, P, K) -> ws_extension_list(R, [{E, lists:reverse([K|P])}|Acc]);
  1463. ws_extension_param(<< $;, R/bits >>, Acc, E, P, K) -> ws_extension_before_param(R, Acc, E, [K|P]);
  1464. ws_extension_param(<< $=, $", R/bits >>, Acc, E, P, K) -> ws_extension_quoted(R, Acc, E, P, K, <<>>);
  1465. ws_extension_param(<< $=, C, R/bits >>, Acc, E, P, K) when ?IS_TOKEN(C) -> ws_extension_value(R, Acc, E, P, K, << C >>);
  1466. ws_extension_param(<< C, R/bits >>, Acc, E, P, K) when ?IS_TOKEN(C) -> ws_extension_param(R, Acc, E, P, << K/binary, C >>).
  1467. ws_extension_quoted(<< $", R/bits >>, Acc, E, P, K, V) -> ws_extension_before_semicolon(R, Acc, E, [{K, V}|P]);
  1468. ws_extension_quoted(<< $\\, C, R/bits >>, Acc, E, P, K, V) when ?IS_TOKEN(C) -> ws_extension_quoted(R, Acc, E, P, K, << V/binary, C >>);
  1469. ws_extension_quoted(<< C, R/bits >>, Acc, E, P, K, V) when ?IS_TOKEN(C) -> ws_extension_quoted(R, Acc, E, P, K, << V/binary, C >>).
  1470. ws_extension_value(<<>>, Acc, E, P, K, V) -> lists:reverse([{E, lists:reverse([{K, V}|P])}|Acc]);
  1471. ws_extension_value(<< $\s, R/bits >>, Acc, E, P, K, V) -> ws_extension_before_semicolon(R, Acc, E, [{K, V}|P]);
  1472. ws_extension_value(<< $\t, R/bits >>, Acc, E, P, K, V) -> ws_extension_before_semicolon(R, Acc, E, [{K, V}|P]);
  1473. ws_extension_value(<< $,, R/bits >>, Acc, E, P, K, V) -> ws_extension_list(R, [{E, lists:reverse([{K, V}|P])}|Acc]);
  1474. ws_extension_value(<< $;, R/bits >>, Acc, E, P, K, V) -> ws_extension_before_param(R, Acc, E, [{K, V}|P]);
  1475. ws_extension_value(<< C, R/bits >>, Acc, E, P, K, V) when ?IS_TOKEN(C) -> ws_extension_value(R, Acc, E, P, K, << V/binary, C >>).
  1476. -ifdef(TEST).
  1477. quoted_token() ->
  1478. ?LET(T,
  1479. non_empty(list(frequency([
  1480. {99, tchar()},
  1481. {1, [$\\, tchar()]}
  1482. ]))),
  1483. [$", T, $"]).
  1484. ws_extension() ->
  1485. ?LET({E, PL},
  1486. {token(), small_list({ows(), ows(), oneof([token(), {token(), oneof([token(), quoted_token()])}])})},
  1487. {E, PL, iolist_to_binary([E,
  1488. [case P of
  1489. {OWS1, OWS2, {K, V}} -> [OWS1, $;, OWS2, K, $=, V];
  1490. {OWS1, OWS2, K} -> [OWS1, $;, OWS2, K]
  1491. end || P <- PL]
  1492. ])}).
  1493. prop_parse_sec_websocket_extensions() ->
  1494. ?FORALL(L,
  1495. non_empty(list(ws_extension())),
  1496. begin
  1497. << _, SecWebsocketExtensions/binary >> = iolist_to_binary([[$,, E] || {_, _, E} <- L]),
  1498. ResL = parse_sec_websocket_extensions(SecWebsocketExtensions),
  1499. CheckedL = [begin
  1500. ExpectedPL = [case P of
  1501. {_, _, {K, V}} -> {K, unquote(V)};
  1502. {_, _, K} -> K
  1503. end || P <- PL],
  1504. E =:= ResE andalso ExpectedPL =:= ResPL
  1505. end || {{E, PL, _}, {ResE, ResPL}} <- lists:zip(L, ResL)],
  1506. [true] =:= lists:usort(CheckedL)
  1507. end).
  1508. parse_sec_websocket_extensions_test_() ->
  1509. Tests = [
  1510. {<<"foo">>, [{<<"foo">>, []}]},
  1511. {<<"bar; baz=2">>, [{<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
  1512. {<<"foo, bar; baz=2">>, [{<<"foo">>, []}, {<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
  1513. {<<"deflate-stream">>, [{<<"deflate-stream">>, []}]},
  1514. {<<"mux; max-channels=4; flow-control, deflate-stream">>,
  1515. [{<<"mux">>, [{<<"max-channels">>, <<"4">>}, <<"flow-control">>]}, {<<"deflate-stream">>, []}]},
  1516. {<<"private-extension">>, [{<<"private-extension">>, []}]}
  1517. ],
  1518. [{V, fun() -> R = parse_sec_websocket_extensions(V) end} || {V, R} <- Tests].
  1519. parse_sec_websocket_extensions_error_test_() ->
  1520. Tests = [
  1521. <<>>
  1522. ],
  1523. [{V, fun() -> {'EXIT', _} = (catch parse_sec_websocket_extensions(V)) end}
  1524. || V <- Tests].
  1525. -endif.
  1526. -ifdef(PERF).
  1527. horse_parse_sec_websocket_extensions() ->
  1528. horse:repeat(200000,
  1529. parse_sec_websocket_extensions(<<"mux; max-channels=4; flow-control, deflate-stream">>)
  1530. ).
  1531. -endif.
  1532. %% @doc Parse the Sec-WebSocket-Protocol request header.
  1533. -spec parse_sec_websocket_protocol_client(binary()) -> [binary()].
  1534. parse_sec_websocket_protocol_client(SecWebSocketProtocol) ->
  1535. nonempty(token_ci_list(SecWebSocketProtocol, [])).
  1536. -ifdef(TEST).
  1537. parse_sec_websocket_protocol_client_test_() ->
  1538. Tests = [
  1539. {<<"chat, superchat">>, [<<"chat">>, <<"superchat">>]}
  1540. ],
  1541. [{V, fun() -> R = parse_sec_websocket_protocol_client(V) end} || {V, R} <- Tests].
  1542. parse_sec_websocket_protocol_client_error_test_() ->
  1543. Tests = [
  1544. <<>>
  1545. ],
  1546. [{V, fun() -> {'EXIT', _} = (catch parse_sec_websocket_protocol_client(V)) end}
  1547. || V <- Tests].
  1548. -endif.
  1549. -ifdef(PERF).
  1550. horse_parse_sec_websocket_protocol_client() ->
  1551. horse:repeat(200000,
  1552. parse_sec_websocket_protocol_client(<<"chat, superchat">>)
  1553. ).
  1554. -endif.
  1555. %% @doc Parse the Sec-WebSocket-Version request header.
  1556. -spec parse_sec_websocket_version_client(binary()) -> 0..255.
  1557. parse_sec_websocket_version_client(SecWebSocketVersion) when byte_size(SecWebSocketVersion) < 4 ->
  1558. Version = binary_to_integer(SecWebSocketVersion),
  1559. true = Version >= 0 andalso Version =< 255,
  1560. Version.
  1561. -ifdef(TEST).
  1562. prop_parse_sec_websocket_version_client() ->
  1563. ?FORALL(Version,
  1564. int(0, 255),
  1565. Version =:= parse_sec_websocket_version_client(integer_to_binary(Version))).
  1566. parse_sec_websocket_version_client_test_() ->
  1567. Tests = [
  1568. {<<"13">>, 13},
  1569. {<<"25">>, 25}
  1570. ],
  1571. [{V, fun() -> R = parse_sec_websocket_version_client(V) end} || {V, R} <- Tests].
  1572. parse_sec_websocket_version_client_error_test_() ->
  1573. Tests = [
  1574. <<>>,
  1575. <<" ">>,
  1576. <<"7, 8, 13">>,
  1577. <<"invalid">>
  1578. ],
  1579. [{V, fun() -> {'EXIT', _} = (catch parse_sec_websocket_version_client(V)) end}
  1580. || V <- Tests].
  1581. -endif.
  1582. -ifdef(PERF).
  1583. horse_parse_sec_websocket_version_client_13() ->
  1584. horse:repeat(200000,
  1585. parse_sec_websocket_version_client(<<"13">>)
  1586. ).
  1587. horse_parse_sec_websocket_version_client_255() ->
  1588. horse:repeat(200000,
  1589. parse_sec_websocket_version_client(<<"255">>)
  1590. ).
  1591. -endif.
  1592. %% @doc Parse the Transfer-Encoding header.
  1593. %%
  1594. %% @todo This function does not support parsing of transfer-parameter.
  1595. -spec parse_transfer_encoding(binary()) -> [binary()].
  1596. parse_transfer_encoding(<<"chunked">>) ->
  1597. [<<"chunked">>];
  1598. parse_transfer_encoding(TransferEncoding) ->
  1599. nonempty(token_ci_list(TransferEncoding, [])).
  1600. -ifdef(TEST).
  1601. prop_parse_transfer_encoding() ->
  1602. ?FORALL(L,
  1603. non_empty(list(token())),
  1604. begin
  1605. << _, TransferEncoding/binary >> = iolist_to_binary([[$,, C] || C <- L]),
  1606. ResL = parse_transfer_encoding(TransferEncoding),
  1607. CheckedL = [?INLINE_LOWERCASE_BC(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)],
  1608. [true] =:= lists:usort(CheckedL)
  1609. end).
  1610. parse_transfer_encoding_test_() ->
  1611. Tests = [
  1612. {<<"a , , , ">>, [<<"a">>]},
  1613. {<<" , , , a">>, [<<"a">>]},
  1614. {<<"a , , b">>, [<<"a">>, <<"b">>]},
  1615. {<<"chunked">>, [<<"chunked">>]},
  1616. {<<"chunked, something">>, [<<"chunked">>, <<"something">>]},
  1617. {<<"gzip, chunked">>, [<<"gzip">>, <<"chunked">>]}
  1618. ],
  1619. [{V, fun() -> R = parse_transfer_encoding(V) end} || {V, R} <- Tests].
  1620. parse_transfer_encoding_error_test_() ->
  1621. Tests = [
  1622. <<>>,
  1623. <<" ">>,
  1624. <<" , ">>,
  1625. <<",,,">>,
  1626. <<"a b">>
  1627. ],
  1628. [{V, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(V)) end}
  1629. || V <- Tests].
  1630. -endif.
  1631. -ifdef(PERF).
  1632. horse_parse_transfer_encoding_chunked() ->
  1633. horse:repeat(200000,
  1634. parse_transfer_encoding(<<"chunked">>)
  1635. ).
  1636. horse_parse_transfer_encoding_custom() ->
  1637. horse:repeat(200000,
  1638. parse_transfer_encoding(<<"chunked, something">>)
  1639. ).
  1640. -endif.
  1641. %% @doc Parse the Upgrade header.
  1642. %%
  1643. %% It is unclear from the RFC whether the values here are
  1644. %% case sensitive.
  1645. %%
  1646. %% We handle them in a case insensitive manner because they
  1647. %% are described as case insensitive in the Websocket RFC.
  1648. -spec parse_upgrade(binary()) -> [binary()].
  1649. parse_upgrade(Upgrade) ->
  1650. nonempty(protocol_list(Upgrade, [])).
  1651. protocol_list(<<>>, Acc) -> lists:reverse(Acc);
  1652. protocol_list(<< $\s, R/bits >>, Acc) -> protocol_list(R, Acc);
  1653. protocol_list(<< $\t, R/bits >>, Acc) -> protocol_list(R, Acc);
  1654. protocol_list(<< $,, R/bits >>, Acc) -> protocol_list(R, Acc);
  1655. protocol_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  1656. case C of
  1657. ?INLINE_LOWERCASE(protocol_name, R, Acc, <<>>)
  1658. end.
  1659. protocol_name(<<>>, Acc, P) -> lists:reverse([P|Acc]);
  1660. protocol_name(<< $\s, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1661. protocol_name(<< $\t, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1662. protocol_name(<< $,, R/bits >>, Acc, P) -> protocol_list(R, [P|Acc]);
  1663. protocol_name(<< $/, C, R/bits >>, Acc, P) ->
  1664. case C of
  1665. ?INLINE_LOWERCASE(protocol_version, R, Acc, << P/binary, $/ >>)
  1666. end;
  1667. protocol_name(<< C, R/bits >>, Acc, P) when ?IS_TOKEN(C) ->
  1668. case C of
  1669. ?INLINE_LOWERCASE(protocol_name, R, Acc, P)
  1670. end.
  1671. protocol_version(<<>>, Acc, P) -> lists:reverse([P|Acc]);
  1672. protocol_version(<< $\s, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1673. protocol_version(<< $\t, R/bits >>, Acc, P) -> protocol_list_sep(R, [P|Acc]);
  1674. protocol_version(<< $,, R/bits >>, Acc, P) -> protocol_list(R, [P|Acc]);
  1675. protocol_version(<< C, R/bits >>, Acc, P) when ?IS_TOKEN(C) ->
  1676. case C of
  1677. ?INLINE_LOWERCASE(protocol_version, R, Acc, P)
  1678. end.
  1679. protocol_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  1680. protocol_list_sep(<< $\s, R/bits >>, Acc) -> protocol_list_sep(R, Acc);
  1681. protocol_list_sep(<< $\t, R/bits >>, Acc) -> protocol_list_sep(R, Acc);
  1682. protocol_list_sep(<< $,, R/bits >>, Acc) -> protocol_list(R, Acc).
  1683. -ifdef(TEST).
  1684. protocols() ->
  1685. ?LET(P,
  1686. oneof([token(), [token(), $/, token()]]),
  1687. iolist_to_binary(P)).
  1688. prop_parse_upgrade() ->
  1689. ?FORALL(L,
  1690. non_empty(list(protocols())),
  1691. begin
  1692. << _, Upgrade/binary >> = iolist_to_binary([[$,, P] || P <- L]),
  1693. ResL = parse_upgrade(Upgrade),
  1694. CheckedL = [?INLINE_LOWERCASE_BC(P) =:= ResP || {P, ResP} <- lists:zip(L, ResL)],
  1695. [true] =:= lists:usort(CheckedL)
  1696. end).
  1697. parse_upgrade_test_() ->
  1698. Tests = [
  1699. {<<"HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11">>,
  1700. [<<"http/2.0">>, <<"shttp/1.3">>, <<"irc/6.9">>, <<"rta/x11">>]},
  1701. {<<"HTTP/2.0">>, [<<"http/2.0">>]}
  1702. ],
  1703. [{V, fun() -> R = parse_transfer_encoding(V) end} || {V, R} <- Tests].
  1704. parse_upgrade_error_test_() ->
  1705. Tests = [
  1706. <<>>
  1707. ],
  1708. [{V, fun() -> {'EXIT', _} = (catch parse_upgrade(V)) end}
  1709. || V <- Tests].
  1710. -endif.
  1711. %% Internal.
  1712. %% Only return if the list is not empty.
  1713. nonempty(L) when L =/= [] -> L.
  1714. %% Parse a number optionally followed by whitespace.
  1715. number(<< $0, R/bits >>, Acc) -> number(R, Acc * 10);
  1716. number(<< $1, R/bits >>, Acc) -> number(R, Acc * 10 + 1);
  1717. number(<< $2, R/bits >>, Acc) -> number(R, Acc * 10 + 2);
  1718. number(<< $3, R/bits >>, Acc) -> number(R, Acc * 10 + 3);
  1719. number(<< $4, R/bits >>, Acc) -> number(R, Acc * 10 + 4);
  1720. number(<< $5, R/bits >>, Acc) -> number(R, Acc * 10 + 5);
  1721. number(<< $6, R/bits >>, Acc) -> number(R, Acc * 10 + 6);
  1722. number(<< $7, R/bits >>, Acc) -> number(R, Acc * 10 + 7);
  1723. number(<< $8, R/bits >>, Acc) -> number(R, Acc * 10 + 8);
  1724. number(<< $9, R/bits >>, Acc) -> number(R, Acc * 10 + 9);
  1725. number(<< $\s, R/bits >>, Acc) -> ws_end(R), Acc;
  1726. number(<< $\t, R/bits >>, Acc) -> ws_end(R), Acc;
  1727. number(<<>>, Acc) -> Acc.
  1728. ws_end(<< $\s, R/bits >>) -> ws_end(R);
  1729. ws_end(<< $\t, R/bits >>) -> ws_end(R);
  1730. ws_end(<<>>) -> ok.
  1731. %% Parse a list of case insensitive tokens.
  1732. token_ci_list(<<>>, Acc) -> lists:reverse(Acc);
  1733. token_ci_list(<< $\s, R/bits >>, Acc) -> token_ci_list(R, Acc);
  1734. token_ci_list(<< $\t, R/bits >>, Acc) -> token_ci_list(R, Acc);
  1735. token_ci_list(<< $,, R/bits >>, Acc) -> token_ci_list(R, Acc);
  1736. token_ci_list(<< C, R/bits >>, Acc) ->
  1737. case C of
  1738. ?INLINE_LOWERCASE(token_ci_list, R, Acc, <<>>)
  1739. end.
  1740. token_ci_list(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  1741. token_ci_list(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1742. token_ci_list(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1743. token_ci_list(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]);
  1744. token_ci_list(<< C, R/bits >>, Acc, T) ->
  1745. case C of
  1746. ?INLINE_LOWERCASE(token_ci_list, R, Acc, T)
  1747. end.
  1748. token_ci_list_sep(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  1749. token_ci_list_sep(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1750. token_ci_list_sep(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1751. token_ci_list_sep(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]).