cow_http_hd.erl 43 KB


  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_accept_encoding/1]).
  18. -export([parse_accept_language/1]).
  19. -export([parse_connection/1]).
  20. -export([parse_content_length/1]).
  21. -export([parse_content_type/1]).
  22. -export([parse_expect/1]).
  23. -export([parse_max_forwards/1]).
  24. -export([parse_transfer_encoding/1]).
  25. -type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
  26. -export_type([media_type/0]).
  27. -type qvalue() :: 0..1000.
  28. -export_type([qvalue/0]).
  29. -include("cow_inline.hrl").
  30. -ifdef(TEST).
  31. -include_lib("triq/include/triq.hrl").
  %% Generator for optional whitespace: a possibly empty run of space and
  %% horizontal-tab characters.
  ows() ->
      Blank = oneof([$\s, $\t]),
      list(Blank).
  %% Character sets shared by the generators of this test section.
  %% Each function returns a plain string (list of character codes).
  alpha_chars() -> lists:seq($a, $z) ++ lists:seq($A, $Z).
  digit_chars() -> lists:seq($0, $9).
  %% Digits first, then lowercase, then uppercase letters.
  alphanum_chars() -> digit_chars() ++ alpha_chars().
  %% Generator yielding one ASCII letter.
  alpha() ->
      Letters = alpha_chars(),
      oneof(Letters).

  %% Generator yielding one ASCII letter or digit.
  alphanum() ->
      LettersAndDigits = alphanum_chars(),
      oneof(LettersAndDigits).
  %% Generator for a single token character. Alphanumerics are weighted 99:1
  %% against the punctuation characters listed below.
  tchar() ->
      Punctuation = "!#$%&'*+-.^_`|~",
      frequency([
          {1, oneof(Punctuation)},
          {99, oneof(alphanum_chars())}
      ]).
  %% Generator for a token: a non-empty sequence of tchar() characters,
  %% returned as a binary.
  token() ->
      ?LET(Chars, non_empty(list(tchar())), list_to_binary(Chars)).
  %% All byte values from 128 to 255 inclusive, in ascending order.
  %% Used by qdtext/0 and quoted_pair/0 as the rarely picked non-ASCII set.
  obs_text() ->
      lists:seq(128, 255).
  %% Generator for one character allowed unescaped inside a quoted string.
  %% The ASCII set below is weighted 99:1 against obs_text().
  qdtext() ->
      Ascii = "\t\s!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~",
      frequency([
          {99, oneof(Ascii)},
          {1, oneof(obs_text())}
      ]).
  %% Generator for a backslash escape: a two-element list made of the
  %% backslash and the escaped character. The ASCII set is weighted 99:1
  %% against obs_text().
  quoted_pair() ->
      Ascii = "\t\s!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
      Escaped = frequency([
          {99, oneof(Ascii)},
          {1, oneof(obs_text())}
      ]),
      [$\\, Escaped].
  %% Generator for a quoted string as a three-element list: opening quote,
  %% list of body elements, closing quote. Body elements are qdtext()
  %% characters (weight 100) or quoted_pair() escapes (weight 1).
  quoted_string() ->
      Element = frequency([{100, qdtext()}, {1, quoted_pair()}]),
      [$", list(Element), $"].
  %% Helper function for ( token / quoted-string ) values.
  %%
  %% A value shaped like quoted_string/0 output (quote, body, quote) is
  %% flattened to a binary with escapes resolved; anything else is returned
  %% untouched.
  unquote([$", Body, $"]) -> unquote(Body, <<>>);
  unquote(Token) -> Token.

  %% Appends every body element to Acc. A [backslash, Char] pair contributes
  %% only Char; any other element is appended as a single byte.
  unquote(Body, Acc) ->
      lists:foldl(fun
          ([$\\, Escaped], Bin) -> << Bin/binary, Escaped >>;
          (Char, Bin) -> << Bin/binary, Char >>
      end, Acc, Body).
  %% Generator for a parameter as {Name, Value, OWS1, OWS2}. Value is a
  %% token or a quoted string. Names equal to <<"q">> are rejected because
  %% the parser reads "q=" as the weight.
  parameter() ->
      ?SUCHTHAT({Name, _Value, _OWS1, _OWS2},
          {token(), oneof([token(), quoted_string()]), ows(), ows()},
          Name =/= <<"q">>).
  %% Generator for a weight: an integer in 0..1000 (weight 90) or the atom
  %% undefined (weight 10), meaning no weight is rendered.
  weight() ->
      Explicit = int(0, 1000),
      frequency([
          {90, Explicit},
          {10, undefined}
      ]).
  %% Helper function for weight's qvalue formatting.
  %%
  %% Renders an integer number of thousandths as iodata: 0 gives "0", 1000
  %% gives "1", and anything in between gives "0." followed by three
  %% zero-padded digits.
  qvalue_to_iodata(0) ->
      <<"0">>;
  qvalue_to_iodata(1000) ->
      <<"1">>;
  qvalue_to_iodata(Thousandths) when Thousandths < 10 ->
      [<<"0.00">>, integer_to_binary(Thousandths)];
  qvalue_to_iodata(Thousandths) when Thousandths < 100 ->
      [<<"0.0">>, integer_to_binary(Thousandths)];
  qvalue_to_iodata(Thousandths) when Thousandths < 1000 ->
      [<<"0.">>, integer_to_binary(Thousandths)].
  92. -endif.
  %% @doc Parse the Accept header.
  %%
  %% Returns one {MediaType, Quality, Extensions} tuple per media range, in
  %% header order. Quality is expressed in thousandths and is 1000 when the
  %% range carries no weight. Malformed input raises an exception.
  -spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
  parse_accept(<<"*/*">>) ->
      %% Shortcut: the bare wildcard is answered without running the parser.
      Any = <<"*">>,
      [{{Any, Any, []}, 1000, []}];
  parse_accept(Header) ->
      media_range_list(Header, []).
  %% Start of a list element. Whitespace and stray commas are skipped; the
  %% first token character starts a new (lowercased) media type. At end of
  %% input the ranges collected so far are returned in header order.
  media_range_list(<<>>, Ranges) ->
      lists:reverse(Ranges);
  media_range_list(<< WS, Rest/bits >>, Ranges) when WS =:= $\s; WS =:= $\t ->
      media_range_list(Rest, Ranges);
  media_range_list(<< $,, Rest/bits >>, Ranges) ->
      media_range_list(Rest, Ranges);
  media_range_list(<< C, Rest/bits >>, Ranges) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, <<>>)
      end.
  %% Reads the media type up to the slash, lowercasing as it goes.
  media_range_type(<< $/, Rest/bits >>, Ranges, Type) ->
      media_range_subtype(Rest, Ranges, Type, <<>>);
  %% Special clause for badly behaving user agents that send * instead of */*.
  media_range_type(<< $;, Rest/bits >>, Ranges, <<"*">> = Any) ->
      media_range_before_param(Rest, Ranges, Any, <<"*">>, []);
  media_range_type(<< C, Rest/bits >>, Ranges, Type) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, Type)
      end.
  %% Reads the media subtype, lowercasing as it goes. Every terminating
  %% clause requires a non-empty subtype, so "type/" alone matches no clause
  %% and crashes.
  media_range_subtype(<<>>, Ranges, Type, SubType) when SubType =/= <<>> ->
      lists:reverse([{{Type, SubType, []}, 1000, []} | Ranges]);
  media_range_subtype(<< $,, Rest/bits >>, Ranges, Type, SubType) when SubType =/= <<>> ->
      media_range_list(Rest, [{{Type, SubType, []}, 1000, []} | Ranges]);
  media_range_subtype(<< $;, Rest/bits >>, Ranges, Type, SubType) when SubType =/= <<>> ->
      media_range_before_param(Rest, Ranges, Type, SubType, []);
  media_range_subtype(<< WS, Rest/bits >>, Ranges, Type, SubType)
          when SubType =/= <<>>, (WS =:= $\s orelse WS =:= $\t) ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, []);
  media_range_subtype(<< C, Rest/bits >>, Ranges, Type, SubType) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_subtype, Rest, Ranges, Type, SubType)
      end.
  %% After a complete media type or parameter: skips whitespace, then
  %% expects end of input, a comma (next range) or a semicolon (next
  %% parameter). Params is kept in reverse order until the range is emitted.
  media_range_before_semicolon(<<>>, Ranges, Type, SubType, Params) ->
      Range = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      lists:reverse([Range | Ranges]);
  media_range_before_semicolon(<< $,, Rest/bits >>, Ranges, Type, SubType, Params) ->
      Range = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      media_range_list(Rest, [Range | Ranges]);
  media_range_before_semicolon(<< $;, Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_before_param(Rest, Ranges, Type, SubType, Params);
  media_range_before_semicolon(<< WS, Rest/bits >>, Ranges, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, Params).
  %% After a semicolon: skips whitespace, then reads either the weight
  %% ("q=") or the first character of a parameter name.
  media_range_before_param(<< WS, Rest/bits >>, Ranges, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_param(Rest, Ranges, Type, SubType, Params);
  %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  media_range_before_param(<< $q, $=, $., Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_broken_weight(Rest, Ranges, Type, SubType, Params);
  media_range_before_param(<< $q, $=, Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_weight(Rest, Ranges, Type, SubType, Params);
  media_range_before_param(<< C, Rest/bits >>, Ranges, Type, SubType, Params) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Ranges, Type, SubType, Params, <<>>)
      end.
  %% Reads a parameter name (lowercased) up to the equals sign, then
  %% dispatches on whether the value is quoted or a bare token.
  media_range_param(<< $=, $", Rest/bits >>, Ranges, Type, SubType, Params, Key) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, <<>>);
  media_range_param(<< $=, Rest/bits >>, Ranges, Type, SubType, Params, Key) ->
      media_range_value(Rest, Ranges, Type, SubType, Params, Key, <<>>);
  media_range_param(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Ranges, Type, SubType, Params, Key)
      end.
  %% Reads a quoted parameter value up to the closing quote. A backslash
  %% escape contributes only the escaped character. The value keeps its case.
  media_range_quoted(<< $", Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, [{Key, Value} | Params]);
  media_range_quoted(<< $\\, C, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(C) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, C >>);
  media_range_quoted(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(C) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, C >>).
  %% Reads an unquoted (token) parameter value; the value keeps its case.
  %% The parameter is stored when a separator or the end of input is reached.
  media_range_value(<<>>, Ranges, Type, SubType, Params, Key, Value) ->
      AllParams = lists:reverse([{Key, Value} | Params]),
      lists:reverse([{{Type, SubType, AllParams}, 1000, []} | Ranges]);
  media_range_value(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      AllParams = lists:reverse([{Key, Value} | Params]),
      media_range_list(Rest, [{{Type, SubType, AllParams}, 1000, []} | Ranges]);
  media_range_value(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      media_range_before_param(Rest, Ranges, Type, SubType, [{Key, Value} | Params]);
  media_range_value(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, [{Key, Value} | Params]);
  media_range_value(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_TOKEN(C) ->
      media_range_value(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, C >>).
  %% Special function for badly behaving user agents that send .123 instead of 0.123.
  %%
  %% Reads the one to three decimal digits that follow the dot, converts
  %% them to thousandths and continues with the accept extensions.
  media_range_broken_weight(<< D1, D2, D3, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_broken_weight(<< D1, D2, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_broken_weight(<< D1, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9 ->
      Weight = (D1 - $0) * 100,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []).
  %% Reads the weight that follows "q=" and converts it to thousandths.
  %% Accepted forms are "1" with up to three zero decimals and "0" with up
  %% to three decimal digits. Longer prefixes are tried first so that the
  %% longest valid form is consumed.
  media_range_weight(<< "1.000", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.00", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.0", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "0.", D1, D2, D3, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", D1, D2, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", D1, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9 ->
      Weight = (D1 - $0) * 100,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 0, []);
  media_range_weight(<< "0", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 0, []).
  %% After the weight or a complete accept extension: skips whitespace, then
  %% expects end of input, a comma (next range) or a semicolon (next
  %% extension). Params and Exts are reversed back into header order when
  %% the range is emitted.
  accept_before_semicolon(<<>>, Ranges, Type, SubType, Params, Weight, Exts) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse(Exts)},
      lists:reverse([Range | Ranges]);
  accept_before_semicolon(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse(Exts)},
      media_range_list(Rest, [Range | Ranges]);
  accept_before_semicolon(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, Exts);
  accept_before_semicolon(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, Exts).
  %% After a semicolon in the extension section: skips whitespace, then
  %% starts reading an extension name with its first token character.
  accept_before_ext(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, Exts);
  accept_before_ext(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Ranges, Type, SubType, Params, Weight, Exts, <<>>)
      end.
  %% Reads an accept extension name (lowercased). A name not followed by an
  %% equals sign is stored as a bare binary; otherwise the value is read by
  %% accept_quoted/9 or accept_value/9.
  accept_ext(<<>>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([Key | Exts])},
      lists:reverse([Range | Ranges]);
  accept_ext(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([Key | Exts])},
      media_range_list(Rest, [Range | Ranges]);
  accept_ext(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, [Key | Exts]);
  accept_ext(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [Key | Exts]);
  accept_ext(<< $=, $", Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, <<>>);
  accept_ext(<< $=, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_value(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, <<>>);
  accept_ext(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Ranges, Type, SubType, Params, Weight, Exts, Key)
      end.
  %% Reads a quoted extension value up to the closing quote. A backslash
  %% escape contributes only the escaped character.
  accept_quoted(<< $", Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value} | Exts]);
  accept_quoted(<< $\\, C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_VCHAR(C) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, C >>);
  accept_quoted(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_VCHAR(C) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, C >>).
  %% Reads an unquoted (token) extension value; the value keeps its case.
  %% The {Key, Value} extension is stored when a separator or the end of
  %% input is reached.
  accept_value(<<>>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      AllExts = lists:reverse([{Key, Value} | Exts]),
      lists:reverse([{{Type, SubType, lists:reverse(Params)}, Weight, AllExts} | Ranges]);
  accept_value(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      AllExts = lists:reverse([{Key, Value} | Exts]),
      media_range_list(Rest, [{{Type, SubType, lists:reverse(Params)}, Weight, AllExts} | Ranges]);
  accept_value(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value} | Exts]);
  accept_value(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value} | Exts]);
  accept_value(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_TOKEN(C) ->
      accept_value(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, C >>).
  209. -ifdef(TEST).
  %% Generator for one accept extension: either a bare token or a full
  %% parameter() tuple.
  accept_ext() ->
      Bare = token(),
      WithValue = parameter(),
      oneof([Bare, WithValue]).
  %% Generator for the accept extension list: empty with weight 90,
  %% otherwise an arbitrary list of accept_ext() values.
  accept_params() ->
      Some = list(accept_ext()),
      frequency([
          {90, []},
          {10, Some}
      ]).
  %% Generator for one media range. Yields
  %% {Type, SubType, Params, Weight, Exts, Rendered}, where Rendered is the
  %% header text built from the other five elements. The weight and the
  %% extensions are rendered only when Weight is not undefined.
  accept() ->
      ?LET({Type, SubType, Params, Weight, Exts},
          {token(), token(), list(parameter()), weight(), accept_params()},
          begin
              ParamsIo = [[OWS1, $;, OWS2, Name, $=, Value]
                  || {Name, Value, OWS1, OWS2} <- Params],
              TailIo = case Weight of
                  undefined ->
                      [];
                  _ ->
                      [
                          [<<";q=">>, qvalue_to_iodata(Weight)],
                          [case Ext of
                              {Name, Value, OWS1, OWS2} -> [OWS1, $;, OWS2, Name, $=, Value];
                              Name -> [$;, Name]
                          end || Ext <- Exts]
                      ]
              end,
              Rendered = iolist_to_binary([Type, $/, SubType, ParamsIo, TailIo]),
              {Type, SubType, Params, Weight, Exts, Rendered}
          end
      ).
  %% Property: a comma-joined list of generated media ranges parses back to
  %% the generated components, with names lowercased and quoted values
  %% unquoted. A missing weight must parse as 1000 with no extensions.
  prop_parse_accept() ->
      ?FORALL(Generated,
          non_empty(list(accept())),
          begin
              %% Every element is prefixed with a comma; the first one is dropped.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Rendered] || {_, _, _, _, _, Rendered} <- Generated]),
              Parsed = parse_accept(Header),
              Outcomes = [begin
                  WantParams = [{?INLINE_LOWERCASE_BC(Name), unquote(Value)}
                      || {Name, Value, _, _} <- Params],
                  WantExts = [case Ext of
                      {Name, Value, _, _} -> {?INLINE_LOWERCASE_BC(Name), unquote(Value)};
                      Name -> ?INLINE_LOWERCASE_BC(Name)
                  end || Ext <- Exts],
                  GotType =:= ?INLINE_LOWERCASE_BC(Type)
                      andalso GotSubType =:= ?INLINE_LOWERCASE_BC(SubType)
                      andalso GotParams =:= WantParams
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
                      andalso ((Weight =:= undefined andalso GotExts =:= [])
                          orelse (Weight =/= undefined andalso GotExts =:= WantExts))
              end || {{Type, SubType, Params, Weight, Exts, _},
                      {{GotType, GotSubType, GotParams}, GotWeight, GotExts}}
                  <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end
      ).
  %% Test generator: each {Header, Expected} pair becomes a test titled by
  %% the header, which asserts by pattern match that parse_accept/1 returns
  %% exactly Expected.
  parse_accept_test_() ->
      Cases = [
          {<<>>, []},
          {<<" ">>, []},
          {<<"audio/*; q=0.2, audio/basic">>, [
              {{<<"audio">>, <<"*">>, []}, 200, []},
              {{<<"audio">>, <<"basic">>, []}, 1000, []}
          ]},
          {<<"text/plain; q=0.5, text/html, "
              "text/x-dvi; q=0.8, text/x-c">>, [
              {{<<"text">>, <<"plain">>, []}, 500, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"x-dvi">>, []}, 800, []},
              {{<<"text">>, <<"x-c">>, []}, 1000, []}
          ]},
          {<<"text/*, text/html, text/html;level=1, */*">>, [
              {{<<"text">>, <<"*">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 1000, []}
          ]},
          {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>, [
              {{<<"text">>, <<"*">>, []}, 300, []},
              {{<<"text">>, <<"html">>, []}, 700, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
              {{<<"*">>, <<"*">>, []}, 500, []}
          ]},
          {<<"text/html;level=1;quoted=\"hi hi hi\";"
              "q=0.123;standalone;complex=gits, text/plain">>, [
              {{<<"text">>, <<"html">>,
                  [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
                  [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
              {{<<"text">>, <<"plain">>, []}, 1000, []}
          ]},
          %% Tolerated malformed input: "*" for "*/*" and ".2" for "0.2".
          {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"image">>, <<"gif">>, []}, 1000, []},
              {{<<"image">>, <<"jpeg">>, []}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 200, []},
              {{<<"*">>, <<"*">>, []}, 200, []}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept(Header) end}
          || {Header, Expected} <- Cases].
  %% Test generator: each listed header must make parse_accept/1 crash.
  parse_accept_error_test_() ->
      Invalid = [
          <<"audio/basic, */;q=0.5">>,
          <<"audio/, audio/basic">>,
          <<"aud\tio/basic">>,
          %% Quoted value containing a NUL byte.
          <<"audio/basic;t=\"zero \\", 0, " woo\"">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept(Header)) end}
          || Header <- Invalid].
  306. -endif.
  307. -ifdef(PERF).
  %% Benchmark entry point for parse_accept/1.
  %% NOTE(review): horse:repeat/2 comes from the external horse tool and
  %% presumably evaluates the expression 20000 times - confirm. The
  %% expression is kept inline in the call for that reason.
  horse_parse_accept() ->
      horse:repeat(
          20000,
          parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>)
      ).
  313. -endif.
  %% @doc Parse the Accept-Charset header.
  %%
  %% Returns {Charset, Quality} pairs in header order, with lowercased
  %% charset names and quality in thousandths (1000 when absent). The parsed
  %% list is passed through nonempty/1, which is defined elsewhere in this
  %% module; the test suite expects an empty header to crash.
  -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  parse_accept_charset(Charset) ->
      Parsed = conneg_list(Charset, []),
      nonempty(Parsed).
  %% Start of a list element for the token-plus-weight headers. Whitespace
  %% and stray commas are skipped; the first token character starts a new
  %% (lowercased) name. At end of input the collected pairs are returned in
  %% header order.
  conneg_list(<<>>, Pairs) ->
      lists:reverse(Pairs);
  conneg_list(<< WS, Rest/bits >>, Pairs) when WS =:= $\s; WS =:= $\t ->
      conneg_list(Rest, Pairs);
  conneg_list(<< $,, Rest/bits >>, Pairs) ->
      conneg_list(Rest, Pairs);
  conneg_list(<< C, Rest/bits >>, Pairs) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Pairs, <<>>)
      end.
  %% Reads a name token (lowercased). Without an explicit weight the name
  %% is stored with the default quality of 1000.
  conneg(<<>>, Pairs, Name) ->
      lists:reverse([{Name, 1000} | Pairs]);
  conneg(<< $,, Rest/bits >>, Pairs, Name) ->
      conneg_list(Rest, [{Name, 1000} | Pairs]);
  conneg(<< $;, Rest/bits >>, Pairs, Name) ->
      conneg_before_weight(Rest, Pairs, Name);
  conneg(<< WS, Rest/bits >>, Pairs, Name) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Pairs, Name);
  conneg(<< C, Rest/bits >>, Pairs, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Pairs, Name)
      end.
  %% After a name followed by whitespace: skips further whitespace, then
  %% expects end of input, a comma or a semicolon.
  conneg_before_semicolon(<<>>, Pairs, Name) ->
      lists:reverse([{Name, 1000} | Pairs]);
  conneg_before_semicolon(<< $,, Rest/bits >>, Pairs, Name) ->
      conneg_list(Rest, [{Name, 1000} | Pairs]);
  conneg_before_semicolon(<< $;, Rest/bits >>, Pairs, Name) ->
      conneg_before_weight(Rest, Pairs, Name);
  conneg_before_semicolon(<< WS, Rest/bits >>, Pairs, Name) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Pairs, Name).
  %% After a semicolon: skips whitespace, then expects the "q=" weight.
  conneg_before_weight(<< WS, Rest/bits >>, Pairs, Name) when WS =:= $\s; WS =:= $\t ->
      conneg_before_weight(Rest, Pairs, Name);
  conneg_before_weight(<< $q, $=, Rest/bits >>, Pairs, Name) ->
      conneg_weight(Rest, Pairs, Name);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending name is stored with quality 1000 and a new name begins.
  conneg_before_weight(<< C, Rest/bits >>, Pairs, Name) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, [{Name, 1000} | Pairs], <<>>)
      end.
  %% Reads the weight that follows "q=", converts it to thousandths and
  %% stores the {Name, Quality} pair. Accepted forms are "1" with up to
  %% three zero decimals and "0" with up to three decimal digits. Longer
  %% prefixes are tried first.
  conneg_weight(<< "1.000", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 1000} | Pairs]);
  conneg_weight(<< "1.00", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 1000} | Pairs]);
  conneg_weight(<< "1.0", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 1000} | Pairs]);
  conneg_weight(<< "1.", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 1000} | Pairs]);
  conneg_weight(<< "1", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 1000} | Pairs]);
  conneg_weight(<< "0.", D1, D2, D3, Rest/bits >>, Pairs, Name)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      conneg_list_sep(Rest, [{Name, Quality} | Pairs]);
  conneg_weight(<< "0.", D1, D2, Rest/bits >>, Pairs, Name)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10,
      conneg_list_sep(Rest, [{Name, Quality} | Pairs]);
  conneg_weight(<< "0.", D1, Rest/bits >>, Pairs, Name)
          when D1 >= $0, D1 =< $9 ->
      Quality = (D1 - $0) * 100,
      conneg_list_sep(Rest, [{Name, Quality} | Pairs]);
  conneg_weight(<< "0.", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 0} | Pairs]);
  conneg_weight(<< "0", Rest/bits >>, Pairs, Name) ->
      conneg_list_sep(Rest, [{Name, 0} | Pairs]).
  %% After a weight: only whitespace, a comma or end of input may follow.
  conneg_list_sep(<<>>, Pairs) ->
      lists:reverse(Pairs);
  conneg_list_sep(<< WS, Rest/bits >>, Pairs) when WS =:= $\s; WS =:= $\t ->
      conneg_list_sep(Rest, Pairs);
  conneg_list_sep(<< $,, Rest/bits >>, Pairs) ->
      conneg_list(Rest, Pairs).
  368. -ifdef(TEST).
  %% Generator for one Accept-Charset element as {Charset, Weight, Rendered}.
  %% The ";q=" suffix is rendered only when Weight is not undefined.
  accept_charset() ->
      ?LET({Charset, Weight},
          {token(), weight()},
          begin
              Suffix = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {Charset, Weight, iolist_to_binary([Charset, Suffix])}
          end
      ).
  %% Property: a comma-joined list of generated charsets parses back to the
  %% lowercased names with their weights; a missing weight parses as 1000.
  prop_parse_accept_charset() ->
      ?FORALL(Generated,
          non_empty(list(accept_charset())),
          begin
              %% Every element is prefixed with a comma; the first one is dropped.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Rendered] || {_, _, Rendered} <- Generated]),
              Parsed = parse_accept_charset(Header),
              Outcomes = [begin
                  GotName =:= ?INLINE_LOWERCASE_BC(Name)
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
              end || {{Name, Weight, _}, {GotName, GotWeight}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end).
  %% Test generator: each {Header, Expected} pair asserts by pattern match
  %% that parse_accept_charset/1 returns exactly Expected.
  parse_accept_charset_test_() ->
      Cases = [
          {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
              {<<"iso-8859-5">>, 1000},
              {<<"unicode-1-1">>, 800}
          ]},
          %% Some user agents send this invalid value for the Accept-Charset header
          {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
              {<<"iso-8859-1">>, 1000},
              {<<"utf-8">>, 700},
              {<<"*">>, 700}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_charset(Header) end}
          || {Header, Expected} <- Cases].
  %% Test generator: each listed header must make parse_accept_charset/1
  %% crash. Only the empty header is covered.
  parse_accept_charset_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept_charset(Header)) end}
          || Header <- Invalid].
  408. -endif.
  409. -ifdef(PERF).
  %% Benchmark entry point for parse_accept_charset/1.
  %% NOTE(review): horse:repeat/2 comes from the external horse tool and
  %% presumably evaluates the expression 20000 times - confirm.
  horse_parse_accept_charset() ->
      horse:repeat(
          20000,
          parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
      ).
  414. -endif.
  %% @doc Parse the Accept-Encoding header.
  %%
  %% Returns {Encoding, Quality} pairs in header order, with lowercased
  %% names and quality in thousandths (1000 when absent). Unlike
  %% parse_accept_charset/1, the result is not passed through nonempty/1:
  %% an empty header yields [].
  -spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
  parse_accept_encoding(Encoding) ->
      NoPairsYet = [],
      conneg_list(Encoding, NoPairsYet).
  419. -ifdef(TEST).
  %% Generator for one Accept-Encoding element as
  %% {Encoding, Weight, Rendered}. The ";q=" suffix is rendered only when
  %% Weight is not undefined.
  accept_encoding() ->
      ?LET({Encoding, Weight},
          {token(), weight()},
          begin
              Suffix = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {Encoding, Weight, iolist_to_binary([Encoding, Suffix])}
          end
      ).
  %% Property: a comma-joined list of generated encodings parses back to the
  %% lowercased names with their weights; a missing weight parses as 1000.
  prop_parse_accept_encoding() ->
      ?FORALL(Generated,
          non_empty(list(accept_encoding())),
          begin
              %% Every element is prefixed with a comma; the first one is dropped.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Rendered] || {_, _, Rendered} <- Generated]),
              Parsed = parse_accept_encoding(Header),
              Outcomes = [begin
                  GotName =:= ?INLINE_LOWERCASE_BC(Name)
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
              end || {{Name, Weight, _}, {GotName, GotWeight}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end).
  %% Test generator: each {Header, Expected} pair asserts by pattern match
  %% that parse_accept_encoding/1 returns exactly Expected.
  parse_accept_encoding_test_() ->
      Cases = [
          {<<>>, []},
          {<<"*">>, [{<<"*">>, 1000}]},
          {<<"compress, gzip">>, [
              {<<"compress">>, 1000},
              {<<"gzip">>, 1000}
          ]},
          {<<"compress;q=0.5, gzip;q=1.0">>, [
              {<<"compress">>, 500},
              {<<"gzip">>, 1000}
          ]},
          {<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
              {<<"gzip">>, 1000},
              {<<"identity">>, 500},
              {<<"*">>, 0}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_encoding(Header) end}
          || {Header, Expected} <- Cases].
  459. -endif.
  460. -ifdef(PERF).
  %% Benchmark entry point for parse_accept_encoding/1.
  %% NOTE(review): horse:repeat/2 comes from the external horse tool and
  %% presumably evaluates the expression 20000 times - confirm.
  horse_parse_accept_encoding() ->
      horse:repeat(
          20000,
          parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
      ).
  465. -endif.
  %% @doc Parse the Accept-Language header.
  %%
  %% Returns {LanguageRange, Quality} pairs in header order, with lowercased
  %% ranges and quality in thousandths (1000 when absent). The parsed list
  %% is passed through nonempty/1, which is defined elsewhere in this
  %% module; the test suite expects an empty header to crash.
  -spec parse_accept_language(binary()) -> [{binary(), qvalue()}].
  parse_accept_language(LanguageRange) ->
      Parsed = language_range_list(LanguageRange, []),
      nonempty(Parsed).
  %% Start of a list element. Whitespace and stray commas are skipped. A
  %% "*" is a complete range on its own; otherwise the range must begin
  %% with a letter, which starts the primary tag with a length count of 1.
  language_range_list(<<>>, Pairs) ->
      lists:reverse(Pairs);
  language_range_list(<< WS, Rest/bits >>, Pairs) when WS =:= $\s; WS =:= $\t ->
      language_range_list(Rest, Pairs);
  language_range_list(<< $,, Rest/bits >>, Pairs) ->
      language_range_list(Rest, Pairs);
  language_range_list(<< $*, Rest/bits >>, Pairs) ->
      language_range_before_semicolon(Rest, Pairs, <<"*">>);
  language_range_list(<< C, Rest/bits >>, Pairs) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, Pairs, 1, <<>>)
      end.
  %% Reads the primary tag: letters only, lowercased. Len counts the
  %% characters read so far; when it has reached 8, any character other
  %% than a separator or hyphen raises badarg. A hyphen starts a subtag.
  language_range(<<>>, Pairs, _, Tag) ->
      lists:reverse([{Tag, 1000} | Pairs]);
  language_range(<< $,, Rest/bits >>, Pairs, _, Tag) ->
      language_range_list(Rest, [{Tag, 1000} | Pairs]);
  language_range(<< $;, Rest/bits >>, Pairs, _, Tag) ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range(<< WS, Rest/bits >>, Pairs, _, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_semicolon(Rest, Pairs, Tag);
  language_range(<< $-, Rest/bits >>, Pairs, _, Tag) ->
      language_range_sub(Rest, Pairs, 0, << Tag/binary, $- >>);
  language_range(<< _, _/bits >>, _, 8, _) ->
      error(badarg);
  language_range(<< C, Rest/bits >>, Pairs, Len, Tag) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, Pairs, Len + 1, Tag)
      end.
  %% Reads a subtag: letters and digits, lowercased. Len counts the
  %% characters of the current subtag. The terminating clauses require
  %% Len > 0, so an empty subtag is rejected; when Len has reached 8, any
  %% further subtag character raises badarg.
  language_range_sub(<<>>, Pairs, Len, Tag) when Len > 0 ->
      lists:reverse([{Tag, 1000} | Pairs]);
  language_range_sub(<< $,, Rest/bits >>, Pairs, Len, Tag) when Len > 0 ->
      language_range_list(Rest, [{Tag, 1000} | Pairs]);
  language_range_sub(<< $;, Rest/bits >>, Pairs, Len, Tag) when Len > 0 ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range_sub(<< WS, Rest/bits >>, Pairs, Len, Tag)
          when Len > 0, (WS =:= $\s orelse WS =:= $\t) ->
      language_range_before_semicolon(Rest, Pairs, Tag);
  language_range_sub(<< $-, Rest/bits >>, Pairs, Len, Tag) when Len > 0 ->
      language_range_sub(Rest, Pairs, 0, << Tag/binary, $- >>);
  language_range_sub(<< _, _/bits >>, _, 8, _) ->
      error(badarg);
  language_range_sub(<< C, Rest/bits >>, Pairs, Len, Tag) when ?IS_ALPHA(C); ?IS_DIGIT(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range_sub, Rest, Pairs, Len + 1, Tag)
      end.
  %% After a complete language range: skips whitespace, then expects end of
  %% input, a comma or a semicolon.
  language_range_before_semicolon(<<>>, Pairs, Tag) ->
      lists:reverse([{Tag, 1000} | Pairs]);
  language_range_before_semicolon(<< $,, Rest/bits >>, Pairs, Tag) ->
      language_range_list(Rest, [{Tag, 1000} | Pairs]);
  language_range_before_semicolon(<< $;, Rest/bits >>, Pairs, Tag) ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range_before_semicolon(<< WS, Rest/bits >>, Pairs, Tag)
          when WS =:= $\s; WS =:= $\t ->
      language_range_before_semicolon(Rest, Pairs, Tag).
  %% After a semicolon: skips whitespace, then expects the "q=" weight.
  language_range_before_weight(<< WS, Rest/bits >>, Pairs, Tag)
          when WS =:= $\s; WS =:= $\t ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range_before_weight(<< $q, $=, Rest/bits >>, Pairs, Tag) ->
      language_range_weight(Rest, Pairs, Tag);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending range is stored with quality 1000 and a new range begins.
  language_range_before_weight(<< C, Rest/bits >>, Pairs, Tag) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, [{Tag, 1000} | Pairs], 1, <<>>)
      end.
  %% Reads the weight that follows "q=", converts it to thousandths and
  %% stores the {Tag, Quality} pair. Accepted forms are "1" with up to
  %% three zero decimals and "0" with up to three decimal digits. Longer
  %% prefixes are tried first.
  language_range_weight(<< "1.000", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000} | Pairs]);
  language_range_weight(<< "1.00", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000} | Pairs]);
  language_range_weight(<< "1.0", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000} | Pairs]);
  language_range_weight(<< "1.", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000} | Pairs]);
  language_range_weight(<< "1", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 1000} | Pairs]);
  language_range_weight(<< "0.", D1, D2, D3, Rest/bits >>, Pairs, Tag)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      language_range_list_sep(Rest, [{Tag, Quality} | Pairs]);
  language_range_weight(<< "0.", D1, D2, Rest/bits >>, Pairs, Tag)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10,
      language_range_list_sep(Rest, [{Tag, Quality} | Pairs]);
  language_range_weight(<< "0.", D1, Rest/bits >>, Pairs, Tag)
          when D1 >= $0, D1 =< $9 ->
      Quality = (D1 - $0) * 100,
      language_range_list_sep(Rest, [{Tag, Quality} | Pairs]);
  language_range_weight(<< "0.", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 0} | Pairs]);
  language_range_weight(<< "0", Rest/bits >>, Pairs, Tag) ->
      language_range_list_sep(Rest, [{Tag, 0} | Pairs]).
  %% After a weight: only whitespace, a comma or end of input may follow.
  language_range_list_sep(<<>>, Pairs) ->
      lists:reverse(Pairs);
  language_range_list_sep(<< WS, Rest/bits >>, Pairs) when WS =:= $\s; WS =:= $\t ->
      language_range_list_sep(Rest, Pairs);
  language_range_list_sep(<< $,, Rest/bits >>, Pairs) ->
      language_range_list(Rest, Pairs).
  534. -ifdef(TEST).
  %% Generator for a primary language tag: a list of one to eight alpha()
  %% generators, each length being one alternative of the oneof.
  language_tag() ->
      oneof([lists:duplicate(Len, alpha()) || Len <- lists:seq(1, 8)]).
  %% Generator for a language subtag: a hyphen followed by a list of one to
  %% eight alphanum() generators.
  language_subtag() ->
      Body = oneof([lists:duplicate(Len, alphanum()) || Len <- lists:seq(1, 8)]),
      [$-, Body].
  %% Generator for a language range: a primary tag followed by any number
  %% of subtags, as a nested list.
  language_range() ->
      Primary = language_tag(),
      SubTags = list(language_subtag()),
      [Primary, SubTags].
  %% Generator for one Accept-Language element as {Range, Weight, Rendered},
  %% with Range flattened to a binary. The ";q=" suffix is rendered only
  %% when Weight is not undefined.
  accept_language() ->
      ?LET({Range, Weight},
          {language_range(), weight()},
          begin
              Suffix = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {iolist_to_binary(Range), Weight, iolist_to_binary([Range, Suffix])}
          end
      ).
  %% Property: a comma-joined list of generated language ranges parses back
  %% to the lowercased ranges with their weights; a missing weight parses
  %% as 1000.
  prop_parse_accept_language() ->
      ?FORALL(Generated,
          non_empty(list(accept_language())),
          begin
              %% Every element is prefixed with a comma; the first one is dropped.
              << _, Header/binary >> = iolist_to_binary(
                  [[$,, Rendered] || {_, _, Rendered} <- Generated]),
              Parsed = parse_accept_language(Header),
              Outcomes = [begin
                  GotRange =:= ?INLINE_LOWERCASE_BC(Range)
                      andalso (GotWeight =:= Weight
                          orelse (Weight =:= undefined andalso GotWeight =:= 1000))
              end || {{Range, Weight, _}, {GotRange, GotWeight}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Outcomes)
          end).
  %% EUnit generator: each header value must parse to exactly the listed
  %% {Range, Weight} pairs.
  parse_accept_language_test_() ->
      Cases = [
          {<<"da, en-gb;q=0.8, en;q=0.7">>, [
              {<<"da">>, 1000},
              {<<"en-gb">>, 800},
              {<<"en">>, 700}
          ]},
          {<<"en, en-US, en-cockney, i-cherokee, x-pig-latin, es-419">>, [
              {<<"en">>, 1000},
              {<<"en-us">>, 1000},
              {<<"en-cockney">>, 1000},
              {<<"i-cherokee">>, 1000},
              {<<"x-pig-latin">>, 1000},
              {<<"es-419">>, 1000}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_language(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: each of these values must make the parser crash.
  parse_accept_language_error_test_() ->
      Invalid = [
          <<>>,
          <<"loooooong">>,
          <<"en-us-loooooong">>,
          <<"419-en-us">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept_language(Header)) end}
          || Header <- Invalid].
  604. -endif.
  605. -ifdef(PERF).
  %% PERF-only: hands a parse_accept_language/1 call on a three-entry
  %% value to horse:repeat/2 with a count of 20000.
  horse_parse_accept_language() ->
      horse:repeat(20000, parse_accept_language(<<"da, en-gb;q=0.8, en;q=0.7">>)).
  610. -endif.
  %% @doc Parse the Connection header.
  %%
  %% The exact values "close" and "keep-alive" are matched directly.
  %% Anything else goes through the case-insensitive token list parser
  %% and must yield at least one token.
  -spec parse_connection(binary()) -> [binary()].
  parse_connection(<<"close">> = Value) ->
      [Value];
  parse_connection(<<"keep-alive">> = Value) ->
      [Value];
  parse_connection(Value) ->
      nonempty(token_ci_list(Value, [])).
  619. -ifdef(TEST).
  %% Property: a comma-joined list of generated tokens parses back to
  %% the same tokens, lowercased.
  prop_parse_connection() ->
      ?FORALL(Tokens,
          non_empty(list(token())),
          begin
              %% Each token is emitted with a leading comma; the match
              %% drops the very first byte.
              << _, Connection/binary >> = iolist_to_binary([[$,, Tok] || Tok <- Tokens]),
              Parsed = parse_connection(Connection),
              Checks = [?INLINE_LOWERCASE_BC(Tok) =:= Res
                  || {Tok, Res} <- lists:zip(Tokens, Parsed)],
              lists:usort(Checks) =:= [true]
          end).
  %% EUnit generator: each header value must parse to the listed tokens.
  parse_connection_test_() ->
      Cases = [
          {<<"close">>, [<<"close">>]},
          {<<"ClOsE">>, [<<"close">>]},
          {<<"Keep-Alive">>, [<<"keep-alive">>]},
          {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
      ],
      [{Header, fun() -> Expected = parse_connection(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: an empty Connection value must make the parser crash.
  parse_connection_error_test_() ->
      Invalid = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_connection(Header)) end}
          || Header <- Invalid].
  642. -endif.
  643. -ifdef(PERF).
  %% PERF-only: hands a parse_connection/1 call on "close" to
  %% horse:repeat/2 with a count of 200000.
  horse_parse_connection_close() ->
      horse:repeat(200000, parse_connection(<<"close">>)).
  %% PERF-only: hands a parse_connection/1 call on "keep-alive" to
  %% horse:repeat/2 with a count of 200000.
  horse_parse_connection_keepalive() ->
      horse:repeat(200000, parse_connection(<<"keep-alive">>)).
  %% PERF-only: hands a parse_connection/1 call on a two-token value
  %% (which does not hit the literal clauses) to horse:repeat/2 with a
  %% count of 200000.
  horse_parse_connection_keepalive_upgrade() ->
      horse:repeat(200000, parse_connection(<<"keep-alive, upgrade">>)).
  656. -endif.
  %% @doc Parse the Content-Length header.
  %%
  %% The value has at least one digit, and may be followed by whitespace.
  %% A value that does not start with a digit crashes with function_clause.
  -spec parse_content_length(binary()) -> non_neg_integer().
  parse_content_length(<< $0 >>) ->
      %% The single-byte value "0" is answered without entering number/2.
      0;
  parse_content_length(<< Digit, Rest/bits >>) when Digit >= $0, Digit =< $9 ->
      %% Seed the accumulator with the first digit's numeric value.
      number(Rest, Digit - $0).
  672. -ifdef(TEST).
  %% Property: rendering a non-negative integer and parsing it back
  %% gives the same integer.
  prop_parse_content_length() ->
      ?FORALL(Length,
          non_neg_integer(),
          Length =:= parse_content_length(integer_to_binary(Length))).
  %% EUnit generator: valid values, with and without trailing whitespace.
  parse_content_length_test_() ->
      Cases = [
          {<<"0">>, 0},
          {<<"42 ">>, 42},
          {<<"69\t">>, 69},
          {<<"1337">>, 1337},
          {<<"1234567890">>, 1234567890},
          {<<"1234567890 ">>, 1234567890}
      ],
      [{Header, fun() -> Expected = parse_content_length(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: an empty value, a list and a decimal must all crash.
  parse_content_length_error_test_() ->
      Invalid = [
          <<>>,
          <<"123, 123">>,
          <<"4.17">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_content_length(Header)) end}
          || Header <- Invalid].
  696. -endif.
  697. -ifdef(PERF).
  %% PERF-only: hands a parse_content_length/1 call on "0" to
  %% horse:repeat/2 with a count of 100000.
  horse_parse_content_length_zero() ->
      horse:repeat(100000, parse_content_length(<<"0">>)).
  %% PERF-only: hands a parse_content_length/1 call on a ten-digit value
  %% to horse:repeat/2 with a count of 100000.
  horse_parse_content_length_giga() ->
      horse:repeat(100000, parse_content_length(<<"1234567890">>)).
  706. -endif.
  %% @doc Parse the Content-Type header.
  %%
  %% Returns {Type, SubType, Params}. The value must start with a token
  %% character, which begins the type accumulator in media_type/2.
  -spec parse_content_type(binary()) -> media_type().
  parse_content_type(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_type, Rest, <<>>)
      end.
  %% Accumulate the type. A slash followed by a token character ends
  %% the type and starts the subtype accumulator; any other token
  %% character extends the type.
  media_type(<< $/, C, Rest/bits >>, Type) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_subtype, Rest, Type, <<>>)
      end;
  media_type(<< C, Rest/bits >>, Type) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_type, Rest, Type)
      end.
  %% Accumulate the subtype. It ends at the end of input (no
  %% parameters), at a semicolon (a parameter follows) or at
  %% whitespace (a semicolon or the end of input must follow).
  media_subtype(<<>>, Type, SubType) ->
      {Type, SubType, []};
  media_subtype(<< $;, Rest/bits >>, Type, SubType) ->
      media_before_param(Rest, Type, SubType, []);
  media_subtype(<< WS, Rest/bits >>, Type, SubType) when WS =:= $\s; WS =:= $\t ->
      media_before_semicolon(Rest, Type, SubType, []);
  media_subtype(<< C, Rest/bits >>, Type, SubType) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_subtype, Rest, Type, SubType)
      end.
  %% Skip whitespace after the subtype or after a parameter value.
  %% Only the end of input or a semicolon is accepted next; Params is
  %% accumulated in reverse and put back in input order on return.
  media_before_semicolon(<<>>, Type, SubType, Params) ->
      {Type, SubType, lists:reverse(Params)};
  media_before_semicolon(<< $;, Rest/bits >>, Type, SubType, Params) ->
      media_before_param(Rest, Type, SubType, Params);
  media_before_semicolon(<< WS, Rest/bits >>, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_before_semicolon(Rest, Type, SubType, Params).
  %% Skip whitespace after a semicolon, then start a parameter.
  %% A name spelled exactly "charset" in lowercase is dispatched to the
  %% dedicated charset value parsers; every other name is accumulated
  %% by media_param/5.
  media_before_param(<< WS, Rest/bits >>, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_before_param(Rest, Type, SubType, Params);
  media_before_param(<< "charset=", $", Rest/bits >>, Type, SubType, Params) ->
      media_charset_quoted(Rest, Type, SubType, Params, <<>>);
  media_before_param(<< "charset=", Rest/bits >>, Type, SubType, Params) ->
      media_charset(Rest, Type, SubType, Params, <<>>);
  media_before_param(<< C, Rest/bits >>, Type, SubType, Params) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_param, Rest, Type, SubType, Params, <<>>)
      end.
  %% Accumulate a quoted charset value. The closing quote stores the
  %% parameter; a backslash makes the next character part of the value
  %% (the backslash itself is dropped).
  media_charset_quoted(<< $", Rest/bits >>, Type, SubType, Params, Charset) ->
      media_before_semicolon(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
  media_charset_quoted(<< $\\, C, Rest/bits >>, Type, SubType, Params, Charset)
          when ?IS_VCHAR(C) ->
      case C of
          ?INLINE_LOWERCASE(media_charset_quoted, Rest, Type, SubType, Params, Charset)
      end;
  media_charset_quoted(<< C, Rest/bits >>, Type, SubType, Params, Charset)
          when ?IS_VCHAR(C) ->
      case C of
          ?INLINE_LOWERCASE(media_charset_quoted, Rest, Type, SubType, Params, Charset)
      end.
  %% Accumulate an unquoted charset value. It ends at the end of input,
  %% at a semicolon or at whitespace; the parameter is stored under the
  %% key <<"charset">>.
  media_charset(<<>>, Type, SubType, Params, Charset) ->
      {Type, SubType, lists:reverse([{<<"charset">>, Charset}|Params])};
  media_charset(<< $;, Rest/bits >>, Type, SubType, Params, Charset) ->
      media_before_param(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
  media_charset(<< WS, Rest/bits >>, Type, SubType, Params, Charset)
          when WS =:= $\s; WS =:= $\t ->
      media_before_semicolon(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
  media_charset(<< C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_charset, Rest, Type, SubType, Params, Charset)
      end.
  %% Accumulate a parameter name up to the equal sign, then pick the
  %% quoted or unquoted value parser depending on the next byte.
  media_param(<< $=, $", Rest/bits >>, Type, SubType, Params, Key) ->
      media_quoted(Rest, Type, SubType, Params, Key, <<>>);
  media_param(<< $=, Rest/bits >>, Type, SubType, Params, Key) ->
      media_value(Rest, Type, SubType, Params, Key, <<>>);
  media_param(<< C, Rest/bits >>, Type, SubType, Params, Key) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_param, Rest, Type, SubType, Params, Key)
      end.
  %% Accumulate a quoted parameter value, keeping its case. The closing
  %% quote stores the parameter; a backslash makes the next character
  %% part of the value (the backslash itself is dropped).
  media_quoted(<< $", Rest/bits >>, Type, SubType, Params, Key, Value) ->
      media_before_semicolon(Rest, Type, SubType, [{Key, Value}|Params]);
  media_quoted(<< $\\, Char, Rest/bits >>, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(Char) ->
      media_quoted(Rest, Type, SubType, Params, Key, << Value/binary, Char >>);
  media_quoted(<< Char, Rest/bits >>, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(Char) ->
      media_quoted(Rest, Type, SubType, Params, Key, << Value/binary, Char >>).
  %% Accumulate an unquoted parameter value, keeping its case. It ends
  %% at the end of input, at a semicolon or at whitespace.
  media_value(<<>>, Type, SubType, Params, Key, Value) ->
      {Type, SubType, lists:reverse([{Key, Value}|Params])};
  media_value(<< $;, Rest/bits >>, Type, SubType, Params, Key, Value) ->
      media_before_param(Rest, Type, SubType, [{Key, Value}|Params]);
  media_value(<< WS, Rest/bits >>, Type, SubType, Params, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      media_before_semicolon(Rest, Type, SubType, [{Key, Value}|Params]);
  media_value(<< Char, Rest/bits >>, Type, SubType, Params, Key, Value)
          when ?IS_TOKEN(Char) ->
      media_value(Rest, Type, SubType, Params, Key, << Value/binary, Char >>).
  773. -ifdef(TEST).
  %% Generator: mostly generic parameters (weight 90), sometimes a
  %% lowercase charset parameter with no surrounding whitespace
  %% (weight 10), so the dedicated charset clauses get exercised.
  media_type_parameter() ->
      Charset = {<<"charset">>, oneof([token(), quoted_string()]), <<>>, <<>>},
      frequency([
          {90, parameter()},
          {10, Charset}
      ]).
  %% Generator: {Type, SubType, Params, HeaderBinary} where the binary
  %% is "Type/SubType" followed by each parameter rendered as
  %% OWS ";" OWS Key "=" Value.
  media_type() ->
      ?LET({Type, SubType, Params},
          {token(), token(), list(media_type_parameter())},
          begin
              ParamsIo = [[OWS1, $;, OWS2, Key, $=, Value]
                  || {Key, Value, OWS1, OWS2} <- Params],
              {Type, SubType, Params, iolist_to_binary([Type, $/, SubType, ParamsIo])}
          end
      ).
  %% Property: type, subtype and parameter names come back lowercased.
  %% Parameter values come back unquoted with their case kept, except
  %% charset values, which are also lowercased.
  prop_parse_content_type() ->
      ?FORALL({Type, SubType, Params, MediaType},
          media_type(),
          begin
              {ResType, ResSubType, ResParams} = parse_content_type(MediaType),
              Expected = [case ?INLINE_LOWERCASE_BC(Key) of
                  <<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(Value))};
                  LowKey -> {LowKey, unquote(Value)}
              end || {Key, Value, _, _} <- Params],
              ResType =:= ?INLINE_LOWERCASE_BC(Type)
                  andalso ResSubType =:= ?INLINE_LOWERCASE_BC(SubType)
                  andalso ResParams =:= Expected
          end
      ).
  %% EUnit generator: charset handling (quoted, unquoted, mixed case),
  %% whitespace around the semicolon, and case-preserving values for
  %% other parameters.
  parse_content_type_test_() ->
      Cases = [
          {<<"text/html;charset=utf-8">>,
              {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
          {<<"text/html;charset=UTF-8">>,
              {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
          {<<"Text/HTML;Charset=\"utf-8\"">>,
              {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
          {<<"text/html; charset=\"utf-8\"">>,
              {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
          {<<"text/html; charset=ISO-8859-4">>,
              {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
          {<<"text/plain; charset=iso-8859-4">>,
              {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
          {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
              {<<"multipart">>, <<"form-data">>, [
                  {<<"boundary">>, <<"MultipartIsUgly">>}
              ]}},
          {<<"foo/bar; one=FirstParam; two=SecondParam">>,
              {<<"foo">>, <<"bar">>, [
                  {<<"one">>, <<"FirstParam">>},
                  {<<"two">>, <<"SecondParam">>}
              ]}}
      ],
      [{Header, fun() -> Expected = parse_content_type(Header) end}
          || {Header, Expected} <- Cases].
  823. -endif.
  824. -ifdef(PERF).
  %% PERF-only: hands a parse_content_type/1 call on an HTML/UTF-8
  %% value to horse:repeat/2 with a count of 200000.
  horse_parse_content_type() ->
      horse:repeat(200000, parse_content_type(<<"text/html;charset=utf-8">>)).
  829. -endif.
  %% @doc Parse the Expect header.
  %%
  %% Only "100-continue" is accepted, in any letter case and optionally
  %% followed by spaces or tabs. Any other value crashes.
  -spec parse_expect(binary()) -> continue.
  parse_expect(<<"100-continue", Trailing/bits >>) ->
      %% All-lowercase spelling, matched without per-letter guards.
      ws_end(Trailing),
      continue;
  parse_expect(<<"100-", C, O, N1, T, I, N2, U, E, Trailing/bits >>)
          when C =:= $C orelse C =:= $c,
               O =:= $O orelse O =:= $o,
               N1 =:= $N orelse N1 =:= $n,
               T =:= $T orelse T =:= $t,
               I =:= $I orelse I =:= $i,
               N2 =:= $N orelse N2 =:= $n,
               U =:= $U orelse U =:= $u,
               E =:= $E orelse E =:= $e ->
      ws_end(Trailing),
      continue.
  842. -ifdef(TEST).
  %% Generator: "100-continue" with each letter of "continue" picked
  %% independently in lower or upper case.
  expect() ->
      ?LET(Chars,
          %% Subtracting 32 from a lowercase ASCII letter gives its
          %% uppercase form.
          "100-" ++ [oneof([Lower, Lower - 32]) || Lower <- "continue"],
          list_to_binary(Chars)).
  %% Property: every generated case variant parses to continue.
  prop_parse_expect() ->
      ?FORALL(Header,
          expect(),
          continue =:= parse_expect(Header)).
  %% EUnit generator: case variants and trailing whitespace are accepted.
  parse_expect_test_() ->
      Valid = [
          <<"100-continue">>,
          <<"100-CONTINUE">>,
          <<"100-Continue">>,
          <<"100-CoNtInUe">>,
          <<"100-continue ">>
      ],
      [{Header, fun() -> continue = parse_expect(Header) end}
          || Header <- Valid].
  %% EUnit generator: each of these values must make the parser crash.
  parse_expect_error_test_() ->
      Invalid = [
          <<>>,
          <<" ">>,
          <<"200-OK">>,
          <<"Cookies">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_expect(Header)) end}
          || Header <- Invalid].
  869. -endif.
  870. -ifdef(PERF).
  %% PERF-only: hands a parse_expect/1 call on "100-continue" to
  %% horse:repeat/2 with a count of 200000.
  horse_parse_expect() ->
      horse:repeat(200000, parse_expect(<<"100-continue">>)).
  875. -endif.
  %% @doc Parse the Max-Forwards header.
  %%
  %% The value has at least one digit, and may be followed by whitespace.
  %% A value that does not start with a digit crashes with function_clause.
  %%
  %% The result is built from decimal digits only, so it can never be
  %% negative; the spec says so, like the one of parse_content_length/1.
  -spec parse_max_forwards(binary()) -> non_neg_integer().
  parse_max_forwards(<< Digit, Rest/bits >>) when Digit >= $0, Digit =< $9 ->
      %% Seed the accumulator with the first digit's numeric value.
      number(Rest, Digit - $0).
  888. -ifdef(TEST).
  %% EUnit generator: valid values, with and without trailing whitespace.
  parse_max_forwards_test_() ->
      Cases = [
          {<<"0">>, 0},
          {<<"42 ">>, 42},
          {<<"69\t">>, 69},
          {<<"1337">>, 1337},
          {<<"1234567890">>, 1234567890},
          {<<"1234567890 ">>, 1234567890}
      ],
      [{Header, fun() -> Expected = parse_max_forwards(Header) end}
          || {Header, Expected} <- Cases].
  899. -endif.
  %% @doc Parse the Transfer-Encoding header.
  %%
  %% The exact value "chunked" is matched directly. Anything else goes
  %% through the case-insensitive token list parser and must yield at
  %% least one token.
  %%
  %% @todo Extension parameters.
  -spec parse_transfer_encoding(binary()) -> [binary()].
  parse_transfer_encoding(<<"chunked">> = Value) ->
      [Value];
  parse_transfer_encoding(Value) ->
      nonempty(token_ci_list(Value, [])).
  908. -ifdef(TEST).
  %% EUnit generator: empty list elements and surrounding whitespace
  %% are skipped.
  parse_transfer_encoding_test_() ->
      Cases = [
          {<<"a , , , ">>, [<<"a">>]},
          {<<" , , , a">>, [<<"a">>]},
          {<<"a , , b">>, [<<"a">>, <<"b">>]},
          {<<"chunked">>, [<<"chunked">>]},
          {<<"chunked, something">>, [<<"chunked">>, <<"something">>]}
      ],
      [{Header, fun() -> Expected = parse_transfer_encoding(Header) end}
          || {Header, Expected} <- Cases].
  %% EUnit generator: values with no token at all, or with two tokens
  %% separated only by whitespace, must crash.
  parse_transfer_encoding_error_test_() ->
      Invalid = [
          <<>>,
          <<" ">>,
          <<" , ">>,
          <<",,,">>,
          <<"a b">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(Header)) end}
          || Header <- Invalid].
  928. -endif.
  929. -ifdef(PERF).
  %% PERF-only: hands a parse_transfer_encoding/1 call on "chunked" to
  %% horse:repeat/2 with a count of 200000.
  horse_parse_transfer_encoding_chunked() ->
      horse:repeat(200000, parse_transfer_encoding(<<"chunked">>)).
  %% PERF-only: hands a parse_transfer_encoding/1 call on a two-token
  %% value (which does not hit the literal clause) to horse:repeat/2
  %% with a count of 200000.
  horse_parse_transfer_encoding_custom() ->
      horse:repeat(200000, parse_transfer_encoding(<<"chunked, something">>)).
  938. -endif.
  939. %% Internal.
  %% Return the argument unchanged unless it is the empty list, in
  %% which case no clause matches and the call crashes.
  nonempty(List) when not (List =:= []) ->
      List.
  %% Parse a number optionally followed by whitespace.
  %%
  %% Acc is the value of the digits read so far; each further digit
  %% multiplies it by ten and adds the digit's value.
  number(<< Digit, Rest/bits >>, Acc) when Digit >= $0, Digit =< $9 ->
      number(Rest, Acc * 10 + (Digit - $0));
  number(<< WS, Rest/bits >>, Acc) when WS =:= $\s; WS =:= $\t ->
      %% Only spaces and tabs may follow; ws_end/1 crashes otherwise.
      ws_end(Rest),
      Acc;
  number(<<>>, Acc) ->
      Acc.
  %% Return ok if the remainder consists only of spaces and tabs
  %% (possibly none); crash with function_clause on any other byte.
  ws_end(<<>>) ->
      ok;
  ws_end(<< WS, Rest/bits >>) when WS =:= $\s; WS =:= $\t ->
      ws_end(Rest).
  %% Parse a list of case insensitive tokens.
  %%
  %% token_ci_list/2 runs between tokens: it skips spaces, tabs and
  %% commas (so empty list elements are tolerated) and starts a new
  %% token on the first token character.
  %%
  %% token_ci_list/3 runs inside a token: T is the lowercased token
  %% accumulated so far and Acc the tokens already completed, in
  %% reverse order.
  %%
  %% Both catch-all byte clauses are guarded with ?IS_TOKEN so that a
  %% byte which is neither a separator nor a token character crashes
  %% with function_clause, like in the other parsers of this module,
  %% instead of being silently accepted into the token.
  token_ci_list(<<>>, Acc) -> lists:reverse(Acc);
  token_ci_list(<< $\s, R/bits >>, Acc) -> token_ci_list(R, Acc);
  token_ci_list(<< $\t, R/bits >>, Acc) -> token_ci_list(R, Acc);
  token_ci_list(<< $,, R/bits >>, Acc) -> token_ci_list(R, Acc);
  token_ci_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci_list, R, Acc, <<>>)
      end.

  token_ci_list(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  %% Whitespace ends the token; only a comma or the end may follow.
  token_ci_list(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  token_ci_list(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  token_ci_list(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]);
  token_ci_list(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci_list, R, Acc, T)
      end.
  %% After a token that was ended by whitespace: skip further spaces
  %% and tabs, then accept only the end of input or a comma. Token is
  %% pushed onto Acc in both cases; any other byte crashes.
  token_ci_list_sep(<<>>, Acc, Token) ->
      lists:reverse([Token|Acc]);
  token_ci_list_sep(<< $,, Rest/bits >>, Acc, Token) ->
      token_ci_list(Rest, [Token|Acc]);
  token_ci_list_sep(<< WS, Rest/bits >>, Acc, Token) when WS =:= $\s; WS =:= $\t ->
      token_ci_list_sep(Rest, Acc, Token).