%% cow_http_hd.erl
  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_accept_encoding/1]).
  18. -export([parse_accept_language/1]).
  19. -export([parse_connection/1]).
  20. -export([parse_content_length/1]).
  21. -export([parse_content_type/1]).
  22. -export([parse_expect/1]).
  23. -export([parse_max_forwards/1]).
  24. -export([parse_transfer_encoding/1]).
  %% A media type or media range as {Type, SubType, Params}, where Params is a
  %% list of {Key, Value} pairs.
  -type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
  -export_type([media_type/0]).
  %% A quality value expressed in thousandths: the parsers in this module turn
  %% "q=0.2" into 200 and use 1000 when no weight is given.
  -type qvalue() :: 0..1000.
  -export_type([qvalue/0]).
  29. -include("cow_inline.hrl").
  30. -ifdef(TEST).
  31. -include_lib("triq/include/triq.hrl").
  %% Generator for optional whitespace: a possibly empty list made of space
  %% and tab characters.
  ows() ->
      Blank = oneof([$\s, $\t]),
      list(Blank).
  %% The ASCII letters, lowercase first and uppercase second, as a string.
  alpha_chars() ->
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ".
  %% The ASCII digits followed by the lowercase and uppercase letters.
  alphanum_chars() ->
      "0123456789"
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ".
  %% Generator picking one character out of alpha_chars().
  alpha() ->
      Letters = alpha_chars(),
      oneof(Letters).
  %% Generator picking one character out of alphanum_chars().
  alphanum() ->
      Candidates = alphanum_chars(),
      oneof(Candidates).
  %% Generator for a single token character. Alphanumerics are weighted 99
  %% against 1 for the punctuation characters listed below.
  tchar() ->
      Punctuation = "!#$%&'*+-.^_`|~",
      frequency([
          {1, oneof(Punctuation)},
          {99, oneof(alphanum_chars())}
      ]).
  %% Generator for a token: a non-empty run of tchar() characters, returned
  %% as a binary.
  token() ->
      ?LET(Chars,
          non_empty(list(tchar())),
          list_to_binary(Chars)).
  %% The obs-text octets: every byte value from 128 to 255 inclusive, in
  %% ascending order. Built with lists:seq/2 rather than a hand-written
  %% 128-element literal so the range is obvious and cannot contain a typo.
  obs_text() ->
      lists:seq(128, 255).
  %% Generator for one character that may appear unescaped inside a quoted
  %% string: tab, space and printable ASCII other than the double quote and
  %% the backslash (weight 99), or an obs_text() octet (weight 1).
  qdtext() ->
      Unescaped = "\t\s!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~",
      frequency([
          {99, oneof(Unescaped)},
          {1, oneof(obs_text())}
      ]).
  %% Generator for a quoted pair: a backslash followed by one escaped
  %% character, which is tab, space or printable ASCII (weight 99) or an
  %% obs_text() octet (weight 1).
  quoted_pair() ->
      Escapable = "\t\s!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
      Escaped = frequency([
          {99, oneof(Escapable)},
          {1, oneof(obs_text())}
      ]),
      [$\\, Escaped].
  %% Generator for a quoted string: double quotes around a list of qdtext()
  %% characters (weight 100) and quoted_pair() escapes (weight 1).
  quoted_string() ->
      Content = list(frequency([{100, qdtext()}, {1, quoted_pair()}])),
      [$", Content, $"].
  %% Helper function for ( token / quoted-string ) values.
  %%
  %% A value shaped like [$", Content, $"] (what quoted_string() generates) is
  %% turned into the binary it denotes; any other value is returned untouched.
  unquote([$", Quoted, $"]) ->
      unquote(Quoted, <<>>);
  unquote(Token) ->
      Token.

  %% Walk the quoted content and append each character to the accumulator.
  %% A [$\\, Char] element is a quoted pair and contributes Char only.
  unquote([], Unquoted) ->
      Unquoted;
  unquote([Item|Rest], Unquoted) ->
      Char = case Item of
          [$\\, Escaped] -> Escaped;
          Plain -> Plain
      end,
      unquote(Rest, << Unquoted/binary, Char >>).
  %% Generator for a parameter as {Key, Value, OWS1, OWS2}: a token key, a
  %% token or quoted-string value and two runs of optional whitespace.
  %% The key <<"q">> is excluded.
  parameter() ->
      ?SUCHTHAT({Key, _, _, _},
          {token(), oneof([token(), quoted_string()]), ows(), ows()},
          Key =/= <<"q">>).
  %% Generator for a weight: an integer between 0 and 1000 (weight 90) or the
  %% atom undefined, standing for "no weight given" (weight 10).
  weight() ->
      Given = {90, int(0, 1000)},
      Missing = {10, undefined},
      frequency([Given, Missing]).
  %% Helper function for weight's qvalue formatting.
  %%
  %% Renders a weight in thousandths as iodata: 0 gives "0", 1000 gives "1"
  %% and anything in between gives "0." followed by three zero-padded digits.
  qvalue_to_iodata(0) ->
      <<"0">>;
  qvalue_to_iodata(1000) ->
      <<"1">>;
  qvalue_to_iodata(Q) when Q < 1000 ->
      Digits = integer_to_binary(Q),
      Prefix = if
          Q < 10 -> <<"0.00">>;
          Q < 100 -> <<"0.0">>;
          true -> <<"0.">>
      end,
      [Prefix, Digits].
  91. -endif.
  %% @doc Parse the Accept header.
  %%
  %% Returns one {MediaRange, Weight, Extensions} tuple per media range, in
  %% header order. The exact header "*/*" is answered directly without
  %% running the parser.
  -spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
  parse_accept(<<"*/*">>) ->
      AnyType = {<<"*">>, <<"*">>, []},
      [{AnyType, 1000, []}];
  parse_accept(Header) ->
      media_range_list(Header, []).
  %% Skip spaces, tabs and commas between list elements, then start reading
  %% the type of the next media range. At end of input the accumulated
  %% ranges are returned in header order.
  media_range_list(<<>>, Ranges) ->
      lists:reverse(Ranges);
  media_range_list(<< Sep, Rest/bits >>, Ranges) when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      media_range_list(Rest, Ranges);
  media_range_list(<< C, Rest/bits >>, Ranges) when ?IS_TOKEN(C) ->
      %% ?INLINE_LOWERCASE comes from cow_inline.hrl (not visible here);
      %% presumably it appends the lowercased C to the last argument.
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, <<>>)
      end.
  %% Accumulate the type of a media range until the "/" that introduces the
  %% subtype.
  media_range_type(<< $/, Rest/bits >>, Ranges, Type) ->
      media_range_subtype(Rest, Ranges, Type, <<>>);
  %% Special clause for badly behaving user agents that send * instead of */*.
  media_range_type(<< $;, Rest/bits >>, Ranges, <<"*">>) ->
      media_range_before_param(Rest, Ranges, <<"*">>, <<"*">>, []);
  media_range_type(<< C, Rest/bits >>, Ranges, Type) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, Type)
      end.
  %% Accumulate the subtype of a media range. A terminator (end of input,
  %% comma, semicolon or whitespace) is only accepted once the subtype holds
  %% at least one character; a range without parameters gets weight 1000.
  media_range_subtype(<<>>, Ranges, Type, SubType) when SubType =/= <<>> ->
      lists:reverse([{{Type, SubType, []}, 1000, []}|Ranges]);
  media_range_subtype(<< $,, Rest/bits >>, Ranges, Type, SubType) when SubType =/= <<>> ->
      media_range_list(Rest, [{{Type, SubType, []}, 1000, []}|Ranges]);
  media_range_subtype(<< $;, Rest/bits >>, Ranges, Type, SubType) when SubType =/= <<>> ->
      media_range_before_param(Rest, Ranges, Type, SubType, []);
  media_range_subtype(<< WS, Rest/bits >>, Ranges, Type, SubType)
          when SubType =/= <<>>, (WS =:= $\s orelse WS =:= $\t) ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, []);
  media_range_subtype(<< C, Rest/bits >>, Ranges, Type, SubType) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_subtype, Rest, Ranges, Type, SubType)
      end.
  %% After a complete subtype or parameter: skip whitespace, then expect the
  %% end of input, a comma (next range) or a semicolon (next parameter).
  %% Params is held in reverse order and put back in order when the range is
  %% emitted.
  media_range_before_semicolon(<<>>, Ranges, Type, SubType, Params) ->
      Range = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      lists:reverse([Range|Ranges]);
  media_range_before_semicolon(<< $,, Rest/bits >>, Ranges, Type, SubType, Params) ->
      Range = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      media_range_list(Rest, [Range|Ranges]);
  media_range_before_semicolon(<< $;, Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_before_param(Rest, Ranges, Type, SubType, Params);
  media_range_before_semicolon(<< WS, Rest/bits >>, Ranges, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, Params).
  %% After a semicolon: skip whitespace, then read either the weight
  %% (introduced by "q=") or the first character of a parameter name.
  media_range_before_param(<< WS, Rest/bits >>, Ranges, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_param(Rest, Ranges, Type, SubType, Params);
  %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  media_range_before_param(<< $q, $=, $., Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_broken_weight(Rest, Ranges, Type, SubType, Params);
  media_range_before_param(<< $q, $=, Rest/bits >>, Ranges, Type, SubType, Params) ->
      media_range_weight(Rest, Ranges, Type, SubType, Params);
  media_range_before_param(<< C, Rest/bits >>, Ranges, Type, SubType, Params) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Ranges, Type, SubType, Params, <<>>)
      end.
  %% Accumulate a parameter name until "=", then hand over to the quoted or
  %% unquoted value parser.
  %%
  %% An unquoted value is a token and therefore needs at least one character:
  %% the first value character is consumed here so that input such as
  %% "text/html;level=" fails to match instead of producing an empty value.
  %% An empty quoted string ("") is still accepted by the first clause.
  media_range_param(<< $=, $", Rest/bits >>, Ranges, Type, SubType, Params, Key) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, <<>>);
  media_range_param(<< $=, C, Rest/bits >>, Ranges, Type, SubType, Params, Key) when ?IS_TOKEN(C) ->
      %% The value keeps its original case; only the name goes through
      %% ?INLINE_LOWERCASE below.
      media_range_value(Rest, Ranges, Type, SubType, Params, Key, << C >>);
  media_range_param(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Ranges, Type, SubType, Params, Key)
      end.
  %% Read a quoted parameter value up to the closing double quote. A
  %% backslash followed by an accepted character contributes that character
  %% only; the stored value carries neither the quotes nor the escapes.
  media_range_quoted(<< $", Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, [{Key, Value}|Params]);
  media_range_quoted(<< $\\, C, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(C) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, C >>);
  media_range_quoted(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(C) ->
      media_range_quoted(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, C >>).
  %% Read an unquoted parameter value. The value ends at the end of input, a
  %% comma, a semicolon or whitespace; its characters are stored as given
  %% (no case conversion).
  media_range_value(<<>>, Ranges, Type, SubType, Params, Key, Value) ->
      AllParams = lists:reverse([{Key, Value}|Params]),
      lists:reverse([{{Type, SubType, AllParams}, 1000, []}|Ranges]);
  media_range_value(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      AllParams = lists:reverse([{Key, Value}|Params]),
      media_range_list(Rest, [{{Type, SubType, AllParams}, 1000, []}|Ranges]);
  media_range_value(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value) ->
      media_range_before_param(Rest, Ranges, Type, SubType, [{Key, Value}|Params]);
  media_range_value(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Ranges, Type, SubType, [{Key, Value}|Params]);
  media_range_value(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Key, Value)
          when ?IS_TOKEN(C) ->
      media_range_value(Rest, Ranges, Type, SubType, Params, Key, << Value/binary, C >>).
  %% Special function for badly behaving user agents that send .123 instead of 0.123.
  %%
  %% Reads one to three digits after the dot and converts them to
  %% thousandths; the longest run of digits is tried first.
  media_range_broken_weight(<< D1, D2, D3, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_broken_weight(<< D1, D2, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_broken_weight(<< D1, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9 ->
      Weight = (D1 - $0) * 100,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []).
  %% Read the qvalue that follows "q=" and convert it to thousandths.
  %% Accepted spellings are "1" with up to three zero decimals and "0" with
  %% up to three decimal digits. Longer spellings are listed first so the
  %% whole number is consumed before moving on.
  media_range_weight(<< "1.000", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.00", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.0", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 1000, []);
  media_range_weight(<< "0.", D1, D2, D3, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", D1, D2, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", D1, Rest/bits >>, Ranges, Type, SubType, Params)
          when D1 >= $0, D1 =< $9 ->
      Weight = (D1 - $0) * 100,
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, []);
  media_range_weight(<< "0.", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 0, []);
  media_range_weight(<< "0", Rest/bits >>, Ranges, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, 0, []).
  %% After the weight or a complete extension: skip whitespace, then expect
  %% the end of input, a comma (next range) or a semicolon (next extension).
  %% Params and Exts are held in reverse order and put back in order when the
  %% range is emitted.
  accept_before_semicolon(<<>>, Ranges, Type, SubType, Params, Weight, Exts) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse(Exts)},
      lists:reverse([Range|Ranges]);
  accept_before_semicolon(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse(Exts)},
      media_range_list(Rest, [Range|Ranges]);
  accept_before_semicolon(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, Exts);
  accept_before_semicolon(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, Exts).
  %% After a semicolon that follows the weight: skip whitespace and start
  %% reading the name of an accept extension.
  accept_before_ext(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, Exts);
  accept_before_ext(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Ranges, Type, SubType, Params, Weight, Exts, <<>>)
      end.
  %% Accumulate the name of an accept extension. An extension is either a
  %% bare name, stored as a binary, or name=value, stored as {Name, Value}.
  %%
  %% An unquoted value is a token and therefore needs at least one character:
  %% the first value character is consumed here so that input such as
  %% "text/html;q=1;ext=" fails to match instead of producing an empty value.
  %% An empty quoted string ("") is still accepted.
  accept_ext(<<>>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([Key|Exts])},
      lists:reverse([Range|Ranges]);
  accept_ext(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      Range = {{Type, SubType, lists:reverse(Params)}, Weight, lists:reverse([Key|Exts])},
      media_range_list(Rest, [Range|Ranges]);
  accept_ext(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, [Key|Exts]);
  accept_ext(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [Key|Exts]);
  accept_ext(<< $=, $", Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, <<>>);
  accept_ext(<< $=, C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key)
          when ?IS_TOKEN(C) ->
      %% The value keeps its original case; only the name is lowercased.
      accept_value(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << C >>);
  accept_ext(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Ranges, Type, SubType, Params, Weight, Exts, Key)
      end.
  %% Read a quoted extension value up to the closing double quote. A
  %% backslash followed by an accepted character contributes that character
  %% only; the stored value carries neither the quotes nor the escapes.
  accept_quoted(<< $", Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value}|Exts]);
  accept_quoted(<< $\\, C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_VCHAR(C) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, C >>);
  accept_quoted(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_VCHAR(C) ->
      accept_quoted(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, C >>).
  %% Read an unquoted extension value. The value ends at the end of input, a
  %% comma, a semicolon or whitespace; its characters are stored as given.
  accept_value(<<>>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      AllExts = lists:reverse([{Key, Value}|Exts]),
      lists:reverse([{{Type, SubType, lists:reverse(Params)}, Weight, AllExts}|Ranges]);
  accept_value(<< $,, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      AllExts = lists:reverse([{Key, Value}|Exts]),
      media_range_list(Rest, [{{Type, SubType, lists:reverse(Params)}, Weight, AllExts}|Ranges]);
  accept_value(<< $;, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value) ->
      accept_before_ext(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value}|Exts]);
  accept_value(<< WS, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Ranges, Type, SubType, Params, Weight, [{Key, Value}|Exts]);
  accept_value(<< C, Rest/bits >>, Ranges, Type, SubType, Params, Weight, Exts, Key, Value)
          when ?IS_TOKEN(C) ->
      accept_value(Rest, Ranges, Type, SubType, Params, Weight, Exts, Key, << Value/binary, C >>).
  208. -ifdef(TEST).
  %% Generator for one accept extension: either a bare token or a full
  %% parameter() tuple.
  accept_ext() ->
      Bare = token(),
      KeyValue = parameter(),
      oneof([Bare, KeyValue]).
  %% Generator for the extensions that follow the weight: the empty list
  %% (weight 90) or a generated list of accept_ext() values (weight 10).
  accept_params() ->
      None = {90, []},
      Some = {10, list(accept_ext())},
      frequency([None, Some]).
  %% Generator for one Accept list element. Yields
  %% {Type, SubType, Params, Weight, Exts, Binary} where Binary is the
  %% element's wire form. The weight and the extensions are only written to
  %% Binary when Weight is not undefined.
  accept() ->
      ?LET({Type, SubType, Params, Weight, Exts},
          {token(), token(), list(parameter()), weight(), accept_params()},
          begin
              ParamsIo = [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- Params],
              WeightIo = case Weight of
                  undefined ->
                      [];
                  _ ->
                      ExtsIo = [case Ext of
                          {EK, EV, EOWS1, EOWS2} -> [EOWS1, $;, EOWS2, EK, $=, EV];
                          EK -> [$;, EK]
                      end || Ext <- Exts],
                      [[<<";q=">>, qvalue_to_iodata(Weight)], ExtsIo]
              end,
              Bin = iolist_to_binary([Type, $/, SubType, ParamsIo, WeightIo]),
              {Type, SubType, Params, Weight, Exts, Bin}
          end
      ).
  %% Property: parsing a comma-joined list of generated elements gives back,
  %% element by element, the lowercased type, subtype and parameter names,
  %% the unquoted values, the weight (1000 when none was generated) and the
  %% extensions (none when no weight was generated).
  prop_parse_accept() ->
      ?FORALL(Generated,
          non_empty(list(accept())),
          begin
              %% Every element is prefixed with a comma; drop the leading one.
              << _, Accept/binary >> = iolist_to_binary([[$,, Bin] || {_, _, _, _, _, Bin} <- Generated]),
              Parsed = parse_accept(Accept),
              Checks = [begin
                  WantParams = [{?INLINE_LOWERCASE_BC(K), unquote(V)} || {K, V, _, _} <- P],
                  WantExts = [case Ext of
                      {K, V, _, _} -> {?INLINE_LOWERCASE_BC(K), unquote(V)};
                      K -> ?INLINE_LOWERCASE_BC(K)
                  end || Ext <- E],
                  ResT =:= ?INLINE_LOWERCASE_BC(T)
                      andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
                      andalso ResP =:= WantParams
                      andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
                      andalso ((W =:= undefined andalso ResE =:= [])
                          orelse (W =/= undefined andalso ResE =:= WantExts))
              end || {{T, S, P, W, E, _}, {{ResT, ResS, ResP}, ResW, ResE}}
                  <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Checks)
          end
      ).
  %% Test generator: each {Header, Expected} pair becomes a test titled with
  %% the header that asserts parse_accept(Header) matches Expected.
  parse_accept_test_() ->
      Cases = [
          {<<>>, []},
          {<<" ">>, []},
          {<<"audio/*; q=0.2, audio/basic">>, [
              {{<<"audio">>, <<"*">>, []}, 200, []},
              {{<<"audio">>, <<"basic">>, []}, 1000, []}
          ]},
          {<<"text/plain; q=0.5, text/html, "
              "text/x-dvi; q=0.8, text/x-c">>, [
              {{<<"text">>, <<"plain">>, []}, 500, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"x-dvi">>, []}, 800, []},
              {{<<"text">>, <<"x-c">>, []}, 1000, []}
          ]},
          {<<"text/*, text/html, text/html;level=1, */*">>, [
              {{<<"text">>, <<"*">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 1000, []}
          ]},
          {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>, [
              {{<<"text">>, <<"*">>, []}, 300, []},
              {{<<"text">>, <<"html">>, []}, 700, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
              {{<<"*">>, <<"*">>, []}, 500, []}
          ]},
          {<<"text/html;level=1;quoted=\"hi hi hi\";"
              "q=0.123;standalone;complex=gits, text/plain">>, [
              {{<<"text">>, <<"html">>,
                  [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
                  [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
              {{<<"text">>, <<"plain">>, []}, 1000, []}
          ]},
          {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"image">>, <<"gif">>, []}, 1000, []},
              {{<<"image">>, <<"jpeg">>, []}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 200, []},
              {{<<"*">>, <<"*">>, []}, 200, []}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept(Header) end}
          || {Header, Expected} <- Cases].
  %% Test generator: each malformed header must make parse_accept/1 fail,
  %% which the old-style catch reports as an {'EXIT', _} tuple.
  parse_accept_error_test_() ->
      Malformed = [
          <<"audio/basic, */;q=0.5">>,
          <<"audio/, audio/basic">>,
          <<"aud\tio/basic">>,
          <<"audio/basic;t=\"zero \\", 0, " woo\"">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept(Header)) end}
          || Header <- Malformed].
  305. -endif.
  306. -ifdef(PERF).
  %% Benchmark entry (only compiled when PERF is defined) running
  %% parse_accept/1 on a fixed five-element Accept header.
  %% NOTE(review): presumably horse:repeat/2 evaluates the expression 20000
  %% times; the horse tool is not part of this file - confirm.
  horse_parse_accept() ->
      horse:repeat(20000,
          parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>)
      ).
  312. -endif.
  %% @doc Parse the Accept-Charset header.
  %%
  %% Returns {Charset, Weight} pairs in header order. The parsed list is
  %% passed through nonempty/1, which is defined elsewhere in this module;
  %% the error test in this file expects an empty header to fail.
  -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  parse_accept_charset(Charset) ->
      Charsets = conneg_list(Charset, []),
      nonempty(Charsets).
  %% Generic "token with optional weight" list parser, used for both
  %% Accept-Charset and Accept-Encoding. Skips spaces, tabs and commas
  %% between elements and starts reading the next token; at end of input the
  %% accumulated pairs are returned in header order.
  conneg_list(<<>>, Pairs) ->
      lists:reverse(Pairs);
  conneg_list(<< Sep, Rest/bits >>, Pairs) when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      conneg_list(Rest, Pairs);
  conneg_list(<< C, Rest/bits >>, Pairs) when ?IS_TOKEN(C) ->
      %% ?INLINE_LOWERCASE comes from cow_inline.hrl (not visible here);
      %% presumably it appends the lowercased C to the last argument.
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Pairs, <<>>)
      end.
  %% Accumulate one token. Without an explicit weight the token is recorded
  %% with weight 1000.
  conneg(<<>>, Pairs, Token) ->
      lists:reverse([{Token, 1000}|Pairs]);
  conneg(<< $,, Rest/bits >>, Pairs, Token) ->
      conneg_list(Rest, [{Token, 1000}|Pairs]);
  conneg(<< $;, Rest/bits >>, Pairs, Token) ->
      conneg_before_weight(Rest, Pairs, Token);
  conneg(<< WS, Rest/bits >>, Pairs, Token) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Pairs, Token);
  conneg(<< C, Rest/bits >>, Pairs, Token) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Pairs, Token)
      end.
  %% After a token followed by whitespace: skip the remaining whitespace,
  %% then expect the end of input, a comma or the semicolon that introduces
  %% the weight.
  conneg_before_semicolon(<<>>, Pairs, Token) ->
      lists:reverse([{Token, 1000}|Pairs]);
  conneg_before_semicolon(<< $,, Rest/bits >>, Pairs, Token) ->
      conneg_list(Rest, [{Token, 1000}|Pairs]);
  conneg_before_semicolon(<< $;, Rest/bits >>, Pairs, Token) ->
      conneg_before_weight(Rest, Pairs, Token);
  conneg_before_semicolon(<< WS, Rest/bits >>, Pairs, Token) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Pairs, Token).
  %% After a semicolon: skip whitespace, then expect "q=" and the weight.
  conneg_before_weight(<< WS, Rest/bits >>, Pairs, Token) when WS =:= $\s; WS =:= $\t ->
      conneg_before_weight(Rest, Pairs, Token);
  conneg_before_weight(<< $q, $=, Rest/bits >>, Pairs, Token) ->
      conneg_weight(Rest, Pairs, Token);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending token is recorded with weight 1000 and a new one is started.
  conneg_before_weight(<< C, Rest/bits >>, Pairs, Token) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, [{Token, 1000}|Pairs], <<>>)
      end.
  %% Read the qvalue that follows "q=", convert it to thousandths and record
  %% the {Token, Weight} pair. Accepted spellings are "1" with up to three
  %% zero decimals and "0" with up to three decimal digits; longer spellings
  %% are listed first so the whole number is consumed.
  conneg_weight(<< "1.000", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Pairs]);
  conneg_weight(<< "1.00", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Pairs]);
  conneg_weight(<< "1.0", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Pairs]);
  conneg_weight(<< "1.", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Pairs]);
  conneg_weight(<< "1", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Pairs]);
  conneg_weight(<< "0.", D1, D2, D3, Rest/bits >>, Pairs, Token)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9, D3 >= $0, D3 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      conneg_list_sep(Rest, [{Token, Weight}|Pairs]);
  conneg_weight(<< "0.", D1, D2, Rest/bits >>, Pairs, Token)
          when D1 >= $0, D1 =< $9, D2 >= $0, D2 =< $9 ->
      Weight = (D1 - $0) * 100 + (D2 - $0) * 10,
      conneg_list_sep(Rest, [{Token, Weight}|Pairs]);
  conneg_weight(<< "0.", D1, Rest/bits >>, Pairs, Token)
          when D1 >= $0, D1 =< $9 ->
      Weight = (D1 - $0) * 100,
      conneg_list_sep(Rest, [{Token, Weight}|Pairs]);
  conneg_weight(<< "0.", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 0}|Pairs]);
  conneg_weight(<< "0", Rest/bits >>, Pairs, Token) ->
      conneg_list_sep(Rest, [{Token, 0}|Pairs]).
  %% After a weight: skip whitespace, then expect either the end of input or
  %% the comma that separates the next element.
  conneg_list_sep(<<>>, Pairs) ->
      lists:reverse(Pairs);
  conneg_list_sep(<< WS, Rest/bits >>, Pairs) when WS =:= $\s; WS =:= $\t ->
      conneg_list_sep(Rest, Pairs);
  conneg_list_sep(<< $,, Rest/bits >>, Pairs) ->
      conneg_list(Rest, Pairs).
  367. -ifdef(TEST).
  %% Generator for one Accept-Charset element: {Charset, Weight, Binary},
  %% where Binary is the charset followed by ";q=..." when Weight is not
  %% undefined.
  accept_charset() ->
      ?LET({Charset, Weight},
          {token(), weight()},
          begin
              WeightIo = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {Charset, Weight, iolist_to_binary([Charset, WeightIo])}
          end
      ).
  %% Property: parsing a comma-joined list of generated elements gives back,
  %% element by element, the lowercased charset and the weight (1000 when
  %% none was generated).
  prop_parse_accept_charset() ->
      ?FORALL(Generated,
          non_empty(list(accept_charset())),
          begin
              %% Every element is prefixed with a comma; drop the leading one.
              << _, AcceptCharset/binary >> = iolist_to_binary([[$,, Bin] || {_, _, Bin} <- Generated]),
              Parsed = parse_accept_charset(AcceptCharset),
              Checks = [begin
                  ResC =:= ?INLINE_LOWERCASE_BC(Ch)
                      andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
              end || {{Ch, W, _}, {ResC, ResW}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Checks)
          end).
  %% Test generator: each {Header, Expected} pair becomes a test titled with
  %% the header that asserts parse_accept_charset(Header) matches Expected.
  parse_accept_charset_test_() ->
      Cases = [
          {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
              {<<"iso-8859-5">>, 1000},
              {<<"unicode-1-1">>, 800}
          ]},
          %% Some user agents send this invalid value for the Accept-Charset header
          {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
              {<<"iso-8859-1">>, 1000},
              {<<"utf-8">>, 700},
              {<<"*">>, 700}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_charset(Header) end}
          || {Header, Expected} <- Cases].
  %% Test generator: an empty header must make parse_accept_charset/1 fail,
  %% which the old-style catch reports as an {'EXIT', _} tuple.
  parse_accept_charset_error_test_() ->
      Malformed = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept_charset(Header)) end}
          || Header <- Malformed].
  407. -endif.
  408. -ifdef(PERF).
  %% Benchmark entry (only compiled when PERF is defined) running
  %% parse_accept_charset/1 on a fixed two-element header.
  %% NOTE(review): presumably horse:repeat/2 evaluates the expression 20000
  %% times; the horse tool is not part of this file - confirm.
  horse_parse_accept_charset() ->
      horse:repeat(20000,
          parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
      ).
  413. -endif.
  %% @doc Parse the Accept-Encoding header.
  %%
  %% Returns {Encoding, Weight} pairs in header order. Unlike the charset
  %% parser the result is not checked for emptiness: an empty header gives [].
  -spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
  parse_accept_encoding(Header) ->
      NoPairsYet = [],
      conneg_list(Header, NoPairsYet).
  418. -ifdef(TEST).
  %% Generator for one Accept-Encoding element: {Encoding, Weight, Binary},
  %% where Binary is the encoding followed by ";q=..." when Weight is not
  %% undefined.
  accept_encoding() ->
      ?LET({Encoding, Weight},
          {token(), weight()},
          begin
              WeightIo = case Weight of
                  undefined -> [];
                  _ -> [<<";q=">>, qvalue_to_iodata(Weight)]
              end,
              {Encoding, Weight, iolist_to_binary([Encoding, WeightIo])}
          end
      ).
  %% Property: parsing a comma-joined list of generated elements gives back,
  %% element by element, the lowercased encoding and the weight (1000 when
  %% none was generated).
  prop_parse_accept_encoding() ->
      ?FORALL(Generated,
          non_empty(list(accept_encoding())),
          begin
              %% Every element is prefixed with a comma; drop the leading one.
              << _, AcceptEncoding/binary >> = iolist_to_binary([[$,, Bin] || {_, _, Bin} <- Generated]),
              Parsed = parse_accept_encoding(AcceptEncoding),
              Checks = [begin
                  ResE =:= ?INLINE_LOWERCASE_BC(E)
                      andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
              end || {{E, W, _}, {ResE, ResW}} <- lists:zip(Generated, Parsed)],
              [true] =:= lists:usort(Checks)
          end).
  %% Test generator: each {Header, Expected} pair becomes a test titled with
  %% the header that asserts parse_accept_encoding(Header) matches Expected.
  parse_accept_encoding_test_() ->
      Cases = [
          {<<>>, []},
          {<<"*">>, [{<<"*">>, 1000}]},
          {<<"compress, gzip">>, [
              {<<"compress">>, 1000},
              {<<"gzip">>, 1000}
          ]},
          {<<"compress;q=0.5, gzip;q=1.0">>, [
              {<<"compress">>, 500},
              {<<"gzip">>, 1000}
          ]},
          {<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
              {<<"gzip">>, 1000},
              {<<"identity">>, 500},
              {<<"*">>, 0}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_encoding(Header) end}
          || {Header, Expected} <- Cases].
  458. -endif.
  459. -ifdef(PERF).
  %% Benchmark entry (only compiled when PERF is defined) running
  %% parse_accept_encoding/1 on a fixed three-element header.
  %% NOTE(review): presumably horse:repeat/2 evaluates the expression 20000
  %% times; the horse tool is not part of this file - confirm.
  horse_parse_accept_encoding() ->
      horse:repeat(20000,
          parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
      ).
  464. -endif.
  %% @doc Parse the Accept-Language header.
  %%
  %% Returns {LanguageRange, Weight} pairs in header order. The parsed list
  %% is passed through nonempty/1, which is defined elsewhere in this module.
  -spec parse_accept_language(binary()) -> [{binary(), qvalue()}].
  parse_accept_language(LanguageRange) ->
      Ranges = language_range_list(LanguageRange, []),
      nonempty(Ranges).
  %% Skip spaces, tabs and commas between elements, then start the next
  %% language range: either the wildcard "*" or a tag whose first subtag
  %% begins with a letter. At end of input the accumulated pairs are
  %% returned in header order.
  language_range_list(<<>>, Pairs) ->
      lists:reverse(Pairs);
  language_range_list(<< Sep, Rest/bits >>, Pairs) when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      language_range_list(Rest, Pairs);
  language_range_list(<< $*, Rest/bits >>, Pairs) ->
      language_range_before_semicolon(Rest, Pairs, <<"*">>);
  language_range_list(<< C, Rest/bits >>, Pairs) when ?IS_ALPHA(C) ->
      %% The count starts at 1: this letter is the first of the subtag.
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, Pairs, 1, <<>>)
      end.
  %% Accumulate the first subtag of a language tag, which only takes letters.
  %% Count is the number of characters read so far in this subtag; any
  %% non-terminating character after the eighth raises badarg.
  language_range(<<>>, Pairs, _, Tag) ->
      lists:reverse([{Tag, 1000}|Pairs]);
  language_range(<< $,, Rest/bits >>, Pairs, _, Tag) ->
      language_range_list(Rest, [{Tag, 1000}|Pairs]);
  language_range(<< $;, Rest/bits >>, Pairs, _, Tag) ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range(<< WS, Rest/bits >>, Pairs, _, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_semicolon(Rest, Pairs, Tag);
  language_range(<< $-, Rest/bits >>, Pairs, _, Tag) ->
      %% A dash starts a new subtag; its character count restarts at 0.
      language_range_sub(Rest, Pairs, 0, << Tag/binary, $- >>);
  language_range(<< _, _/bits >>, _, 8, _) ->
      error(badarg);
  language_range(<< C, Rest/bits >>, Pairs, Count, Tag) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, Pairs, Count + 1, Tag)
      end.
  %% Accumulate a subtag that follows a dash; letters and digits are both
  %% accepted. A terminator or another dash is only accepted once the subtag
  %% holds at least one character (Count > 0), and any other character after
  %% the eighth raises badarg.
  language_range_sub(<<>>, Pairs, Count, Tag) when Count > 0 ->
      lists:reverse([{Tag, 1000}|Pairs]);
  language_range_sub(<< $,, Rest/bits >>, Pairs, Count, Tag) when Count > 0 ->
      language_range_list(Rest, [{Tag, 1000}|Pairs]);
  language_range_sub(<< $;, Rest/bits >>, Pairs, Count, Tag) when Count > 0 ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range_sub(<< WS, Rest/bits >>, Pairs, Count, Tag)
          when Count > 0, (WS =:= $\s orelse WS =:= $\t) ->
      language_range_before_semicolon(Rest, Pairs, Tag);
  language_range_sub(<< $-, Rest/bits >>, Pairs, Count, Tag) when Count > 0 ->
      language_range_sub(Rest, Pairs, 0, << Tag/binary, $- >>);
  language_range_sub(<< _, _/bits >>, _, 8, _) ->
      error(badarg);
  language_range_sub(<< C, Rest/bits >>, Pairs, Count, Tag) when ?IS_ALPHA(C); ?IS_DIGIT(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range_sub, Rest, Pairs, Count + 1, Tag)
      end.
  %% After a complete language range: skip whitespace, then expect the end
  %% of input, a comma or the semicolon that introduces the weight.
  language_range_before_semicolon(<<>>, Pairs, Tag) ->
      lists:reverse([{Tag, 1000}|Pairs]);
  language_range_before_semicolon(<< $,, Rest/bits >>, Pairs, Tag) ->
      language_range_list(Rest, [{Tag, 1000}|Pairs]);
  language_range_before_semicolon(<< $;, Rest/bits >>, Pairs, Tag) ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range_before_semicolon(<< WS, Rest/bits >>, Pairs, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_semicolon(Rest, Pairs, Tag).
  %% After a semicolon: skip whitespace, then expect "q=" and the weight.
  language_range_before_weight(<< WS, Rest/bits >>, Pairs, Tag) when WS =:= $\s; WS =:= $\t ->
      language_range_before_weight(Rest, Pairs, Tag);
  language_range_before_weight(<< $q, $=, Rest/bits >>, Pairs, Tag) ->
      language_range_weight(Rest, Pairs, Tag);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending tag is recorded with weight 1000 and a new one is started.
  language_range_before_weight(<< C, Rest/bits >>, Pairs, Tag) when ?IS_ALPHA(C) ->
      case C of
          ?INLINE_LOWERCASE(language_range, Rest, [{Tag, 1000}|Pairs], 1, <<>>)
      end.
  513. language_range_weight(<< "1.000", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  514. language_range_weight(<< "1.00", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  515. language_range_weight(<< "1.0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  516. language_range_weight(<< "1.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  517. language_range_weight(<< "1", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
  518. language_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T)
  519. when A >= $0, A =< $9, B >= $0, B =< $9, C >= $0, C =< $9 ->
  520. language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
  521. language_range_weight(<< "0.", A, B, R/bits >>, Acc, T)
  522. when A >= $0, A =< $9, B >= $0, B =< $9 ->
  523. language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
  524. language_range_weight(<< "0.", A, R/bits >>, Acc, T)
  525. when A >= $0, A =< $9 ->
  526. language_range_list_sep(R, [{T, (A - $0) * 100}|Acc]);
  527. language_range_weight(<< "0.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]);
  528. language_range_weight(<< "0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]).
  529. language_range_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  530. language_range_list_sep(<< $\s, R/bits >>, Acc) -> language_range_list_sep(R, Acc);
  531. language_range_list_sep(<< $\t, R/bits >>, Acc) -> language_range_list_sep(R, Acc);
  532. language_range_list_sep(<< $,, R/bits >>, Acc) -> language_range_list(R, Acc).
  533. -ifdef(TEST).
  534. language_tag() ->
  535. oneof([
  536. [alpha()],
  537. [alpha(), alpha()],
  538. [alpha(), alpha(), alpha()],
  539. [alpha(), alpha(), alpha(), alpha()],
  540. [alpha(), alpha(), alpha(), alpha(), alpha()],
  541. [alpha(), alpha(), alpha(), alpha(), alpha(), alpha()],
  542. [alpha(), alpha(), alpha(), alpha(), alpha(), alpha(), alpha()],
  543. [alpha(), alpha(), alpha(), alpha(), alpha(), alpha(), alpha(), alpha()]
  544. ]).
  545. language_subtag() ->
  546. [$-, oneof([
  547. [alphanum()],
  548. [alphanum(), alphanum()],
  549. [alphanum(), alphanum(), alphanum()],
  550. [alphanum(), alphanum(), alphanum(), alphanum()],
  551. [alphanum(), alphanum(), alphanum(), alphanum(), alphanum()],
  552. [alphanum(), alphanum(), alphanum(), alphanum(), alphanum(), alphanum()],
  553. [alphanum(), alphanum(), alphanum(), alphanum(), alphanum(), alphanum(), alphanum()],
  554. [alphanum(), alphanum(), alphanum(), alphanum(), alphanum(), alphanum(), alphanum(), alphanum()]
  555. ])].
  556. language_range() ->
  557. [language_tag(), list(language_subtag())].
  558. accept_language() ->
  559. ?LET({R, W},
  560. {language_range(), weight()},
  561. {iolist_to_binary(R), W, iolist_to_binary([R, case W of
  562. undefined -> [];
  563. _ -> [<<";q=">>, qvalue_to_iodata(W)]
  564. end])}
  565. ).
  566. prop_parse_accept_language() ->
  567. ?FORALL(L,
  568. non_empty(list(accept_language())),
  569. begin
  570. << _, AcceptLanguage/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
  571. ResL = parse_accept_language(AcceptLanguage),
  572. CheckedL = [begin
  573. ResR =:= ?INLINE_LOWERCASE_BC(R)
  574. andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
  575. end || {{R, W, _}, {ResR, ResW}} <- lists:zip(L, ResL)],
  576. [true] =:= lists:usort(CheckedL)
  577. end).
  578. parse_accept_language_test_() ->
  579. Tests = [
  580. {<<"da, en-gb;q=0.8, en;q=0.7">>, [
  581. {<<"da">>, 1000},
  582. {<<"en-gb">>, 800},
  583. {<<"en">>, 700}
  584. ]},
  585. {<<"en, en-US, en-cockney, i-cherokee, x-pig-latin, es-419">>, [
  586. {<<"en">>, 1000},
  587. {<<"en-us">>, 1000},
  588. {<<"en-cockney">>, 1000},
  589. {<<"i-cherokee">>, 1000},
  590. {<<"x-pig-latin">>, 1000},
  591. {<<"es-419">>, 1000}
  592. ]}
  593. ],
  594. [{V, fun() -> R = parse_accept_language(V) end} || {V, R} <- Tests].
  595. parse_accept_language_error_test_() ->
  596. Tests = [
  597. <<>>,
  598. <<"loooooong">>,
  599. <<"en-us-loooooong">>,
  600. <<"419-en-us">>
  601. ],
  602. [{V, fun() -> {'EXIT', _} = (catch parse_accept_language(V)) end} || V <- Tests].
  603. -endif.
  604. -ifdef(PERF).
  605. horse_parse_accept_language() ->
  606. horse:repeat(20000,
  607. parse_accept_language(<<"da, en-gb;q=0.8, en;q=0.7">>)
  608. ).
  609. -endif.
  610. %% @doc Parse the Connection header.
  611. -spec parse_connection(binary()) -> [binary()].
  612. parse_connection(<<"close">>) ->
  613. [<<"close">>];
  614. parse_connection(<<"keep-alive">>) ->
  615. [<<"keep-alive">>];
  616. parse_connection(Connection) ->
  617. nonempty(token_ci_list(Connection, [])).
  618. -ifdef(TEST).
  619. prop_parse_connection() ->
  620. ?FORALL(L,
  621. non_empty(list(token())),
  622. begin
  623. << _, Connection/binary >> = iolist_to_binary([[$,, C] || C <- L]),
  624. ResL = parse_connection(Connection),
  625. CheckedL = [?INLINE_LOWERCASE_BC(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)],
  626. [true] =:= lists:usort(CheckedL)
  627. end).
  628. parse_connection_test_() ->
  629. Tests = [
  630. {<<"close">>, [<<"close">>]},
  631. {<<"ClOsE">>, [<<"close">>]},
  632. {<<"Keep-Alive">>, [<<"keep-alive">>]},
  633. {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
  634. ],
  635. [{V, fun() -> R = parse_connection(V) end} || {V, R} <- Tests].
  636. parse_connection_error_test_() ->
  637. Tests = [
  638. <<>>
  639. ],
  640. [{V, fun() -> {'EXIT', _} = (catch parse_connection(V)) end} || V <- Tests].
  641. -endif.
  642. -ifdef(PERF).
  643. horse_parse_connection_close() ->
  644. horse:repeat(200000,
  645. parse_connection(<<"close">>)
  646. ).
  647. horse_parse_connection_keepalive() ->
  648. horse:repeat(200000,
  649. parse_connection(<<"keep-alive">>)
  650. ).
  651. horse_parse_connection_keepalive_upgrade() ->
  652. horse:repeat(200000,
  653. parse_connection(<<"keep-alive, upgrade">>)
  654. ).
  655. -endif.
  656. %% @doc Parse the Content-Length header.
  657. %%
  658. %% The value has at least one digit, and may be followed by whitespace.
  659. -spec parse_content_length(binary()) -> non_neg_integer().
  660. parse_content_length(<< $0 >>) -> 0;
  661. parse_content_length(<< $0, R/bits >>) -> number(R, 0);
  662. parse_content_length(<< $1, R/bits >>) -> number(R, 1);
  663. parse_content_length(<< $2, R/bits >>) -> number(R, 2);
  664. parse_content_length(<< $3, R/bits >>) -> number(R, 3);
  665. parse_content_length(<< $4, R/bits >>) -> number(R, 4);
  666. parse_content_length(<< $5, R/bits >>) -> number(R, 5);
  667. parse_content_length(<< $6, R/bits >>) -> number(R, 6);
  668. parse_content_length(<< $7, R/bits >>) -> number(R, 7);
  669. parse_content_length(<< $8, R/bits >>) -> number(R, 8);
  670. parse_content_length(<< $9, R/bits >>) -> number(R, 9).
  671. -ifdef(TEST).
  672. prop_parse_content_length() ->
  673. ?FORALL(
  674. X,
  675. non_neg_integer(),
  676. X =:= parse_content_length(integer_to_binary(X))
  677. ).
  678. parse_content_length_test_() ->
  679. Tests = [
  680. {<<"0">>, 0},
  681. {<<"42 ">>, 42},
  682. {<<"69\t">>, 69},
  683. {<<"1337">>, 1337},
  684. {<<"1234567890">>, 1234567890},
  685. {<<"1234567890 ">>, 1234567890}
  686. ],
  687. [{V, fun() -> R = parse_content_length(V) end} || {V, R} <- Tests].
  688. parse_content_length_error_test_() ->
  689. Tests = [
  690. <<>>,
  691. <<"123, 123">>,
  692. <<"4.17">>
  693. ],
  694. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  695. -endif.
  696. -ifdef(PERF).
  697. horse_parse_content_length_zero() ->
  698. horse:repeat(100000,
  699. parse_content_length(<<"0">>)
  700. ).
  701. horse_parse_content_length_giga() ->
  702. horse:repeat(100000,
  703. parse_content_length(<<"1234567890">>)
  704. ).
  705. -endif.
  706. %% @doc Parse the Content-Type header.
  707. -spec parse_content_type(binary()) -> media_type().
  708. parse_content_type(<< C, R/bits >>) when ?IS_TOKEN(C) ->
  709. case C of
  710. ?INLINE_LOWERCASE(media_type, R, <<>>)
  711. end.
  712. media_type(<< $/, C, R/bits >>, T) when ?IS_TOKEN(C) ->
  713. case C of
  714. ?INLINE_LOWERCASE(media_subtype, R, T, <<>>)
  715. end;
  716. media_type(<< C, R/bits >>, T) when ?IS_TOKEN(C) ->
  717. case C of
  718. ?INLINE_LOWERCASE(media_type, R, T)
  719. end.
  720. media_subtype(<<>>, T, S) -> {T, S, []};
  721. media_subtype(<< $;, R/bits >>, T, S) -> media_before_param(R, T, S, []);
  722. media_subtype(<< $\s, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  723. media_subtype(<< $\t, R/bits >>, T, S) -> media_before_semicolon(R, T, S, []);
  724. media_subtype(<< C, R/bits >>, T, S) when ?IS_TOKEN(C) ->
  725. case C of
  726. ?INLINE_LOWERCASE(media_subtype, R, T, S)
  727. end.
  728. media_before_semicolon(<<>>, T, S, P) -> {T, S, lists:reverse(P)};
  729. media_before_semicolon(<< $;, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  730. media_before_semicolon(<< $\s, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P);
  731. media_before_semicolon(<< $\t, R/bits >>, T, S, P) -> media_before_semicolon(R, T, S, P).
  732. media_before_param(<< $\s, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  733. media_before_param(<< $\t, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
  734. media_before_param(<< "charset=", $", R/bits >>, T, S, P) -> media_charset_quoted(R, T, S, P, <<>>);
  735. media_before_param(<< "charset=", R/bits >>, T, S, P) -> media_charset(R, T, S, P, <<>>);
  736. media_before_param(<< C, R/bits >>, T, S, P) when ?IS_TOKEN(C) ->
  737. case C of
  738. ?INLINE_LOWERCASE(media_param, R, T, S, P, <<>>)
  739. end.
  740. media_charset_quoted(<< $", R/bits >>, T, S, P, V) ->
  741. media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  742. media_charset_quoted(<< $\\, C, R/bits >>, T, S, P, V) when ?IS_VCHAR(C) ->
  743. case C of
  744. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  745. end;
  746. media_charset_quoted(<< C, R/bits >>, T, S, P, V) when ?IS_VCHAR(C) ->
  747. case C of
  748. ?INLINE_LOWERCASE(media_charset_quoted, R, T, S, P, V)
  749. end.
  750. media_charset(<<>>, T, S, P, V) -> {T, S, lists:reverse([{<<"charset">>, V}|P])};
  751. media_charset(<< $;, R/bits >>, T, S, P, V) -> media_before_param(R, T, S, [{<<"charset">>, V}|P]);
  752. media_charset(<< $\s, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  753. media_charset(<< $\t, R/bits >>, T, S, P, V) -> media_before_semicolon(R, T, S, [{<<"charset">>, V}|P]);
  754. media_charset(<< C, R/bits >>, T, S, P, V) when ?IS_TOKEN(C) ->
  755. case C of
  756. ?INLINE_LOWERCASE(media_charset, R, T, S, P, V)
  757. end.
  758. media_param(<< $=, $", R/bits >>, T, S, P, K) -> media_quoted(R, T, S, P, K, <<>>);
  759. media_param(<< $=, R/bits >>, T, S, P, K) -> media_value(R, T, S, P, K, <<>>);
  760. media_param(<< C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) ->
  761. case C of
  762. ?INLINE_LOWERCASE(media_param, R, T, S, P, K)
  763. end.
  764. media_quoted(<< $", R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  765. media_quoted(<< $\\, C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>);
  766. media_quoted(<< C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>).
  767. media_value(<<>>, T, S, P, K, V) -> {T, S, lists:reverse([{K, V}|P])};
  768. media_value(<< $;, R/bits >>, T, S, P, K, V) -> media_before_param(R, T, S, [{K, V}|P]);
  769. media_value(<< $\s, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  770. media_value(<< $\t, R/bits >>, T, S, P, K, V) -> media_before_semicolon(R, T, S, [{K, V}|P]);
  771. media_value(<< C, R/bits >>, T, S, P, K, V) when ?IS_TOKEN(C) -> media_value(R, T, S, P, K, << V/binary, C >>).
  772. -ifdef(TEST).
  773. media_type_parameter() ->
  774. frequency([
  775. {90, parameter()},
  776. {10, {<<"charset">>, oneof([token(), quoted_string()]), <<>>, <<>>}}
  777. ]).
  778. media_type() ->
  779. ?LET({T, S, P},
  780. {token(), token(), list(media_type_parameter())},
  781. {T, S, P, iolist_to_binary([T, $/, S, [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P]])}
  782. ).
  783. prop_parse_content_type() ->
  784. ?FORALL({T, S, P, MediaType},
  785. media_type(),
  786. begin
  787. {ResT, ResS, ResP} = parse_content_type(MediaType),
  788. ExpectedP = [case ?INLINE_LOWERCASE_BC(K) of
  789. <<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(V))};
  790. LowK -> {LowK, unquote(V)}
  791. end || {K, V, _, _} <- P],
  792. ResT =:= ?INLINE_LOWERCASE_BC(T)
  793. andalso ResS =:= ?INLINE_LOWERCASE_BC(S)
  794. andalso ResP =:= ExpectedP
  795. end
  796. ).
  797. parse_content_type_test_() ->
  798. Tests = [
  799. {<<"text/html;charset=utf-8">>,
  800. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  801. {<<"text/html;charset=UTF-8">>,
  802. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  803. {<<"Text/HTML;Charset=\"utf-8\"">>,
  804. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  805. {<<"text/html; charset=\"utf-8\"">>,
  806. {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
  807. {<<"text/html; charset=ISO-8859-4">>,
  808. {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  809. {<<"text/plain; charset=iso-8859-4">>,
  810. {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
  811. {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>,
  812. {<<"multipart">>, <<"form-data">>, [
  813. {<<"boundary">>, <<"MultipartIsUgly">>}
  814. ]}},
  815. {<<"foo/bar; one=FirstParam; two=SecondParam">>,
  816. {<<"foo">>, <<"bar">>, [
  817. {<<"one">>, <<"FirstParam">>},
  818. {<<"two">>, <<"SecondParam">>}
  819. ]}}
  820. ],
  821. [{V, fun() -> R = parse_content_type(V) end} || {V, R} <- Tests].
  822. -endif.
  823. -ifdef(PERF).
  824. horse_parse_content_type() ->
  825. horse:repeat(200000,
  826. parse_content_type(<<"text/html;charset=utf-8">>)
  827. ).
  828. -endif.
  829. %% @doc Parse the Expect header.
  830. -spec parse_expect(binary()) -> continue.
  831. parse_expect(<<"100-continue", Rest/bits >>) ->
  832. ws_end(Rest),
  833. continue;
  834. parse_expect(<<"100-", C, O, N, T, I, M, U, E, Rest/bits >>)
  835. when C =:= $C orelse C =:= $c, O =:= $O orelse O =:= $o,
  836. N =:= $N orelse N =:= $n, T =:= $T orelse T =:= $t,
  837. I =:= $I orelse I =:= $i, M =:= $N orelse M =:= $n,
  838. U =:= $U orelse U =:= $u, E =:= $E orelse E =:= $e ->
  839. ws_end(Rest),
  840. continue.
  841. -ifdef(TEST).
  842. expect() ->
  843. ?LET(E,
  844. [$1, $0, $0, $-,
  845. oneof([$c, $C]), oneof([$o, $O]), oneof([$n, $N]),
  846. oneof([$t, $T]), oneof([$i, $I]), oneof([$n, $N]),
  847. oneof([$u, $U]), oneof([$e, $E])],
  848. list_to_binary(E)).
  849. prop_parse_expect() ->
  850. ?FORALL(E, expect(), continue =:= parse_expect(E)).
  851. parse_expect_test_() ->
  852. Tests = [
  853. <<"100-continue">>,
  854. <<"100-CONTINUE">>,
  855. <<"100-Continue">>,
  856. <<"100-CoNtInUe">>,
  857. <<"100-continue ">>
  858. ],
  859. [{V, fun() -> continue = parse_expect(V) end} || V <- Tests].
  860. parse_expect_error_test_() ->
  861. Tests = [
  862. <<>>,
  863. <<" ">>,
  864. <<"200-OK">>,
  865. <<"Cookies">>
  866. ],
  867. [{V, fun() -> {'EXIT', _} = (catch parse_expect(V)) end} || V <- Tests].
  868. -endif.
  869. -ifdef(PERF).
  870. horse_parse_expect() ->
  871. horse:repeat(200000,
  872. parse_expect(<<"100-continue">>)
  873. ).
  874. -endif.
  875. %% @doc Parse the Max-Forwards header.
  876. -spec parse_max_forwards(binary()) -> integer().
  877. parse_max_forwards(<< $0, R/bits >>) -> number(R, 0);
  878. parse_max_forwards(<< $1, R/bits >>) -> number(R, 1);
  879. parse_max_forwards(<< $2, R/bits >>) -> number(R, 2);
  880. parse_max_forwards(<< $3, R/bits >>) -> number(R, 3);
  881. parse_max_forwards(<< $4, R/bits >>) -> number(R, 4);
  882. parse_max_forwards(<< $5, R/bits >>) -> number(R, 5);
  883. parse_max_forwards(<< $6, R/bits >>) -> number(R, 6);
  884. parse_max_forwards(<< $7, R/bits >>) -> number(R, 7);
  885. parse_max_forwards(<< $8, R/bits >>) -> number(R, 8);
  886. parse_max_forwards(<< $9, R/bits >>) -> number(R, 9).
  887. -ifdef(TEST).
  888. prop_parse_max_forwards() ->
  889. ?FORALL(
  890. X,
  891. non_neg_integer(),
  892. X =:= parse_max_forwards(integer_to_binary(X))
  893. ).
  894. parse_max_forwards_test_() ->
  895. Tests = [
  896. {<<"0">>, 0},
  897. {<<"42 ">>, 42},
  898. {<<"69\t">>, 69},
  899. {<<"1337">>, 1337},
  900. {<<"1234567890">>, 1234567890},
  901. {<<"1234567890 ">>, 1234567890}
  902. ],
  903. [{V, fun() -> R = parse_max_forwards(V) end} || {V, R} <- Tests].
  904. parse_max_forwards_error_test_() ->
  905. Tests = [
  906. <<>>,
  907. <<"123, 123">>,
  908. <<"4.17">>
  909. ],
  910. [{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].
  911. -endif.
  912. %% @doc Parse the Transfer-Encoding header.
  913. %%
  914. %% @todo This function does not support parsing of transfer-parameter.
  915. -spec parse_transfer_encoding(binary()) -> [binary()].
  916. parse_transfer_encoding(<<"chunked">>) ->
  917. [<<"chunked">>];
  918. parse_transfer_encoding(TransferEncoding) ->
  919. nonempty(token_ci_list(TransferEncoding, [])).
  920. -ifdef(TEST).
  921. prop_parse_transfer_encoding() ->
  922. ?FORALL(L,
  923. non_empty(list(token())),
  924. begin
  925. << _, TransferEncoding/binary >> = iolist_to_binary([[$,, C] || C <- L]),
  926. ResL = parse_transfer_encoding(TransferEncoding),
  927. CheckedL = [?INLINE_LOWERCASE_BC(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)],
  928. [true] =:= lists:usort(CheckedL)
  929. end).
  930. parse_transfer_encoding_test_() ->
  931. Tests = [
  932. {<<"a , , , ">>, [<<"a">>]},
  933. {<<" , , , a">>, [<<"a">>]},
  934. {<<"a , , b">>, [<<"a">>, <<"b">>]},
  935. {<<"chunked">>, [<<"chunked">>]},
  936. {<<"chunked, something">>, [<<"chunked">>, <<"something">>]}
  937. ],
  938. [{V, fun() -> R = parse_transfer_encoding(V) end} || {V, R} <- Tests].
  939. parse_transfer_encoding_error_test_() ->
  940. Tests = [
  941. <<>>,
  942. <<" ">>,
  943. <<" , ">>,
  944. <<",,,">>,
  945. <<"a b">>
  946. ],
  947. [{V, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(V)) end}
  948. || V <- Tests].
  949. -endif.
  950. -ifdef(PERF).
  951. horse_parse_transfer_encoding_chunked() ->
  952. horse:repeat(200000,
  953. parse_transfer_encoding(<<"chunked">>)
  954. ).
  955. horse_parse_transfer_encoding_custom() ->
  956. horse:repeat(200000,
  957. parse_transfer_encoding(<<"chunked, something">>)
  958. ).
  959. -endif.
  960. %% Internal.
  961. %% Only return if the list is not empty.
  962. nonempty(L) when L =/= [] -> L.
  963. %% Parse a number optionally followed by whitespace.
  964. number(<< $0, R/bits >>, Acc) -> number(R, Acc * 10);
  965. number(<< $1, R/bits >>, Acc) -> number(R, Acc * 10 + 1);
  966. number(<< $2, R/bits >>, Acc) -> number(R, Acc * 10 + 2);
  967. number(<< $3, R/bits >>, Acc) -> number(R, Acc * 10 + 3);
  968. number(<< $4, R/bits >>, Acc) -> number(R, Acc * 10 + 4);
  969. number(<< $5, R/bits >>, Acc) -> number(R, Acc * 10 + 5);
  970. number(<< $6, R/bits >>, Acc) -> number(R, Acc * 10 + 6);
  971. number(<< $7, R/bits >>, Acc) -> number(R, Acc * 10 + 7);
  972. number(<< $8, R/bits >>, Acc) -> number(R, Acc * 10 + 8);
  973. number(<< $9, R/bits >>, Acc) -> number(R, Acc * 10 + 9);
  974. number(<< $\s, R/bits >>, Acc) -> ws_end(R), Acc;
  975. number(<< $\t, R/bits >>, Acc) -> ws_end(R), Acc;
  976. number(<<>>, Acc) -> Acc.
  977. ws_end(<< $\s, R/bits >>) -> ws_end(R);
  978. ws_end(<< $\t, R/bits >>) -> ws_end(R);
  979. ws_end(<<>>) -> ok.
  980. %% Parse a list of case insensitive tokens.
  981. token_ci_list(<<>>, Acc) -> lists:reverse(Acc);
  982. token_ci_list(<< $\s, R/bits >>, Acc) -> token_ci_list(R, Acc);
  983. token_ci_list(<< $\t, R/bits >>, Acc) -> token_ci_list(R, Acc);
  984. token_ci_list(<< $,, R/bits >>, Acc) -> token_ci_list(R, Acc);
  985. token_ci_list(<< C, R/bits >>, Acc) ->
  986. case C of
  987. ?INLINE_LOWERCASE(token_ci_list, R, Acc, <<>>)
  988. end.
  989. token_ci_list(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  990. token_ci_list(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  991. token_ci_list(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  992. token_ci_list(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]);
  993. token_ci_list(<< C, R/bits >>, Acc, T) ->
  994. case C of
  995. ?INLINE_LOWERCASE(token_ci_list, R, Acc, T)
  996. end.
  997. token_ci_list_sep(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  998. token_ci_list_sep(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  999. token_ci_list_sep(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  1000. token_ci_list_sep(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]).