cow_http_hd.erl 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_accept_encoding/1]).
  18. -export([parse_connection/1]).
  19. -export([parse_content_length/1]).
  20. -export([parse_expect/1]).
  21. -export([parse_max_forwards/1]).
  22. -export([parse_transfer_encoding/1]).
  23. -type qvalue() :: 0..1000.
  24. -export_type([qvalue/0]).
  25. -include("cow_inline.hrl").
  26. %% @doc Parse the Accept header.
  27. -spec parse_accept(binary()) -> [{{binary(), binary(), [{binary(), binary()}]}, qvalue(), [binary() | {binary(), binary()}]}].
  28. parse_accept(<<"*/*">>) ->
  29. [{{<<"*">>, <<"*">>, []}, 1000, []}];
  30. parse_accept(Accept) ->
  31. nonempty(media_range_list(Accept, [])).
  32. media_range_list(<<>>, Acc) -> lists:reverse(Acc);
  33. media_range_list(<< $\s, R/bits >>, Acc) -> media_range_list(R, Acc);
  34. media_range_list(<< $\t, R/bits >>, Acc) -> media_range_list(R, Acc);
  35. media_range_list(<< $,, R/bits >>, Acc) -> media_range_list(R, Acc);
  36. media_range_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  37. case C of
  38. ?INLINE_LOWERCASE(media_range_type, R, Acc, <<>>)
  39. end.
  40. media_range_type(<< $/, R/bits >>, Acc, T) -> media_range_subtype(R, Acc, T, <<>>);
  41. %% Special clause for badly behaving user agents that send * instead of */*.
  42. media_range_type(<< _, R/bits >>, Acc, <<"*">>) -> media_range_before_param(R, Acc, <<"*">>, <<"*">>, []);
  43. media_range_type(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  44. case C of
  45. ?INLINE_LOWERCASE(media_range_type, R, Acc, T)
  46. end.
  47. media_range_subtype(<<>>, Acc, T, S) when S =/= <<>> -> lists:reverse([{{T, S, []}, 1000, []}|Acc]);
  48. media_range_subtype(<< $,, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_list(R, [{{T, S, []}, 1000, []}|Acc]);
  49. media_range_subtype(<< $;, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_param(R, Acc, T, S, []);
  50. media_range_subtype(<< $\s, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_semicolon(R, Acc, T, S, []);
  51. media_range_subtype(<< $\t, R/bits >>, Acc, T, S) when S =/= <<>> -> media_range_before_semicolon(R, Acc, T, S, []);
  52. media_range_subtype(<< C, R/bits >>, Acc, T, S) when ?IS_TOKEN(C) ->
  53. case C of
  54. ?INLINE_LOWERCASE(media_range_subtype, R, Acc, T, S)
  55. end.
  56. media_range_before_semicolon(<<>>, Acc, T, S, P) -> lists:reverse([{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
  57. media_range_before_semicolon(<< $,, R/bits >>, Acc, T, S, P) -> media_range_list(R, [{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
  58. media_range_before_semicolon(<< $;, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  59. media_range_before_semicolon(<< $\s, R/bits >>, Acc, T, S, P) -> media_range_before_semicolon(R, Acc, T, S, P);
  60. media_range_before_semicolon(<< $\t, R/bits >>, Acc, T, S, P) -> media_range_before_semicolon(R, Acc, T, S, P).
  61. media_range_before_param(<< $\s, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  62. media_range_before_param(<< $\t, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
  63. %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  64. media_range_before_param(<< $q, $=, $., R/bits >>, Acc, T, S, P) -> media_range_broken_weight(R, Acc, T, S, P);
  65. media_range_before_param(<< $q, $=, R/bits >>, Acc, T, S, P) -> media_range_weight(R, Acc, T, S, P);
  66. media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_TOKEN(C) ->
  67. case C of
  68. ?INLINE_LOWERCASE(media_range_param, R, Acc, T, S, P, <<>>)
  69. end.
  70. media_range_param(<< $=, $", R/bits >>, Acc, T, S, P, K) -> media_range_quoted(R, Acc, T, S, P, K, <<>>);
  71. media_range_param(<< $=, R/bits >>, Acc, T, S, P, K) -> media_range_value(R, Acc, T, S, P, K, <<>>);
  72. media_range_param(<< C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) ->
  73. case C of
  74. ?INLINE_LOWERCASE(media_range_param, R, Acc, T, S, P, K)
  75. end.
  76. media_range_quoted(<< $", R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  77. media_range_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>);
  78. media_range_quoted(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>).
  79. media_range_value(<<>>, Acc, T, S, P, K, V) -> lists:reverse([{{T, S, lists:reverse([{K, V}|P])}, 1000, []}|Acc]);
  80. media_range_value(<< $,, R/bits >>, Acc, T, S, P, K, V) -> media_range_list(R, [{{T, S, lists:reverse([{K, V}|P])}, 1000, []}|Acc]);
  81. media_range_value(<< $;, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_param(R, Acc, T, S, [{K, V}|P]);
  82. media_range_value(<< $\s, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  83. media_range_value(<< $\t, R/bits >>, Acc, T, S, P, K, V) -> media_range_before_semicolon(R, Acc, T, S, [{K, V}|P]);
  84. media_range_value(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_TOKEN(C) -> media_range_value(R, Acc, T, S, P, K, << V/binary, C >>).
  85. %% Special function for badly behaving user agents that send .123 instead of 0.123.
  86. media_range_broken_weight(<< A, B, C, R/bits >>, Acc, T, S, P)
  87. when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
  88. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
  89. media_range_broken_weight(<< A, B, R/bits >>, Acc, T, S, P)
  90. when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
  91. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
  92. media_range_broken_weight(<< A, R/bits >>, Acc, T, S, P)
  93. when ?IS_DIGIT(A) ->
  94. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100, []).
  95. media_range_weight(<< "1.000", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  96. media_range_weight(<< "1.00", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  97. media_range_weight(<< "1.0", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  98. media_range_weight(<< "1.", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  99. media_range_weight(<< "1", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 1000, []);
  100. media_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T, S, P)
  101. when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
  102. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
  103. media_range_weight(<< "0.", A, B, R/bits >>, Acc, T, S, P)
  104. when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
  105. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
  106. media_range_weight(<< "0.", A, R/bits >>, Acc, T, S, P)
  107. when ?IS_DIGIT(A) ->
  108. accept_before_semicolon(R, Acc, T, S, P, (A - $0) * 100, []);
  109. media_range_weight(<< "0.", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 0, []);
  110. media_range_weight(<< "0", R/bits >>, Acc, T, S, P) -> accept_before_semicolon(R, Acc, T, S, P, 0, []).
  111. accept_before_semicolon(<<>>, Acc, T, S, P, Q, E) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
  112. accept_before_semicolon(<< $,, R/bits >>, Acc, T, S, P, Q, E) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
  113. accept_before_semicolon(<< $;, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  114. accept_before_semicolon(<< $\s, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_semicolon(R, Acc, T, S, P, Q, E);
  115. accept_before_semicolon(<< $\t, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_semicolon(R, Acc, T, S, P, Q, E).
  116. accept_before_ext(<< $\s, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  117. accept_before_ext(<< $\t, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
  118. accept_before_ext(<< C, R/bits >>, Acc, T, S, P, Q, E) when ?IS_TOKEN(C) ->
  119. case C of
  120. ?INLINE_LOWERCASE(accept_ext, R, Acc, T, S, P, Q, E, <<>>)
  121. end.
  122. accept_ext(<<>>, Acc, T, S, P, Q, E, K) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse([K|E])}|Acc]);
  123. accept_ext(<< $,, R/bits >>, Acc, T, S, P, Q, E, K) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse([K|E])}|Acc]);
  124. accept_ext(<< $;, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_ext(R, Acc, T, S, P, Q, [K|E]);
  125. accept_ext(<< $\s, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_semicolon(R, Acc, T, S, P, Q, [K|E]);
  126. accept_ext(<< $\t, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_before_semicolon(R, Acc, T, S, P, Q, [K|E]);
  127. accept_ext(<< $=, $", R/bits >>, Acc, T, S, P, Q, E, K) -> accept_quoted(R, Acc, T, S, P, Q, E, K, <<>>);
  128. accept_ext(<< $=, R/bits >>, Acc, T, S, P, Q, E, K) -> accept_value(R, Acc, T, S, P, Q, E, K, <<>>);
  129. accept_ext(<< C, R/bits >>, Acc, T, S, P, Q, E, K) when ?IS_TOKEN(C) ->
  130. case C of
  131. ?INLINE_LOWERCASE(accept_ext, R, Acc, T, S, P, Q, E, K)
  132. end.
  133. accept_quoted(<< $", R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  134. accept_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>);
  135. accept_quoted(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).
  136. accept_value(<<>>, Acc, T, S, P, Q, E, K, V) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse([{K, V}|E])}|Acc]);
  137. accept_value(<< $,, R/bits >>, Acc, T, S, P, Q, E, K, V) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse([{K, V}|E])}|Acc]);
  138. accept_value(<< $;, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  139. accept_value(<< $\s, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  140. accept_value(<< $\t, R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_before_semicolon(R, Acc, T, S, P, Q, [{K, V}|E]);
  141. accept_value(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_TOKEN(C) -> accept_value(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).
  142. -ifdef(TEST).
  143. parse_accept_test_() ->
  144. Tests = [
  145. {<<"audio/*; q=0.2, audio/basic">>, [
  146. {{<<"audio">>, <<"*">>, []}, 200, []},
  147. {{<<"audio">>, <<"basic">>, []}, 1000, []}
  148. ]},
  149. {<<"text/plain; q=0.5, text/html, "
  150. "text/x-dvi; q=0.8, text/x-c">>, [
  151. {{<<"text">>, <<"plain">>, []}, 500, []},
  152. {{<<"text">>, <<"html">>, []}, 1000, []},
  153. {{<<"text">>, <<"x-dvi">>, []}, 800, []},
  154. {{<<"text">>, <<"x-c">>, []}, 1000, []}
  155. ]},
  156. {<<"text/*, text/html, text/html;level=1, */*">>, [
  157. {{<<"text">>, <<"*">>, []}, 1000, []},
  158. {{<<"text">>, <<"html">>, []}, 1000, []},
  159. {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
  160. {{<<"*">>, <<"*">>, []}, 1000, []}
  161. ]},
  162. {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
  163. "text/html;level=2;q=0.4, */*;q=0.5">>, [
  164. {{<<"text">>, <<"*">>, []}, 300, []},
  165. {{<<"text">>, <<"html">>, []}, 700, []},
  166. {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
  167. {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
  168. {{<<"*">>, <<"*">>, []}, 500, []}
  169. ]},
  170. {<<"text/html;level=1;quoted=\"hi hi hi\";"
  171. "q=0.123;standalone;complex=gits, text/plain">>, [
  172. {{<<"text">>, <<"html">>,
  173. [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
  174. [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
  175. {{<<"text">>, <<"plain">>, []}, 1000, []}
  176. ]},
  177. {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
  178. {{<<"text">>, <<"html">>, []}, 1000, []},
  179. {{<<"image">>, <<"gif">>, []}, 1000, []},
  180. {{<<"image">>, <<"jpeg">>, []}, 1000, []},
  181. {{<<"*">>, <<"*">>, []}, 200, []},
  182. {{<<"*">>, <<"*">>, []}, 200, []}
  183. ]}
  184. ],
  185. [{V, fun() -> R = parse_accept(V) end} || {V, R} <- Tests].
  186. parse_accept_error_test_() ->
  187. Tests = [
  188. <<>>,
  189. <<" ">>,
  190. <<"audio/basic, */;q=0.5">>,
  191. <<"audio/, audio/basic">>,
  192. <<"aud\tio/basic">>,
  193. <<"audio/basic;t=\"zero \\", 0, " woo\"">>
  194. ],
  195. [{V, fun() -> {'EXIT', _} = (catch parse_accept(V)) end} || V <- Tests].
  196. -endif.
  197. -ifdef(PERF).
%% Benchmark entry: passes a parse_accept/1 call on a fixed header to
%% horse:repeat with a count of 20000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the call.
horse_parse_accept() ->
    horse:repeat(20000,
        parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
            "text/html;level=2;q=0.4, */*;q=0.5">>)
    ).
  203. -endif.
  204. %% @doc Parse the Accept-Charset header.
  205. -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  206. parse_accept_charset(Charset) ->
  207. nonempty(conneg_list(Charset, [])).
  208. conneg_list(<<>>, Acc) -> lists:reverse(Acc);
  209. conneg_list(<< $\s, R/bits >>, Acc) -> conneg_list(R, Acc);
  210. conneg_list(<< $\t, R/bits >>, Acc) -> conneg_list(R, Acc);
  211. conneg_list(<< $\,, R/bits >>, Acc) -> conneg_list(R, Acc);
  212. conneg_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
  213. case C of
  214. ?INLINE_LOWERCASE(conneg, R, Acc, <<>>)
  215. end.
  216. conneg(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  217. conneg(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
  218. conneg(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  219. conneg(<< $\s, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  220. conneg(<< $\t, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  221. conneg(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  222. case C of
  223. ?INLINE_LOWERCASE(conneg, R, Acc, T)
  224. end.
  225. conneg_before_semicolon(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
  226. conneg_before_semicolon(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
  227. conneg_before_semicolon(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  228. conneg_before_semicolon(<< $\s, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T);
  229. conneg_before_semicolon(<< $\t, R/bits >>, Acc, T) -> conneg_before_semicolon(R, Acc, T).
  230. conneg_before_weight(<< $\s, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  231. conneg_before_weight(<< $\t, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
  232. conneg_before_weight(<< $q, $=, R/bits >>, Acc, T) -> conneg_weight(R, Acc, T);
  233. %% Special clause for broken user agents that confuse ; and , separators.
  234. conneg_before_weight(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
  235. case C of
  236. ?INLINE_LOWERCASE(conneg, R, [{T, 1000}|Acc], <<>>)
  237. end.
  238. conneg_weight(<< "1.000", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  239. conneg_weight(<< "1.00", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  240. conneg_weight(<< "1.0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  241. conneg_weight(<< "1.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  242. conneg_weight(<< "1", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
  243. conneg_weight(<< "0.", A, B, C, R/bits >>, Acc, T)
  244. when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
  245. conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
  246. conneg_weight(<< "0.", A, B, R/bits >>, Acc, T)
  247. when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
  248. conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
  249. conneg_weight(<< "0.", A, R/bits >>, Acc, T)
  250. when ?IS_DIGIT(A) ->
  251. conneg_list_sep(R, [{T, (A - $0) * 100}|Acc]);
  252. conneg_weight(<< "0.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]);
  253. conneg_weight(<< "0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]).
  254. conneg_list_sep(<<>>, Acc) -> lists:reverse(Acc);
  255. conneg_list_sep(<< $\s, R/bits >>, Acc) -> conneg_list_sep(R, Acc);
  256. conneg_list_sep(<< $\t, R/bits >>, Acc) -> conneg_list_sep(R, Acc);
  257. conneg_list_sep(<< $,, R/bits >>, Acc) -> conneg_list(R, Acc).
  258. -ifdef(TEST).
  259. parse_accept_charset_test_() ->
  260. Tests = [
  261. {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
  262. {<<"iso-8859-5">>, 1000},
  263. {<<"unicode-1-1">>, 800}
  264. ]},
  265. %% Some user agents send this invalid value for the Accept-Charset header
  266. {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
  267. {<<"iso-8859-1">>, 1000},
  268. {<<"utf-8">>, 700},
  269. {<<"*">>, 700}
  270. ]}
  271. ],
  272. [{V, fun() -> R = parse_accept_charset(V) end} || {V, R} <- Tests].
  273. parse_accept_charset_error_test_() ->
  274. Tests = [
  275. <<>>
  276. ],
  277. [{V, fun() -> {'EXIT', _} = (catch parse_accept_charset(V)) end} || V <- Tests].
  278. -endif.
  279. -ifdef(PERF).
%% Benchmark entry: passes a parse_accept_charset/1 call on a fixed
%% header to horse:repeat with a count of 20000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the call.
horse_parse_accept_charset() ->
    horse:repeat(20000,
        parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
    ).
  284. -endif.
  285. %% @doc Parse the Accept-Encoding header.
  286. -spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
  287. parse_accept_encoding(Encoding) ->
  288. conneg_list(Encoding, []).
  289. -ifdef(TEST).
  290. parse_accept_encoding_test_() ->
  291. Tests = [
  292. {<<>>, []},
  293. {<<"*">>, [{<<"*">>, 1000}]},
  294. {<<"compress, gzip">>, [
  295. {<<"compress">>, 1000},
  296. {<<"gzip">>, 1000}
  297. ]},
  298. {<<"compress;q=0.5, gzip;q=1.0">>, [
  299. {<<"compress">>, 500},
  300. {<<"gzip">>, 1000}
  301. ]},
  302. {<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
  303. {<<"gzip">>, 1000},
  304. {<<"identity">>, 500},
  305. {<<"*">>, 0}
  306. ]}
  307. ],
  308. [{V, fun() -> R = parse_accept_encoding(V) end} || {V, R} <- Tests].
  309. -endif.
  310. -ifdef(PERF).
%% Benchmark entry: passes a parse_accept_encoding/1 call on a fixed
%% header to horse:repeat with a count of 20000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the call.
horse_parse_accept_encoding() ->
    horse:repeat(20000,
        parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
    ).
  315. -endif.
  316. %% @doc Parse the Connection header.
  317. -spec parse_connection(binary()) -> [binary()].
  318. parse_connection(<<"close">>) ->
  319. [<<"close">>];
  320. parse_connection(<<"keep-alive">>) ->
  321. [<<"keep-alive">>];
  322. parse_connection(Connection) ->
  323. nonempty(token_ci_list(Connection, [])).
  324. -ifdef(TEST).
  325. parse_connection_test_() ->
  326. Tests = [
  327. {<<"close">>, [<<"close">>]},
  328. {<<"ClOsE">>, [<<"close">>]},
  329. {<<"Keep-Alive">>, [<<"keep-alive">>]},
  330. {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
  331. ],
  332. [{V, fun() -> R = parse_connection(V) end} || {V, R} <- Tests].
  333. -endif.
  334. -ifdef(PERF).
%% Benchmark entries for parse_connection/1, each passing a call on a
%% fixed value to horse:repeat with a count of 200000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the calls.

%% Exact value "close".
horse_parse_connection_close() ->
    horse:repeat(200000,
        parse_connection(<<"close">>)
    ).
%% Exact value "keep-alive".
horse_parse_connection_keepalive() ->
    horse:repeat(200000,
        parse_connection(<<"keep-alive">>)
    ).
%% Two-element list, which goes through the generic token list parser.
horse_parse_connection_keepalive_upgrade() ->
    horse:repeat(200000,
        parse_connection(<<"keep-alive, upgrade">>)
    ).
  347. -endif.
  348. %% @doc Parse the Content-Length header.
  349. %%
  350. %% The value has at least one digit, and may be followed by whitespace.
  351. -spec parse_content_length(binary()) -> non_neg_integer().
  352. parse_content_length(<< $0 >>) -> 0;
  353. parse_content_length(<< $0, R/bits >>) -> number(R, 0);
  354. parse_content_length(<< $1, R/bits >>) -> number(R, 1);
  355. parse_content_length(<< $2, R/bits >>) -> number(R, 2);
  356. parse_content_length(<< $3, R/bits >>) -> number(R, 3);
  357. parse_content_length(<< $4, R/bits >>) -> number(R, 4);
  358. parse_content_length(<< $5, R/bits >>) -> number(R, 5);
  359. parse_content_length(<< $6, R/bits >>) -> number(R, 6);
  360. parse_content_length(<< $7, R/bits >>) -> number(R, 7);
  361. parse_content_length(<< $8, R/bits >>) -> number(R, 8);
  362. parse_content_length(<< $9, R/bits >>) -> number(R, 9).
  363. -ifdef(TEST).
  364. parse_content_length_test_() ->
  365. Tests = [
  366. {<<"0">>, 0},
  367. {<<"42 ">>, 42},
  368. {<<"69\t">>, 69},
  369. {<<"1337">>, 1337},
  370. {<<"1234567890">>, 1234567890},
  371. {<<"1234567890 ">>, 1234567890}
  372. ],
  373. [{V, fun() -> R = parse_content_length(V) end} || {V, R} <- Tests].
  374. -endif.
  375. -ifdef(PERF).
%% Benchmark entries for parse_content_length/1, each passing a call on
%% a fixed value to horse:repeat with a count of 100000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the calls.

%% Exact value "0".
horse_parse_content_length_zero() ->
    horse:repeat(100000,
        parse_content_length(<<"0">>)
    ).
%% Ten-digit value.
horse_parse_content_length_giga() ->
    horse:repeat(100000,
        parse_content_length(<<"1234567890">>)
    ).
  384. -endif.
  385. %% @doc Parse the Expect header.
  386. -spec parse_expect(binary()) -> continue.
  387. parse_expect(<<"100-continue", Rest/bits >>) ->
  388. ws_end(Rest),
  389. continue;
  390. parse_expect(<<"100-", C, O, N, T, I, M, U, E, Rest/bits >>)
  391. when C =:= $C orelse C =:= $c, O =:= $O orelse O =:= $o,
  392. N =:= $N orelse N =:= $n, T =:= $T orelse T =:= $t,
  393. I =:= $I orelse I =:= $i, M =:= $N orelse M =:= $n,
  394. U =:= $U orelse U =:= $u, E =:= $E orelse E =:= $e ->
  395. ws_end(Rest),
  396. continue.
  397. -ifdef(TEST).
  398. parse_expect_test_() ->
  399. Tests = [
  400. <<"100-continue">>,
  401. <<"100-CONTINUE">>,
  402. <<"100-Continue">>,
  403. <<"100-CoNtInUe">>,
  404. <<"100-continue ">>
  405. ],
  406. [{V, fun() -> continue = parse_expect(V) end} || V <- Tests].
  407. parse_expect_error_test_() ->
  408. Tests = [
  409. <<>>,
  410. <<" ">>,
  411. <<"200-OK">>,
  412. <<"Cookies">>
  413. ],
  414. [{V, fun() -> {'EXIT', _} = (catch parse_expect(V)) end} || V <- Tests].
  415. -endif.
  416. -ifdef(PERF).
%% Benchmark entry: passes a parse_expect/1 call on "100-continue" to
%% horse:repeat with a count of 200000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the call.
horse_parse_expect() ->
    horse:repeat(200000,
        parse_expect(<<"100-continue">>)
    ).
  421. -endif.
  422. %% @doc Parse the Max-Forwards header.
  423. -spec parse_max_forwards(binary()) -> integer().
  424. parse_max_forwards(<< $0, R/bits >>) -> number(R, 0);
  425. parse_max_forwards(<< $1, R/bits >>) -> number(R, 1);
  426. parse_max_forwards(<< $2, R/bits >>) -> number(R, 2);
  427. parse_max_forwards(<< $3, R/bits >>) -> number(R, 3);
  428. parse_max_forwards(<< $4, R/bits >>) -> number(R, 4);
  429. parse_max_forwards(<< $5, R/bits >>) -> number(R, 5);
  430. parse_max_forwards(<< $6, R/bits >>) -> number(R, 6);
  431. parse_max_forwards(<< $7, R/bits >>) -> number(R, 7);
  432. parse_max_forwards(<< $8, R/bits >>) -> number(R, 8);
  433. parse_max_forwards(<< $9, R/bits >>) -> number(R, 9).
  434. -ifdef(TEST).
  435. parse_max_forwards_test_() ->
  436. Tests = [
  437. {<<"0">>, 0},
  438. {<<"42 ">>, 42},
  439. {<<"69\t">>, 69},
  440. {<<"1337">>, 1337},
  441. {<<"1234567890">>, 1234567890},
  442. {<<"1234567890 ">>, 1234567890}
  443. ],
  444. [{V, fun() -> R = parse_max_forwards(V) end} || {V, R} <- Tests].
  445. -endif.
  446. %% @doc Parse the Transfer-Encoding header.
  447. %%
  448. %% @todo Extension parameters.
  449. -spec parse_transfer_encoding(binary()) -> [binary()].
  450. parse_transfer_encoding(<<"chunked">>) ->
  451. [<<"chunked">>];
  452. parse_transfer_encoding(TransferEncoding) ->
  453. nonempty(token_ci_list(TransferEncoding, [])).
  454. -ifdef(TEST).
  455. parse_transfer_encoding_test_() ->
  456. Tests = [
  457. {<<"a , , , ">>, [<<"a">>]},
  458. {<<" , , , a">>, [<<"a">>]},
  459. {<<"a , , b">>, [<<"a">>, <<"b">>]},
  460. {<<"chunked">>, [<<"chunked">>]},
  461. {<<"chunked, something">>, [<<"chunked">>, <<"something">>]}
  462. ],
  463. [{V, fun() -> R = parse_transfer_encoding(V) end} || {V, R} <- Tests].
  464. parse_transfer_encoding_error_test_() ->
  465. Tests = [
  466. <<>>,
  467. <<" ">>,
  468. <<" , ">>,
  469. <<",,,">>,
  470. <<"a b">>
  471. ],
  472. [{V, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(V)) end}
  473. || V <- Tests].
  474. -endif.
  475. -ifdef(PERF).
%% Benchmark entries for parse_transfer_encoding/1, each passing a call
%% on a fixed value to horse:repeat with a count of 200000.
%% NOTE(review): horse is external to this file; confirm how
%% horse:repeat treats its second argument before restructuring the calls.

%% Exact value "chunked".
horse_parse_transfer_encoding_chunked() ->
    horse:repeat(200000,
        parse_transfer_encoding(<<"chunked">>)
    ).
%% Two-element list, which goes through the generic token list parser.
horse_parse_transfer_encoding_custom() ->
    horse:repeat(200000,
        parse_transfer_encoding(<<"chunked, something">>)
    ).
  484. -endif.
  485. %% Internal.
  486. %% Only return if the list is not empty.
  487. nonempty(L) when L =/= [] -> L.
  488. %% Parse a number optionally followed by whitespace.
  489. number(<< $0, R/bits >>, Acc) -> number(R, Acc * 10);
  490. number(<< $1, R/bits >>, Acc) -> number(R, Acc * 10 + 1);
  491. number(<< $2, R/bits >>, Acc) -> number(R, Acc * 10 + 2);
  492. number(<< $3, R/bits >>, Acc) -> number(R, Acc * 10 + 3);
  493. number(<< $4, R/bits >>, Acc) -> number(R, Acc * 10 + 4);
  494. number(<< $5, R/bits >>, Acc) -> number(R, Acc * 10 + 5);
  495. number(<< $6, R/bits >>, Acc) -> number(R, Acc * 10 + 6);
  496. number(<< $7, R/bits >>, Acc) -> number(R, Acc * 10 + 7);
  497. number(<< $8, R/bits >>, Acc) -> number(R, Acc * 10 + 8);
  498. number(<< $9, R/bits >>, Acc) -> number(R, Acc * 10 + 9);
  499. number(<< $\s, R/bits >>, Acc) -> ws_end(R), Acc;
  500. number(<< $\t, R/bits >>, Acc) -> ws_end(R), Acc;
  501. number(<<>>, Acc) -> Acc.
  502. ws_end(<< $\s, R/bits >>) -> ws_end(R);
  503. ws_end(<< $\t, R/bits >>) -> ws_end(R);
  504. ws_end(<<>>) -> ok.
  505. %% Parse a list of case insensitive tokens.
  506. token_ci_list(<<>>, Acc) -> lists:reverse(Acc);
  507. token_ci_list(<< $\s, R/bits >>, Acc) -> token_ci_list(R, Acc);
  508. token_ci_list(<< $\t, R/bits >>, Acc) -> token_ci_list(R, Acc);
  509. token_ci_list(<< $,, R/bits >>, Acc) -> token_ci_list(R, Acc);
  510. token_ci_list(<< C, R/bits >>, Acc) ->
  511. case C of
  512. ?INLINE_LOWERCASE(token_ci_list, R, Acc, <<>>)
  513. end.
  514. token_ci_list(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  515. token_ci_list(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  516. token_ci_list(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  517. token_ci_list(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]);
  518. token_ci_list(<< C, R/bits >>, Acc, T) ->
  519. case C of
  520. ?INLINE_LOWERCASE(token_ci_list, R, Acc, T)
  521. end.
  522. token_ci_list_sep(<<>>, Acc, T) -> lists:reverse([T|Acc]);
  523. token_ci_list_sep(<< $\s, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  524. token_ci_list_sep(<< $\t, R/bits >>, Acc, T) -> token_ci_list_sep(R, Acc, T);
  525. token_ci_list_sep(<< $,, R/bits >>, Acc, T) -> token_ci_list(R, [T|Acc]).