cow_http_hd.erl 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_hd).
  15. -export([parse_accept/1]).
  16. -export([parse_accept_charset/1]).
  17. -export([parse_connection/1]).
  18. -export([parse_content_length/1]).
  19. -export([parse_expect/1]).
  20. -export([parse_max_forwards/1]).
  21. -export([parse_transfer_encoding/1]).
  22. -type qvalue() :: 0..1000.
  23. -export_type([qvalue/0]).
  24. -include("cow_inline.hrl").
  %% @doc Parse the Accept header.
  %%
  %% Returns the media ranges in the order they appear, each one as
  %% `{{Type, SubType, Params}, Quality, AcceptExt}'. A header that yields
  %% no media range at all is rejected by `nonempty/1' (no clause matches
  %% the empty list), so the call crashes.
  -spec parse_accept(binary())
      -> [{{binary(), binary(), [{binary(), binary()}]},
          qvalue(),
          [binary() | {binary(), binary()}]}].
  %% Shortcut for the lone wildcard: no scanning is needed.
  parse_accept(<<"*/*">>) ->
      Wildcard = {<<"*">>, <<"*">>, []},
      [{Wildcard, 1000, []}];
  parse_accept(Accept) ->
      MediaRanges = media_range_list(Accept, []),
      nonempty(MediaRanges).
  %% Look for the start of the next media range, skipping whitespace and
  %% commas. Once the input is exhausted the accumulated ranges are returned
  %% in input order.
  media_range_list(<<>>, Ranges) ->
      lists:reverse(Ranges);
  media_range_list(<< Sep, Rest/bits >>, Ranges)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      media_range_list(Rest, Ranges);
  media_range_list(<< C, Rest/bits >>, Ranges) when ?IS_TOKEN(C) ->
      %% ?INLINE_LOWERCASE comes from cow_inline.hrl; judging by its name and
      %% by the tests in this file it appends the lowercased C to the last
      %% argument before calling the named function.
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Ranges, <<>>)
      end.
  %% Accumulate the media type up to the "/" that separates it from the
  %% subtype.
  media_range_type(<< $/, Rest/bits >>, Acc, Type) ->
      media_range_subtype(Rest, Acc, Type, <<>>);
  %% Special clause for badly behaving user agents that send * instead of */*.
  %% Only the ";" that introduces the parameters is accepted after the lone
  %% "*". Matching any byte here would silently discard it, letting garbage
  %% such as "*xq=0.5" parse as "*/*;q=0.5".
  media_range_type(<< $;, Rest/bits >>, Acc, <<"*">>) ->
      media_range_before_param(Rest, Acc, <<"*">>, <<"*">>, []);
  media_range_type(<< C, Rest/bits >>, Acc, Type) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_type, Rest, Acc, Type)
      end.
  %% Accumulate the media subtype. A terminator (end of input, ",", ";" or
  %% whitespace) is only accepted once at least one subtype character has
  %% been read, so an empty subtype such as "audio/" matches no clause.
  media_range_subtype(<<>>, Acc, Type, SubType) when SubType =/= <<>> ->
      MediaRange = {{Type, SubType, []}, 1000, []},
      lists:reverse([MediaRange|Acc]);
  media_range_subtype(<< $,, Rest/bits >>, Acc, Type, SubType)
          when SubType =/= <<>> ->
      MediaRange = {{Type, SubType, []}, 1000, []},
      media_range_list(Rest, [MediaRange|Acc]);
  media_range_subtype(<< $;, Rest/bits >>, Acc, Type, SubType)
          when SubType =/= <<>> ->
      media_range_before_param(Rest, Acc, Type, SubType, []);
  media_range_subtype(<< WS, Rest/bits >>, Acc, Type, SubType)
          when SubType =/= <<>>, (WS =:= $\s orelse WS =:= $\t) ->
      media_range_before_semicolon(Rest, Acc, Type, SubType, []);
  media_range_subtype(<< C, Rest/bits >>, Acc, Type, SubType)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_subtype, Rest, Acc, Type, SubType)
      end.
  %% Reached after whitespace following a subtype or parameter value, or
  %% right after the closing quote of a quoted value. Accepts the end of the
  %% media range (end of input or ","), a ";" introducing the next
  %% parameter, or more whitespace. Params is kept in reverse order until
  %% the range is emitted.
  media_range_before_semicolon(<<>>, Acc, Type, SubType, Params) ->
      MediaRange = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      lists:reverse([MediaRange|Acc]);
  media_range_before_semicolon(<< $,, Rest/bits >>, Acc, Type, SubType, Params) ->
      MediaRange = {{Type, SubType, lists:reverse(Params)}, 1000, []},
      media_range_list(Rest, [MediaRange|Acc]);
  media_range_before_semicolon(<< $;, Rest/bits >>, Acc, Type, SubType, Params) ->
      media_range_before_param(Rest, Acc, Type, SubType, Params);
  media_range_before_semicolon(<< WS, Rest/bits >>, Acc, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Acc, Type, SubType, Params).
  %% Reached right after a ";". Skips whitespace, then decides whether what
  %% follows is the weight ("q=") or an ordinary media type parameter.
  media_range_before_param(<< WS, Rest/bits >>, Acc, Type, SubType, Params)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_param(Rest, Acc, Type, SubType, Params);
  %% Special clause for badly behaving user agents that send .123 instead of 0.123.
  media_range_before_param(<< "q=.", Rest/bits >>, Acc, Type, SubType, Params) ->
      media_range_broken_weight(Rest, Acc, Type, SubType, Params);
  media_range_before_param(<< "q=", Rest/bits >>, Acc, Type, SubType, Params) ->
      media_range_weight(Rest, Acc, Type, SubType, Params);
  %% Anything else that starts with a token character is a parameter name.
  media_range_before_param(<< C, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Acc, Type, SubType, Params, <<>>)
      end.
  %% Accumulate a parameter name up to "=". A double quote directly after
  %% the "=" selects the quoted-value parser, otherwise the plain token
  %% value parser is used.
  media_range_param(<< "=\"", Rest/bits >>, Acc, Type, SubType, Params, Key) ->
      media_range_quoted(Rest, Acc, Type, SubType, Params, Key, <<>>);
  media_range_param(<< "=", Rest/bits >>, Acc, Type, SubType, Params, Key) ->
      media_range_value(Rest, Acc, Type, SubType, Params, Key, <<>>);
  media_range_param(<< C, Rest/bits >>, Acc, Type, SubType, Params, Key)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(media_range_param, Rest, Acc, Type, SubType, Params, Key)
      end.
  %% Read a quoted parameter value. The closing quote stores the pair; a
  %% backslash makes the following character part of the value verbatim.
  %% Only characters satisfying ?IS_VCHAR are accepted, and the case of the
  %% value is preserved.
  media_range_quoted(<< $", Rest/bits >>, Acc, Type, SubType, Params, Key, Value) ->
      media_range_before_semicolon(Rest, Acc, Type, SubType, [{Key, Value}|Params]);
  media_range_quoted(<< $\\, Escaped, Rest/bits >>, Acc, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(Escaped) ->
      media_range_quoted(Rest, Acc, Type, SubType, Params, Key,
          << Value/binary, Escaped >>);
  media_range_quoted(<< Char, Rest/bits >>, Acc, Type, SubType, Params, Key, Value)
          when ?IS_VCHAR(Char) ->
      media_range_quoted(Rest, Acc, Type, SubType, Params, Key,
          << Value/binary, Char >>).
  %% Read an unquoted parameter value. The pair is stored when the value is
  %% terminated by the end of input, ",", ";" or whitespace. Value
  %% characters are appended as-is (no lowercasing).
  media_range_value(<<>>, Acc, Type, SubType, Params, Key, Value) ->
      AllParams = lists:reverse([{Key, Value}|Params]),
      lists:reverse([{{Type, SubType, AllParams}, 1000, []}|Acc]);
  media_range_value(<< $,, Rest/bits >>, Acc, Type, SubType, Params, Key, Value) ->
      AllParams = lists:reverse([{Key, Value}|Params]),
      media_range_list(Rest, [{{Type, SubType, AllParams}, 1000, []}|Acc]);
  media_range_value(<< $;, Rest/bits >>, Acc, Type, SubType, Params, Key, Value) ->
      media_range_before_param(Rest, Acc, Type, SubType, [{Key, Value}|Params]);
  media_range_value(<< WS, Rest/bits >>, Acc, Type, SubType, Params, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      media_range_before_semicolon(Rest, Acc, Type, SubType, [{Key, Value}|Params]);
  media_range_value(<< Char, Rest/bits >>, Acc, Type, SubType, Params, Key, Value)
          when ?IS_TOKEN(Char) ->
      media_range_value(Rest, Acc, Type, SubType, Params, Key,
          << Value/binary, Char >>).
  %% Special function for badly behaving user agents that send .123 instead of 0.123.
  %%
  %% Reads the one to three digits that follow the "." and scales them to a
  %% quality in thousandths. The three-digit clause comes first so that the
  %% longest run of digits is consumed.
  media_range_broken_weight(<< D1, D2, D3, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2), ?IS_DIGIT(D3) ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, []);
  media_range_broken_weight(<< D1, D2, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2) ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10,
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, []);
  media_range_broken_weight(<< D1, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_DIGIT(D1) ->
      Quality = (D1 - $0) * 100,
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, []).
  %% Parse the value that follows "q=" into a quality in thousandths:
  %% "1" with an optional "." and up to three zeroes gives 1000; "0" with an
  %% optional "." and up to three digits gives 0..999. Clauses are ordered
  %% from the longest literal to the shortest so the longest form wins.
  media_range_weight(<< "1.000", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.00", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.0", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1.", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 1000, []);
  media_range_weight(<< "1", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 1000, []);
  media_range_weight(<< "0.", D1, D2, D3, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2), ?IS_DIGIT(D3) ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, []);
  media_range_weight(<< "0.", D1, D2, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2) ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10,
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, []);
  media_range_weight(<< "0.", D1, Rest/bits >>, Acc, Type, SubType, Params)
          when ?IS_DIGIT(D1) ->
      Quality = (D1 - $0) * 100,
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, []);
  media_range_weight(<< "0.", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 0, []);
  media_range_weight(<< "0", Rest/bits >>, Acc, Type, SubType, Params) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, 0, []).
  %% Reached once the weight is known: either directly after the weight,
  %% after whitespace following an extension, or after a quoted extension
  %% value. Accepts the end of the media range (end of input or ","), a ";"
  %% introducing an accept extension, or more whitespace. Params and Exts
  %% are kept in reverse order until the range is emitted.
  accept_before_semicolon(<<>>, Acc, Type, SubType, Params, Quality, Exts) ->
      Entry = {{Type, SubType, lists:reverse(Params)}, Quality, lists:reverse(Exts)},
      lists:reverse([Entry|Acc]);
  accept_before_semicolon(<< $,, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts) ->
      Entry = {{Type, SubType, lists:reverse(Params)}, Quality, lists:reverse(Exts)},
      media_range_list(Rest, [Entry|Acc]);
  accept_before_semicolon(<< $;, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts) ->
      accept_before_ext(Rest, Acc, Type, SubType, Params, Quality, Exts);
  accept_before_semicolon(<< WS, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, Exts).
  %% Reached right after a ";" that follows the weight. Skips whitespace and
  %% starts reading the name of an accept extension at the first token
  %% character.
  accept_before_ext(<< WS, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_ext(Rest, Acc, Type, SubType, Params, Quality, Exts);
  accept_before_ext(<< C, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Acc, Type, SubType, Params, Quality, Exts, <<>>)
      end.
  %% Accumulate the name of an accept extension. A name that is terminated
  %% without an "=" is stored as a bare binary; a name followed by "=" goes
  %% on to read a quoted or plain value.
  accept_ext(<<>>, Acc, Type, SubType, Params, Quality, Exts, Key) ->
      AllExts = lists:reverse([Key|Exts]),
      lists:reverse([{{Type, SubType, lists:reverse(Params)}, Quality, AllExts}|Acc]);
  accept_ext(<< $,, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key) ->
      AllExts = lists:reverse([Key|Exts]),
      media_range_list(Rest,
          [{{Type, SubType, lists:reverse(Params)}, Quality, AllExts}|Acc]);
  accept_ext(<< $;, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key) ->
      accept_before_ext(Rest, Acc, Type, SubType, Params, Quality, [Key|Exts]);
  accept_ext(<< WS, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality, [Key|Exts]);
  accept_ext(<< "=\"", Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key) ->
      accept_quoted(Rest, Acc, Type, SubType, Params, Quality, Exts, Key, <<>>);
  accept_ext(<< "=", Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key) ->
      accept_value(Rest, Acc, Type, SubType, Params, Quality, Exts, Key, <<>>);
  accept_ext(<< C, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key)
          when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(accept_ext, Rest, Acc, Type, SubType, Params, Quality, Exts, Key)
      end.
  %% Read the quoted value of an accept extension. The closing quote stores
  %% the pair; a backslash makes the following character part of the value
  %% verbatim. Only characters satisfying ?IS_VCHAR are accepted.
  accept_quoted(<< $", Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value) ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality,
          [{Key, Value}|Exts]);
  accept_quoted(<< $\\, Escaped, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value)
          when ?IS_VCHAR(Escaped) ->
      accept_quoted(Rest, Acc, Type, SubType, Params, Quality, Exts, Key,
          << Value/binary, Escaped >>);
  accept_quoted(<< Char, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value)
          when ?IS_VCHAR(Char) ->
      accept_quoted(Rest, Acc, Type, SubType, Params, Quality, Exts, Key,
          << Value/binary, Char >>).
  %% Read the unquoted value of an accept extension. The {Key, Value} pair is
  %% stored when the value is terminated by the end of input, ",", ";" or
  %% whitespace. Value characters are appended as-is (no lowercasing).
  accept_value(<<>>, Acc, Type, SubType, Params, Quality, Exts, Key, Value) ->
      AllExts = lists:reverse([{Key, Value}|Exts]),
      lists:reverse([{{Type, SubType, lists:reverse(Params)}, Quality, AllExts}|Acc]);
  accept_value(<< $,, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value) ->
      AllExts = lists:reverse([{Key, Value}|Exts]),
      media_range_list(Rest,
          [{{Type, SubType, lists:reverse(Params)}, Quality, AllExts}|Acc]);
  %% The ";" has been consumed here, so the next thing must be an extension
  %% name. Continuing with accept_before_semicolon/7 (which only accepts
  %% end of input, ",", ";" and whitespace) made "q=0.5;a=b;c=d" crash.
  accept_value(<< $;, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value) ->
      accept_before_ext(Rest, Acc, Type, SubType, Params, Quality,
          [{Key, Value}|Exts]);
  accept_value(<< WS, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value)
          when WS =:= $\s; WS =:= $\t ->
      accept_before_semicolon(Rest, Acc, Type, SubType, Params, Quality,
          [{Key, Value}|Exts]);
  accept_value(<< Char, Rest/bits >>, Acc, Type, SubType, Params, Quality, Exts, Key, Value)
          when ?IS_TOKEN(Char) ->
      accept_value(Rest, Acc, Type, SubType, Params, Quality, Exts, Key,
          << Value/binary, Char >>).
  -ifdef(TEST).
  %% Each case pairs an Accept header with the exact list parse_accept/1
  %% must return; the header doubles as the test title.
  parse_accept_test_() ->
      Cases = [
          {<<"audio/*; q=0.2, audio/basic">>, [
              {{<<"audio">>, <<"*">>, []}, 200, []},
              {{<<"audio">>, <<"basic">>, []}, 1000, []}
          ]},
          {<<"text/plain; q=0.5, text/html, "
              "text/x-dvi; q=0.8, text/x-c">>, [
              {{<<"text">>, <<"plain">>, []}, 500, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"x-dvi">>, []}, 800, []},
              {{<<"text">>, <<"x-c">>, []}, 1000, []}
          ]},
          {<<"text/*, text/html, text/html;level=1, */*">>, [
              {{<<"text">>, <<"*">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 1000, []}
          ]},
          {<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
              "text/html;level=2;q=0.4, */*;q=0.5">>, [
              {{<<"text">>, <<"*">>, []}, 300, []},
              {{<<"text">>, <<"html">>, []}, 700, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
              {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
              {{<<"*">>, <<"*">>, []}, 500, []}
          ]},
          {<<"text/html;level=1;quoted=\"hi hi hi\";"
              "q=0.123;standalone;complex=gits, text/plain">>, [
              {{<<"text">>, <<"html">>,
                  [{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
                  [<<"standalone">>, {<<"complex">>, <<"gits">>}]},
              {{<<"text">>, <<"plain">>, []}, 1000, []}
          ]},
          %% Broken user agents: lone "*" and weights without leading zero.
          {<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
              {{<<"text">>, <<"html">>, []}, 1000, []},
              {{<<"image">>, <<"gif">>, []}, 1000, []},
              {{<<"image">>, <<"jpeg">>, []}, 1000, []},
              {{<<"*">>, <<"*">>, []}, 200, []},
              {{<<"*">>, <<"*">>, []}, 200, []}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept(Header) end}
          || {Header, Expected} <- Cases].

  %% Every one of these headers must make parse_accept/1 crash.
  parse_accept_error_test_() ->
      BadHeaders = [
          <<>>,
          <<" ">>,
          <<"audio/basic, */;q=0.5">>,
          <<"audio/, audio/basic">>,
          <<"aud\tio/basic">>,
          <<"audio/basic;t=\"zero \\", 0, " woo\"">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept(Header)) end}
          || Header <- BadHeaders].
  -endif.
  -ifdef(PERF).
  %% Benchmark: parse a five-element Accept header 20000 times.
  %% NOTE(review): the expression is left inline as an argument of
  %% horse:repeat/2 on purpose; how `horse' evaluates it is not visible here.
  horse_parse_accept() ->
      horse:repeat(20000,
          parse_accept(
              <<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
                  "text/html;level=2;q=0.4, */*;q=0.5">>)
      ).
  -endif.
  %% @doc Parse the Accept-Charset header.
  %%
  %% Returns `{Charset, Quality}' pairs in header order, with the quality
  %% expressed in thousandths (1000 when none is given). A header without
  %% any charset is rejected by `nonempty/1' and crashes.
  -spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
  parse_accept_charset(Charset) ->
      Charsets = conneg_list(Charset, []),
      nonempty(Charsets).
  %% Look for the start of the next element, skipping whitespace and commas.
  %% Once the input is exhausted the accumulated pairs are returned in input
  %% order.
  conneg_list(<<>>, Pairs) ->
      lists:reverse(Pairs);
  conneg_list(<< Sep, Rest/bits >>, Pairs)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      conneg_list(Rest, Pairs);
  conneg_list(<< C, Rest/bits >>, Pairs) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Pairs, <<>>)
      end.
  %% Accumulate one token. Without an explicit weight the token is stored
  %% with the default quality of 1000.
  conneg(<<>>, Acc, Token) ->
      lists:reverse([{Token, 1000}|Acc]);
  conneg(<< $,, Rest/bits >>, Acc, Token) ->
      conneg_list(Rest, [{Token, 1000}|Acc]);
  conneg(<< $;, Rest/bits >>, Acc, Token) ->
      conneg_before_weight(Rest, Acc, Token);
  conneg(<< WS, Rest/bits >>, Acc, Token) when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Acc, Token);
  conneg(<< C, Rest/bits >>, Acc, Token) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, Acc, Token)
      end.
  %% Reached after whitespace following a token. Accepts the end of the
  %% element (end of input or ","), a ";" introducing the weight, or more
  %% whitespace.
  conneg_before_semicolon(<<>>, Acc, Token) ->
      lists:reverse([{Token, 1000}|Acc]);
  conneg_before_semicolon(<< $,, Rest/bits >>, Acc, Token) ->
      conneg_list(Rest, [{Token, 1000}|Acc]);
  conneg_before_semicolon(<< $;, Rest/bits >>, Acc, Token) ->
      conneg_before_weight(Rest, Acc, Token);
  conneg_before_semicolon(<< WS, Rest/bits >>, Acc, Token)
          when WS =:= $\s; WS =:= $\t ->
      conneg_before_semicolon(Rest, Acc, Token).
  %% Reached right after a ";". Skips whitespace and expects "q=".
  conneg_before_weight(<< WS, Rest/bits >>, Acc, Token)
          when WS =:= $\s; WS =:= $\t ->
      conneg_before_weight(Rest, Acc, Token);
  conneg_before_weight(<< "q=", Rest/bits >>, Acc, Token) ->
      conneg_weight(Rest, Acc, Token);
  %% Special clause for broken user agents that confuse ; and , separators.
  %% The pending token is stored with the default quality and the character
  %% just read starts a new token.
  conneg_before_weight(<< C, Rest/bits >>, Acc, Token) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(conneg, Rest, [{Token, 1000}|Acc], <<>>)
      end.
  %% Parse the value that follows "q=" into a quality in thousandths and
  %% store the pair: "1" with an optional "." and up to three zeroes gives
  %% 1000; "0" with an optional "." and up to three digits gives 0..999.
  %% Clauses are ordered from the longest literal to the shortest.
  conneg_weight(<< "1.000", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Acc]);
  conneg_weight(<< "1.00", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Acc]);
  conneg_weight(<< "1.0", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Acc]);
  conneg_weight(<< "1.", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Acc]);
  conneg_weight(<< "1", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 1000}|Acc]);
  conneg_weight(<< "0.", D1, D2, D3, Rest/bits >>, Acc, Token)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2), ?IS_DIGIT(D3) ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10 + (D3 - $0),
      conneg_list_sep(Rest, [{Token, Quality}|Acc]);
  conneg_weight(<< "0.", D1, D2, Rest/bits >>, Acc, Token)
          when ?IS_DIGIT(D1), ?IS_DIGIT(D2) ->
      Quality = (D1 - $0) * 100 + (D2 - $0) * 10,
      conneg_list_sep(Rest, [{Token, Quality}|Acc]);
  conneg_weight(<< "0.", D1, Rest/bits >>, Acc, Token)
          when ?IS_DIGIT(D1) ->
      Quality = (D1 - $0) * 100,
      conneg_list_sep(Rest, [{Token, Quality}|Acc]);
  conneg_weight(<< "0.", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 0}|Acc]);
  conneg_weight(<< "0", Rest/bits >>, Acc, Token) ->
      conneg_list_sep(Rest, [{Token, 0}|Acc]).
  %% After a weight only whitespace, a "," (continuing with the next
  %% element) or the end of input may follow.
  conneg_list_sep(<<>>, Pairs) ->
      lists:reverse(Pairs);
  conneg_list_sep(<< WS, Rest/bits >>, Pairs) when WS =:= $\s; WS =:= $\t ->
      conneg_list_sep(Rest, Pairs);
  conneg_list_sep(<< $,, Rest/bits >>, Pairs) ->
      conneg_list(Rest, Pairs).
  -ifdef(TEST).
  %% Each case pairs an Accept-Charset header with the exact list
  %% parse_accept_charset/1 must return; the header doubles as the title.
  parse_accept_charset_test_() ->
      Cases = [
          {<<"iso-8859-5, unicode-1-1;q=0.8">>, [
              {<<"iso-8859-5">>, 1000},
              {<<"unicode-1-1">>, 800}
          ]},
          %% Some user agents send this invalid value for the Accept-Charset header
          {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
              {<<"iso-8859-1">>, 1000},
              {<<"utf-8">>, 700},
              {<<"*">>, 700}
          ]}
      ],
      [{Header, fun() -> Expected = parse_accept_charset(Header) end}
          || {Header, Expected} <- Cases].

  %% An empty header must make parse_accept_charset/1 crash.
  parse_accept_charset_error_test_() ->
      BadHeaders = [
          <<>>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_accept_charset(Header)) end}
          || Header <- BadHeaders].
  -endif.
  -ifdef(PERF).
  %% Benchmark: parse a two-element Accept-Charset header 20000 times.
  horse_parse_accept_charset() ->
      horse:repeat(20000,
          parse_accept_charset(
              <<"iso-8859-5, unicode-1-1;q=0.8">>)
      ).
  -endif.
  %% @doc Parse the Connection header.
  %%
  %% Returns the lowercased tokens in header order. The two exact values
  %% "close" and "keep-alive" are answered without scanning; anything else
  %% goes through the generic token list parser and must contain at least
  %% one token.
  -spec parse_connection(binary()) -> [binary()].
  parse_connection(<<"close">> = Close) ->
      [Close];
  parse_connection(<<"keep-alive">> = KeepAlive) ->
      [KeepAlive];
  parse_connection(Connection) ->
      Tokens = token_ci_list(Connection, []),
      nonempty(Tokens).
  -ifdef(TEST).
  %% Each case pairs a Connection header with the token list
  %% parse_connection/1 must return; the header doubles as the title.
  parse_connection_test_() ->
      Cases = [
          {<<"close">>, [<<"close">>]},
          {<<"ClOsE">>, [<<"close">>]},
          {<<"Keep-Alive">>, [<<"keep-alive">>]},
          {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
      ],
      [{Header, fun() -> Expected = parse_connection(Header) end}
          || {Header, Expected} <- Cases].
  -endif.
  -ifdef(PERF).
  %% Benchmark: exact-match clause for "close".
  horse_parse_connection_close() ->
      horse:repeat(200000,
          parse_connection(<<"close">>)
      ).

  %% Benchmark: exact-match clause for "keep-alive".
  horse_parse_connection_keepalive() ->
      horse:repeat(200000,
          parse_connection(<<"keep-alive">>)
      ).

  %% Benchmark: generic token list path with two tokens.
  horse_parse_connection_keepalive_upgrade() ->
      horse:repeat(200000,
          parse_connection(<<"keep-alive, upgrade">>)
      ).
  -endif.
  %% @doc Parse the Content-Length header.
  %%
  %% The value has at least one digit, and may be followed by whitespace.
  %% Anything else (empty input, leading whitespace, a non-digit) matches no
  %% clause and crashes.
  -spec parse_content_length(binary()) -> non_neg_integer().
  %% Exact "0" is answered directly.
  parse_content_length(<< $0 >>) ->
      0;
  %% The first byte must be a digit; number/2 consumes the remainder.
  parse_content_length(<< Digit, Rest/bits >>) when Digit >= $0, Digit =< $9 ->
      number(Rest, Digit - $0).
  -ifdef(TEST).
  %% Each case pairs a Content-Length header with the integer
  %% parse_content_length/1 must return; the header doubles as the title.
  parse_content_length_test_() ->
      Cases = [
          {<<"0">>, 0},
          {<<"42 ">>, 42},
          {<<"69\t">>, 69},
          {<<"1337">>, 1337},
          {<<"1234567890">>, 1234567890},
          {<<"1234567890 ">>, 1234567890}
      ],
      [{Header, fun() -> Expected = parse_content_length(Header) end}
          || {Header, Expected} <- Cases].
  -endif.
  -ifdef(PERF).
  %% Benchmark: the exact "0" clause.
  horse_parse_content_length_zero() ->
      horse:repeat(100000,
          parse_content_length(<<"0">>)
      ).

  %% Benchmark: a ten-digit value.
  horse_parse_content_length_giga() ->
      horse:repeat(100000,
          parse_content_length(<<"1234567890">>)
      ).
  -endif.
  %% @doc Parse the Expect header.
  %%
  %% Only "100-continue" is recognized, in any letter case and optionally
  %% followed by whitespace. Any other value matches no clause and crashes.
  -spec parse_expect(binary()) -> continue.
  %% All-lowercase spelling, matched as a literal.
  parse_expect(<<"100-continue", Trailing/bits >>) ->
      ws_end(Trailing),
      continue;
  %% Any other mix of upper and lower case, checked letter by letter.
  parse_expect(<<"100-", C, O, N1, T, I, N2, U, E, Trailing/bits >>)
          when (C =:= $C orelse C =:= $c),
               (O =:= $O orelse O =:= $o),
               (N1 =:= $N orelse N1 =:= $n),
               (T =:= $T orelse T =:= $t),
               (I =:= $I orelse I =:= $i),
               (N2 =:= $N orelse N2 =:= $n),
               (U =:= $U orelse U =:= $u),
               (E =:= $E orelse E =:= $e) ->
      ws_end(Trailing),
      continue.
  -ifdef(TEST).
  %% Every one of these values must parse to `continue'.
  parse_expect_test_() ->
      Accepted = [
          <<"100-continue">>,
          <<"100-CONTINUE">>,
          <<"100-Continue">>,
          <<"100-CoNtInUe">>,
          <<"100-continue ">>
      ],
      [{Header, fun() -> continue = parse_expect(Header) end}
          || Header <- Accepted].

  %% Every one of these values must make parse_expect/1 crash.
  parse_expect_error_test_() ->
      Rejected = [
          <<>>,
          <<" ">>,
          <<"200-OK">>,
          <<"Cookies">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_expect(Header)) end}
          || Header <- Rejected].
  -endif.
  -ifdef(PERF).
  %% Benchmark: the all-lowercase "100-continue" clause.
  horse_parse_expect() ->
      horse:repeat(200000,
          parse_expect(<<"100-continue">>)
      ).
  -endif.
  %% @doc Parse the Max-Forwards header.
  %%
  %% The value has at least one digit, and may be followed by whitespace.
  %% Anything else (empty input, leading whitespace, a non-digit) matches no
  %% clause and crashes.
  %%
  %% The result is built from decimal digits only, so it can never be
  %% negative; the spec says so, matching parse_content_length/1.
  -spec parse_max_forwards(binary()) -> non_neg_integer().
  parse_max_forwards(<< Digit, Rest/bits >>) when Digit >= $0, Digit =< $9 ->
      number(Rest, Digit - $0).
  -ifdef(TEST).
  %% Each case pairs a Max-Forwards header with the integer
  %% parse_max_forwards/1 must return; the header doubles as the title.
  parse_max_forwards_test_() ->
      Cases = [
          {<<"0">>, 0},
          {<<"42 ">>, 42},
          {<<"69\t">>, 69},
          {<<"1337">>, 1337},
          {<<"1234567890">>, 1234567890},
          {<<"1234567890 ">>, 1234567890}
      ],
      [{Header, fun() -> Expected = parse_max_forwards(Header) end}
          || {Header, Expected} <- Cases].
  -endif.
  %% @doc Parse the Transfer-Encoding header.
  %%
  %% Returns the lowercased tokens in header order. The exact value
  %% "chunked" is answered without scanning; anything else goes through the
  %% generic token list parser and must contain at least one token.
  %%
  %% @todo Extension parameters.
  -spec parse_transfer_encoding(binary()) -> [binary()].
  parse_transfer_encoding(<<"chunked">> = Chunked) ->
      [Chunked];
  parse_transfer_encoding(TransferEncoding) ->
      Codings = token_ci_list(TransferEncoding, []),
      nonempty(Codings).
  -ifdef(TEST).
  %% Each case pairs a Transfer-Encoding header with the token list
  %% parse_transfer_encoding/1 must return; the header doubles as the title.
  parse_transfer_encoding_test_() ->
      Cases = [
          {<<"a , , , ">>, [<<"a">>]},
          {<<" , , , a">>, [<<"a">>]},
          {<<"a , , b">>, [<<"a">>, <<"b">>]},
          {<<"chunked">>, [<<"chunked">>]},
          {<<"chunked, something">>, [<<"chunked">>, <<"something">>]}
      ],
      [{Header, fun() -> Expected = parse_transfer_encoding(Header) end}
          || {Header, Expected} <- Cases].

  %% Every one of these values must make parse_transfer_encoding/1 crash.
  parse_transfer_encoding_error_test_() ->
      Rejected = [
          <<>>,
          <<" ">>,
          <<" , ">>,
          <<",,,">>,
          <<"a b">>
      ],
      [{Header, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(Header)) end}
          || Header <- Rejected].
  -endif.
  -ifdef(PERF).
  %% Benchmark: the exact "chunked" clause.
  horse_parse_transfer_encoding_chunked() ->
      horse:repeat(200000,
          parse_transfer_encoding(<<"chunked">>)
      ).

  %% Benchmark: generic token list path with two tokens.
  horse_parse_transfer_encoding_custom() ->
      horse:repeat(200000,
          parse_transfer_encoding(<<"chunked, something">>)
      ).
  -endif.
  453. %% Internal.
  %% Only return if the list is not empty.
  %% The empty list matches no clause, so the caller crashes with
  %% function_clause.
  nonempty(List) when [] =/= List ->
      List.
  %% Parse a number optionally followed by whitespace.
  %%
  %% Acc holds the value of the digits read so far. After the first space or
  %% tab only whitespace may remain, which ws_end/1 enforces by crashing on
  %% anything else.
  number(<<>>, Acc) ->
      Acc;
  number(<< Digit, Rest/bits >>, Acc) when Digit >= $0, Digit =< $9 ->
      number(Rest, Acc * 10 + (Digit - $0));
  number(<< WS, Rest/bits >>, Acc) when WS =:= $\s; WS =:= $\t ->
      ok = ws_end(Rest),
      Acc.
  %% Succeed with `ok' only when nothing but spaces and tabs remains; any
  %% other byte matches no clause.
  ws_end(<<>>) ->
      ok;
  ws_end(<< WS, Rest/bits >>) when WS =:= $\s; WS =:= $\t ->
      ws_end(Rest).
  %% Parse a list of case insensitive tokens.
  %%
  %% token_ci_list/2 looks for the start of the next token, skipping
  %% whitespace and commas; token_ci_list/3 accumulates the lowercased token
  %% in Token. The result lists the tokens in input order.
  %%
  %% Both token clauses are guarded with ?IS_TOKEN, like every other token
  %% scanner in this module. Without the guard any byte (";", a quote, a
  %% control character) was silently accepted as part of a token.
  token_ci_list(<<>>, Acc) ->
      lists:reverse(Acc);
  token_ci_list(<< Sep, Rest/bits >>, Acc)
          when Sep =:= $\s; Sep =:= $\t; Sep =:= $, ->
      token_ci_list(Rest, Acc);
  token_ci_list(<< C, Rest/bits >>, Acc) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci_list, Rest, Acc, <<>>)
      end.

  token_ci_list(<<>>, Acc, Token) ->
      lists:reverse([Token|Acc]);
  %% Whitespace ends the token; only a "," or the end of input may follow.
  token_ci_list(<< WS, Rest/bits >>, Acc, Token) when WS =:= $\s; WS =:= $\t ->
      token_ci_list_sep(Rest, Acc, Token);
  token_ci_list(<< $,, Rest/bits >>, Acc, Token) ->
      token_ci_list(Rest, [Token|Acc]);
  token_ci_list(<< C, Rest/bits >>, Acc, Token) when ?IS_TOKEN(C) ->
      case C of
          ?INLINE_LOWERCASE(token_ci_list, Rest, Acc, Token)
      end.
  %% Reached after whitespace following a token. The pending token is stored
  %% at the end of input or at a ","; more whitespace is skipped and any
  %% other byte matches no clause.
  token_ci_list_sep(<<>>, Acc, Token) ->
      lists:reverse([Token|Acc]);
  token_ci_list_sep(<< WS, Rest/bits >>, Acc, Token)
          when WS =:= $\s; WS =:= $\t ->
      token_ci_list_sep(Rest, Acc, Token);
  token_ci_list_sep(<< $,, Rest/bits >>, Acc, Token) ->
      token_ci_list(Rest, [Token|Acc]).