cow_http_te.erl

%% Copyright (c) 2014-2018, Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

-module(cow_http_te).

%% Identity.
-export([stream_identity/2]).
-export([identity/1]).

%% Chunked.
-export([stream_chunked/2]).
-export([chunk/1]).
-export([last_chunk/0]).

%% The state type is the same for both identity and chunked.
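%% As the function heads below show, identity reads the tuple as
%% {Streamed, Total}, while chunked reads it as {RemainingInChunk, Streamed},
%% where the remainder counts the chunk's trailing CRLF.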
-type state() :: {non_neg_integer(), non_neg_integer()}.
-export_type([state/0]).

-type decode_ret() :: more
    | {more, Data::binary(), state()}
    | {more, Data::binary(), RemLen::non_neg_integer(), state()}
    | {more, Data::binary(), Rest::binary(), state()}
    | {done, HasTrailers::trailers | no_trailers, Rest::binary()}
    | {done, Data::binary(), HasTrailers::trailers | no_trailers, Rest::binary()}.
-export_type([decode_ret/0]).

-include("cow_parse.hrl").

-ifdef(TEST).
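%% Test helper: feed the input to the decoding fun F one byte at a time,
%% carrying over whatever partial data F asks us to keep, so that the
%% incremental decoding paths are exercised.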
dripfeed(<< C, Rest/bits >>, Acc, State, F) ->
    case F(<< Acc/binary, C >>, State) of
        more ->
            dripfeed(Rest, << Acc/binary, C >>, State, F);
        {more, _, State2} ->
            dripfeed(Rest, <<>>, State2, F);
        {more, _, Length, State2} when is_integer(Length) ->
            dripfeed(Rest, <<>>, State2, F);
        {more, _, Acc2, State2} ->
            dripfeed(Rest, Acc2, State2, F);
        {done, _, <<>>} ->
            ok;
        {done, _, _, <<>>} ->
            ok
    end.
-endif.

%% Identity.

%% @doc Decode an identity stream.
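%%
%% For example (illustrative values), a 5-byte identity body decoded in one
%% pass is returned whole:
%%   {done, <<"Hello">>, 5, <<>>} = cow_http_te:stream_identity(<<"Hello">>, {0, 5}).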
-spec stream_identity(Data, State)
    -> {more, Data, Len, State} | {done, Data, Len, Data}
    when Data::binary(), State::state(), Len::non_neg_integer().
stream_identity(Data, {Streamed, Total}) ->
    Streamed2 = Streamed + byte_size(Data),
    if
        Streamed2 < Total ->
            {more, Data, Total - Streamed2, {Streamed2, Total}};
        true ->
            Size = Total - Streamed,
            << Data2:Size/binary, Rest/bits >> = Data,
            {done, Data2, Total, Rest}
    end.

-spec identity(Data) -> Data when Data::iodata().
identity(Data) ->
    Data.

-ifdef(TEST).
stream_identity_test() ->
    {done, <<>>, 0, <<>>}
        = stream_identity(identity(<<>>), {0, 0}),
    {done, <<"\r\n">>, 2, <<>>}
        = stream_identity(identity(<<"\r\n">>), {0, 2}),
    {done, << 0:80000 >>, 10000, <<>>}
        = stream_identity(identity(<< 0:80000 >>), {0, 10000}),
    ok.

stream_identity_parts_test() ->
    {more, << 0:8000 >>, 1999, S1}
        = stream_identity(<< 0:8000 >>, {0, 2999}),
    {more, << 0:8000 >>, 999, S2}
        = stream_identity(<< 0:8000 >>, S1),
    {done, << 0:7992 >>, 2999, <<>>}
        = stream_identity(<< 0:7992 >>, S2),
    ok.

%% Using the same data as the chunked one for comparison.
horse_stream_identity() ->
    horse:repeat(10000,
        stream_identity(<<
            "4\r\n"
            "Wiki\r\n"
            "5\r\n"
            "pedia\r\n"
            "e\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0\r\n"
            "\r\n">>, {0, 43})
    ).

horse_stream_identity_dripfeed() ->
    horse:repeat(10000,
        dripfeed(<<
            "4\r\n"
            "Wiki\r\n"
            "5\r\n"
            "pedia\r\n"
            "e\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0\r\n"
            "\r\n">>, <<>>, {0, 43}, fun stream_identity/2)
    ).
-endif.

%% Chunked.

%% @doc Decode a chunked stream.
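%%
%% For example (illustrative data), a complete chunked body decoded in one
%% pass returns the concatenated chunk data along with the trailers indicator:
%%   {done, <<"Wiki">>, no_trailers, <<>>}
%%       = cow_http_te:stream_chunked(<<"4\r\nWiki\r\n0\r\n\r\n">>, {0, 0}).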
-spec stream_chunked(Data, State)
    -> more | {more, Data, State} | {more, Data, non_neg_integer(), State}
    | {more, Data, Data, State}
    | {done, HasTrailers, Data} | {done, Data, HasTrailers, Data}
    when Data::binary(), State::state(), HasTrailers::trailers | no_trailers.
stream_chunked(Data, State) ->
    stream_chunked(Data, State, <<>>).

%% New chunk.
stream_chunked(Data = << C, _/bits >>, {0, Streamed}, Acc) when C =/= $\r ->
    case chunked_len(Data, Streamed, Acc, 0) of
        {next, Rest, State, Acc2} ->
            stream_chunked(Rest, State, Acc2);
        {more, State, Acc2} ->
            {more, Acc2, Data, State};
        Ret ->
            Ret
    end;
%% Trailing \r\n before next chunk.
stream_chunked(<< "\r\n", Rest/bits >>, {2, Streamed}, Acc) ->
    stream_chunked(Rest, {0, Streamed}, Acc);
%% Trailing \r before next chunk.
stream_chunked(<< "\r" >>, {2, Streamed}, Acc) ->
    {more, Acc, {1, Streamed}};
%% Trailing \n before next chunk.
stream_chunked(<< "\n", Rest/bits >>, {1, Streamed}, Acc) ->
    stream_chunked(Rest, {0, Streamed}, Acc);
%% More data needed.
stream_chunked(<<>>, State = {Rem, _}, Acc) ->
    {more, Acc, Rem, State};
%% Chunk data.
stream_chunked(Data, {Rem, Streamed}, Acc) when Rem > 2 ->
    DataSize = byte_size(Data),
    RemSize = Rem - 2,
    case Data of
        << Chunk:RemSize/binary, "\r\n", Rest/bits >> ->
            stream_chunked(Rest, {0, Streamed + RemSize}, << Acc/binary, Chunk/binary >>);
        << Chunk:RemSize/binary, "\r" >> ->
            {more, << Acc/binary, Chunk/binary >>, {1, Streamed + RemSize}};
        %% Everything in Data is part of the chunk. If we have more
        %% data than the chunk accepts, then this is an error and we crash.
        _ when DataSize =< RemSize ->
            Rem2 = Rem - DataSize,
            {more, << Acc/binary, Data/binary >>, Rem2, {Rem2, Streamed + DataSize}}
    end.
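
%% Parse the chunk size, which is given in hexadecimal, one digit at a time.
%% For example the size prefix <<"1a">> accumulates as (0 * 16 + 1) * 16 + 10 = 26.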
chunked_len(<< $0, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16);
chunked_len(<< $1, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 1);
chunked_len(<< $2, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 2);
chunked_len(<< $3, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 3);
chunked_len(<< $4, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 4);
chunked_len(<< $5, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 5);
chunked_len(<< $6, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 6);
chunked_len(<< $7, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 7);
chunked_len(<< $8, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 8);
chunked_len(<< $9, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 9);
chunked_len(<< $A, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 10);
chunked_len(<< $B, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 11);
chunked_len(<< $C, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 12);
chunked_len(<< $D, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 13);
chunked_len(<< $E, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 14);
chunked_len(<< $F, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 15);
chunked_len(<< $a, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 10);
chunked_len(<< $b, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 11);
chunked_len(<< $c, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 12);
chunked_len(<< $d, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 13);
chunked_len(<< $e, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 14);
chunked_len(<< $f, R/bits >>, S, A, Len) -> chunked_len(R, S, A, Len * 16 + 15);
%% Chunk extensions.
%%
%% Note that we currently skip the first character we encounter here,
%% and not in the skip_chunk_ext function. If we later implement
%% chunk extensions (unlikely) we will need to change this clause too.
chunked_len(<< C, R/bits >>, S, A, Len) when ?IS_WS(C); C =:= $; -> skip_chunk_ext(R, S, A, Len, 0);
%% Final chunk.
%%
%% When trailers follow, we simply return them as the Rest.
%% The user code can then decide to call the stream_trailers function
%% to parse them. The user can therefore ignore trailers as necessary
%% if they do not wish to handle them.
chunked_len(<< "\r\n\r\n", R/bits >>, _, <<>>, 0) -> {done, no_trailers, R};
chunked_len(<< "\r\n\r\n", R/bits >>, _, A, 0) -> {done, A, no_trailers, R};
chunked_len(<< "\r\n", R/bits >>, _, <<>>, 0) when byte_size(R) > 2 -> {done, trailers, R};
chunked_len(<< "\r\n", R/bits >>, _, A, 0) when byte_size(R) > 2 -> {done, A, trailers, R};
chunked_len(_, _, _, 0) -> more;
%% Normal chunk. Add 2 to Len for the trailing \r\n.
chunked_len(<< "\r\n", R/bits >>, S, A, Len) -> {next, R, {Len + 2, S}, A};
chunked_len(<<"\r">>, _, <<>>, _) -> more;
chunked_len(<<"\r">>, S, A, _) -> {more, {0, S}, A};
chunked_len(<<>>, _, <<>>, _) -> more;
chunked_len(<<>>, S, A, _) -> {more, {0, S}, A}.
skip_chunk_ext(R = << "\r", _/bits >>, S, A, Len, _) -> chunked_len(R, S, A, Len);
skip_chunk_ext(R = <<>>, S, A, Len, _) -> chunked_len(R, S, A, Len);
%% We skip up to 128 characters of chunk extensions. The value
%% is hardcoded: chunk extensions are very rarely seen in the
%% wild and Cowboy doesn't do anything with them anyway.
%%
%% Line breaks are not allowed in the middle of chunk extensions.
skip_chunk_ext(<< C, R/bits >>, S, A, Len, Skipped) when C =/= $\n, Skipped < 128 ->
    skip_chunk_ext(R, S, A, Len, Skipped + 1).

%% @doc Encode a chunk.
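%%
%% For example (illustrative), chunk(<<"Wiki">>) builds the iodata
%% ["4", <<"\r\n">>, <<"Wiki">>, <<"\r\n">>], which flattens to <<"4\r\nWiki\r\n">>.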
-spec chunk(D) -> D when D::iodata().
chunk(Data) ->
    [integer_to_list(iolist_size(Data), 16), <<"\r\n">>,
        Data, <<"\r\n">>].

%% @doc Encode the last chunk of a chunked stream.
-spec last_chunk() -> << _:40 >>.
last_chunk() ->
    <<"0\r\n\r\n">>.
-ifdef(TEST).
stream_chunked_identity_test() ->
    {done, <<"Wikipedia in\r\n\r\nchunks.">>, no_trailers, <<>>}
        = stream_chunked(iolist_to_binary([
            chunk("Wiki"),
            chunk("pedia"),
            chunk(" in\r\n\r\nchunks."),
            last_chunk()
        ]), {0, 0}),
    ok.

stream_chunked_one_pass_test() ->
    {done, no_trailers, <<>>} = stream_chunked(<<"0\r\n\r\n">>, {0, 0}),
    {done, <<"Wikipedia in\r\n\r\nchunks.">>, no_trailers, <<>>}
        = stream_chunked(<<
            "4\r\n"
            "Wiki\r\n"
            "5\r\n"
            "pedia\r\n"
            "e\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0\r\n"
            "\r\n">>, {0, 0}),
    %% Same but with extra spaces or chunk extensions.
    {done, <<"Wikipedia in\r\n\r\nchunks.">>, no_trailers, <<>>}
        = stream_chunked(<<
            "4 \r\n"
            "Wiki\r\n"
            "5 ; ext = abc\r\n"
            "pedia\r\n"
            "e;ext=abc\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0;ext\r\n"
            "\r\n">>, {0, 0}),
    %% Same but with trailers.
    {done, <<"Wikipedia in\r\n\r\nchunks.">>, trailers, Rest}
        = stream_chunked(<<
            "4\r\n"
            "Wiki\r\n"
            "5\r\n"
            "pedia\r\n"
            "e\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0\r\n"
            "x-foo-bar: bar foo\r\n"
            "\r\n">>, {0, 0}),
    {[{<<"x-foo-bar">>, <<"bar foo">>}], <<>>} = cow_http:parse_headers(Rest),
    ok.

stream_chunked_n_passes_test() ->
    S0 = {0, 0},
    more = stream_chunked(<<"4\r">>, S0),
    {more, <<>>, 6, S1} = stream_chunked(<<"4\r\n">>, S0),
    {more, <<"Wiki">>, 0, S2} = stream_chunked(<<"Wiki\r\n">>, S1),
    {more, <<"pedia">>, <<"e\r">>, S3} = stream_chunked(<<"5\r\npedia\r\ne\r">>, S2),
    {more, <<" in\r\n\r\nchunks.">>, 2, S4} = stream_chunked(<<"e\r\n in\r\n\r\nchunks.">>, S3),
    {done, no_trailers, <<>>} = stream_chunked(<<"\r\n0\r\n\r\n">>, S4),
    %% A few extra for coverage purposes.
    more = stream_chunked(<<"\n3">>, {1, 0}),
    {more, <<"abc">>, 2, {2, 3}} = stream_chunked(<<"\n3\r\nabc">>, {1, 0}),
    {more, <<"abc">>, {1, 3}} = stream_chunked(<<"3\r\nabc\r">>, {0, 0}),
    {more, <<"abc">>, <<"123">>, {0, 3}} = stream_chunked(<<"3\r\nabc\r\n123">>, {0, 0}),
    ok.

stream_chunked_dripfeed_test() ->
    dripfeed(<<
        "4\r\n"
        "Wiki\r\n"
        "5\r\n"
        "pedia\r\n"
        "e\r\n"
        " in\r\n\r\nchunks.\r\n"
        "0\r\n"
        "\r\n">>, <<>>, {0, 0}, fun stream_chunked/2).

do_body_to_chunks(_, <<>>, Acc) ->
    lists:reverse([<<"0\r\n\r\n">>|Acc]);
do_body_to_chunks(ChunkSize, Body, Acc) ->
    BodySize = byte_size(Body),
    ChunkSize2 = case BodySize < ChunkSize of
        true -> BodySize;
        false -> ChunkSize
    end,
    << Chunk:ChunkSize2/binary, Rest/binary >> = Body,
    ChunkSizeBin = list_to_binary(integer_to_list(ChunkSize2, 16)),
    do_body_to_chunks(ChunkSize, Rest,
        [<< ChunkSizeBin/binary, "\r\n", Chunk/binary, "\r\n" >>|Acc]).

stream_chunked_dripfeed2_test() ->
    Body = list_to_binary(io_lib:format("~p", [lists:seq(1, 100)])),
    Body2 = iolist_to_binary(do_body_to_chunks(50, Body, [])),
    dripfeed(Body2, <<>>, {0, 0}, fun stream_chunked/2).

stream_chunked_error_test_() ->
    Tests = [
        {<<>>, undefined},
        {<<"\n\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, {2, 0}}
    ],
    [{lists:flatten(io_lib:format("value ~p state ~p", [V, S])),
        fun() -> {'EXIT', _} = (catch stream_chunked(V, S)) end}
        || {V, S} <- Tests].

horse_stream_chunked() ->
    horse:repeat(10000,
        stream_chunked(<<
            "4\r\n"
            "Wiki\r\n"
            "5\r\n"
            "pedia\r\n"
            "e\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0\r\n"
            "\r\n">>, {0, 0})
    ).

horse_stream_chunked_dripfeed() ->
    horse:repeat(10000,
        dripfeed(<<
            "4\r\n"
            "Wiki\r\n"
            "5\r\n"
            "pedia\r\n"
            "e\r\n"
            " in\r\n\r\nchunks.\r\n"
            "0\r\n"
            "\r\n">>, <<>>, {0, 43}, fun stream_chunked/2)
    ).
-endif.