cow_http_te.erl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_http_te).
  15. %% Identity.
  16. -export([stream_identity/2]).
  17. -export([identity/1]).
  18. %% Chunked.
  19. -export([stream_chunked/2]).
  20. -export([chunk/1]).
  21. -export([last_chunk/0]).
  %% The state type is the same for both identity and chunked.
  %%
  %% Identity decoding uses {Streamed, Total}: the number of body bytes
  %% consumed so far and the expected total body length.
  %%
  %% Chunked decoding uses {Rem, Streamed}: the number of bytes still
  %% expected for the current chunk, including its trailing \r\n (0 when
  %% a new chunk size line is expected), and the number of body bytes
  %% decoded so far.
  %%
  %% The type is exported because it appears in decode_ret() and in the
  %% specs of the exported stream functions.
  -type state() :: {non_neg_integer(), non_neg_integer()}.
  -export_type([state/0]).

  %% Every shape the stream decoders can return.
  -type decode_ret() :: more
      | {more, Data::binary(), state()}
      | {more, Data::binary(), RemLen::non_neg_integer(), state()}
      | {more, Data::binary(), Rest::binary(), state()}
      | {done, TotalLen::non_neg_integer(), Rest::binary()}
      | {done, Data::binary(), TotalLen::non_neg_integer(), Rest::binary()}.
  -export_type([decode_ret/0]).
  31. -ifdef(TEST).
  %% Test helper: hands the input to the decoder fun one byte at a time.
  %%
  %% Pending holds the bytes the decoder has not consumed yet; every new
  %% byte is appended to it before the decoder is called again. Returns ok
  %% once the decoder reports done with nothing left over. Any other
  %% outcome crashes, including running out of input first, since no
  %% clause accepts an empty binary.
  dripfeed(<< Byte, Tail/bits >>, Pending, State, Decode) ->
      Buffer = << Pending/binary, Byte >>,
      case Decode(Buffer, State) of
          %% Nothing decoded yet: keep the whole buffer and the same state.
          more ->
              dripfeed(Tail, Buffer, State, Decode);
          %% Buffer fully consumed.
          {more, _, NextState} ->
              dripfeed(Tail, <<>>, NextState, Decode);
          %% Buffer fully consumed; the third element is a remaining length.
          {more, _, Remaining, NextState} when is_integer(Remaining) ->
              dripfeed(Tail, <<>>, NextState, Decode);
          %% Partially consumed: the leftover is fed again with the next byte.
          {more, _, Leftover, NextState} ->
              dripfeed(Tail, Leftover, NextState, Decode);
          {done, _, <<>>} ->
              ok;
          {done, _, _, <<>>} ->
              ok
      end.
  47. -endif.
  48. %% Identity.
  %% @doc Decode an identity stream.
  %%
  %% State is {Streamed, Total}: the number of bytes consumed by previous
  %% calls and the total length of the body.
  %%
  %% Returns {more, Data, Len, State} while the body is incomplete, where
  %% Len is the number of bytes still missing. Returns
  %% {done, Data, Total, Rest} once Total bytes have been seen, where Data
  %% is the last part of the body and Rest is whatever followed it.
  -spec stream_identity(Data, State)
      -> {more, Data, Len, State} | {done, Data, Len, Data}
      when Data::binary(), State::state(), Len::non_neg_integer().
  stream_identity(Data, {Streamed, Total}) ->
      case Streamed + byte_size(Data) of
          Seen when Seen < Total ->
              {more, Data, Total - Seen, {Seen, Total}};
          _ ->
              %% Only the bytes up to Total belong to the body.
              Missing = Total - Streamed,
              << Tail:Missing/binary, Rest/bits >> = Data,
              {done, Tail, Total, Rest}
      end.
  %% @doc Return the data unchanged.
  %%
  %% Identity applies no transformation, so there is nothing to encode.
  -spec identity(Data) -> Data when Data::iodata().
  identity(Body) ->
      Body.
  66. -ifdef(TEST).
  %% Bodies that arrive in one piece come back in a single done tuple.
  stream_identity_test() ->
      Empty = <<>>,
      {done, Empty, 0, <<>>} = stream_identity(identity(Empty), {0, 0}),
      CRLF = <<"\r\n">>,
      {done, CRLF, 2, <<>>} = stream_identity(identity(CRLF), {0, 2}),
      Zeros = << 0:80000 >>,
      {done, Zeros, 10000, <<>>} = stream_identity(identity(Zeros), {0, 10000}),
      ok.
  %% A 2999 byte body delivered as 1000 + 1000 + 999 bytes: the state
  %% returned by each call is handed to the next one.
  stream_identity_parts_test() ->
      Kilo = << 0:8000 >>,
      {more, Kilo, 1999, AfterFirst} = stream_identity(Kilo, {0, 2999}),
      {more, Kilo, 999, AfterSecond} = stream_identity(Kilo, AfterFirst),
      Last = << 0:7992 >>,
      {done, Last, 2999, <<>>} = stream_identity(Last, AfterSecond),
      ok.
  %% Runs the identity decoder 10000 times over a complete 43 byte body.
  %% The payload is the same data as the chunked one, for comparison.
  %% NOTE(review): the decode call is kept inline as the second argument
  %% of horse:repeat/2; presumably it is re-evaluated on every
  %% iteration. Confirm before hoisting it into a variable.
  horse_stream_identity() ->
      horse:repeat(10000,
          stream_identity(<<
              "4\r\n",
              "Wiki\r\n",
              "5\r\n",
              "pedia\r\n",
              "e\r\n",
              " in\r\n\r\nchunks.\r\n",
              "0\r\n",
              "\r\n" >>, {0, 43})
      ).
  %% Runs the identity decoder 10000 times over the same 43 byte body,
  %% this time delivered one byte at a time through dripfeed/4.
  %% NOTE(review): the call is kept inline as the second argument of
  %% horse:repeat/2; presumably it is re-evaluated on every iteration.
  %% Confirm before hoisting it into a variable.
  horse_stream_identity_dripfeed() ->
      horse:repeat(10000,
          dripfeed(<<
              "4\r\n",
              "Wiki\r\n",
              "5\r\n",
              "pedia\r\n",
              "e\r\n",
              " in\r\n\r\nchunks.\r\n",
              "0\r\n",
              "\r\n" >>, <<>>, {0, 43}, fun stream_identity/2)
      ).
  108. -endif.
  109. %% Chunked.
  %% @doc Decode a chunked stream.
  %%
  %% State is {Rem, Streamed}. Rem is the number of bytes still expected
  %% for the current chunk, including its trailing \r\n; it is 0 when a
  %% chunk size line is expected next. Streamed is the number of body
  %% bytes decoded so far. Decoding starts with {0, 0}.
  %%
  %% Returns:
  %% <ul>
  %% <li>more: nothing could be decoded; call again with the same state
  %%   and the same data followed by more data.</li>
  %% <li>{more, Data, State}: Data was decoded and the input was fully
  %%   consumed, ending on the \r of a chunk's trailing \r\n.</li>
  %% <li>{more, Data, Len, State}: Data was decoded and the input was
  %%   fully consumed; Len is the Rem value of the returned state.</li>
  %% <li>{more, Data, Rest, State}: Data was decoded but Rest, an
  %%   incomplete chunk size line, could not be parsed; pass it again
  %%   in front of the next data.</li>
  %% <li>{done, TotalLen, Rest}: the final chunk was found and this call
  %%   decoded no data.</li>
  %% <li>{done, Data, TotalLen, Rest}: the final chunk was found; Data is
  %%   what this call decoded.</li>
  %% </ul>
  %%
  %% Input that does not fit the framing expected by the state makes the
  %% function crash (function_clause or case_clause). This includes chunk
  %% data that runs past the announced size without being followed by \r\n.
  -spec stream_chunked(Data, State)
      -> more | {more, Data, State} | {more, Data, Len, State}
      | {more, Data, Data, State}
      | {done, Len, Data} | {done, Data, Len, Data}
      when Data::binary(), State::state(), Len::non_neg_integer().
  stream_chunked(Data, State) ->
      stream_chunked(Data, State, <<>>).

  %% New chunk.
  stream_chunked(Data = << C, _/bits >>, {0, Streamed}, Acc) when C =/= $\r ->
      case chunked_len(Data, Streamed, Acc, 0) of
          {next, Rest, State, Acc2} ->
              stream_chunked(Rest, State, Acc2);
          %% Incomplete size line after some data was decoded: hand the
          %% unparsed input back along with the decoded data.
          {more, State, Acc2} ->
              {more, Acc2, Data, State};
          Ret ->
              Ret
      end;
  %% Trailing \r\n before next chunk.
  stream_chunked(<< "\r\n", Rest/bits >>, {2, Streamed}, Acc) ->
      stream_chunked(Rest, {0, Streamed}, Acc);
  %% Trailing \r before next chunk.
  stream_chunked(<< "\r" >>, {2, Streamed}, Acc) ->
      {more, Acc, {1, Streamed}};
  %% Trailing \n before next chunk.
  stream_chunked(<< "\n", Rest/bits >>, {1, Streamed}, Acc) ->
      stream_chunked(Rest, {0, Streamed}, Acc);
  %% More data needed.
  stream_chunked(<<>>, State = {Rem, _}, Acc) ->
      {more, Acc, Rem, State};
  %% Chunk data.
  stream_chunked(Data, {Rem, Streamed}, Acc) when Rem > 2 ->
      DataSize = byte_size(Data),
      %% Rem counts the trailing \r\n; RemSize is the payload only.
      RemSize = Rem - 2,
      case Data of
          << Chunk:RemSize/binary, "\r\n", Rest/bits >> ->
              stream_chunked(Rest, {0, Streamed + RemSize}, << Acc/binary, Chunk/binary >>);
          << Chunk:RemSize/binary, "\r" >> ->
              {more, << Acc/binary, Chunk/binary >>, {1, Streamed + RemSize}};
          %% Everything in Data is part of the chunk. If there is more
          %% data than the chunk accepts, the framing is broken and we
          %% crash instead of returning a negative remaining length.
          _ when DataSize =< RemSize ->
              Rem2 = Rem - DataSize,
              {more, << Acc/binary, Data/binary >>, Rem2, {Rem2, Streamed + DataSize}}
      end.
  %% Parse a chunk size line.
  %%
  %% S is the number of body bytes streamed so far, A the data already
  %% decoded by the current call and Len the size accumulated from the
  %% hexadecimal digits read so far.
  %%
  %% Returns {next, Rest, State, A} when a non-empty chunk follows,
  %% {done, S, Rest} or {done, A, S, Rest} on the final chunk, and more
  %% or {more, State, A} when the line is incomplete.

  %% Hexadecimal digits, upper or lower case.
  chunked_len(<< C, R/bits >>, S, A, Len) when C >= $0, C =< $9 ->
      chunked_len(R, S, A, Len * 16 + (C - $0));
  chunked_len(<< C, R/bits >>, S, A, Len) when C >= $A, C =< $F ->
      chunked_len(R, S, A, Len * 16 + (C - $A + 10));
  chunked_len(<< C, R/bits >>, S, A, Len) when C >= $a, C =< $f ->
      chunked_len(R, S, A, Len * 16 + (C - $a + 10));
  %% Chunk extensions.
  %%
  %% Note that we currently skip the first character we encounter here,
  %% and not in the skip_chunk_ext function. If we later implement
  %% chunk extensions (unlikely) we will need to change this clause too.
  chunked_len(<< C, R/bits >>, S, A, Len) when C =/= $\r ->
      skip_chunk_ext(R, S, A, Len);
  %% Final chunk.
  chunked_len(<< "\r\n\r\n", R/bits >>, S, <<>>, 0) ->
      {done, S, R};
  chunked_len(<< "\r\n\r\n", R/bits >>, S, A, 0) ->
      {done, A, S, R};
  %% Final chunk without its complete terminator yet.
  chunked_len(_, _, _, 0) ->
      more;
  %% Normal chunk. Add 2 to Len for the trailing \r\n.
  chunked_len(<< "\r\n", R/bits >>, S, A, Len) ->
      {next, R, {Len + 2, S}, A};
  %% The size line stops at a lone \r or at the end of the input.
  chunked_len(Data, S, A, _) when Data =:= <<"\r">>; Data =:= <<>> ->
      chunked_len_more(S, A).

  %% Incomplete size line: plain more when nothing was decoded by this
  %% call, otherwise the decoded data with a state expecting a new chunk.
  chunked_len_more(_, <<>>) -> more;
  chunked_len_more(S, A) -> {more, {0, S}, A}.
  %% Skip chunk extensions: drop bytes until a \r or the end of the input
  %% is reached, then resume parsing the size line in chunked_len/4.
  %%
  %% @todo We should probably limit how much we skip.
  skip_chunk_ext(<< C, Tail/bits >>, S, A, Len) when C =/= $\r ->
      skip_chunk_ext(Tail, S, A, Len);
  skip_chunk_ext(<< "\r", _/bits >> = Data, S, A, Len) ->
      chunked_len(Data, S, A, Len);
  skip_chunk_ext(<<>>, S, A, Len) ->
      chunked_len(<<>>, S, A, Len).
  %% @doc Encode a chunk.
  %%
  %% The result is iodata made of the size of Data in hexadecimal, \r\n,
  %% Data itself and a closing \r\n.
  -spec chunk(D) -> D when D::iodata().
  chunk(Data) ->
      HexSize = integer_to_list(iolist_size(Data), 16),
      CRLF = <<"\r\n">>,
      [HexSize, CRLF, Data, CRLF].
  %% @doc Encode the last chunk of a chunked stream.
  %%
  %% This is a zero size line followed by the empty line ending the body.
  -spec last_chunk() -> << _:40 >>.
  last_chunk() ->
      << "0\r\n", "\r\n" >>.
  205. -ifdef(TEST).
  %% Round trip: a body encoded with chunk/1 and last_chunk/0 decodes back
  %% to the original 23 bytes in one call.
  stream_chunked_identity_test() ->
      Encoded = iolist_to_binary([
          chunk("Wiki"),
          chunk("pedia"),
          chunk(" in\r\n\r\nchunks."),
          last_chunk()
      ]),
      {done, <<"Wikipedia in\r\n\r\nchunks.">>, 23, <<>>}
          = stream_chunked(Encoded, {0, 0}),
      ok.
  %% Complete chunked bodies decoded in a single call.
  stream_chunked_one_pass_test() ->
      %% A body made of the final chunk only.
      {done, 0, <<>>} = stream_chunked(<<"0\r\n\r\n">>, {0, 0}),
      Decoded = {done, <<"Wikipedia in\r\n\r\nchunks.">>, 23, <<>>},
      Plain = <<
          "4\r\n",
          "Wiki\r\n",
          "5\r\n",
          "pedia\r\n",
          "e\r\n",
          " in\r\n\r\nchunks.\r\n",
          "0\r\n",
          "\r\n" >>,
      Decoded = stream_chunked(Plain, {0, 0}),
      %% Same but with extra spaces or chunk extensions.
      WithExtensions = <<
          "4 \r\n",
          "Wiki\r\n",
          "5 ; ext = abc\r\n",
          "pedia\r\n",
          "e;ext=abc\r\n",
          " in\r\n\r\nchunks.\r\n",
          "0;ext\r\n",
          "\r\n" >>,
      Decoded = stream_chunked(WithExtensions, {0, 0}),
      ok.
  %% A chunked body decoded over several calls, threading the returned
  %% state from one call to the next.
  stream_chunked_n_passes_test() ->
      Start = {0, 0},
      %% An incomplete size line decodes nothing.
      more = stream_chunked(<<"4\r">>, Start),
      {more, <<>>, 6, St1} = stream_chunked(<<"4\r\n">>, Start),
      {more, <<"Wiki">>, 0, St2} = stream_chunked(<<"Wiki\r\n">>, St1),
      %% The unparsed size line of the next chunk is handed back.
      {more, <<"pedia">>, <<"e\r">>, St3}
          = stream_chunked(<<"5\r\npedia\r\ne\r">>, St2),
      {more, <<" in\r\n\r\nchunks.">>, 2, St4}
          = stream_chunked(<<"e\r\n in\r\n\r\nchunks.">>, St3),
      {done, 23, <<>>} = stream_chunked(<<"\r\n0\r\n\r\n">>, St4),
      %% A few extra for coverage purposes.
      more = stream_chunked(<<"\n3">>, {1, 0}),
      {more, <<"abc">>, 2, {2, 3}} = stream_chunked(<<"\n3\r\nabc">>, {1, 0}),
      {more, <<"abc">>, {1, 3}} = stream_chunked(<<"3\r\nabc\r">>, {0, 0}),
      {more, <<"abc">>, <<"123">>, {0, 3}}
          = stream_chunked(<<"3\r\nabc\r\n123">>, {0, 0}),
      ok.
  %% The chunked decoder must cope with the body arriving one byte at a
  %% time; dripfeed/4 returns ok only if decoding reaches done.
  stream_chunked_dripfeed_test() ->
      Encoded = <<
          "4\r\n",
          "Wiki\r\n",
          "5\r\n",
          "pedia\r\n",
          "e\r\n",
          " in\r\n\r\nchunks.\r\n",
          "0\r\n",
          "\r\n" >>,
      dripfeed(Encoded, <<>>, {0, 0}, fun stream_chunked/2).
  %% Test helper: split Body into chunks of at most ChunkSize bytes, each
  %% encoded as a binary, and end the list with the final chunk. Acc
  %% collects the encoded chunks in reverse order.
  do_body_to_chunks(_, <<>>, Acc) ->
      lists:reverse([<<"0\r\n\r\n">>|Acc]);
  do_body_to_chunks(ChunkSize, Body, Acc) ->
      %% The last chunk may be shorter than ChunkSize.
      Take = min(ChunkSize, byte_size(Body)),
      << Chunk:Take/binary, Rest/binary >> = Body,
      HexSize = integer_to_binary(Take, 16),
      Encoded = << HexSize/binary, "\r\n", Chunk/binary, "\r\n" >>,
      do_body_to_chunks(ChunkSize, Rest, [Encoded|Acc]).
  %% Drip-feeds a larger body, the printed list 1..100 cut into chunks of
  %% at most 50 bytes, through the chunked decoder.
  stream_chunked_dripfeed2_test() ->
      Printed = io_lib:format("~p", [lists:seq(1, 100)]),
      Chunks = do_body_to_chunks(50, iolist_to_binary(Printed), []),
      dripfeed(iolist_to_binary(Chunks), <<>>, {0, 0}, fun stream_chunked/2).
  %% Generates one test per {Value, State} pair; each test passes only if
  %% stream_chunked/2 raises an error exception for that input.
  %%
  %% try/catch is used rather than the old-style catch so that a normal
  %% return value or a throw can never be mistaken for a crash.
  stream_chunked_error_test_() ->
      Tests = [
          {<<>>, undefined},
          {<<"\n\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, {2, 0}}
      ],
      [{lists:flatten(io_lib:format("value ~p state ~p", [V, S])),
          fun() ->
              try stream_chunked(V, S) of
                  %% The of section is not protected, so this error
                  %% reaches the test runner and fails the test.
                  Ret -> error({unexpected_return, Ret})
              catch
                  error:_ -> ok
              end
          end}
      || {V, S} <- Tests].
  %% Runs the chunked decoder 10000 times over a complete body in one call.
  %% NOTE(review): the decode call is kept inline as the second argument
  %% of horse:repeat/2; presumably it is re-evaluated on every
  %% iteration. Confirm before hoisting it into a variable.
  horse_stream_chunked() ->
      horse:repeat(10000,
          stream_chunked(<<
              "4\r\n",
              "Wiki\r\n",
              "5\r\n",
              "pedia\r\n",
              "e\r\n",
              " in\r\n\r\nchunks.\r\n",
              "0\r\n",
              "\r\n" >>, {0, 0})
      ).
  %% Runs the chunked decoder 10000 times over the same body, delivered
  %% one byte at a time through dripfeed/4.
  %%
  %% The initial state is {0, 0}: for the chunked decoder the second
  %% element is the number of bytes already streamed, not the total body
  %% length that the identity decoder expects there.
  %% NOTE(review): the call is kept inline as the second argument of
  %% horse:repeat/2; presumably it is re-evaluated on every iteration.
  %% Confirm before hoisting it into a variable.
  horse_stream_chunked_dripfeed() ->
      horse:repeat(10000,
          dripfeed(<<
              "4\r\n"
              "Wiki\r\n"
              "5\r\n"
              "pedia\r\n"
              "e\r\n"
              " in\r\n\r\nchunks.\r\n"
              "0\r\n"
              "\r\n">>, <<>>, {0, 0}, fun stream_chunked/2)
      ).
  311. -endif.