cow_multipart.erl 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790
  1. %% Copyright (c) 2014, Loïc Hoguin <essen@ninenines.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. -module(cow_multipart).
  15. %% Parsing.
  16. -export([parse_headers/2]).
  17. -export([parse_body/2]).
  18. %% Building.
  19. -export([boundary/0]).
  20. -export([first_part/2]).
  21. -export([part/2]).
  22. -export([close/1]).
  23. %% Headers.
  24. -export([form_data/1]).
  25. -export([parse_content_disposition/1]).
  26. -export([parse_content_transfer_encoding/1]).
  27. -export([parse_content_type/1]).
  28. -type headers() :: [{iodata(), iodata()}].
  29. -export_type([headers/0]).
  30. -include("cow_inline.hrl").
  31. -define(TEST1_MIME, <<
  32. "This is a message with multiple parts in MIME format.\r\n"
  33. "--frontier\r\n"
  34. "Content-Type: text/plain\r\n"
  35. "\r\n"
  36. "This is the body of the message.\r\n"
  37. "--frontier\r\n"
  38. "Content-Type: application/octet-stream\r\n"
  39. "Content-Transfer-Encoding: base64\r\n"
  40. "\r\n"
  41. "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
  42. "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n"
  43. "--frontier--"
  44. >>).
  45. -define(TEST1_BOUNDARY, <<"frontier">>).
  46. -define(TEST2_MIME, <<
  47. "--AaB03x\r\n"
  48. "Content-Disposition: form-data; name=\"submit-name\"\r\n"
  49. "\r\n"
  50. "Larry\r\n"
  51. "--AaB03x\r\n"
  52. "Content-Disposition: form-data; name=\"files\"\r\n"
  53. "Content-Type: multipart/mixed; boundary=BbC04y\r\n"
  54. "\r\n"
  55. "--BbC04y\r\n"
  56. "Content-Disposition: file; filename=\"file1.txt\"\r\n"
  57. "Content-Type: text/plain\r\n"
  58. "\r\n"
  59. "... contents of file1.txt ...\r\n"
  60. "--BbC04y\r\n"
  61. "Content-Disposition: file; filename=\"file2.gif\"\r\n"
  62. "Content-Type: image/gif\r\n"
  63. "Content-Transfer-Encoding: binary\r\n"
  64. "\r\n"
  65. "...contents of file2.gif...\r\n"
  66. "--BbC04y--\r\n"
  67. "--AaB03x--"
  68. >>).
  69. -define(TEST2_BOUNDARY, <<"AaB03x">>).
  70. -define(TEST3_MIME, <<
  71. "This is the preamble.\r\n"
  72. "--boundary\r\n"
  73. "Content-Type: text/plain\r\n"
  74. "\r\n"
  75. "This is the body of the message.\r\n"
  76. "--boundary--"
  77. "\r\nThis is the epilogue. Here it includes leading CRLF"
  78. >>).
  79. -define(TEST3_BOUNDARY, <<"boundary">>).
  80. -define(TEST4_MIME, <<
  81. "This is the preamble.\r\n"
  82. "--boundary\r\n"
  83. "Content-Type: text/plain\r\n"
  84. "\r\n"
  85. "This is the body of the message.\r\n"
  86. "--boundary--"
  87. "\r\n"
  88. >>).
  89. -define(TEST4_BOUNDARY, <<"boundary">>).
  90. %% RFC 2046, Section 5.1.1:
  91. -define(TEST5_MIME, <<
  92. "This is the preamble. It is to be ignored, though it\r\n"
  93. "is a handy place for composition agents to include an\r\n"
  94. "explanatory note to non-MIME conformant readers.\r\n"
  95. "\r\n"
  96. "--simple boundary\r\n",
  97. "\r\n"
  98. "This is implicitly typed plain US-ASCII text.\r\n"
  99. "It does NOT end with a linebreak."
  100. "\r\n"
  101. "--simple boundary\r\n",
  102. "Content-type: text/plain; charset=us-ascii\r\n"
  103. "\r\n"
  104. "This is explicitly typed plain US-ASCII text.\r\n"
  105. "It DOES end with a linebreak.\r\n"
  106. "\r\n"
  107. "--simple boundary--\r\n"
  108. "\r\n"
  109. "This is the epilogue. It is also to be ignored."
  110. >>).
  111. -define(TEST5_BOUNDARY, <<"simple boundary">>).
  112. %% Parsing.
  113. %%
  114. %% The multipart format is defined in RFC 2045.
  115. %% @doc Parse the headers for the next multipart part.
  116. %%
  117. %% This function skips any preamble before the boundary.
  118. %% The preamble may be retrieved using parse_body/2.
  119. %%
  120. %% This function will accept input of any size, it is
  121. %% up to the caller to limit it if needed.
  122. -spec parse_headers(binary(), binary())
  123. -> more | {more, binary()}
  124. | {ok, headers(), binary()}
  125. | {done, binary()}.
  126. %% If the stream starts with the boundary we can make a few assumptions
  127. %% and quickly figure out if we got the complete list of headers.
  128. parse_headers(<< "--", Stream/bits >>, Boundary) ->
  129. BoundarySize = byte_size(Boundary),
  130. case Stream of
  131. %% Last boundary. Return the epilogue.
  132. << Boundary:BoundarySize/binary, "--", Stream2/bits >> ->
  133. {done, Stream2};
  134. << Boundary:BoundarySize/binary, Stream2/bits >> ->
  135. %% We have all the headers only if there is a \r\n\r\n
  136. %% somewhere in the data after the boundary.
  137. case binary:match(Stream2, <<"\r\n\r\n">>) of
  138. nomatch ->
  139. more;
  140. _ ->
  141. before_parse_headers(Stream2)
  142. end;
  143. %% If there isn't enough to represent Boundary \r\n\r\n
  144. %% then we definitely don't have all the headers.
  145. _ when byte_size(Stream) < byte_size(Boundary) + 4 ->
  146. more;
  147. %% Otherwise we have preamble data to skip.
  148. %% We still got rid of the first two misleading bytes.
  149. _ ->
  150. skip_preamble(Stream, Boundary)
  151. end;
  152. %% Otherwise we have preamble data to skip.
  153. parse_headers(Stream, Boundary) ->
  154. skip_preamble(Stream, Boundary).
  155. %% We need to find the boundary and a \r\n\r\n after that.
  156. %% Since the boundary isn't at the start, it must be right
  157. %% after a \r\n too.
  158. skip_preamble(Stream, Boundary) ->
  159. case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
  160. %% No boundary, need more data.
  161. nomatch ->
  162. %% We can safely skip the size of the stream
  163. %% minus the last 3 bytes which may be a partial boundary.
  164. SkipSize = byte_size(Stream) - 3,
  165. case SkipSize > 0 of
  166. false ->
  167. more;
  168. true ->
  169. << _:SkipSize/binary, Stream2/bits >> = Stream,
  170. {more, Stream2}
  171. end;
  172. {Start, Length} ->
  173. Start2 = Start + Length,
  174. << _:Start2/binary, Stream2/bits >> = Stream,
  175. case Stream2 of
  176. %% Last boundary. Return the epilogue.
  177. << "--", Stream3/bits >> ->
  178. {done, Stream3};
  179. _ ->
  180. case binary:match(Stream, <<"\r\n\r\n">>) of
  181. %% We don't have the full headers.
  182. nomatch ->
  183. {more, Stream2};
  184. _ ->
  185. before_parse_headers(Stream2)
  186. end
  187. end
  188. end.
  189. before_parse_headers(<< "\r\n\r\n", Stream/bits >>) ->
  190. %% This indicates that there are no headers, so we can abort
  191. %% immediately.
  192. {ok, [], Stream};
  193. before_parse_headers(<< "\r\n", Stream/bits >>) ->
  194. %% There is a line break right after the boundary, skip it.
  195. parse_hd_name(Stream, [], <<>>).
  196. parse_hd_name(<< C, Rest/bits >>, H, SoFar) ->
  197. case C of
  198. $: -> parse_hd_before_value(Rest, H, SoFar);
  199. $\s -> parse_hd_name_ws(Rest, H, SoFar);
  200. $\t -> parse_hd_name_ws(Rest, H, SoFar);
  201. _ -> ?LOWER(parse_hd_name, Rest, H, SoFar)
  202. end.
  203. parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
  204. case C of
  205. $\s -> parse_hd_name_ws(Rest, H, Name);
  206. $\t -> parse_hd_name_ws(Rest, H, Name);
  207. $: -> parse_hd_before_value(Rest, H, Name)
  208. end.
  209. parse_hd_before_value(<< $\s, Rest/bits >>, H, N) ->
  210. parse_hd_before_value(Rest, H, N);
  211. parse_hd_before_value(<< $\t, Rest/bits >>, H, N) ->
  212. parse_hd_before_value(Rest, H, N);
  213. parse_hd_before_value(Buffer, H, N) ->
  214. parse_hd_value(Buffer, H, N, <<>>).
  215. parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) ->
  216. case Rest of
  217. << "\n\r\n", Rest2/bits >> ->
  218. {ok, [{Name, SoFar}|Headers], Rest2};
  219. << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t ->
  220. parse_hd_value(Rest2, Headers, Name, SoFar);
  221. << $\n, Rest2/bits >> ->
  222. parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>)
  223. end;
  224. parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) ->
  225. parse_hd_value(Rest, H, N, << SoFar/binary, C >>).
  226. %% @doc Parse the body of the current multipart part.
  227. %%
  228. %% The body is everything until the next boundary.
  229. -spec parse_body(binary(), binary())
  230. -> {ok, binary()} | {ok, binary(), binary()}
  231. | done | {done, binary()} | {done, binary(), binary()}.
  232. parse_body(Stream, Boundary) ->
  233. BoundarySize = byte_size(Boundary),
  234. case Stream of
  235. << "--", Boundary:BoundarySize/binary, _/bits >> ->
  236. done;
  237. _ ->
  238. case binary:match(Stream, << "\r\n--", Boundary/bits >>) of
  239. %% No boundary, check for a possible partial at the end.
  240. %% Return more or less of the body depending on the result.
  241. nomatch ->
  242. StreamSize = byte_size(Stream),
  243. From = StreamSize - BoundarySize - 3,
  244. MatchOpts = if
  245. %% Binary too small to contain boundary, check it fully.
  246. From < 0 -> [];
  247. %% Optimize, only check the end of the binary.
  248. true -> [{scope, {From, StreamSize - From}}]
  249. end,
  250. case binary:match(Stream, <<"\r">>, MatchOpts) of
  251. nomatch ->
  252. {ok, Stream};
  253. {Pos, _} ->
  254. case Stream of
  255. << Body:Pos/binary >> ->
  256. {ok, Body};
  257. << Body:Pos/binary, Rest/bits >> ->
  258. {ok, Body, Rest}
  259. end
  260. end;
  261. %% Boundary found, this is the last chunk of the body.
  262. {Pos, _} ->
  263. case Stream of
  264. << Body:Pos/binary, "\r\n" >> ->
  265. {done, Body};
  266. << Body:Pos/binary, "\r\n", Rest/bits >> ->
  267. {done, Body, Rest};
  268. << Body:Pos/binary, Rest/bits >> ->
  269. {done, Body, Rest}
  270. end
  271. end
  272. end.
  273. -ifdef(TEST).
%% Walk through ?TEST1_MIME: skip the preamble, read both parts
%% (headers then body) and finish on the closing delimiter.
parse_test() ->
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    %% Both the expected and the parsed headers are sorted before being
    %% matched, so the order in which headers are returned is not tested.
    H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
        "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    {ok, H1, Rest} = parse_headers(?TEST1_MIME, ?TEST1_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST1_BOUNDARY),
    %% Calling parse_body/2 again on the delimiter reports the end of the body.
    done = parse_body(Rest2, ?TEST1_BOUNDARY),
    {ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST1_BOUNDARY),
    H2 = lists:sort(H2Unsorted),
    {done, Body2, Rest4} = parse_body(Rest3, ?TEST1_BOUNDARY),
    done = parse_body(Rest4, ?TEST1_BOUNDARY),
    %% Closing delimiter with nothing after it: empty epilogue.
    {done, <<>>} = parse_headers(Rest4, ?TEST1_BOUNDARY),
    ok.
%% Parse ?TEST2_MIME, a multipart message whose second part is itself
%% a multipart message using a different boundary. The inner boundary
%% is taken from the outer part's content-type header.
parse_interleaved_test() ->
    H1 = [{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
    Body1 = <<"Larry">>,
    H2 = lists:sort([{<<"content-disposition">>, <<"form-data; name=\"files\"">>},
        {<<"content-type">>, <<"multipart/mixed; boundary=BbC04y">>}]),
    InH1 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file1.txt\"">>},
        {<<"content-type">>, <<"text/plain">>}]),
    InBody1 = <<"... contents of file1.txt ...">>,
    InH2 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file2.gif\"">>},
        {<<"content-type">>, <<"image/gif">>},
        {<<"content-transfer-encoding">>, <<"binary">>}]),
    InBody2 = <<"...contents of file2.gif...">>,
    %% Outer message, first part.
    {ok, H1, Rest} = parse_headers(?TEST2_MIME, ?TEST2_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST2_BOUNDARY),
    done = parse_body(Rest2, ?TEST2_BOUNDARY),
    %% Outer message, second part: only its headers are read with the
    %% outer boundary, its body is parsed with the inner one below.
    {ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST2_BOUNDARY),
    H2 = lists:sort(H2Unsorted),
    {_, ContentType} = lists:keyfind(<<"content-type">>, 1, H2),
    {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, InBoundary}]}
        = parse_content_type(ContentType),
    %% Inner message, both parts.
    {ok, InH1Unsorted, InRest} = parse_headers(Rest3, InBoundary),
    InH1 = lists:sort(InH1Unsorted),
    {done, InBody1, InRest2} = parse_body(InRest, InBoundary),
    done = parse_body(InRest2, InBoundary),
    {ok, InH2Unsorted, InRest3} = parse_headers(InRest2, InBoundary),
    InH2 = lists:sort(InH2Unsorted),
    {done, InBody2, InRest4} = parse_body(InRest3, InBoundary),
    done = parse_body(InRest4, InBoundary),
    %% The inner epilogue is where parsing of the outer message resumes.
    {done, Rest4} = parse_headers(InRest4, InBoundary),
    {done, <<>>} = parse_headers(Rest4, ?TEST2_BOUNDARY),
    ok.
%% Parse ?TEST3_MIME and check that the text following the closing
%% delimiter is returned as the epilogue, leading CRLF included.
parse_epilogue_test() ->
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    Epilogue = <<"\r\nThis is the epilogue. Here it includes leading CRLF">>,
    {ok, H1, Rest} = parse_headers(?TEST3_MIME, ?TEST3_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST3_BOUNDARY),
    done = parse_body(Rest2, ?TEST3_BOUNDARY),
    {done, Epilogue} = parse_headers(Rest2, ?TEST3_BOUNDARY),
    ok.
%% Parse ?TEST4_MIME, where the closing delimiter is followed by a
%% lone CRLF: that CRLF is returned as the epilogue.
parse_epilogue_crlf_test() ->
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    Epilogue = <<"\r\n">>,
    {ok, H1, Rest} = parse_headers(?TEST4_MIME, ?TEST4_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST4_BOUNDARY),
    done = parse_body(Rest2, ?TEST4_BOUNDARY),
    {done, Epilogue} = parse_headers(Rest2, ?TEST4_BOUNDARY),
    ok.
%% Parse ?TEST5_MIME: a first part without any header, a boundary
%% containing a space, and a preamble and epilogue to be ignored.
parse_rfc2046_test() ->
    %% The following is an example included in RFC 2046, Section 5.1.1.
    Body1 = <<"This is implicitly typed plain US-ASCII text.\r\n"
        "It does NOT end with a linebreak.">>,
    Body2 = <<"This is explicitly typed plain US-ASCII text.\r\n"
        "It DOES end with a linebreak.\r\n">>,
    H2 = [{<<"content-type">>, <<"text/plain; charset=us-ascii">>}],
    Epilogue = <<"\r\n\r\nThis is the epilogue. It is also to be ignored.">>,
    %% The first part has no headers at all.
    {ok, [], Rest} = parse_headers(?TEST5_MIME, ?TEST5_BOUNDARY),
    {done, Body1, Rest2} = parse_body(Rest, ?TEST5_BOUNDARY),
    {ok, H2, Rest3} = parse_headers(Rest2, ?TEST5_BOUNDARY),
    {done, Body2, Rest4} = parse_body(Rest3, ?TEST5_BOUNDARY),
    {done, Epilogue} = parse_headers(Rest4, ?TEST5_BOUNDARY),
    ok.
%% Check parse_body/2 on streams that contain no delimiter: the body
%% is returned up to the first "\r" found near the end of the stream,
%% and everything from that "\r" on is handed back as the remainder.
parse_partial_test() ->
    %% Lone "\r": large stream, small stream, stream shorter than the boundary.
    {ok, <<0:8000, "abcdef">>, <<"\rghij">>}
        = parse_body(<<0:8000, "abcdef\rghij">>, <<"boundary">>),
    {ok, <<"abcdef">>, <<"\rghij">>}
        = parse_body(<<"abcdef\rghij">>, <<"boundary">>),
    {ok, <<"abc">>, <<"\rdef">>}
        = parse_body(<<"abc\rdef">>, <<"boundaryboundary">>),
    %% Same three cases with "\r\n".
    {ok, <<0:8000, "abcdef">>, <<"\r\nghij">>}
        = parse_body(<<0:8000, "abcdef\r\nghij">>, <<"boundary">>),
    {ok, <<"abcdef">>, <<"\r\nghij">>}
        = parse_body(<<"abcdef\r\nghij">>, <<"boundary">>),
    {ok, <<"abc">>, <<"\r\ndef">>}
        = parse_body(<<"abc\r\ndef">>, <<"boundaryboundary">>),
    %% Stream ending on a progressively longer prefix of the delimiter.
    {ok, <<"boundary">>, <<"\r">>}
        = parse_body(<<"boundary\r">>, <<"boundary">>),
    {ok, <<"boundary">>, <<"\r\n">>}
        = parse_body(<<"boundary\r\n">>, <<"boundary">>),
    {ok, <<"boundary">>, <<"\r\n-">>}
        = parse_body(<<"boundary\r\n-">>, <<"boundary">>),
    {ok, <<"boundary">>, <<"\r\n--">>}
        = parse_body(<<"boundary\r\n--">>, <<"boundary">>),
    ok.
  375. -endif.
-ifdef(PERF).
%% Parse every part of Stream (headers then body) until the closing
%% delimiter is reached. Only the `{ok, _, _}' and `{done, _}' results
%% of parse_headers/2 and three-element results of parse_body/2 are
%% handled, so the stream is expected to be complete.
perf_parse_multipart(Stream, Boundary) ->
    case parse_headers(Stream, Boundary) of
        {ok, _, Rest} ->
            {_, _, Rest2} = parse_body(Rest, Boundary),
            perf_parse_multipart(Rest2, Boundary);
        {done, _} ->
            ok
    end.

%% Benchmark entry point for parsing ?TEST1_MIME.
%% NOTE(review): horse:repeat/2 is defined outside this file; presumably
%% it evaluates the expression the given number of times - confirm
%% against the horse library.
horse_parse() ->
    horse:repeat(50000,
        perf_parse_multipart(?TEST1_MIME, ?TEST1_BOUNDARY)
    ).
-endif.
  390. %% Building.
  391. %% @doc Generate a new random boundary.
  392. %%
  393. %% The boundary generated has a low probability of ever appearing
  394. %% in the data.
  395. -spec boundary() -> binary().
  396. boundary() ->
  397. base64:encode(crypto:rand_bytes(48)).
  398. %% @doc Return the first part's head.
  399. %%
  400. %% This works exactly like the part/2 function except there is
  401. %% no leading \r\n. It's not required to use this function,
  402. %% just makes the output a little smaller and prettier.
  403. -spec first_part(binary(), headers()) -> iodata().
  404. first_part(Boundary, Headers) ->
  405. [<<"--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])].
  406. %% @doc Return a part's head.
  407. -spec part(binary(), headers()) -> iodata().
  408. part(Boundary, Headers) ->
  409. [<<"\r\n--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])].
  410. headers_to_iolist([], Acc) ->
  411. lists:reverse([<<"\r\n">>|Acc]);
  412. headers_to_iolist([{N, V}|Tail], Acc) ->
  413. %% We don't want to create a sublist so we list the
  414. %% values in reverse order so that it gets reversed properly.
  415. headers_to_iolist(Tail, [<<"\r\n">>, V, <<": ">>, N|Acc]).
  416. %% @doc Return the closing delimiter of the multipart message.
  417. -spec close(binary()) -> iodata().
  418. close(Boundary) ->
  419. [<<"\r\n--">>, Boundary, <<"--">>].
  420. -ifdef(TEST).
%% Rebuild ?TEST1_MIME with the building functions and compare it to
%% the fixture. Both sides are lowercased first because the fixture
%% spells its header names with capitals while the test uses lowercase.
build_test() ->
    Result = string:to_lower(binary_to_list(?TEST1_MIME)),
    Result = string:to_lower(binary_to_list(iolist_to_binary([
        <<"This is a message with multiple parts in MIME format.\r\n">>,
        first_part(?TEST1_BOUNDARY, [{<<"content-type">>, <<"text/plain">>}]),
        <<"This is the body of the message.">>,
        part(?TEST1_BOUNDARY, [
            {<<"content-type">>, <<"application/octet-stream">>},
            {<<"content-transfer-encoding">>, <<"base64">>}]),
        <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
            "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
        close(?TEST1_BOUNDARY)
    ]))),
    ok.
%% Round trip: build a message with a freshly generated boundary, then
%% parse it back and check that the preamble, both parts and the
%% epilogue come out unchanged.
identity_test() ->
    B = boundary(),
    Preamble = <<"This is a message with multiple parts in MIME format.">>,
    H1 = [{<<"content-type">>, <<"text/plain">>}],
    Body1 = <<"This is the body of the message.">>,
    H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
        {<<"content-transfer-encoding">>, <<"base64">>}]),
    Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
        "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
    Epilogue = <<"Gotta go fast!">>,
    M = iolist_to_binary([
        Preamble,
        part(B, H1), Body1,
        part(B, H2), Body2,
        close(B),
        Epilogue
    ]),
    %% The preamble is retrieved by parsing it as if it were a body.
    {done, Preamble, M2} = parse_body(M, B),
    {ok, H1, M3} = parse_headers(M2, B),
    {done, Body1, M4} = parse_body(M3, B),
    {ok, H2Unsorted, M5} = parse_headers(M4, B),
    H2 = lists:sort(H2Unsorted),
    {done, Body2, M6} = parse_body(M5, B),
    {done, Epilogue} = parse_headers(M6, B),
    ok.
  460. -endif.
-ifdef(PERF).
%% Build a two-part message as iodata, with a preamble and an
%% epilogue, using a freshly generated boundary.
perf_build_multipart() ->
    B = boundary(),
    [
        <<"preamble\r\n">>,
        first_part(B, [{<<"content-type">>, <<"text/plain">>}]),
        <<"This is the body of the message.">>,
        part(B, [
            {<<"content-type">>, <<"application/octet-stream">>},
            {<<"content-transfer-encoding">>, <<"base64">>}]),
        <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
            "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
        close(B),
        <<"epilogue">>
    ].

%% Benchmark entry point for building a message.
%% NOTE(review): horse:repeat/2 is defined outside this file; presumably
%% it evaluates the expression the given number of times - confirm
%% against the horse library.
horse_build() ->
    horse:repeat(50000,
        perf_build_multipart()
    ).
-endif.
  481. %% Headers.
  482. %% @doc Convenience function for extracting information from headers
  483. %% when parsing a multipart/form-data stream.
  484. -spec form_data(headers())
  485. -> {data, binary()}
  486. | {file, binary(), binary(), binary(), binary()}.
  487. form_data(Headers) ->
  488. {_, DispositionBin} = lists:keyfind(<<"content-disposition">>, 1, Headers),
  489. {<<"form-data">>, Params} = parse_content_disposition(DispositionBin),
  490. {_, FieldName} = lists:keyfind(<<"name">>, 1, Params),
  491. case lists:keyfind(<<"filename">>, 1, Params) of
  492. false ->
  493. {data, FieldName};
  494. {_, Filename} ->
  495. Type = case lists:keyfind(<<"content-type">>, 1, Headers) of
  496. false -> <<"text/plain">>;
  497. {_, T} -> T
  498. end,
  499. TransferEncoding = case lists:keyfind(
  500. <<"content-transfer-encoding">>, 1, Headers) of
  501. false -> <<"7bit">>;
  502. {_, TE} -> TE
  503. end,
  504. {file, FieldName, Filename, Type, TransferEncoding}
  505. end.
  506. -ifdef(TEST).
%% Test generator for form_data/1: one test per {Headers, Expected}
%% pair, titled with the printed headers.
form_data_test_() ->
    Tests = [
        %% Plain field.
        {[{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
            {data, <<"submit-name">>}},
        %% File field without content-transfer-encoding: "7bit" is used.
        {[{<<"content-disposition">>,
                <<"form-data; name=\"files\"; filename=\"file1.txt\"">>},
            {<<"content-type">>, <<"text/x-plain">>}],
            {file, <<"files">>, <<"file1.txt">>,
                <<"text/x-plain">>, <<"7bit">>}}
    ],
    %% R is already bound by the generator, so `R = ...' is an assertion.
    [{lists:flatten(io_lib:format("~p", [V])),
        fun() -> R = form_data(V) end} || {V, R} <- Tests].
  519. -endif.
  520. %% @todo parse_content_description
  521. %% @todo parse_content_id
  522. %% @doc Parse an RFC 2183 content-disposition value.
  523. %% @todo Support RFC 2231.
  524. -spec parse_content_disposition(binary())
  525. -> {binary(), [{binary(), binary()}]}.
  526. parse_content_disposition(Bin) ->
  527. parse_cd_type(Bin, <<>>).
  528. parse_cd_type(<<>>, Acc) ->
  529. {Acc, []};
  530. parse_cd_type(<< C, Rest/bits >>, Acc) ->
  531. case C of
  532. $; -> {Acc, parse_before_param(Rest, [])};
  533. $\s -> {Acc, parse_before_param(Rest, [])};
  534. $\t -> {Acc, parse_before_param(Rest, [])};
  535. _ -> ?LOWER(parse_cd_type, Rest, Acc)
  536. end.
  537. -ifdef(TEST).
%% Test generator for parse_content_disposition/1: one test per
%% {Value, Expected} pair, titled with the value itself.
parse_content_disposition_test_() ->
    Tests = [
        {<<"inline">>, {<<"inline">>, []}},
        {<<"attachment">>, {<<"attachment">>, []}},
        %% Unquoted and quoted parameter values, trailing ";".
        {<<"attachment; filename=genome.jpeg;"
            " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>,
            {<<"attachment">>, [
                {<<"filename">>, <<"genome.jpeg">>},
                {<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}
            ]}},
        {<<"form-data; name=\"user\"">>,
            {<<"form-data">>, [{<<"name">>, <<"user">>}]}},
        %% Parameter names are lowercased, values are left as they are.
        {<<"form-data; NAME=\"submit-name\"">>,
            {<<"form-data">>, [{<<"name">>, <<"submit-name">>}]}},
        {<<"form-data; name=\"files\"; filename=\"file1.txt\"">>,
            {<<"form-data">>, [
                {<<"name">>, <<"files">>},
                {<<"filename">>, <<"file1.txt">>}
            ]}},
        {<<"file; filename=\"file1.txt\"">>,
            {<<"file">>, [{<<"filename">>, <<"file1.txt">>}]}},
        {<<"file; filename=\"file2.gif\"">>,
            {<<"file">>, [{<<"filename">>, <<"file2.gif">>}]}}
    ],
    %% R is already bound by the generator, so `R = ...' is an assertion.
    [{V, fun() -> R = parse_content_disposition(V) end} || {V, R} <- Tests].
  563. -endif.
-ifdef(PERF).
%% Benchmarks for parse_content_disposition/1 with two parameters,
%% two quoted parameters and no parameter respectively.
%% NOTE(review): horse:repeat/2 is defined outside this file; presumably
%% it evaluates the expression the given number of times - confirm
%% against the horse library.
horse_parse_content_disposition_attachment() ->
    horse:repeat(100000,
        parse_content_disposition(<<"attachment; filename=genome.jpeg;"
            " modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)
    ).

horse_parse_content_disposition_form_data() ->
    horse:repeat(100000,
        parse_content_disposition(
            <<"form-data; name=\"files\"; filename=\"file1.txt\"">>)
    ).

horse_parse_content_disposition_inline() ->
    horse:repeat(100000,
        parse_content_disposition(<<"inline">>)
    ).
-endif.
%% @doc Parse an RFC 2045 content-transfer-encoding header.
%%
%% The value is returned in lowercase (for example "BASE64" becomes
%% "base64"); it is not checked against the list of known encodings.
-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
    %% NOTE(review): ?LOWER/1 comes from cow_inline.hrl and is not
    %% visible here; the tests in this module show ASCII letters being
    %% lowercased - confirm its behavior on other bytes.
    ?LOWER(Bin).
  584. -ifdef(TEST).
%% Test generator for parse_content_transfer_encoding/1: the value is
%% expected back in lowercase whatever its original case.
parse_content_transfer_encoding_test_() ->
    Tests = [
        {<<"7bit">>, <<"7bit">>},
        {<<"7BIT">>, <<"7bit">>},
        {<<"8bit">>, <<"8bit">>},
        {<<"binary">>, <<"binary">>},
        {<<"quoted-printable">>, <<"quoted-printable">>},
        {<<"base64">>, <<"base64">>},
        {<<"Base64">>, <<"base64">>},
        {<<"BASE64">>, <<"base64">>},
        {<<"bAsE64">>, <<"base64">>}
    ],
    %% R is already bound by the generator, so `R = ...' is an assertion.
    [{V, fun() -> R = parse_content_transfer_encoding(V) end}
        || {V, R} <- Tests].
  599. -endif.
-ifdef(PERF).
%% Benchmark for parse_content_transfer_encoding/1 on an all-uppercase value.
%% NOTE(review): horse:repeat/2 is defined outside this file; presumably
%% it evaluates the expression the given number of times - confirm
%% against the horse library.
horse_parse_content_transfer_encoding() ->
    horse:repeat(100000,
        parse_content_transfer_encoding(<<"QUOTED-PRINTABLE">>)
    ).
-endif.
  606. %% @doc Parse an RFC 2045 content-type header.
  607. -spec parse_content_type(binary())
  608. -> {binary(), binary(), [{binary(), binary()}]}.
  609. parse_content_type(Bin) ->
  610. parse_ct_type(Bin, <<>>).
  611. parse_ct_type(<< C, Rest/bits >>, Acc) ->
  612. case C of
  613. $/ -> parse_ct_subtype(Rest, Acc, <<>>);
  614. _ -> ?LOWER(parse_ct_type, Rest, Acc)
  615. end.
  616. parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
  617. {Type, Subtype, []};
  618. parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
  619. case C of
  620. $; -> {Type, Acc, parse_before_param(Rest, [])};
  621. $\s -> {Type, Acc, parse_before_param(Rest, [])};
  622. $\t -> {Type, Acc, parse_before_param(Rest, [])};
  623. _ -> ?LOWER(parse_ct_subtype, Rest, Type, Acc)
  624. end.
  625. -ifdef(TEST).
%% Test generator for parse_content_type/1: one test per
%% {Value, Expected} pair, titled with the value itself.
parse_content_type_test_() ->
    Tests = [
        {<<"image/gif">>,
            {<<"image">>, <<"gif">>, []}},
        {<<"text/plain">>,
            {<<"text">>, <<"plain">>, []}},
        %% Unquoted and quoted forms give the same parameter value.
        {<<"text/plain; charset=us-ascii">>,
            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
        {<<"text/plain; charset=\"us-ascii\"">>,
            {<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
        {<<"multipart/form-data; boundary=AaB03x">>,
            {<<"multipart">>, <<"form-data">>,
                [{<<"boundary">>, <<"AaB03x">>}]}},
        {<<"multipart/mixed; boundary=BbC04y">>,
            {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"BbC04y">>}]}},
        {<<"multipart/mixed; boundary=--------">>,
            {<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"--------">>}]}},
        %% Several parameters, including one with an empty value.
        {<<"application/x-horse; filename=genome.jpeg;"
            " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
            " charset=us-ascii; empty=; number=12345">>,
            {<<"application">>, <<"x-horse">>, [
                {<<"filename">>, <<"genome.jpeg">>},
                {<<"some-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>},
                {<<"charset">>, <<"us-ascii">>},
                {<<"empty">>, <<>>},
                {<<"number">>, <<"12345">>}
            ]}}
    ],
    %% R is already bound by the generator, so `R = ...' is an assertion.
    [{V, fun() -> R = parse_content_type(V) end}
        || {V, R} <- Tests].
  656. -endif.
-ifdef(PERF).
%% Benchmarks for parse_content_type/1 with zero, one and five
%% parameters respectively.
%% NOTE(review): horse:repeat/2 is defined outside this file; presumably
%% it evaluates the expression the given number of times - confirm
%% against the horse library.
horse_parse_content_type_zero() ->
    horse:repeat(100000,
        parse_content_type(<<"text/plain">>)
    ).

horse_parse_content_type_one() ->
    horse:repeat(100000,
        parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)
    ).

horse_parse_content_type_five() ->
    horse:repeat(100000,
        parse_content_type(<<"application/x-horse; filename=genome.jpeg;"
            " some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
            " charset=us-ascii; empty=; number=12345">>)
    ).
-endif.
  673. %% @doc Parse RFC 2045 parameters.
  674. parse_before_param(<<>>, Params) ->
  675. lists:reverse(Params);
  676. parse_before_param(<< C, Rest/bits >>, Params) ->
  677. case C of
  678. $; -> parse_before_param(Rest, Params);
  679. $\s -> parse_before_param(Rest, Params);
  680. $\t -> parse_before_param(Rest, Params);
  681. _ -> ?LOWER(parse_param_name, Rest, Params, <<>>)
  682. end.
  683. parse_param_name(<<>>, Params, Acc) ->
  684. lists:reverse([{Acc, <<>>}|Params]);
  685. parse_param_name(<< C, Rest/bits >>, Params, Acc) ->
  686. case C of
  687. $= -> parse_param_value(Rest, Params, Acc);
  688. _ -> ?LOWER(parse_param_name, Rest, Params, Acc)
  689. end.
  690. parse_param_value(<<>>, Params, Name) ->
  691. lists:reverse([{Name, <<>>}|Params]);
  692. parse_param_value(<< C, Rest/bits >>, Params, Name) ->
  693. case C of
  694. $" -> parse_param_quoted_value(Rest, Params, Name, <<>>);
  695. $; -> parse_before_param(Rest, [{Name, <<>>}|Params]);
  696. $\s -> parse_before_param(Rest, [{Name, <<>>}|Params]);
  697. $\t -> parse_before_param(Rest, [{Name, <<>>}|Params]);
  698. C -> parse_param_value(Rest, Params, Name, << C >>)
  699. end.
  700. parse_param_value(<<>>, Params, Name, Acc) ->
  701. lists:reverse([{Name, Acc}|Params]);
  702. parse_param_value(<< C, Rest/bits >>, Params, Name, Acc) ->
  703. case C of
  704. $; -> parse_before_param(Rest, [{Name, Acc}|Params]);
  705. $\s -> parse_before_param(Rest, [{Name, Acc}|Params]);
  706. $\t -> parse_before_param(Rest, [{Name, Acc}|Params]);
  707. C -> parse_param_value(Rest, Params, Name, << Acc/binary, C >>)
  708. end.
  709. %% We expect a final $" so no need to test for <<>>.
  710. parse_param_quoted_value(<< $\\, C, Rest/bits >>, Params, Name, Acc) ->
  711. parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>);
  712. parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Acc) ->
  713. parse_before_param(Rest, [{Name, Acc}|Params]);
  714. parse_param_quoted_value(<< C, Rest/bits >>, Params, Name, Acc)
  715. when C =/= $\r ->
  716. parse_param_quoted_value(Rest, Params, Name, << Acc/binary, C >>).