cowboy_multipart.erl 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. %% Copyright (c) 2011, Anthony Ramine <nox@dev-extend.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. %% @doc Multipart parser.
  15. -module(cowboy_multipart).
  16. -export([parser/1]).
  17. -export([content_disposition/1]).
  18. -type part_parser() :: parser(more(part_result())).
  19. -type parser(T) :: fun((binary()) -> T).
  20. -type more(T) :: T | {more, parser(T)}.
  21. -type part_result() :: headers() | eof.
  22. -type headers() :: {headers, http_headers(), body_cont()}.
  23. -type http_headers() :: [{binary(), binary()}].
  24. -type body_cont() :: cont(more(body_result())).
  25. -type cont(T) :: fun(() -> T).
  26. -type body_result() :: {body, binary(), body_cont()} | end_of_part().
  27. -type end_of_part() :: {end_of_part, cont(more(part_result()))}.
  28. -type disposition() :: {binary(), [{binary(), binary()}]}.
  29. -ifdef(TEST).
  30. -include_lib("eunit/include/eunit.hrl").
  31. -endif.
  32. %% API.
  33. %% @doc Return a multipart parser for the given boundary.
  34. -spec parser(binary()) -> part_parser().
  35. parser(Boundary) when is_binary(Boundary) ->
  36. fun (Bin) when is_binary(Bin) -> parse(Bin, Boundary) end.
  37. %% @doc Parse a content disposition.
  38. %% @todo Parse the MIME header instead of the HTTP one.
  39. -spec content_disposition(binary()) -> disposition().
  40. content_disposition(Data) ->
  41. cowboy_http:token_ci(Data,
  42. fun (_Rest, <<>>) -> {error, badarg};
  43. (Rest, Disposition) ->
  44. cowboy_http:params(Rest,
  45. fun (<<>>, Params) -> {Disposition, Params};
  46. (_Rest2, _) -> {error, badarg}
  47. end)
  48. end).
  49. %% Internal.
  50. %% @doc Entry point of the multipart parser, skips over the preamble if any.
  51. -spec parse(binary(), binary()) -> more(part_result()).
  52. parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 ->
  53. BoundarySize = byte_size(Boundary),
  54. Pattern = pattern(Boundary),
  55. case Bin of
  56. <<"--", Boundary:BoundarySize/binary, Rest/binary>> ->
  57. % Data starts with initial boundary, skip preamble parsing.
  58. parse_boundary_tail(Rest, Pattern);
  59. _ ->
  60. % Parse preamble.
  61. skip(Bin, Pattern)
  62. end;
  63. parse(Bin, Boundary) ->
  64. % Not enough data to know if the data begins with a boundary.
  65. more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end).
  66. -type pattern() :: {binary:cp(), non_neg_integer()}.
  67. %% @doc Return a compiled binary pattern with its size in bytes.
  68. %% The pattern is the boundary prepended with "\r\n--".
  69. -spec pattern(binary()) -> pattern().
  70. pattern(Boundary) ->
  71. MatchPattern = <<"\r\n--", Boundary/binary>>,
  72. {binary:compile_pattern(MatchPattern), byte_size(MatchPattern)}.
  73. %% @doc Parse remaining characters of a line beginning with the boundary.
  74. %% If followed by "--", <em>eof</em> is returned and parsing is finished.
  75. -spec parse_boundary_tail(binary(), pattern()) -> more(part_result()).
  76. parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 ->
  77. case Bin of
  78. <<"--", _Rest/binary>> ->
  79. % Boundary is followed by "--", end parsing.
  80. eof;
  81. _ ->
  82. % No dash after boundary, proceed with unknown chars and lwsp
  83. % removal.
  84. parse_boundary_eol(Bin, Pattern)
  85. end;
  86. parse_boundary_tail(Bin, Pattern) ->
  87. % Boundary may be followed by "--", need more data.
  88. more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end).
  89. %% @doc Skip whitespace and unknown chars until CRLF.
  90. -spec parse_boundary_eol(binary(), pattern()) -> more(part_result()).
  91. parse_boundary_eol(Bin, Pattern) ->
  92. case binary:match(Bin, <<"\r\n">>) of
  93. {CrlfStart, _Length} ->
  94. % End of line found, remove optional whitespace.
  95. <<_:CrlfStart/binary, Rest/binary>> = Bin,
  96. Fun = fun (Rest2) -> parse_boundary_crlf(Rest2, Pattern) end,
  97. cowboy_http:whitespace(Rest, Fun);
  98. nomatch ->
  99. % CRLF not found in the given binary.
  100. RestStart = max(byte_size(Bin) - 1, 0),
  101. <<_:RestStart/binary, Rest/binary>> = Bin,
  102. more(Rest, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end)
  103. end.
  104. -spec parse_boundary_crlf(binary(), pattern()) -> more(part_result()).
  105. parse_boundary_crlf(<<"\r\n", Rest/binary>>, Pattern) ->
  106. % The binary is at least 2 bytes long as this function is only called by
  107. % parse_boundary_eol/3 when CRLF has been found so a more tuple will never
  108. % be returned from here.
  109. parse_headers(Rest, Pattern);
  110. parse_boundary_crlf(Bin, Pattern) ->
  111. % Unspecified behaviour here: RFC 2046 doesn't say what to do when LWSP is
  112. % not followed directly by a new line. In this implementation it is
  113. % considered part of the boundary so EOL needs to be searched again.
  114. parse_boundary_eol(Bin, Pattern).
  115. -spec parse_headers(binary(), pattern()) -> more(part_result()).
  116. parse_headers(Bin, Pattern) ->
  117. parse_headers(Bin, Pattern, []).
  118. -spec parse_headers(binary(), pattern(), http_headers()) -> more(part_result()).
  119. parse_headers(Bin, Pattern, Acc) ->
  120. case erlang:decode_packet(httph_bin, Bin, []) of
  121. {ok, {http_header, _, Name, _, Value}, Rest} ->
  122. Name2 = case is_atom(Name) of
  123. true -> cowboy_bstr:to_lower(atom_to_binary(Name, latin1));
  124. false -> cowboy_bstr:to_lower(Name)
  125. end,
  126. parse_headers(Rest, Pattern, [{Name2, Value} | Acc]);
  127. {ok, http_eoh, Rest} ->
  128. Headers = lists:reverse(Acc),
  129. {headers, Headers, fun () -> parse_body(Rest, Pattern) end};
  130. {ok, {http_error, _}, _} ->
  131. % Skip malformed parts.
  132. skip(Bin, Pattern);
  133. {more, _} ->
  134. more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end)
  135. end.
  136. -spec parse_body(binary(), pattern()) -> more(body_result()).
  137. parse_body(Bin, Pattern = {P, PSize}) when byte_size(Bin) >= PSize ->
  138. case binary:match(Bin, P) of
  139. {0, _Length} ->
  140. <<_:PSize/binary, Rest/binary>> = Bin,
  141. end_of_part(Rest, Pattern);
  142. {BoundaryStart, _Length} ->
  143. % Boundary found, this is the latest partial body that will be
  144. % returned for this part.
  145. <<PBody:BoundaryStart/binary, _:PSize/binary, Rest/binary>> = Bin,
  146. FResult = end_of_part(Rest, Pattern),
  147. {body, PBody, fun () -> FResult end};
  148. nomatch ->
  149. PartialLength = byte_size(Bin) - PSize + 1,
  150. <<PBody:PartialLength/binary, Rest/binary>> = Bin,
  151. {body, PBody, fun () -> parse_body(Rest, Pattern) end}
  152. end;
  153. parse_body(Bin, Pattern) ->
  154. more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end).
  155. -spec end_of_part(binary(), pattern()) -> end_of_part().
  156. end_of_part(Bin, Pattern) ->
  157. {end_of_part, fun () -> parse_boundary_tail(Bin, Pattern) end}.
  158. -spec skip(binary(), pattern()) -> more(part_result()).
  159. skip(Bin, Pattern = {P, PSize}) ->
  160. case binary:match(Bin, P) of
  161. {BoundaryStart, _Length} ->
  162. % Boundary found, proceed with parsing of the next part.
  163. RestStart = BoundaryStart + PSize,
  164. <<_:RestStart/binary, Rest/binary>> = Bin,
  165. parse_boundary_tail(Rest, Pattern);
  166. nomatch ->
  167. % Boundary not found, need more data.
  168. RestStart = max(byte_size(Bin) - PSize + 1, 0),
  169. <<_:RestStart/binary, Rest/binary>> = Bin,
  170. more(Rest, fun (NewBin) -> skip(NewBin, Pattern) end)
  171. end.
  172. -spec more(binary(), parser(T)) -> {more, parser(T)}.
  173. more(<<>>, F) ->
  174. {more, F};
  175. more(Bin, InnerF) ->
  176. F = fun (NewData) when is_binary(NewData) ->
  177. InnerF(<<Bin/binary, NewData/binary>>)
  178. end,
  179. {more, F}.
  180. %% Tests.
  181. -ifdef(TEST).
  182. multipart_test_() ->
  183. %% {Body, Result}
  184. Tests = [
  185. {<<"--boundary--">>, []},
  186. {<<"preamble\r\n--boundary--">>, []},
  187. {<<"--boundary--\r\nepilogue">>, []},
  188. {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>,
  189. [{[{<<"a">>, <<"b">>}, {<<"c">>, <<"d">>}], <<>>}]},
  190. {
  191. <<
  192. "--boundary\r\nX-Name:answer\r\n\r\n42"
  193. "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n"
  194. "\r\n--boundary--"
  195. >>,
  196. [
  197. {[{<<"x-name">>, <<"answer">>}], <<"42">>},
  198. {[{<<"server">>, <<"Cowboy">>}], <<"It rocks!\r\n">>}
  199. ]
  200. }
  201. ],
  202. [{title(V), fun () -> R = acc_multipart(V) end} || {V, R} <- Tests].
  203. acc_multipart(V) ->
  204. acc_multipart((parser(<<"boundary">>))(V), []).
  205. acc_multipart({headers, Headers, Cont}, Acc) ->
  206. acc_multipart(Cont(), [{Headers, []}|Acc]);
  207. acc_multipart({body, Body, Cont}, [{Headers, BodyAcc}|Acc]) ->
  208. acc_multipart(Cont(), [{Headers, [Body|BodyAcc]}|Acc]);
  209. acc_multipart({end_of_part, Cont}, [{Headers, BodyAcc}|Acc]) ->
  210. Body = list_to_binary(lists:reverse(BodyAcc)),
  211. acc_multipart(Cont(), [{Headers, Body}|Acc]);
  212. acc_multipart(eof, Acc) ->
  213. lists:reverse(Acc).
  214. content_disposition_test_() ->
  215. %% {Disposition, Result}
  216. Tests = [
  217. {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}},
  218. {<<"inline">>, {<<"inline">>, []}},
  219. {<<"attachment; \tfilename=brackets-slides.pdf">>,
  220. {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}}
  221. ],
  222. [{title(V), fun () -> R = content_disposition(V) end} || {V, R} <- Tests].
  223. title(Bin) ->
  224. Title = lists:foldl(
  225. fun ({T, R}, V) -> re:replace(V, T, R, [global]) end,
  226. Bin,
  227. [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}]
  228. ),
  229. iolist_to_binary(Title).
  230. -endif.