%% File: cowboy_multipart.erl
  1. %% Copyright (c) 2011, Anthony Ramine <nox@dev-extend.eu>
  2. %%
  3. %% Permission to use, copy, modify, and/or distribute this software for any
  4. %% purpose with or without fee is hereby granted, provided that the above
  5. %% copyright notice and this permission notice appear in all copies.
  6. %%
  7. %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8. %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9. %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  10. %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  11. %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  12. %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  13. %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. %% @doc Multipart parser.
  15. -module(cowboy_multipart).
  %% A part parser is a fun fed with a binary; it either produces a part
  %% result or asks for more data.
  -type part_parser() :: parser(more(part_result())).

  %% A fun taking one binary and returning T.
  -type parser(T) :: fun((binary()) -> T).

  %% Either a finished T, or a request for more input carrying the fun to
  %% call with that input.
  -type more(T) :: T | {more, parser(T)}.

  %% What is found at the start of a part: its headers, or the end of the
  %% multipart data.
  -type part_result() :: headers() | eof.

  %% Headers of a part together with the continuation used to read its body.
  -type headers() :: {headers, http_headers(), body_cont()}.

  %% Header list; names are atoms or binaries, values are binaries.
  -type http_headers() :: [{atom() | binary(), binary()}].

  %% Continuation producing the next body result (or a request for more data).
  -type body_cont() :: cont(more(body_result())).

  %% A fun taking no argument and returning T.
  -type cont(T) :: fun(() -> T).

  %% A chunk of body with the continuation for what follows, or the end of
  %% the current part.
  -type body_result() :: {body, binary(), body_cont()} | end_of_part().

  %% End of the current part with the continuation leading to the next part
  %% result.
  -type end_of_part() :: {end_of_part, cont(more(part_result()))}.

  %% A disposition name followed by its list of {Key, Value} parameters.
  -type disposition() :: {binary(), [{binary(), binary()}]}.
  27. -export([parser/1, content_disposition/1]).
  28. -include_lib("eunit/include/eunit.hrl").
  29. %% API.
  %% @doc Build a multipart parser bound to the given boundary.
  %%
  %% The returned fun expects the first binary chunk of the multipart data
  %% and hands it, together with the boundary, to the internal entry point.
  -spec parser(binary()) -> part_parser().
  parser(Boundary) when is_binary(Boundary) ->
      fun (Chunk) when is_binary(Chunk) ->
          parse(Chunk, Boundary)
      end.
  %% @doc Parse a content disposition.
  %%
  %% On success the result is <em>{Disposition, Params}</em>, where
  %% <em>Disposition</em> is the leading token and <em>Params</em> the list
  %% of parameters that follow it. When the leading token is empty,
  %% <em>{error, badarg}</em> is returned instead.
  %%
  %% NOTE(review): cowboy_http:token_ci/2 and cowboy_http:content_type_params/3
  %% are defined outside this module; whatever they return on their own error
  %% paths is passed through unchanged -- confirm it stays within this spec.
  %% @todo Parse the MIME header instead of the HTTP one.
  -spec content_disposition(binary()) -> disposition() | {error, badarg}.
  content_disposition(Data) ->
      cowboy_http:token_ci(Data,
          fun (_Rest, <<>>) ->
                  % No disposition token could be read from the data.
                  {error, badarg};
              (Rest, Disposition) ->
                  % Parse the parameters that follow the disposition token,
                  % starting from an empty accumulator.
                  cowboy_http:content_type_params(Rest,
                      fun (Params) -> {Disposition, Params} end, [])
          end).
  44. %% Internal.
  %% @doc Entry point of the multipart parser.
  %%
  %% Once enough bytes are available to compare against "--" followed by the
  %% boundary, the data either starts with that opening boundary (no
  %% preamble) or the preamble is skipped up to the first boundary.
  -spec parse(binary(), binary()) -> more(part_result()).
  parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 ->
      Pattern = pattern(Boundary),
      Opening = <<"--", Boundary/binary>>,
      OpeningSize = byte_size(Opening),
      case Bin of
          <<Opening:OpeningSize/binary, AfterBoundary/binary>> ->
              % The data opens with the boundary itself: there is no
              % preamble to get rid of.
              parse_boundary_tail(AfterBoundary, Pattern);
          _ ->
              % Anything else is preamble, skip to the first boundary.
              skip(Bin, Pattern)
      end;
  parse(Bin, Boundary) ->
      % Too short to tell whether the data opens with the boundary; keep
      % what we have and wait for more.
      more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end).
  -type pattern() :: {binary:cp(), non_neg_integer()}.

  %% @doc Compile the delimiter searched for between parts.
  %% The delimiter is "\r\n--" followed by the boundary; it is returned
  %% compiled, paired with its length in bytes.
  -spec pattern(binary()) -> pattern().
  pattern(Boundary) ->
      Delimiter = <<"\r\n--", Boundary/binary>>,
      Compiled = binary:compile_pattern(Delimiter),
      {Compiled, byte_size(Delimiter)}.
  %% @doc Look at what directly follows a boundary.
  %% A "--" suffix means this was the closing boundary: <em>eof</em> is
  %% returned and parsing is over. Otherwise the rest of the boundary line is
  %% consumed before the headers of the next part are read.
  -spec parse_boundary_tail(binary(), pattern()) -> more(part_result()).
  parse_boundary_tail(<<"--", _/binary>>, _Pattern) ->
      % Closing boundary.
      eof;
  parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 ->
      % No "--" after the boundary: go on with the removal of unknown chars
      % and lwsp up to the end of the line.
      parse_boundary_eol(Bin, Pattern);
  parse_boundary_tail(Bin, Pattern) ->
      % Fewer than two bytes: cannot rule out a trailing "--" yet.
      more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end).
  %% @doc Discard whatever follows the boundary on its line, up to the CRLF.
  -spec parse_boundary_eol(binary(), pattern()) -> more(part_result()).
  parse_boundary_eol(Bin, Pattern) ->
      case binary:match(Bin, <<"\r\n">>) of
          nomatch ->
              % No CRLF in this chunk. Only the last byte is kept so that a
              % CRLF split across two chunks can still be found.
              DropSize = max(byte_size(Bin) - 1, 0),
              {_Dropped, Kept} = split_binary(Bin, DropSize),
              more(Kept, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end);
          {EolPos, _Length} ->
              % Drop everything before the CRLF; the remainder, starting at
              % the CRLF, goes through cowboy_http:whitespace/2 (defined
              % elsewhere) before the CRLF check.
              {_Dropped, FromEol} = split_binary(Bin, EolPos),
              Next = fun (Trimmed) -> parse_boundary_crlf(Trimmed, Pattern) end,
              cowboy_http:whitespace(FromEol, Next)
      end.
  %% @doc Consume the CRLF ending the boundary line and start reading the
  %% headers of the part.
  -spec parse_boundary_crlf(binary(), pattern()) -> more(part_result()).
  parse_boundary_crlf(Bin, Pattern) ->
      case Bin of
          <<"\r\n", AfterEol/binary>> ->
              % This function is only reached from parse_boundary_eol/2 once
              % a CRLF has been located, so the two bytes are expected here.
              parse_headers(AfterEol, Pattern);
          _ ->
              % Unspecified behaviour: RFC 2046 doesn't say what to do when
              % LWSP is not directly followed by a new line. Here it is
              % treated as part of the boundary line, so the end of line is
              % searched again.
              parse_boundary_eol(Bin, Pattern)
      end.
  %% @doc Read the headers of a part, starting with no header collected.
  -spec parse_headers(binary(), pattern()) -> more(part_result()).
  parse_headers(Bin, Pattern) ->
      parse_headers(Bin, Pattern, []).

  %% @doc Decode headers one at a time with erlang:decode_packet/3,
  %% collecting them in reverse order in the accumulator.
  -spec parse_headers(binary(), pattern(), http_headers()) -> more(part_result()).
  parse_headers(Data, Pattern, Collected) ->
      case erlang:decode_packet(httph_bin, Data, []) of
          {more, _} ->
              % Incomplete header line: retry from the same data once more
              % has arrived, keeping the headers collected so far.
              more(Data, fun (NewData) ->
                  parse_headers(NewData, Pattern, Collected)
              end);
          {ok, {http_error, _}, _} ->
              % Malformed part: ignore it and look for the next boundary.
              skip(Data, Pattern);
          {ok, http_eoh, BodyStart} ->
              % End of headers: put them back in order of appearance and
              % hand out the continuation that reads the body.
              InOrder = lists:reverse(Collected),
              {headers, InOrder, fun () -> parse_body(BodyStart, Pattern) end};
          {ok, {http_header, _, Name, _, Value}, Remaining} ->
              parse_headers(Remaining, Pattern, [{Name, Value} | Collected])
      end.
  %% @doc Read the body of a part until the next delimiter.
  %%
  %% With at least a delimiter's worth of bytes available, the result is
  %% either the end of the part (delimiter right at the start), a last body
  %% chunk followed by the end of the part, or a partial body chunk. With
  %% fewer bytes, more data is requested.
  -spec parse_body(binary(), pattern()) -> more(body_result()).
  parse_body(Bin, {Compiled, Size} = Pattern) when byte_size(Bin) >= Size ->
      case binary:match(Bin, Compiled) of
          nomatch ->
              % No delimiter yet. The last Size - 1 bytes may be the start
              % of one, so they are held back for the next round.
              {Chunk, Held} = split_binary(Bin, byte_size(Bin) - Size + 1),
              {body, Chunk, fun () -> parse_body(Held, Pattern) end};
          {0, _Length} ->
              % Delimiter right at the start: nothing left in this part.
              {_Delimiter, AfterDelimiter} = split_binary(Bin, Size),
              end_of_part(AfterDelimiter, Pattern);
          {Start, _Length} ->
              % Delimiter found further on: everything before it is the last
              % body chunk of this part.
              {Chunk, FromDelimiter} = split_binary(Bin, Start),
              {_Delimiter, AfterDelimiter} = split_binary(FromDelimiter, Size),
              PartEnd = end_of_part(AfterDelimiter, Pattern),
              {body, Chunk, fun () -> PartEnd end}
      end;
  parse_body(Bin, Pattern) ->
      % Not enough bytes to hold a delimiter.
      more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end).
  %% @doc Build the end-of-part result. Its continuation resumes parsing
  %% right after the delimiter, with the remaining data.
  -spec end_of_part(binary(), pattern()) -> end_of_part().
  end_of_part(Bin, Pattern) ->
      Resume = fun () -> parse_boundary_tail(Bin, Pattern) end,
      {end_of_part, Resume}.
  %% @doc Drop data up to and including the next delimiter, then resume
  %% parsing with what follows it.
  -spec skip(binary(), pattern()) -> more(part_result()).
  skip(Bin, {Compiled, Size} = Pattern) ->
      case binary:match(Bin, Compiled) of
          nomatch ->
              % No delimiter in sight. Only the last Size - 1 bytes are
              % kept, as they may be the beginning of one.
              DropSize = max(byte_size(Bin) - Size + 1, 0),
              {_Dropped, Held} = split_binary(Bin, DropSize),
              more(Held, fun (NewBin) -> skip(NewBin, Pattern) end);
          {Start, _Length} ->
              % Delimiter found: carry on right after it.
              {_Dropped, AfterDelimiter} = split_binary(Bin, Start + Size),
              parse_boundary_tail(AfterDelimiter, Pattern)
      end.
  %% @doc Build a <em>more</em> tuple asking the caller for additional data.
  %% Pending bytes, if any, are prepended to the new data before the parser
  %% fun is called with it.
  -spec more(binary(), parser(T)) -> {more, parser(T)}.
  more(Pending, Parser) ->
      case Pending of
          <<>> ->
              % Nothing to carry over, the parser can be handed out as is.
              {more, Parser};
          _ ->
              Resume = fun (NewData) when is_binary(NewData) ->
                  Parser(<<Pending/binary, NewData/binary>>)
              end,
              {more, Resume}
      end.
  171. %% Tests.
  172. -ifdef(TEST).
  %% Test generator: each body is parsed in one go with the boundary
  %% "boundary" and the collected parts are compared with the expected list
  %% of {Headers, Body} pairs.
  multipart_test_() ->
      %% {Body, Result}
      Tests = [
          {<<"--boundary--">>, []},
          {<<"preamble\r\n--boundary--">>, []},
          {<<"--boundary--\r\nepilogue">>, []},
          {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>,
              [{[{<<"A">>, <<"b">>}, {<<"C">>, <<"d">>}], <<>>}]},
          {
              <<
                  "--boundary\r\nX-Name:answer\r\n\r\n42"
                  "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n"
                  "\r\n--boundary--"
              >>,
              [
                  {[{<<"X-Name">>, <<"answer">>}], <<"42">>},
                  {[{'Server', <<"Cowboy">>}], <<"It rocks!\r\n">>}
              ]
          }
      ],
      %% ?_assertEqual reports both the expected and the actual value on
      %% failure, where a bare match would only raise badmatch.
      [{title(V), ?_assertEqual(R, acc_multipart(V))} || {V, R} <- Tests].
  %% Test helper: parse a complete body with the boundary "boundary" and
  %% return its parts as a list of {Headers, Body} pairs.
  acc_multipart(Body) ->
      Parser = parser(<<"boundary">>),
      acc_multipart(Parser(Body), []).

  %% Walk the parser results, following each continuation. The part being
  %% read sits at the head of the accumulator, with its body chunks collected
  %% in reverse order until the end of the part is reached.
  acc_multipart({headers, Headers, Next}, Parts) ->
      acc_multipart(Next(), [{Headers, []} | Parts]);
  acc_multipart({body, Chunk, Next}, [{Headers, Chunks} | Parts]) ->
      acc_multipart(Next(), [{Headers, [Chunk | Chunks]} | Parts]);
  acc_multipart({end_of_part, Next}, [{Headers, Chunks} | Parts]) ->
      WholeBody = list_to_binary(lists:reverse(Chunks)),
      acc_multipart(Next(), [{Headers, WholeBody} | Parts]);
  acc_multipart(eof, Parts) ->
      lists:reverse(Parts).
  %% Test generator: each disposition header value is parsed and compared
  %% with the expected {Disposition, Params} result.
  content_disposition_test_() ->
      %% {Disposition, Result}
      Tests = [
          {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}},
          {<<"inline">>, {<<"inline">>, []}},
          {<<"attachment; \tfilename=brackets-slides.pdf">>,
              {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}}
      ],
      %% ?_assertEqual reports both the expected and the actual value on
      %% failure, where a bare match would only raise badmatch.
      [{title(V), ?_assertEqual(R, content_disposition(V))} || {V, R} <- Tests].
  %% Test helper: turn a test input into a title by replacing tabs, carriage
  %% returns and line feeds with their backslash escapes.
  title(Bin) ->
      Escapes = [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}],
      Escape = fun ({Char, Shown}, Text) ->
          re:replace(Text, Char, Shown, [global])
      end,
      iolist_to_binary(lists:foldl(Escape, Bin, Escapes)).
  221. -endif.