10 years ago · 2239020020
--- a/src/cow_http_hd.erl
+++ b/src/cow_http_hd.erl
@@ -20,10 +20,14 @@
 
				 -export([parse_accept_language/1]).
			
 
				 -export([parse_connection/1]).
			
 
				 -export([parse_content_length/1]).
			
 
				+-export([parse_content_type/1]).
			
 
				 -export([parse_expect/1]).
			
 
				 -export([parse_max_forwards/1]).
			
 
				 -export([parse_transfer_encoding/1]).
			
 
				 
			
 
				+%% A parsed media type: {Type, SubType, Params}, where Params is a list
				+%% of {Name, Value} pairs. This is the return type of parse_content_type/1.
				+-type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
			
 
				+-export_type([media_type/0]).
			
 
				+
			
 
				 -type qvalue() :: 0..1000.
			
 
				 -export_type([qvalue/0]).
			
 
				 
			
@@ -31,11 +35,34 @@
 
				 
			
 
				 -ifdef(TEST).
			
 
				 -include_lib("triq/include/triq.hrl").
			
 
				+
			
 
				+%% ASCII letters: lowercase first, then uppercase.
				+alpha_chars() ->
				+	Lower = lists:seq($a, $z),
				+	Upper = lists:seq($A, $Z),
				+	Lower ++ Upper.
			
 
				+%% ASCII decimal digits, in ascending order.
				+digit_chars() ->
				+	"0123456789".
			
 
				+
			
 
				+%% Generator for a single token character: one of the listed symbols,
				+%% a digit or a letter.
				+tchar() ->
				+	Symbols = "!#$%&'*+-.^_`|~",
				+	oneof(Symbols ++ digit_chars() ++ alpha_chars()).
			
 
				+%% Generator for a token: a non-empty run of tchar() characters,
				+%% produced as a binary.
				+token() ->
				+	?LET(Chars,
				+		non_empty(list(tchar())),
				+		list_to_binary(Chars)
				+	).
			
 
				+
			
 
				+%% Generator for a character allowed unescaped inside a quoted string.
				+%% The ranges leave out the double quote (16#22), the backslash (16#5c)
				+%% and 16#7f.
				+qdtext() ->
				+	Whitespace = [$\t, $\s],
				+	Visible = [$!] ++ lists:seq(16#23, 16#5b) ++ lists:seq(16#5d, 16#7e),
				+	ObsText = lists:seq(16#80, 16#ff),
				+	oneof(Whitespace ++ Visible ++ ObsText).
			
 
				+
			
 
				+%% Generator for an escaped character: a backslash followed by a tab,
				+%% a space, or a character in 16#21..16#7e or 16#80..16#ff.
				+quoted_pair() ->
				+	Escapable = [$\t, $\s] ++ lists:seq(16#21, 16#7e) ++ lists:seq(16#80, 16#ff),
				+	[$\\, oneof(Escapable)].
			
 
				+
			
 
				+%% Generator for a quoted string, as the three-element list
				+%% [$", Content, $"]. Content elements are weighted 100 to 1 in favour
				+%% of plain qdtext() characters over quoted_pair() escapes.
				+quoted_string() ->
				+	Content = list(frequency([{100, qdtext()}, {1, quoted_pair()}])),
				+	[$", Content, $"].
			
 
				+
			
 
				+%% Helper for ( token / quoted-string ) values.
				+%%
				+%% A value shaped like [$", Content, $"] is turned into the binary it
				+%% denotes, with escapes resolved. Anything else is returned unchanged.
				+unquote([$", Content, $"]) ->
				+	unquote(Content, <<>>);
				+unquote(Other) ->
				+	Other.
				+
				+%% Appends each element of the content list to the accumulator; a
				+%% [$\\, Char] pair contributes only Char.
				+unquote(Elems, Acc0) ->
				+	lists:foldl(fun
				+		([$\\, Char], Acc) -> << Acc/binary, Char >>;
				+		(Char, Acc) -> << Acc/binary, Char >>
				+	end, Acc0, Elems).
			
 
				 -endif.
			
 
				 
			
 
				 %% @doc Parse the Accept header.
			
 
				 
			
 
				--spec parse_accept(binary()) -> [{{binary(), binary(), [{binary(), binary()}]}, qvalue(), [binary() | {binary(), binary()}]}].
			
 
				+-spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
			
 
				 parse_accept(<<"*/*">>) ->
			
 
				 	[{{<<"*">>, <<"*">>, []}, 1000, []}];
			
 
				 parse_accept(Accept) ->
			
@@ -566,6 +593,146 @@ horse_parse_content_length_giga() ->
 
				 	).
			
 
				 -endif.
			
 
				 
			
 
				+%% @doc Parse the Content-Type header.
				+%%
				+%% The value must start with a token character. The result is
				+%% `{Type, SubType, Params}'. Input that matches no parsing clause
				+%% is not handled here and fails with a function_clause error.
				+
				+-spec parse_content_type(binary()) -> media_type().
				+parse_content_type(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_type, Rest, <<>>)
				+	end.
			
 
				+
			
 
				+%% Accumulates the type through ?INLINE_LOWERCASE until the "/"
				+%% separator. The separator must be followed by a token character,
				+%% which becomes the first character of the subtype.
				+media_type(<< $/, C, Rest/bits >>, Type) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_subtype, Rest, Type, <<>>)
				+	end;
				+media_type(<< C, Rest/bits >>, Type) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_type, Rest, Type)
				+	end.
			
 
				+
			
 
				+%% Accumulates the subtype. End of input finishes the parse with no
				+%% parameters, ";" starts a parameter, and a space or tab means only
				+%% more whitespace or a ";" may follow.
				+media_subtype(<<>>, Type, SubType) ->
				+	{Type, SubType, []};
				+media_subtype(<< $;, Rest/bits >>, Type, SubType) ->
				+	media_before_param(Rest, Type, SubType, []);
				+media_subtype(<< WS, Rest/bits >>, Type, SubType) when WS =:= $\s; WS =:= $\t ->
				+	media_before_semicolon(Rest, Type, SubType, []);
				+media_subtype(<< C, Rest/bits >>, Type, SubType) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_subtype, Rest, Type, SubType)
				+	end.
			
 
				+
			
 
				+%% Skips spaces and tabs after a completed subtype or parameter value.
				+%% End of input returns the result with the parameters put back in
				+%% input order; ";" moves on to the next parameter.
				+media_before_semicolon(<<>>, Type, SubType, Params) ->
				+	{Type, SubType, lists:reverse(Params)};
				+media_before_semicolon(<< $;, Rest/bits >>, Type, SubType, Params) ->
				+	media_before_param(Rest, Type, SubType, Params);
				+media_before_semicolon(<< WS, Rest/bits >>, Type, SubType, Params) when WS =:= $\s; WS =:= $\t ->
				+	media_before_semicolon(Rest, Type, SubType, Params).
			
 
				+
			
 
				+%% Skips spaces and tabs after a ";" and then starts a parameter.
				+%% The literal prefix "charset=" (exact, lowercase spelling) is
				+%% dispatched to the dedicated charset handlers; any other name is
				+%% accumulated by media_param/5.
				+media_before_param(<< WS, Rest/bits >>, Type, SubType, Params) when WS =:= $\s; WS =:= $\t ->
				+	media_before_param(Rest, Type, SubType, Params);
				+media_before_param(<< "charset=\"", Rest/bits >>, Type, SubType, Params) ->
				+	media_charset_quoted(Rest, Type, SubType, Params, <<>>);
				+media_before_param(<< "charset=", Rest/bits >>, Type, SubType, Params) ->
				+	media_charset(Rest, Type, SubType, Params, <<>>);
				+media_before_param(<< C, Rest/bits >>, Type, SubType, Params) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_param, Rest, Type, SubType, Params, <<>>)
				+	end.
			
 
				+
			
 
				+%% Reads a quoted charset value. The closing double quote stores the
				+%% parameter under the <<"charset">> name. A backslash is dropped and
				+%% the character after it is accumulated like any other; every
				+%% accumulated character goes through ?INLINE_LOWERCASE.
				+media_charset_quoted(<< $", Rest/bits >>, Type, SubType, Params, Charset) ->
				+	media_before_semicolon(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
				+media_charset_quoted(<< $\\, C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_VCHAR(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_charset_quoted, Rest, Type, SubType, Params, Charset)
				+	end;
				+media_charset_quoted(<< C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_VCHAR(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_charset_quoted, Rest, Type, SubType, Params, Charset)
				+	end.
			
 
				+
			
 
				+%% Reads an unquoted charset value, accumulating token characters
				+%% through ?INLINE_LOWERCASE. End of input, ";", a space or a tab ends
				+%% the value and stores it under the <<"charset">> name.
				+media_charset(<<>>, Type, SubType, Params, Charset) ->
				+	{Type, SubType, lists:reverse([{<<"charset">>, Charset}|Params])};
				+media_charset(<< $;, Rest/bits >>, Type, SubType, Params, Charset) ->
				+	media_before_param(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
				+media_charset(<< WS, Rest/bits >>, Type, SubType, Params, Charset) when WS =:= $\s; WS =:= $\t ->
				+	media_before_semicolon(Rest, Type, SubType, [{<<"charset">>, Charset}|Params]);
				+media_charset(<< C, Rest/bits >>, Type, SubType, Params, Charset) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_charset, Rest, Type, SubType, Params, Charset)
				+	end.
			
 
				+
			
 
				+%% Reads a parameter name, accumulated in K through ?INLINE_LOWERCASE,
				+%% up to the "=" sign, then dispatches on the kind of value that
				+%% follows (quoted string or token).
				+%%
				+%% Parameter names are case-insensitive. A name that arrives here as
				+%% <<"charset">> was spelled with at least one uppercase letter (the
				+%% exact lowercase "charset=" prefix is caught by media_before_param/4),
				+%% so it is routed to the same charset handlers. The charset value is
				+%% then lowercased regardless of how the name was spelled, which is
				+%% what prop_parse_content_type/0 expects.
				+media_param(<< $=, $", R/bits >>, T, S, P, <<"charset">>) -> media_charset_quoted(R, T, S, P, <<>>);
				+media_param(<< $=, R/bits >>, T, S, P, <<"charset">>) -> media_charset(R, T, S, P, <<>>);
				+media_param(<< $=, $", R/bits >>, T, S, P, K) -> media_quoted(R, T, S, P, K, <<>>);
				+media_param(<< $=, R/bits >>, T, S, P, K) -> media_value(R, T, S, P, K, <<>>);
				+media_param(<< C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) ->
				+	case C of
				+		?INLINE_LOWERCASE(media_param, R, T, S, P, K)
				+	end.
			
 
				+
			
 
				+%% Reads a quoted parameter value, keeping its case. The closing
				+%% double quote stores the {Name, Value} pair; a backslash is dropped
				+%% and the character after it is kept as is.
				+media_quoted(<< $", Rest/bits >>, Type, SubType, Params, Name, Value) ->
				+	media_before_semicolon(Rest, Type, SubType, [{Name, Value}|Params]);
				+media_quoted(<< $\\, C, Rest/bits >>, Type, SubType, Params, Name, Value) when ?IS_VCHAR(C) ->
				+	media_quoted(Rest, Type, SubType, Params, Name, << Value/binary, C >>);
				+media_quoted(<< C, Rest/bits >>, Type, SubType, Params, Name, Value) when ?IS_VCHAR(C) ->
				+	media_quoted(Rest, Type, SubType, Params, Name, << Value/binary, C >>).
			
 
				+
			
 
				+%% Reads an unquoted parameter value, keeping its case. End of input,
				+%% ";", a space or a tab ends the value and stores the {Name, Value}
				+%% pair.
				+media_value(<<>>, Type, SubType, Params, Name, Value) ->
				+	{Type, SubType, lists:reverse([{Name, Value}|Params])};
				+media_value(<< $;, Rest/bits >>, Type, SubType, Params, Name, Value) ->
				+	media_before_param(Rest, Type, SubType, [{Name, Value}|Params]);
				+media_value(<< WS, Rest/bits >>, Type, SubType, Params, Name, Value) when WS =:= $\s; WS =:= $\t ->
				+	media_before_semicolon(Rest, Type, SubType, [{Name, Value}|Params]);
				+media_value(<< C, Rest/bits >>, Type, SubType, Params, Name, Value) when ?IS_TOKEN(C) ->
				+	media_value(Rest, Type, SubType, Params, Name, << Value/binary, C >>).
			
 
				+
			
 
				+-ifdef(TEST).
			
 
				+%% Generator for a {Name, Value} parameter. Weighted 90 to 10 between
				+%% a random token name and the fixed <<"charset">> name; the value is
				+%% either a token or a quoted string in both cases.
				+media_type_parameter() ->
				+	Value = oneof([token(), quoted_string()]),
				+	frequency([
				+		{90, {token(), Value}},
				+		{10, {<<"charset">>, Value}}
				+	]).
			
 
				+
			
 
				+%% Generator for {Type, SubType, Params, Header}, where Header is the
				+%% binary "Type/SubType" followed by ";Name=Value" for each parameter.
				+media_type() ->
				+	?LET({Type, SubType, Params},
				+		{token(), token(), list(media_type_parameter())},
				+		begin
				+			ParamsIo = [[$;, Name, $=, Value] || {Name, Value} <- Params],
				+			{Type, SubType, Params, iolist_to_binary([Type, $/, SubType, ParamsIo])}
				+		end
				+	).
			
 
				+
			
 
				+%% Property: parsing a generated header yields the lowercased type and
				+%% subtype, and parameters with lowercased names and unquoted values.
				+%% A parameter whose lowercased name is <<"charset">> is also expected
				+%% to have its value lowercased.
				+prop_parse_content_type() ->
				+	?FORALL({Type, SubType, Params, Header},
				+		media_type(),
				+		begin
				+			{GotType, GotSubType, GotParams} = parse_content_type(Header),
				+			WantParams = [case ?INLINE_LOWERCASE_BC(Name) of
				+				<<"charset">> -> {<<"charset">>, ?INLINE_LOWERCASE_BC(unquote(Value))};
				+				LowName -> {LowName, unquote(Value)}
				+			end || {Name, Value} <- Params],
				+			GotType =:= ?INLINE_LOWERCASE_BC(Type)
				+				andalso GotSubType =:= ?INLINE_LOWERCASE_BC(SubType)
				+				andalso GotParams =:= WantParams
				+		end
				+	).
			
 
				+
			
 
				+%% EUnit generator: one test per {Header, Expected} pair, titled with
				+%% the header and asserting by match that parse_content_type/1
				+%% returns Expected.
				+parse_content_type_test_() ->
				+	Cases = [
				+		{<<"text/html;charset=utf-8">>,
				+			{<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
				+		{<<"text/html;charset=UTF-8">>,
				+			{<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
				+		{<<"Text/HTML;Charset=\"utf-8\"">>,
				+			{<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
				+		{<<"text/html; charset=\"utf-8\"">>,
				+			{<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}},
				+		{<<"text/html; charset=ISO-8859-4">>,
				+			{<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
				+		{<<"text/plain; charset=iso-8859-4">>,
				+			{<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}},
				+		{<<"multipart/form-data  \t;Boundary=\"MultipartIsUgly\"">>,
				+			{<<"multipart">>, <<"form-data">>, [
				+				{<<"boundary">>, <<"MultipartIsUgly">>}
				+			]}},
				+		{<<"foo/bar; one=FirstParam; two=SecondParam">>,
				+			{<<"foo">>, <<"bar">>, [
				+				{<<"one">>, <<"FirstParam">>},
				+				{<<"two">>, <<"SecondParam">>}
				+			]}}
				+	],
				+	lists:map(fun({Header, Expected}) ->
				+		{Header, fun() -> Expected = parse_content_type(Header) end}
				+	end, Cases).
			
 
				+-endif.
			
 
				+
			
 
				+-ifdef(PERF).
			
 
				+%% Benchmark entry point (compiled only when PERF is defined): hands a
				+%% parse_content_type/1 call on a short header value with a charset
				+%% parameter to horse:repeat, with a count of 200000.
				+horse_parse_content_type() ->
			
 
				+	horse:repeat(200000,
			
 
				+		parse_content_type(<<"text/html;charset=utf-8">>)
			
 
				+	).
			
 
				+-endif.
			
 
				+
			
 
				 %% @doc Parse the Expect header.
			
 
				 
			
 
				 -spec parse_expect(binary()) -> continue.