Browse Source

Add 'Accept' header parsing

Rework the cowboy_http_req:parse_header/2 function while I was at it.
Loïc Hoguin 13 years ago
parent
commit
c605c4fa40
4 changed files with 256 additions and 34 deletions
  1. 220 1
      src/cowboy_http.erl
  2. 1 1
      src/cowboy_http_protocol.erl
  3. 34 31
      src/cowboy_http_req.erl
  4. 1 1
      src/cowboy_http_websocket.erl

+ 220 - 1
src/cowboy_http.erl

@@ -16,7 +16,8 @@
 -module(cowboy_http).
 
 %% Parsing.
--export([list/2, nonempty_list/2, token/2, token_ci/2]).
+-export([list/2, nonempty_list/2,
+	media_range/2, token/2, token_ci/2, quoted_string/2]).
 
 %% Interpretation.
 -export([connection_to_atom/1]).
@@ -63,6 +64,144 @@ list(Data, Fun, Acc) ->
 				end)
 		end).
 
+%% @doc Parse a media range.
+%%
+%% Whitespace is skipped first. If no data is left afterwards the result
+%% is {error, badarg}; otherwise parsing continues with the type.
+-spec media_range(binary(), fun()) -> any().
+media_range(Data, Fun) ->
+	OnTrimmed = fun
+		(<<>>) -> {error, badarg};
+		(Trimmed) -> media_range_type(Trimmed, Fun)
+	end,
+	whitespace(Data, OnTrimmed).
+
+%% Parse the type with token_ci/2, then require a '/' separator (with
+%% whitespace skipped on both sides) before handing over to the subtype
+%% parser. An empty type, a missing '/' or no data after the '/' yields
+%% {error, badarg}.
+-spec media_range_type(binary(), fun()) -> any().
+media_range_type(Data, Fun) ->
+	OnType = fun
+		(_Tail, <<>>) ->
+			{error, badarg};
+		(Tail, Type) ->
+			OnSubTypeData = fun
+				(<<>>) -> {error, badarg};
+				(SubTypeData) -> media_range_subtype(SubTypeData, Fun, Type)
+			end,
+			OnSeparator = fun
+				(<< $/, AfterSlash/bits >>) ->
+					whitespace(AfterSlash, OnSubTypeData);
+				(_NoSlash) ->
+					{error, badarg}
+			end,
+			whitespace(Tail, OnSeparator)
+	end,
+	token_ci(Data, OnType).
+
+%% Parse the subtype with token_ci/2. An empty subtype is rejected with
+%% {error, badarg}; otherwise the parameters are parsed next, starting
+%% from an empty accumulator.
+-spec media_range_subtype(binary(), fun(), binary()) -> any().
+media_range_subtype(Data, Fun, Type) ->
+	OnSubType = fun
+		(_Tail, <<>>) -> {error, badarg};
+		(Tail, SubType) -> media_range_params(Tail, Fun, Type, SubType, [])
+	end,
+	token_ci(Data, OnSubType).
+
+%% Look for another parameter introduced by ';'. When there is none the
+%% media range is complete: Fun receives the remaining data together with
+%% {{Type, SubType, Params}, 1000, []}, where Params is the accumulator
+%% put back in order of appearance.
+-spec media_range_params(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}]) -> any().
+media_range_params(Data, Fun, Type, SubType, Acc) ->
+	OnAttrData = fun (AttrData) ->
+		media_range_param_attr(AttrData, Fun, Type, SubType, Acc)
+	end,
+	OnTrimmed = fun
+		(<< $;, AfterSemi/bits >>) ->
+			whitespace(AfterSemi, OnAttrData);
+		(Tail) ->
+			MediaRange = {Type, SubType, lists:reverse(Acc)},
+			Fun(Tail, {MediaRange, 1000, []})
+	end,
+	whitespace(Data, OnTrimmed).
+
+%% Parse a parameter name with token_ci/2 and require an '=' followed by
+%% more data (whitespace is skipped around the '='). An empty name, a
+%% missing '=' or no data after the '=' yields {error, badarg}.
+-spec media_range_param_attr(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}]) -> any().
+media_range_param_attr(Data, Fun, Type, SubType, Acc) ->
+	OnAttr = fun
+		(_Tail, <<>>) ->
+			{error, badarg};
+		(Tail, Attr) ->
+			OnValueData = fun
+				(<<>>) ->
+					{error, badarg};
+				(ValueData) ->
+					media_range_param_value(ValueData, Fun,
+						Type, SubType, Acc, Attr)
+			end,
+			OnEquals = fun
+				(<< $=, AfterEquals/bits >>) ->
+					whitespace(AfterEquals, OnValueData);
+				(_NoEquals) ->
+					{error, badarg}
+			end,
+			whitespace(Tail, OnEquals)
+	end,
+	token_ci(Data, OnAttr).
+
+%% Parse a parameter value. The "q" parameter is parsed as a quality
+%% value and switches over to accept extension parsing. Any other
+%% parameter takes a quoted string (when the data starts with '"') or a
+%% non-empty token; the {Attr, Value} pair is accumulated and parameter
+%% parsing resumes.
+-spec media_range_param_value(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], binary()) -> any().
+media_range_param_value(Data, Fun, Type, SubType, Acc, <<"q">>) ->
+	OnQuality = fun (Tail, Quality) ->
+		accept_ext(Tail, Fun, Type, SubType, Acc, Quality, [])
+	end,
+	quality(Data, OnQuality);
+media_range_param_value(Data = << $", _/bits >>, Fun,
+		Type, SubType, Acc, Attr) ->
+	OnQuoted = fun (Tail, Value) ->
+		media_range_params(Tail, Fun,
+			Type, SubType, [{Attr, Value}|Acc])
+	end,
+	quoted_string(Data, OnQuoted);
+media_range_param_value(Data, Fun, Type, SubType, Acc, Attr) ->
+	OnToken = fun
+		(_Tail, <<>>) ->
+			{error, badarg};
+		(Tail, Value) ->
+			media_range_params(Tail, Fun,
+				Type, SubType, [{Attr, Value}|Acc])
+	end,
+	token(Data, OnToken).
+
+%% Parse the accept extensions that follow the "q" parameter. Each ';'
+%% introduces one more extension; when there is none left, Fun is called
+%% with the remaining data and
+%% {{Type, SubType, Params}, Quality, Extensions}, both lists being put
+%% back in order of appearance.
+-spec accept_ext(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], 0..1000,
+	[{binary(), binary()} | binary()]) -> any().
+accept_ext(Data, Fun, Type, SubType, Params, Quality, Acc) ->
+	OnAttrData = fun (AttrData) ->
+		accept_ext_attr(AttrData, Fun,
+			Type, SubType, Params, Quality, Acc)
+	end,
+	OnTrimmed = fun
+		(<< $;, AfterSemi/bits >>) ->
+			whitespace(AfterSemi, OnAttrData);
+		(Tail) ->
+			MediaRange = {Type, SubType, lists:reverse(Params)},
+			Fun(Tail, {MediaRange, Quality, lists:reverse(Acc)})
+	end,
+	whitespace(Data, OnTrimmed).
+
+%% Parse an accept extension name with token_ci/2. A name followed by '='
+%% must be followed by a value; a name without '=' is accumulated on its
+%% own as a binary. An empty name, or no data after the '=', yields
+%% {error, badarg}.
+-spec accept_ext_attr(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], 0..1000,
+	[{binary(), binary()} | binary()]) -> any().
+accept_ext_attr(Data, Fun, Type, SubType, Params, Quality, Acc) ->
+	OnAttr = fun
+		(_Tail, <<>>) ->
+			{error, badarg};
+		(Tail, Attr) ->
+			OnValueData = fun
+				(<<>>) ->
+					{error, badarg};
+				(ValueData) ->
+					accept_ext_value(ValueData, Fun,
+						Type, SubType, Params,
+						Quality, Acc, Attr)
+			end,
+			OnTrimmed = fun
+				(<< $=, AfterEquals/bits >>) ->
+					whitespace(AfterEquals, OnValueData);
+				(NoEquals) ->
+					accept_ext(NoEquals, Fun,
+						Type, SubType, Params,
+						Quality, [Attr|Acc])
+			end,
+			whitespace(Tail, OnTrimmed)
+	end,
+	token_ci(Data, OnAttr).
+
+%% Parse an accept extension value: a quoted string when the data starts
+%% with '"', a non-empty token otherwise. The {Attr, Value} pair is
+%% accumulated and accept extension parsing resumes.
+-spec accept_ext_value(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], 0..1000,
+	[{binary(), binary()} | binary()], binary()) -> any().
+accept_ext_value(Data = << $", _/bits >>, Fun,
+		Type, SubType, Params, Quality, Acc, Attr) ->
+	OnQuoted = fun (Tail, Value) ->
+		accept_ext(Tail, Fun,
+			Type, SubType, Params, Quality, [{Attr, Value}|Acc])
+	end,
+	quoted_string(Data, OnQuoted);
+accept_ext_value(Data, Fun, Type, SubType, Params, Quality, Acc, Attr) ->
+	OnToken = fun
+		(_Tail, <<>>) ->
+			{error, badarg};
+		(Tail, Value) ->
+			accept_ext(Tail, Fun,
+				Type, SubType, Params, Quality, [{Attr, Value}|Acc])
+	end,
+	token(Data, OnToken).
+
 %% @doc Skip whitespace.
 -spec whitespace(binary(), fun()) -> any().
 whitespace(<< C, Rest/bits >>, Fun)
@@ -99,6 +238,48 @@ token(<< C, Rest/bits >>, Fun, Case = ci, Acc) ->
 token(<< C, Rest/bits >>, Fun, Case, Acc) ->
 	token(Rest, Fun, Case, << Acc/binary, C >>).
 
+%% @doc Parse a quoted string.
+%%
+%% Data must start with a double quote. The characters up to the closing
+%% double quote are accumulated, a backslash causing the character that
+%% follows it to be taken as is. Fun is then called with the data that
+%% follows the closing quote and the unquoted value.
+%%
+%% Returns {error, badarg} when the opening quote is missing or when the
+%% string is not terminated.
+-spec quoted_string(binary(), fun()) -> any().
+quoted_string(<< $", Rest/bits >>, Fun) ->
+	quoted_string(Rest, Fun, <<>>);
+quoted_string(_Data, _Fun) ->
+	%% This function is exported and fed with header data: report
+	%% malformed input the same way the other parsers of this module do
+	%% instead of crashing with function_clause.
+	{error, badarg}.
+
+%% Accumulate the string contents until the closing quote is found.
+-spec quoted_string(binary(), fun(), binary()) -> any().
+quoted_string(<<>>, _Fun, _Acc) ->
+	%% The data ended before the closing quote.
+	{error, badarg};
+quoted_string(<< $", Rest/bits >>, Fun, Acc) ->
+	Fun(Rest, Acc);
+quoted_string(<< $\\, C, Rest/bits >>, Fun, Acc) ->
+	%% Quoted pair: drop the backslash and keep the escaped character.
+	quoted_string(Rest, Fun, << Acc/binary, C >>);
+quoted_string(<< C, Rest/bits >>, Fun, Acc) ->
+	quoted_string(Rest, Fun, << Acc/binary, C >>).
+
+%% @doc Parse a quality value.
+%%
+%% Accepts "0" or "1", optionally followed by "." and up to three digits
+%% (only zeros after "1."). Fun is called with the remaining data and the
+%% quality expressed in thousandths, as an integer between 0 and 1000.
+%%
+%% Returns {error, badarg} when the data starts with anything else.
+-spec quality(binary(), fun()) -> any().
+quality(<< $0, $., Rest/bits >>, Fun) ->
+	quality(Rest, Fun, 0, 100);
+quality(<< $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 0);
+quality(<< $1, $., $0, $0, $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(<< $1, $., $0, $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(<< $1, $., $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+%% The digits after the dot are optional, so "1." alone is valid too;
+%% without this clause the "." would be left unconsumed.
+quality(<< $1, $., Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(<< $1, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(_Data, _Fun) ->
+	{error, badarg}.
+
+%% Accumulate up to three fractional digits into Q. M is the weight of
+%% the next digit (100, then 10, then 1) and reaches 0 once three digits
+%% have been consumed, at which point parsing stops.
+-spec quality(binary(), fun(), integer(), 0 | 1 | 10 | 100) -> any().
+quality(Data, Fun, Q, 0) ->
+	Fun(Data, Q);
+quality(<< C, Rest/bits >>, Fun, Q, M) when C >= $0, C =< $9 ->
+	quality(Rest, Fun, Q + (C - $0) * M, M div 10);
+quality(Data, Fun, Q, _M) ->
+	Fun(Data, Q).
+
 %% Interpretation.
 
 %% @doc Walk through a tokens list and return whether
@@ -135,6 +316,44 @@ nonempty_token_list_test_() ->
 	],
 	[{V, fun() -> R = nonempty_list(V, fun token/2) end} || {V, R} <- Tests].
 
+media_range_list_test_() ->
+	%% {Header value, [{{Type, SubType, Params}, Quality, AcceptExt}]}
+	Tests = [
+		{<<"audio/*; q=0.2, audio/basic">>, [
+			{{<<"audio">>, <<"*">>, []}, 200, []},
+			{{<<"audio">>, <<"basic">>, []}, 1000, []}
+		]},
+		{<<"text/plain; q=0.5, text/html, "
+		   "text/x-dvi; q=0.8, text/x-c">>, [
+		   {{<<"text">>, <<"plain">>, []}, 500, []},
+		   {{<<"text">>, <<"html">>, []}, 1000, []},
+		   {{<<"text">>, <<"x-dvi">>, []}, 800, []},
+		   {{<<"text">>, <<"x-c">>, []}, 1000, []}
+		]},
+		{<<"text/*, text/html, text/html;level=1, */*">>, [
+			{{<<"text">>, <<"*">>, []}, 1000, []},
+			{{<<"text">>, <<"html">>, []}, 1000, []},
+			{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
+			{{<<"*">>, <<"*">>, []}, 1000, []}
+		]},
+		{<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
+		   "text/html;level=2;q=0.4, */*;q=0.5">>, [
+		   {{<<"text">>, <<"*">>, []}, 300, []},
+		   {{<<"text">>, <<"html">>, []}, 700, []},
+		   {{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
+		   {{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
+		   {{<<"*">>, <<"*">>, []}, 500, []}
+		]},
+		{<<"text/html;level=1;quoted=\"hi hi hi\";"
+		   "q=0.123;standalone;complex=gits, text/plain">>, [
+			{{<<"text">>, <<"html">>,
+				[{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
+				[<<"standalone">>, {<<"complex">>, <<"gits">>}]},
+			{{<<"text">>, <<"plain">>, []}, 1000, []}
+		]}
+	],
+	[{V, fun() -> R = list(V, fun media_range/2) end} || {V, R} <- Tests].
+
 connection_to_atom_test_() ->
 	%% {Tokens, Result}
 	Tests = [

+ 1 - 1
src/cowboy_http_protocol.erl

@@ -172,7 +172,7 @@ header({http_header, _I, 'Host', _R, _V}, Req, State) ->
 header({http_header, _I, 'Connection', _R, Connection},
 		Req=#http_req{headers=Headers}, State) ->
 	Req2 = Req#http_req{headers=[{'Connection', Connection}|Headers]},
-	{tokens, ConnTokens, Req3}
+	{ConnTokens, Req3}
 		= cowboy_http_req:parse_header('Connection', Req2),
 	ConnAtom = cowboy_http:connection_to_atom(ConnTokens),
 	parse_header(Req3#http_req{connection=ConnAtom}, State);

+ 34 - 31
src/cowboy_http_req.erl

@@ -191,47 +191,50 @@ headers(Req) ->
 %% returned is used as a return value.
 %% @see parse_header/3
 -spec parse_header(http_header(), #http_req{})
-	-> {tokens, [binary()], #http_req{}}
-	 | {undefined, binary(), #http_req{}}
-	 | {error, badarg}.
-parse_header('Connection', Req) ->
-	parse_header('Connection', Req, []);
-parse_header(Name, Req) ->
-	parse_header(Name, Req, undefined).
+	-> {any(), #http_req{}}
+	 | {undefined, any(), #http_req{}}
+	 | {error, badarg}.
+parse_header(Name, Req=#http_req{p_headers=PHeaders}) ->
+	case lists:keyfind(Name, 1, PHeaders) of
+		%% Not parsed yet: parse it now, using the default value defined
+		%% for this header. Headers that parse_header/3 does not know
+		%% come back as an 'undefined' tagged 3-tuple.
+		false -> parse_header(Name, Req, parse_header_default(Name));
+		%% Already parsed: reuse the value found in p_headers.
+		{Name, Value} -> {Value, Req}
+	end.
+
+%% @doc Default values for semantic header parsing.
+%%
+%% 'Accept' and 'Connection' default to an empty list; every other
+%% header defaults to undefined.
+-spec parse_header_default(http_header()) -> any().
+parse_header_default(Name) when Name =:= 'Accept'; Name =:= 'Connection' ->
+	[];
+parse_header_default(_Name) ->
+	undefined.
 
 %% @doc Semantically parse headers.
 %%
-%% When the header is known, a named tuple is returned containing
-%% {Type, P, Req} with Type being the type of value found in P.
-%% For example, the header 'Connection' is a list of tokens, therefore
-%% the value returned will be a list of binary values and Type will be
-%% 'tokens'.
-%%
-%% When the header is known but not found, the tuple {Type, Default, Req}
-%% is returned instead.
-%%
-%% When the header is unknown, the value is returned directly as an
-%% 'undefined' tagged tuple.
+%% When the header is known ('Accept' or 'Connection'), {Parsed, Req} is
+%% returned, or {Default, Req} if the header is not found. {error, badarg}
+%% is returned if the value cannot be parsed.
+%%
+%% When the header is unknown, the value is returned without parsing as
+%% an 'undefined' tagged tuple: {undefined, Value, Req}.
 -spec parse_header(http_header(), #http_req{}, any())
-	-> {tokens, [binary()], #http_req{}}
-	 | {undefined, binary(), #http_req{}}
-	 | {error, badarg}.
-parse_header(Name, Req=#http_req{p_headers=PHeaders}, Default)
-		when Name =:= 'Connection' ->
+	-> {any(), #http_req{}}
+	 | {undefined, any(), #http_req{}}
+	 | {error, badarg}.
+parse_header(Name, Req, Default) when Name =:= 'Accept' ->
+	parse_header(Name, Req, Default,
+		fun (Value) ->
+			cowboy_http:list(Value, fun cowboy_http:media_range/2)
+		end);
+parse_header(Name, Req, Default) when Name =:= 'Connection' ->
+	parse_header(Name, Req, Default,
+		fun (Value) ->
+			cowboy_http:nonempty_list(Value, fun cowboy_http:token_ci/2)
+		end);
+parse_header(Name, Req, Default) ->
+	{Value, Req2} = header(Name, Req, Default),
+	{undefined, Value, Req2}.
+
+%% Fetch the header and parse its value with Fun. The result, or Default
+%% when the header is not found, is stored in p_headers under Name and
+%% returned along with the updated request. Nothing is stored when Fun
+%% returns {error, badarg}.
+-spec parse_header(http_header(), #http_req{}, any(), fun())
+	-> {any(), #http_req{}} | {error, badarg}.
+parse_header(Name, Req=#http_req{p_headers=PHeaders}, Default, Fun) ->
 	case header(Name, Req) of
-		{undefined, Req2} -> {tokens, Default, Req2};
+		{undefined, Req2} ->
+			{Default, Req2#http_req{p_headers=[{Name, Default}|PHeaders]}};
 		{Value, Req2} ->
-			case cowboy_http:nonempty_list(Value, fun cowboy_http:token_ci/2) of
+			case Fun(Value) of
 				{error, badarg} ->
 					{error, badarg};
 				P ->
-					{tokens, P, Req2#http_req{
-						p_headers=[{Name, P}|PHeaders]}}
+					{P, Req2#http_req{p_headers=[{Name, P}|PHeaders]}}
 			end
-	end;
-parse_header(Name, Req, Default) ->
-	{Value, Req2} = header(Name, Req, Default),
-	{undefined, Value, Req2}.
+	end.
 
 %% @equiv cookie(Name, Req, undefined)
 -spec cookie(binary(), #http_req{})

+ 1 - 1
src/cowboy_http_websocket.erl

@@ -77,7 +77,7 @@ upgrade(ListenerPid, Handler, Opts, Req) ->
 %% @todo Upgrade is a list of products and should be parsed as such.
 -spec websocket_upgrade(#state{}, #http_req{}) -> {ok, #state{}, #http_req{}}.
 websocket_upgrade(State, Req) ->
-	{tokens, ConnTokens, Req2}
+	{ConnTokens, Req2}
 		= cowboy_http_req:parse_header('Connection', Req),
 	true = lists:member(<<"upgrade">>, ConnTokens),
 	{WS, Req3} = cowboy_http_req:header('Upgrade', Req2),