Browse Source

Better handle content negotiation when accept contains charsets

Thanks to Philip Witty for help figuring this out.
Loïc Hoguin 6 years ago
parent
commit
399b6a16b4

+ 20 - 0
doc/src/manual/cowboy_rest.asciidoc

@@ -272,6 +272,16 @@ Cowboy treats all parameters as case sensitive, except for the
 should therefore always provide the charset as a lowercase
 binary string.
 
+When a charset is given in the media type parameters in the
+accept header, Cowboy will do some additional checks to confirm
+that it can use this charset. When the wildcard is used then Cowboy
+will immediately call `charsets_provided` to confirm the charset
+is acceptable. If the callback is undefined Cowboy assumes any
+charset is acceptable. When the wildcard is not used and the charset
+given in the accept header matches one of the configured media
+types Cowboy will use that charset and skip the `charsets_provided`
+step entirely.
+
 Cowboy will add the negotiated `media_type` to the Req object
 after this step completes:
 
@@ -284,6 +294,16 @@ req() :: #{
 
 // @todo Case sensitivity of parsed mime content?
 
+Cowboy may also add the negotiated `charset` to the Req object
+after this step completes:
+
+[source,erlang]
+----
+req() :: #{
+    charset => binary()  %% lowercase; case insensitive
+}
+----
+
 === delete_completed
 
 [source,erlang]

+ 70 - 19
src/cowboy_rest.erl

@@ -245,7 +245,7 @@
 	language_a :: undefined | binary(),
 
 	%% Charset.
-	charsets_p = [] :: [binary()],
+	charsets_p = undefined :: undefined | [binary()],
 	charset_a :: undefined | binary(),
 
 	%% Whether the resource exists.
@@ -503,16 +503,55 @@ match_media_type(Req, State, Accept,
 match_media_type(Req, State, Accept, [_Any|Tail], MediaType) ->
 	match_media_type(Req, State, Accept, Tail, MediaType).
 
-match_media_type_params(Req, State, _Accept,
-		[Provided = {{TP, STP, '*'}, _Fun}|_Tail],
-		{{_TA, _STA, Params_A}, _QA, _APA}) ->
-	PMT = {TP, STP, Params_A},
-	languages_provided(Req#{media_type => PMT},
-		State#state{content_type_a=Provided});
 match_media_type_params(Req, State, Accept,
-		[Provided = {PMT = {_TP, _STP, Params_P}, _Fun}|Tail],
+		[Provided = {{TP, STP, '*'}, _Fun}|Tail],
+		MediaType = {{TA, _STA, Params_A0}, _QA, _APA}) ->
+	case lists:keytake(<<"charset">>, 1, Params_A0) of
+		{value, {_, Charset}, Params_A} when TA =:= <<"text">> ->
+			%% When we match against a wildcard, the media type is text
+			%% and has a charset parameter, we call charsets_provided
+			%% and check that the charset is provided. If the callback
+			%% is not exported, we accept inconditionally but ignore
+			%% the given charset so as to not send a wrong value back.
+			case call(Req, State, charsets_provided) of
+				no_call ->
+					languages_provided(Req#{media_type => {TP, STP, Params_A0}},
+						State#state{content_type_a=Provided});
+				{stop, Req2, HandlerState} ->
+					terminate(Req2, State#state{handler_state=HandlerState});
+				{Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler ->
+					switch_handler(Switch, Req2, HandlerState);
+				{CP, Req2, HandlerState} ->
+					State2 = State#state{handler_state=HandlerState, charsets_p=CP},
+					case lists:member(Charset, CP) of
+						false ->
+							match_media_type(Req2, State2, Accept, Tail, MediaType);
+						true ->
+							languages_provided(Req2#{media_type => {TP, STP, Params_A}},
+								State2#state{content_type_a=Provided,
+									charset_a=Charset})
+					end
+			end;
+		_ ->
+			languages_provided(Req#{media_type => {TP, STP, Params_A0}},
+				State#state{content_type_a=Provided})
+	end;
+match_media_type_params(Req, State, Accept,
+		[Provided = {PMT = {TP, STP, Params_P0}, Fun}|Tail],
 		MediaType = {{_TA, _STA, Params_A}, _QA, _APA}) ->
-	case lists:sort(Params_P) =:= lists:sort(Params_A) of
+	case lists:sort(Params_P0) =:= lists:sort(Params_A) of
+		true when TP =:= <<"text">> ->
+			%% When a charset was provided explicitly in both the charset header
+			%% and the media types provided and the negotiation is successful,
+			%% we keep the charset and don't call charsets_provided. This only
+			%% applies to text media types, however.
+			{Charset, Params_P} = case lists:keytake(<<"charset">>, 1, Params_P0) of
+				false -> {undefined, Params_P0};
+				{value, {_, Charset0}, Params_P1} -> {Charset0, Params_P1}
+			end,
+			languages_provided(Req#{media_type => {TP, STP, Params_P}},
+				State#state{content_type_a={{TP, STP, Params_P}, Fun},
+					charset_a=Charset});
 		true ->
 			languages_provided(Req#{media_type => PMT},
 				State#state{content_type_a=Provided});
@@ -587,6 +626,26 @@ set_language(Req, State=#state{language_a=Language}) ->
 
 %% charsets_provided should return a list of binary values indicating
 %% which charsets are accepted by the resource.
+%%
+%% A charset may have been selected while negotiating the accept header.
+%% There's no need to select one again.
+charsets_provided(Req, State=#state{charset_a=Charset})
+		when Charset =/= undefined ->
+	set_content_type(Req, State);
+%% If charsets_p is defined, use it instead of calling charsets_provided
+%% again. We also call this clause during normal execution to avoid
+%% duplicating code.
+charsets_provided(Req, State=#state{charsets_p=[]}) ->
+	not_acceptable(Req, State);
+charsets_provided(Req, State=#state{charsets_p=CP})
+		when CP =/= undefined ->
+	case cowboy_req:parse_header(<<"accept-charset">>, Req) of
+		undefined ->
+			set_content_type(Req, State#state{charset_a=hd(CP)});
+		AcceptCharset0 ->
+			AcceptCharset = prioritize_charsets(AcceptCharset0),
+			choose_charset(Req, State, AcceptCharset)
+	end;
 charsets_provided(Req, State) ->
 	case call(Req, State, charsets_provided) of
 		no_call ->
@@ -595,17 +654,8 @@ charsets_provided(Req, State) ->
 			terminate(Req2, State#state{handler_state=HandlerState});
 		{Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler ->
 			switch_handler(Switch, Req2, HandlerState);
-		{[], Req2, HandlerState} ->
-			not_acceptable(Req2, State#state{handler_state=HandlerState});
 		{CP, Req2, HandlerState} ->
-			State2 = State#state{handler_state=HandlerState, charsets_p=CP},
-			case cowboy_req:parse_header(<<"accept-charset">>, Req2) of
-				undefined ->
-					set_content_type(Req2, State2#state{charset_a=hd(CP)});
-				AcceptCharset ->
-					AcceptCharset2 = prioritize_charsets(AcceptCharset),
-					choose_charset(Req2, State2, AcceptCharset2)
-			end
+			charsets_provided(Req2, State#state{handler_state=HandlerState, charsets_p=CP})
 	end.
 
 %% The special value "*", if present in the Accept-Charset field,
@@ -690,6 +740,7 @@ variances(Req, State=#state{content_types_p=CTP,
 		[_|_] -> [<<"accept-language">>|Variances]
 	end,
 	Variances3 = case CP of
+		undefined -> Variances2;
 		[] -> Variances2;
 		[_] -> Variances2;
 		[_|_] -> [<<"accept-charset">>|Variances2]

+ 22 - 0
test/handlers/charset_in_content_types_provided_h.erl

@@ -0,0 +1,22 @@
+%% This module has a media type provided with an explicit charset.
+
+-module(charset_in_content_types_provided_h).
+
+-export([init/2]).
+-export([content_types_provided/2]).
+-export([charsets_provided/2]).
+-export([get_text_plain/2]).
+
+init(Req, Opts) ->
+	{cowboy_rest, Req, Opts}.
+
+content_types_provided(Req, State) ->
+	{[
+		{{<<"text">>, <<"plain">>, [{<<"charset">>, <<"utf-8">>}]}, get_text_plain}
+	], Req, State}.
+
+charsets_provided(Req, State) ->
+	{[<<"utf-16">>, <<"iso-8861-1">>], Req, State}.
+
+get_text_plain(Req, State) ->
+	{<<"This is REST!">>, Req, State}.

+ 24 - 0
test/handlers/charset_in_content_types_provided_implicit_h.erl

@@ -0,0 +1,24 @@
+%% This module has a media type provided with a wildcard
+%% and a list of charsets that is limited.
+
+-module(charset_in_content_types_provided_implicit_h).
+
+-export([init/2]).
+-export([content_types_provided/2]).
+-export([charsets_provided/2]).
+-export([get_text_plain/2]).
+
+init(Req, Opts) ->
+	{cowboy_rest, Req, Opts}.
+
+content_types_provided(Req, State) ->
+	{[
+		{{<<"text">>, <<"plain">>, '*'}, get_text_plain}
+	], Req, State}.
+
+charsets_provided(Req, State) ->
+	{[<<"utf-8">>, <<"utf-16">>], Req, State}.
+
+get_text_plain(Req, State) ->
+	{<<"This is REST!">>, Req, State}.
+

+ 21 - 0
test/handlers/charset_in_content_types_provided_implicit_no_callback_h.erl

@@ -0,0 +1,21 @@
+%% This module has a media type provided with a wildcard
+%% and lacks a charsets_provided callback.
+
+-module(charset_in_content_types_provided_implicit_no_callback_h).
+
+-export([init/2]).
+-export([content_types_provided/2]).
+-export([get_text_plain/2]).
+
+init(Req, Opts) ->
+	{cowboy_rest, Req, Opts}.
+
+content_types_provided(Req, State) ->
+	{[
+		{{<<"text">>, <<"plain">>, '*'}, get_text_plain}
+	], Req, State}.
+
+get_text_plain(Req, State) ->
+	{<<"This is REST!">>, Req, State}.
+
+

+ 74 - 0
test/rest_handler_SUITE.erl

@@ -39,6 +39,12 @@ end_per_group(Name, _) ->
 init_dispatch(_) ->
 	cowboy_router:compile([{'_', [
 		{"/", rest_hello_h, []},
+		{"/charset_in_content_types_provided",
+			charset_in_content_types_provided_h, []},
+		{"/charset_in_content_types_provided_implicit",
+			charset_in_content_types_provided_implicit_h, []},
+		{"/charset_in_content_types_provided_implicit_no_callback",
+			charset_in_content_types_provided_implicit_no_callback_h, []},
 		{"/provide_callback_missing", provide_callback_missing_h, []},
 		{"/switch_handler", switch_handler_h, run},
 		{"/switch_handler_opts", switch_handler_h, hibernate}
@@ -74,6 +80,74 @@ error_on_malformed_if_none_match(Config) ->
 	{response, _, 400, _} = gun:await(ConnPid, Ref),
 	ok.
 
+charset_in_content_types_provided(Config) ->
+	doc("When a charset is matched explictly in content_types_provided, "
+		"that charset is used and the charsets_provided callback is ignored."),
+	ConnPid = gun_open(Config),
+	Ref = gun:get(ConnPid, "/charset_in_content_types_provided", [
+		{<<"accept">>, <<"text/plain;charset=utf-8">>},
+		{<<"accept-charset">>, <<"utf-16, utf-8;q=0.5">>},
+		{<<"accept-encoding">>, <<"gzip">>}
+	]),
+	{response, _, 200, Headers} = gun:await(ConnPid, Ref),
+	{_, <<"text/plain; charset=utf-8">>} = lists:keyfind(<<"content-type">>, 1, Headers),
+	ok.
+
+charset_in_content_types_provided_implicit_match(Config) ->
+	doc("When a charset is matched implicitly in content_types_provided, "
+		"the charsets_provided callback is used to determine if the media "
+		"type will match."),
+	ConnPid = gun_open(Config),
+	Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [
+		{<<"accept">>, <<"text/plain;charset=utf-16">>},
+		{<<"accept-encoding">>, <<"gzip">>}
+	]),
+	{response, _, 200, Headers} = gun:await(ConnPid, Ref),
+	{_, <<"text/plain; charset=utf-16">>} = lists:keyfind(<<"content-type">>, 1, Headers),
+	ok.
+
+charset_in_content_types_provided_implicit_nomatch(Config) ->
+	doc("When a charset is matched implicitly in content_types_provided, "
+		"the charsets_provided callback is used to determine if the media "
+		"type will match. If it doesn't, try the next media type."),
+	ConnPid = gun_open(Config),
+	Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [
+		{<<"accept">>, <<"text/plain;charset=utf-32, text/plain">>},
+		{<<"accept-encoding">>, <<"gzip">>}
+	]),
+	{response, _, 200, Headers} = gun:await(ConnPid, Ref),
+	%% We end up with the first charset listed in charsets_provided.
+	{_, <<"text/plain; charset=utf-8">>} = lists:keyfind(<<"content-type">>, 1, Headers),
+	ok.
+
+charset_in_content_types_provided_implicit_nomatch_error(Config) ->
+	doc("When a charset is matched implicitly in content_types_provided, "
+		"the charsets_provided callback is used to determine if the media "
+		"type will match. If it doesn't, and there's no other media type, "
+		"a 406 is returned."),
+	ConnPid = gun_open(Config),
+	Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [
+		{<<"accept">>, <<"text/plain;charset=utf-32">>},
+		{<<"accept-encoding">>, <<"gzip">>}
+	]),
+	{response, _, 406, _} = gun:await(ConnPid, Ref),
+	ok.
+
+charset_in_content_types_provided_implicit_no_callback(Config) ->
+	doc("When a charset is matched implicitly in content_types_provided, "
+		"and the charsets_provided callback is not exported, the media "
+		"type will match but the charset will be ignored like all other "
+		"parameters."),
+	ConnPid = gun_open(Config),
+	Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit_no_callback", [
+		{<<"accept">>, <<"text/plain;charset=utf-32">>},
+		{<<"accept-encoding">>, <<"gzip">>}
+	]),
+	{response, _, 200, Headers} = gun:await(ConnPid, Ref),
+	%% The charset is ignored as if it was any other parameter.
+	{_, <<"text/plain">>} = lists:keyfind(<<"content-type">>, 1, Headers),
+	ok.
+
 provide_callback_missing(Config) ->
 	doc("A 500 response must be sent when the ProvideCallback can't be called."),
 	ConnPid = gun_open(Config),