Просмотр исходного кода

Make charsets parsing more relaxed

Certain user agents send invalid Accept-Charset headers, like the
following: "ISO-8859-1;utf-8;q=0.7,*;q=0.7"

The user agent with which this behavior was observed presented itself
with the User-Agent string: "Mozilla/5.0 (Windows; U; Windows NT 5.1;
en-US; rv:1.9) Gecko/2008052906 Firefox/3.0" Although this doesn't
appear to be correct. The request might have been mangled by a
transparent proxy.
Ali Sabil 13 лет назад
Родитель
Сommit
4b43d06384
1 измененных файлов с 15 добавлено и 1 удалено
  1. 15 1
      src/cowboy_http.erl

+ 15 - 1
src/cowboy_http.erl

@@ -269,7 +269,15 @@ maybe_qparam(Data, Fun) ->
 		fun (<< $;, Rest/binary >>) ->
 		fun (<< $;, Rest/binary >>) ->
 			whitespace(Rest,
 			whitespace(Rest,
 				fun (Rest2) ->
 				fun (Rest2) ->
-					qparam(Rest2, Fun)
+					%% This is a non-strict parsing clause required by some user agents
+					%% that use the wrong delimiter putting a charset where a qparam is
+					%% expected.
+					try qparam(Rest2, Fun) of
+						Result -> Result
+					catch
+						error:function_clause ->
+							Fun(<<",", Rest2/binary>>, 1000)
+					end
 				end);
 				end);
 			(Rest) ->
 			(Rest) ->
 				Fun(Rest, 1000)
 				Fun(Rest, 1000)
@@ -879,6 +887,12 @@ nonempty_charset_list_test_() ->
 		{<<"iso-8859-5, unicode-1-1;q=0.8">>, [
 		{<<"iso-8859-5, unicode-1-1;q=0.8">>, [
 			{<<"iso-8859-5">>, 1000},
 			{<<"iso-8859-5">>, 1000},
 			{<<"unicode-1-1">>, 800}
 			{<<"unicode-1-1">>, 800}
+		]},
+		%% Some user agents send this invalid value for the Accept-Charset header
+		{<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
+			{<<"iso-8859-1">>, 1000},
+			{<<"utf-8">>, 700},
+			{<<"*">>, 700}
 		]}
 		]}
 	],
 	],
 	[{V, fun() -> R = nonempty_list(V, fun conneg/2) end} || {V, R} <- Tests].
 	[{V, fun() -> R = nonempty_list(V, fun conneg/2) end} || {V, R} <- Tests].