Browse Source

Merge branch 'noquoted' of https://github.com/klaar/cowboy

Loïc Hoguin 13 years ago
parent
commit
7f46e53436
7 changed files with 103 additions and 32 deletions
  1. 8 0
      README.md
  2. 4 1
      include/http.hrl
  3. 0 2
      rebar.config
  4. 11 9
      src/cowboy_dispatcher.erl
  5. 50 1
      src/cowboy_http.erl
  6. 14 6
      src/cowboy_http_protocol.erl
  7. 16 13
      src/cowboy_http_req.erl

+ 8 - 0
README.md

@@ -107,6 +107,14 @@ handle(Req, State) ->
 terminate(Req, State) ->
     ok.
 ```
+**Note**: versions prior to `0.4.0` used the
+[quoted](https://github.com/klaar/quoted.erl) library instead of the built-in
+`cowboy_http:urldecode/2` function. To keep using quoted, you must add it
+as a dependency to your application and add the following `cowboy_http_protocol`
+option:
+
+    {urldecode, {fun quoted:from_url/2, quoted:make([])}}
+
 
 Continue reading to learn how to dispatch rules and handle requests.
 

+ 4 - 1
include/http.hrl

@@ -68,5 +68,8 @@
 	%% Response.
 	resp_state = waiting   :: locked | waiting | chunks | done,
 	resp_headers = []      :: http_headers(),
-	resp_body  = <<>>      :: binary()
+	resp_body  = <<>>      :: binary(),
+
+	%% Functions.
+	urldecode :: {fun((binary(), T) -> binary()), T}
 }).

+ 0 - 2
rebar.config

@@ -1,7 +1,5 @@
 {cover_enabled, true}.
 {deps, [
-	{quoted, "1.2.*",
-		{git, "git://github.com/klaar/quoted.erl.git", {tag, "1.2.0"}}},
 	{proper, "1.0",
 		{git, "git://github.com/manopapad/proper.git", {tag, "v1.0"}}}
 ]}.

+ 11 - 9
src/cowboy_dispatcher.erl

@@ -16,7 +16,7 @@
 %% @doc Dispatch requests according to a hostname and path.
 -module(cowboy_dispatcher).
 
--export([split_host/1, split_path/1, match/3]). %% API.
+-export([split_host/1, split_path/2, match/3]). %% API.
 
 -type bindings() :: list({atom(), binary()}).
 -type tokens() :: list(binary()).
@@ -50,21 +50,22 @@ split_host(Host) ->
 %% Following RFC2396, this function may return path segments containing any
 %% character, including <em>/</em> if, and only if, a <em>/</em> was escaped
 %% and part of a path segment.
--spec split_path(binary()) -> {tokens(), binary(), binary()}.
-split_path(Path) ->
+-spec split_path(binary(), fun((binary()) -> binary())) ->
+		{tokens(), binary(), binary()}.
+split_path(Path, URLDec) ->
 	case binary:split(Path, <<"?">>) of
-		[Path] -> {do_split_path(Path, <<"/">>), Path, <<>>};
+		[Path] -> {do_split_path(Path, <<"/">>, URLDec), Path, <<>>};
 		[<<>>, Qs] -> {[], <<>>, Qs};
-		[Path2, Qs] -> {do_split_path(Path2, <<"/">>), Path2, Qs}
+		[Path2, Qs] -> {do_split_path(Path2, <<"/">>, URLDec), Path2, Qs}
 	end.
 
--spec do_split_path(binary(), <<_:8>>) -> tokens().
-do_split_path(RawPath, Separator) ->
+-spec do_split_path(binary(), <<_:8>>, fun((binary()) -> binary())) -> tokens().
+do_split_path(RawPath, Separator, URLDec) ->
 	EncodedPath = case binary:split(RawPath, Separator, [global, trim]) of
 		[<<>>|Path] -> Path;
 		Path -> Path
 	end,
-	[quoted:from_url(Token) || Token <- EncodedPath].
+	[URLDec(Token) || Token <- EncodedPath].
 
 %% @doc Match hostname tokens and path tokens against dispatch rules.
 %%
@@ -224,7 +225,8 @@ split_path_test_() ->
 			[<<"users">>, <<"a b">>, <<"c!d">>],
 			<<"/users/a+b/c%21d">>, <<"e+f=g+h">>}
 	],
-	[{P, fun() -> {R, RawP, Qs} = split_path(P) end}
+	URLDecode = fun(Bin) -> cowboy_http:urldecode(Bin, crash) end,
+	[{P, fun() -> {R, RawP, Qs} = split_path(P, URLDecode) end}
 		|| {P, R, RawP, Qs} <- Tests].
 
 match_test_() ->

+ 50 - 1
src/cowboy_http.erl

@@ -23,7 +23,7 @@
 	digits/1, token/2, token_ci/2, quoted_string/2]).
 
 %% Interpretation.
--export([connection_to_atom/1]).
+-export([connection_to_atom/1, urldecode/1, urldecode/2]).
 
 -include("include/http.hrl").
 -include_lib("eunit/include/eunit.hrl").
@@ -670,6 +670,46 @@ connection_to_atom([<<"close">>|_Tail]) ->
 connection_to_atom([_Any|Tail]) ->
 	connection_to_atom(Tail).
 
+%% @doc Decode a URL encoded binary.
+%% @equiv urldecode(Bin, crash)
+-spec urldecode(binary()) -> binary().
+urldecode(Bin) when is_binary(Bin) ->
+	urldecode(Bin, <<>>, crash).
+
+%% @doc Decode a URL encoded binary.
+%% The second argument specifies how to handle percent characters that are not
+%% followed by two valid hex characters. Use `skip' to ignore such errors;
+%% if `crash' is used, the function will fail with the reason `badarg'.
+-spec urldecode(binary(), crash | skip) -> binary().
+urldecode(Bin, OnError) when is_binary(Bin) ->
+	urldecode(Bin, <<>>, OnError).
+
+-spec urldecode(binary(), binary(), crash | skip) -> binary().
+urldecode(<<$%, H, L, Rest/binary>>, Acc, OnError) -> %% Percent sign followed by at least two bytes.
+	G = unhex(H), %% High nibble, or `error' if H is not a hex digit.
+	M = unhex(L), %% Low nibble, or `error' if L is not a hex digit.
+	if	G =:= error; M =:= error -> %% At least one of the two bytes is not a hex digit.
+		case OnError of skip -> ok; crash -> erlang:error(badarg) end, %% `crash' raises here; `skip' falls through.
+		urldecode(<<H, L, Rest/binary>>, <<Acc/binary, $%>>, OnError); %% Keep the percent sign as-is; H and L are scanned again as ordinary input.
+		true ->
+		urldecode(Rest, <<Acc/binary, (G bsl 4 bor M)>>, OnError) %% Combine both nibbles into one decoded byte.
+	end;
+urldecode(<<$%, Rest/binary>>, Acc, OnError) -> %% Reached only when fewer than two bytes follow the percent sign.
+	case OnError of skip -> ok; crash -> erlang:error(badarg) end,
+	urldecode(Rest, <<Acc/binary, $%>>, OnError);
+urldecode(<<$+, Rest/binary>>, Acc, OnError) -> %% A plus sign decodes to a space.
+	urldecode(Rest, <<Acc/binary, $ >>, OnError);
+urldecode(<<C, Rest/binary>>, Acc, OnError) -> %% Any other byte is copied unchanged.
+	urldecode(Rest, <<Acc/binary, C>>, OnError);
+urldecode(<<>>, Acc, _OnError) -> %% Input exhausted: return the accumulated result.
+	Acc.
+
+-spec unhex(byte()) -> byte() | error.
+unhex(C) when C >= $0, C =< $9 -> C - $0; %% Decimal digit: value 0..9.
+unhex(C) when C >= $A, C =< $F -> C - $A + 10; %% Uppercase hex letter: value 10..15.
+unhex(C) when C >= $a, C =< $f -> C - $a + 10; %% Lowercase hex letter: value 10..15.
+unhex(_) -> error. %% Not a hex digit; the caller checks for this atom.
+
 %% Tests.
 
 -ifdef(TEST).
@@ -836,4 +876,13 @@ digits_test_() ->
 	],
 	[{V, fun() -> R = digits(V) end} || {V, R} <- Tests].
 
+urldecode_test_() ->
+	Tests = [
+		{<<" ">>, <<"%20">>},
+		{<<" ">>, <<"+">>},
+		{<<0>>, <<"%00">>},
+		{<<255>>, <<"%fF">>}
+	],
+	[{I, ?_assertEqual(E, urldecode(I))} || {E, I} <- Tests].
+
 -endif.

+ 14 - 6
src/cowboy_http_protocol.erl

@@ -22,6 +22,9 @@
 %%   Defaults to 5.</dd>
 %%  <dt>timeout</dt><dd>Time in milliseconds before an idle
 %%   connection is closed. Defaults to 5000 milliseconds.</dd>
+%%  <dt>urldecode</dt><dd>Function and options argument to use when decoding
+%%   URL encoded strings. Defaults to `{fun cowboy_http:urldecode/2, crash}'.
+%%   </dd>
 %% </dl>
 %%
 %% Note that there is no need to monitor these processes when using Cowboy as
@@ -44,6 +47,7 @@
 	transport :: module(),
 	dispatch :: cowboy_dispatcher:dispatch_rules(),
 	handler :: {module(), any()},
+	urldecode :: {fun((binary(), T) -> binary()), T},
 	req_empty_lines = 0 :: integer(),
 	max_empty_lines :: integer(),
 	max_line_length :: integer(),
@@ -71,10 +75,12 @@ init(ListenerPid, Socket, Transport, Opts) ->
 	MaxEmptyLines = proplists:get_value(max_empty_lines, Opts, 5),
 	MaxLineLength = proplists:get_value(max_line_length, Opts, 4096),
 	Timeout = proplists:get_value(timeout, Opts, 5000),
+	URLDecDefault = {fun cowboy_http:urldecode/2, crash},
+	URLDec = proplists:get_value(urldecode, Opts, URLDecDefault),
 	receive shoot -> ok end,
 	wait_request(#state{listener=ListenerPid, socket=Socket, transport=Transport,
 		dispatch=Dispatch, max_empty_lines=MaxEmptyLines,
-		max_line_length=MaxLineLength, timeout=Timeout}).
+		max_line_length=MaxLineLength, timeout=Timeout, urldecode=URLDec}).
 
 %% @private
 -spec parse_request(#state{}) -> ok | none().
@@ -106,18 +112,20 @@ request({http_request, _Method, _URI, Version}, State)
 	error_terminate(505, State);
 %% @todo We need to cleanup the URI properly.
 request({http_request, Method, {abs_path, AbsPath}, Version},
-		State=#state{socket=Socket, transport=Transport}) ->
-	{Path, RawPath, Qs} = cowboy_dispatcher:split_path(AbsPath),
+		State=#state{socket=Socket, transport=Transport,
+		urldecode={URLDecFun, URLDecArg}=URLDec}) ->
+	URLDecode = fun(Bin) -> URLDecFun(Bin, URLDecArg) end,
+	{Path, RawPath, Qs} = cowboy_dispatcher:split_path(AbsPath, URLDecode),
 	ConnAtom = version_to_connection(Version),
 	parse_header(#http_req{socket=Socket, transport=Transport,
 		connection=ConnAtom, method=Method, version=Version,
-		path=Path, raw_path=RawPath, raw_qs=Qs}, State);
+		path=Path, raw_path=RawPath, raw_qs=Qs, urldecode=URLDec}, State);
 request({http_request, Method, '*', Version},
-		State=#state{socket=Socket, transport=Transport}) ->
+		State=#state{socket=Socket, transport=Transport, urldecode=URLDec}) ->
 	ConnAtom = version_to_connection(Version),
 	parse_header(#http_req{socket=Socket, transport=Transport,
 		connection=ConnAtom, method=Method, version=Version,
-		path='*', raw_path= <<"*">>, raw_qs= <<>>}, State);
+		path='*', raw_path= <<"*">>, raw_qs= <<>>, urldecode=URLDec}, State);
 request({http_request, _Method, _URI, _Version}, State) ->
 	error_terminate(501, State);
 request({http_error, <<"\r\n">>},

+ 16 - 13
src/cowboy_http_req.erl

@@ -124,9 +124,9 @@ qs_val(Name, Req) when is_binary(Name) ->
 %% missing.
 -spec qs_val(binary(), #http_req{}, Default)
 	-> {binary() | true | Default, #http_req{}} when Default::any().
-qs_val(Name, Req=#http_req{raw_qs=RawQs, qs_vals=undefined}, Default)
-		when is_binary(Name) ->
-	QsVals = parse_qs(RawQs),
+qs_val(Name, Req=#http_req{raw_qs=RawQs, qs_vals=undefined,
+		urldecode={URLDecFun, URLDecArg}}, Default) when is_binary(Name) ->
+	QsVals = parse_qs(RawQs, fun(Bin) -> URLDecFun(Bin, URLDecArg) end),
 	qs_val(Name, Req#http_req{qs_vals=QsVals}, Default);
 qs_val(Name, Req, Default) ->
 	case lists:keyfind(Name, 1, Req#http_req.qs_vals) of
@@ -136,8 +136,9 @@ qs_val(Name, Req, Default) ->
 
 %% @doc Return the full list of query string values.
 -spec qs_vals(#http_req{}) -> {list({binary(), binary() | true}), #http_req{}}.
-qs_vals(Req=#http_req{raw_qs=RawQs, qs_vals=undefined}) ->
-	QsVals = parse_qs(RawQs),
+qs_vals(Req=#http_req{raw_qs=RawQs, qs_vals=undefined,
+		urldecode={URLDecFun, URLDecArg}}) ->
+	QsVals = parse_qs(RawQs, fun(Bin) -> URLDecFun(Bin, URLDecArg) end),
 	qs_vals(Req#http_req{qs_vals=QsVals});
 qs_vals(Req=#http_req{qs_vals=QsVals}) ->
 	{QsVals, Req}.
@@ -355,9 +356,9 @@ body(Length, Req=#http_req{socket=Socket, transport=Transport,
 %% @doc Return the full body sent with the request, parsed as an
 %% application/x-www-form-urlencoded string. Essentially a POST query string.
 -spec body_qs(#http_req{}) -> {list({binary(), binary() | true}), #http_req{}}.
-body_qs(Req) ->
+body_qs(Req=#http_req{urldecode={URLDecFun, URLDecArg}}) ->
 	{ok, Body, Req2} = body(Req),
-	{parse_qs(Body), Req2}.
+	{parse_qs(Body, fun(Bin) -> URLDecFun(Bin, URLDecArg) end), Req2}.
 
 %% Response API.
 
@@ -483,14 +484,15 @@ compact(Req) ->
 
 %% Internal.
 
--spec parse_qs(binary()) -> list({binary(), binary() | true}).
-parse_qs(<<>>) ->
+-spec parse_qs(binary(), fun((binary()) -> binary())) ->
+		list({binary(), binary() | true}).
+parse_qs(<<>>, _URLDecode) ->
 	[];
-parse_qs(Qs) ->
+parse_qs(Qs, URLDecode) ->
 	Tokens = binary:split(Qs, <<"&">>, [global, trim]),
 	[case binary:split(Token, <<"=">>) of
-		[Token] -> {quoted:from_url(Token), true};
-		[Name, Value] -> {quoted:from_url(Name), quoted:from_url(Value)}
+		[Token] -> {URLDecode(Token), true};
+		[Name, Value] -> {URLDecode(Name), URLDecode(Value)}
 	end || Token <- Tokens].
 
 -spec response_connection(http_headers(), keepalive | close)
@@ -670,6 +672,7 @@ parse_qs_test_() ->
 		{<<"a=b=c=d=e&f=g">>, [{<<"a">>, <<"b=c=d=e">>}, {<<"f">>, <<"g">>}]},
 		{<<"a+b=c+d">>, [{<<"a b">>, <<"c d">>}]}
 	],
-	[{Qs, fun() -> R = parse_qs(Qs) end} || {Qs, R} <- Tests].
+	URLDecode = fun cowboy_http:urldecode/1,
+	[{Qs, fun() -> R = parse_qs(Qs, URLDecode) end} || {Qs, R} <- Tests].
 
 -endif.