Просмотр исходного кода

Websocket text frames are now checked for UTF-8 correctness

The autobahntestsuite now passes 100% of the tests. We are
getting close to fully implementing the Websocket RFC.
Loïc Hoguin 12 лет назад
Родитель
Сommit
5dd09737d0
2 измененных файлов с 94 добавлено и 4 удалено
  1. 91 1
      src/cowboy_websocket.erl
  2. 3 3
      test/autobahn_SUITE.erl

+ 91 - 1
src/cowboy_websocket.erl

@@ -45,7 +45,8 @@
 	timeout_ref = undefined :: undefined | reference(),
 	messages = undefined :: undefined | {atom(), atom(), atom()},
 	hibernate = false :: boolean(),
-	frag_state = undefined :: frag_state()
+	frag_state = undefined :: frag_state(),
+	utf8_state = <<>> :: binary()
 }).
 
 %% @doc Upgrade an HTTP request to the Websocket protocol.
@@ -285,6 +286,65 @@ websocket_data(State, Req, HandlerState, Opcode, Len, MaskKey, Data, 1) ->
 	-> {ok, Req, cowboy_middleware:env()}
 	| {suspend, module(), atom(), [any()]}
 	when Req::cowboy_req:req().
+%% Text frames must have a payload that is valid UTF-8.
+websocket_payload(State=#state{utf8_state=Incomplete},
+		Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data)
+		when byte_size(Data) < Len ->
+	Unmasked2 = websocket_unmask(Data,
+		rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+	case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+		false ->
+			websocket_close(State, Req, HandlerState, {error, badframe});
+		Utf8State ->
+			websocket_payload_loop(State#state{utf8_state=Utf8State},
+				Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
+				<< Unmasked/binary, Unmasked2/binary >>)
+	end;
+websocket_payload(State=#state{utf8_state=Incomplete},
+		Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data) ->
+	<< End:Len/binary, Rest/bits >> = Data,
+	Unmasked2 = websocket_unmask(End,
+		rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+	case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+		<<>> ->
+			websocket_dispatch(State#state{utf8_state= <<>>},
+				Req, HandlerState, Rest, Opcode,
+				<< Unmasked/binary, Unmasked2/binary >>);
+		_ ->
+			websocket_close(State, Req, HandlerState, {error, badframe})
+	end;
+%% Fragmented text frames may cut payload in the middle of UTF-8 codepoints.
+websocket_payload(State=#state{frag_state={_, 1, _}, utf8_state=Incomplete},
+		Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data)
+		when byte_size(Data) < Len ->
+	Unmasked2 = websocket_unmask(Data,
+		rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+	case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+		false ->
+			websocket_close(State, Req, HandlerState, {error, badframe});
+		Utf8State ->
+			websocket_payload_loop(State#state{utf8_state=Utf8State},
+				Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
+				<< Unmasked/binary, Unmasked2/binary >>)
+	end;
+websocket_payload(State=#state{frag_state={Fin, 1, _}, utf8_state=Incomplete},
+		Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data) ->
+	<< End:Len/binary, Rest/bits >> = Data,
+	Unmasked2 = websocket_unmask(End,
+		rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
+	case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
+		<<>> ->
+			websocket_dispatch(State#state{utf8_state= <<>>},
+				Req, HandlerState, Rest, Opcode,
+				<< Unmasked/binary, Unmasked2/binary >>);
+		Utf8State when is_binary(Utf8State), Fin =:= nofin ->
+			websocket_dispatch(State#state{utf8_state=Utf8State},
+				Req, HandlerState, Rest, Opcode,
+				<< Unmasked/binary, Unmasked2/binary >>);
+		_ ->
+			websocket_close(State, Req, HandlerState, {error, badframe})
+	end;
+%% Other frames have a binary payload.
 websocket_payload(State, Req, HandlerState,
 		Opcode, Len, MaskKey, Unmasked, Data)
 		when byte_size(Data) < Len ->
@@ -325,6 +385,36 @@ rotate_mask_key(MaskKey, UnmaskedLen) ->
 	Right = 4 - Left,
 	(MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)).
 
+%% Returns <<>> if the argument is valid UTF-8, false if not,
+%% or the incomplete part of the argument if we need more data.
+-spec is_utf8(binary()) -> false | binary().
+is_utf8(Valid = <<>>) ->
+	Valid;
+is_utf8(<< _/utf8, Rest/binary >>) ->
+	is_utf8(Rest);
+%% 2 bytes. Codepages C0 and C1 are invalid; fail early.
+is_utf8(<< 2#1100000:7, _/bits >>) ->
+	false;
+is_utf8(Incomplete = << 2#110:3, _:5 >>) ->
+	Incomplete;
+%% 3 bytes.
+is_utf8(Incomplete = << 2#1110:4, _:4 >>) ->
+	Incomplete;
+is_utf8(Incomplete = << 2#1110:4, _:4, 2#10:2, _:6 >>) ->
+	Incomplete;
+%% 4 bytes. Codepage F4 may have invalid values greater than 0x10FFFF.
+is_utf8(<< 2#11110100:8, 2#10:2, High:6, _/bits >>) when High >= 2#10000 ->
+	false;
+is_utf8(Incomplete = << 2#11110:5, _:3 >>) ->
+	Incomplete;
+is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6 >>) ->
+	Incomplete;
+is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6, 2#10:2, _:6 >>) ->
+	Incomplete;
+%% Invalid.
+is_utf8(_) ->
+	false.
+
 -spec websocket_payload_loop(#state{}, Req, any(),
 	opcode(), non_neg_integer(), mask_key(), binary())
 	-> {ok, Req, cowboy_middleware:env()}

+ 3 - 3
test/autobahn_SUITE.erl

@@ -92,7 +92,7 @@ run_tests(Config) ->
 		_ -> ok
 	end,
 	{ok, IndexHTML} = file:read_file(IndexFile),
-	case binary:match(IndexHTML, <<"Fail">>) of
-		{_, _} -> erlang:error(failed);
-		nomatch -> ok
+	case length(binary:matches(IndexHTML, <<"case_failed">>)) > 2 of
+		true -> erlang:error(failed);
+		false -> ok
 	end.