Просмотр исходного кода

Fix a race condition on restart after listener_sup crash

The race condition occurs when the restart is faster than
the cleaning up. With this commit the restart will perform
the cleanup if it was not done beforehand.
Loïc Hoguin 6 лет назад
Родитель
Сommit
7d3c65a6db
2 измененных файлов с 28 добавлено и 46 удалено
  1. 28 24
      src/ranch_server.erl
  2. 0 22
      test/acceptor_SUITE.erl

+ 28 - 24
src/ranch_server.erl

@@ -82,8 +82,7 @@ cleanup_listener_opts(Ref) ->
 
 
 -spec set_connections_sup(ranch:ref(), pid()) -> ok.
 -spec set_connections_sup(ranch:ref(), pid()) -> ok.
 set_connections_sup(Ref, Pid) ->
 set_connections_sup(Ref, Pid) ->
-	true = gen_server:call(?MODULE, {set_connections_sup, Ref, Pid}),
-	ok.
+	gen_server:call(?MODULE, {set_connections_sup, Ref, Pid}).
 
 
 -spec get_connections_sup(ranch:ref()) -> pid().
 -spec get_connections_sup(ranch:ref()) -> pid().
 get_connections_sup(Ref) ->
 get_connections_sup(Ref) ->
@@ -95,8 +94,7 @@ get_connections_sups() ->
 
 
 -spec set_listener_sup(ranch:ref(), pid()) -> ok.
 -spec set_listener_sup(ranch:ref(), pid()) -> ok.
 set_listener_sup(Ref, Pid) ->
 set_listener_sup(Ref, Pid) ->
-	true = gen_server:call(?MODULE, {set_listener_sup, Ref, Pid}),
-	ok.
+	gen_server:call(?MODULE, {set_listener_sup, Ref, Pid}).
 
 
 -spec get_listener_sup(ranch:ref()) -> pid().
 -spec get_listener_sup(ranch:ref()) -> pid().
 get_listener_sup(Ref) ->
 get_listener_sup(Ref) ->
@@ -161,26 +159,12 @@ handle_call({set_new_listener_opts, Ref, MaxConns, TransOpts, ProtoOpts, StartAr
 	ets:insert_new(?TAB, {{proto_opts, Ref}, ProtoOpts}),
 	ets:insert_new(?TAB, {{proto_opts, Ref}, ProtoOpts}),
 	ets:insert_new(?TAB, {{listener_start_args, Ref}, StartArgs}),
 	ets:insert_new(?TAB, {{listener_start_args, Ref}, StartArgs}),
 	{reply, ok, State};
 	{reply, ok, State};
-handle_call({set_connections_sup, Ref, Pid}, _,
-		State=#state{monitors=Monitors}) ->
-	case ets:insert_new(?TAB, {{conns_sup, Ref}, Pid}) of
-		true ->
-			MonitorRef = erlang:monitor(process, Pid),
-			{reply, true,
-				State#state{monitors=[{{MonitorRef, Pid}, {conns_sup, Ref}}|Monitors]}};
-		false ->
-			{reply, false, State}
-	end;
-handle_call({set_listener_sup, Ref, Pid}, _,
-		State=#state{monitors=Monitors}) ->
-	case ets:insert_new(?TAB, {{listener_sup, Ref}, Pid}) of
-		true ->
-			MonitorRef = erlang:monitor(process, Pid),
-			{reply, true,
-				State#state{monitors=[{{MonitorRef, Pid}, {listener_sup, Ref}}|Monitors]}};
-		false ->
-			{reply, false, State}
-	end;
+handle_call({set_connections_sup, Ref, Pid}, _, State0) ->
+	State = set_monitored_process({conns_sup, Ref}, Pid, State0),
+	{reply, ok, State};
+handle_call({set_listener_sup, Ref, Pid}, _, State0) ->
+	State = set_monitored_process({listener_sup, Ref}, Pid, State0),
+	{reply, ok, State};
 handle_call({set_addr, Ref, Addr}, _, State) ->
 handle_call({set_addr, Ref, Addr}, _, State) ->
 	true = ets:insert(?TAB, {{addr, Ref}, Addr}),
 	true = ets:insert(?TAB, {{addr, Ref}, Addr}),
 	{reply, ok, State};
 	{reply, ok, State};
@@ -227,3 +211,23 @@ terminate(_Reason, _State) ->
 
 
 code_change(_OldVsn, State, _Extra) ->
 code_change(_OldVsn, State, _Extra) ->
 	{ok, State}.
 	{ok, State}.
+
+%% Internal.
+
+set_monitored_process(Key, Pid, State=#state{monitors=Monitors0}) ->
+	%% First we cleanup the monitor if a residual one exists.
+	%% This can happen during crashes when the restart is faster
+	%% than the cleanup.
+	Monitors = case lists:keytake(Key, 2, Monitors0) of
+		false ->
+			Monitors0;
+		{value, {{OldMonitorRef, _}, _}, Monitors1} ->
+			true = erlang:demonitor(OldMonitorRef, [flush]),
+			Monitors1
+	end,
+	%% Then we unconditionally insert in the ets table.
+	%% If residual data is there, it will be overwritten.
+	true = ets:insert(?TAB, {Key, Pid}),
+	%% Finally we start monitoring this new process.
+	MonitorRef = erlang:monitor(process, Pid),
+	State#state{monitors=[{{MonitorRef, Pid}, Key}|Monitors]}.

+ 0 - 22
test/acceptor_SUITE.erl

@@ -72,7 +72,6 @@ groups() ->
 		connection_type_supervisor_separate_from_connection,
 		connection_type_supervisor_separate_from_connection,
 		supervisor_changed_options_restart,
 		supervisor_changed_options_restart,
 		supervisor_clean_child_restart,
 		supervisor_clean_child_restart,
-		supervisor_clean_conns_sup_restart,
 		supervisor_clean_restart,
 		supervisor_clean_restart,
 		supervisor_conns_alive,
 		supervisor_conns_alive,
 		supervisor_protocol_start_link_crash,
 		supervisor_protocol_start_link_crash,
@@ -1068,27 +1067,6 @@ do_supervisor_clean_child_restart(_) ->
 	ok = clean_traces(),
 	ok = clean_traces(),
 	ok = ranch:stop_listener(Name).
 	ok = ranch:stop_listener(Name).
 
 
-supervisor_clean_conns_sup_restart(_) ->
-	doc("Verify that a conns_sup can not register with the same name as an already "
-		"registered ranch_conns_sup that is still alive. Make sure this does not crash "
-		"the ranch_server process."),
-	Name = name(),
-	{ok, _} = ranch:start_listener(Name,
-		ranch_tcp, #{},
-		echo_protocol, []),
-	Server = erlang:whereis(ranch_server),
-	ServerMonRef = erlang:monitor(process, Server),
-	%% Exit because Name already registered and is alive.
-	{'EXIT', _} = (catch ranch_server:set_connections_sup(Name, self())),
-	receive
-		{'DOWN', ServerMonRef, process, Server, _} ->
-			error(ranch_server_down)
-	after
-		1000 ->
-			ok
-	end,
-	ok = ranch:stop_listener(Name).
-
 supervisor_clean_restart(Config) ->
 supervisor_clean_restart(Config) ->
 	case code:is_module_native(?MODULE) of
 	case code:is_module_native(?MODULE) of
 		true -> doc("This test uses tracing and is not compatible with native code.");
 		true -> doc("This test uses tracing and is not compatible with native code.");