Browse Source

Fix a race condition on restart after listener_sup crash

The race condition occurs when the restart is faster than
the cleaning up. With this commit the restart will perform
the cleanup if it was not done beforehand.
Loïc Hoguin 6 years ago
parent
commit
7d3c65a6db
2 changed files with 28 additions and 46 deletions
  1. 28 24
      src/ranch_server.erl
  2. 0 22
      test/acceptor_SUITE.erl

+ 28 - 24
src/ranch_server.erl

@@ -82,8 +82,7 @@ cleanup_listener_opts(Ref) ->
 
 -spec set_connections_sup(ranch:ref(), pid()) -> ok.
 set_connections_sup(Ref, Pid) ->
-	true = gen_server:call(?MODULE, {set_connections_sup, Ref, Pid}),
-	ok.
+	gen_server:call(?MODULE, {set_connections_sup, Ref, Pid}).
 
 -spec get_connections_sup(ranch:ref()) -> pid().
 get_connections_sup(Ref) ->
@@ -95,8 +94,7 @@ get_connections_sups() ->
 
 -spec set_listener_sup(ranch:ref(), pid()) -> ok.
 set_listener_sup(Ref, Pid) ->
-	true = gen_server:call(?MODULE, {set_listener_sup, Ref, Pid}),
-	ok.
+	gen_server:call(?MODULE, {set_listener_sup, Ref, Pid}).
 
 -spec get_listener_sup(ranch:ref()) -> pid().
 get_listener_sup(Ref) ->
@@ -161,26 +159,12 @@ handle_call({set_new_listener_opts, Ref, MaxConns, TransOpts, ProtoOpts, StartAr
 	ets:insert_new(?TAB, {{proto_opts, Ref}, ProtoOpts}),
 	ets:insert_new(?TAB, {{listener_start_args, Ref}, StartArgs}),
 	{reply, ok, State};
-handle_call({set_connections_sup, Ref, Pid}, _,
-		State=#state{monitors=Monitors}) ->
-	case ets:insert_new(?TAB, {{conns_sup, Ref}, Pid}) of
-		true ->
-			MonitorRef = erlang:monitor(process, Pid),
-			{reply, true,
-				State#state{monitors=[{{MonitorRef, Pid}, {conns_sup, Ref}}|Monitors]}};
-		false ->
-			{reply, false, State}
-	end;
-handle_call({set_listener_sup, Ref, Pid}, _,
-		State=#state{monitors=Monitors}) ->
-	case ets:insert_new(?TAB, {{listener_sup, Ref}, Pid}) of
-		true ->
-			MonitorRef = erlang:monitor(process, Pid),
-			{reply, true,
-				State#state{monitors=[{{MonitorRef, Pid}, {listener_sup, Ref}}|Monitors]}};
-		false ->
-			{reply, false, State}
-	end;
+handle_call({set_connections_sup, Ref, Pid}, _, State0) ->
+	State = set_monitored_process({conns_sup, Ref}, Pid, State0),
+	{reply, ok, State};
+handle_call({set_listener_sup, Ref, Pid}, _, State0) ->
+	State = set_monitored_process({listener_sup, Ref}, Pid, State0),
+	{reply, ok, State};
 handle_call({set_addr, Ref, Addr}, _, State) ->
 	true = ets:insert(?TAB, {{addr, Ref}, Addr}),
 	{reply, ok, State};
@@ -227,3 +211,23 @@ terminate(_Reason, _State) ->
 
 code_change(_OldVsn, State, _Extra) ->
 	{ok, State}.
+
+%% Internal.
+
+set_monitored_process(Key, Pid, State=#state{monitors=Monitors0}) ->
+	%% First we cleanup the monitor if a residual one exists.
+	%% This can happen during crashes when the restart is faster
+	%% than the cleanup.
+	Monitors = case lists:keytake(Key, 2, Monitors0) of
+		false ->
+			Monitors0;
+		{value, {{OldMonitorRef, _}, _}, Monitors1} ->
+			true = erlang:demonitor(OldMonitorRef, [flush]),
+			Monitors1
+	end,
+	%% Then we unconditionally insert in the ets table.
+	%% If residual data is there, it will be overwritten.
+	true = ets:insert(?TAB, {Key, Pid}),
+	%% Finally we start monitoring this new process.
+	MonitorRef = erlang:monitor(process, Pid),
+	State#state{monitors=[{{MonitorRef, Pid}, Key}|Monitors]}.

+ 0 - 22
test/acceptor_SUITE.erl

@@ -72,7 +72,6 @@ groups() ->
 		connection_type_supervisor_separate_from_connection,
 		supervisor_changed_options_restart,
 		supervisor_clean_child_restart,
-		supervisor_clean_conns_sup_restart,
 		supervisor_clean_restart,
 		supervisor_conns_alive,
 		supervisor_protocol_start_link_crash,
@@ -1068,27 +1067,6 @@ do_supervisor_clean_child_restart(_) ->
 	ok = clean_traces(),
 	ok = ranch:stop_listener(Name).
 
-supervisor_clean_conns_sup_restart(_) ->
-	doc("Verify that a conns_sup can not register with the same name as an already "
-		"registered ranch_conns_sup that is still alive. Make sure this does not crash "
-		"the ranch_server process."),
-	Name = name(),
-	{ok, _} = ranch:start_listener(Name,
-		ranch_tcp, #{},
-		echo_protocol, []),
-	Server = erlang:whereis(ranch_server),
-	ServerMonRef = erlang:monitor(process, Server),
-	%% Exit because Name already registered and is alive.
-	{'EXIT', _} = (catch ranch_server:set_connections_sup(Name, self())),
-	receive
-		{'DOWN', ServerMonRef, process, Server, _} ->
-			error(ranch_server_down)
-	after
-		1000 ->
-			ok
-	end,
-	ok = ranch:stop_listener(Name).
-
 supervisor_clean_restart(Config) ->
 	case code:is_module_native(?MODULE) of
 		true -> doc("This test uses tracing and is not compatible with native code.");