Browse Source

Do not kill a discarded process in a custom resolve.

Roberto Ostinelli 5 years ago
parent
commit
31d559027e
4 changed files with 46 additions and 13 deletions
  1. 1 1
      README.md
  2. 5 5
      src/syn_event_handler.erl
  3. 9 3
      src/syn_registry.erl
  4. 31 4
      test/syn_registry_SUITE.erl

+ 1 - 1
README.md

@@ -451,7 +451,7 @@ Called when a process in a group exits. It will be called only on the node where
 #### `resolve_registry_conflict/3`
 #### `resolve_registry_conflict/3`
 In case of net splits, a specific Name might get registered simultaneously on two different nodes. In this case, the cluster experiences a registry naming conflict.
 In case of net splits, a specific Name might get registered simultaneously on two different nodes. In this case, the cluster experiences a registry naming conflict.
 
 
-When this happens, Syn will resolve this Process Registry conflict by choosing a single process. Syn will keep the processes running on the node the conflict is being resolved on, and by default will kill the other process by sending a `kill` signal with `exit(Pid, kill)`.
+When this happens, Syn will resolve this Process Registry conflict by choosing a single process. By default, Syn will keep the process running on the node the conflict is being resolved on, and will kill the other process by sending a `kill` signal with `exit(Pid, kill)`.
 
 
 If this is not desired, you can set this callback to perform custom operations (such as a graceful shutdown).
 If this is not desired, you can set this callback to perform custom operations (such as a graceful shutdown).
 
 

+ 5 - 5
src/syn_event_handler.erl

@@ -96,23 +96,23 @@ do_on_group_process_exit(GroupName, Pid, Meta, Reason, CustomEventHandler) ->
     {Pid1 :: pid(), Meta1 :: any()},
     {Pid1 :: pid(), Meta1 :: any()},
     {Pid2 :: pid(), Meta2 :: any()},
     {Pid2 :: pid(), Meta2 :: any()},
     CustomEventHandler :: module()
     CustomEventHandler :: module()
-) -> PidToKeep :: pid() | undefined.
+) -> {PidToKeep :: pid() | undefined, KillOther :: boolean()}.
 do_resolve_registry_conflict(Name, {LocalPid, LocalMeta}, {RemotePid, RemoteMeta}, CustomEventHandler) ->
 do_resolve_registry_conflict(Name, {LocalPid, LocalMeta}, {RemotePid, RemoteMeta}, CustomEventHandler) ->
     case erlang:function_exported(CustomEventHandler, resolve_registry_conflict, 3) of
     case erlang:function_exported(CustomEventHandler, resolve_registry_conflict, 3) of
         true ->
         true ->
             try CustomEventHandler:resolve_registry_conflict(Name, {LocalPid, LocalMeta}, {RemotePid, RemoteMeta}) of
             try CustomEventHandler:resolve_registry_conflict(Name, {LocalPid, LocalMeta}, {RemotePid, RemoteMeta}) of
                 PidToKeep when is_pid(PidToKeep) ->
                 PidToKeep when is_pid(PidToKeep) ->
-                    PidToKeep;
+                    {PidToKeep, false};
                 _ ->
                 _ ->
-                    undefined
+                    {undefined, false}
             catch Class:Reason:Stacktrace ->
             catch Class:Reason:Stacktrace ->
                 error_logger:error_msg(
                 error_logger:error_msg(
                     "Syn(~p): Error in custom handler resolve_registry_conflict: ~p:~p:~p",
                     "Syn(~p): Error in custom handler resolve_registry_conflict: ~p:~p:~p",
                     [node(), Class, Reason, Stacktrace]
                     [node(), Class, Reason, Stacktrace]
                 ),
                 ),
-                undefined
+                {undefined, false}
             end;
             end;
         _ ->
         _ ->
             %% by default, keep local pid
             %% by default, keep local pid
-            LocalPid
+            {LocalPid, true}
     end.
     end.

+ 9 - 3
src/syn_registry.erl

@@ -415,7 +415,7 @@ sync_registry_tuples(RemoteNode, RegistryTuples, #state{
                 ),
                 ),
 
 
                 %% call conflict resolution
                 %% call conflict resolution
-                PidToKeep = syn_event_handler:do_resolve_registry_conflict(
+                {PidToKeep, KillOther} = syn_event_handler:do_resolve_registry_conflict(
                     Name,
                     Name,
                     {LocalPid, LocalMeta},
                     {LocalPid, LocalMeta},
                     {RemotePid, RemoteMeta},
                     {RemotePid, RemoteMeta},
@@ -427,13 +427,19 @@ sync_registry_tuples(RemoteNode, RegistryTuples, #state{
                     LocalPid ->
                     LocalPid ->
                         %% keep local
                         %% keep local
                         ok = rpc:call(RemoteNode, syn_registry, remove_from_local_table, [Name]),
                         ok = rpc:call(RemoteNode, syn_registry, remove_from_local_table, [Name]),
-                        exit(RemotePid, kill);
+                        case KillOther of
+                            true -> exit(RemotePid, kill);
+                            _ -> ok
+                        end;
 
 
                     RemotePid ->
                     RemotePid ->
                         %% keep remote
                         %% keep remote
                         remove_from_local_table(Name),
                         remove_from_local_table(Name),
                         add_to_local_table(Name, RemotePid, RemoteMeta, undefined),
                         add_to_local_table(Name, RemotePid, RemoteMeta, undefined),
-                        exit(LocalPid, kill);
+                        case KillOther of
+                            true -> exit(LocalPid, kill);
+                            _ -> ok
+                        end;
 
 
                     Other ->
                     Other ->
                         error_logger:error_msg(
                         error_logger:error_msg(

+ 31 - 4
test/syn_registry_SUITE.erl

@@ -667,11 +667,32 @@ three_nodes_start_syn_before_connecting_cluster_with_conflict(Config) ->
     case syn:whereis(ConflictingName, with_meta) of
     case syn:whereis(ConflictingName, with_meta) of
         {Pid0, Meta} ->
         {Pid0, Meta} ->
             CurrentNode = node(),
             CurrentNode = node(),
-            CurrentNode = Meta;
+            CurrentNode = Meta,
+            %% check that other nodes' data corresponds
+            {Pid0, Meta} = rpc:call(SlaveNode1, syn, whereis, [ConflictingName, with_meta]),
+            {Pid0, Meta} = rpc:call(SlaveNode2, syn, whereis, [ConflictingName, with_meta]),
+            %% check that other processes are not alive because syn killed them
+            true = is_process_alive(Pid0),
+            false = rpc:call(SlaveNode1, erlang, is_process_alive, [Pid1]),
+            false = rpc:call(SlaveNode2, erlang, is_process_alive, [Pid2]);
         {Pid1, Meta} ->
         {Pid1, Meta} ->
-            SlaveNode1 = Meta;
+            SlaveNode1 = Meta,
+            %% check that other nodes' data corresponds
+            {Pid1, Meta} = rpc:call(SlaveNode1, syn, whereis, [ConflictingName, with_meta]),
+            {Pid1, Meta} = rpc:call(SlaveNode2, syn, whereis, [ConflictingName, with_meta]),
+            %% check that other processes are not alive because syn killed them
+            false = is_process_alive(Pid0),
+            true = rpc:call(SlaveNode1, erlang, is_process_alive, [Pid1]),
+            false = rpc:call(SlaveNode2, erlang, is_process_alive, [Pid2]);
         {Pid2, Meta} ->
         {Pid2, Meta} ->
-            SlaveNode2 = Meta
+            SlaveNode2 = Meta,
+            %% check that other nodes' data corresponds
+            {Pid2, Meta} = rpc:call(SlaveNode1, syn, whereis, [ConflictingName, with_meta]),
+            {Pid2, Meta} = rpc:call(SlaveNode2, syn, whereis, [ConflictingName, with_meta]),
+            %% check that other processes are not alive because syn killed them
+            false = is_process_alive(Pid0),
+            false = rpc:call(SlaveNode1, erlang, is_process_alive, [Pid1]),
+            true = rpc:call(SlaveNode2, erlang, is_process_alive, [Pid2])
     end,
     end,
     %% kill processes
     %% kill processes
     syn_test_suite_helper:kill_process(Pid0),
     syn_test_suite_helper:kill_process(Pid0),
@@ -721,8 +742,14 @@ three_nodes_start_syn_before_connecting_cluster_with_custom_conflict_resolution(
     true = lists:member(syn:whereis(ConflictingName), [Pid0, Pid1, Pid2]),
     true = lists:member(syn:whereis(ConflictingName), [Pid0, Pid1, Pid2]),
     true = lists:member(rpc:call(SlaveNode1, syn, whereis, [ConflictingName]), [Pid0, Pid1, Pid2]),
     true = lists:member(rpc:call(SlaveNode1, syn, whereis, [ConflictingName]), [Pid0, Pid1, Pid2]),
     true = lists:member(rpc:call(SlaveNode2, syn, whereis, [ConflictingName]), [Pid0, Pid1, Pid2]),
     true = lists:member(rpc:call(SlaveNode2, syn, whereis, [ConflictingName]), [Pid0, Pid1, Pid2]),
-    %% check metadata that we kept the correct process
+    %% check metadata that we kept the correct process on all nodes
     {Pid1, keep_this_one} = syn:whereis(ConflictingName, with_meta),
     {Pid1, keep_this_one} = syn:whereis(ConflictingName, with_meta),
+    {Pid1, keep_this_one} = rpc:call(SlaveNode1, syn, whereis, [ConflictingName, with_meta]),
+    {Pid1, keep_this_one} = rpc:call(SlaveNode1, syn, whereis, [ConflictingName, with_meta]),
+    %% check that other processes are still alive because we didn't kill them
+    true = is_process_alive(Pid0),
+    true = rpc:call(SlaveNode1, erlang, is_process_alive, [Pid1]),
+    true = rpc:call(SlaveNode2, erlang, is_process_alive, [Pid2]),
     %% kill processes
     %% kill processes
     syn_test_suite_helper:kill_process(Pid0),
     syn_test_suite_helper:kill_process(Pid0),
     syn_test_suite_helper:kill_process(Pid1),
     syn_test_suite_helper:kill_process(Pid1),