Просмотр исходного кода

Pick remote pid over local pid in conflict resolution.

Roberto Ostinelli 5 лет назад
Родитель
Сommit
e9f923680d
4 измененных файлов с 27 добавлено и 27 удалено
  1. 1 1
      README.md
  2. 2 2
      src/syn_event_handler.erl
  3. 12 11
      src/syn_registry.erl
  4. 12 13
      test/syn_registry_SUITE.erl

+ 1 - 1
README.md

@@ -482,7 +482,7 @@ Called when a process in a group exits. It will be called only on the node where
 #### `resolve_registry_conflict/3`
 In case of net splits, a specific Name might get registered simultaneously on two different nodes. In this case, the cluster experiences a registry naming conflict.
 
-When this happens, Syn will resolve this Process Registry conflict by choosing a single process. By default, Syn will keep the process running on the node the conflict is being resolved on, and will kill the other process by sending a `kill` signal with `exit(Pid, kill)`.
+When this happens, Syn will resolve this Process Registry conflict by choosing a single process. By default, Syn will keep the process running on the remote node from where the conflict is being resolved on, and will kill the other process by sending a `kill` signal with `exit(Pid, kill)`.
 
 If this is not desired, you can set this callback to perform custom operations (such as a graceful shutdown).
 

+ 2 - 2
src/syn_event_handler.erl

@@ -113,6 +113,6 @@ do_resolve_registry_conflict(Name, {Pid1, Meta1}, {Pid2, Meta2}, CustomEventHand
                 {undefined, false}
             end;
         _ ->
-            %% by default, keep pid in table
-            {Pid1, true}
+            %% by default, keep pid that generated the conflict
+            {Pid2, true}
     end.

+ 12 - 11
src/syn_registry.erl

@@ -89,7 +89,8 @@ reregister(Name, Pid, Meta, RetryCount) when is_pid(Pid) ->
             case ?MODULE:register(Name, Pid, Meta) of
                 {error, taken} ->
                     %% race conditions, retry
-                    reregister(Name, Pid, Meta, RetryCount);
+                    timer:sleep(100),
+                    reregister(Name, Pid, Meta, RetryCount + 1);
 
                 Result ->
                     Result
@@ -675,12 +676,8 @@ resolve_conflict(
                 [node(), TablePid, RemotePid]
             ),
             %% callback: keeping local
-            CallbackIfLocal(),
-            %% kill?
-            case KillOther of
-                true -> syn_kill(RemotePid, Name, RemoteMeta);
-                _ -> undefined
-            end;
+            %% no process killing necessary because we kill local only if in a custom handler
+            CallbackIfLocal();
 
         RemotePid ->
             %% keep remote
@@ -689,8 +686,12 @@ resolve_conflict(
                 [node(), RemotePid, TablePid]
             ),
             %% keeping remote: overwrite remote data to local
-            %% no process killing necessary because we kill remote only if in a custom handler
-            add_to_local_table(Name, RemotePid, RemoteMeta, undefined);
+            add_to_local_table(Name, RemotePid, RemoteMeta, undefined),
+            %% kill?
+            case KillOther of
+                true -> syn_kill(Name, TablePid, TableMeta);
+                _ -> undefined
+            end;
 
         none ->
             error_logger:info_msg(
@@ -708,8 +709,8 @@ resolve_conflict(
             )
     end.
 
--spec syn_kill(PidToKill :: pid(), Name :: any(), Meta :: any()) -> true.
-syn_kill(PidToKill, Name, Meta) -> exit(PidToKill, {syn_resolve_kill, Name, Meta}).
+-spec syn_kill(Name :: any(), PidToKill :: pid(), Meta :: any()) -> true.
+syn_kill(Name, PidToKill, Meta) -> exit(PidToKill, {syn_resolve_kill, Name, Meta}).
 
 -spec raw_purge_registry_entries_for_remote_node(Node :: atom()) -> ok.
 raw_purge_registry_entries_for_remote_node(Node) when Node =/= node() ->

+ 12 - 13
test/syn_registry_SUITE.erl

@@ -45,8 +45,8 @@
 -export([
     two_nodes_register_monitor_and_unregister/1,
     two_nodes_registry_count/1,
-    two_nodes_registration_race_condition_conflict_resolution_keep_local/1,
     two_nodes_registration_race_condition_conflict_resolution_keep_remote/1,
+    two_nodes_registration_race_condition_conflict_resolution_keep_local_with_custom_handler/1,
     two_nodes_registration_race_condition_conflict_resolution_when_process_died/1,
     two_nodes_registry_full_cluster_sync_on_boot_node_added_later/1,
     two_nodes_registry_full_cluster_sync_on_boot_syn_started_later/1,
@@ -119,8 +119,8 @@ groups() ->
         {two_nodes_process_registration, [shuffle], [
             two_nodes_register_monitor_and_unregister,
             two_nodes_registry_count,
-            two_nodes_registration_race_condition_conflict_resolution_keep_local,
             two_nodes_registration_race_condition_conflict_resolution_keep_remote,
+            two_nodes_registration_race_condition_conflict_resolution_keep_local_with_custom_handler,
             two_nodes_registration_race_condition_conflict_resolution_when_process_died,
             two_nodes_registry_full_cluster_sync_on_boot_node_added_later,
             two_nodes_registry_full_cluster_sync_on_boot_syn_started_later,
@@ -496,7 +496,7 @@ two_nodes_registry_count(Config) ->
     0 = syn:registry_count(node()),
     0 = syn:registry_count(SlaveNode).
 
-two_nodes_registration_race_condition_conflict_resolution_keep_local(Config) ->
+two_nodes_registration_race_condition_conflict_resolution_keep_remote(Config) ->
     ConflictingName = "COMMON",
     %% get slaves
     SlaveNode = proplists:get_value(slave_node, Config),
@@ -513,14 +513,13 @@ two_nodes_registration_race_condition_conflict_resolution_keep_local(Config) ->
     ok = rpc:call(SlaveNode, syn, register, [ConflictingName, Pid1, SlaveNode]),
     timer:sleep(1000),
     %% check metadata, resolution happens on master node
-    Node = node(),
-    {Pid0, Node} = syn:whereis(ConflictingName, with_meta),
-    {Pid0, Node} = rpc:call(SlaveNode, syn, whereis, [ConflictingName, with_meta]),
+    {Pid1, SlaveNode} = syn:whereis(ConflictingName, with_meta),
+    {Pid1, SlaveNode} = rpc:call(SlaveNode, syn, whereis, [ConflictingName, with_meta]),
     %% check that other processes are not alive because syn killed them
-    true = is_process_alive(Pid0),
-    false = rpc:call(SlaveNode, erlang, is_process_alive, [Pid1]).
+    false = is_process_alive(Pid0),
+    true = rpc:call(SlaveNode, erlang, is_process_alive, [Pid1]).
 
-two_nodes_registration_race_condition_conflict_resolution_keep_remote(Config) ->
+two_nodes_registration_race_condition_conflict_resolution_keep_local_with_custom_handler(Config) ->
     ConflictingName = "COMMON",
     %% get slaves
     SlaveNode = proplists:get_value(slave_node, Config),
@@ -535,13 +534,13 @@ two_nodes_registration_race_condition_conflict_resolution_keep_remote(Config) ->
     Pid0 = syn_test_suite_helper:start_process(),
     Pid1 = syn_test_suite_helper:start_process(SlaveNode),
     %% inject into syn to simulate concurrent registration
-    ok = syn_registry:add_to_local_table(ConflictingName, Pid0, node(), undefined),
+    ok = syn_registry:add_to_local_table(ConflictingName, Pid0, keep_this_one, undefined),
     %% register on slave node to trigger conflict resolution on master node
-    ok = rpc:call(SlaveNode, syn, register, [ConflictingName, Pid1, keep_this_one]),
+    ok = rpc:call(SlaveNode, syn, register, [ConflictingName, Pid1, SlaveNode]),
     timer:sleep(1000),
     %% check metadata, resolution happens on master node
-    {Pid1, keep_this_one} = syn:whereis(ConflictingName, with_meta),
-    {Pid1, keep_this_one} = rpc:call(SlaveNode, syn, whereis, [ConflictingName, with_meta]),
+    {Pid0, keep_this_one} = syn:whereis(ConflictingName, with_meta),
+    {Pid0, keep_this_one} = rpc:call(SlaveNode, syn, whereis, [ConflictingName, with_meta]),
     %% check that other processes are not alive because syn killed them
     true = is_process_alive(Pid0),
     true = rpc:call(SlaveNode, erlang, is_process_alive, [Pid1]).