Browse Source

Add registry conflict resolution.

Roberto Ostinelli 3 years ago
parent
commit
eac882a50e
4 changed files with 252 additions and 113 deletions
  1. 33 23
      src/syn_event_handler.erl
  2. 112 35
      src/syn_registry.erl
  3. 104 55
      test/syn_registry_SUITE.erl
  4. 3 0
      test/syn_test_suite_helper.erl

+ 33 - 23
src/syn_event_handler.erl

@@ -28,35 +28,45 @@
 %% ==========================================================================================================
 -module(syn_event_handler).
 
--export([on_process_unregistered/5]).
+-export([do_resolve_registry_conflict/5]).
 
--callback on_process_unregistered(
-    Scope :: atom(),
+-callback resolve_registry_conflict(
     Name :: any(),
-    Pid :: pid(),
-    Meta :: any(),
-    Reason :: any()
-) -> any().
+    {Pid1 :: pid(), Meta1 :: any()},
+    {Pid2 :: pid(), Meta2 :: any()}
+) -> PidToKeep :: pid() | undefined.
 
--optional_callbacks([on_process_unregistered/5]).
+-optional_callbacks([resolve_registry_conflict/3]).
 
-%% ===================================================================
-%% API
-%% ===================================================================
--spec on_process_unregistered(
+-spec do_resolve_registry_conflict(
     Scope :: atom(),
     Name :: any(),
-    Pid :: pid(),
-    Meta :: any(),
-    Reason :: any()
-) -> any().
-on_process_unregistered(Scope, Name, Pid, Meta, Reason) ->
-    CustomEventHandler = undefined,
-    case erlang:function_exported(CustomEventHandler, on_process_unregistered, 5) of
+    {Pid1 :: pid(), Meta1 :: any(), Time1 :: non_neg_integer()},
+    {Pid2 :: pid(), Meta2 :: any(), Time2 :: non_neg_integer()},
+    CustomEventHandler :: module() | undefined
+) -> PidToKeep :: pid() | undefined.
+do_resolve_registry_conflict(Scope, Name, {Pid1, Meta1, Time1}, {Pid2, Meta2, Time2}, CustomEventHandler) ->
+    case erlang:function_exported(CustomEventHandler, resolve_registry_conflict, 4) of
         true ->
-            spawn(fun() ->
-                CustomEventHandler:on_process_unregistered(Scope, Name, Pid, Meta, Reason)
-            end);
+            try CustomEventHandler:resolve_registry_conflict(Scope, Name, {Pid1, Meta1}, {Pid2, Meta2}) of
+                PidToKeep when is_pid(PidToKeep) -> PidToKeep;
+                _ -> undefined
+
+            catch Exception:Reason ->
+                error_logger:error_msg(
+                    "Syn(~p): Error ~p in custom handler resolve_registry_conflict: ~p~n",
+                    [node(), Exception, Reason]
+                ),
+                undefined
+            end;
+
         _ ->
-            ok
+            %% by default, keep pid registered more recently
+            %% this is a simple mechanism that can be imprecise, as system clocks are not perfectly aligned in a cluster
+            %% if something more elaborate is desired (such as vector clocks) use Meta to store data and a custom event handler
+            PidToKeep = case Time1 > Time2 of
+                true -> Pid1;
+                _ -> Pid2
+            end,
+            PidToKeep
     end.

+ 112 - 35
src/syn_registry.erl

@@ -198,8 +198,8 @@ handle_call({register_on_node, Name, Pid, Meta}, _From, #state{
                     %% return
                     {reply, ok, State};
 
-                {{Name, Pid}, _Meta, _Time, MRef, _Node} ->
-                    %% same pid, possibly new meta, overwrite
+                {{Name, Pid}, _TableMeta, _TableTime, MRef, _TableNode} ->
+                    %% same pid, possibly new meta or time, overwrite
                     Time = erlang:system_time(),
                     add_to_local_table(Scope, Name, Pid, Meta, Time, MRef),
                     %% broadcast
@@ -247,28 +247,7 @@ handle_call(Request, From, State) ->
     {noreply, #state{}, Timeout :: non_neg_integer()} |
     {stop, Reason :: any(), #state{}}.
 handle_cast({'3.0', sync_register, Scope, Name, Pid, Meta, Time}, State) ->
-
-    ct:pal("----> ~p~n", [{Scope, Name, Pid, Meta, Time}]),
-
-    case find_registry_entry_by_name(Scope, Name) of
-        undefined ->
-            %% no conflict
-            add_to_local_table(Scope, Name, Pid, Meta, Time, undefined);
-
-        {{Name, Pid}, _Meta, TableTime, MRef, _Node} when TableTime < Time ->
-            %% same pid, more recent time
-            add_to_local_table(Scope, Name, Pid, Meta, Time, MRef);
-
-        {{Name, Pid}, _Meta, _TableTime, _MRef, _Node} ->
-            %% same pid, not more recent time
-            ok;
-
-        {{Name, TablePid}, _Meta, _Time, _MRef, _Node} ->
-            %% different pid -> conflict
-
-            ct:pal("CONFLICT!!!!!"),
-            ok
-    end,
+    handle_registry_sync(Scope, Name, Pid, Meta, Time, State),
     {noreply, State};
 
 handle_cast({'3.0', sync_unregister, Name, Pid}, #state{scope = Scope} = State) ->
@@ -307,7 +286,7 @@ handle_cast({'3.0', sync, RemoteScopePid, RegistryTuplesOfRemoteNode}, #state{
     ),
     %% insert tuples
     lists:foreach(fun({Name, Pid, Meta, Time}) ->
-        add_to_local_table(Scope, Name, Pid, Meta, Time, undefined)
+        handle_registry_sync(Scope, Name, Pid, Meta, Time, State)
     end, RegistryTuplesOfRemoteNode),
     %% is this a new node?
     case maps:is_key(RemoteScopeNode, Nodes) of
@@ -317,7 +296,7 @@ handle_cast({'3.0', sync, RemoteScopePid, RegistryTuplesOfRemoteNode}, #state{
 
         false ->
             %% if we don't know about the node, it is because it's the response to the first broadcast of announce message
-            %% monitor
+            %% -> monitor
             _MRef = monitor(process, RemoteScopePid),
             {noreply, State#state{nodes = Nodes#{RemoteScopeNode => RemoteScopePid}}}
     end;
@@ -344,6 +323,7 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, #state{
     scope = Scope,
     nodes = Nodes
 } = State) when node(Pid) =/= node() ->
+    %% scope process down
     RemoteNode = node(Pid),
     case maps:take(RemoteNode, Nodes) of
         {Pid, Nodes1} ->
@@ -457,7 +437,7 @@ find_registry_entry_by_name(Scope, Name) ->
         ['$_']
     }]) of
         [RegistryEntry] -> RegistryEntry;
-        _ -> undefined
+        [] -> undefined
     end.
 
 -spec find_registry_entries_by_pid(Scope :: atom(), Pid :: pid()) -> RegistryEntries :: [syn_registry_entry()].
@@ -470,14 +450,13 @@ find_registry_entries_by_pid(Scope, Pid) when is_pid(Pid) ->
 
 -spec find_monitor_for_pid(Scope :: atom(), Pid :: pid()) -> reference() | undefined.
 find_monitor_for_pid(Scope, Pid) when is_pid(Pid) ->
-    TableName = syn_backbone:get_table_name(syn_registry_by_pid, Scope),
-    case ets:select(TableName, [{
+    case ets:select(syn_backbone:get_table_name(syn_registry_by_pid, Scope), [{
         {{Pid, '_'}, '_', '_', '$5', '_'},
         [],
         ['$5']
     }], 1) of
         {[MRef], _} -> MRef;
-        _ -> undefined
+        '$end_of_table' -> undefined
     end.
 
 -spec maybe_demonitor(Scope :: atom(), Pid :: pid()) -> ok.
@@ -489,7 +468,7 @@ maybe_demonitor(Scope, Pid) ->
         [],
         ['$5']
     }], 2) of
-        {[MRef], _} ->
+        {[MRef], _} when is_reference(MRef) ->
             %% no other aliases, demonitor
             erlang:demonitor(MRef, [flush]),
             ok;
@@ -506,22 +485,120 @@ maybe_demonitor(Scope, Pid) ->
     MRef :: undefined | reference()
 ) -> true.
 add_to_local_table(Scope, Name, Pid, Meta, Time, MRef) ->
-
     true = ets:insert(syn_backbone:get_table_name(syn_registry_by_name, Scope),
         {{Name, Pid}, Meta, Time, MRef, node(Pid)}
     ),
     true = ets:insert(syn_backbone:get_table_name(syn_registry_by_pid, Scope),
         {{Pid, Name}, Meta, Time, MRef, node(Pid)}
-    ),
-
-    ct:pal("WTF: ~p~n",[ets:tab2list(syn_backbone:get_table_name(syn_registry_by_name, Scope))]).
+    ).
 
 -spec remove_from_local_table(Scope :: atom(), Name :: any(), Pid :: pid()) -> true.
 remove_from_local_table(Scope, Name, Pid) ->
     true = ets:delete(syn_backbone:get_table_name(syn_registry_by_name, Scope), {Name, Pid}),
     true = ets:delete(syn_backbone:get_table_name(syn_registry_by_pid, Scope), {Pid, Name}).
 
+-spec update_local_table(
+    Scope :: atom(),
+    Name :: any(),
+    PreviousPid :: pid(),
+    {
+        Pid :: pid(),
+        Meta :: any(),
+        Time :: integer(),
+        MRef :: undefined | reference()
+    }
+) -> true.
+update_local_table(Scope, Name, PreviousPid, {Pid, Meta, Time, MRef}) ->
+    maybe_demonitor(Scope, PreviousPid),
+    remove_from_local_table(Scope, Name, PreviousPid),
+    add_to_local_table(Scope, Name, Pid, Meta, Time, MRef).
+
 -spec purge_registry_for_remote_node(Scope :: atom(), Node :: atom()) -> true.
 purge_registry_for_remote_node(Scope, Node) when Node =/= node() ->
     true = ets:match_delete(syn_backbone:get_table_name(syn_registry_by_name, Scope), {{'_', '_'}, '_', '_', '_', Node}),
     true = ets:match_delete(syn_backbone:get_table_name(syn_registry_by_pid, Scope), {{'_', '_'}, '_', '_', '_', Node}).
+
+-spec handle_registry_sync(
+    Scope :: atom(),
+    Name :: any(),
+    Pid :: pid(),
+    Meta :: any(),
+    Time :: non_neg_integer(),
+    #state{}
+) -> any().
+handle_registry_sync(Scope, Name, Pid, Meta, Time, State) ->
+    case find_registry_entry_by_name(Scope, Name) of
+        undefined ->
+            %% no conflict
+            add_to_local_table(Scope, Name, Pid, Meta, Time, undefined);
+
+        {{Name, Pid}, _TableMeta, _TableTime, MRef, _TableNode} ->
+            %% same pid, more recent (because it comes from the same node, which means that it's sequential)
+            add_to_local_table(Scope, Name, Pid, Meta, Time, MRef);
+
+        {{Name, TablePid}, TableMeta, TableTime, TableMRef, _TableNode} when node(TablePid) =:= node() ->
+            %% current node runs a conflicting process -> resolve
+            %% * the conflict is resolved by the two nodes that own the conflicting processes
+            %% * when a process is chosen, the time is updated
+            %% * the node that runs the process that is kept sends the sync_register message
+            %% * recipients check that the time is more recent that what they have to ensure that there are no race conditions
+            resolve_conflict(Scope, Name, {Pid, Meta, Time}, {TablePid, TableMeta, TableTime, TableMRef}, State);
+
+        {{Name, TablePid}, _TableMeta, TableTime, _TableMRef, _TableNode} when TableTime < Time ->
+            %% current node does not own any of the conflicting processes, update
+            update_local_table(Scope, Name, TablePid, {Pid, Meta, Time, undefined});
+
+        {{Name, _TablePid}, _TableMeta, _TableTime, _TableMRef, _TableNode} ->
+            %% race condition: incoming data is older, ignore
+            ok
+    end.
+
+-spec resolve_conflict(
+    Scope :: atom(),
+    Name :: any(),
+    {Pid :: pid(), Meta :: any(), Time :: non_neg_integer()},
+    {TablePid :: pid(), TableMeta :: any(), TableTime :: non_neg_integer(), TableMRef :: reference()},
+    #state{}
+) -> KeptPid :: pid().
+resolve_conflict(Scope, Name, {Pid, Meta, Time}, {TablePid, TableMeta, TableTime, TableMRef}, State) ->
+    CustomEventHandler = undefined,
+    %% call conflict resolution
+    PidToKeep = syn_event_handler:do_resolve_registry_conflict(
+        Scope,
+        Name,
+        {Pid, Meta, Time},
+        {TablePid, TableMeta, TableTime},
+        CustomEventHandler
+    ),
+    %% resolve
+    case PidToKeep of
+        Pid ->
+            %% -> we keep the remote pid
+            %% update locally, the incoming sync_register will update with the time coming from remote node
+            update_local_table(Scope, Name, TablePid, {Pid, Meta, Time, undefined}),
+            %% kill
+            exit(TablePid, {syn_resolve_kill, Name, TableMeta}),
+            error_logger:info_msg("SYN[~p] Registry CONFLICT for name ~p@~p: ~p ~p -> chosen: ~p~n",
+                [node(), Name, Scope, Pid, TablePid, Pid]
+            );
+
+        TablePid ->
+            %% -> we keep the local pid
+            %% overwrite with updated time
+            ResolveTime = erlang:system_time(),
+            add_to_local_table(Scope, Name, TablePid, TableMeta, ResolveTime, TableMRef),
+            %% broadcast
+            broadcast({'3.0', sync_register, Scope, Name, TablePid, TableMeta, ResolveTime}, State),
+            error_logger:info_msg("SYN[~p] Registry CONFLICT for name ~p@~p: ~p ~p -> chosen: ~p~n",
+                [node(), Name, Scope, Pid, TablePid, TablePid]
+            );
+
+        Invalid ->
+            maybe_demonitor(Scope, TablePid),
+            remove_from_local_table(Scope, Name, TablePid),
+            %% kill local, remote will be killed by other node performing the resolve
+            exit(TablePid, {syn_resolve_kill, Name, TableMeta}),
+            error_logger:info_msg("SYN[~p] Registry CONFLICT for name ~p@~p: ~p ~p -> none chosen (got: ~p)~n",
+                [node(), Name, Scope, Pid, TablePid, Invalid]
+            )
+    end.

+ 104 - 55
test/syn_registry_SUITE.erl

@@ -37,7 +37,8 @@
     three_nodes_discover_custom_scope/1,
     three_nodes_register_unregister_and_monitor_default_scope/1,
     three_nodes_register_unregister_and_monitor_custom_scope/1,
-    three_nodes_cluster_changes_and_conflicts/1
+    three_nodes_cluster_changes/1,
+    three_nodes_cluster_conflicts/1
 ]).
 
 %% include
@@ -75,11 +76,12 @@ all() ->
 groups() ->
     [
         {three_nodes_process_registration, [shuffle], [
-%%            three_nodes_discover_default_scope,
-%%            three_nodes_discover_custom_scope,
-%%            three_nodes_register_unregister_and_monitor_default_scope,
-%%            three_nodes_register_unregister_and_monitor_custom_scope,
-            three_nodes_cluster_changes_and_conflicts
+            three_nodes_discover_default_scope,
+            three_nodes_discover_custom_scope,
+            three_nodes_register_unregister_and_monitor_default_scope,
+            three_nodes_register_unregister_and_monitor_custom_scope,
+            three_nodes_cluster_changes,
+            three_nodes_cluster_conflicts
         ]}
     ].
 %% -------------------------------------------------------------------
@@ -172,11 +174,12 @@ three_nodes_discover_default_scope(Config) ->
     %% get slaves
     SlaveNode1 = proplists:get_value(slave_node_1, Config),
     SlaveNode2 = proplists:get_value(slave_node_2, Config),
+
     %% start syn on nodes
     ok = syn:start(),
     ok = rpc:call(SlaveNode1, syn, start, []),
     ok = rpc:call(SlaveNode2, syn, start, []),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), default, [SlaveNode1, SlaveNode2]),
@@ -187,7 +190,7 @@ three_nodes_discover_default_scope(Config) ->
     rpc:call(SlaveNode1, syn_test_suite_helper, disconnect_node, [SlaveNode2]),
     syn_test_suite_helper:disconnect_node(SlaveNode1),
     syn_test_suite_helper:disconnect_node(SlaveNode2),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), default, []),
@@ -212,7 +215,7 @@ three_nodes_discover_default_scope(Config) ->
 
     %% crash the scope process on local
     syn_test_suite_helper:kill_process(syn_registry_default),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check, it should have rebuilt after supervisor restarts it
     assert_scope_subcluster(node(), default, [SlaveNode1, SlaveNode2]),
@@ -221,7 +224,7 @@ three_nodes_discover_default_scope(Config) ->
 
     %% crash scopes supervisor on local
     syn_test_suite_helper:kill_process(syn_scopes_sup),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), default, [SlaveNode1, SlaveNode2]),
@@ -232,18 +235,19 @@ three_nodes_discover_custom_scope(Config) ->
     %% get slaves
     SlaveNode1 = proplists:get_value(slave_node_1, Config),
     SlaveNode2 = proplists:get_value(slave_node_2, Config),
+
     %% start syn on nodes
     ok = syn:start(),
     ok = rpc:call(SlaveNode1, syn, start, []),
     ok = rpc:call(SlaveNode2, syn, start, []),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% add custom scopes
     ok = syn:add_node_to_scope(custom_scope_ab),
     ok = syn:add_node_to_scope(custom_scope_all),
     ok = rpc:call(SlaveNode1, syn, add_node_to_scopes, [[custom_scope_ab, custom_scope_bc, custom_scope_all]]),
     ok = rpc:call(SlaveNode2, syn, add_node_to_scopes, [[custom_scope_bc, custom_scope_c, custom_scope_all]]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), custom_scope_ab, [SlaveNode1]),
@@ -262,7 +266,7 @@ three_nodes_discover_custom_scope(Config) ->
 
     %% disconnect node 2 (node 1 can still see node 2)
     syn_test_suite_helper:disconnect_node(SlaveNode2),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), custom_scope_ab, [SlaveNode1]),
@@ -287,7 +291,7 @@ three_nodes_discover_custom_scope(Config) ->
 
     %% crash a scope process on 2
     rpc:call(SlaveNode2, syn_test_suite_helper, kill_process, [syn_registry_custom_scope_bc]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), custom_scope_ab, [SlaveNode1]),
@@ -301,7 +305,7 @@ three_nodes_discover_custom_scope(Config) ->
 
     %% crash scopes supervisor on local
     syn_test_suite_helper:kill_process(syn_scopes_sup),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% check
     assert_scope_subcluster(node(), custom_scope_ab, [SlaveNode1]),
@@ -317,11 +321,12 @@ three_nodes_register_unregister_and_monitor_default_scope(Config) ->
     %% get slaves
     SlaveNode1 = proplists:get_value(slave_node_1, Config),
     SlaveNode2 = proplists:get_value(slave_node_2, Config),
+
     %% start syn on nodes
     ok = syn:start(),
     ok = rpc:call(SlaveNode1, syn, start, []),
     ok = rpc:call(SlaveNode2, syn, start, []),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% start processes
     Pid = syn_test_suite_helper:start_process(),
@@ -351,7 +356,7 @@ three_nodes_register_unregister_and_monitor_default_scope(Config) ->
     ok = syn:register({"my proc alias"}, Pid), %% same pid, different name
     ok = syn:register(<<"my proc with meta">>, PidWithMeta, {meta, <<"meta">>}), %% pid with meta
     ok = syn:register({remote_pid_on, slave_1}, PidRemoteOn1), %% remote on slave 1
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% errors
     {error, taken} = syn:register(<<"my proc">>, PidRemoteOn1),
@@ -378,7 +383,7 @@ three_nodes_register_unregister_and_monitor_default_scope(Config) ->
     %% re-register to edit meta
     ok = syn:register(<<"my proc with meta">>, PidWithMeta, {meta2, <<"meta2">>}),
     ok = rpc:call(SlaveNode2, syn, register, [{remote_pid_on, slave_1}, PidRemoteOn1, added_meta]), %% updated on slave 2
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% retrieve
     {PidWithMeta, {meta2, <<"meta2">>}} = syn:lookup(<<"my proc with meta">>),
@@ -394,14 +399,14 @@ three_nodes_register_unregister_and_monitor_default_scope(Config) ->
 
     %% crash scope process to ensure that monitors get recreated
     exit(whereis(syn_registry_default), kill),
-    timer:sleep(100), %$ wait for sup to restart it
+    timer:sleep(250), %$ wait for sup to restart it
 
     %% kill process
     syn_test_suite_helper:kill_process(Pid),
     syn_test_suite_helper:kill_process(PidRemoteOn1),
     %% unregister process
     ok = syn:unregister(<<"my proc with meta">>),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% retrieve
     undefined = syn:lookup(<<"my proc">>),
@@ -428,7 +433,7 @@ three_nodes_register_unregister_and_monitor_default_scope(Config) ->
     Pid1 = syn_test_suite_helper:start_process(),
     Pid2 = syn_test_suite_helper:start_process(),
     ok = syn:register(<<"my proc">>, Pid1),
-    timer:sleep(100),
+    timer:sleep(250),
     syn_registry:remove_from_local_table(default, <<"my proc">>, Pid1),
     syn_registry:add_to_local_table(default, <<"my proc">>, Pid2, undefined, 0, undefined),
     {error, race_condition} = rpc:call(SlaveNode1, syn, unregister, [<<"my proc">>]).
@@ -437,17 +442,18 @@ three_nodes_register_unregister_and_monitor_custom_scope(Config) ->
     %% get slaves
     SlaveNode1 = proplists:get_value(slave_node_1, Config),
     SlaveNode2 = proplists:get_value(slave_node_2, Config),
+
     %% start syn on nodes
     ok = syn:start(),
     ok = rpc:call(SlaveNode1, syn, start, []),
     ok = rpc:call(SlaveNode2, syn, start, []),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% add custom scopes
     ok = syn:add_node_to_scope(custom_scope_ab),
     ok = rpc:call(SlaveNode1, syn, add_node_to_scopes, [[custom_scope_ab, custom_scope_bc]]),
     ok = rpc:call(SlaveNode2, syn, add_node_to_scopes, [[custom_scope_bc]]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% start processes
     Pid = syn_test_suite_helper:start_process(),
@@ -498,7 +504,7 @@ three_nodes_register_unregister_and_monitor_custom_scope(Config) ->
     {'EXIT', {{invalid_scope, custom_scope_bc}, _}} = catch syn:register(custom_scope_bc, "scope_a", Pid),
     {'EXIT', {{invalid_scope, non_existent_scope}, _}} = catch syn:register(non_existent_scope, "scope_a", Pid),
     ok = rpc:call(SlaveNode2, syn, register, [custom_scope_bc, {remote_scoped_bc}, PidRemoteWithMetaOn1, <<"with_meta 1">>]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% errors
     {error, taken} = syn:register(custom_scope_ab, "scope_a", PidWithMeta),
@@ -545,14 +551,14 @@ three_nodes_register_unregister_and_monitor_custom_scope(Config) ->
 
     %% re-register to edit meta
     ok = syn:register(custom_scope_ab, "scope_a_alias", PidWithMeta, <<"with_meta_updated">>),
-    timer:sleep(100),
+    timer:sleep(250),
     {PidWithMeta, <<"with_meta_updated">>} = syn:lookup(custom_scope_ab, "scope_a_alias"),
     {PidWithMeta, <<"with_meta_updated">>} = rpc:call(SlaveNode1, syn, lookup, [custom_scope_ab, "scope_a_alias"]),
     {badrpc, {'EXIT', {{invalid_scope, custom_scope_ab}, _}}} = catch rpc:call(SlaveNode2, syn, lookup, [custom_scope_ab, "scope_a_alias"]),
 
     %% crash scope process to ensure that monitors get recreated
     exit(whereis(syn_registry_custom_scope_ab), kill),
-    timer:sleep(100), %$ wait for sup to restart it
+    timer:sleep(250), %$ wait for sup to restart it
 
     %% kill process
     syn_test_suite_helper:kill_process(Pid),
@@ -561,7 +567,7 @@ three_nodes_register_unregister_and_monitor_custom_scope(Config) ->
     {error, undefined} = catch syn:unregister(<<"my proc with meta">>),
     {'EXIT', {{invalid_scope, custom_scope_bc}, _}} = catch syn:unregister(custom_scope_bc, <<"my proc with meta">>),
     ok = rpc:call(SlaveNode1, syn, unregister, [custom_scope_bc, {remote_scoped_bc}]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% retrieve
     undefined = syn:lookup("scope_a"),
@@ -608,12 +614,12 @@ three_nodes_register_unregister_and_monitor_custom_scope(Config) ->
     Pid1 = syn_test_suite_helper:start_process(),
     Pid2 = syn_test_suite_helper:start_process(),
     ok = syn:register(custom_scope_ab, <<"my proc">>, Pid1),
-    timer:sleep(100),
+    timer:sleep(250),
     syn_registry:remove_from_local_table(custom_scope_ab, <<"my proc">>, Pid1),
     syn_registry:add_to_local_table(custom_scope_ab, <<"my proc">>, Pid2, undefined, 0, undefined),
     {error, race_condition} = rpc:call(SlaveNode1, syn, unregister, [custom_scope_ab, <<"my proc">>]).
 
-three_nodes_cluster_changes_and_conflicts(Config) ->
+three_nodes_cluster_changes(Config) ->
     %% get slaves
     SlaveNode1 = proplists:get_value(slave_node_1, Config),
     SlaveNode2 = proplists:get_value(slave_node_2, Config),
@@ -628,7 +634,7 @@ three_nodes_cluster_changes_and_conflicts(Config) ->
     %% add custom scopes
     ok = rpc:call(SlaveNode1, syn, add_node_to_scopes, [[custom_scope_bc]]),
     ok = rpc:call(SlaveNode2, syn, add_node_to_scopes, [[custom_scope_bc]]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% start processes
     PidRemoteOn1 = syn_test_suite_helper:start_process(SlaveNode1),
@@ -639,12 +645,13 @@ three_nodes_cluster_changes_and_conflicts(Config) ->
     ok = rpc:call(SlaveNode1, syn, register, ["proc-2", PidRemoteOn2, "meta-2"]),
     ok = rpc:call(SlaveNode1, syn, register, [custom_scope_bc, "BC-proc-1", PidRemoteOn1, "meta-1"]),
     ok = rpc:call(SlaveNode1, syn, register, [custom_scope_bc, "BC-proc-1 alias", PidRemoteOn1, "meta-1 alias"]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% form full cluster
     ok = syn:start(),
     rpc:call(SlaveNode1, syn_test_suite_helper, connect_node, [SlaveNode2]),
     syn_test_suite_helper:wait_cluster_connected([node(), SlaveNode1, SlaveNode2]),
+    timer:sleep(250),
 
     %% retrieve
     {PidRemoteOn1, "meta-1"} = syn:lookup("proc-1"),
@@ -684,7 +691,7 @@ three_nodes_cluster_changes_and_conflicts(Config) ->
 
     %% partial netsplit (1 cannot see 2)
     rpc:call(SlaveNode1, syn_test_suite_helper, disconnect_node, [SlaveNode2]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% retrieve
     {PidRemoteOn1, "meta-1"} = syn:lookup("proc-1"),
@@ -725,6 +732,7 @@ three_nodes_cluster_changes_and_conflicts(Config) ->
     %% re-join
     rpc:call(SlaveNode1, syn_test_suite_helper, connect_node, [SlaveNode2]),
     syn_test_suite_helper:wait_cluster_connected([node(), SlaveNode1, SlaveNode2]),
+    timer:sleep(250),
 
     %% retrieve
     {PidRemoteOn1, "meta-1"} = syn:lookup("proc-1"),
@@ -760,49 +768,90 @@ three_nodes_cluster_changes_and_conflicts(Config) ->
     2 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc]),
     0 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, node()]),
     2 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, SlaveNode1]),
-    0 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, SlaveNode2]),
+    0 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, SlaveNode2]).
+
+three_nodes_cluster_conflicts(Config) ->
+    %% get slaves
+    SlaveNode1 = proplists:get_value(slave_node_1, Config),
+    SlaveNode2 = proplists:get_value(slave_node_2, Config),
+
+    %% start syn on nodes
+    ok = syn:start(),
+    ok = rpc:call(SlaveNode1, syn, start, []),
+    ok = rpc:call(SlaveNode2, syn, start, []),
+
+    %% add custom scopes
+    ok = rpc:call(SlaveNode1, syn, add_node_to_scopes, [[custom_scope_bc]]),
+    ok = rpc:call(SlaveNode2, syn, add_node_to_scopes, [[custom_scope_bc]]),
+    timer:sleep(250),
 
     %% partial netsplit (1 cannot see 2)
     rpc:call(SlaveNode1, syn_test_suite_helper, disconnect_node, [SlaveNode2]),
-    timer:sleep(100),
+    timer:sleep(250),
 
     %% start conflict processes
     Pid2RemoteOn1 = syn_test_suite_helper:start_process(SlaveNode1),
     Pid2RemoteOn2 = syn_test_suite_helper:start_process(SlaveNode2),
 
-    ct:pal("STARTED On1: ~p, On2: ~p", [Pid2RemoteOn1, Pid2RemoteOn2]),
-
-    %% register conflicts new during netsplit
-    ok = rpc:call(SlaveNode1, syn, register, ["proc-2", Pid2RemoteOn1, "new-meta-2"]),
-    ok = rpc:call(SlaveNode2, syn, register, ["proc-1", Pid2RemoteOn2, "new-meta-1"]),
+    %% --> conflict by netsplit
+    ok = rpc:call(SlaveNode1, syn, register, ["proc-confict", Pid2RemoteOn1, "meta-1"]),
+    ok = rpc:call(SlaveNode2, syn, register, ["proc-confict", Pid2RemoteOn2, "meta-2"]),
+    ok = rpc:call(SlaveNode1, syn, register, [custom_scope_bc, "proc-confict", Pid2RemoteOn1, "meta-1"]),
+    ok = rpc:call(SlaveNode2, syn, register, [custom_scope_bc, "proc-confict", Pid2RemoteOn2, "meta-2"]),
 
     %% re-join
     rpc:call(SlaveNode1, syn_test_suite_helper, connect_node, [SlaveNode2]),
     syn_test_suite_helper:wait_cluster_connected([node(), SlaveNode1, SlaveNode2]),
+    timer:sleep(250),
 
     %% retrieve
-    {Pid2RemoteOn2, "new-meta-1"} = syn:lookup("proc-1"),
-    {Pid2RemoteOn2, "new-meta-1"} = rpc:call(SlaveNode1, syn, lookup, ["proc-1"]),
-    {Pid2RemoteOn2, "new-meta-1"} = rpc:call(SlaveNode2, syn, lookup, ["proc-1"]),
-    {Pid2RemoteOn1, "new-meta-2"} = syn:lookup("proc-2"),
-    {Pid2RemoteOn1, "new-meta-2"} = rpc:call(SlaveNode1, syn, lookup, ["proc-2"]),
-    {Pid2RemoteOn1, "new-meta-2"} = rpc:call(SlaveNode2, syn, lookup, ["proc-2"]),
-    2 = syn:registry_count(default),
+    {Pid2RemoteOn2, "meta-2"} = syn:lookup("proc-confict"),
+    {Pid2RemoteOn2, "meta-2"} = rpc:call(SlaveNode1, syn, lookup, ["proc-confict"]),
+    {Pid2RemoteOn2, "meta-2"} = rpc:call(SlaveNode2, syn, lookup, ["proc-confict"]),
+    1 = syn:registry_count(default),
     0 = syn:registry_count(default, node()),
-    1 = syn:registry_count(default, SlaveNode1),
+    0 = syn:registry_count(default, SlaveNode1),
     1 = syn:registry_count(default, SlaveNode2),
-    2 = rpc:call(SlaveNode1, syn, registry_count, [default]),
+    1 = rpc:call(SlaveNode1, syn, registry_count, [default]),
     0 = rpc:call(SlaveNode1, syn, registry_count, [default, node()]),
-    1 = rpc:call(SlaveNode1, syn, registry_count, [default, SlaveNode1]),
+    0 = rpc:call(SlaveNode1, syn, registry_count, [default, SlaveNode1]),
     1 = rpc:call(SlaveNode1, syn, registry_count, [default, SlaveNode2]),
-    2 = rpc:call(SlaveNode2, syn, registry_count, [default]),
+    1 = rpc:call(SlaveNode2, syn, registry_count, [default]),
     0 = rpc:call(SlaveNode2, syn, registry_count, [default, node()]),
-    1 = rpc:call(SlaveNode2, syn, registry_count, [default, SlaveNode1]),
-    1 = rpc:call(SlaveNode2, syn, registry_count, [default, SlaveNode2]).
-
-
-
+    0 = rpc:call(SlaveNode2, syn, registry_count, [default, SlaveNode1]),
+    1 = rpc:call(SlaveNode2, syn, registry_count, [default, SlaveNode2]),
+    {Pid2RemoteOn2, "meta-2"} = rpc:call(SlaveNode1, syn, lookup, [custom_scope_bc, "proc-confict"]),
+    {Pid2RemoteOn2, "meta-2"} = rpc:call(SlaveNode2, syn, lookup, [custom_scope_bc, "proc-confict"]),
+    1 = rpc:call(SlaveNode1, syn, registry_count, [custom_scope_bc]),
+    0 = rpc:call(SlaveNode1, syn, registry_count, [custom_scope_bc, node()]),
+    0 = rpc:call(SlaveNode1, syn, registry_count, [custom_scope_bc, SlaveNode1]),
+    1 = rpc:call(SlaveNode1, syn, registry_count, [custom_scope_bc, SlaveNode2]),
+    1 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc]),
+    0 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, node()]),
+    0 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, SlaveNode1]),
+    1 = rpc:call(SlaveNode2, syn, registry_count, [custom_scope_bc, SlaveNode2]),
 
+    %% --> conflict by race condition
+    Pid1 = syn_test_suite_helper:start_process(),
+    Pid2 = syn_test_suite_helper:start_process(SlaveNode1),
+    rpc:call(SlaveNode1, syn_registry, add_to_local_table, [default, <<"my proc">>, Pid2, "meta-2", erlang:system_time(), undefined]),
+    ok = syn:register(<<"my proc">>, Pid1, "meta-1"),
+    timer:sleep(250),
+    {Pid1, "meta-1"} = syn:lookup(<<"my proc">>),
+    {Pid1, "meta-1"} = rpc:call(SlaveNode1, syn, lookup, [<<"my proc">>]),
+    {Pid1, "meta-1"} = rpc:call(SlaveNode2, syn, lookup, [<<"my proc">>]),
+    true = is_process_alive(Pid1),
+    false = rpc:call(SlaveNode1, erlang, is_process_alive, [Pid2]),
+
+    PidCustom1 = syn_test_suite_helper:start_process(SlaveNode1),
+    PidCustom2 = syn_test_suite_helper:start_process(SlaveNode2),
+    rpc:call(SlaveNode2, syn_registry, add_to_local_table, [custom_scope_bc, <<"my proc">>, PidCustom2, "meta-2", erlang:system_time(), undefined]),
+    ok = rpc:call(SlaveNode1, syn, register, [custom_scope_bc, <<"my proc">>, PidCustom1, "meta-1"]),
+    timer:sleep(250),
+    {PidCustom1, "meta-1"} = rpc:call(SlaveNode1, syn, lookup, [custom_scope_bc, <<"my proc">>]),
+    {PidCustom1, "meta-1"} = rpc:call(SlaveNode2, syn, lookup, [custom_scope_bc, <<"my proc">>]),
+    true = rpc:call(SlaveNode1, erlang, is_process_alive, [PidCustom1]),
+    false = rpc:call(SlaveNode2, erlang, is_process_alive, [PidCustom2]).
 
 %% ===================================================================
 %% Internal

+ 3 - 0
test/syn_test_suite_helper.erl

@@ -33,6 +33,7 @@
 -export([start_process/0, start_process/1, start_process/2]).
 -export([kill_process/1]).
 -export([wait_cluster_connected/1]).
+-export([send_error_logger_to_disk/0]).
 
 %% internal
 -export([process_main/0]).
@@ -115,6 +116,8 @@ wait_cluster_connected(Nodes, StartAt) ->
             end
     end.
 
+send_error_logger_to_disk() ->
+    error_logger:logfile({open, atom_to_list(node())}).
 
 %% ===================================================================
 %% Internal