Browse Source

Allow specifying a callback for netsplits, instead of sending a message.

Roberto Ostinelli 10 years ago
parent
commit
d0486b30d3
5 changed files with 69 additions and 53 deletions
  1. 30 11
      README.md
  2. 1 1
      src/syn_backbone.erl
  3. 27 33
      src/syn_netsplits.erl
  4. 3 4
      test/syn-test.config
  5. 8 4
      test/syn_netsplits_SUITE.erl

+ 30 - 11
README.md

@@ -109,11 +109,11 @@ Options can be set in the environment variable `syn`. You're probably best off u
 
 ```erlang
 {syn, [
-    %% define callback function
-    {process_exit_callback, [calback_module, callback_function]},
+    %% define callback function on process exit
+    {process_exit_callback, [module1, function1]},
 
-    %% send a message to the discarded process (instead of kill)
-    {netsplit_send_message_to_process, shutdown}
+    %% define callback function on conflicting process (instead of kill)
+    {netsplit_conflicting_process_callback, [module2, function2]}
 ]}
 ```
 These options are explained here below.
@@ -138,7 +138,10 @@ For instance, if you want to print a log when a process exited:
 -module(my_callback).
 
 callback_on_process_exit(Key, Pid, Reason) ->
-	error_logger:info_msg("Process with Key ~p and Pid ~p exited with reason ~p~n", [Key, Pid, Reason])
+	error_logger:info_msg(
+		"Process with Key ~p and Pid ~p exited with reason ~p~n",
+		[Key, Pid, Reason]
+	).
 ```
 
 Set it in the options:
@@ -155,19 +158,35 @@ After a net split, when nodes reconnect, Syn will merge the data from all the no
 
 If the same Key was used to register a process on different nodes during a netsplit, then there will be a conflict. By default, Syn will discard the process running on the node where the conflict is being resolved, and will kill it by sending a `kill` signal with `exit(Pid, kill)`.
 
-If this is not desired, you can set the `netsplit_send_message_to_process` option to instruct Syn to send a message to the discarded process, so that you can trigger any actions on that process. In this case, the process will not be killed by Syn, and you'll have to decide what to do with it (for instance, a graceful shutdown).
+If this is not desired, you can set the `netsplit_conflicting_process_callback` option to instruct Syn to trigger a callback, so that you can perform custom operations (such as a graceful shutdown). In this case, the process will not be killed by Syn, and you'll have to decide what to do with it. This callback will be called only on the node where the process is running.
+
+The callback function is defined as:
+```erlang
+CallbackFun = fun(Key, Pid) -> any().
+
+Types:
+	Key = any()
+	Pid = pid()
+```
+The `Key` and `Pid` are the ones of the process that is to be discarded.
 
-For example, if you want the message `shutdown` to be send to the discarded process you can set the option:
+For instance, if you want to send a `shutdown` message to the discarded process:
 
 ```erlang
+-module(my_callback).
+
+callback_on_netsplit_conflicting_process(_Key, Pid) ->
+	Pid ! shutdown.
+```
+
+Set it in the options:
+```erlang
 {syn, [
-    %% define callback function
-    {netsplit_send_message_to_process, shutdown}
+	%% define callback function
+	{netsplit_conflicting_process_callback, [my_callback, callback_on_netsplit_conflicting_process]}
 ]}
 ```
 
-If you don't set this option, then the default will apply (i.e. sending the `exit(Pid, kill)` signal).
-
 > Important Note: The conflict resolution method SHOULD be defined in the same way across all nodes of the cluster. Having different conflict resolution options on different nodes can have unexpected results.
 
 

+ 1 - 1
src/syn_backbone.erl

@@ -136,7 +136,7 @@ init([]) ->
                 process_exit_callback,
                 [undefined, undefined]
             ),
-            %% return
+            %% build state
             {ok, #state{
                 process_exit_callback_module = ProcessExitCallbackModule,
                 process_exit_callback_function = ProcessExitCallbackFunction

+ 27 - 33
src/syn_netsplits.erl

@@ -41,8 +41,8 @@
 
 %% records
 -record(state, {
-    conflicting_mode = kill :: kill | send_message,
-    message = undefined :: any()
+    netsplit_conflicting_process_callback_module = undefined :: atom(),
+    netsplit_conflicting_process_callback_function = undefined :: atom()
 }).
 
 %% include
@@ -75,20 +75,14 @@ init([]) ->
     %% monitor mnesia events
     mnesia:subscribe(system),
     %% get options
-    {ok, NetsplitSendMessageToProcess} = syn_utils:get_env_value(
-        netsplit_send_message_to_process,
-        syn_do_not_send_any_message_to_conflicting_process
+    {ok, [NetsplitConflictingProcessCallbackModule, NetsplitConflictingProcessCallbackFunction]} = syn_utils:get_env_value(
+        netsplit_conflicting_process_callback,
+        [undefined, undefined]
     ),
-    %% get state params
-    {ConflictingMode, Message} = case NetsplitSendMessageToProcess of
-        syn_do_not_send_any_message_to_conflicting_process -> {kill, undefined};
-        _ -> {send_message, NetsplitSendMessageToProcess}
-
-    end,
     %% build state
     {ok, #state{
-        conflicting_mode = ConflictingMode,
-        message = Message
+        netsplit_conflicting_process_callback_module = NetsplitConflictingProcessCallbackModule,
+        netsplit_conflicting_process_callback_function = NetsplitConflictingProcessCallbackFunction
     }}.
 
 %% ----------------------------------------------------------------------------------------------------------
@@ -127,11 +121,11 @@ handle_cast(Msg, State) ->
     {stop, Reason :: any(), #state{}}.
 
 handle_info({mnesia_system_event, {inconsistent_database, Context, Node}}, #state{
-    conflicting_mode = ConflictingMode,
-    message = Message
+    netsplit_conflicting_process_callback_module = NetsplitConflictingProcessCallbackModule,
+    netsplit_conflicting_process_callback_function = NetsplitConflictingProcessCallbackFunction
 } = State) ->
     error_logger:warning_msg("MNESIA signalled an inconsistent database on node: ~p with context: ~p, initiating automerge~n", [Node, Context]),
-    automerge(Node, ConflictingMode, Message),
+    automerge(Node, NetsplitConflictingProcessCallbackModule, NetsplitConflictingProcessCallbackFunction),
     {noreply, State};
 
 handle_info({mnesia_system_event, {mnesia_down, Node}}, State) when Node =/= node() ->
@@ -180,44 +174,44 @@ delete_pids_of_disconnected_node(Node) ->
         lists:foreach(DelF, NodePids)
     end).
 
--spec automerge(RemoteNode :: atom(), ConflictingMode :: kill | send_message, Message :: any()) -> ok.
-automerge(RemoteNode, ConflictingMode, Message) ->
+-spec automerge(RemoteNode :: atom(), CallbackModule :: atom(), CallbackFunction :: atom()) -> ok.
+automerge(RemoteNode, CallbackModule, CallbackFunction) ->
     global:trans({{?MODULE, automerge}, self()},
         fun() ->
             error_logger:warning_msg("AUTOMERGE starting for remote node ~s (global lock is set)~n", [RemoteNode]),
-            check_stitch(RemoteNode, ConflictingMode, Message),
+            check_stitch(RemoteNode, CallbackModule, CallbackFunction),
             error_logger:warning_msg("AUTOMERGE done (global lock will be unset)~n")
         end).
 
--spec check_stitch(RemoteNode :: atom(), ConflictingMode :: kill | send_message, Message :: any()) -> ok.
-check_stitch(RemoteNode, ConflictingMode, Message) ->
+-spec check_stitch(RemoteNode :: atom(), CallbackModule :: atom(), CallbackFunction :: atom()) -> ok.
+check_stitch(RemoteNode, CallbackModule, CallbackFunction) ->
     case catch lists:member(RemoteNode, mnesia:system_info(running_db_nodes)) of
         true ->
             ok;
         false ->
-            stitch(RemoteNode, ConflictingMode, Message),
+            stitch(RemoteNode, CallbackModule, CallbackFunction),
             ok;
         Error ->
             error_logger:error_msg("Could not check if node is stiched: ~p~n", [Error])
     end.
 
--spec stitch(RemoteNode :: atom(), ConflictingMode :: kill | send_message, Message :: any()) ->
+-spec stitch(RemoteNode :: atom(), CallbackModule :: atom(), CallbackFunction :: atom()) ->
     {'ok', any()} | {'error', any()}.
-stitch(RemoteNode, ConflictingMode, Message) ->
+stitch(RemoteNode, CallbackModule, CallbackFunction) ->
     mnesia_controller:connect_nodes(
         [RemoteNode],
         fun(MergeF) ->
             catch case MergeF([syn_processes_table]) of
                 {merged, _, _} = Res ->
-                    stitch_tab(RemoteNode, ConflictingMode, Message),
+                    stitch_tab(RemoteNode, CallbackModule, CallbackFunction),
                     Res;
                 Other ->
                     Other
             end
         end).
 
--spec stitch_tab(RemoteNode :: atom(), ConflictingMode :: kill | send_message, Message :: any()) -> ok.
-stitch_tab(RemoteNode, ConflictingMode, Message) ->
+-spec stitch_tab(RemoteNode :: atom(), CallbackModule :: atom(), CallbackFunction :: atom()) -> ok.
+stitch_tab(RemoteNode, CallbackModule, CallbackFunction) ->
     %% get remote processes info
     RemoteProcessesInfo = rpc:call(RemoteNode, ?MODULE, get_processes_info_of_node, [RemoteNode]),
     %% get local processes info
@@ -226,8 +220,8 @@ stitch_tab(RemoteNode, ConflictingMode, Message) ->
     {LocalProcessesInfo1, RemoteProcessesInfo1} = purge_double_processes_from_local_node(
         LocalProcessesInfo,
         RemoteProcessesInfo,
-        ConflictingMode,
-        Message
+        CallbackModule,
+        CallbackFunction
     ),
     %% write
     write_remote_processes_to_local(RemoteNode, RemoteProcessesInfo1),
@@ -240,7 +234,7 @@ stitch_tab(RemoteNode, ConflictingMode, Message) ->
     Message :: any()
 ) ->
     {LocalProcessesInfo :: list(), RemoteProcessesInfo :: list()}.
-purge_double_processes_from_local_node(LocalProcessesInfo, RemoteProcessesInfo, ConflictingMode, Message) ->
+purge_double_processes_from_local_node(LocalProcessesInfo, RemoteProcessesInfo, CallbackModule, CallbackFunction) ->
     %% create ETS table
     Tab = ets:new(syn_automerge_doubles_table, [set]),
 
@@ -258,9 +252,9 @@ purge_double_processes_from_local_node(LocalProcessesInfo, RemoteProcessesInfo,
                 %% remove it from ETS
                 ets:delete(Tab, Key),
                 %% kill the local process, or hand it to the configured callback
-                case ConflictingMode of
-                    kill -> exit(LocalProcessPid, kill);
-                    send_message -> LocalProcessPid ! Message
+                case CallbackModule of
+                    undefined -> exit(LocalProcessPid, kill);
+                    _ -> spawn(fun() -> CallbackModule:CallbackFunction(Key, LocalProcessPid) end)
                 end
         end
     end,

+ 3 - 4
test/syn-test.config

@@ -16,11 +16,10 @@
         %% If the same Key was used to register a process on different nodes during a net split, then there will be a conflict.
         %% By default, Syn will discard the process running on the node where the conflict is being resolved,
         %% and will kill it by sending a `kill` signal with `exit(Pid, kill)`.
-        %% If this is not desired, you can set the netsplit_send_message_to_process option here below to instruct Syn
-        %% to send a message to the discarded process, so that you can trigger any actions on that process
-        %% (such as a graceful shutdown).
+        %% If this is not desired, you can set the netsplit_conflicting_process_callback option here below to instruct Syn
+        %% to trigger a callback, so that you can perform custom operations (such as a graceful shutdown).
 
-        {netsplit_send_message_to_process, shutdown}
+        {netsplit_conflicting_process_callback, [syn_netsplits_SUITE, netsplit_conflicting_process_callback_dummy]}
 
     ]}
 

+ 8 - 4
test/syn_netsplits_SUITE.erl

@@ -37,7 +37,7 @@
 -export([
     two_nodes_netsplit_when_there_are_no_conflicts/1,
     two_nodes_netsplit_kill_resolution_when_there_are_conflicts/1,
-    two_nodes_netsplit_message_resolution_when_there_are_conflicts/1
+    two_nodes_netsplit_callback_resolution_when_there_are_conflicts/1
 ]).
 
 -export([
@@ -46,6 +46,7 @@
 
 %% internal
 -export([process_reply_main/0]).
+-export([netsplit_conflicting_process_callback_dummy/2]).
 
 %% include
 -include_lib("common_test/include/ct.hrl").
@@ -85,7 +86,7 @@ groups() ->
         {two_nodes_netsplits, [shuffle], [
             two_nodes_netsplit_when_there_are_no_conflicts,
             two_nodes_netsplit_kill_resolution_when_there_are_conflicts,
-            two_nodes_netsplit_message_resolution_when_there_are_conflicts
+            two_nodes_netsplit_callback_resolution_when_there_are_conflicts
         ]},
         {three_nodes_netsplits, [shuffle], [
             three_nodes_netsplit_kill_resolution_when_there_are_conflicts
@@ -315,12 +316,12 @@ two_nodes_netsplit_kill_resolution_when_there_are_conflicts(Config) ->
     syn_test_suite_helper:kill_process(LocalPid),
     syn_test_suite_helper:kill_process(SlavePid).
 
-two_nodes_netsplit_message_resolution_when_there_are_conflicts(Config) ->
+two_nodes_netsplit_callback_resolution_when_there_are_conflicts(Config) ->
     %% get slave
     SlaveNode = proplists:get_value(slave_node, Config),
     CurrentNode = node(),
 
-    %% load configuration variables from syn-test.config => this sets the netsplit_send_message_to_process option
+    %% load configuration variables from syn-test.config => this sets the netsplit_conflicting_process_callback option
     syn_test_suite_helper:set_environment_variables(),
     syn_test_suite_helper:set_environment_variables(SlaveNode),
 
@@ -465,3 +466,6 @@ process_reply_main() ->
             timer:sleep(500), %% wait for global processes to propagate
             global:send(syn_netsplits_SUITE_result, {exited, self()})
     end.
+
+netsplit_conflicting_process_callback_dummy(_Key, Pid) ->
+    Pid ! shutdown.