@@ -31,7 +31,8 @@
 -export([leader_call/1,
          leader_cast/1,
-         sync/0]).
+         sync/0,
+         get_leader/0]).

 %%% internal exports
 -export([init/1,
@@ -54,7 +55,8 @@

 -record(state, {
           always_broadcast = false,
-          is_leader}).
+          is_leader,
+          sync_requests = []}).


 start_link() ->
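The new `sync_requests' field is the leader's bookkeeping for outstanding sync
calls: a list of `{From, Nodes}' pairs, where `From' is the gen_leader caller
reference and `Nodes' are the live nodes whose acknowledgements are still
pending. A rough shape, as a hypothetical spec for illustration only (the
`{pid(), Tag}' form of `From' is inferred from the `{_,Ref} = From' match in a
later hunk, not stated by the patch):

    %% sync_requests :: [{From :: {pid(), Tag :: term()}, Pending :: [node()]}]
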
@@ -127,18 +129,29 @@ update_counter(_, _) ->
 %% @doc Synchronize with the gproc leader
 %%
 %% This function can be used to ensure that data has been replicated from the
-%% leader to the current node.
+%% leader to the current node. It does so by asking the leader to ping all
+%% live participating nodes. The call returns `true' once all of these nodes
+%% have either responded or died. In the special case where the leader dies
+%% during an ongoing sync, the call fails with a timeout exception.
+%% (Actually, it should be a `leader_died' exception; more study is needed to
+%% find out why gen_leader times out in this situation rather than reporting
+%% that the leader died.)
 %% @end
 %%
 sync() ->
     leader_call(sync).

+get_leader() ->
+    gen_leader:call(?MODULE, get_leader).
+
 %%% ==========================================================

 handle_cast(_Msg, S, _) ->
     {stop, unknown_cast, S}.

+handle_call(get_leader, _, S, E) ->
+    {reply, gen_leader:leader_node(E), S};
 handle_call(_, _, S, _) ->
     {reply, badarg, S}.
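For context, a minimal usage sketch of the two calls this hunk adds. The node
names are hypothetical, and it assumes gproc is running on all nodes with
global distribution enabled (e.g. started with `-gproc gproc_dist all'):

    (a@host)1> gproc_dist:sync().        %% blocks until all live nodes have caught up
    true
    (a@host)2> gproc_dist:get_leader().  %% node currently elected leader
    'b@host'
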
@@ -181,21 +194,41 @@ surrendered(S, {globals, Globs}, _E) ->

 handle_DOWN(Node, S, _E) ->
+    S1 = check_sync_requests(Node, S),
     Head = {{{'_',g,'_'},'_'},'$1','_'},
     Gs = [{'==', {node,'$1'},Node}],
     Globs = ets:select(?TAB, [{Head, Gs, [{{{element,1,{element,1,'$_'}},
                                             {element,2,'$_'}}}]}]),
     case process_globals(Globs) of
         [] ->
-            {ok, S};
+            {ok, S1};
         Broadcast ->
-            {ok, Broadcast, S}
+            {ok, Broadcast, S1}
     end.
 %%    ets:select_delete(?TAB, [{Head, Gs, [true]}]),
 %%    {ok, [{delete, Globs}], S}.

-handle_leader_call(sync, _From, S, _E) ->
-    {reply, true, sync, S};
+check_sync_requests(Node, #state{sync_requests = SReqs} = S) ->
+    SReqs1 = lists:flatmap(
+               fun({From, Ns}) ->
+                       case Ns -- [Node] of
+                           [] ->
+                               gen_leader:reply(From, {leader, reply, true}),
+                               [];
+                           Ns1 ->
+                               [{From, Ns1}]
+                       end
+               end, SReqs),
+    S#state{sync_requests = SReqs1}.
+
+handle_leader_call(sync, From, #state{sync_requests = SReqs} = S, E) ->
+    case gen_leader:alive(E) -- [node()] of
+        [] ->
+            {reply, true, S};
+        Alive ->
+            gen_leader:broadcast({from_leader, {sync, From}}, Alive, E),
+            {noreply, S#state{sync_requests = [{From, Alive}|SReqs]}}
+    end;
 handle_leader_call({reg, {C,g,Name} = K, Value, Pid}, _From, S, _E) ->
     case gproc_lib:insert_reg(K, Value, Pid, g) of
         false ->
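Note the behavioural change: the old clause replied `{reply, true, sync, S}'
immediately, broadcasting `sync' without waiting, whereas the new clause defers
the reply until every live node has acknowledged. To make the leader-side
bookkeeping concrete, a worked trace of one sync, assuming three participating
nodes with `a@h' as leader (names made up for illustration):

    %% client on 'a@h' calls gproc_dist:sync()
    %%   leader: sync_requests = [{From, ['b@h','c@h']}], broadcasts {sync, From}
    %% {sync_reply, 'b@h', From} arrives (handled in handle_leader_cast below)
    %%   leader: sync_requests = [{From, ['c@h']}]
    %% 'c@h' goes down -> handle_DOWN -> check_sync_requests('c@h', S)
    %%   leader: pending list is now empty, so
    %%           gen_leader:reply(From, {leader, reply, true}),
    %%           sync_requests = []
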
@@ -318,6 +351,25 @@ handle_leader_call({await, Key, Pid}, {_,Ref} = From, S, _E) ->
 handle_leader_call(_, _, S, _E) ->
     {reply, badarg, S}.

+handle_leader_cast({sync_reply, Node, Ref}, S, _E) ->
+    #state{sync_requests = SReqs} = S,
+    case lists:keyfind(Ref, 1, SReqs) of
+        false ->
+            %% This should never happen, except perhaps if the leader that
+            %% received the sync request died and the new leader gets the
+            %% sync reply. In that case, we trust that the client has been
+            %% notified anyway, and ignore the message.
+            {ok, S};
+        {_, Ns} ->
+            case lists:delete(Node, Ns) of
+                [] ->
+                    gen_leader:reply(Ref, {leader, reply, true}),
+                    {ok, S#state{sync_requests = lists:keydelete(Ref, 1, SReqs)}};
+                Ns1 ->
+                    SReqs1 = lists:keyreplace(Ref, 1, SReqs, {Ref, Ns1}),
+                    {ok, S#state{sync_requests = SReqs1}}
+            end
+    end;
 handle_leader_cast({add_globals, Missing}, S, _E) ->
     %% This is an audit message: a peer (non-leader) had info about granted
     %% global resources that we didn't know of when we became leader.
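A note on the `{leader, reply, true}' tuple passed to gen_leader:reply/2 here
and in check_sync_requests: because the reply is sent out-of-band rather than
returned from the callback, it appears to reproduce the envelope gen_leader
itself wraps around a leader_call reply, so the caller blocked in
gen_leader:leader_call/2 can match it as usual. This reading is an inference
from the patch, not from gen_leader documentation; conceptually:

    %% deferred reply, assumed equivalent to having returned {reply, true, S}
    %% from handle_leader_call/4 at this later point in time
    gen_leader:reply(From, {leader, reply, true})
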
@@ -366,7 +418,8 @@ terminate(_Reason, _S) ->

-from_leader(sync, S, _E) ->
+from_leader({sync, Ref}, S, _E) ->
+    gen_leader:leader_cast(?MODULE, {sync_reply, node(), Ref}),
     {ok, S};
 from_leader(Ops, S, _E) ->
     lists:foreach(
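With this last clause in place, each peer acknowledges the leader's ping, and
the changed clauses together implement roughly this round trip (a sketch; the
second step is a broadcast to every live peer):

    client --- leader_call(sync) ----------------------> leader
    leader --- {from_leader, {sync, From}} ------------> each live peer
    peer   --- leader_cast({sync_reply, node(), From}) -> leader
    leader --- {leader, reply, true} ------------------> client
               (sent once every peer has replied or died)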