%% File: supervisor.erl
  %% ``The contents of this file are subject to the Erlang Public License,
  %% Version 1.1, (the "License"); you may not use this file except in
  %% compliance with the License. You should have received a copy of the
  %% Erlang Public License along with this software. If not, it can be
  %% retrieved via the world wide web at http://www.erlang.org/.
  %%
  %% Software distributed under the License is distributed on an "AS IS"
  %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
  %% the License for the specific language governing rights and limitations
  %% under the License.
  %%
  %% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
  %% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
  %% AB. All Rights Reserved.''
  %%
  %% $Id$
  %%
  -module(supervisor).
  -behaviour(gen_server).

  %% External exports
  -export([start_link/2, start_link/3,
           start_child/2, restart_child/2,
           delete_child/2, terminate_child/2,
           which_children/1,
           check_childspecs/1]).
  -export([behaviour_info/1]).

  %% Internal exports (gen_server callbacks)
  -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]).
  -export([handle_cast/2]).

  %% Dictionary implementation used for the pid -> start-args table of
  %% simple_one_for_one children.
  -define(DICT, dict).

  %% Supervisor state.
  -record(state, {name,                    % result of supname/2
                  strategy,                % restart strategy atom
                  children = [],           % [#child{}]
                  dynamics = ?DICT:new(),  % Pid -> Args (simple_one_for_one)
                  intensity,               % max restarts allowed ...
                  period,                  % ... within this many seconds
                  restarts = [],           % timestamps of recent restarts
                  module,                  % callback module
                  args}).                  % argument given to Module:init/1

  %% One supervised child.
  -record(child, {pid = undefined,  % pid is undefined when child is not running
                  name,
                  mfa,
                  restart_type,
                  shutdown,
                  child_type,
                  modules = []}).

  %% True when the supervisor uses the simple_one_for_one strategy.
  -define(is_simple(State), State#state.strategy =:= simple_one_for_one).
  %% Lists the callbacks a supervisor callback module has to export.
  %% Any request other than `callbacks' yields `undefined'.
  behaviour_info(What) ->
      case What of
          callbacks -> [{init,1}];
          _Other -> undefined
      end.
  %%% ---------------------------------------------------
  %%% This is a general process supervisor built upon gen_server.erl.
  %%% Servers/processes should/could also be built using gen_server.erl.
  %%% SupName = {local, atom()} | {global, atom()}.
  %%% ---------------------------------------------------

  %% Starts an unnamed supervisor. The atom `self' takes the place of a
  %% registered name in the argument handed to init/1.
  start_link(Mod, Args) ->
      InitArg = {self, Mod, Args},
      gen_server:start_link(supervisor, InitArg, []).
  %% Starts a supervisor registered as SupName; the same name is also
  %% passed on to init/1.
  start_link(SupName, Mod, Args) ->
      InitArg = {SupName, Mod, Args},
      gen_server:start_link(SupName, supervisor, InitArg, []).
  %%% ---------------------------------------------------
  %%% Interface functions.
  %%% ---------------------------------------------------

  %% Asks Supervisor to start a child described by ChildSpec.
  start_child(Supervisor, ChildSpec) ->
      Request = {start_child, ChildSpec},
      call(Supervisor, Request).
  %% Asks Supervisor to restart the child called Name.
  restart_child(Supervisor, Name) ->
      Request = {restart_child, Name},
      call(Supervisor, Request).
  %% Asks Supervisor to forget the child called Name.
  delete_child(Supervisor, Name) ->
      Request = {delete_child, Name},
      call(Supervisor, Request).
  %%-----------------------------------------------------------------
  %% Func: terminate_child/2
  %% Returns: ok | {error, Reason}
  %%          Note that the child is *always* terminated in some
  %%          way (maybe killed).
  %%-----------------------------------------------------------------
  terminate_child(Supervisor, Name) ->
      Request = {terminate_child, Name},
      call(Supervisor, Request).
  %% Asks Supervisor for a listing of its children.
  which_children(Supervisor) ->
      Request = which_children,
      call(Supervisor, Request).
  %% Synchronous request to the supervisor process; waits without a
  %% timeout (infinity).
  call(Supervisor, Req) ->
      Timeout = infinity,
      gen_server:call(Supervisor, Req, Timeout).
  %% Validates a list of child specifications without starting anything.
  %% Returns: ok | {error, Reason}
  check_childspecs(ChildSpecs) when not is_list(ChildSpecs) ->
      {error, {badarg, ChildSpecs}};
  check_childspecs(ChildSpecs) ->
      case check_startspec(ChildSpecs) of
          {ok, _Children} ->
              ok;
          Reason ->
              {error, Reason}
      end.
  %%% ---------------------------------------------------
  %%%
  %%% Initialize the supervisor.
  %%%
  %%% ---------------------------------------------------

  %% gen_server init callback. Traps exits, asks the callback module for
  %% its flags and start specification, publishes the flags through gproc
  %% and then starts the children (or just stores the child template for
  %% a simple_one_for_one supervisor).
  init({SupName, Mod, Args}) ->
      process_flag(trap_exit, true),
      gen:reg_behaviour(?MODULE),
      case Mod:init(Args) of
          ignore ->
              ignore;
          {ok, {SupFlags, StartSpec}} ->
              gproc:reg({p,l,supflags}, SupFlags),
              InitState = init_state(SupName, SupFlags, Mod, Args),
              init_from_state(InitState, StartSpec);
          Error ->
              {stop, {bad_return, {Mod, init, Error}}}
      end.

  %% Continues initialisation once the supervisor flags have been checked.
  init_from_state({ok, State}, StartSpec) when ?is_simple(State) ->
      init_dynamic(State, StartSpec);
  init_from_state({ok, State}, StartSpec) ->
      init_children(State, StartSpec);
  init_from_state(Error, _StartSpec) ->
      {stop, {supervisor_data, Error}}.
  %% Validates the start specification, registers every child spec with
  %% gproc and starts the children. If a child fails to start, the ones
  %% already started are terminated and the supervisor stops with reason
  %% shutdown.
  init_children(State, StartSpec) ->
      SupName = State#state.name,
      case check_startspec(StartSpec) of
          {ok, Children} ->
              reg_children(Children),
              boot_children(start_children(Children, SupName), SupName, State);
          Error ->
              {stop, {start_spec, Error}}
      end.

  %% Turns the outcome of start_children/2 into the init/1 return value.
  boot_children({ok, NChildren}, _SupName, State) ->
      set_children(NChildren),
      {ok, State#state{children = NChildren}};
  boot_children({error, NChildren}, SupName, _State) ->
      terminate_children(NChildren, SupName),
      {stop, shutdown}.
  %% Registers each child record as a local gproc property keyed on
  %% {childspec, Name}.
  reg_children(Children) ->
      Register = fun(Ch) ->
                         Key = {p,l,{childspec,Ch#child.name}},
                         gproc:reg(Key, Ch)
                 end,
      lists:foreach(Register, Children).
  %% Updates the gproc property of each child with its current record.
  set_children(Children) ->
      Update = fun(Ch) ->
                       Key = {p,l,{childspec,Ch#child.name}},
                       gproc:set_value(Key, Ch)
               end,
      lists:foreach(Update, Children).
  %% Removes the gproc property that describes Child.
  unreg_child(Child) ->
      Key = {p,l,{childspec,Child#child.name}},
      gproc:unreg(Key).
  %% Best-effort update of the gproc property for Child; a failure inside
  %% gproc is caught rather than propagated.
  set_child(Child) ->
      Key = {p,l,{childspec,Child#child.name}},
      catch gproc:set_value(Key, Child).
  %% Initialisation for simple_one_for_one: the start specification must
  %% hold exactly one child spec, which is kept (and registered) as the
  %% template for dynamically started children. Nothing is started here.
  init_dynamic(State, [StartSpec]) ->
      Checked = check_startspec([StartSpec]),
      case Checked of
          {ok, Children} ->
              reg_children(Children),
              {ok, State#state{children = Children}};
          Error ->
              {stop, {start_spec, Error}}
      end;
  init_dynamic(_State, BadStartSpec) ->
      {stop, {bad_start_spec, BadStartSpec}}.
  %%-----------------------------------------------------------------
  %% Func: start_children/2
  %% Args: Children = [#child] in start order
  %%       SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
  %% Purpose: Start all children.  The new list contains #child's
  %%          with pids.
  %% Returns: {ok, NChildren} | {error, NChildren}
  %%          NChildren = [#child] in termination order (reversed
  %%                        start order)
  %%-----------------------------------------------------------------
  start_children(Children, SupName) ->
      start_children(Children, [], SupName).

  %% Started accumulates the children handled so far, most recent first.
  start_children([], Started, _SupName) ->
      {ok, Started};
  start_children([Child | Pending], Started, SupName) ->
      case do_start_child(SupName, Child) of
          {error, Reason} ->
              report_error(start_error, Reason, Child, SupName),
              %% Children never attempted go first, then the failing child,
              %% then the ones that did start.
              {error, lists:reverse(Pending, [Child | Started])};
          {ok, Pid} ->
              start_children(Pending, [Child#child{pid = Pid} | Started], SupName);
          {ok, Pid, _Extra} ->
              start_children(Pending, [Child#child{pid = Pid} | Started], SupName)
      end.
  %% Calls the child's start function and normalises the outcome.
  %% A successful start is logged as a progress report; `ignore' counts
  %% as a start with pid `undefined'; anything else, including an exit or
  %% throw caught from the start function, becomes {error, What}.
  do_start_child(SupName, Child) ->
      #child{mfa = {M, F, A}} = Child,
      Outcome = (catch apply(M, F, A)),
      case Outcome of
          {ok, Pid} when is_pid(Pid) ->
              report_progress(Child#child{pid = Pid}, SupName),
              Outcome;
          {ok, Pid, _Extra} when is_pid(Pid) ->
              report_progress(Child#child{pid = Pid}, SupName),
              Outcome;
          ignore ->
              {ok, undefined};
          {error, _What} ->
              Outcome;
          Other ->
              {error, Other}
      end.
  %% Same normalisation as do_start_child/2 but without a progress
  %% report; used for simple_one_for_one children.
  do_start_child_i(M, F, A) ->
      Outcome = (catch apply(M, F, A)),
      case Outcome of
          {ok, Pid} when is_pid(Pid) ->
              Outcome;
          {ok, Pid, _Extra} when is_pid(Pid) ->
              Outcome;
          ignore ->
              {ok, undefined};
          {error, _Error} ->
              Outcome;
          Other ->
              {error, Other}
      end.
  %%% ---------------------------------------------------
  %%%
  %%% Callback functions.
  %%%
  %%% ---------------------------------------------------

  %% gen_server call handler implementing the supervisor API.
  %%
  %% For a simple_one_for_one supervisor, start_child takes a list of
  %% extra arguments that is appended to the template child's argument
  %% list. A start function that returns `ignore' is reported as
  %% {ok, undefined} and is NOT recorded: there is no process to track,
  %% and registering the key {simple_child, undefined} a second time
  %% would make gproc:reg/2 fail and take the supervisor down.
  handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) ->
      #child{mfa = {M, F, A}} = hd(State#state.children),
      Args = A ++ EArgs,
      case do_start_child_i(M, F, Args) of
          {ok, undefined} ->
              {reply, {ok, undefined}, State};
          {ok, Pid} ->
              {reply, {ok, Pid}, store_dynamic(Pid, Args, State)};
          {ok, Pid, Extra} ->
              {reply, {ok, Pid, Extra}, store_dynamic(Pid, Args, State)};
          What ->
              {reply, What, State}
      end;

  %%% The requests terminate_child, delete_child and restart_child are
  %%% invalid for simple_one_for_one supervisors.
  handle_call({_Req, _Data}, _From, State) when ?is_simple(State) ->
      {reply, {error, simple_one_for_one}, State};

  handle_call({start_child, ChildSpec}, _From, State) ->
      case check_childspec(ChildSpec) of
          {ok, Child} ->
              {Resp, NState} = handle_start_child(Child, State),
              {reply, Resp, NState};
          What ->
              {reply, {error, What}, State}
      end;

  %% Only a known child that is not currently running can be restarted.
  handle_call({restart_child, Name}, _From, State) ->
      case get_child(Name, State) of
          {value, Child} when Child#child.pid =:= undefined ->
              case do_start_child(State#state.name, Child) of
                  {ok, Pid} ->
                      NState = replace_child(Child#child{pid = Pid}, State),
                      {reply, {ok, Pid}, NState};
                  {ok, Pid, Extra} ->
                      NState = replace_child(Child#child{pid = Pid}, State),
                      {reply, {ok, Pid, Extra}, NState};
                  Error ->
                      {reply, Error, State}
              end;
          {value, _} ->
              {reply, {error, running}, State};
          _ ->
              {reply, {error, not_found}, State}
      end;

  %% Only a known child that is not currently running can be deleted.
  handle_call({delete_child, Name}, _From, State) ->
      case get_child(Name, State) of
          {value, Child} when Child#child.pid =:= undefined ->
              NState = remove_child(Child, State),
              {reply, ok, NState};
          {value, _} ->
              {reply, {error, running}, State};
          _ ->
              {reply, {error, not_found}, State}
      end;

  handle_call({terminate_child, Name}, _From, State) ->
      case get_child(Name, State) of
          {value, Child} ->
              NChild = do_terminate(Child, State#state.name),
              {reply, ok, replace_child(NChild, State)};
          _ ->
              {reply, {error, not_found}, State}
      end;

  %% Dynamic children have no individual name, so `undefined' is reported.
  handle_call(which_children, _From, State) when ?is_simple(State) ->
      [#child{child_type = CT, modules = Mods}] = State#state.children,
      Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end,
                        ?DICT:to_list(State#state.dynamics)),
      {reply, Reply, State};

  handle_call(which_children, _From, State) ->
      Resp =
          lists:map(fun(#child{pid = Pid, name = Name,
                               child_type = ChildType, modules = Mods}) ->
                            {Name, Pid, ChildType, Mods}
                    end,
                    State#state.children),
      {reply, Resp, State}.

  %% Records a running simple_one_for_one child: publishes its start
  %% arguments through gproc and stores them in the dynamics table.
  store_dynamic(Pid, Args, State) ->
      gproc:reg({p,l,{simple_child,Pid}}, Args),
      State#state{dynamics = ?DICT:store(Pid, Args, State#state.dynamics)}.
  %%% Hopefully cause a function-clause as there is no API function
  %%% that utilizes cast.
  %% Only the cast `null' is accepted; it is logged and otherwise ignored.
  handle_cast(null, State) ->
      Format = "ERROR: Supervisor received cast-message 'null'~n",
      error_logger:error_msg(Format, []),
      {noreply, State}.
  %%
  %% Take care of terminated children.
  %%
  %% An 'EXIT' message is handed to restart_child/3, which decides
  %% whether the supervisor carries on or shuts down. Every other
  %% message is logged and dropped.
  handle_info({'EXIT', Pid, Reason}, State) ->
      Outcome = restart_child(Pid, Reason, State),
      case Outcome of
          {shutdown, NState} ->
              {stop, shutdown, NState};
          {ok, NState} ->
              {noreply, NState}
      end;
  handle_info(Msg, State) ->
      Format = "Supervisor received unexpected message: ~p~n",
      error_logger:error_msg(Format, [Msg]),
      {noreply, State}.
  %%
  %% Terminate this server.
  %%
  %% Every child in the state is terminated before the supervisor exits.
  terminate(_Reason, State) ->
      #state{children = Children, name = SupName} = State,
      terminate_children(Children, SupName),
      ok.
  %%
  %% Change code for the supervisor.
  %% Call the new call-back module and fetch the new start specification.
  %% Combine the new spec. with the old. If the new start spec. is
  %% not valid the code change will not succeed.
  %% Use the old Args as argument to Module:init/1.
  %% NOTE: This requires that the init function of the call-back module
  %%       does not have any side effects.
  %%
  code_change(_OldVsn, State, _Extra) ->
      Mod = State#state.module,
      case Mod:init(State#state.args) of
          {ok, {SupFlags, StartSpec}} ->
              FlagCheck = (catch check_flags(SupFlags)),
              apply_new_flags(FlagCheck, SupFlags, StartSpec, State);
          ignore ->
              {ok, State};
          Error ->
              Error
      end.

  %% Installs validated flags in the state and merges the new start
  %% specification; a failed flag check is returned as {error, Reason}.
  apply_new_flags(ok, {Strategy, MaxIntensity, Period}, StartSpec, State) ->
      NState = State#state{strategy = Strategy,
                           intensity = MaxIntensity,
                           period = Period},
      update_childspec(NState, StartSpec);
  apply_new_flags(Error, _SupFlags, _StartSpec, _State) ->
      {error, Error}.
  %% Validates a {Strategy, MaxIntensity, Period} tuple. Returns ok, or
  %% throws from the first failing validator. A term of any other shape
  %% is returned (not thrown) as {bad_flags, Term}.
  check_flags(Flags) ->
      case Flags of
          {Strategy, MaxIntensity, Period} ->
              validStrategy(Strategy),
              validIntensity(MaxIntensity),
              validPeriod(Period),
              ok;
          What ->
              {bad_flags, What}
      end.
  %% Installs a new start specification during code change.
  %%
  %% simple_one_for_one: the spec must hold exactly one child, which
  %% replaces the template. Otherwise: children that already exist keep
  %% their pids (see update_childspec1/3).
  %%
  %% The resulting child records are published through gproc. A child
  %% that is new in this specification has no gproc entry yet, so the
  %% entry is created when updating it fails; a plain set_value would
  %% otherwise crash the code change.
  %% Returns: {ok, NewState} | {error, Reason}
  update_childspec(State, StartSpec) when ?is_simple(State) ->
      case check_startspec(StartSpec) of
          {ok, [Child]} ->
              sync_childspecs([Child]),
              {ok, State#state{children = [Child]}};
          Error ->
              {error, Error}
      end;
  update_childspec(State, StartSpec) ->
      case check_startspec(StartSpec) of
          {ok, Children} ->
              OldC = State#state.children, % In reverse start order !
              NewC = update_childspec1(OldC, Children, []),
              sync_childspecs(NewC),
              {ok, State#state{children = NewC}};
          Error ->
              {error, Error}
      end.

  %% Publishes every child record, creating missing gproc entries.
  sync_childspecs(Children) ->
      lists:foreach(fun sync_childspec/1, Children).

  %% Updates the gproc entry for Child, registering it if it is absent.
  sync_childspec(Child) ->
      Key = {p,l,{childspec,Child#child.name}},
      try
          gproc:set_value(Key, Child)
      catch
          error:badarg ->
              gproc:reg(Key, Child)
      end.
  %% Walks the old children. One that also appears in the new spec hands
  %% its pid over to the new record; one that does not is kept as it is.
  %% Returns the combined list in (kept) reverse start order.
  update_childspec1(OldC, Children, KeepOld) ->
      Merge = fun(Old, {Spec, Kept}) ->
                      case update_chsp(Old, Spec) of
                          {ok, NewSpec} -> {NewSpec, Kept};
                          false -> {Spec, [Old | Kept]}
                      end
              end,
      {FinalSpec, FinalKept} = lists:foldl(Merge, {Children, KeepOld}, OldC),
      lists:reverse(FinalSpec ++ FinalKept).
  %% Copies OldCh's pid into the child of the same name in Children.
  %% Returns {ok, NewChildren} when that changed the list, and false when
  %% the list came out unchanged (OldCh not found in new spec).
  update_chsp(OldCh, Children) ->
      Updated = [inherit_pid(Ch, OldCh) || Ch <- Children],
      if
          Updated =:= Children -> false;
          true -> {ok, Updated}
      end.

  %% Gives Ch the pid of OldCh when both carry the same name.
  inherit_pid(Ch, OldCh) when OldCh#child.name =:= Ch#child.name ->
      Ch#child{pid = OldCh#child.pid};
  inherit_pid(Ch, _OldCh) ->
      Ch.
  %%% ---------------------------------------------------
  %%% Start a new child.
  %%% ---------------------------------------------------
  %% Starts Child unless a child of that name is already known.
  %% Returns {Reply, NewState}, where Reply is what the start_child
  %% caller receives.
  %%
  %% A successfully added child is registered with gproc. Updating the
  %% value is not enough here: a child added at run time has no gproc
  %% entry yet, so the update would silently fail and a later
  %% delete_child would crash when trying to unregister it.
  handle_start_child(Child, State) ->
      case get_child(Child#child.name, State) of
          false ->
              case do_start_child(State#state.name, Child) of
                  {ok, Pid} ->
                      NewChild = Child#child{pid = Pid},
                      {{ok, Pid}, add_started_child(NewChild, State)};
                  {ok, Pid, Extra} ->
                      NewChild = Child#child{pid = Pid},
                      {{ok, Pid, Extra}, add_started_child(NewChild, State)};
                  {error, What} ->
                      {{error, {What, Child}}, State}
              end;
          {value, OldChild} when OldChild#child.pid =/= undefined ->
              {{error, {already_started, OldChild#child.pid}}, State};
          {value, _OldChild} ->
              {{error, already_present}, State}
      end.

  %% Publishes NewChild through gproc and puts it first in the child list.
  add_started_child(NewChild, State) ->
      Key = {p,l,{childspec,NewChild#child.name}},
      try
          gproc:reg(Key, NewChild)
      catch
          error:badarg ->
              %% Already registered: fall back to refreshing the value.
              set_child(NewChild)
      end,
      State#state{children = [NewChild | State#state.children]}.
  %%% ---------------------------------------------------
  %%% Restart. A process has terminated.
  %%% Returns: {ok, #state} | {shutdown, #state}
  %%% ---------------------------------------------------
  %% An exit from a pid that is not one of the children is ignored.
  restart_child(Pid, Reason, State) when ?is_simple(State) ->
      case ?DICT:find(Pid, State#state.dynamics) of
          error ->
              {ok, State};
          {ok, Args} ->
              %% Rebuild a child record from the template plus the
              %% arguments this particular pid was started with.
              [Template] = State#state.children,
              RestartType = Template#child.restart_type,
              {M, F, _} = Template#child.mfa,
              Exited = Template#child{pid = Pid, mfa = {M, F, Args}},
              do_restart(RestartType, Reason, Exited, State)
      end;
  restart_child(Pid, Reason, State) ->
      case lists:keysearch(Pid, #child.pid, State#state.children) of
          {value, Child} ->
              do_restart(Child#child.restart_type, Reason, Child, State);
          _ ->
              {ok, State}
      end.
  %% Decides what happens to an exited child:
  %%   permanent - always reported and restarted;
  %%   otherwise, exit reason normal or shutdown - just marked as stopped;
  %%   transient - reported and restarted;
  %%   temporary - reported and marked as stopped.
  do_restart(permanent, Reason, Child, State) ->
      report_error(child_terminated, Reason, Child, State#state.name),
      restart(Child, State);
  do_restart(_RestartType, Reason, Child, State)
    when Reason =:= normal; Reason =:= shutdown ->
      {ok, state_del_child(Child, State)};
  do_restart(transient, Reason, Child, State) ->
      report_error(child_terminated, Reason, Child, State#state.name),
      restart(Child, State);
  do_restart(temporary, Reason, Child, State) ->
      report_error(child_terminated, Reason, Child, State#state.name),
      {ok, state_del_child(Child, State)}.
  %% Counts this restart. While the restart intensity is within limits
  %% the child is restarted according to the strategy; otherwise the
  %% child is removed and {shutdown, State} tells the caller to stop.
  restart(Child, State) ->
      Counted = add_restart(State),
      case Counted of
          {terminate, NState} ->
              report_error(shutdown, reached_max_restart_intensity,
                           Child, State#state.name),
              {shutdown, remove_child(Child, NState)};
          {ok, NState} ->
              restart(NState#state.strategy, Child, NState)
      end.
  %% Strategy-specific restart of Child. A failed start is reported and
  %% retried through restart/2, so every attempt counts towards the
  %% restart intensity.
  %%
  %% simple_one_for_one: the new pid replaces the old one both in the
  %% dynamics table and in gproc. Without moving the gproc entry, the
  %% eventual removal of the restarted child would try to unregister a
  %% key that was never registered.
  restart(simple_one_for_one, Child, State) ->
      #child{pid = OldPid, mfa = {M, F, A}} = Child,
      case do_start_child_i(M, F, A) of
          {ok, Pid} ->
              {ok, swap_dynamic(OldPid, Pid, A, State)};
          {ok, Pid, _Extra} ->
              {ok, swap_dynamic(OldPid, Pid, A, State)};
          {error, Error} ->
              report_error(start_error, Error, Child, State#state.name),
              restart(Child, State)
      end;
  restart(one_for_one, Child, State) ->
      case do_start_child(State#state.name, Child) of
          {ok, Pid} ->
              NState = replace_child(Child#child{pid = Pid}, State),
              {ok, NState};
          {ok, Pid, _Extra} ->
              NState = replace_child(Child#child{pid = Pid}, State),
              {ok, NState};
          {error, Reason} ->
              report_error(start_error, Reason, Child, State#state.name),
              restart(Child, State)
      end;
  %% rest_for_one: the exited child and everything started after it are
  %% terminated and started again; the new pids are then published.
  restart(rest_for_one, Child, State) ->
      {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children),
      ChAfter2 = terminate_children(ChAfter, State#state.name),
      case start_children(ChAfter2, State#state.name) of
          {ok, ChAfter3} ->
              publish_restarted(ChAfter3),
              NewC = ChAfter3 ++ ChBefore,
              {ok, State#state{children = NewC}};
          {error, ChAfter3} ->
              NewC = ChAfter3 ++ ChBefore,
              restart(Child, State#state{children = NewC})
      end;
  %% one_for_all: every child is terminated and started again; the new
  %% pids are then published.
  restart(one_for_all, Child, State) ->
      Children1 = del_child(Child#child.pid, State#state.children),
      Children2 = terminate_children(Children1, State#state.name),
      case start_children(Children2, State#state.name) of
          {ok, NChs} ->
              publish_restarted(NChs),
              {ok, State#state{children = NChs}};
          {error, NChs} ->
              restart(Child, State#state{children = NChs})
      end.

  %% Replaces OldPid by NewPid in the dynamics table and in gproc.
  %% NewPid is `undefined' when the start function returned `ignore';
  %% there is then no process to register with gproc.
  swap_dynamic(OldPid, NewPid, Args, State) ->
      try
          gproc:unreg({p,l,{simple_child,OldPid}})
      catch
          error:badarg -> ok
      end,
      case is_pid(NewPid) of
          true -> gproc:reg({p,l,{simple_child,NewPid}}, Args);
          false -> ok
      end,
      Dynamics = ?DICT:erase(OldPid, State#state.dynamics),
      State#state{dynamics = ?DICT:store(NewPid, Args, Dynamics)}.

  %% Best-effort refresh of the gproc entries of restarted children.
  publish_restarted(Children) ->
      lists:foreach(fun(Ch) -> set_child(Ch) end, Children).
  %%-----------------------------------------------------------------
  %% Func: terminate_children/2
  %% Args: Children = [#child] in termination order
  %%       SupName = {local, atom()} | {global, atom()} | {pid(),Mod}
  %% Returns: NChildren = [#child] in
  %%          startup order (reversed termination order)
  %%-----------------------------------------------------------------
  terminate_children(Children, SupName) ->
      terminate_children(Children, SupName, []).

  %% Terminates the children one by one, publishing each stopped record
  %% and prepending it to Res.
  terminate_children(Children, SupName, Res) ->
      StopOne = fun(Child, Acc) ->
                        Stopped = do_terminate(Child, SupName),
                        set_child(Stopped),
                        [Stopped | Acc]
                end,
      lists:foldl(StopOne, Res, Children).
  %% Shuts down a running child and returns its record with the pid
  %% cleared. An unexpected exit reason is reported but does not change
  %% the result. A child that is not running is returned untouched.
  do_terminate(Child, SupName) when Child#child.pid =/= undefined ->
      Outcome = shutdown(Child#child.pid, Child#child.shutdown),
      case Outcome of
          ok ->
              ok;
          {error, OtherReason} ->
              report_error(shutdown_error, OtherReason, Child, SupName)
      end,
      Child#child{pid = undefined};
  do_terminate(Child, _SupName) ->
      Child.
  %%-----------------------------------------------------------------
  %% Shuts down a child. We must check the EXIT value
  %% of the child, because it might have died with another reason than
  %% the wanted. In that case we want to report the error. We put a
  %% monitor on the child and check for the 'DOWN' message instead of
  %% checking for the 'EXIT' message, because if we check the 'EXIT'
  %% message a "naughty" child, who does unlink(Sup), could hang the
  %% supervisor.
  %% Returns: ok | {error, OtherReason}  (this should be reported)
  %%-----------------------------------------------------------------
  shutdown(Pid, brutal_kill) ->
      case monitor_child(Pid) of
          {error, Reason} ->
              {error, Reason};
          ok ->
              exit(Pid, kill),
              receive
                  {'DOWN', _MRef, process, Pid, killed} ->
                      ok;
                  {'DOWN', _MRef, process, Pid, Unexpected} ->
                      {error, Unexpected}
              end
      end;
  shutdown(Pid, Time) ->
      case monitor_child(Pid) of
          {error, Reason} ->
              {error, Reason};
          ok ->
              %% Ask politely first ...
              exit(Pid, shutdown),
              receive
                  {'DOWN', _MRef, process, Pid, shutdown} ->
                      ok;
                  {'DOWN', _MRef, process, Pid, Unexpected} ->
                      {error, Unexpected}
              after Time ->
                      %% ... and kill when the child did not go down in
                      %% time. Whatever reason it then dies with is
                      %% returned as an error.
                      exit(Pid, kill),
                      receive
                          {'DOWN', _MRef, process, Pid, Forced} ->
                              {error, Forced}
                      end
              end
      end.
  %% Help function to shutdown/2 switches from link to monitor approach
  %% Returns ok, or {error, Reason} when the child turned out to be dead
  %% already.
  monitor_child(Pid) ->
      %% Do the monitor operation first so that if the child dies
      %% before the monitoring is done causing a 'DOWN'-message with
      %% reason noproc, we will get the real reason in the 'EXIT'-message
      %% unless a naughty child has already done unlink...
      erlang:monitor(process, Pid),
      unlink(Pid),
      receive
          %% If the child dies before the unlink we must empty
          %% the mail-box of the 'EXIT'-message and the 'DOWN'-message.
          {'EXIT', Pid, ExitReason} ->
              receive
                  {'DOWN', _, process, Pid, _} ->
                      {error, ExitReason}
              end
      after 0 ->
              %% If a naughty child did unlink and the child dies before
              %% monitor the result will be that shutdown/2 receives a
              %% 'DOWN'-message with reason noproc.
              %% If the child should die after the unlink there
              %% will be a 'DOWN'-message with a correct reason
              %% that will be handled in shutdown/2.
              ok
      end.
  %%-----------------------------------------------------------------
  %% Child/State manipulating functions.
  %%-----------------------------------------------------------------
  %% Forgets a child that has exited. A simple_one_for_one child is
  %% dropped from gproc and from the dynamics table; any other child
  %% stays in the child list with its pid cleared.
  state_del_child(#child{pid = Pid}, State) when ?is_simple(State) ->
      gproc:unreg({p,l,{simple_child,Pid}}),
      State#state{dynamics = ?DICT:erase(Pid, State#state.dynamics)};
  state_del_child(Child, State) ->
      Remaining = del_child(Child#child.name, State#state.children),
      State#state{children = Remaining}.
  %% Clears the pid of the first child whose name or pid equals Key and
  %% publishes the change. The child itself stays in the list.
  del_child(Key, [Ch | Chs]) when Ch#child.name =:= Key; Ch#child.pid =:= Key ->
      Stopped = Ch#child{pid = undefined},
      set_child(Stopped),
      [Stopped | Chs];
  del_child(Key, [Ch | Chs]) ->
      [Ch | del_child(Key, Chs)];
  del_child(_Key, []) ->
      [].
  %% Chs = [S4, S3, Ch, S1, S0]
  %% Ret: {[S4, S3, Ch], [S1, S0]}
  %% The child is looked up by name or by pid; its pid is cleared in the
  %% first list. When no child matches, the whole list ends up in the
  %% first component.
  split_child(Key, Chs) ->
      split_child(Key, Chs, []).

  split_child(Key, [Ch | Chs], After)
    when Ch#child.name =:= Key; Ch#child.pid =:= Key ->
      Stopped = Ch#child{pid = undefined},
      {lists:reverse([Stopped | After]), Chs};
  split_child(Key, [Ch | Chs], After) ->
      split_child(Key, Chs, [Ch | After]);
  split_child(_Key, [], After) ->
      {lists:reverse(After), []}.
  %% Looks a child up by name. Returns {value, #child{}} | false.
  get_child(Name, State) ->
      Children = State#state.children,
      lists:keysearch(Name, #child.name, Children).
  %% Swaps in Child for the stored child of the same name.
  replace_child(Child, State) ->
      State#state{children = do_replace_child(Child, State#state.children)}.
  %% Replaces the first list entry named like Child and publishes the new
  %% record. There is deliberately no clause for the empty list: the
  %% child is expected to be present.
  do_replace_child(Child, [Old | Rest]) when Old#child.name =:= Child#child.name ->
      set_child(Child),
      [Child | Rest];
  do_replace_child(Child, [Other | Rest]) ->
      [Other | do_replace_child(Child, Rest)].
  %% Drops Child from the child list and removes its gproc entry.
  remove_child(Child, State) ->
      Name = Child#child.name,
      Remaining = lists:keydelete(Name, #child.name, State#state.children),
      unreg_child(Child),
      State#state{children = Remaining}.
  %%-----------------------------------------------------------------
  %% Func: init_state/4
  %% Args: SupName = {local, atom()} | {global, atom()} | self
  %%       Type = {Strategy, MaxIntensity, Period}
  %%         Strategy = one_for_one | one_for_all | simple_one_for_one |
  %%                    rest_for_one
  %%         MaxIntensity = integer()
  %%         Period = integer()
  %%       Mod :== atom()
  %%       Args :== term()
  %% Purpose: Check that Type is of correct type (!)
  %% Returns: {ok, #state} | Error
  %%-----------------------------------------------------------------
  init_state(SupName, Type, Mod, Args) ->
      %% The validators signal a bad value with throw; `catch' turns
      %% that into the Error return value.
      catch init_state1(SupName, Type, Mod, Args).
  %% Builds the initial state from validated supervisor flags. Throws
  %% when a flag is invalid; a flags term of the wrong shape is returned
  %% as {invalid_type, Type}.
  init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) ->
      validStrategy(Strategy),
      validIntensity(MaxIntensity),
      validPeriod(Period),
      State = #state{name = supname(SupName,Mod),
                     strategy = Strategy,
                     intensity = MaxIntensity,
                     period = Period,
                     module = Mod,
                     args = Args},
      {ok, State};
  init_state1(_SupName, Type, _Mod, _Args) ->
      {invalid_type, Type}.
  %% true for a known restart strategy; otherwise throws
  %% {invalid_strategy, What}.
  validStrategy(Strategy)
    when Strategy =:= simple_one_for_one;
         Strategy =:= one_for_one;
         Strategy =:= one_for_all;
         Strategy =:= rest_for_one ->
      true;
  validStrategy(What) ->
      throw({invalid_strategy, What}).
  %% true for a non-negative integer; otherwise throws
  %% {invalid_intensity, What}.
  validIntensity(Max) when is_integer(Max) andalso Max >= 0 ->
      true;
  validIntensity(What) ->
      throw({invalid_intensity, What}).
  %% true for a strictly positive integer; otherwise throws
  %% {invalid_period, What}.
  validPeriod(Period) when is_integer(Period) andalso Period > 0 ->
      true;
  validPeriod(What) ->
      throw({invalid_period, What}).
  %% Name used in reports: an unnamed supervisor (SupName is `self') is
  %% identified by its pid and callback module, a named one by its name.
  supname(SupName, Mod) ->
      case SupName of
          self -> {self(),Mod};
          N -> N
      end.
  %%% ------------------------------------------------------
  %%% Check that the children start specification is valid.
  %%% Shall be a six (6) tuple
  %%%    {Name, Func, RestartType, Shutdown, ChildType, Modules}
  %%% where Name is an atom
  %%%       Func is {Mod, Fun, Args} == {atom, atom, list}
  %%%       RestartType is permanent | temporary | transient
  %%%       Shutdown = integer() | infinity | brutal_kill
  %%%       ChildType = supervisor | worker
  %%%       Modules = [atom()] | dynamic
  %%% Returns: {ok, [#child]} | Error
  %%% ------------------------------------------------------
  check_startspec(Children) ->
      check_startspec(Children, []).

  %% Checked holds the children validated so far, most recent first.
  %% Stops at the first invalid spec or duplicated child name.
  check_startspec([], Checked) ->
      {ok, lists:reverse(Checked)};
  check_startspec([ChildSpec | Rest], Checked) ->
      case check_childspec(ChildSpec) of
          {ok, Child} ->
              Name = Child#child.name,
              case lists:keysearch(Name, #child.name, Checked) of
                  {value, _} ->
                      {duplicate_child_name, Name};
                  _ ->
                      check_startspec(Rest, [Child | Checked])
              end;
          Error ->
              Error
      end.
  %% Validates one child specification. Returns {ok, #child{}}, or the
  %% reason thrown by the failing validator. A term that is not a
  %% 6-tuple yields {invalid_child_spec, Term}.
  check_childspec(Spec) ->
      case Spec of
          {Name, Func, RestartType, Shutdown, ChildType, Mods} ->
              catch check_childspec(Name, Func, RestartType, Shutdown,
                                    ChildType, Mods);
          X ->
              {invalid_child_spec, X}
      end.
  %% Runs every field validator (each throws on a bad value) and builds
  %% the child record. The child type is checked before the shutdown
  %% value because the latter's validity depends on it.
  check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) ->
      validName(Name),
      validFunc(Func),
      validRestartType(RestartType),
      validChildType(ChildType),
      validShutdown(Shutdown, ChildType),
      validMods(Mods),
      Child = #child{name = Name,
                     mfa = Func,
                     restart_type = RestartType,
                     shutdown = Shutdown,
                     child_type = ChildType,
                     modules = Mods},
      {ok, Child}.
  %% true for supervisor or worker; otherwise throws
  %% {invalid_child_type, What}.
  validChildType(Type) when Type =:= supervisor; Type =:= worker ->
      true;
  validChildType(What) ->
      throw({invalid_child_type, What}).
  %% Any term is accepted as a child name.
  validName(_AnyName) ->
      true.
  %% true for {Module, Function, Args} with atom, atom and list;
  %% otherwise throws {invalid_mfa, Func}.
  validFunc({M, F, A}) when is_atom(M) andalso is_atom(F) andalso is_list(A) ->
      true;
  validFunc(Func) ->
      throw({invalid_mfa, Func}).
  %% true for permanent, temporary or transient; otherwise throws
  %% {invalid_restart_type, RestartType}.
  validRestartType(Type)
    when Type =:= permanent; Type =:= temporary; Type =:= transient ->
      true;
  validRestartType(RestartType) ->
      throw({invalid_restart_type, RestartType}).
  %% true for brutal_kill, for a positive integer timeout, and for
  %% infinity when the child is a supervisor; otherwise throws
  %% {invalid_shutdown, Shutdown}.
  validShutdown(brutal_kill, _ChildType) ->
      true;
  validShutdown(infinity, supervisor) ->
      true;
  validShutdown(Timeout, _ChildType) when is_integer(Timeout), Timeout > 0 ->
      true;
  validShutdown(Shutdown, _ChildType) ->
      throw({invalid_shutdown, Shutdown}).
  %% Accepts `dynamic' (returns true) or a list of atoms (returns ok).
  %% Throws {invalid_module, Mod} for a non-atom list element and
  %% {invalid_modules, Mods} for anything that is not a list.
  validMods(dynamic) ->
      true;
  validMods(Mods) when is_list(Mods) ->
      lists:foreach(fun assert_module_name/1, Mods);
  validMods(Mods) ->
      throw({invalid_modules, Mods}).

  %% ok for an atom; throws otherwise.
  assert_module_name(Mod) when is_atom(Mod) ->
      ok;
  assert_module_name(Mod) ->
      throw({invalid_module, Mod}).
  %%% ------------------------------------------------------
  %%% Add a new restart and calculate if the max restart
  %%% intensity has been reached (in that case the supervisor
  %%% shall terminate).
  %%% All restarts that occurred inside the period amount of seconds
  %%% are kept in the #state.restarts list.
  %%% Returns: {ok, State'} | {terminate, State'}
  %%% ------------------------------------------------------
  add_restart(State) ->
      #state{intensity = MaxIntensity,
             period = Period,
             restarts = Earlier} = State,
      Now = erlang:now(),
      Recent = add_restart([Now | Earlier], Now, Period),
      NState = State#state{restarts = Recent},
      if
          length(Recent) =< MaxIntensity ->
              {ok, NState};
          true ->
              {terminate, NState}
      end.
  %% Keeps the leading restarts (newest first) that lie within Period
  %% seconds of Now; the first one outside the period and everything
  %% after it are dropped.
  add_restart(Restarts, Now, Period) ->
      StillRecent = fun(R) -> inPeriod(R, Now, Period) =:= true end,
      lists:takewhile(StillRecent, Restarts).
  %% true when Time lies at most Period seconds before Now.
  inPeriod(Time, Now, Period) ->
      Elapsed = difference(Time, Now),
      not (Elapsed > Period).
  %%
  %% Time = {MegaSecs, Secs, MicroSecs} (NOTE: MicroSecs is ignored)
  %% Calculate the time elapsed in seconds between two timestamps.
  %% If MegaSecs is equal just subtract Secs.
  %% Else calculate the Mega difference and add the Secs difference,
  %% note that Secs difference can be negative, e.g.
  %%      {827, 999999, 676} diff {828, 1, 653753} == > 2 secs.
  %%
  difference({TimeM, TimeS, _}, {CurM, CurS, _}) ->
      SecDelta = CurS - TimeS,
      if
          CurM > TimeM ->
              ((CurM - TimeM) * 1000000) + SecDelta;
          true ->
              SecDelta
      end.
  %%% ------------------------------------------------------
  %%% Error and progress reporting.
  %%% ------------------------------------------------------
  %% Sends a supervisor_report to the error logger that names the
  %% supervisor, the context of the failure, the reason and the
  %% offending child.
  report_error(Error, Reason, Child, SupName) ->
      Offender = extract_child(Child),
      Report = [{supervisor, SupName},
                {errorContext, Error},
                {reason, Reason},
                {offender, Offender}],
      error_logger:error_report(supervisor_report, Report).
  %% Turns a child record into the property list used in reports. The
  %% modules field is not part of the list.
  extract_child(Child) ->
      Pid = Child#child.pid,
      Name = Child#child.name,
      MFA = Child#child.mfa,
      RestartType = Child#child.restart_type,
      Shutdown = Child#child.shutdown,
      ChildType = Child#child.child_type,
      [{pid, Pid},
       {name, Name},
       {mfa, MFA},
       {restart_type, RestartType},
       {shutdown, Shutdown},
       {child_type, ChildType}].
  %% Sends a progress report to the error logger saying that Child has
  %% been started by the supervisor SupName.
  report_progress(Child, SupName) ->
      Started = extract_child(Child),
      error_logger:info_report(progress, [{supervisor, SupName},
                                          {started, Started}]).