syn_benchmark.erl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. %% ==========================================================================================================
  2. %% Syn - A global Process Registry and Process Group manager.
  3. %%
  4. %% The MIT License (MIT)
  5. %%
  6. %% Copyright (c) 2019-2021 Roberto Ostinelli <roberto@ostinelli.net> and Neato Robotics, Inc.
  7. %%
  8. %% Permission is hereby granted, free of charge, to any person obtaining a copy
  9. %% of this software and associated documentation files (the "Software"), to deal
  10. %% in the Software without restriction, including without limitation the rights
  11. %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. %% copies of the Software, and to permit persons to whom the Software is
  13. %% furnished to do so, subject to the following conditions:
  14. %%
  15. %% The above copyright notice and this permission notice shall be included in
  16. %% all copies or substantial portions of the Software.
  17. %%
  18. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. %% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. %% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. %% THE SOFTWARE.
  25. %% ==========================================================================================================
  26. -module(syn_benchmark).
  27. %% API
  28. -export([
  29. start/0,
  30. start_processes/1,
  31. process_loop/0,
  32. register_on_node/4,
  33. unregister_on_node/4,
  34. wait_registry_propagation/1
  35. ]).
  36. -export([
  37. start_profiling/0,
  38. stop_profiling/0
  39. ]).
  40. %% ===================================================================
  41. %% API
  42. %% ===================================================================
  %% example run: `PROCESS_COUNT=100000 WORKERS_PER_NODE=100 NODES_COUNT=2 make bench`
  %%
  %% Runs the whole benchmark from the master node:
  %%   1. starts NODES_COUNT slave nodes and syn on each of them and locally,
  %%   2. spawns the benchmark processes on the slaves,
  %%   3. measures registration, unregistration, re-registration and the
  %%      clean-up that follows killing every process,
  %%   4. stops the local node with init:stop/0.
  %%
  %% Configuration is read from the environment variables PROCESS_COUNT
  %% (default 100000), WORKERS_PER_NODE (default 1) and NODES_COUNT (default 1).
  start() ->
      %% init
      RequestedCount = list_to_integer(os:getenv("PROCESS_COUNT", "100000")),
      WorkersPerNode = list_to_integer(os:getenv("WORKERS_PER_NODE", "1")),
      SlavesCount = list_to_integer(os:getenv("NODES_COUNT", "1")),
      ProcessesPerNode = round(RequestedCount / SlavesCount),
      %% Every slave spawns exactly ProcessesPerNode processes, so the number of
      %% names that really end up in the registry is this product. It differs
      %% from the requested count whenever PROCESS_COUNT is not a multiple of
      %% NODES_COUNT; waiting for the requested count would then never finish.
      ProcessCount = ProcessesPerNode * SlavesCount,

      io:format("-----> Starting benchmark~n"),
      io:format(" --> Nodes: ~w / ~w slave(s)~n", [SlavesCount + 1, SlavesCount]),
      io:format(" --> Total processes: ~w (~w / slave node)~n", [ProcessCount, ProcessesPerNode]),
      io:format(" --> Workers per node: ~w~n~n", [WorkersPerNode]),

      %% start nodes
      NodesInfo = lists:foldl(fun(I, Acc) ->
          %% start slave
          CountBin = integer_to_binary(I),
          NodeShortName = binary_to_atom(<<"slave_", CountBin/binary>>),
          {ok, Node} = ct_slave:start(NodeShortName, [
              {boot_timeout, 10},
              {monitor_master, true}
          ]),
          %% add code path
          CodePath = code:get_path(),
          true = rpc:call(Node, code, set_path, [CodePath]),
          %% start syn; fail right away instead of hanging later if the
          %% remote start did not succeed
          ok = rpc:call(Node, syn, start, []),
          %% each node owns a contiguous range of integer names
          FromName = (I - 1) * ProcessesPerNode + 1,
          ToName = FromName + ProcessesPerNode - 1,
          [{Node, FromName, ToName} | Acc]
      end, [], lists:seq(1, SlavesCount)),

      %% start syn locally
      ok = syn:start(),
      timer:sleep(1000),

      CollectorPid = self(),

      %% start processes
      PidsMap = lists:foldl(fun({Node, _FromName, _ToName}, Acc) ->
          Pids = rpc:call(Node, ?MODULE, start_processes, [ProcessesPerNode]),
          maps:put(Node, Pids, Acc)
      end, #{}, NodesInfo),

      %% registration and re-registration issue exactly the same casts
      StartRegistration = fun() ->
          lists:foreach(fun({Node, FromName, _ToName}) ->
              Pids = maps:get(Node, PidsMap),
              rpc:cast(Node, ?MODULE, register_on_node, [CollectorPid, WorkersPerNode, FromName, Pids])
          end, NodesInfo)
      end,

      %% registration
      StartRegistration(),
      report_phase(
          "----> Remote registration times:~n",
          "====> Registeration rate (with propagation): ~p/sec.~n~n",
          ProcessCount, ProcessCount
      ),

      timer:sleep(1000),

      %% unregistration
      lists:foreach(fun({Node, FromName, ToName}) ->
          rpc:cast(Node, ?MODULE, unregister_on_node, [CollectorPid, WorkersPerNode, FromName, ToName])
      end, NodesInfo),
      report_phase(
          "----> Remote unregistration times:~n",
          "====> Unregisteration rate (with propagation): ~p/sec.~n~n",
          0, ProcessCount
      ),

      %% re-registration
      StartRegistration(),
      report_phase(
          "----> Remote re-registration times:~n",
          "====> Re-registeration rate (with propagation): ~p/sec.~n~n",
          ProcessCount, ProcessCount
      ),

      %% kill all processes
      maps:foreach(fun(_Node, Pids) ->
          lists:foreach(fun(Pid) -> exit(Pid, kill) end, Pids)
      end, PidsMap),

      %% wait all unregistered (timer:tc/3 reports microseconds)
      {KillPropagationMicros, _} = timer:tc(?MODULE, wait_registry_propagation, [0]),
      KillPropagationTime = KillPropagationMicros / 1000000,
      io:format("----> Time to propagate killed process to to master: ~p secs.~n", [KillPropagationTime]),
      KillRate = ProcessCount / KillPropagationTime,
      io:format("====> Unregistered after kill rate (with propagation): ~p/sec.~n~n", [KillRate]),

      %% stop node
      init:stop().

  %% Collects and prints the results of one benchmark phase.
  %%
  %% Blocks until every connected node has sent its `{done, Node, Time}`
  %% message, prints Header followed by the fastest and slowest node time,
  %% then measures how long the local registry needs to reach ExpectedCount
  %% entries. RateFormat is printed with TotalCount divided by the slowest node
  %% time plus that propagation time.
  report_phase(Header, RateFormat, ExpectedCount, TotalCount) ->
      RemoteTimes = wait_from_all_remote_nodes(nodes(), []),
      SlowestNodeTime = lists:max(RemoteTimes),
      io:format(Header),
      io:format(" --> MIN: ~p secs.~n", [lists:min(RemoteTimes)]),
      io:format(" --> MAX: ~p secs.~n", [SlowestNodeTime]),
      %% timer:tc/3 reports microseconds
      {PropagationMicros, _} = timer:tc(?MODULE, wait_registry_propagation, [ExpectedCount]),
      PropagationTime = PropagationMicros / 1000000,
      io:format("----> Eventual additional time to propagate all to master: ~p secs.~n", [PropagationTime]),
      %% sum
      TakenTime = SlowestNodeTime + PropagationTime,
      io:format(RateFormat, [TotalCount / TakenTime]).
  %% Registers every pid in Pids on the local node, spreading the work over
  %% WorkersPerNode concurrently running worker processes.
  %%
  %% Names are consecutive integers starting at FromName, assigned to the pids
  %% in list order. When all workers are done the longest worker time (in
  %% seconds) is sent to CollectorPid as `{done, node(), Time}` and printed.
  register_on_node(CollectorPid, WorkersPerNode, FromName, Pids) ->
      %% split pids in workers. Integer division guarantees that the first
      %% WorkersPerNode - 1 slices never ask lists:split/2 for more pids than
      %% are left (rounding up could, e.g. 9 pids over 6 workers); the last
      %% worker takes whatever remains.
      PidsPerWorker = length(Pids) div WorkersPerNode,
      {WorkerInfo, []} = lists:foldl(fun(I, {WInfo, RPids}) ->
          {WorkerPids, RestOfPids} = case I of
              WorkersPerNode ->
                  %% last in the loop, get remaining pids
                  {RPids, []};
              _ ->
                  %% get portion of pids
                  lists:split(PidsPerWorker, RPids)
          end,
          WorkerFromName = FromName + (PidsPerWorker * (I - 1)),
          {[{WorkerFromName, WorkerPids} | WInfo], RestOfPids}
      end, {[], Pids}, lists:seq(1, WorkersPerNode)),
      %% spawn workers
      ReplyPid = self(),
      lists:foreach(fun({WorkerFromName, WorkerPids}) ->
          spawn(fun() ->
              %% monotonic time: a wall-clock adjustment must not distort the
              %% measured duration
              StartAt = erlang:monotonic_time(millisecond),
              worker_register_on_node(WorkerFromName, WorkerPids),
              Time = (erlang:monotonic_time(millisecond) - StartAt) / 1000,
              ReplyPid ! {done, Time}
          end)
      end, WorkerInfo),
      %% wait
      Time = wait_done_on_node(CollectorPid, 0, WorkersPerNode),
      io:format("----> Registered on node ~p on ~p secs.~n", [node(), Time]).
  %% Registers the given pids under consecutive names: the first pid gets Name,
  %% the second Name + 1, and so on. Crashes with a badmatch as soon as
  %% syn:register/2 returns anything other than `ok`. Returns `ok`.
  worker_register_on_node(Name, Pids) ->
      lists:foldl(fun(Pid, CurrentName) ->
          ok = syn:register(CurrentName, Pid),
          CurrentName + 1
      end, Name, Pids),
      ok.
  %% Unregisters the integer names FromName..ToName (inclusive) on the local
  %% node, spreading the work over WorkersPerNode concurrently running worker
  %% processes.
  %%
  %% When all workers are done the longest worker time (in seconds) is sent to
  %% CollectorPid as `{done, node(), Time}` and printed.
  unregister_on_node(CollectorPid, WorkersPerNode, FromName, ToName) ->
      %% split names in workers. Integer division keeps every worker range
      %% inside FromName..ToName; rounding up could push a range past ToName
      %% and make a worker try to unregister a name this node never owned.
      ProcessesPerNode = ToName - FromName + 1,
      ProcessesPerWorker = ProcessesPerNode div WorkersPerNode,
      WorkerInfo = lists:foldl(fun(I, Acc) ->
          WorkerFromName = FromName + (I - 1) * ProcessesPerWorker,
          WorkerToName = case I of
              WorkersPerNode ->
                  %% last in the loop, takes everything up to the end
                  ToName;
              _ ->
                  FromName + I * ProcessesPerWorker - 1
          end,
          [{WorkerFromName, WorkerToName} | Acc]
      end, [], lists:seq(1, WorkersPerNode)),
      %% spawn workers
      ReplyPid = self(),
      lists:foreach(fun({WorkerFromName, WorkerToName}) ->
          spawn(fun() ->
              %% monotonic time: a wall-clock adjustment must not distort the
              %% measured duration
              StartAt = erlang:monotonic_time(millisecond),
              worker_unregister_on_node(WorkerFromName, WorkerToName),
              Time = (erlang:monotonic_time(millisecond) - StartAt) / 1000,
              ReplyPid ! {done, Time}
          end)
      end, WorkerInfo),
      %% wait
      Time = wait_done_on_node(CollectorPid, 0, WorkersPerNode),
      io:format("----> Unregistered on node ~p on ~p secs.~n", [node(), Time]).
  %% Unregisters the names Name, Name + 1, ... up to and including ToName.
  %% Does nothing when Name is already past ToName. Crashes with a badmatch as
  %% soon as syn:unregister/1 returns anything other than `ok`. Returns `ok`.
  worker_unregister_on_node(Name, ToName) when Name =< ToName ->
      ok = syn:unregister(Name),
      worker_unregister_on_node(Name + 1, ToName);
  worker_unregister_on_node(_PastEnd, _ToName) ->
      ok.
  %% Collects one `{done, WorkerTime}` message per outstanding worker while
  %% keeping track of the largest time seen so far. Once no worker is left it
  %% sends `{done, node(), Time}` to CollectorPid and returns that time.
  wait_done_on_node(CollectorPid, LongestTime, WorkersRemainingCount) ->
      case WorkersRemainingCount of
          0 ->
              CollectorPid ! {done, node(), LongestTime},
              LongestTime;
          _ ->
              receive
                  {done, WorkerTime} ->
                      wait_done_on_node(
                          CollectorPid,
                          erlang:max(WorkerTime, LongestTime),
                          WorkersRemainingCount - 1
                      )
              end
      end.
  %% Spawns Count processes running process_loop/0 and returns their pids,
  %% most recently spawned first.
  start_processes(Count) ->
      start_processes(Count, []).

  %% Accumulating helper: Remaining counts down to 0 while Spawned collects
  %% the pids.
  start_processes(Remaining, Spawned) ->
      case Remaining of
          0 ->
              Spawned;
          _ ->
              NewPid = spawn(fun process_loop/0),
              start_processes(Remaining - 1, [NewPid | Spawned])
      end.
  %% Body of a benchmark process: blocks until any message arrives, then
  %% returns `ok`.
  process_loop() ->
      receive
          _AnyMessage ->
              ok
      end.
  %% Waits, node by node in list order, for a `{done, Node, Time}` message
  %% from each of the given nodes. Every received time is pushed onto the front
  %% of Times and the resulting list is returned. Blocks for as long as a node
  %% has not reported.
  wait_from_all_remote_nodes(RemoteNodes, Times) ->
      lists:foldl(fun(RemoteNode, Collected) ->
          receive
              {done, RemoteNode, Elapsed} ->
                  [Elapsed | Collected]
          end
      end, Times, RemoteNodes).
  %% Blocks until syn:registry_count(default) is exactly DesiredCount, checking
  %% again every 50 milliseconds. Returns `ok`.
  wait_registry_propagation(DesiredCount) ->
      CurrentCount = syn:registry_count(default),
      if
          CurrentCount =:= DesiredCount ->
              ok;
          true ->
              timer:sleep(50),
              wait_registry_propagation(DesiredCount)
      end.
  %% Starts eprof and turns profiling on for every process that exists at the
  %% time of the call, leaving out the pid returned by eprof:start/0.
  start_profiling() ->
      {ok, EprofPid} = eprof:start(),
      ProfiledPids = lists:delete(EprofPid, erlang:processes()),
      eprof:start_profiling(ProfiledPids).
  %% Stops the profiling started by start_profiling/0 and runs eprof's
  %% `total` analysis; the result of eprof:analyze/1 is returned.
  stop_profiling() ->
      eprof:stop_profiling(),
      Analysis = eprof:analyze(total),
      Analysis.