syn_benchmark.erl 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. %% ==========================================================================================================
  2. %% Syn - A global Process Registry and Process Group manager.
  3. %%
  4. %% The MIT License (MIT)
  5. %%
  6. %% Copyright (c) 2019-2021 Roberto Ostinelli <roberto@ostinelli.net> and Neato Robotics, Inc.
  7. %%
  8. %% Permission is hereby granted, free of charge, to any person obtaining a copy
  9. %% of this software and associated documentation files (the "Software"), to deal
  10. %% in the Software without restriction, including without limitation the rights
  11. %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. %% copies of the Software, and to permit persons to whom the Software is
  13. %% furnished to do so, subject to the following conditions:
  14. %%
  15. %% The above copyright notice and this permission notice shall be included in
  16. %% all copies or substantial portions of the Software.
  17. %%
  18. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. %% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. %% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. %% THE SOFTWARE.
  25. %% ==========================================================================================================
  26. -module(syn_benchmark).
  27. %% API
  28. -export([
  29. start/0,
  30. start_processes/1,
  31. process_loop/0,
  32. register_on_node/4,
  33. unregister_on_node/4,
  34. wait_registration_propagation/1,
  35. wait_unregistration_propagation/0
  36. ]).
  37. -export([
  38. start_profiling/0,
  39. stop_profiling/0
  40. ]).
  41. %% ===================================================================
  42. %% API
  43. %% ===================================================================
  %% example run: `PROCESS_COUNT=100000 WORKERS_PER_NODE=100 NODES_COUNT=2 make bench`
  %%
  %% Benchmark entry point. Reads PROCESS_COUNT, WORKERS_PER_NODE and NODES_COUNT from the
  %% environment, starts NODES_COUNT slave nodes, spawns the processes on them and then
  %% measures, in order: registration, unregistration, re-registration, and the registry
  %% cleanup that follows killing every process. Calls init:stop() when finished.
  start() ->
      %% init
      ProcessCount = list_to_integer(os:getenv("PROCESS_COUNT", "100000")),
      WorkersPerNode = list_to_integer(os:getenv("WORKERS_PER_NODE", "1")),
      SlavesCount = list_to_integer(os:getenv("NODES_COUNT", "1")),
      ProcessesPerNode = round(ProcessCount / SlavesCount),
      %% The per-node share is rounded, so the number of processes actually spawned can
      %% differ from PROCESS_COUNT. Waits and rates must use the real total, otherwise the
      %% propagation wait never observes the expected registry size and loops forever.
      TotalProcesses = ProcessesPerNode * SlavesCount,
      io:format("-----> Starting benchmark~n"),
      io:format(" --> Nodes: ~w (~w slaves)~n", [SlavesCount + 1, SlavesCount]),
      io:format(" --> Total processes: ~w (~w / slave node)~n", [TotalProcesses, ProcessesPerNode]),
      io:format(" --> Workers per node: ~w~n~n", [WorkersPerNode]),
      %% start nodes
      NodesInfo = lists:foldl(fun(I, Acc) ->
          %% start slave
          CountBin = integer_to_binary(I),
          NodeShortName = binary_to_atom(<<"slave_", CountBin/binary>>),
          {ok, Node} = ct_slave:start(NodeShortName, [
              {boot_timeout, 10},
              {monitor_master, true}
          ]),
          %% add code path
          CodePath = code:get_path(),
          true = rpc:call(Node, code, set_path, [CodePath]),
          %% start syn (asserted like the local start below, so a failed slave is
          %% reported here instead of surfacing later as a silent hang)
          ok = rpc:call(Node, syn, start, []),
          %% gather data: each node owns the contiguous name range FromName..ToName
          FromName = (I - 1) * ProcessesPerNode + 1,
          ToName = FromName + ProcessesPerNode - 1,
          %% fold
          [{Node, FromName, ToName} | Acc]
      end, [], lists:seq(1, SlavesCount)),
      %% start syn locally
      ok = syn:start(),
      timer:sleep(1000),
      CollectorPid = self(),
      %% start processes
      PidsMap = lists:foldl(fun({Node, _FromName, _ToName}, Acc) ->
          Pids = rpc:call(Node, ?MODULE, start_processes, [ProcessesPerNode]),
          maps:put(Node, Pids, Acc)
      end, #{}, NodesInfo),
      %% per-node requests used by the phases below
      RegisterOnNode = fun({Node, FromName, _ToName}) ->
          Pids = maps:get(Node, PidsMap),
          rpc:cast(Node, ?MODULE, register_on_node, [CollectorPid, WorkersPerNode, FromName, Pids])
      end,
      UnregisterOnNode = fun({Node, FromName, ToName}) ->
          rpc:cast(Node, ?MODULE, unregister_on_node, [CollectorPid, WorkersPerNode, FromName, ToName])
      end,
      WaitAllRegistered = fun() -> wait_registration_propagation(TotalProcesses) end,
      WaitAllUnregistered = fun wait_unregistration_propagation/0,
      %% registration
      bench_phase(
          "----> Remote registration times:~n",
          "====> Registeration rate (with propagation): ~p/sec.~n~n",
          RegisterOnNode, WaitAllRegistered, NodesInfo, TotalProcesses
      ),
      timer:sleep(1000),
      %% unregistration
      bench_phase(
          "----> Remote unregistration times:~n",
          "====> Unregisteration rate (with propagation): ~p/sec.~n~n",
          UnregisterOnNode, WaitAllUnregistered, NodesInfo, TotalProcesses
      ),
      %% re-registration
      bench_phase(
          "----> Remote re-registration times:~n",
          "====> Re-registeration rate (with propagation): ~p/sec.~n~n",
          RegisterOnNode, WaitAllRegistered, NodesInfo, TotalProcesses
      ),
      %% kill all processes
      maps:foreach(fun(_Node, Pids) ->
          lists:foreach(fun(Pid) -> exit(Pid, kill) end, Pids)
      end, PidsMap),
      %% wait all unregistered
      KillPropagationTime = propagation_seconds(WaitAllUnregistered),
      io:format("----> Time to propagate killed process to to master: ~p secs.~n", [KillPropagationTime]),
      KillRate = TotalProcesses / KillPropagationTime,
      io:format("====> Unregistered after kill rate (with propagation): ~p/sec.~n~n", [KillRate]),
      %% stop node
      init:stop().

  %% Runs one timed phase and prints its report.
  %% CastFun is applied to every {Node, FromName, ToName} entry to kick the phase off; the
  %% function then collects one {done, Node, Time} reply from each of those nodes (and only
  %% those, so an unrelated connected node cannot stall the benchmark), times WaitFun until
  %% the local registry reflects the change, and prints the rate over TotalProcesses.
  bench_phase(TimesHeader, RateFormat, CastFun, WaitFun, NodesInfo, TotalProcesses) ->
      lists:foreach(CastFun, NodesInfo),
      BenchNodes = [Node || {Node, _FromName, _ToName} <- NodesInfo],
      RemoteTimes = wait_from_all_remote_nodes(BenchNodes, []),
      io:format(TimesHeader),
      io:format(" --> MIN: ~p secs.~n", [lists:min(RemoteTimes)]),
      io:format(" --> MAX: ~p secs.~n", [lists:max(RemoteTimes)]),
      PropagationTime = propagation_seconds(WaitFun),
      io:format("----> Eventual additional time to propagate all to master: ~p secs.~n", [PropagationTime]),
      %% sum: slowest node plus the extra propagation delay seen locally
      TakenTime = lists:max(RemoteTimes) + PropagationTime,
      io:format(RateFormat, [TotalProcesses / TakenTime]).

  %% Runs WaitFun and returns how long it took, in seconds (timer:tc reports microseconds).
  propagation_seconds(WaitFun) ->
      {Microseconds, _} = timer:tc(WaitFun),
      Microseconds / 1000000.
  %% Registers Pids under consecutive integer names starting at FromName, splitting the
  %% work across WorkersPerNode concurrently spawned workers. Once every worker has
  %% reported, sends {done, node(), Time} to CollectorPid, where Time is the duration of
  %% the slowest worker in seconds.
  register_on_node(CollectorPid, WorkersPerNode, FromName, Pids) ->
      %% split pids in workers
      %% Integer division on purpose: a rounded-up share would let the first workers use
      %% up more pids than exist and make lists:split/2 fail. The last worker takes
      %% whatever remains.
      PidsPerWorker = length(Pids) div WorkersPerNode,
      {WorkerInfo, []} = lists:foldl(fun(I, {WInfo, RPids}) ->
          {WorkerPids, RestOfPids} = case I of
              WorkersPerNode ->
                  %% last in the loop, get remaining pids
                  {RPids, []};
              _ ->
                  %% get portion of pids
                  lists:split(PidsPerWorker, RPids)
          end,
          %% names stay contiguous: worker I starts right after worker I - 1's share
          WorkerFromName = FromName + (PidsPerWorker * (I - 1)),
          {[{WorkerFromName, WorkerPids} | WInfo], RestOfPids}
      end, {[], Pids}, lists:seq(1, WorkersPerNode)),
      %% spawn workers
      ReplyPid = self(),
      lists:foreach(fun({WorkerFromName, WorkerPids}) ->
          spawn(fun() ->
              %% monotonic clock: elapsed time must not be affected by wall-clock adjustments
              StartAt = erlang:monotonic_time(millisecond),
              worker_register_on_node(WorkerFromName, WorkerPids),
              Time = (erlang:monotonic_time(millisecond) - StartAt) / 1000,
              ReplyPid ! {done, Time}
          end)
      end, WorkerInfo),
      %% wait
      wait_register_on_node(CollectorPid, 0, WorkersPerNode).
  %% Registers each pid of the list with syn, giving the first pid the name Name and every
  %% following pid the next integer. Crashes on the first registration that is not `ok`.
  worker_register_on_node(Name, Pids) ->
      lists:foldl(fun(Pid, CurrentName) ->
          ok = syn:register(CurrentName, Pid),
          CurrentName + 1
      end, Name, Pids),
      ok.
  %% Collects one {done, WorkerTime} message per remaining worker, tracking the largest
  %% time seen. When no workers remain, prints the result and sends {done, node(), Time}
  %% to CollectorPid.
  wait_register_on_node(CollectorPid, Time, WorkersRemainingCount) ->
      case WorkersRemainingCount of
          0 ->
              io:format("----> Registered on node ~p on ~p secs.~n", [node(), Time]),
              CollectorPid ! {done, node(), Time};
          _ ->
              receive
                  {done, WorkerTime} ->
                      Slowest = erlang:max(WorkerTime, Time),
                      wait_register_on_node(CollectorPid, Slowest, WorkersRemainingCount - 1)
              end
      end.
  %% Unregisters the integer names FromName..ToName, splitting the range across
  %% WorkersPerNode concurrently spawned workers. Once every worker has reported, sends
  %% {done, node(), Time} to CollectorPid, where Time is the duration of the slowest
  %% worker in seconds.
  unregister_on_node(CollectorPid, WorkersPerNode, FromName, ToName) ->
      %% split names in workers
      ProcessesPerNode = ToName - FromName + 1,
      %% Integer division on purpose: a rounded-up share would push the sub-ranges past
      %% ToName, i.e. onto names this node was never asked to unregister. The last worker
      %% takes whatever remains up to ToName.
      ProcessesPerWorker = ProcessesPerNode div WorkersPerNode,
      WorkerInfo = lists:foldl(fun(I, Acc) ->
          WorkerFromName = FromName + (I - 1) * ProcessesPerWorker,
          WorkerToName = case I of
              WorkersPerNode ->
                  %% last in the loop
                  ToName;
              _ ->
                  FromName + I * ProcessesPerWorker - 1
          end,
          [{WorkerFromName, WorkerToName} | Acc]
      end, [], lists:seq(1, WorkersPerNode)),
      %% spawn workers
      ReplyPid = self(),
      lists:foreach(fun({WorkerFromName, WorkerToName}) ->
          spawn(fun() ->
              %% monotonic clock: elapsed time must not be affected by wall-clock adjustments
              StartAt = erlang:monotonic_time(millisecond),
              worker_unregister_on_node(WorkerFromName, WorkerToName),
              Time = (erlang:monotonic_time(millisecond) - StartAt) / 1000,
              ReplyPid ! {done, Time}
          end)
      end, WorkerInfo),
      %% wait
      wait_unregister_on_node(CollectorPid, 0, WorkersPerNode).
  %% Unregisters every name from Name up to and including ToName, in increasing order.
  %% Returns ok immediately when Name is already past ToName; crashes on the first
  %% unregistration that is not `ok`.
  worker_unregister_on_node(Name, ToName) ->
      case Name =< ToName of
          true ->
              ok = syn:unregister(Name),
              worker_unregister_on_node(Name + 1, ToName);
          false ->
              ok
      end.
  %% Collects one {done, WorkerTime} message per remaining worker, tracking the largest
  %% time seen. When no workers remain, prints the result and sends {done, node(), Time}
  %% to CollectorPid.
  wait_unregister_on_node(CollectorPid, Time, WorkersRemainingCount) ->
      case WorkersRemainingCount of
          0 ->
              io:format("----> Unregistered on node ~p on ~p secs.~n", [node(), Time]),
              CollectorPid ! {done, node(), Time};
          _ ->
              receive
                  {done, WorkerTime} ->
                      Slowest = erlang:max(WorkerTime, Time),
                      wait_unregister_on_node(CollectorPid, Slowest, WorkersRemainingCount - 1)
              end
      end.
  %% Spawns Count processes running process_loop/0 and returns their pids, most recently
  %% spawned first.
  start_processes(Count) ->
      start_processes(Count, []).

  %% Accumulator loop behind start_processes/1: Remaining counts down to 0 while each new
  %% pid is pushed onto Spawned.
  start_processes(Remaining, Spawned) ->
      case Remaining of
          0 ->
              Spawned;
          _ ->
              NewPid = spawn(fun process_loop/0),
              start_processes(Remaining - 1, [NewPid | Spawned])
      end.
  %% Body of the benchmark processes: blocks until any message arrives, then returns ok
  %% (which ends a process spawned on this function).
  process_loop() ->
      receive
          _AnyMessage ->
              ok
      end.
  %% Waits, node by node in list order, for a {done, Node, Time} message from each given
  %% node and pushes every received Time onto Times. Returns the resulting list; blocks
  %% indefinitely if a listed node never reports.
  wait_from_all_remote_nodes(RemoteNodes, Times) ->
      lists:foldl(fun(RemoteNode, Collected) ->
          receive
              {done, RemoteNode, Time} ->
                  [Time | Collected]
          end
      end, Times, RemoteNodes).
  %% Polls syn:registry_count(default) every 50 ms until it is exactly ProcessCount, then
  %% returns ok. There is no timeout.
  wait_registration_propagation(ProcessCount) ->
      CurrentCount = syn:registry_count(default),
      if
          CurrentCount =:= ProcessCount ->
              ok;
          true ->
              timer:sleep(50),
              wait_registration_propagation(ProcessCount)
      end.
  %% Polls syn:registry_count(default) every 50 ms until it is 0, then returns ok.
  %% There is no timeout.
  wait_unregistration_propagation() ->
      CurrentCount = syn:registry_count(default),
      if
          CurrentCount =:= 0 ->
              ok;
          true ->
              timer:sleep(50),
              wait_unregistration_propagation()
      end.
  %% Starts the eprof server and begins profiling every process currently alive on this
  %% node, except the eprof server itself.
  start_profiling() ->
      {ok, EprofPid} = eprof:start(),
      Targets = lists:delete(EprofPid, erlang:processes()),
      eprof:start_profiling(Targets).
  %% Stops the eprof profiling run and returns the result of eprof:analyze(total).
  stop_profiling() ->
      _ = eprof:stop_profiling(),
      Analysis = eprof:analyze(total),
      Analysis.