syn_benchmark.erl 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. %% ==========================================================================================================
  2. %% Syn - A global Process Registry and Process Group manager.
  3. %%
  4. %% The MIT License (MIT)
  5. %%
  6. %% Copyright (c) 2019-2021 Roberto Ostinelli <roberto@ostinelli.net> and Neato Robotics, Inc.
  7. %%
  8. %% Permission is hereby granted, free of charge, to any person obtaining a copy
  9. %% of this software and associated documentation files (the "Software"), to deal
  10. %% in the Software without restriction, including without limitation the rights
  11. %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. %% copies of the Software, and to permit persons to whom the Software is
  13. %% furnished to do so, subject to the following conditions:
  14. %%
  15. %% The above copyright notice and this permission notice shall be included in
  16. %% all copies or substantial portions of the Software.
  17. %%
  18. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. %% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. %% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. %% THE SOFTWARE.
  25. %% ==========================================================================================================
  26. -module(syn_benchmark).
  27. %% API
  28. -export([
  29. start/0,
  30. start_processes/1,
  31. process_loop/0,
  32. register_on_node/4,
  33. unregister_on_node/4,
  34. wait_registration_propagation/1,
  35. wait_unregistration_propagation/0
  36. ]).
  37. -export([
  38. start_profiling/0,
  39. stop_profiling/0
  40. ]).
  41. %% ===================================================================
  42. %% API
  43. %% ===================================================================
  44. %% example run: `PROCESS_COUNT=100000 WORKERS_PER_NODE=100 NODES_COUNT=2 make bench`
  45. start() ->
  46. %% init
  47. SlavesCount = list_to_integer(os:getenv("NODES_COUNT", "1")),
  48. WorkersPerNode = list_to_integer(os:getenv("WORKERS_PER_NODE", "100")),
  49. ProcessCount = list_to_integer(os:getenv("PROCESS_COUNT", "100000")),
  50. ProcessesPerNode = round(ProcessCount / SlavesCount),
  51. io:format("-----> Starting benchmark~n"),
  52. io:format(" --> Nodes: ~w (~w slaves)~n", [SlavesCount + 1, SlavesCount]),
  53. io:format(" --> Total processes: ~w (~w / slave node)~n", [ProcessCount, ProcessesPerNode]),
  54. io:format(" --> Workers per node: ~w~n~n", [WorkersPerNode]),
  55. %% start nodes
  56. NodesInfo = lists:foldl(fun(I, Acc) ->
  57. %% start slave
  58. CountBin = integer_to_binary(I),
  59. NodeShortName = binary_to_atom(<<"slave_", CountBin/binary>>),
  60. {ok, Node} = ct_slave:start(NodeShortName, [
  61. {boot_timeout, 10},
  62. {monitor_master, true}
  63. ]),
  64. %% add code path
  65. CodePath = code:get_path(),
  66. true = rpc:call(Node, code, set_path, [CodePath]),
  67. %% start syn
  68. rpc:call(Node, syn, start, []),
  69. %% gather data
  70. FromName = (I - 1) * ProcessesPerNode + 1,
  71. ToName = FromName + ProcessesPerNode - 1,
  72. %% fold
  73. [{Node, FromName, ToName} | Acc]
  74. end, [], lists:seq(1, SlavesCount)),
  75. %% start syn locally
  76. ok = syn:start(),
  77. timer:sleep(1000),
  78. CollectorPid = self(),
  79. %% start processes
  80. PidsMap = lists:foldl(fun({Node, _FromName, _ToName}, Acc) ->
  81. Pids = rpc:call(Node, ?MODULE, start_processes, [ProcessesPerNode]),
  82. maps:put(Node, Pids, Acc)
  83. end, #{}, NodesInfo),
  84. %% start registration
  85. lists:foreach(fun({Node, FromName, _ToName}) ->
  86. Pids = maps:get(Node, PidsMap),
  87. rpc:cast(Node, ?MODULE, register_on_node, [CollectorPid, WorkersPerNode, FromName, Pids])
  88. end, NodesInfo),
  89. %% wait
  90. RegRemoteNodesTimes = wait_from_all_remote_nodes(nodes(), []),
  91. io:format("----> Remote registration times:~n"),
  92. io:format(" --> MIN: ~p secs.~n", [lists:min(RegRemoteNodesTimes)]),
  93. io:format(" --> MAX: ~p secs.~n", [lists:max(RegRemoteNodesTimes)]),
  94. {RegPropagationTimeMs, _} = timer:tc(?MODULE, wait_registration_propagation, [ProcessCount]),
  95. RegPropagationTime = RegPropagationTimeMs / 1000000,
  96. io:format("----> Eventual additional time to propagate all to master: ~p secs.~n", [RegPropagationTime]),
  97. %% sum
  98. RegTakenTime = (lists:max(RegRemoteNodesTimes) + RegPropagationTime),
  99. RegistrationRate = ProcessCount / RegTakenTime,
  100. io:format("====> Registeration rate (with propagation): ~p/sec.~n~n", [RegistrationRate]),
  101. timer:sleep(1000),
  102. %% start unregistration
  103. lists:foreach(fun({Node, FromName, ToName}) ->
  104. rpc:cast(Node, ?MODULE, unregister_on_node, [CollectorPid, WorkersPerNode, FromName, ToName])
  105. end, NodesInfo),
  106. %% wait
  107. UnregRemoteNodesTimes = wait_from_all_remote_nodes(nodes(), []),
  108. io:format("----> Remote unregistration times:~n"),
  109. io:format(" --> MIN: ~p secs.~n", [lists:min(UnregRemoteNodesTimes)]),
  110. io:format(" --> MAX: ~p secs.~n", [lists:max(UnregRemoteNodesTimes)]),
  111. {UnregPropagationTimeMs, _} = timer:tc(?MODULE, wait_unregistration_propagation, []),
  112. UnregPropagationTime = UnregPropagationTimeMs / 1000000,
  113. io:format("----> Eventual additional time to propagate all to master: ~p secs.~n", [UnregPropagationTime]),
  114. %% sum
  115. UnregTakenTime = (lists:max(UnregRemoteNodesTimes) + UnregPropagationTime),
  116. UnregistrationRate = ProcessCount / UnregTakenTime,
  117. io:format("====> Unregisteration rate (with propagation): ~p/sec.~n~n", [UnregistrationRate]),
  118. %% start re-registration
  119. lists:foreach(fun({Node, FromName, _ToName}) ->
  120. Pids = maps:get(Node, PidsMap),
  121. rpc:cast(Node, ?MODULE, register_on_node, [CollectorPid, WorkersPerNode, FromName, Pids])
  122. end, NodesInfo),
  123. %% wait
  124. ReRegRemoteNodesTimes = wait_from_all_remote_nodes(nodes(), []),
  125. io:format("----> Remote re-registration times:~n"),
  126. io:format(" --> MIN: ~p secs.~n", [lists:min(ReRegRemoteNodesTimes)]),
  127. io:format(" --> MAX: ~p secs.~n", [lists:max(ReRegRemoteNodesTimes)]),
  128. {ReRegPropagationTimeMs, _} = timer:tc(?MODULE, wait_registration_propagation, [ProcessCount]),
  129. ReRegPropagationTime = ReRegPropagationTimeMs / 1000000,
  130. io:format("----> Eventual additional time to propagate all to master: ~p secs.~n", [ReRegPropagationTime]),
  131. %% sum
  132. ReRegTakenTime = (lists:max(ReRegRemoteNodesTimes) + ReRegPropagationTime),
  133. ReRegistrationRate = ProcessCount / ReRegTakenTime,
  134. io:format("====> Re-registeration rate (with propagation): ~p/sec.~n~n", [ReRegistrationRate]),
  135. %% kill all processes
  136. maps:foreach(fun(_Node, Pids) ->
  137. lists:foreach(fun(Pid) -> exit(Pid, kill) end, Pids)
  138. end, PidsMap),
  139. %% wait all unregistered
  140. {KillPropagationTimeMs, _} = timer:tc(?MODULE, wait_unregistration_propagation, []),
  141. KillPropagationTime = KillPropagationTimeMs / 1000000,
  142. io:format("----> Time to propagate killed process to to master: ~p secs.~n", [KillPropagationTime]),
  143. KillRate = ProcessCount / KillPropagationTime,
  144. io:format("====> Unregistered after kill rate (with propagation): ~p/sec.~n~n", [KillRate]),
  145. %% stop node
  146. init:stop().
  147. register_on_node(CollectorPid, WorkersPerNode, FromName, Pids) ->
  148. %% split pids in workers
  149. PidsPerNode = round(length(Pids) / WorkersPerNode),
  150. {WorkerInfo, []} = lists:foldl(fun(I, {WInfo, RPids}) ->
  151. {WorkerPids, RestOfPids} = case I of
  152. WorkersPerNode ->
  153. %% last in the loop, get remaining pids
  154. {RPids, []};
  155. _ ->
  156. %% get portion of pids
  157. lists:split(PidsPerNode, RPids)
  158. end,
  159. WorkerFromName = FromName + (PidsPerNode * (I - 1)),
  160. {[{WorkerFromName, WorkerPids} | WInfo], RestOfPids}
  161. end, {[], Pids}, lists:seq(1, WorkersPerNode)),
  162. %% start measure, spawn time is irrelevant
  163. StartAt = os:system_time(millisecond),
  164. %% spawn workers
  165. ReplyPid = self(),
  166. lists:foreach(fun({WorkerFromName, WorkerPids}) ->
  167. spawn(fun() ->
  168. worker_register_on_node(WorkerFromName, WorkerPids),
  169. ReplyPid ! done
  170. end)
  171. end, WorkerInfo),
  172. %% wait
  173. wait_register_on_node(CollectorPid, StartAt, WorkersPerNode).
  174. worker_register_on_node(_Name, []) -> ok;
  175. worker_register_on_node(Name, [Pid | PidsTail]) ->
  176. ok = syn:register(Name, Pid),
  177. worker_register_on_node(Name + 1, PidsTail).
  178. wait_register_on_node(CollectorPid, StartAt, 0) ->
  179. Time = (os:system_time(millisecond) - StartAt) / 1000,
  180. io:format("----> Registered on node ~p on ~p secs.~n", [node(), Time]),
  181. CollectorPid ! {done, node(), Time};
  182. wait_register_on_node(CollectorPid, StartAt, WorkersRemainingCount) ->
  183. receive
  184. done -> wait_register_on_node(CollectorPid, StartAt, WorkersRemainingCount - 1)
  185. end.
  186. unregister_on_node(CollectorPid, WorkersPerNode, FromName, ToName) ->
  187. %% split pids in workers
  188. ProcessesPerNode = ToName - FromName + 1,
  189. ProcessesPerWorker = round(ProcessesPerNode / WorkersPerNode),
  190. WorkerInfo = lists:foldl(fun(I, Acc) ->
  191. {WorkerFromName, WorkerToName} = case I of
  192. WorkersPerNode ->
  193. %% last in the loop
  194. {FromName + (I - 1) * ProcessesPerWorker, ToName};
  195. _ ->
  196. {FromName + (I - 1) * ProcessesPerWorker, FromName + I * ProcessesPerWorker - 1}
  197. end,
  198. [{WorkerFromName, WorkerToName} | Acc]
  199. end, [], lists:seq(1, WorkersPerNode)),
  200. %% start measure, spawn time is irrelevant
  201. StartAt = os:system_time(millisecond),
  202. %% spawn workers
  203. ReplyPid = self(),
  204. lists:foreach(fun({WorkerFromName, WorkerToName}) ->
  205. spawn(fun() ->
  206. worker_unregister_on_node(WorkerFromName, WorkerToName),
  207. ReplyPid ! done
  208. end)
  209. end, WorkerInfo),
  210. %% wait
  211. wait_unregister_on_node(CollectorPid, StartAt, WorkersPerNode).
  212. worker_unregister_on_node(FromName, ToName) when FromName > ToName -> ok;
  213. worker_unregister_on_node(Name, ToName) ->
  214. ok = syn:unregister(Name),
  215. worker_unregister_on_node(Name + 1, ToName).
  216. wait_unregister_on_node(CollectorPid, StartAt, 0) ->
  217. Time = (os:system_time(millisecond) - StartAt) / 1000,
  218. io:format("----> Unregistered on node ~p on ~p secs.~n", [node(), Time]),
  219. CollectorPid ! {done, node(), Time};
  220. wait_unregister_on_node(CollectorPid, StartAt, WorkersRemainingCount) ->
  221. receive
  222. done -> wait_unregister_on_node(CollectorPid, StartAt, WorkersRemainingCount - 1)
  223. end.
  224. start_processes(Count) ->
  225. start_processes(Count, []).
  226. start_processes(0, Pids) ->
  227. Pids;
  228. start_processes(Count, Pids) ->
  229. Pid = spawn(fun process_loop/0),
  230. start_processes(Count - 1, [Pid | Pids]).
  231. process_loop() ->
  232. receive
  233. _ -> ok
  234. end.
  235. wait_from_all_remote_nodes([], Times) -> Times;
  236. wait_from_all_remote_nodes([RemoteNode | Tail], Times) ->
  237. receive
  238. {done, RemoteNode, Time} ->
  239. wait_from_all_remote_nodes(Tail, [Time | Times])
  240. end.
  241. wait_registration_propagation(ProcessCount) ->
  242. case syn:registry_count(default) of
  243. ProcessCount ->
  244. ok;
  245. _ ->
  246. timer:sleep(50),
  247. wait_registration_propagation(ProcessCount)
  248. end.
  249. wait_unregistration_propagation() ->
  250. case syn:registry_count(default) of
  251. 0 ->
  252. ok;
  253. _ ->
  254. timer:sleep(50),
  255. wait_unregistration_propagation()
  256. end.
  257. start_profiling() ->
  258. {ok, P} = eprof:start(),
  259. eprof:start_profiling(erlang:processes() -- [P]).
  260. stop_profiling() ->
  261. eprof:stop_profiling(),
  262. eprof:analyze(total).