syn_netsplits_SUITE.erl 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. %% ==========================================================================================================
  2. %% Syn - A global process registry.
  3. %%
  4. %% Copyright (C) 2015, Roberto Ostinelli <roberto@ostinelli.net>.
  5. %% All rights reserved.
  6. %%
  7. %% The MIT License (MIT)
  8. %%
  9. %% Copyright (c) 2015 Roberto Ostinelli
  10. %%
  11. %% Permission is hereby granted, free of charge, to any person obtaining a copy
  12. %% of this software and associated documentation files (the "Software"), to deal
  13. %% in the Software without restriction, including without limitation the rights
  14. %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. %% copies of the Software, and to permit persons to whom the Software is
  16. %% furnished to do so, subject to the following conditions:
  17. %%
  18. %% The above copyright notice and this permission notice shall be included in
  19. %% all copies or substantial portions of the Software.
  20. %%
  21. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. %% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. %% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. %% THE SOFTWARE.
  28. -module(syn_netsplits_SUITE).
  29. %% callbacks
  30. -export([all/0]).
  31. -export([init_per_suite/1, end_per_suite/1]).
  32. -export([groups/0, init_per_group/2, end_per_group/2]).
  33. -export([init_per_testcase/2, end_per_testcase/2]).
  34. %% tests
  35. -export([
  36. two_nodes_netsplit_when_there_are_no_conflicts/1,
  37. two_nodes_netsplit_kill_resolution_when_there_are_conflicts/1,
  38. two_nodes_netsplit_callback_resolution_when_there_are_conflicts/1
  39. ]).
  40. -export([
  41. three_nodes_netsplit_kill_resolution_when_there_are_conflicts/1
  42. ]).
  43. %% internal
  44. -export([process_reply_main/0]).
  45. -export([netsplit_conflicting_process_callback_dummy/2]).
  46. %% include
  47. -include_lib("common_test/include/ct.hrl").
  48. %% ===================================================================
  49. %% Callbacks
  50. %% ===================================================================
  51. %% -------------------------------------------------------------------
  52. %% Function: all() -> GroupsAndTestCases | {skip,Reason}
  53. %% GroupsAndTestCases = [{group,GroupName} | TestCase]
  54. %% GroupName = atom()
  55. %% TestCase = atom()
  56. %% Reason = term()
  57. %% -------------------------------------------------------------------
  %% Common Test callback listing what the suite runs: one group of netsplit
  %% tests for two nodes and one group for three nodes.
  all() ->
      TwoNodes = {group, two_nodes_netsplits},
      ThreeNodes = {group, three_nodes_netsplits},
      [TwoNodes, ThreeNodes].
  63. %% -------------------------------------------------------------------
  64. %% Function: groups() -> [Group]
  65. %% Group = {GroupName,Properties,GroupsAndTestCases}
  66. %% GroupName = atom()
  67. %% Properties = [parallel | sequence | Shuffle | {RepeatType,N}]
  68. %% GroupsAndTestCases = [Group | {group,GroupName} | TestCase]
  69. %% TestCase = atom()
  70. %% Shuffle = shuffle | {shuffle,{integer(),integer(),integer()}}
  71. %% RepeatType = repeat | repeat_until_all_ok | repeat_until_all_fail |
  72. %% repeat_until_any_ok | repeat_until_any_fail
  73. %% N = integer() | forever
  74. %% -------------------------------------------------------------------
  %% Common Test callback defining the test groups. Both groups carry the
  %% `shuffle' property.
  groups() ->
      TwoNodesCases = [
          two_nodes_netsplit_when_there_are_no_conflicts,
          two_nodes_netsplit_kill_resolution_when_there_are_conflicts,
          two_nodes_netsplit_callback_resolution_when_there_are_conflicts
      ],
      ThreeNodesCases = [
          three_nodes_netsplit_kill_resolution_when_there_are_conflicts
      ],
      [
          {two_nodes_netsplits, [shuffle], TwoNodesCases},
          {three_nodes_netsplits, [shuffle], ThreeNodesCases}
      ].
  86. %% -------------------------------------------------------------------
  87. %% Function: init_per_suite(Config0) ->
  88. %% Config1 | {skip,Reason} |
  89. %% {skip_and_save,Reason,Config1}
  90. %% Config0 = Config1 = [tuple()]
  91. %% Reason = term()
  92. %% -------------------------------------------------------------------
  %% Common Test callback run once before the suite.
  %%
  %% Starts the slave node `syn_slave' through the test helper and prepends its
  %% short name (`slave_node_short_name') and the node returned by the helper
  %% (`slave_node') to Config. Crashes with badmatch if the helper does not
  %% return {ok, Node}.
  init_per_suite(Config) ->
      ShortName = syn_slave,
      {ok, Node} = syn_test_suite_helper:start_slave(ShortName),
      SuiteEntries = [
          {slave_node_short_name, ShortName},
          {slave_node, Node}
      ],
      SuiteEntries ++ Config.
  104. %% -------------------------------------------------------------------
  105. %% Function: end_per_suite(Config0) -> void() | {save_config,Config1}
  106. %% Config0 = Config1 = [tuple()]
  107. %% -------------------------------------------------------------------
  %% Common Test callback run once after the suite: stops the slave node whose
  %% short name was stored under `slave_node_short_name' in Config.
  end_per_suite(Config) ->
      syn_test_suite_helper:stop_slave(
          proplists:get_value(slave_node_short_name, Config)
      ).
  113. %% -------------------------------------------------------------------
  114. %% Function: init_per_group(GroupName, Config0) ->
  115. %% Config1 | {skip,Reason} |
  116. %% {skip_and_save,Reason,Config1}
  117. %% GroupName = atom()
  118. %% Config0 = Config1 = [tuple()]
  119. %% Reason = term()
  120. %% -------------------------------------------------------------------
  %% Common Test callback run before each group.
  %%
  %% For the `three_nodes_netsplits' group a second slave node, `syn_slave_2',
  %% is started and its short name and node are prepended to Config under
  %% `slave_node_2_short_name' and `slave_node_2'. Every other group receives
  %% Config untouched.
  init_per_group(three_nodes_netsplits, Config) ->
      ShortName = syn_slave_2,
      {ok, Node} = syn_test_suite_helper:start_slave(ShortName),
      GroupEntries = [
          {slave_node_2_short_name, ShortName},
          {slave_node_2, Node}
      ],
      GroupEntries ++ Config;
  init_per_group(_OtherGroup, Config) ->
      Config.
  133. %% -------------------------------------------------------------------
  134. %% Function: end_per_group(GroupName, Config0) ->
  135. %% void() | {save_config,Config1}
  136. %% GroupName = atom()
  137. %% Config0 = Config1 = [tuple()]
  138. %% -------------------------------------------------------------------
  %% Common Test callback run after each group.
  %%
  %% For the `three_nodes_netsplits' group the second slave node (short name
  %% read from `slave_node_2_short_name' in Config) is stopped. Every other
  %% group simply yields ok.
  end_per_group(three_nodes_netsplits, Config) ->
      syn_test_suite_helper:stop_slave(
          proplists:get_value(slave_node_2_short_name, Config)
      );
  end_per_group(_OtherGroup, _Config) ->
      ok.
  145. % ----------------------------------------------------------------------------------------------------------
  146. % Function: init_per_testcase(TestCase, Config0) ->
  147. % Config1 | {skip,Reason} | {skip_and_save,Reason,Config1}
  148. % TestCase = atom()
  149. % Config0 = Config1 = [tuple()]
  150. % Reason = term()
  151. % ----------------------------------------------------------------------------------------------------------
  %% Common Test callback run before every test case.
  %%
  %% Sets the mnesia application parameter `schema_location' to `ram' on the
  %% local node and on the slave node stored under `slave_node' in Config, then
  %% returns Config unchanged.
  %%
  %% The remote parameter is set with application:set_env/3 over rpc. There is
  %% no mnesia:schema_location/1 function, so calling it remotely only produces
  %% a {badrpc, _} tuple that went unnoticed and left the slave unconfigured.
  %% Both calls are matched against ok so that an unreachable slave fails the
  %% setup visibly instead of being silently ignored.
  init_per_testcase(_TestCase, Config) ->
      %% get slave
      SlaveNode = proplists:get_value(slave_node, Config),
      %% set schema location on both nodes
      ok = application:set_env(mnesia, schema_location, ram),
      ok = rpc:call(SlaveNode, application, set_env, [mnesia, schema_location, ram]),
      %% return
      Config.
  160. % ----------------------------------------------------------------------------------------------------------
  161. % Function: end_per_testcase(TestCase, Config0) ->
  162. % void() | {save_config,Config1} | {fail,Reason}
  163. % TestCase = atom()
  164. % Config0 = Config1 = [tuple()]
  165. % Reason = term()
  166. % ----------------------------------------------------------------------------------------------------------
  %% Common Test callback run after every test case: hands the slave node found
  %% under `slave_node' in Config to the helper's clean_after_test/1.
  end_per_testcase(_TestCase, Config) ->
      syn_test_suite_helper:clean_after_test(
          proplists:get_value(slave_node, Config)
      ).
  171. %% ===================================================================
  172. %% Tests
  173. %% ===================================================================
  %% Test case: netsplit between two nodes when no key is in conflict.
  %%
  %% Registers three distinct keys (one local process and two processes living
  %% on the slave node), checks that both nodes hold three entries and list
  %% each other as active replicas, disconnects the slave and checks that the
  %% local table shrinks to one entry with the local node as only replica, then
  %% reconnects and checks that both nodes are back to three entries and
  %% resolve every key to the original pid.
  two_nodes_netsplit_when_there_are_no_conflicts(Config) ->
      Remote = proplists:get_value(slave_node, Config),
      Here = node(),
      %% shorthands for querying the registry table locally / on the slave
      LocalInfo = fun(Item) -> mnesia:table_info(syn_processes_table, Item) end,
      RemoteInfo = fun(Item) ->
          rpc:call(Remote, mnesia, table_info, [syn_processes_table, Item])
      end,
      %% a replica list is complete when it is exactly the slave plus this node
      AssertBothReplicas = fun(Replicas) ->
          2 = length(Replicas),
          true = lists:member(Remote, Replicas),
          true = lists:member(Here, Replicas)
      end,
      %% start syn on both nodes
      ok = syn:start(),
      ok = rpc:call(Remote, syn, start, []),
      timer:sleep(100),
      %% one process here, two on the slave
      PidHere = syn_test_suite_helper:start_process(),
      PidRemoteA = syn_test_suite_helper:start_process(Remote),
      PidRemoteB = syn_test_suite_helper:start_process(Remote),
      %% the first slave process is registered from this node, the second one
      %% from the slave node itself
      ok = syn:register(local_pid, PidHere),
      ok = syn:register(slave_pid_local, PidRemoteA),
      ok = rpc:call(Remote, syn, register, [slave_pid_slave, PidRemoteB]),
      timer:sleep(100),
      %% both nodes see everything before the split
      3 = LocalInfo(size),
      3 = RemoteInfo(size),
      AssertBothReplicas(LocalInfo(active_replicas)),
      AssertBothReplicas(RemoteInfo(active_replicas)),
      %% simulate net split
      syn_test_suite_helper:disconnect_node(Remote),
      timer:sleep(1000),
      1 = LocalInfo(size),
      [Here] = LocalInfo(active_replicas),
      %% reconnect
      syn_test_suite_helper:connect_node(Remote),
      timer:sleep(1000),
      3 = LocalInfo(size),
      3 = RemoteInfo(size),
      AssertBothReplicas(LocalInfo(active_replicas)),
      AssertBothReplicas(RemoteInfo(active_replicas)),
      %% every key must resolve to its original pid, first locally then remotely
      Registrations = [
          {local_pid, PidHere},
          {slave_pid_local, PidRemoteA},
          {slave_pid_slave, PidRemoteB}
      ],
      lists:foreach(
          fun({Key, Expected}) -> Expected = syn:find_by_key(Key) end,
          Registrations
      ),
      lists:foreach(
          fun({Key, Expected}) ->
              Expected = rpc:call(Remote, syn, find_by_key, [Key])
          end,
          Registrations
      ),
      %% kill processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(PidRemoteA),
      syn_test_suite_helper:kill_process(PidRemoteB).
  %% Test case: netsplit between two nodes with a conflicting key and no
  %% custom conflict callback configured by this test.
  %%
  %% Registers `conflicting_key' for a process on the slave node, disconnects
  %% the slave, checks that the local table is empty, registers the same key
  %% for a local process, reconnects and checks that each node ends up with a
  %% single entry whose pid is one of the two contenders.
  two_nodes_netsplit_kill_resolution_when_there_are_conflicts(Config) ->
      Remote = proplists:get_value(slave_node, Config),
      Here = node(),
      %% shorthands for the registry table size locally / on the slave
      SizeHere = fun() -> mnesia:table_info(syn_processes_table, size) end,
      SizeRemote = fun() ->
          rpc:call(Remote, mnesia, table_info, [syn_processes_table, size])
      end,
      %% start syn on both nodes
      ok = syn:start(),
      ok = rpc:call(Remote, syn, start, []),
      timer:sleep(100),
      %% one contender per node
      PidHere = syn_test_suite_helper:start_process(),
      PidRemote = syn_test_suite_helper:start_process(Remote),
      %% the slave process owns the key first
      ok = syn:register(conflicting_key, PidRemote),
      timer:sleep(100),
      1 = SizeHere(),
      1 = SizeRemote(),
      PidRemote = syn:find_by_key(conflicting_key),
      %% simulate net split
      syn_test_suite_helper:disconnect_node(Remote),
      timer:sleep(1000),
      0 = SizeHere(),
      [Here] = mnesia:table_info(syn_processes_table, active_replicas),
      %% while split, claim the same key for the local process
      ok = syn:register(conflicting_key, PidHere),
      PidHere = syn:find_by_key(conflicting_key),
      %% reconnect
      syn_test_suite_helper:connect_node(Remote),
      timer:sleep(1000),
      1 = SizeHere(),
      1 = SizeRemote(),
      %% exactly one of the two contenders must have kept the key
      Winner = syn:find_by_key(conflicting_key),
      true = (Winner =:= PidHere orelse Winner =:= PidRemote),
      %% kill processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(PidRemote).
  %% Test case: netsplit between two nodes with a conflicting key, resolved
  %% through the configured conflict callback.
  %%
  %% Both contenders run process_reply_main/0, which on receiving `shutdown'
  %% sends {exited, Pid} to the globally registered name
  %% `syn_netsplits_SUITE_result'. This test process registers itself under
  %% that name, creates the conflict across a simulated netsplit, and after the
  %% reconnect expects that notification from the contender that did not keep
  %% the key.
  %%
  %% NOTE(review): the callback is presumably set by syn-test.config to
  %% netsplit_conflicting_process_callback_dummy/2 - confirm against the config.
  two_nodes_netsplit_callback_resolution_when_there_are_conflicts(Config) ->
      %% get slave
      SlaveNode = proplists:get_value(slave_node, Config),
      CurrentNode = node(),
      %% load configuration variables from syn-test.config => this sets the
      %% netsplit_conflicting_process_callback option
      syn_test_suite_helper:set_environment_variables(),
      syn_test_suite_helper:set_environment_variables(SlaveNode),
      %% start syn
      ok = syn:start(),
      ok = rpc:call(SlaveNode, syn, start, []),
      timer:sleep(100),
      %% start processes
      LocalPid = syn_test_suite_helper:start_process(fun process_reply_main/0),
      SlavePid = syn_test_suite_helper:start_process(SlaveNode, fun process_reply_main/0),
      %% register this process as the global receiver of exit notifications;
      %% a failed registration would otherwise only surface later as a
      %% misleading "did not receive message" timeout
      yes = global:register_name(syn_netsplits_SUITE_result, self()),
      %% register
      ok = syn:register(conflicting_key, SlavePid),
      timer:sleep(100),
      %% check tables
      1 = mnesia:table_info(syn_processes_table, size),
      1 = rpc:call(SlaveNode, mnesia, table_info, [syn_processes_table, size]),
      %% check process
      SlavePid = syn:find_by_key(conflicting_key),
      %% simulate net split
      syn_test_suite_helper:disconnect_node(SlaveNode),
      timer:sleep(1000),
      %% check tables
      0 = mnesia:table_info(syn_processes_table, size),
      [CurrentNode] = mnesia:table_info(syn_processes_table, active_replicas),
      %% now register the local pid with the same key
      ok = syn:register(conflicting_key, LocalPid),
      %% check process
      LocalPid = syn:find_by_key(conflicting_key),
      %% reconnect
      syn_test_suite_helper:connect_node(SlaveNode),
      timer:sleep(1000),
      %% check tables
      1 = mnesia:table_info(syn_processes_table, size),
      1 = rpc:call(SlaveNode, mnesia, table_info, [syn_processes_table, size]),
      %% check process
      FoundPid = syn:find_by_key(conflicting_key),
      true = lists:member(FoundPid, [LocalPid, SlavePid]),
      %% the contender that lost the key is whichever pid is left over
      [KilledPid] = lists:delete(FoundPid, [LocalPid, SlavePid]),
      %% check message received from killed pid
      receive
          {exited, KilledPid} -> ok
      after 2000 ->
          ct:fail(conflicting_process_did_not_receive_message)
      end,
      %% kill processes
      syn_test_suite_helper:kill_process(LocalPid),
      syn_test_suite_helper:kill_process(SlavePid).
  %% Test case: netsplit in a three-node setup with a conflicting key.
  %%
  %% Registers `conflicting_key' for a process on the first slave and
  %% `slave_2_process' for a process on the second slave, disconnects the first
  %% slave, checks that this node and the second slave each keep one entry and
  %% are both active replicas, registers `conflicting_key' for a local process,
  %% reconnects and checks that all three nodes hold two entries, that the
  %% conflicting key belongs to one of the two contenders and that
  %% `slave_2_process' still resolves to the same pid on every node.
  three_nodes_netsplit_kill_resolution_when_there_are_conflicts(Config) ->
      NodeA = proplists:get_value(slave_node, Config),
      NodeB = proplists:get_value(slave_node_2, Config),
      Here = node(),
      %% shorthands for the registry table size locally / on a given node
      SizeHere = fun() -> mnesia:table_info(syn_processes_table, size) end,
      SizeOn = fun(Node) ->
          rpc:call(Node, mnesia, table_info, [syn_processes_table, size])
      end,
      %% start syn on all three nodes
      ok = syn:start(),
      ok = rpc:call(NodeA, syn, start, []),
      ok = rpc:call(NodeB, syn, start, []),
      timer:sleep(100),
      %% one process per node
      PidHere = syn_test_suite_helper:start_process(),
      PidA = syn_test_suite_helper:start_process(NodeA),
      PidB = syn_test_suite_helper:start_process(NodeB),
      %% register
      ok = syn:register(conflicting_key, PidA),
      ok = syn:register(slave_2_process, PidB),
      timer:sleep(100),
      2 = SizeHere(),
      2 = SizeOn(NodeA),
      2 = SizeOn(NodeB),
      PidA = syn:find_by_key(conflicting_key),
      %% simulate net split of the first slave only
      syn_test_suite_helper:disconnect_node(NodeA),
      timer:sleep(1000),
      1 = SizeHere(),
      1 = SizeOn(NodeB),
      ReplicasWhileSplit = mnesia:table_info(syn_processes_table, active_replicas),
      true = lists:member(Here, ReplicasWhileSplit),
      true = lists:member(NodeB, ReplicasWhileSplit),
      %% while split, claim the conflicting key for the local process
      ok = syn:register(conflicting_key, PidHere),
      PidHere = syn:find_by_key(conflicting_key),
      %% reconnect
      syn_test_suite_helper:connect_node(NodeA),
      timer:sleep(1000),
      2 = SizeHere(),
      2 = SizeOn(NodeA),
      2 = SizeOn(NodeB),
      %% one of the two contenders must have kept the conflicting key
      Winner = syn:find_by_key(conflicting_key),
      true = (Winner =:= PidHere orelse Winner =:= PidA),
      %% the non-conflicting key is visible, unchanged, from every node
      PidB = syn:find_by_key(slave_2_process),
      PidB = rpc:call(NodeA, syn, find_by_key, [slave_2_process]),
      PidB = rpc:call(NodeB, syn, find_by_key, [slave_2_process]),
      %% kill processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(PidA),
      syn_test_suite_helper:kill_process(PidB).
  382. %% ===================================================================
  383. %% Internal
  384. %% ===================================================================
  %% Body of the contender processes used by the callback-resolution test.
  %%
  %% Blocks until the atom `shutdown' arrives, waits 500 ms, then sends
  %% {exited, self()} to the globally registered name
  %% `syn_netsplits_SUITE_result' and returns.
  process_reply_main() ->
      receive
          shutdown ->
              %% wait for global processes to propagate
              ok = timer:sleep(500),
              Notification = {exited, self()},
              global:send(syn_netsplits_SUITE_result, Notification)
      end.
  %% Conflict-resolution callback used by the tests: ignores the key and sends
  %% the atom `shutdown' to Pid. Returns the message that was sent.
  netsplit_conflicting_process_callback_dummy(_Key, Pid) ->
      erlang:send(Pid, shutdown).