syn_netsplits_SUITE.erl 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. %% ==========================================================================================================
  2. %% Syn - A global process registry.
  3. %%
  4. %% Copyright (C) 2015, Roberto Ostinelli <roberto@ostinelli.net>.
  5. %% All rights reserved.
  6. %%
  7. %% The MIT License (MIT)
  8. %%
  9. %% Copyright (c) 2015 Roberto Ostinelli
  10. %%
  11. %% Permission is hereby granted, free of charge, to any person obtaining a copy
  12. %% of this software and associated documentation files (the "Software"), to deal
  13. %% in the Software without restriction, including without limitation the rights
  14. %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. %% copies of the Software, and to permit persons to whom the Software is
  16. %% furnished to do so, subject to the following conditions:
  17. %%
  18. %% The above copyright notice and this permission notice shall be included in
  19. %% all copies or substantial portions of the Software.
  20. %%
  21. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. %% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. %% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. %% THE SOFTWARE.
  -module(syn_netsplits_SUITE).

  %% Common Test callbacks
  -export([
      all/0,
      groups/0,
      init_per_suite/1,
      end_per_suite/1,
      init_per_group/2,
      end_per_group/2,
      init_per_testcase/2,
      end_per_testcase/2
  ]).

  %% test cases (two-node group first, then the three-node group)
  -export([
      two_nodes_netsplit_when_there_are_no_conflicts/1,
      two_nodes_netsplit_kill_resolution_when_there_are_conflicts/1,
      two_nodes_netsplit_message_resolution_when_there_are_conflicts/1,
      three_nodes_netsplit_kill_resolution_when_there_are_conflicts/1
  ]).

  %% internal: process body used by the message-resolution test
  -export([process_reply_main/0]).
  45. %% include
  46. -include_lib("common_test/include/ct.hrl").
  47. %% ===================================================================
  48. %% Callbacks
  49. %% ===================================================================
  50. %% -------------------------------------------------------------------
  51. %% Function: all() -> GroupsAndTestCases | {skip,Reason}
  52. %% GroupsAndTestCases = [{group,GroupName} | TestCase]
  53. %% GroupName = atom()
  54. %% TestCase = atom()
  55. %% Reason = term()
  56. %% -------------------------------------------------------------------
  %% Lists the groups to run: the two-node netsplit group, then the three-node one.
  all() ->
      [{group, GroupName} || GroupName <- [two_nodes_netsplits, three_nodes_netsplits]].
  62. %% -------------------------------------------------------------------
  63. %% Function: groups() -> [Group]
  64. %% Group = {GroupName,Properties,GroupsAndTestCases}
  65. %% GroupName = atom()
  66. %% Properties = [parallel | sequence | Shuffle | {RepeatType,N}]
  67. %% GroupsAndTestCases = [Group | {group,GroupName} | TestCase]
  68. %% TestCase = atom()
  69. %% Shuffle = shuffle | {shuffle,{integer(),integer(),integer()}}
  70. %% RepeatType = repeat | repeat_until_all_ok | repeat_until_all_fail |
  71. %% repeat_until_any_ok | repeat_until_any_fail
  72. %% N = integer() | forever
  73. %% -------------------------------------------------------------------
  %% Defines the two test groups; both run their test cases in shuffled order.
  groups() ->
      TwoNodesTests = [
          two_nodes_netsplit_when_there_are_no_conflicts,
          two_nodes_netsplit_kill_resolution_when_there_are_conflicts,
          two_nodes_netsplit_message_resolution_when_there_are_conflicts
      ],
      ThreeNodesTests = [
          three_nodes_netsplit_kill_resolution_when_there_are_conflicts
      ],
      [
          {two_nodes_netsplits, [shuffle], TwoNodesTests},
          {three_nodes_netsplits, [shuffle], ThreeNodesTests}
      ].
  85. %% -------------------------------------------------------------------
  86. %% Function: init_per_suite(Config0) ->
  87. %% Config1 | {skip,Reason} |
  88. %% {skip_and_save,Reason,Config1}
  89. %% Config0 = Config1 = [tuple()]
  90. %% Reason = term()
  91. %% -------------------------------------------------------------------
  %% Starts the first slave node for the whole suite and records both its short
  %% name and the node name returned by the helper in the config.
  init_per_suite(Config) ->
      ShortName = syn_slave,
      %% the match crashes the suite setup if the slave cannot be started
      {ok, Node} = syn_test_suite_helper:start_slave(ShortName),
      [{slave_node_short_name, ShortName}, {slave_node, Node} | Config].
  103. %% -------------------------------------------------------------------
  104. %% Function: end_per_suite(Config0) -> void() | {save_config,Config1}
  105. %% Config0 = Config1 = [tuple()]
  106. %% -------------------------------------------------------------------
  %% Stops the slave node started in init_per_suite/1, looked up by its short name.
  end_per_suite(Config) ->
      syn_test_suite_helper:stop_slave(proplists:get_value(slave_node_short_name, Config)).
  112. %% -------------------------------------------------------------------
  113. %% Function: init_per_group(GroupName, Config0) ->
  114. %% Config1 | {skip,Reason} |
  115. %% {skip_and_save,Reason,Config1}
  116. %% GroupName = atom()
  117. %% Config0 = Config1 = [tuple()]
  118. %% Reason = term()
  119. %% -------------------------------------------------------------------
  %% The three-node group needs a second slave node; its short name and node name
  %% are added to the config. Every other group gets the config back unchanged.
  init_per_group(three_nodes_netsplits, Config) ->
      ShortName = syn_slave_2,
      %% the match crashes the group setup if the second slave cannot be started
      {ok, Node} = syn_test_suite_helper:start_slave(ShortName),
      [{slave_node_2_short_name, ShortName}, {slave_node_2, Node} | Config];
  init_per_group(_GroupName, Config) ->
      Config.
  132. %% -------------------------------------------------------------------
  133. %% Function: end_per_group(GroupName, Config0) ->
  134. %% void() | {save_config,Config1}
  135. %% GroupName = atom()
  136. %% Config0 = Config1 = [tuple()]
  137. %% -------------------------------------------------------------------
  %% Stops the second slave node after the three-node group; other groups have
  %% nothing to tear down.
  end_per_group(three_nodes_netsplits, Config) ->
      syn_test_suite_helper:stop_slave(proplists:get_value(slave_node_2_short_name, Config));
  end_per_group(_GroupName, _Config) ->
      ok.
  144. % ----------------------------------------------------------------------------------------------------------
  145. % Function: init_per_testcase(TestCase, Config0) ->
  146. % Config1 | {skip,Reason} | {skip_and_save,Reason,Config1}
  147. % TestCase = atom()
  148. % Config0 = Config1 = [tuple()]
  149. % Reason = term()
  150. % ----------------------------------------------------------------------------------------------------------
  %% Sets the mnesia schema location to ram on the local node and on the first
  %% slave node before every test case, then returns Config unchanged.
  %%
  %% Crashes with a badmatch if the slave node cannot be reached, so a broken
  %% setup fails here rather than in the middle of a test.
  init_per_testcase(_TestCase, Config) ->
      SlaveNode = proplists:get_value(slave_node, Config),
      ok = application:set_env(mnesia, schema_location, ram),
      %% schema_location is an application environment parameter of mnesia; the
      %% mnesia module exports no schema_location/1, so it must be set through
      %% application:set_env/3 on the slave as well.
      ok = rpc:call(SlaveNode, application, set_env, [mnesia, schema_location, ram]),
      Config.
  159. % ----------------------------------------------------------------------------------------------------------
  160. % Function: end_per_testcase(TestCase, Config0) ->
  161. % void() | {save_config,Config1} | {fail,Reason}
  162. % TestCase = atom()
  163. % Config0 = Config1 = [tuple()]
  164. % Reason = term()
  165. % ----------------------------------------------------------------------------------------------------------
  %% Hands the first slave node to the helper's per-test cleanup.
  %% NOTE(review): the second slave node of the three-node group is not passed
  %% to the cleanup helper here - confirm that this is intended.
  end_per_testcase(_TestCase, Config) ->
      syn_test_suite_helper:clean_after_test(proplists:get_value(slave_node, Config)).
  170. %% ===================================================================
  171. %% Tests
  172. %% ===================================================================
  %% Registers three processes under distinct keys across two nodes, splits the
  %% nodes, reconnects them, and expects all three registrations to be visible
  %% again from both sides.
  two_nodes_netsplit_when_there_are_no_conflicts(Config) ->
      Slave = proplists:get_value(slave_node, Config),
      ThisNode = node(),
      BothNodes = [Slave, ThisNode],
      SlaveTableInfo = fun(Item) ->
          rpc:call(Slave, mnesia, table_info, [syn_processes_table, Item])
      end,
      %% boot syn on both nodes
      ok = syn:start(),
      ok = rpc:call(Slave, syn, start, []),
      timer:sleep(100),
      %% one process on this node, two on the slave
      PidHere = syn_test_suite_helper:start_process(),
      SlavePidA = syn_test_suite_helper:start_process(Slave),
      SlavePidB = syn_test_suite_helper:start_process(Slave),
      %% SlavePidA is registered through this node, SlavePidB through the slave
      ok = syn:register(local_pid, PidHere),
      ok = syn:register(slave_pid_local, SlavePidA),
      ok = rpc:call(Slave, syn, register, [slave_pid_slave, SlavePidB]),
      timer:sleep(100),
      %% before the split: 3 entries and 2 active replicas seen from each node
      3 = mnesia:table_info(syn_processes_table, size),
      3 = SlaveTableInfo(size),
      ReplicasHere = mnesia:table_info(syn_processes_table, active_replicas),
      2 = length(ReplicasHere),
      lists:foreach(fun(N) -> true = lists:member(N, ReplicasHere) end, BothNodes),
      ReplicasOnSlave = SlaveTableInfo(active_replicas),
      2 = length(ReplicasOnSlave),
      lists:foreach(fun(N) -> true = lists:member(N, ReplicasOnSlave) end, BothNodes),
      %% simulate the net split
      syn_test_suite_helper:disconnect_node(Slave),
      timer:sleep(1000),
      %% during the split: a single entry and this node as the only replica
      1 = mnesia:table_info(syn_processes_table, size),
      [ThisNode] = mnesia:table_info(syn_processes_table, active_replicas),
      %% heal the split
      syn_test_suite_helper:connect_node(Slave),
      timer:sleep(1000),
      %% after the merge: same expectations as before the split
      3 = mnesia:table_info(syn_processes_table, size),
      3 = SlaveTableInfo(size),
      ReplicasHereAfter = mnesia:table_info(syn_processes_table, active_replicas),
      2 = length(ReplicasHereAfter),
      lists:foreach(fun(N) -> true = lists:member(N, ReplicasHereAfter) end, BothNodes),
      ReplicasOnSlaveAfter = SlaveTableInfo(active_replicas),
      2 = length(ReplicasOnSlaveAfter),
      lists:foreach(fun(N) -> true = lists:member(N, ReplicasOnSlaveAfter) end, BothNodes),
      %% every key resolves to its pid from both nodes
      PidHere = syn:find_by_key(local_pid),
      SlavePidA = syn:find_by_key(slave_pid_local),
      SlavePidB = syn:find_by_key(slave_pid_slave),
      PidHere = rpc:call(Slave, syn, find_by_key, [local_pid]),
      SlavePidA = rpc:call(Slave, syn, find_by_key, [slave_pid_local]),
      SlavePidB = rpc:call(Slave, syn, find_by_key, [slave_pid_slave]),
      %% tear down the test processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(SlavePidA),
      syn_test_suite_helper:kill_process(SlavePidB).
  %% Registers the same key on both sides of a two-node split (slave pid before
  %% the split, local pid during it) and expects exactly one of the two pids to
  %% own the key once the nodes are reconnected.
  two_nodes_netsplit_kill_resolution_when_there_are_conflicts(Config) ->
      Slave = proplists:get_value(slave_node, Config),
      ThisNode = node(),
      SlaveTableSize = fun() ->
          rpc:call(Slave, mnesia, table_info, [syn_processes_table, size])
      end,
      %% boot syn on both nodes
      ok = syn:start(),
      ok = rpc:call(Slave, syn, start, []),
      timer:sleep(100),
      %% one candidate process per node
      PidHere = syn_test_suite_helper:start_process(),
      PidOnSlave = syn_test_suite_helper:start_process(Slave),
      %% the slave's process takes the key first
      ok = syn:register(conflicting_key, PidOnSlave),
      timer:sleep(100),
      1 = mnesia:table_info(syn_processes_table, size),
      1 = SlaveTableSize(),
      PidOnSlave = syn:find_by_key(conflicting_key),
      %% simulate the net split
      syn_test_suite_helper:disconnect_node(Slave),
      timer:sleep(1000),
      %% during the split: empty local table, this node as the only replica
      0 = mnesia:table_info(syn_processes_table, size),
      [ThisNode] = mnesia:table_info(syn_processes_table, active_replicas),
      %% create the conflict: same key, local process
      ok = syn:register(conflicting_key, PidHere),
      PidHere = syn:find_by_key(conflicting_key),
      %% heal the split
      syn_test_suite_helper:connect_node(Slave),
      timer:sleep(1000),
      %% after the merge a single entry is left on each node
      1 = mnesia:table_info(syn_processes_table, size),
      1 = SlaveTableSize(),
      %% either candidate may have won the key
      Winner = syn:find_by_key(conflicting_key),
      true = lists:member(Winner, [PidHere, PidOnSlave]),
      %% tear down the test processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(PidOnSlave).
  %% Same conflict scenario as the kill-resolution test, but both candidate
  %% processes run process_reply_main/0. The test expects the process that lost
  %% the key to report {exited, Pid} to this test process through the global
  %% name syn_netsplits_SUITE_result.
  two_nodes_netsplit_message_resolution_when_there_are_conflicts(Config) ->
      Slave = proplists:get_value(slave_node, Config),
      ThisNode = node(),
      SlaveTableSize = fun() ->
          rpc:call(Slave, mnesia, table_info, [syn_processes_table, size])
      end,
      %% load configuration variables from syn-test.config on both nodes; this
      %% sets the netsplit_send_message_to_process option
      syn_test_suite_helper:set_environment_variables(),
      syn_test_suite_helper:set_environment_variables(Slave),
      %% boot syn on both nodes
      ok = syn:start(),
      ok = rpc:call(Slave, syn, start, []),
      timer:sleep(100),
      %% one replying candidate process per node
      PidHere = syn_test_suite_helper:start_process(fun process_reply_main/0),
      PidOnSlave = syn_test_suite_helper:start_process(Slave, fun process_reply_main/0),
      %% make this test process reachable under a global name
      Collector = self(),
      global:register_name(syn_netsplits_SUITE_result, Collector),
      %% the slave's process takes the key first
      ok = syn:register(conflicting_key, PidOnSlave),
      timer:sleep(100),
      1 = mnesia:table_info(syn_processes_table, size),
      1 = SlaveTableSize(),
      PidOnSlave = syn:find_by_key(conflicting_key),
      %% simulate the net split
      syn_test_suite_helper:disconnect_node(Slave),
      timer:sleep(1000),
      %% during the split: empty local table, this node as the only replica
      0 = mnesia:table_info(syn_processes_table, size),
      [ThisNode] = mnesia:table_info(syn_processes_table, active_replicas),
      %% create the conflict: same key, local process
      ok = syn:register(conflicting_key, PidHere),
      PidHere = syn:find_by_key(conflicting_key),
      %% heal the split
      syn_test_suite_helper:connect_node(Slave),
      timer:sleep(1000),
      %% after the merge a single entry is left on each node
      1 = mnesia:table_info(syn_processes_table, size),
      1 = SlaveTableSize(),
      %% either candidate may have won the key
      Winner = syn:find_by_key(conflicting_key),
      true = lists:member(Winner, [PidHere, PidOnSlave]),
      %% the other candidate is the one expected to report its exit
      [Loser | _] = lists:delete(Winner, [PidHere, PidOnSlave]),
      receive
          {exited, Loser} ->
              ok
      after 2000 ->
          %% deliberate badmatch: fails the test with a descriptive atom
          ok = conflicting_process_did_not_receive_message
      end,
      %% tear down the test processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(PidOnSlave).
  %% Three-node variant of the kill-resolution test: the first slave is split
  %% off while this node and the second slave stay connected. A conflicting
  %% registration is created during the split; after reconnecting, one of the
  %% two candidates must own the key and the second slave's unrelated
  %% registration must be visible from all three nodes.
  three_nodes_netsplit_kill_resolution_when_there_are_conflicts(Config) ->
      Slave1 = proplists:get_value(slave_node, Config),
      Slave2 = proplists:get_value(slave_node_2, Config),
      ThisNode = node(),
      TableSizeOn = fun(Node) ->
          rpc:call(Node, mnesia, table_info, [syn_processes_table, size])
      end,
      %% boot syn on all three nodes
      ok = syn:start(),
      ok = rpc:call(Slave1, syn, start, []),
      ok = rpc:call(Slave2, syn, start, []),
      timer:sleep(100),
      %% one process per node
      PidHere = syn_test_suite_helper:start_process(),
      PidOnSlave1 = syn_test_suite_helper:start_process(Slave1),
      PidOnSlave2 = syn_test_suite_helper:start_process(Slave2),
      %% first slave takes the conflicting key, second slave an unrelated one
      ok = syn:register(conflicting_key, PidOnSlave1),
      ok = syn:register(slave_2_process, PidOnSlave2),
      timer:sleep(100),
      %% both entries are visible on every node
      2 = mnesia:table_info(syn_processes_table, size),
      2 = TableSizeOn(Slave1),
      2 = TableSizeOn(Slave2),
      PidOnSlave1 = syn:find_by_key(conflicting_key),
      %% simulate the net split: only the first slave is disconnected
      syn_test_suite_helper:disconnect_node(Slave1),
      timer:sleep(1000),
      %% during the split: one entry left here and on the second slave, and both
      %% of these nodes are still active replicas
      1 = mnesia:table_info(syn_processes_table, size),
      1 = TableSizeOn(Slave2),
      ReplicasDuringSplit = mnesia:table_info(syn_processes_table, active_replicas),
      true = lists:member(ThisNode, ReplicasDuringSplit),
      true = lists:member(Slave2, ReplicasDuringSplit),
      %% create the conflict: same key, local process
      ok = syn:register(conflicting_key, PidHere),
      PidHere = syn:find_by_key(conflicting_key),
      %% heal the split
      syn_test_suite_helper:connect_node(Slave1),
      timer:sleep(1000),
      %% after the merge: two entries on every node again
      2 = mnesia:table_info(syn_processes_table, size),
      2 = TableSizeOn(Slave1),
      2 = TableSizeOn(Slave2),
      %% either candidate may have won the conflicting key
      Winner = syn:find_by_key(conflicting_key),
      true = lists:member(Winner, [PidHere, PidOnSlave1]),
      %% the unrelated registration resolves from all three nodes
      PidOnSlave2 = syn:find_by_key(slave_2_process),
      PidOnSlave2 = rpc:call(Slave1, syn, find_by_key, [slave_2_process]),
      PidOnSlave2 = rpc:call(Slave2, syn, find_by_key, [slave_2_process]),
      %% tear down the test processes
      syn_test_suite_helper:kill_process(PidHere),
      syn_test_suite_helper:kill_process(PidOnSlave1),
      syn_test_suite_helper:kill_process(PidOnSlave2).
  381. %% ===================================================================
  382. %% Internal
  383. %% ===================================================================
  %% Process body for the message-resolution test: blocks until it receives the
  %% atom shutdown, then reports {exited, Pid} to whichever process is registered
  %% globally as syn_netsplits_SUITE_result, and returns.
  process_reply_main() ->
      Notification = {exited, self()},
      receive
          shutdown ->
              %% wait for global processes to propagate
              ok = timer:sleep(500),
              global:send(syn_netsplits_SUITE_result, Notification)
      end.