X-Git-Url: http://xvm.mit.edu/gitweb/invirt/third/libt4.git/blobdiff_plain/03b35a9a1bd1f583e32b27d260b223a0989d6c75..c279db4240a3a3c30f069ab9dea8055cf94280da:/rsm_tester.py?ds=inline diff --git a/rsm_tester.py b/rsm_tester.py index 32f0ad2..c7b8bbd 100755 --- a/rsm_tester.py +++ b/rsm_tester.py @@ -29,7 +29,7 @@ for sig in ['HUP', 'INT', 'ABRT', 'QUIT', 'TERM']: signal.signal(num, killprocess) def paxos_log(port): - return "paxos-%d.log" % port + return "paxos-%d.log" % port def die(*s): print >>sys.stderr, ''.join(s) @@ -77,7 +77,7 @@ def spawn(p, *a): try: os.execv(p, [p] + sa) except OSError, e: - mydie("Cannot start new %s %s %s", (p, repr(sa), repr(e))) + mydie("Cannot start new %s %s %s" % (p, repr(sa), repr(e))) def randports(num): return sorted([random.randint(0, 54000/2)*2+10000 for i in xrange(num)]) @@ -114,13 +114,13 @@ def check_views(l, vs, last_v=None): # let there be extra views continue expected = vs[i] - if tuple(expected) != tuple(view): + if set(expected) != set(view): mydie("Failed: In log %s at view %s is (%s), but expected %s (%s)" % (l, str(num), repr(view), str(i), repr(expected))) i+=1 if i < len(vs): mydie("Failed: In log %s, not enough views seen!" % (l,)) - if last_v is not None and tuple(last_v) != tuple(last_view): + if last_v is not None and set(last_v) != set(last_view): mydie("Failed: In log %s last view didn't match, got view %s, but expected %s" % (l, repr(last_view), repr(last_v))) @@ -155,10 +155,10 @@ def wait_for_view_change(log, num_views, including, timeout): def waitpid_to(pid, to): start = time.time() - done_pid = -1 - while done_pid <= 0 and (time.time() - start) < to: - usleep(100000) - done_pid = os.waitpid(pid, os.WNOHANG) + done_pid = (0,0) + while done_pid == (0,0) and (time.time() - start) < to: + usleep(100000) + done_pid = os.waitpid(pid, os.WNOHANG) if done_pid <= 0: os.kill(pid, signal.SIGKILL) @@ -240,7 +240,7 @@ if do_run[1]: if do_run[2]: print "test2: start 3-process lock server, kill first server" start_nodes(3,"ls") - print "Kill first (PID: $pid[0]) on port $p[0]" + print "Kill first (PID: %d) on port %d" % (pid[0], p[0]) os.kill(pid[0], signal.SIGTERM) usleep(500000) # it should go through 4 views @@ -252,12 +252,12 @@ if do_run[2]: if do_run[3]: print "test3: start 3-process lock_server, kill a server, restart a server" start_nodes(3,"ls") - print "Kill server (PID: $pid[2]) on port $p[2]" + print "Kill server (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) usleep(500000) v4 = (p[0], p[1]) wait_and_check_expected_view(v4) - print "Restart killed server on port $p[2]" + print "Restart killed server on port %s" % (p[2],) pid[2] = spawn_ls (p[0], p[2]) usleep(500000) v5 = (p[0], p[1], p[2]) @@ -268,34 +268,34 @@ if do_run[3]: if do_run[4]: print "test4: 3-process lock_server, kill third server, kill second server, restart third server, kill third server again, restart second server, re-restart third server, check logs" start_nodes(3,"ls") - print "Kill server (PID: $pid[2]) on port $p[2]" + print "Kill server (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) usleep(500000) v4 = (p[0], p[1]) wait_and_check_expected_view(v4) - print "Kill server (PID: $pid[1]) on port $p[1]" + print "Kill server (PID: %s) on port %s" % (pid[1], p[1]) os.kill(pid[1], signal.SIGTERM) usleep(500000) #no view change can happen because of a lack of majority - print "Restarting server on port $p[2]" + print "Restarting server on port %s" % (p[2],) pid[2] = spawn_ls(p[0], p[2]) usleep(500000) #no view change can happen because of a lack of majority for port in p[0:1+2]: num_v = get_num_views(paxos_log(port), port) if num_v != in_views[port]: - die("$num_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority") + die("%s_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority" % (num,)) # kill node 3 again, - print "Kill server (PID: $pid[2]) on port $p[2]" + print "Kill server (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) usleep(500000) - print "Restarting server on port $p[1]" + print "Restarting server on port %s" % (p[1],) pid[1] = spawn_ls(p[0], p[1]) usleep(700000) for port in p[0:1+1]: in_views[port] = get_num_views(paxos_log(port), port) - print " Node $port is present in ", in_views[port], " views in ", paxos_log(port), "" - print "Restarting server on port $p[2]" + print " Node %s is present in " % (port,), in_views[port], " views in ", paxos_log(port) + print "Restarting server on port %s" % (p[2],) pid[2] = spawn_ls(p[0], p[2]) lastv = (p[0],p[1],p[2]) for port in lastv: @@ -309,17 +309,17 @@ if do_run[4]: if do_run[5]: print "test5: 3-process lock_server, send signal 1 to first server, kill third server, restart third server, check logs" start_nodes(3,"ls") - print "Sending paxos breakpoint 1 to first server on port $p[0]" + print "Sending paxos breakpoint 1 to first server on port %s" % (p[0],) spawn("./rsm_tester", p[0]+1, "breakpoint", 3) usleep(100000) - print "Kill third server (PID: $pid[2]) on port $p[2]" + print "Kill third server (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) usleep(500000) for port in p[0:1+2]: num_v = get_num_views(paxos_log(port), port) if num_v != in_views[port]: - die("$num_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority") - print "Restarting third server on port $p[2]" + die("%s_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority" % (num,)) + print "Restarting third server on port %s" % (p[2],) pid[2]= spawn_ls(p[0], p[2]) lastv = (p[1],p[2]) for port in lastv: @@ -334,18 +334,18 @@ if do_run[5]: if do_run[6]: print "test6: 4-process lock_server, send signal 2 to first server, kill fourth server, restart fourth server, check logs" start_nodes(4,"ls") - print "Sending paxos breakpoint 2 to first server on port $p[0]" + print "Sending paxos breakpoint 2 to first server on port %s" % (p[0],) spawn("./rsm_tester", p[0]+1, "breakpoint", 4) usleep(100000) - print "Kill fourth server (PID: $pid[3]) on port $p[3]" + print "Kill fourth server (PID: %s) on port %s" % (pid[3], p[3]) os.kill(pid[3], signal.SIGTERM) usleep(500000) for port in (p[1],p[2]): num_v = get_num_views(paxos_log(port), port) if num_v != in_views[port]: - die("$num_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority") + die("%s_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority" % (num,)) usleep(500000) - print "Restarting fourth server on port $p[3]" + print "Restarting fourth server on port %s" % (p[3],) pid[3] = spawn_ls(p[1], p[3]) usleep(500000) v5 = (p[0],p[1],p[2]) @@ -370,29 +370,29 @@ if do_run[6]: if do_run[7]: print "test7: 4-process lock_server, send signal 2 to first server, kill fourth server, kill other servers, restart other servers, restart fourth server, check logs" start_nodes(4,"ls") - print "Sending paxos breakpoint 2 to first server on port $p[0]" + print "Sending paxos breakpoint 2 to first server on port %s" % (p[0],) spawn("./rsm_tester", p[0]+1, "breakpoint", 4) usleep(300000) - print "Kill fourth server (PID: $pid[3]) on port $p[3]" + print "Kill fourth server (PID: %s) on port %s" % (pid[3], p[3]) os.kill(pid[3], signal.SIGTERM) usleep(500000) - print "Kill third server (PID: $pid[2]) on port $p[2]" + print "Kill third server (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) - print "Kill second server (PID: $pid[1]) on port $p[1]" + print "Kill second server (PID: %s) on port %s" % (pid[1], p[1]) os.kill(pid[1], signal.SIGTERM) usleep(500000) - print "Restarting second server on port $p[1]" + print "Restarting second server on port %s" % (p[1],) pid[1] = spawn_ls(p[0], p[1]) usleep(500000) - print "Restarting third server on port $p[2]" + print "Restarting third server on port %s" % (p[2],) pid[2] = spawn_ls(p[0], p[2]) usleep(500000) #no view change is possible by now because there is no majority for port in (p[1],p[2]): num_v = get_num_views(paxos_log(port), port) if num_v != in_views[port]: - die("$num_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority") - print "Restarting fourth server on port $p[3]" + die("%s_v views in ", paxos_log(port), " : no new views should be formed due to the lack of majority" % (num,)) + print "Restarting fourth server on port %s" % (p[3],) pid[3] = spawn_ls(p[1], p[3]) usleep(500000) v5 = (p[0], p[1], p[2]) @@ -410,11 +410,11 @@ if do_run[7]: if do_run[8]: print "test8: start 3-process lock service" start_nodes(3,"ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 8") cleanup() usleep(200000) @@ -422,18 +422,18 @@ if do_run[8]: if do_run[9]: print "test9: start 3-process rsm, kill second slave while lock_tester is running" start_nodes(3,"ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(random.randint(1,1000000)) - print "Kill slave (PID: $pid[2]) on port $p[2]" + print "Kill slave (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) usleep(300000) # it should go through 4 views v4 = (p[0], p[1]) wait_and_check_expected_view(v4) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 9") cleanup() usleep(200000) @@ -441,23 +441,23 @@ if do_run[9]: if do_run[10]: print "test10: start 3-process rsm, kill second slave and restarts it later while lock_tester is running" start_nodes(3,"ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(random.randint(1,1000000)) - print "Kill slave (PID: $pid[2]) on port $p[2]" + print "Kill slave (PID: %s) on port %s" % (pid[2], p[2]) os.kill(pid[2], signal.SIGTERM) usleep(300000) # it should go through 4 views v4 = (p[0], p[1]) wait_and_check_expected_view(v4) usleep(300000) - print "Restarting killed lock_server on port $p[2]" + print "Restarting killed lock_server on port %s" % (p[2],) pid[2] = spawn_ls(p[0], p[2]) v5 = (p[0],p[1],p[2]) wait_and_check_expected_view(v5) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 10") cleanup() usleep(200000) @@ -465,18 +465,18 @@ if do_run[10]: if do_run[11]: print "test11: start 3-process rsm, kill primary while lock_tester is running" start_nodes(3,"ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(random.randint(1,1000000)) - print "Kill primary (PID: $pid[0]) on port $p[0]" + print "Kill primary (PID: %s) on port %s" % (pid[0], p[0]) os.kill(pid[0], signal.SIGTERM) usleep(300000) # it should go through 4 views v4 = (p[1], p[2]) wait_and_check_expected_view(v4) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 11") cleanup() usleep(200000) @@ -484,16 +484,16 @@ if do_run[11]: if do_run[12]: print "test12: start 3-process rsm, kill master at break1 and restart it while lock_tester is running" start_nodes(3, "ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(100000) - print "Kill master (PID: $pid[0]) on port $p[0] at breakpoint 1" + print "Kill master (PID: %s) on port %s at breakpoint 1" % (pid[0], p[0]) spawn("./rsm_tester", p[0]+1, "breakpoint", 1) usleep(100000) # it should go through 5 views v4 = (p[1], p[2]) wait_and_check_expected_view(v4) - print "Restarting killed lock_server on port $p[0]" + print "Restarting killed lock_server on port %s" % (p[0],) pid[0] = spawn_ls(p[1], p[0]) usleep(300000) # the last view should include all nodes @@ -502,9 +502,9 @@ if do_run[12]: wait_for_view_change(paxos_log(port), in_views[port]+1, port, 20) for port in lastv: check_views(paxos_log(port), views, lastv) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 12") cleanup() usleep(200000) @@ -512,16 +512,16 @@ if do_run[12]: if do_run[13]: print "test13: start 3-process rsm, kill slave at break1 and restart it while lock_tester is running" start_nodes(3, "ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(100000) - print "Kill slave (PID: $pid[2]) on port $p[2] at breakpoint 1" + print "Kill slave (PID: %s) on port %s at breakpoint 1" % (pid[2], p[2]) spawn("./rsm_tester", p[2]+1, "breakpoint", 1) usleep(100000) # it should go through 4 views v4 = (p[0], p[1]) wait_and_check_expected_view(v4) - print "Restarting killed lock_server on port $p[2]" + print "Restarting killed lock_server on port %s" % (p[2],) pid[2] = spawn_ls(p[0], p[2]) usleep(300000) # the last view should include all nodes @@ -530,9 +530,9 @@ if do_run[13]: wait_for_view_change(paxos_log(port), in_views[port]+1, port, 20) for port in lastv: check_views(paxos_log(port), views, lastv) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 13") cleanup() usleep(200000) @@ -540,12 +540,12 @@ if do_run[13]: if do_run[14]: print "test14: start 5-process rsm, kill slave break1, kill slave break2" start_nodes(5, "ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(100000) - print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1" + print "Kill slave (PID: %s) on port %s at breakpoint 1" % (pid[4], p[4]) spawn("./rsm_tester", p[4]+1, "breakpoint", 1) - print "Kill slave (PID: $pid[3]) on port $p[3] at breakpoint 2" + print "Kill slave (PID: %s) on port %s at breakpoint 2" % (pid[3], p[3]) spawn("./rsm_tester", p[3]+1, "breakpoint", 2) usleep(100000) # two view changes: @@ -557,9 +557,9 @@ if do_run[14]: lastv = (p[0],p[1],p[2]) for port in lastv: wait_for_view_change(paxos_log(port), in_views[port]+1, port, 20) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 14") cleanup() usleep(200000) @@ -567,12 +567,12 @@ if do_run[14]: if do_run[15]: print "test15: start 5-process rsm, kill slave break1, kill primary break2" start_nodes(5, "ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(100000) - print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1" + print "Kill slave (PID: %s) on port %s at breakpoint 1" % (pid[4], p[4]) spawn("./rsm_tester", p[4]+1, "breakpoint", 1) - print "Kill primary (PID: $pid[0]) on port $p[0] at breakpoint 2" + print "Kill primary (PID: %s) on port %s at breakpoint 2" % (pid[0], p[0]) spawn("./rsm_tester", p[0]+1, "breakpoint", 2) usleep(100000) # two view changes: @@ -584,9 +584,9 @@ if do_run[15]: lastv = (p[1],p[2],p[3]) for port in lastv: wait_for_view_change(paxos_log(port), in_views[port]+1, port, 20) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 15") cleanup() usleep(200000) @@ -594,10 +594,10 @@ if do_run[15]: if do_run[16]: print "test16: start 3-process rsm, partition primary, heal it" start_nodes(3, "ls") - print "Start lock_tester $p[0]" + print "Start lock_tester %s" % (p[0],) t = spawn("./lock_tester", p[0]) usleep(100000) - print "Partition primary (PID: $pid[0]) on port $p[0] at breakpoint" + print "Partition primary (PID: %s) on port %s at breakpoint" % (pid[0], p[0]) spawn("./rsm_tester", p[0]+1, "partition", 0) usleep(300000) print "first view change wait" @@ -605,7 +605,7 @@ if do_run[16]: for port in lastv: wait_for_view_change(paxos_log(port), in_views[port]+1, port, 20) usleep(100000) - print "Heal partition primary (PID: $pid[0]) on port $p[0] at breakpoint" + print "Heal partition primary (PID: %s) on port %s at breakpoint" % (pid[0], p[0]) spawn("./rsm_tester", p[0]+1, "partition", 1) usleep(100000) # xxx it should test that this is the 5th view! @@ -613,9 +613,9 @@ if do_run[16]: lastv = (p[0], p[1],p[2]) for port in lastv: wait_for_view_change(paxos_log(port), in_views[port]+1, port, 20) - print " Wait for lock_tester to finish (waitpid $t)" + print " Wait for lock_tester to finish (waitpid %s)" % (t,) waitpid_to(t, 600) - if os.system("grep \"passed all tests successfully\" lock_tester-$p[0].log"): + if os.system("grep \"passed all tests successfully\" lock_tester-%s.log" % (p[0],)): mydie("Failed lock tester for test 16") cleanup() usleep(200000)