3 use POSIX ":sys_wait_h";
10 my @views = (); #expected views
11 my %in_views; #the number of views a node is expected to be present
16 use sigtrap 'handler' => \&killprocess, 'HUP', 'INT', 'ABRT', 'QUIT', 'TERM';
20 return "paxos-$port.log";
25 killprocess() if ($always_kill);
30 print "killprocess: forcestop all spawned processes...@pid \n";
42 my $aa = join("-", @a);
45 push( @logs, "$p-$aa.log" );
46 if( $p =~ /lock_server/ ) {
47 push( @logs, paxos_log($a[1]) );
50 } elsif (defined $pid) {
52 open(STDOUT, ">>$p-$aa.log")
53 or mydie "Couln't redirect stout\n";
54 open(STDERR, ">&STDOUT")
55 or mydie "Couln't redirect stderr\n";
59 or mydie "Cannot start new $p @a $!\n";
61 mydie "Cannot fork: $!\n";
69 for( my $i = 0; $i < $num; $i++ ) {
70 push( @p, int(rand(54000))+10000 );
72 my @sp = sort { $a <=> $b } @p;
78 open( CONFIG, ">config" ) or mydie( "Couldn't open config for writing" );
79 foreach my $p (@ports) {
80 printf CONFIG "%05d\n", $p;
88 return spawn( "./lock_server", $master, $port );
98 or mydie( "Failed: couldn't read $l" );
106 foreach my $line (@log) {
107 if( $line =~ /^done (\d+) ([\d\s]+)$/ ) {
110 my @view = split( /\s+/, $2 );
114 # let there be extra views
119 my @expected = @{$e};
121 if( @expected != @view ) {
122 mydie( "Failed: In log $l at view $num is (@view), but expected $i (@expected)" );
130 mydie( "Failed: In log $l, not enough views seen!" );
133 if( defined $last_v ) {
134 my @last_exp_v = @{$last_v};
135 if( @last_exp_v != @last_view ) {
136 mydie( "Failed: In log $l last view didn't match, got view @last_view, but expected @last_exp_v" );
145 my $including = shift;
146 my $nv = `grep "done " $log | grep "$including" | wc -l`;
152 sub wait_for_view_change {
155 my $num_views = shift;
156 my $including = shift;
160 while( (get_num_views( $log, $including ) < $num_views) and
161 ($start + $timeout > time()) ) {
162 my $lastv = `grep done $log | tail -n 1`;
164 print " Waiting for $including to be present in >=$num_views views in $log (Last view: $lastv)\n";
168 if( get_num_views( $log, $including ) < $num_views) {
169 mydie( "Failed: Timed out waiting for $including to be in >=$num_views in log $log" );
171 print " Done: $including is in >=$num_views views in $log\n";
183 $done_pid = waitpid($pid, POSIX::WNOHANG);
184 } while( $done_pid <= 0 and (time() - $start) < $to );
186 if( $done_pid <= 0 ) {
188 mydie( "Failed: Timed out waiting for process $pid\n" );
195 sub wait_and_check_expected_view($) {
198 for (my $i = 0; $i <=$#$v; $i++) {
199 $in_views{$v->[$i]}++;
201 foreach my $port (@$v) {
202 wait_for_view_change(paxos_log($port), $in_views{$port}, $port, 20);
204 foreach my $port (@$v) {
205 my $log = paxos_log($port);
206 check_views( $log, \@views );
210 sub start_nodes ($$){
215 for (my $i = 0; $i <= $#p; $i++) {
216 $in_views{$p[$i]} = 0;
222 for (my $i = 0; $i < $n; $i++) {
223 if ($command eq "ls") {
224 @pid = (@pid, spawn_ls($p[0],$p[$i]));
225 print "Start lock_server on $p[$i]\n";
230 wait_and_check_expected_view(\@vv);
236 getopts("s:k",\%options);
237 if (defined($options{s})) {
240 if (defined($options{k})) {
244 #get a sorted list of random ports
246 print_config( @p[0..4] );
251 # see which tests are set
253 foreach my $t (@ARGV) {
254 if( $t < $NUM_TESTS && $t >= 0 ) {
260 for( my $i = 0; $i < $NUM_TESTS; $i++ ) {
266 print "test0: start 3-process lock server\n";
273 print "test1: start 3-process lock server, kill third server\n";
276 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
277 kill "TERM", $pid[2];
281 # it should go through 4 views
282 my @v4 = ($p[0], $p[1]);
283 wait_and_check_expected_view(\@v4);
290 print "test2: start 3-process lock server, kill first server\n";
293 print "Kill first (PID: $pid[0]) on port $p[0]\n";
294 kill "TERM", $pid[0];
298 # it should go through 4 views
299 my @v4 = ($p[1], $p[2]);
300 wait_and_check_expected_view(\@v4);
309 print "test3: start 3-process lock_server, kill a server, restart a server\n";
312 print "Kill server (PID: $pid[2]) on port $p[2]\n";
313 kill "TERM", $pid[2];
317 my @v4 = ($p[0], $p[1]);
318 wait_and_check_expected_view(\@v4);
320 print "Restart killed server on port $p[2]\n";
321 $pid[2] = spawn_ls ($p[0], $p[2]);
325 my @v5 = ($p[0], $p[1], $p[2]);
326 wait_and_check_expected_view(\@v5);
333 print "test4: 3-process lock_server, kill third server, kill second server, restart third server, kill third server again, restart second server, re-restart third server, check logs\n";
336 print "Kill server (PID: $pid[2]) on port $p[2]\n";
337 kill "TERM", $pid[2];
340 my @v4 = ($p[0], $p[1]);
341 wait_and_check_expected_view(\@v4);
343 print "Kill server (PID: $pid[1]) on port $p[1]\n";
344 kill "TERM", $pid[1];
347 #no view change can happen because of a lack of majority
349 print "Restarting server on port $p[2]\n";
350 $pid[2] = spawn_ls($p[0], $p[2]);
354 #no view change can happen because of a lack of majority
355 foreach my $port (@p[0..2]) {
356 my $num_v = get_num_views(paxos_log($port), $port);
357 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
361 print "Kill server (PID: $pid[2]) on port $p[2]\n";
362 kill "TERM", $pid[2];
367 print "Restarting server on port $p[1]\n";
368 $pid[1] = spawn_ls($p[0], $p[1]);
372 foreach my $port (@p[0..1]) {
373 $in_views{$port} = get_num_views( paxos_log($port), $port );
374 print " Node $port is present in ", $in_views{$port}, " views in ", paxos_log($port), "\n";
377 print "Restarting server on port $p[2]\n";
378 $pid[2] = spawn_ls($p[0], $p[2]);
380 my @lastv = ($p[0],$p[1],$p[2]);
381 foreach my $port (@lastv) {
382 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
385 # now check the paxos logs and make sure the logs go through the right
388 foreach my $port (@lastv) {
389 check_views( paxos_log($port), \@views, \@lastv);
397 print "test5: 3-process lock_server, send signal 1 to first server, kill third server, restart third server, check logs\n";
400 print "Sending paxos breakpoint 1 to first server on port $p[0]\n";
401 spawn("./rsm_tester", $p[0]+1, "breakpoint", 3);
405 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
406 kill "TERM", $pid[2];
409 foreach my $port (@p[0..2]) {
410 my $num_v = get_num_views( paxos_log($port), $port );
411 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
414 print "Restarting third server on port $p[2]\n";
415 $pid[2]= spawn_ls($p[0], $p[2]);
416 my @lastv = ($p[1],$p[2]);
417 foreach my $port (@lastv) {
418 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
422 # now check the paxos logs and make sure the logs go through the right
425 foreach my $port (@lastv) {
426 check_views( paxos_log($port), \@views, \@lastv);
434 print "test6: 4-process lock_server, send signal 2 to first server, kill fourth server, restart fourth server, check logs\n";
436 print "Sending paxos breakpoint 2 to first server on port $p[0]\n";
437 spawn("./rsm_tester", $p[0]+1, "breakpoint", 4);
441 print "Kill fourth server (PID: $pid[3]) on port $p[3]\n";
442 kill "TERM", $pid[3];
446 foreach my $port ($p[1],$p[2]) {
447 my $num_v = get_num_views( paxos_log($port), $port );
448 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
453 print "Restarting fourth server on port $p[3]\n";
454 $pid[3] = spawn_ls($p[1], $p[3]);
458 my @v5 = ($p[0],$p[1],$p[2]);
459 foreach my $port (@v5) {
466 # the 6th view will be (2,3) or (1,2,3,4)
467 my @v6 = ($p[1],$p[2]);
468 foreach my $port (@v6) {
471 foreach my $port (@v6) {
472 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 30);
475 # final will be (2,3,4)
476 my @lastv = ($p[1],$p[2],$p[3]);
477 foreach my $port (@lastv) {
478 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
480 foreach my $port (@lastv) {
481 check_views( paxos_log($port), \@views, \@lastv );
488 print "test7: 4-process lock_server, send signal 2 to first server, kill fourth server, kill other servers, restart other servers, restart fourth server, check logs\n";
490 print "Sending paxos breakpoint 2 to first server on port $p[0]\n";
491 spawn("./rsm_tester", $p[0]+1, "breakpoint", 4);
494 print "Kill fourth server (PID: $pid[3]) on port $p[3]\n";
495 kill "TERM", $pid[3];
499 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
500 kill "TERM", $pid[2];
502 print "Kill second server (PID: $pid[1]) on port $p[1]\n";
503 kill "TERM", $pid[1];
507 print "Restarting second server on port $p[1]\n";
508 $pid[1] = spawn_ls($p[0], $p[1]);
512 print "Restarting third server on port $p[2]\n";
513 $pid[2] = spawn_ls($p[0], $p[2]);
517 #no view change is possible by now because there is no majority
518 foreach my $port ($p[1],$p[2]) {
519 my $num_v = get_num_views( paxos_log($port), $port );
520 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
523 print "Restarting fourth server on port $p[3]\n";
524 $pid[3] = spawn_ls($p[1], $p[3]);
528 my @v5 = ($p[0], $p[1], $p[2]);
530 foreach my $port (@v5) {
535 my @lastv = ($p[1],$p[2],$p[3]);
536 foreach my $port (@lastv) {
537 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
540 foreach my $port (@lastv) {
541 check_views( paxos_log($port), \@views, \@lastv);
549 print "test8: start 3-process lock service\n";
552 print "Start lock_tester $p[0]\n";
553 $t = spawn("./lock_tester", $p[0]);
555 print " Wait for lock_tester to finish (waitpid $t)\n";
558 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
559 mydie( "Failed lock tester for test 8" );
568 print "test9: start 3-process rsm, kill second slave while lock_tester is running\n";
571 print "Start lock_tester $p[0]\n";
572 $t = spawn("./lock_tester", $p[0]);
574 sleep int(rand(10)+1);
576 print "Kill slave (PID: $pid[2]) on port $p[2]\n";
577 kill "TERM", $pid[2];
581 # it should go through 4 views
582 my @v4 = ($p[0], $p[1]);
583 wait_and_check_expected_view(\@v4);
585 print " Wait for lock_tester to finish (waitpid $t)\n";
588 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
589 mydie( "Failed lock tester for test 9" );
598 print "test10: start 3-process rsm, kill second slave and restarts it later while lock_tester is running\n";
601 print "Start lock_tester $p[0]\n";
602 $t = spawn("./lock_tester", $p[0]);
604 sleep int(rand(10)+1);
606 print "Kill slave (PID: $pid[2]) on port $p[2]\n";
607 kill "TERM", $pid[2];
611 # it should go through 4 views
612 my @v4 = ($p[0], $p[1]);
613 wait_and_check_expected_view(\@v4);
617 print "Restarting killed lock_server on port $p[2]\n";
618 $pid[2] = spawn_ls($p[0], $p[2]);
619 my @v5 = ($p[0],$p[1],$p[2]);
620 wait_and_check_expected_view(\@v5);
622 print " Wait for lock_tester to finish (waitpid $t)\n";
625 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
626 mydie( "Failed lock tester for test 10" );
636 print "test11: start 3-process rsm, kill primary while lock_tester is running\n";
639 print "Start lock_tester $p[0]\n";
640 $t = spawn("./lock_tester", $p[0]);
642 sleep int(rand(10)+1);
644 print "Kill primary (PID: $pid[0]) on port $p[0]\n";
645 kill "TERM", $pid[0];
649 # it should go through 4 views
650 my @v4 = ($p[1], $p[2]);
651 wait_and_check_expected_view(\@v4);
653 print " Wait for lock_tester to finish (waitpid $t)\n";
656 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
657 mydie( "Failed lock tester for test 11" );
666 print "test12: start 3-process rsm, kill master at break1 and restart it while lock_tester is running\n";
668 start_nodes(3, "ls");
670 print "Start lock_tester $p[0]\n";
671 $t = spawn("./lock_tester", $p[0]);
675 print "Kill master (PID: $pid[0]) on port $p[0] at breakpoint 1\n";
676 spawn("./rsm_tester", $p[0]+1, "breakpoint", 1);
681 # it should go through 5 views
682 my @v4 = ($p[1], $p[2]);
683 wait_and_check_expected_view(\@v4);
685 print "Restarting killed lock_server on port $p[0]\n";
686 $pid[0] = spawn_ls($p[1], $p[0]);
690 # the last view should include all nodes
691 my @lastv = ($p[0],$p[1],$p[2]);
692 foreach my $port (@lastv) {
693 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
696 foreach my $port (@lastv) {
697 check_views( paxos_log($port), \@views, \@lastv);
700 print " Wait for lock_tester to finish (waitpid $t)\n";
703 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
704 mydie( "Failed lock tester for test 12" );
713 print "test13: start 3-process rsm, kill slave at break1 and restart it while lock_tester is running\n";
715 start_nodes(3, "ls");
717 print "Start lock_tester $p[0]\n";
718 $t = spawn("./lock_tester", $p[0]);
722 print "Kill slave (PID: $pid[2]) on port $p[2] at breakpoint 1\n";
723 spawn("./rsm_tester", $p[2]+1, "breakpoint", 1);
727 # it should go through 4 views
728 my @v4 = ($p[0], $p[1]);
729 wait_and_check_expected_view(\@v4);
731 print "Restarting killed lock_server on port $p[2]\n";
732 $pid[2] = spawn_ls($p[0], $p[2]);
736 # the last view should include all nodes
737 my @lastv = ($p[0],$p[1],$p[2]);
738 foreach my $port (@lastv) {
739 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
742 foreach my $port (@lastv) {
743 check_views( paxos_log($port), \@views, \@lastv);
746 print " Wait for lock_tester to finish (waitpid $t)\n";
749 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
750 mydie( "Failed lock tester for test 13" );
759 print "test14: start 5-process rsm, kill slave break1, kill slave break2\n";
761 start_nodes(5, "ls");
763 print "Start lock_tester $p[0]\n";
764 $t = spawn("./lock_tester", $p[0]);
768 print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n";
769 spawn("./rsm_tester", $p[4]+1, "breakpoint", 1);
772 print "Kill slave (PID: $pid[3]) on port $p[3] at breakpoint 2\n";
773 spawn("./rsm_tester", $p[3]+1, "breakpoint", 2);
780 print "first view change wait\n";
781 my @lastv = ($p[0],$p[1],$p[2],$p[3]);
782 foreach my $port (@lastv) {
783 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
786 print "second view change wait\n";
788 @lastv = ($p[0],$p[1],$p[2]);
789 foreach my $port (@lastv) {
790 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
793 print " Wait for lock_tester to finish (waitpid $t)\n";
796 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
797 mydie( "Failed lock tester for test 14" );
806 print "test15: start 5-process rsm, kill slave break1, kill primary break2\n";
808 start_nodes(5, "ls");
810 print "Start lock_tester $p[0]\n";
811 $t = spawn("./lock_tester", $p[0]);
815 print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n";
816 spawn("./rsm_tester", $p[4]+1, "breakpoint", 1);
819 print "Kill primary (PID: $pid[0]) on port $p[0] at breakpoint 2\n";
820 spawn("./rsm_tester", $p[0]+1, "breakpoint", 2);
826 print "first view change wait\n";
827 my @lastv = ($p[0],$p[1],$p[2],$p[3]);
828 foreach my $port (@lastv) {
829 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
832 print "second view change wait\n";
834 @lastv = ($p[1],$p[2],$p[3]);
835 foreach my $port (@lastv) {
836 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
839 print " Wait for lock_tester to finish (waitpid $t)\n";
842 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
843 mydie( "Failed lock tester for test 15" );
852 print "test16: start 3-process rsm, partition primary, heal it\n";
854 start_nodes(3, "ls");
856 print "Start lock_tester $p[0]\n";
857 $t = spawn("./lock_tester", $p[0]);
861 print "Partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n";
863 spawn("./rsm_tester", $p[0]+1, "partition", 0);
867 print "first view change wait\n";
868 my @lastv = ($p[1],$p[2]);
869 foreach my $port (@lastv) {
870 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
875 print "Heal partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n";
876 spawn("./rsm_tester", $p[0]+1, "partition", 1);
880 # xxx it should test that this is the 5th view!
881 print "second view change wait\n";
882 @lastv = ($p[0], $p[1],$p[2]);
883 foreach my $port (@lastv) {
884 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
887 print " Wait for lock_tester to finish (waitpid $t)\n";
890 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
891 mydie( "Failed lock tester for test 16" );
898 print "tests done OK\n";