3 use POSIX ":sys_wait_h";
5 use Time::HiRes (usleep);
11 my @views = (); #expected views
12 my %in_views; # the number of views in which each node is expected to be present
17 use sigtrap 'handler' => \&killprocess, 'HUP', 'INT', 'ABRT', 'QUIT', 'TERM';
21 return "paxos-$port.log";
26 killprocess() if ($always_kill);
31 print "killprocess: forcestop all spawned processes...@pid \n";
43 my $aa = join("-", @a);
46 push( @logs, "$p-$aa.log" );
47 if( $p =~ /lock_server/ ) {
48 push( @logs, paxos_log($a[1]) );
51 } elsif (defined $pid) {
53 open(STDOUT, ">>$p-$aa.log")
54 or mydie "Couln't redirect stout\n";
55 open(STDERR, ">&STDOUT")
56 or mydie "Couln't redirect stderr\n";
60 or mydie "Cannot start new $p @a $!\n";
62 mydie "Cannot fork: $!\n";
70 for( my $i = 0; $i < $num; $i++ ) {
71 push( @p, int(rand(54000/2))*2+10000 );
73 my @sp = sort { $a <=> $b } @p;
79 open( CONFIG, ">config" ) or mydie( "Couldn't open config for writing" );
80 foreach my $p (@ports) {
81 printf CONFIG "%05d\n", $p;
89 return spawn( "./lock_server", $master, $port );
99 or mydie( "Failed: couldn't read $l" );
107 foreach my $line (@log) {
108 if( $line =~ /^done (\d+) ([\d\s]+)$/ ) {
111 my @view = split( /\s+/, $2 );
115 # let there be extra views
120 my @expected = @{$e};
122 if( @expected != @view ) {
123 mydie( "Failed: In log $l at view $num is (@view), but expected $i (@expected)" );
131 mydie( "Failed: In log $l, not enough views seen!" );
134 if( defined $last_v ) {
135 my @last_exp_v = @{$last_v};
136 if( @last_exp_v != @last_view ) {
137 mydie( "Failed: In log $l last view didn't match, got view @last_view, but expected @last_exp_v" );
146 my $including = shift;
147 my $nv = `grep "done " $log | grep "$including" | wc -l`;
153 sub wait_for_view_change {
156 my $num_views = shift;
157 my $including = shift;
161 while( (get_num_views( $log, $including ) < $num_views) and
162 ($start + $timeout > time()) ) {
163 my $lastv = `grep done $log | tail -n 1`;
165 print " Waiting for $including to be present in >=$num_views views in $log (Last view: $lastv)\n";
169 if( get_num_views( $log, $including ) < $num_views) {
170 mydie( "Failed: Timed out waiting for $including to be in >=$num_views in log $log" );
172 print " Done: $including is in >=$num_views views in $log\n";
184 $done_pid = waitpid($pid, POSIX::WNOHANG);
185 } while( $done_pid <= 0 and (time() - $start) < $to );
187 if( $done_pid <= 0 ) {
189 mydie( "Failed: Timed out waiting for process $pid\n" );
196 sub wait_and_check_expected_view($) {
199 for (my $i = 0; $i <=$#$v; $i++) {
200 $in_views{$v->[$i]}++;
202 foreach my $port (@$v) {
203 wait_for_view_change(paxos_log($port), $in_views{$port}, $port, 20);
205 foreach my $port (@$v) {
206 my $log = paxos_log($port);
207 check_views( $log, \@views );
211 sub start_nodes ($$){
216 for (my $i = 0; $i <= $#p; $i++) {
217 $in_views{$p[$i]} = 0;
223 for (my $i = 0; $i < $n; $i++) {
224 if ($command eq "ls") {
225 @pid = (@pid, spawn_ls($p[0],$p[$i]));
226 print "Start lock_server on $p[$i]\n";
231 wait_and_check_expected_view(\@vv);
237 getopts("s:k",\%options);
238 if (defined($options{s})) {
241 if (defined($options{k})) {
245 #get a sorted list of random ports
247 print_config( @p[0..4] );
252 # see which tests are set
254 foreach my $t (@ARGV) {
255 if( $t < $NUM_TESTS && $t >= 0 ) {
261 for( my $i = 0; $i < $NUM_TESTS; $i++ ) {
267 print "test0: start 3-process lock server\n";
274 print "test1: start 3-process lock server, kill third server\n";
277 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
278 kill "TERM", $pid[2];
282 # it should go through 4 views
283 my @v4 = ($p[0], $p[1]);
284 wait_and_check_expected_view(\@v4);
291 print "test2: start 3-process lock server, kill first server\n";
294 print "Kill first (PID: $pid[0]) on port $p[0]\n";
295 kill "TERM", $pid[0];
299 # it should go through 4 views
300 my @v4 = ($p[1], $p[2]);
301 wait_and_check_expected_view(\@v4);
310 print "test3: start 3-process lock_server, kill a server, restart a server\n";
313 print "Kill server (PID: $pid[2]) on port $p[2]\n";
314 kill "TERM", $pid[2];
318 my @v4 = ($p[0], $p[1]);
319 wait_and_check_expected_view(\@v4);
321 print "Restart killed server on port $p[2]\n";
322 $pid[2] = spawn_ls ($p[0], $p[2]);
326 my @v5 = ($p[0], $p[1], $p[2]);
327 wait_and_check_expected_view(\@v5);
334 print "test4: 3-process lock_server, kill third server, kill second server, restart third server, kill third server again, restart second server, re-restart third server, check logs\n";
337 print "Kill server (PID: $pid[2]) on port $p[2]\n";
338 kill "TERM", $pid[2];
341 my @v4 = ($p[0], $p[1]);
342 wait_and_check_expected_view(\@v4);
344 print "Kill server (PID: $pid[1]) on port $p[1]\n";
345 kill "TERM", $pid[1];
348 #no view change can happen because of a lack of majority
350 print "Restarting server on port $p[2]\n";
351 $pid[2] = spawn_ls($p[0], $p[2]);
355 #no view change can happen because of a lack of majority
356 foreach my $port (@p[0..2]) {
357 my $num_v = get_num_views(paxos_log($port), $port);
358 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
362 print "Kill server (PID: $pid[2]) on port $p[2]\n";
363 kill "TERM", $pid[2];
368 print "Restarting server on port $p[1]\n";
369 $pid[1] = spawn_ls($p[0], $p[1]);
373 foreach my $port (@p[0..1]) {
374 $in_views{$port} = get_num_views( paxos_log($port), $port );
375 print " Node $port is present in ", $in_views{$port}, " views in ", paxos_log($port), "\n";
378 print "Restarting server on port $p[2]\n";
379 $pid[2] = spawn_ls($p[0], $p[2]);
381 my @lastv = ($p[0],$p[1],$p[2]);
382 foreach my $port (@lastv) {
383 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
386 # now check the paxos logs and make sure the logs go through the right
389 foreach my $port (@lastv) {
390 check_views( paxos_log($port), \@views, \@lastv);
398 print "test5: 3-process lock_server, send signal 1 to first server, kill third server, restart third server, check logs\n";
401 print "Sending paxos breakpoint 1 to first server on port $p[0]\n";
402 spawn("./rsm_tester", $p[0]+1, "breakpoint", 3);
406 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
407 kill "TERM", $pid[2];
410 foreach my $port (@p[0..2]) {
411 my $num_v = get_num_views( paxos_log($port), $port );
412 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
415 print "Restarting third server on port $p[2]\n";
416 $pid[2]= spawn_ls($p[0], $p[2]);
417 my @lastv = ($p[1],$p[2]);
418 foreach my $port (@lastv) {
419 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
423 # now check the paxos logs and make sure the logs go through the right
426 foreach my $port (@lastv) {
427 check_views( paxos_log($port), \@views, \@lastv);
435 print "test6: 4-process lock_server, send signal 2 to first server, kill fourth server, restart fourth server, check logs\n";
437 print "Sending paxos breakpoint 2 to first server on port $p[0]\n";
438 spawn("./rsm_tester", $p[0]+1, "breakpoint", 4);
442 print "Kill fourth server (PID: $pid[3]) on port $p[3]\n";
443 kill "TERM", $pid[3];
447 foreach my $port ($p[1],$p[2]) {
448 my $num_v = get_num_views( paxos_log($port), $port );
449 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
454 print "Restarting fourth server on port $p[3]\n";
455 $pid[3] = spawn_ls($p[1], $p[3]);
459 my @v5 = ($p[0],$p[1],$p[2]);
460 foreach my $port (@v5) {
467 # the 6th view will be (2,3) or (1,2,3,4)
468 my @v6 = ($p[1],$p[2]);
469 foreach my $port (@v6) {
472 foreach my $port (@v6) {
473 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 30);
476 # final will be (2,3,4)
477 my @lastv = ($p[1],$p[2],$p[3]);
478 foreach my $port (@lastv) {
479 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
481 foreach my $port (@lastv) {
482 check_views( paxos_log($port), \@views, \@lastv );
489 print "test7: 4-process lock_server, send signal 2 to first server, kill fourth server, kill other servers, restart other servers, restart fourth server, check logs\n";
491 print "Sending paxos breakpoint 2 to first server on port $p[0]\n";
492 spawn("./rsm_tester", $p[0]+1, "breakpoint", 4);
495 print "Kill fourth server (PID: $pid[3]) on port $p[3]\n";
496 kill "TERM", $pid[3];
500 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
501 kill "TERM", $pid[2];
503 print "Kill second server (PID: $pid[1]) on port $p[1]\n";
504 kill "TERM", $pid[1];
508 print "Restarting second server on port $p[1]\n";
509 $pid[1] = spawn_ls($p[0], $p[1]);
513 print "Restarting third server on port $p[2]\n";
514 $pid[2] = spawn_ls($p[0], $p[2]);
518 # no view change is possible at this point because there is no majority
519 foreach my $port ($p[1],$p[2]) {
520 my $num_v = get_num_views( paxos_log($port), $port );
521 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
524 print "Restarting fourth server on port $p[3]\n";
525 $pid[3] = spawn_ls($p[1], $p[3]);
529 my @v5 = ($p[0], $p[1], $p[2]);
531 foreach my $port (@v5) {
536 my @lastv = ($p[1],$p[2],$p[3]);
537 foreach my $port (@lastv) {
538 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
541 foreach my $port (@lastv) {
542 check_views( paxos_log($port), \@views, \@lastv);
550 print "test8: start 3-process lock service\n";
553 print "Start lock_tester $p[0]\n";
554 $t = spawn("./lock_tester", $p[0]);
556 print " Wait for lock_tester to finish (waitpid $t)\n";
559 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
560 mydie( "Failed lock tester for test 8" );
569 print "test9: start 3-process rsm, kill second slave while lock_tester is running\n";
572 print "Start lock_tester $p[0]\n";
573 $t = spawn("./lock_tester", $p[0]);
575 sleep int(rand(10)+1);
577 print "Kill slave (PID: $pid[2]) on port $p[2]\n";
578 kill "TERM", $pid[2];
582 # it should go through 4 views
583 my @v4 = ($p[0], $p[1]);
584 wait_and_check_expected_view(\@v4);
586 print " Wait for lock_tester to finish (waitpid $t)\n";
589 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
590 mydie( "Failed lock tester for test 9" );
599 print "test10: start 3-process rsm, kill second slave and restarts it later while lock_tester is running\n";
602 print "Start lock_tester $p[0]\n";
603 $t = spawn("./lock_tester", $p[0]);
605 sleep int(rand(10)+1);
607 print "Kill slave (PID: $pid[2]) on port $p[2]\n";
608 kill "TERM", $pid[2];
612 # it should go through 4 views
613 my @v4 = ($p[0], $p[1]);
614 wait_and_check_expected_view(\@v4);
618 print "Restarting killed lock_server on port $p[2]\n";
619 $pid[2] = spawn_ls($p[0], $p[2]);
620 my @v5 = ($p[0],$p[1],$p[2]);
621 wait_and_check_expected_view(\@v5);
623 print " Wait for lock_tester to finish (waitpid $t)\n";
626 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
627 mydie( "Failed lock tester for test 10" );
637 print "test11: start 3-process rsm, kill primary while lock_tester is running\n";
640 print "Start lock_tester $p[0]\n";
641 $t = spawn("./lock_tester", $p[0]);
643 sleep int(rand(10)+1);
645 print "Kill primary (PID: $pid[0]) on port $p[0]\n";
646 kill "TERM", $pid[0];
650 # it should go through 4 views
651 my @v4 = ($p[1], $p[2]);
652 wait_and_check_expected_view(\@v4);
654 print " Wait for lock_tester to finish (waitpid $t)\n";
657 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
658 mydie( "Failed lock tester for test 11" );
667 print "test12: start 3-process rsm, kill master at break1 and restart it while lock_tester is running\n";
669 start_nodes(3, "ls");
671 print "Start lock_tester $p[0]\n";
672 $t = spawn("./lock_tester", $p[0]);
676 print "Kill master (PID: $pid[0]) on port $p[0] at breakpoint 1\n";
677 spawn("./rsm_tester", $p[0]+1, "breakpoint", 1);
682 # it should go through 5 views
683 my @v4 = ($p[1], $p[2]);
684 wait_and_check_expected_view(\@v4);
686 print "Restarting killed lock_server on port $p[0]\n";
687 $pid[0] = spawn_ls($p[1], $p[0]);
691 # the last view should include all nodes
692 my @lastv = ($p[0],$p[1],$p[2]);
693 foreach my $port (@lastv) {
694 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
697 foreach my $port (@lastv) {
698 check_views( paxos_log($port), \@views, \@lastv);
701 print " Wait for lock_tester to finish (waitpid $t)\n";
704 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
705 mydie( "Failed lock tester for test 12" );
714 print "test13: start 3-process rsm, kill slave at break1 and restart it while lock_tester is running\n";
716 start_nodes(3, "ls");
718 print "Start lock_tester $p[0]\n";
719 $t = spawn("./lock_tester", $p[0]);
723 print "Kill slave (PID: $pid[2]) on port $p[2] at breakpoint 1\n";
724 spawn("./rsm_tester", $p[2]+1, "breakpoint", 1);
728 # it should go through 4 views
729 my @v4 = ($p[0], $p[1]);
730 wait_and_check_expected_view(\@v4);
732 print "Restarting killed lock_server on port $p[2]\n";
733 $pid[2] = spawn_ls($p[0], $p[2]);
737 # the last view should include all nodes
738 my @lastv = ($p[0],$p[1],$p[2]);
739 foreach my $port (@lastv) {
740 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
743 foreach my $port (@lastv) {
744 check_views( paxos_log($port), \@views, \@lastv);
747 print " Wait for lock_tester to finish (waitpid $t)\n";
750 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
751 mydie( "Failed lock tester for test 13" );
760 print "test14: start 5-process rsm, kill slave break1, kill slave break2\n";
762 start_nodes(5, "ls");
764 print "Start lock_tester $p[0]\n";
765 $t = spawn("./lock_tester", $p[0]);
769 print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n";
770 spawn("./rsm_tester", $p[4]+1, "breakpoint", 1);
773 print "Kill slave (PID: $pid[3]) on port $p[3] at breakpoint 2\n";
774 spawn("./rsm_tester", $p[3]+1, "breakpoint", 2);
781 print "first view change wait\n";
782 my @lastv = ($p[0],$p[1],$p[2],$p[3]);
783 foreach my $port (@lastv) {
784 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
787 print "second view change wait\n";
789 @lastv = ($p[0],$p[1],$p[2]);
790 foreach my $port (@lastv) {
791 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
794 print " Wait for lock_tester to finish (waitpid $t)\n";
797 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
798 mydie( "Failed lock tester for test 14" );
807 print "test15: start 5-process rsm, kill slave break1, kill primary break2\n";
809 start_nodes(5, "ls");
811 print "Start lock_tester $p[0]\n";
812 $t = spawn("./lock_tester", $p[0]);
816 print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n";
817 spawn("./rsm_tester", $p[4]+1, "breakpoint", 1);
820 print "Kill primary (PID: $pid[0]) on port $p[0] at breakpoint 2\n";
821 spawn("./rsm_tester", $p[0]+1, "breakpoint", 2);
827 print "first view change wait\n";
828 my @lastv = ($p[0],$p[1],$p[2],$p[3]);
829 foreach my $port (@lastv) {
830 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
833 print "second view change wait\n";
835 @lastv = ($p[1],$p[2],$p[3]);
836 foreach my $port (@lastv) {
837 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
840 print " Wait for lock_tester to finish (waitpid $t)\n";
843 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
844 mydie( "Failed lock tester for test 15" );
853 print "test16: start 3-process rsm, partition primary, heal it\n";
855 start_nodes(3, "ls");
857 print "Start lock_tester $p[0]\n";
858 $t = spawn("./lock_tester", $p[0]);
862 print "Partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n";
864 spawn("./rsm_tester", $p[0]+1, "partition", 0);
868 print "first view change wait\n";
869 my @lastv = ($p[1],$p[2]);
870 foreach my $port (@lastv) {
871 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
876 print "Heal partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n";
877 spawn("./rsm_tester", $p[0]+1, "partition", 1);
881 # xxx it should test that this is the 5th view!
882 print "second view change wait\n";
883 @lastv = ($p[0], $p[1],$p[2]);
884 foreach my $port (@lastv) {
885 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
888 print " Wait for lock_tester to finish (waitpid $t)\n";
891 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
892 mydie( "Failed lock tester for test 16" );
899 print "tests done OK\n";