# Preamble (excerpt): wait-status constants, shared test state, signal handling.
# ":sys_wait_h" imports the waitpid flag constants; WNOHANG is used with
# waitpid further down in this file.
3 use POSIX ":sys_wait_h";
# Sequence of views the nodes are expected to have gone through so far.
10 my @views = (); #expected views
11 my %in_views; #per-port count of views in which that node is expected to appear
# On HUP/INT/ABRT/QUIT/TERM, run killprocess instead of the default action.
16 use sigtrap 'handler' => \&killprocess, 'HUP', 'INT', 'ABRT', 'QUIT', 'TERM';
# Builds the log file name "paxos-<port>.log" for the node on $port.
# NOTE(review): the enclosing sub header is not visible in this excerpt;
# presumably this is the body of paxos_log(), which callers use to obtain
# per-port log names.
20 return "paxos-$port.log";
# Stop all spawned processes first, but only when $always_kill is set.
# NOTE(review): enclosing sub not shown; presumably part of the error path
# (mydie) that runs before the script dies — confirm.
25 killprocess() if ($always_kill);
# Announce the forced stop and list the PIDs currently held in @pid.
30 print "killprocess: forcestop all spawned processes...@pid \n";
# Fragment of the process launcher (presumably spawn(); header not shown).
# $p is the program path and @a its arguments; output goes to "$p-<args>.log".
# Arguments are joined with "-" to form the log file name suffix.
42 my $aa = join("-", @a);
# Remember the process log so it can be located later.
45 push( @logs, "$p-$aa.log" );
# For config_server and lock_server also track the paxos log of the node;
# $a[1] is the second argument, which the spawn_ls/spawn_config wrappers
# fill with the node's own port.
46 if( $p =~ /config_server/ ) {
47 push( @logs, paxos_log($a[1]) );
49 if( $p =~ /lock_server/ ) {
50 push( @logs, paxos_log($a[1]) );
# $pid is defined but the preceding branch was not taken.
# NOTE(review): presumably the forked child (the fork call itself is not
# visible in this excerpt).
53 } elsif (defined $pid) {
# Append stdout to the per-process log, then point stderr at the same file.
# NOTE(review): the messages below contain typos ("Couln't", "stout"); they
# are runtime strings and are intentionally left untouched here.
55 open(STDOUT, ">>$p-$aa.log")
56 or mydie "Couln't redirect stout\n";
57 open(STDERR, ">&STDOUT")
58 or mydie "Couln't redirect stderr\n";
# Failure path of starting the program (the statement it guards is omitted).
62 or mydie "Cannot start new $p @a $!\n";
# Reached when the fork itself failed; $! carries the OS error.
64 mydie "Cannot fork: $!\n";
# Generates $num random port numbers and sorts them numerically.
# NOTE(review): enclosing sub header and return statement are not shown.
72 for( my $i = 0; $i < $num; $i++ ) {
# int(rand(54000)) is 0..53999, so each value lies in 10000..63999.
# NOTE(review): no de-duplication is visible here; two nodes could in
# principle draw the same port — confirm against the omitted lines.
73 push( @p, int(rand(54000))+10000 );
# Numeric (not lexical) ascending sort.
75 my @sp = sort { $a <=> $b } @p;
# Writes the given ports to the file "config" in the current directory,
# one per line. Mode ">" truncates any existing file.
81 open( CONFIG, ">config" ) or mydie( "Couldn't open config for writing" );
82 foreach my $p (@ports) {
# Zero-pad each port to at least five digits.
83 printf CONFIG "%05d\n", $p;
# Launches ./lock_server with ($master, $port) and returns whatever spawn()
# returns (callers store it in @pid).
91 return spawn( "./lock_server", $master, $port );
# Launches ./config_server with ($master, $port) and returns whatever spawn()
# returns (callers store it in @pid).
97 return spawn( "./config_server", $master, $port );
# Fragment of the log checker (check_views; header not shown). Callers pass
# a log file name, a reference to the expected view list and, optionally, a
# reference to the expected final view.
# Abort if the log file $l could not be read.
107 or mydie( "Failed: couldn't read $l" );
115 foreach my $line (@log) {
# A view record looks like "done <view number> <space-separated members>".
116 if( $line =~ /^done (\d+) ([\d\s]+)$/ ) {
# Split the member list on whitespace.
119 my @view = split( /\s+/, $2 );
123 # let there be extra views
128 my @expected = @{$e};
# Arrays in numeric context compare by element count only.
# NOTE(review): any element-wise comparison would have to be in the lines
# omitted from this excerpt — confirm.
130 if( @expected != @view ) {
131 mydie( "Failed: In log $l at view $num is (@view), but expected $i (@expected)" );
# Fewer views were found in the log than expected.
139 mydie( "Failed: In log $l, not enough views seen!" );
# Optional check of the final view, only when the caller supplied one.
142 if( defined $last_v ) {
143 my @last_exp_v = @{$last_v};
# As above, this visible test compares lengths only.
144 if( @last_exp_v != @last_view ) {
145 mydie( "Failed: In log $l last view didn't match, got view @last_view, but expected @last_exp_v" );
# Fragment of get_num_views($log, $including): counts the "done " lines in
# $log that also contain $including.
154 my $including = shift;
# Runs a shell pipeline; $log and $including are interpolated into the
# command unquoted/unescaped, and the second grep is a plain substring match
# on the whole line (not on a specific field). wc -l output includes a
# trailing newline.
155 my $nv = `grep "done " $log | grep "$including" | wc -l`;
# Waits until $including appears in at least $num_views views recorded in a
# log file, or dies on timeout.
# Callers pass four arguments: (log file, number of views, port to look for,
# timeout in seconds); only the second and third unpacking lines are visible
# in this excerpt.
161 sub wait_for_view_change {
164 my $num_views = shift;
165 my $including = shift;
# Keep polling while the count is too low and the deadline has not passed.
169 while( (get_num_views( $log, $including ) < $num_views) and
170 ($start + $timeout > time()) ) {
# Most recent line containing "done", used for the progress message only.
171 my $lastv = `grep done $log | tail -n 1`;
173 print " Waiting for $including to be present in >=$num_views views in $log (Last view: $lastv)\n";
# Re-check after the loop: leaving it because of the deadline is a failure.
177 if( get_num_views( $log, $including ) < $num_views) {
178 mydie( "Failed: Timed out waiting for $including to be in >=$num_views in log $log" );
180 print " Done: $including is in >=$num_views views in $log\n";
# Fragment of a waitpid-with-timeout helper (sub header not shown).
# Non-blocking waitpid: with WNOHANG it returns 0 while the child is still
# running and -1 when there is no such child.
192 $done_pid = waitpid($pid, POSIX::WNOHANG);
# Repeat until the child has been reaped or $to seconds have elapsed.
193 } while( $done_pid <= 0 and (time() - $start) < $to );
# Child was not reaped within the time limit.
195 if( $done_pid <= 0 ) {
197 mydie( "Failed: Timed out waiting for process $pid\n" );
# Takes a reference to an array of ports forming the next expected view,
# waits for every member to record it, then validates each member's log.
# NOTE(review): recording the view in @views presumably happens in the lines
# omitted from this excerpt — confirm.
204 sub wait_and_check_expected_view($) {
# Each member of the new view is expected to appear in one more view.
207 for (my $i = 0; $i <=$#$v; $i++) {
208 $in_views{$v->[$i]}++;
# Wait up to 20 seconds per member for its own log to reach that count.
210 foreach my $port (@$v) {
211 wait_for_view_change(paxos_log($port), $in_views{$port}, $port, 20);
# Then compare every member's log against the expected view history.
213 foreach my $port (@$v) {
214 my $log = paxos_log($port);
215 check_views( $log, \@views );
# Starts $n nodes of the kind selected by $command ("ls" for lock_server,
# "config_server" for config_server); callers invoke it as
# start_nodes(<count>, "ls").
219 sub start_nodes ($$){
# Reset the expected-view counter of every port in @p.
224 for (my $i = 0; $i <= $#p; $i++) {
225 $in_views{$p[$i]} = 0;
# Spawn node $i on port $p[$i]; $p[0] is always passed as the first
# (master) argument. The returned value is appended to @pid.
231 for (my $i = 0; $i < $n; $i++) {
232 if ($command eq "ls") {
233 @pid = (@pid, spawn_ls($p[0],$p[$i]));
234 print "Start lock_server on $p[$i]\n";
235 }elsif ($command eq "config_server"){
236 @pid = (@pid, spawn_config($p[0],$p[$i]));
237 print "Start config on $p[$i]\n";
# Wait for and verify the view described by @vv.
# NOTE(review): how @vv is built, and whether this runs once per started
# node, is in lines omitted from this excerpt.
242 wait_and_check_expected_view(\@vv);
# Command-line handling and test selection.
# -s takes a value, -k is a boolean flag; results land in %options.
# NOTE(review): what each option controls is in lines omitted here.
248 getopts("s:k",\%options);
249 if (defined($options{s})) {
252 if (defined($options{k})) {
256 #get a sorted list of random ports
# Only the first five ports are written to the config file.
258 print_config( @p[0..4] );
263 # see which tests are set
# Remaining arguments are test numbers; accept only 0 .. $NUM_TESTS-1.
265 foreach my $t (@ARGV) {
266 if( $t < $NUM_TESTS && $t >= 0 ) {
272 for( my $i = 0; $i < $NUM_TESTS; $i++ ) {
# test0 banner; the statements that start the servers are not part of this
# excerpt.
278 print "test0: start 3-process lock server\n";
# test1: terminate the third server and expect a view made of the first two.
285 print "test1: start 3-process lock server, kill third server\n";
# Send SIGTERM to the process recorded for port $p[2].
288 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
289 kill "TERM", $pid[2];
293 # it should go through 4 views
# Expected next view: the two surviving nodes.
294 my @v4 = ($p[0], $p[1]);
295 wait_and_check_expected_view(\@v4);
# test2: terminate the first server and expect a view made of the other two.
302 print "test2: start 3-process lock server, kill first server\n";
# Send SIGTERM to the process recorded for port $p[0].
305 print "Kill first (PID: $pid[0]) on port $p[0]\n";
306 kill "TERM", $pid[0];
310 # it should go through 4 views
# Expected next view: second and third nodes only.
311 my @v4 = ($p[1], $p[2]);
312 wait_and_check_expected_view(\@v4);
# test3: kill the third server, verify the two-node view, restart the third
# server and verify that all three nodes form a view again.
321 print "test3: start 3-process lock_server, kill a server, restart a server\n";
324 print "Kill server (PID: $pid[2]) on port $p[2]\n";
325 kill "TERM", $pid[2];
# View after the kill: first and second nodes.
329 my @v4 = ($p[0], $p[1]);
330 wait_and_check_expected_view(\@v4);
# Restart on the same port, joining via $p[0]; keep the new handle in @pid.
332 print "Restart killed server on port $p[2]\n";
333 $pid[2] = spawn_ls ($p[0], $p[2]);
# View after the restart: all three nodes.
337 my @v5 = ($p[0], $p[1], $p[2]);
338 wait_and_check_expected_view(\@v5);
# test4: repeated kills/restarts of the second and third servers, including
# a phase in which no new view may form, followed by a final log check.
345 print "test4: 3-process lock_server, kill third server, kill second server, restart third server, kill third server again, restart second server, re-restart third server, check logs\n";
348 print "Kill server (PID: $pid[2]) on port $p[2]\n";
349 kill "TERM", $pid[2];
# View after the first kill: first and second nodes.
352 my @v4 = ($p[0], $p[1]);
353 wait_and_check_expected_view(\@v4);
355 print "Kill server (PID: $pid[1]) on port $p[1]\n";
356 kill "TERM", $pid[1];
359 #no view change can happen because of a lack of majority
361 print "Restarting server on port $p[2]\n";
362 $pid[2] = spawn_ls($p[0], $p[2]);
366 #no view change can happen because of a lack of majority
# Every node's log must still show exactly the expected number of views
# that mention the node's own port.
# NOTE(review): uses bare die rather than mydie, unlike the other failure
# paths in this file — confirm this is intended.
367 foreach my $port (@p[0..2]) {
368 my $num_v = get_num_views(paxos_log($port), $port);
369 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
373 print "Kill server (PID: $pid[2]) on port $p[2]\n";
374 kill "TERM", $pid[2];
379 print "Restarting server on port $p[1]\n";
380 $pid[1] = spawn_ls($p[0], $p[1]);
# Re-baseline the expected counts of the first two nodes from their logs.
384 foreach my $port (@p[0..1]) {
385 $in_views{$port} = get_num_views( paxos_log($port), $port );
386 print " Node $port is present in ", $in_views{$port}, " views in ", paxos_log($port), "\n";
389 print "Restarting server on port $p[2]\n";
390 $pid[2] = spawn_ls($p[0], $p[2]);
# Final view should contain all three nodes; wait (20 s each) until every
# node appears in one more view than currently recorded.
392 my @lastv = ($p[0],$p[1],$p[2]);
393 foreach my $port (@lastv) {
394 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
397 # now check the paxos logs and make sure the logs go through the right
# Validate each log against the view history and the expected last view.
400 foreach my $port (@lastv) {
401 check_views( paxos_log($port), \@views, \@lastv);
# test5: trigger a breakpoint on the first server, kill and restart the
# third server, then check that the final view is (second, third).
409 print "test5: 3-process lock_server, send signal 1 to first server, kill third server, restart third server, check logs\n";
# rsm_tester is pointed at port $p[0]+1.
# NOTE(review): the message says "breakpoint 1" but the argument passed is
# 3 — confirm the mapping is intentional.
412 print "Sending paxos breakpoint 1 to first server on port $p[0]\n";
413 spawn("./rsm_tester", $p[0]+1, "breakpoint", 3);
417 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
418 kill "TERM", $pid[2];
# No node may have recorded a new view at this point.
# NOTE(review): bare die instead of mydie, as in test4.
421 foreach my $port (@p[0..2]) {
422 my $num_v = get_num_views( paxos_log($port), $port );
423 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
426 print "Restarting third server on port $p[2]\n";
427 $pid[2]= spawn_ls($p[0], $p[2]);
# Expected final view excludes the first server.
428 my @lastv = ($p[1],$p[2]);
429 foreach my $port (@lastv) {
430 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
434 # now check the paxos logs and make sure the logs go through the right
437 foreach my $port (@lastv) {
438 check_views( paxos_log($port), \@views, \@lastv);
# test6: four servers; trigger a breakpoint on the first, kill and restart
# the fourth, then expect the final view (second, third, fourth).
446 print "test6: 4-process lock_server, send signal 2 to first server, kill fourth server, restart fourth server, check logs\n";
# NOTE(review): the message says "breakpoint 2" but the argument passed is
# 4 — confirm the mapping is intentional.
448 print "Sending paxos breakpoint 2 to first server on port $p[0]\n";
449 spawn("./rsm_tester", $p[0]+1, "breakpoint", 4);
453 print "Kill fourth server (PID: $pid[3]) on port $p[3]\n";
454 kill "TERM", $pid[3];
# Second and third servers must not have recorded a new view yet.
# NOTE(review): bare die instead of mydie.
458 foreach my $port ($p[1],$p[2]) {
459 my $num_v = get_num_views( paxos_log($port), $port );
460 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
# The restarted node is given $p[1] (not $p[0]) as its first argument.
465 print "Restarting fourth server on port $p[3]\n";
466 $pid[3] = spawn_ls($p[1], $p[3]);
# NOTE(review): the bodies of the next two loops are not part of this
# excerpt.
470 my @v5 = ($p[0],$p[1],$p[2]);
471 foreach my $port (@v5) {
478 # the 6th view will be (2,3) or (1,2,3,4)
479 my @v6 = ($p[1],$p[2]);
480 foreach my $port (@v6) {
# Wait up to 30 seconds for second and third servers to enter one more view.
483 foreach my $port (@v6) {
484 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 30);
487 # final will be (2,3,4)
488 my @lastv = ($p[1],$p[2],$p[3]);
489 foreach my $port (@lastv) {
490 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
# Validate each log against the view history and the expected last view.
492 foreach my $port (@lastv) {
493 check_views( paxos_log($port), \@views, \@lastv );
# test7: four servers; breakpoint on the first, kill fourth, third and
# second, restart second and third (no view may form), restart the fourth,
# then expect the final view (second, third, fourth).
500 print "test7: 4-process lock_server, send signal 2 to first server, kill fourth server, kill other servers, restart other servers, restart fourth server, check logs\n";
# NOTE(review): message says "breakpoint 2" but the argument passed is 4.
502 print "Sending paxos breakpoint 2 to first server on port $p[0]\n";
503 spawn("./rsm_tester", $p[0]+1, "breakpoint", 4);
506 print "Kill fourth server (PID: $pid[3]) on port $p[3]\n";
507 kill "TERM", $pid[3];
511 print "Kill third server (PID: $pid[2]) on port $p[2]\n";
512 kill "TERM", $pid[2];
514 print "Kill second server (PID: $pid[1]) on port $p[1]\n";
515 kill "TERM", $pid[1];
519 print "Restarting second server on port $p[1]\n";
520 $pid[1] = spawn_ls($p[0], $p[1]);
524 print "Restarting third server on port $p[2]\n";
525 $pid[2] = spawn_ls($p[0], $p[2]);
529 #no view change is possible by now because there is no majority
# NOTE(review): bare die instead of mydie.
530 foreach my $port ($p[1],$p[2]) {
531 my $num_v = get_num_views( paxos_log($port), $port );
532 die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port});
# The restarted node is given $p[1] (not $p[0]) as its first argument.
535 print "Restarting fourth server on port $p[3]\n";
536 $pid[3] = spawn_ls($p[1], $p[3]);
# NOTE(review): the body of the loop over @v5 is not part of this excerpt.
540 my @v5 = ($p[0], $p[1], $p[2]);
542 foreach my $port (@v5) {
# Expected final view: second, third and fourth servers.
547 my @lastv = ($p[1],$p[2],$p[3]);
548 foreach my $port (@lastv) {
549 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
552 foreach my $port (@lastv) {
553 check_views( paxos_log($port), \@views, \@lastv);
# test8: run lock_tester against the first server and require its success
# message in the tester log.
561 print "test8: start 3-process lock service\n";
564 print "Start lock_tester $p[0]\n";
565 $t = spawn("./lock_tester", $p[0]);
567 print " Wait for lock_tester to finish (waitpid $t)\n";
# system() returns grep's exit status: non-zero (true) when the success
# line is absent, which makes the test fail.
570 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
571 mydie( "Failed lock tester for test 8" );
# test9: kill the node on port $p[2] while lock_tester is running; expect
# the view (first, second) and a successful tester run.
580 print "test9: start 3-process rsm, kill second slave while lock_tester is running\n";
583 print "Start lock_tester $p[0]\n";
584 $t = spawn("./lock_tester", $p[0]);
# Random delay of 1..10 seconds before the kill.
586 sleep int(rand(10)+1);
588 print "Kill slave (PID: $pid[2]) on port $p[2]\n";
589 kill "TERM", $pid[2];
593 # it should go through 4 views
594 my @v4 = ($p[0], $p[1]);
595 wait_and_check_expected_view(\@v4);
597 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
600 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
601 mydie( "Failed lock tester for test 9" );
# test10: kill the node on port $p[2] while lock_tester is running, verify
# the two-node view, restart the node and verify the three-node view.
610 print "test10: start 3-process rsm, kill second slave and restarts it later while lock_tester is running\n";
613 print "Start lock_tester $p[0]\n";
614 $t = spawn("./lock_tester", $p[0]);
# Random delay of 1..10 seconds before the kill.
616 sleep int(rand(10)+1);
618 print "Kill slave (PID: $pid[2]) on port $p[2]\n";
619 kill "TERM", $pid[2];
623 # it should go through 4 views
624 my @v4 = ($p[0], $p[1]);
625 wait_and_check_expected_view(\@v4);
629 print "Restarting killed lock_server on port $p[2]\n";
630 $pid[2] = spawn_ls($p[0], $p[2]);
# After the restart all three nodes should be in the view.
631 my @v5 = ($p[0],$p[1],$p[2]);
632 wait_and_check_expected_view(\@v5);
634 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
637 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
638 mydie( "Failed lock tester for test 10" );
# test11: kill the node on port $p[0] while lock_tester is running; expect
# the view (second, third) and a successful tester run.
648 print "test11: start 3-process rsm, kill primary while lock_tester is running\n";
651 print "Start lock_tester $p[0]\n";
652 $t = spawn("./lock_tester", $p[0]);
# Random delay of 1..10 seconds before the kill.
654 sleep int(rand(10)+1);
656 print "Kill primary (PID: $pid[0]) on port $p[0]\n";
657 kill "TERM", $pid[0];
661 # it should go through 4 views
662 my @v4 = ($p[1], $p[2]);
663 wait_and_check_expected_view(\@v4);
665 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
668 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
669 mydie( "Failed lock tester for test 11" );
# test12: while lock_tester runs, stop the first server via breakpoint 1,
# verify the view (second, third), restart the first server and verify that
# the last view contains all three nodes.
678 print "test12: start 3-process rsm, kill master at break1 and restart it while lock_tester is running\n";
680 start_nodes(3, "ls");
682 print "Start lock_tester $p[0]\n";
683 $t = spawn("./lock_tester", $p[0]);
# No signal is sent here; the stop is requested through rsm_tester, which
# is pointed at port $p[0]+1.
687 print "Kill master (PID: $pid[0]) on port $p[0] at breakpoint 1\n";
688 spawn("./rsm_tester", $p[0]+1, "breakpoint", 1);
# NOTE(review): this says 5 views where the parallel tests say 4 — confirm.
693 # it should go through 5 views
694 my @v4 = ($p[1], $p[2]);
695 wait_and_check_expected_view(\@v4);
# The restarted node is given $p[1] as its first argument, since the node
# on $p[0] is the one being restarted.
697 print "Restarting killed lock_server on port $p[0]\n";
698 $pid[0] = spawn_ls($p[1], $p[0]);
702 # the last view should include all nodes
703 my @lastv = ($p[0],$p[1],$p[2]);
704 foreach my $port (@lastv) {
705 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
708 foreach my $port (@lastv) {
709 check_views( paxos_log($port), \@views, \@lastv);
712 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
715 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
716 mydie( "Failed lock tester for test 12" );
# test13: while lock_tester runs, stop the third server via breakpoint 1,
# verify the view (first, second), restart the third server and verify that
# the last view contains all three nodes.
725 print "test13: start 3-process rsm, kill slave at break1 and restart it while lock_tester is running\n";
727 start_nodes(3, "ls");
729 print "Start lock_tester $p[0]\n";
730 $t = spawn("./lock_tester", $p[0]);
# The stop is requested through rsm_tester, pointed at port $p[2]+1.
734 print "Kill slave (PID: $pid[2]) on port $p[2] at breakpoint 1\n";
735 spawn("./rsm_tester", $p[2]+1, "breakpoint", 1);
739 # it should go through 4 views
740 my @v4 = ($p[0], $p[1]);
741 wait_and_check_expected_view(\@v4);
743 print "Restarting killed lock_server on port $p[2]\n";
744 $pid[2] = spawn_ls($p[0], $p[2]);
748 # the last view should include all nodes
749 my @lastv = ($p[0],$p[1],$p[2]);
750 foreach my $port (@lastv) {
751 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
754 foreach my $port (@lastv) {
755 check_views( paxos_log($port), \@views, \@lastv);
758 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
761 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
762 mydie( "Failed lock tester for test 13" );
# test14: five nodes; stop the fifth at breakpoint 1 and the fourth at
# breakpoint 2 while lock_tester runs, waiting for two view changes.
771 print "test14: start 5-process rsm, kill slave break1, kill slave break2\n";
773 start_nodes(5, "ls");
775 print "Start lock_tester $p[0]\n";
776 $t = spawn("./lock_tester", $p[0]);
# Both stops are requested through rsm_tester on the node's port + 1.
780 print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n";
781 spawn("./rsm_tester", $p[4]+1, "breakpoint", 1);
784 print "Kill slave (PID: $pid[3]) on port $p[3] at breakpoint 2\n";
785 spawn("./rsm_tester", $p[3]+1, "breakpoint", 2);
# First change: the four nodes other than the fifth each gain one view.
792 print "first view change wait\n";
793 my @lastv = ($p[0],$p[1],$p[2],$p[3]);
794 foreach my $port (@lastv) {
795 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
# Second change: wait on the first three nodes.
# NOTE(review): the target is again $in_views{$port}+1; whether %in_views
# is updated between the two waits is in lines omitted here.
798 print "second view change wait\n";
800 @lastv = ($p[0],$p[1],$p[2]);
801 foreach my $port (@lastv) {
802 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
805 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
808 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
809 mydie( "Failed lock tester for test 14" );
# test15: five nodes; stop the fifth at breakpoint 1 and the first at
# breakpoint 2 while lock_tester runs, waiting for two view changes.
818 print "test15: start 5-process rsm, kill slave break1, kill primary break2\n";
820 start_nodes(5, "ls");
822 print "Start lock_tester $p[0]\n";
823 $t = spawn("./lock_tester", $p[0]);
# Both stops are requested through rsm_tester on the node's port + 1.
827 print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n";
828 spawn("./rsm_tester", $p[4]+1, "breakpoint", 1);
831 print "Kill primary (PID: $pid[0]) on port $p[0] at breakpoint 2\n";
832 spawn("./rsm_tester", $p[0]+1, "breakpoint", 2);
# First change: the four nodes other than the fifth each gain one view.
838 print "first view change wait\n";
839 my @lastv = ($p[0],$p[1],$p[2],$p[3]);
840 foreach my $port (@lastv) {
841 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
# Second change: wait on the second, third and fourth nodes.
# NOTE(review): whether %in_views is updated between the two waits is in
# lines omitted here.
844 print "second view change wait\n";
846 @lastv = ($p[1],$p[2],$p[3]);
847 foreach my $port (@lastv) {
848 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
851 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
854 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
855 mydie( "Failed lock tester for test 15" );
# test16: partition the first node, wait for the remaining two to change
# view, heal the partition and wait for all three to change view again.
# The final line of this block is the overall success banner.
864 print "test16: start 3-process rsm, partition primary, heal it\n";
866 start_nodes(3, "ls");
868 print "Start lock_tester $p[0]\n";
869 $t = spawn("./lock_tester", $p[0]);
# "partition" with argument 0 is issued here and argument 1 when healing
# below; rsm_tester is pointed at port $p[0]+1.
873 print "Partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n";
875 spawn("./rsm_tester", $p[0]+1, "partition", 0);
879 print "first view change wait\n";
880 my @lastv = ($p[1],$p[2]);
881 foreach my $port (@lastv) {
882 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
887 print "Heal partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n";
888 spawn("./rsm_tester", $p[0]+1, "partition", 1);
892 # xxx it should test that this is the 5th view!
893 print "second view change wait\n";
894 @lastv = ($p[0], $p[1],$p[2]);
895 foreach my $port (@lastv) {
896 wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20);
899 print " Wait for lock_tester to finish (waitpid $t)\n";
# Non-zero grep status means the success line was not found.
902 if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) {
903 mydie( "Failed lock tester for test 16" );
# Printed once the selected tests have run without a fatal error.
910 print "tests done OK\n";