From: Peter Iannucci Date: Thu, 19 Dec 2013 20:01:03 +0000 (-0800) Subject: More clean-ups X-Git-Url: http://xvm.mit.edu/gitweb/invirt/third/libt4.git/commitdiff_plain/5224670fe4e903bd507eabd486f8723353893bfa More clean-ups --- diff --git a/Makefile b/Makefile index caf3439..5ec634c 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CXX ?= g++ CC ?= g++ EXTRA_TARGETS ?= -all: lock_demo lock_server lock_tester rsm_tester rpc/rpctest $(EXTRA_TARGETS) +all: lock_server lock_tester rsm_tester rpc/rpctest $(EXTRA_TARGETS) rpc/librpc.a: rpc/rpc.o rpc/connection.o rpc/poll_mgr.o rpc/thread_pool.o rm -f $@ @@ -13,8 +13,6 @@ rpc/librpc.a: rpc/rpc.o rpc/connection.o rpc/poll_mgr.o rpc/thread_pool.o rpc/rpctest: rpc/rpctest.o threaded_log.o rpc/librpc.a t4.o -lock_demo : lock_demo.o lock_client.o threaded_log.o rsm_client.o rpc/librpc.a t4.o - lock_tester : lock_tester.o lock_client.o threaded_log.o rsm_client.o rpc/librpc.a t4.o lock_server : lock_smain.o threaded_log.o rsm.o paxos.o config.o log.o lock_server.o rpc/librpc.a t4.o @@ -27,7 +25,7 @@ rsm_tester: rsm_tester.o rsmtest_client.o threaded_log.o rpc/librpc.a t4.o -include *.d -include rpc/*.d -clean_files=rpc/rpctest rpc/*.o *.d rpc/*.d rpc/librpc.a *.o config *.log lock_server lock_tester lock_demo rsm_tester +clean_files=rpc/rpctest rpc/*.o *.d rpc/*.d rpc/librpc.a *.o config *.log lock_server lock_tester rsm_tester .PHONY: clean $(EXTRA_TARGETS) clean: rm -rf $(clean_files) diff --git a/lock_client.cc b/lock_client.cc index b29fff4..6ec3cbc 100644 --- a/lock_client.cc +++ b/lock_client.cc @@ -26,10 +26,6 @@ lock_state & lock_client::get_lock_state(lock_protocol::lockid_t lid) { } lock_client::lock_client(string xdst, lock_release_user *_lu) : lu(_lu), next_xid(0) { - cl = unique_ptr(new rpcc(xdst)); - if (cl->bind() < 0) - LOG << "lock_client: call bind"; - rlock_port = std::uniform_int_distribution(1024,32000+1024)(global->random_generator); id = "127.0.0.1:" + std::to_string(rlock_port); rlsrpc = unique_ptr(new rpcs(rlock_port)); @@ -69,14 +65,6 @@ void lock_client::releaser() { LOG << "Releaser stopping"; } -int lock_client::stat(lock_protocol::lockid_t lid) { - VERIFY(0); - int r; - auto ret = (lock_protocol::status)cl->call(lock_protocol::stat, r, lid, id); - VERIFY (ret == lock_protocol::OK); - return r; -} - lock_protocol::status lock_client::acquire(lock_protocol::lockid_t lid) { lock_state & st = get_lock_state(lid); lock sl(st.m); @@ -213,7 +201,3 @@ t4_status t4_lock_client_acquire(t4_lock_client *client, t4_lockid_t lid) { t4_status t4_lock_client_release(t4_lock_client *client, t4_lockid_t lid) { return ((lock_client *)client)->release(lid); } - -t4_status t4_lock_client_stat(t4_lock_client *client, t4_lockid_t lid) { - return ((lock_client *)client)->stat(lid); -} diff --git a/lock_client.h b/lock_client.h index 7a4d43f..9e449f4 100644 --- a/lock_client.h +++ b/lock_client.h @@ -43,7 +43,6 @@ typedef std::map lock_map; // lock_revoke_server. class lock_client { private: - unique_ptr cl; unique_ptr rlsrpc; thread releaser_thread; unique_ptr rsmc; @@ -62,7 +61,6 @@ class lock_client { ~lock_client(); lock_protocol::status acquire(lock_protocol::lockid_t); lock_protocol::status release(lock_protocol::lockid_t); - int stat(lock_protocol::lockid_t); void releaser(); rlock_protocol::status revoke_handler(int &, lock_protocol::lockid_t, lock_protocol::xid_t); rlock_protocol::status retry_handler(int &, lock_protocol::lockid_t, lock_protocol::xid_t); @@ -93,7 +91,6 @@ t4_lock_client *t4_lock_client_new(const char *dst); void t4_lock_client_delete(t4_lock_client *); t4_status t4_lock_client_acquire(t4_lock_client *, t4_lockid_t); t4_status t4_lock_client_release(t4_lock_client *, t4_lockid_t); -t4_status t4_lock_client_stat(t4_lock_client *, t4_lockid_t); #ifdef __cplusplus } diff --git a/lock_demo.cc b/lock_demo.cc deleted file mode 100644 index 88383ab..0000000 --- a/lock_demo.cc +++ /dev/null @@ -1,12 +0,0 @@ -#include "lock_client.h" - -int main(int argc, char *argv[]) { - global = new t4_state('d'); - if(argc != 2) { - LOG_NONMEMBER << "Usage: " << argv[0] << " [host:]port"; - return 1; - } - - lock_client *lc = new lock_client(argv[1]); - LOG_NONMEMBER << "stat returned " << lc->stat("1"); -} diff --git a/lock_protocol.h b/lock_protocol.h index 8cccebe..bd691a0 100644 --- a/lock_protocol.h +++ b/lock_protocol.h @@ -13,7 +13,6 @@ namespace lock_protocol { REMOTE_PROCEDURE_BASE(0x7000); REMOTE_PROCEDURE(1, acquire, (int &, lockid_t, callback_t, xid_t)); REMOTE_PROCEDURE(2, release, (int &, lockid_t, callback_t, xid_t)); - REMOTE_PROCEDURE(3, stat, (int &, lockid_t, callback_t)); } namespace rlock_protocol { diff --git a/lock_server.cc b/lock_server.cc index efb23f5..86e5ad2 100644 --- a/lock_server.cc +++ b/lock_server.cc @@ -34,7 +34,6 @@ lock_server::lock_server(rsm & r) : rsm_ (&r) { r.reg(lock_protocol::acquire, &lock_server::acquire, this); r.reg(lock_protocol::release, &lock_server::release, this); - r.reg(lock_protocol::stat, &lock_server::stat, this); } void lock_server::revoker () { @@ -161,11 +160,3 @@ void lock_server::unmarshal_state(const string & state) { lock sl(lock_table_lock); unmarshall(state, false, nacquire, lock_table); } - -lock_protocol::status lock_server::stat(int & r, lock_protocol::lockid_t lid, const callback_t &) { - LOG << "stat request for " << lid; - VERIFY(0); - r = nacquire; - return lock_protocol::OK; -} - diff --git a/lock_server.h b/lock_server.h index 88b9e11..d182876 100644 --- a/lock_server.h +++ b/lock_server.h @@ -41,7 +41,6 @@ class lock_server : private rsm_state_transfer { lock_server(rsm & r); lock_protocol::status acquire(int &, lock_protocol::lockid_t, const callback_t & id, lock_protocol::xid_t); lock_protocol::status release(int &, lock_protocol::lockid_t, const callback_t & id, lock_protocol::xid_t); - lock_protocol::status stat(int &, lock_protocol::lockid_t, const callback_t & id); }; #endif diff --git a/paxos.cc b/paxos.cc index 2c7e79a..c7f2d1d 100644 --- a/paxos.cc +++ b/paxos.cc @@ -93,20 +93,20 @@ bool proposer_acceptor::prepare(unsigned instance, nodes_t & accepts, prepareres res; prop_t highest_n_a{0, ""}; for (auto i : nodes) { - auto r = rpcc::bind_cached(i); - if (!r) + auto cl = rpcc::bind_cached(i); + if (!cl) continue; - auto status = (paxos_protocol::status)r->call_timeout( - paxos_protocol::preparereq, milliseconds(100), res, me, instance, proposal); + int status = cl->call_timeout(paxos_protocol::preparereq, milliseconds(100), + res, me, instance, proposal); if (status == paxos_protocol::OK) { - if (res.oldinstance) { + LOG << "preparereq response type=" << res.type << " n_a=(" << res.n_a.n + << ", " << res.n_a.m << ") " << "v_a=\"" << res.v_a << "\""; + if (res.type == prepareres::oldinstance) { LOG << "commiting old instance!"; - commit(instance, res.v_a); + lock ml(acceptor_mutex); + commit(instance, res.v_a, ml); return false; - } - LOG << "preparereq responded with oldinstance=" << res.oldinstance << " accept=" << res.accept << " n_a=(" << res.n_a.n << ", " << res.n_a.m << ") " - << "v_a=\"" << res.v_a << "\""; - if (res.accept) { + } else if (res.type == prepareres::accept) { accepts.push_back(i); if (res.n_a >= highest_n_a) { LOG << "found a newer accepted proposal, \"" << res.v_a << "\", with number (" << res.n_a.n << ", " << res.n_a.m << ")"; @@ -121,50 +121,42 @@ bool proposer_acceptor::prepare(unsigned instance, nodes_t & accepts, void proposer_acceptor::accept(unsigned instance, nodes_t & accepts, const nodes_t & nodes, const value_t & v) { + bool accept = false; for (auto i : nodes) { - auto r = rpcc::bind_cached(i); - if (!r) - continue; - bool accept = false; - int status = r->call_timeout( - paxos_protocol::acceptreq, milliseconds(100), accept, me, instance, proposal, v); - if (status == paxos_protocol::OK && accept) - accepts.push_back(i); + if (auto cl = rpcc::bind_cached(i)) { + int status = cl->call_timeout(paxos_protocol::acceptreq, milliseconds(100), + accept, me, instance, proposal, v); + if (status == paxos_protocol::OK && accept) + accepts.push_back(i); + } } } void proposer_acceptor::decide(unsigned instance, const nodes_t & accepts, const value_t & v) { - for (auto i : accepts) { - auto r = rpcc::bind_cached(i); - if (!r) - continue; - int res = 0; - r->call_timeout(paxos_protocol::decidereq, milliseconds(100), res, me, instance, v); - } + int res = 0; + for (auto i : accepts) + if (auto cl = rpcc::bind_cached(i)) + cl->call_timeout(paxos_protocol::decidereq, milliseconds(100), res, me, instance, v); } paxos_protocol::status proposer_acceptor::preparereq(prepareres & r, const node_t &, unsigned instance, prop_t n) { LOG << "instance " << instance << " proposal (" << n.n << ", " << n.m << ")"; lock ml(acceptor_mutex); - r.oldinstance = false; - r.accept = false; - r.n_a = accepted; - r.v_a = accepted_value; if (instance <= instance_h) { LOG << "old instance " << instance << " has value " << values[instance]; - r.oldinstance = true; - r.v_a = values[instance]; + r = prepareres{prepareres::oldinstance, accepted, values[instance]}; } else if (n > promise) { LOG << "looks good to me"; promise = n; l.logprop(promise); - r.accept = true; + r = prepareres{prepareres::accept, accepted, accepted_value}; } else { LOG << "I totally rejected this request. Ha."; + r = prepareres{prepareres::reject, accepted, accepted_value}; } - LOG << "preparereq is responding with oldinstance=" << r.oldinstance << " accept=" << r.accept << " n_a=(" << r.n_a.n << ", " << r.n_a.m << ") " - << "v_a=\"" << r.v_a << "\""; + LOG << "preparereq is responding with oldinstance=" << r.oldinstance << " accept=" << r.accept + << " n_a=(" << r.n_a.n << ", " << r.n_a.m << ") " << "v_a=\"" << r.v_a << "\""; return paxos_protocol::OK; } @@ -172,17 +164,15 @@ paxos_protocol::status proposer_acceptor::acceptreq(bool & r, const node_t &, unsigned instance, prop_t n, const value_t & v) { lock ml(acceptor_mutex); r = false; - if (instance == instance_h + 1) { - if (n >= promise) { - accepted = n; - accepted_value = v; - l.logaccept(accepted, accepted_value); - r = true; - } - return paxos_protocol::OK; - } else { + if (instance != instance_h + 1) return paxos_protocol::ERR; + if (n >= promise) { + accepted = n; + accepted_value = v; + l.logaccept(accepted, accepted_value); + r = true; } + return paxos_protocol::OK; } paxos_protocol::status @@ -191,7 +181,7 @@ proposer_acceptor::decidereq(int &, const node_t &, unsigned instance, const val LOG << "decidereq for accepted instance " << instance << " (my instance " << instance_h << ") v=" << accepted_value; if (instance == instance_h + 1) { VERIFY(accepted_value == v); - commit(instance, accepted_value, ml); + commit(instance, v, ml); } else if (instance <= instance_h) { // we are ahead; ignore. } else { @@ -201,12 +191,9 @@ proposer_acceptor::decidereq(int &, const node_t &, unsigned instance, const val return paxos_protocol::OK; } -void proposer_acceptor::commit(unsigned instance, const value_t & value) { - lock ml(acceptor_mutex); - commit(instance, value, ml); -} - -void proposer_acceptor::commit(unsigned instance, const value_t & value, lock & pxs_mutex_lock) { +void proposer_acceptor::commit(unsigned instance, const value_t & value, lock & acceptor_mutex_lock) { + VERIFY(&value != &accepted_value); // eited by aliasing? + VERIFY(acceptor_mutex_lock); LOG << "instance=" << instance << " has v=" << value; if (instance > instance_h) { LOG << "highestacceptedinstance = " << instance; @@ -214,12 +201,11 @@ void proposer_acceptor::commit(unsigned instance, const value_t & value, lock & l.loginstance(instance, value); instance_h = instance; accepted = promise = {0, me}; - string v = value; // gaaahhh aliasing of value and accepted_value - accepted_value.clear(); // this wipes out "value", too + accepted_value.clear(); if (delegate) { - pxs_mutex_lock.unlock(); - delegate->paxos_commit(instance, v); - pxs_mutex_lock.lock(); + acceptor_mutex_lock.unlock(); + delegate->paxos_commit(instance, value); + acceptor_mutex_lock.lock(); } } } diff --git a/paxos.h b/paxos.h index 79924a3..20e249c 100644 --- a/paxos.h +++ b/paxos.h @@ -46,8 +46,7 @@ class proposer_acceptor { friend class log; class log l = {this, me}; - void commit(unsigned instance, const value_t & v); - void commit(unsigned instance, const value_t & v, lock & pxs_mutex_lock); + void commit(unsigned instance, const value_t & v, lock & acceptor_mutex_lock); paxos_protocol::status preparereq(prepareres & r, const node_t & src, unsigned instance, prop_t n); paxos_protocol::status acceptreq(bool & r, const node_t & src, unsigned instance, prop_t n, const value_t & v); diff --git a/paxos_protocol.h b/paxos_protocol.h index 8f8f816..3e1fbcd 100644 --- a/paxos_protocol.h +++ b/paxos_protocol.h @@ -15,12 +15,11 @@ struct prop_t { namespace paxos_protocol { enum status : rpc_protocol::status { OK, ERR }; struct prepareres { - bool oldinstance; - bool accept; + enum { reject, oldinstance, accept } type; prop_t n_a; string v_a; - MEMBERS(oldinstance, accept, n_a, v_a) + MEMBERS(type, n_a, v_a) }; using node_t = string; using nodes_t = std::vector; diff --git a/rpc/marshall_wrap.h b/rpc/marshall_wrap.h index 6754acc..2e54e47 100644 --- a/rpc/marshall_wrap.h +++ b/rpc/marshall_wrap.h @@ -33,7 +33,6 @@ typedef std::function handler; struct VerifyOnFailure { static inline int unmarshall_args_failure() { VERIFY(0); - return 0; } }; diff --git a/rpc/rpc.cc b/rpc/rpc.cc index e6ec410..80ec124 100644 --- a/rpc/rpc.cc +++ b/rpc/rpc.cc @@ -387,7 +387,6 @@ void rpcs::dispatch(shared_ptr c, const string & buf) { if (procs_.count(proc) < 1) { LOG << "unknown proc 0x" << std::hex << proc << " with h.srv_nonce=" << h.srv_nonce << ", my srv_nonce=" << nonce_; VERIFY(0); - return; } f = procs_[proc]; diff --git a/rpc/rpc.h b/rpc/rpc.h index df5d89e..c0f8d43 100644 --- a/rpc/rpc.h +++ b/rpc/rpc.h @@ -100,14 +100,8 @@ class rpcc : private connection_delegate { inline int call_m(proc_id_t proc, milliseconds to, R & r, marshall && req) { string rep; int intret = call1(proc, to, rep, req); - if (intret < 0) return intret; - unmarshall u(rep, true, r); - if (u.okdone() != true) { - LOG << "rpcc::call_m: failed to unmarshall the reply. You are probably " - << "calling RPC 0x" << std::hex << proc << " with the wrong return type."; - VERIFY(0); - return rpc_protocol::unmarshall_reply_failure; - } + if (intret >= 0) + VERIFY(unmarshall(rep, true, r).okdone()); // guaranteed by static type checking return intret; } @@ -118,8 +112,6 @@ class rpcc : private connection_delegate { rpcc(const string & d); ~rpcc(); - nonce_t id() { return clt_nonce_; } - int bind(milliseconds to = rpc::to_max); // Manages a cache of RPC connections. Usage: diff --git a/rsm_tester.pl b/rsm_tester.pl deleted file mode 100755 index 43a0fb4..0000000 --- a/rsm_tester.pl +++ /dev/null @@ -1,901 +0,0 @@ -#!/usr/bin/perl -w - -use POSIX ":sys_wait_h"; -use Getopt::Std; -use Time::HiRes (usleep); -use strict; - - -my @pid; -my @logs = (); -my @views = (); #expected views -my %in_views; #the number of views a node is expected to be present -my @p; -my $t; -my $always_kill = 0; - -use sigtrap 'handler' => \&killprocess, 'HUP', 'INT', 'ABRT', 'QUIT', 'TERM'; - -sub paxos_log { - my $port = shift; - return "paxos-$port.log"; -} - -sub mydie { - my ($s) = @_; - killprocess() if ($always_kill); - die $s; -} - -sub killprocess { - print "killprocess: forcestop all spawned processes...@pid \n"; - kill 9, @pid; -} - -sub cleanup { - kill 9, @pid; - unlink(@logs); - usleep 200000; -} - -sub spawn { - my ($p, @a) = @_; - my $aa = join("-", @a); - if (my $pid = fork) { -# parent - push( @logs, "$p-$aa.log" ); - if( $p =~ /lock_server/ ) { - push( @logs, paxos_log($a[1]) ); - } - return $pid; - } elsif (defined $pid) { -# child - open(STDOUT, ">>$p-$aa.log") - or mydie "Couln't redirect stout\n"; - open(STDERR, ">&STDOUT") - or mydie "Couln't redirect stderr\n"; - $| = 1; - print "$p @a\n"; - exec "$p @a" - or mydie "Cannot start new $p @a $!\n"; - } else { - mydie "Cannot fork: $!\n"; - } -} - -sub randports { - - my $num = shift; - my @p = (); - for( my $i = 0; $i < $num; $i++ ) { - push( @p, int(rand(54000/2))*2+10000 ); - } - my @sp = sort { $a <=> $b } @p; - return @sp; -} - -sub print_config { - my @ports = @_; - open( CONFIG, ">config" ) or mydie( "Couldn't open config for writing" ); - foreach my $p (@ports) { - printf CONFIG "%05d\n", $p; - } - close( CONFIG ); -} - -sub spawn_ls { - my $master = shift; - my $port = shift; - return spawn( "./lock_server", $master, $port ); -} - -sub check_views { - - my $l = shift; - my $v = shift; - my $last_v = shift; - - open( LOG, "<$l" ) - or mydie( "Failed: couldn't read $l" ); - my @log = ; - close(LOG); - - my @vs = @{$v}; - - my $i = 0; - my @last_view; - foreach my $line (@log) { - if( $line =~ /^done (\d+) ([\d\s]+)$/ ) { - - my $num = $1; - my @view = split( /\s+/, $2 ); - @last_view = @view; - - if( $i > $#vs ) { -# let there be extra views - next; - } - - my $e = $vs[$i]; - my @expected = @{$e}; - - if( @expected != @view ) { - mydie( "Failed: In log $l at view $num is (@view), but expected $i (@expected)" ); - } - - $i++; - } - } - - if( $i <= $#vs ) { - mydie( "Failed: In log $l, not enough views seen!" ); - } - - if( defined $last_v ) { - my @last_exp_v = @{$last_v}; - if( @last_exp_v != @last_view ) { - mydie( "Failed: In log $l last view didn't match, got view @last_view, but expected @last_exp_v" ); - } - } - -} - -sub get_num_views { - - my $log = shift; - my $including = shift; - my $nv = `grep "done " $log | grep "$including" | wc -l`; - chomp $nv; - return $nv; - -} - -sub wait_for_view_change { - - my $log = shift; - my $num_views = shift; - my $including = shift; - my $timeout = shift; - - my $start = time(); - while( (get_num_views( $log, $including ) < $num_views) and - ($start + $timeout > time()) ) { - my $lastv = `grep done $log | tail -n 1`; - chomp $lastv; - print " Waiting for $including to be present in >=$num_views views in $log (Last view: $lastv)\n"; - usleep 100000; - } - - if( get_num_views( $log, $including ) < $num_views) { - mydie( "Failed: Timed out waiting for $including to be in >=$num_views in log $log" ); - }else{ - print " Done: $including is in >=$num_views views in $log\n"; - } -} - -sub waitpid_to { - my $pid = shift; - my $to = shift; - - my $start = time(); - my $done_pid; - do { - usleep 100000; - $done_pid = waitpid($pid, POSIX::WNOHANG); - } while( $done_pid <= 0 and (time() - $start) < $to ); - - if( $done_pid <= 0 ) { - kill 9,$pid; - mydie( "Failed: Timed out waiting for process $pid\n" ); - } else { - return 1; - } - -} - -sub wait_and_check_expected_view($) { - my $v = shift; - push @views, $v; - for (my $i = 0; $i <=$#$v; $i++) { - $in_views{$v->[$i]}++; - } - foreach my $port (@$v) { - wait_for_view_change(paxos_log($port), $in_views{$port}, $port, 20); - } - foreach my $port (@$v) { - my $log = paxos_log($port); - check_views( $log, \@views ); - } -} - -sub start_nodes ($$){ - - @pid = (); - @logs = (); - @views = (); - for (my $i = 0; $i <= $#p; $i++) { - $in_views{$p[$i]} = 0; - } - - my $n = shift; - my $command = shift; - - for (my $i = 0; $i < $n; $i++) { - if ($command eq "ls") { - @pid = (@pid, spawn_ls($p[0],$p[$i])); - print "Start lock_server on $p[$i]\n"; - } - usleep 100000; - - my @vv = @p[0..$i]; - wait_and_check_expected_view(\@vv); - } - -} - -my %options; -getopts("s:k",\%options); -if (defined($options{s})) { - srand($options{s}); -} -if (defined($options{k})) { - $always_kill = 1; -} - -#get a sorted list of random ports -@p = randports(5); -print_config( @p[0..4] ); - -my @do_run = (); -my $NUM_TESTS = 17; - -# see which tests are set -if( $#ARGV > -1 ) { - foreach my $t (@ARGV) { - if( $t < $NUM_TESTS && $t >= 0 ) { - $do_run[$t] = 1; - } - } -} else { -# turn on all tests - for( my $i = 0; $i < $NUM_TESTS; $i++ ) { - $do_run[$i] = 1; - } -} - -if ($do_run[0]) { - print "test0: start 3-process lock server\n"; - start_nodes(3,"ls"); - cleanup(); - usleep 200000; -} - -if ($do_run[1]) { - print "test1: start 3-process lock server, kill third server\n"; - start_nodes(3,"ls"); - - print "Kill third server (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 500000; - - # it should go through 4 views - my @v4 = ($p[0], $p[1]); - wait_and_check_expected_view(\@v4); - - cleanup(); - usleep 200000; -} - -if ($do_run[2]) { - print "test2: start 3-process lock server, kill first server\n"; - start_nodes(3,"ls"); - - print "Kill first (PID: $pid[0]) on port $p[0]\n"; - kill "TERM", $pid[0]; - - usleep 500000; - - # it should go through 4 views - my @v4 = ($p[1], $p[2]); - wait_and_check_expected_view(\@v4); - - cleanup(); - usleep 200000; -} - - -if ($do_run[3]) { - - print "test3: start 3-process lock_server, kill a server, restart a server\n"; - start_nodes(3,"ls"); - - print "Kill server (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 500000; - - my @v4 = ($p[0], $p[1]); - wait_and_check_expected_view(\@v4); - - print "Restart killed server on port $p[2]\n"; - $pid[2] = spawn_ls ($p[0], $p[2]); - - usleep 500000; - - my @v5 = ($p[0], $p[1], $p[2]); - wait_and_check_expected_view(\@v5); - - cleanup(); - usleep 200000; -} - -if ($do_run[4]) { - print "test4: 3-process lock_server, kill third server, kill second server, restart third server, kill third server again, restart second server, re-restart third server, check logs\n"; - start_nodes(3,"ls"); - - print "Kill server (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 500000; - my @v4 = ($p[0], $p[1]); - wait_and_check_expected_view(\@v4); - - print "Kill server (PID: $pid[1]) on port $p[1]\n"; - kill "TERM", $pid[1]; - - usleep 500000; - #no view change can happen because of a lack of majority - - print "Restarting server on port $p[2]\n"; - $pid[2] = spawn_ls($p[0], $p[2]); - - usleep 500000; - - #no view change can happen because of a lack of majority - foreach my $port (@p[0..2]) { - my $num_v = get_num_views(paxos_log($port), $port); - die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port}); - } - - # kill node 3 again, - print "Kill server (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 500000; - - - print "Restarting server on port $p[1]\n"; - $pid[1] = spawn_ls($p[0], $p[1]); - - usleep 700000; - - foreach my $port (@p[0..1]) { - $in_views{$port} = get_num_views( paxos_log($port), $port ); - print " Node $port is present in ", $in_views{$port}, " views in ", paxos_log($port), "\n"; - } - - print "Restarting server on port $p[2]\n"; - $pid[2] = spawn_ls($p[0], $p[2]); - - my @lastv = ($p[0],$p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - -# now check the paxos logs and make sure the logs go through the right -# views - - foreach my $port (@lastv) { - check_views( paxos_log($port), \@views, \@lastv); - } - - cleanup(); - -} - -if ($do_run[5]) { - print "test5: 3-process lock_server, send signal 1 to first server, kill third server, restart third server, check logs\n"; - start_nodes(3,"ls"); - - print "Sending paxos breakpoint 1 to first server on port $p[0]\n"; - spawn("./rsm_tester", $p[0]+1, "breakpoint", 3); - - usleep 100000; - - print "Kill third server (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 500000; - foreach my $port (@p[0..2]) { - my $num_v = get_num_views( paxos_log($port), $port ); - die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port}); - } - - print "Restarting third server on port $p[2]\n"; - $pid[2]= spawn_ls($p[0], $p[2]); - my @lastv = ($p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - usleep 1000000; - -# now check the paxos logs and make sure the logs go through the right -# views - - foreach my $port (@lastv) { - check_views( paxos_log($port), \@views, \@lastv); - } - - cleanup(); - -} - -if ($do_run[6]) { - print "test6: 4-process lock_server, send signal 2 to first server, kill fourth server, restart fourth server, check logs\n"; - start_nodes(4,"ls"); - print "Sending paxos breakpoint 2 to first server on port $p[0]\n"; - spawn("./rsm_tester", $p[0]+1, "breakpoint", 4); - - usleep 100000; - - print "Kill fourth server (PID: $pid[3]) on port $p[3]\n"; - kill "TERM", $pid[3]; - - usleep 500000; - - foreach my $port ($p[1],$p[2]) { - my $num_v = get_num_views( paxos_log($port), $port ); - die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port}); - } - - usleep 500000; - - print "Restarting fourth server on port $p[3]\n"; - $pid[3] = spawn_ls($p[1], $p[3]); - - usleep 500000; - - my @v5 = ($p[0],$p[1],$p[2]); - foreach my $port (@v5) { - $in_views{$port}++; - } - push @views, \@v5; - - usleep 1000000; - - # the 6th view will be (2,3) or (1,2,3,4) - my @v6 = ($p[1],$p[2]); - foreach my $port (@v6) { - $in_views{$port}++; - } - foreach my $port (@v6) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 30); - } - - # final will be (2,3,4) - my @lastv = ($p[1],$p[2],$p[3]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - foreach my $port (@lastv) { - check_views( paxos_log($port), \@views, \@lastv ); - } - cleanup(); - -} - -if ($do_run[7]) { - print "test7: 4-process lock_server, send signal 2 to first server, kill fourth server, kill other servers, restart other servers, restart fourth server, check logs\n"; - start_nodes(4,"ls"); - print "Sending paxos breakpoint 2 to first server on port $p[0]\n"; - spawn("./rsm_tester", $p[0]+1, "breakpoint", 4); - usleep 300000; - - print "Kill fourth server (PID: $pid[3]) on port $p[3]\n"; - kill "TERM", $pid[3]; - - usleep 500000; - - print "Kill third server (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - print "Kill second server (PID: $pid[1]) on port $p[1]\n"; - kill "TERM", $pid[1]; - - usleep 500000; - - print "Restarting second server on port $p[1]\n"; - $pid[1] = spawn_ls($p[0], $p[1]); - - usleep 500000; - - print "Restarting third server on port $p[2]\n"; - $pid[2] = spawn_ls($p[0], $p[2]); - - usleep 500000; - -#no view change is possible by now because there is no majority - foreach my $port ($p[1],$p[2]) { - my $num_v = get_num_views( paxos_log($port), $port ); - die "$num_v views in ", paxos_log($port), " : no new views should be formed due to the lack of majority\n" if ($num_v != $in_views{$port}); - } - - print "Restarting fourth server on port $p[3]\n"; - $pid[3] = spawn_ls($p[1], $p[3]); - - usleep 500000; - - my @v5 = ($p[0], $p[1], $p[2]); - push @views, \@v5; - foreach my $port (@v5) { - $in_views{$port}++; - } - - usleep 1500000; - my @lastv = ($p[1],$p[2],$p[3]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - foreach my $port (@lastv) { - check_views( paxos_log($port), \@views, \@lastv); - } - - cleanup(); - -} - -if ($do_run[8]) { - print "test8: start 3-process lock service\n"; - start_nodes(3,"ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 8" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[9]) { - - print "test9: start 3-process rsm, kill second slave while lock_tester is running\n"; - start_nodes(3,"ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - sleep int(rand(10)+1); - - print "Kill slave (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 300000; - - # it should go through 4 views - my @v4 = ($p[0], $p[1]); - wait_and_check_expected_view(\@v4); - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 9" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[10]) { - - print "test10: start 3-process rsm, kill second slave and restarts it later while lock_tester is running\n"; - start_nodes(3,"ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - sleep int(rand(10)+1); - - print "Kill slave (PID: $pid[2]) on port $p[2]\n"; - kill "TERM", $pid[2]; - - usleep 300000; - - # it should go through 4 views - my @v4 = ($p[0], $p[1]); - wait_and_check_expected_view(\@v4); - - usleep 300000; - - print "Restarting killed lock_server on port $p[2]\n"; - $pid[2] = spawn_ls($p[0], $p[2]); - my @v5 = ($p[0],$p[1],$p[2]); - wait_and_check_expected_view(\@v5); - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 10" ); - } - - cleanup(); - usleep 200000; -} - - -if ($do_run[11]) { - - print "test11: start 3-process rsm, kill primary while lock_tester is running\n"; - start_nodes(3,"ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - sleep int(rand(10)+1); - - print "Kill primary (PID: $pid[0]) on port $p[0]\n"; - kill "TERM", $pid[0]; - - usleep 300000; - - # it should go through 4 views - my @v4 = ($p[1], $p[2]); - wait_and_check_expected_view(\@v4); - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 11" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[12]) { - - print "test12: start 3-process rsm, kill master at break1 and restart it while lock_tester is running\n"; - - start_nodes(3, "ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - usleep 100000; - - print "Kill master (PID: $pid[0]) on port $p[0] at breakpoint 1\n"; - spawn("./rsm_tester", $p[0]+1, "breakpoint", 1); - - - usleep 100000; - - # it should go through 5 views - my @v4 = ($p[1], $p[2]); - wait_and_check_expected_view(\@v4); - - print "Restarting killed lock_server on port $p[0]\n"; - $pid[0] = spawn_ls($p[1], $p[0]); - - usleep 300000; - - # the last view should include all nodes - my @lastv = ($p[0],$p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - foreach my $port (@lastv) { - check_views( paxos_log($port), \@views, \@lastv); - } - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 12" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[13]) { - - print "test13: start 3-process rsm, kill slave at break1 and restart it while lock_tester is running\n"; - - start_nodes(3, "ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - usleep 100000; - - print "Kill slave (PID: $pid[2]) on port $p[2] at breakpoint 1\n"; - spawn("./rsm_tester", $p[2]+1, "breakpoint", 1); - - usleep 100000; - - # it should go through 4 views - my @v4 = ($p[0], $p[1]); - wait_and_check_expected_view(\@v4); - - print "Restarting killed lock_server on port $p[2]\n"; - $pid[2] = spawn_ls($p[0], $p[2]); - - usleep 300000; - - # the last view should include all nodes - my @lastv = ($p[0],$p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - foreach my $port (@lastv) { - check_views( paxos_log($port), \@views, \@lastv); - } - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 13" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[14]) { - - print "test14: start 5-process rsm, kill slave break1, kill slave break2\n"; - - start_nodes(5, "ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - usleep 100000; - - print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n"; - spawn("./rsm_tester", $p[4]+1, "breakpoint", 1); - - - print "Kill slave (PID: $pid[3]) on port $p[3] at breakpoint 2\n"; - spawn("./rsm_tester", $p[3]+1, "breakpoint", 2); - - - usleep 100000; - - # two view changes: - - print "first view change wait\n"; - my @lastv = ($p[0],$p[1],$p[2],$p[3]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - print "second view change wait\n"; - - @lastv = ($p[0],$p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 14" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[15]) { - - print "test15: start 5-process rsm, kill slave break1, kill primary break2\n"; - - start_nodes(5, "ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - usleep 100000; - - print "Kill slave (PID: $pid[4]) on port $p[4] at breakpoint 1\n"; - spawn("./rsm_tester", $p[4]+1, "breakpoint", 1); - - - print "Kill primary (PID: $pid[0]) on port $p[0] at breakpoint 2\n"; - spawn("./rsm_tester", $p[0]+1, "breakpoint", 2); - - usleep 100000; - - # two view changes: - - print "first view change wait\n"; - my @lastv = ($p[0],$p[1],$p[2],$p[3]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - print "second view change wait\n"; - - @lastv = ($p[1],$p[2],$p[3]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 15" ); - } - - cleanup(); - usleep 200000; -} - -if ($do_run[16]) { - - print "test16: start 3-process rsm, partition primary, heal it\n"; - - start_nodes(3, "ls"); - - print "Start lock_tester $p[0]\n"; - $t = spawn("./lock_tester", $p[0]); - - usleep 100000; - - print "Partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n"; - - spawn("./rsm_tester", $p[0]+1, "partition", 0); - - usleep 300000; - - print "first view change wait\n"; - my @lastv = ($p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - usleep 100000; - - print "Heal partition primary (PID: $pid[0]) on port $p[0] at breakpoint\n"; - spawn("./rsm_tester", $p[0]+1, "partition", 1); - - usleep 100000; - - # xxx it should test that this is the 5th view! - print "second view change wait\n"; - @lastv = ($p[0], $p[1],$p[2]); - foreach my $port (@lastv) { - wait_for_view_change(paxos_log($port), $in_views{$port}+1, $port, 20); - } - - print " Wait for lock_tester to finish (waitpid $t)\n"; - waitpid_to($t, 600); - - if( system( "grep \"passed all tests successfully\" lock_tester-$p[0].log" ) ) { - mydie( "Failed lock tester for test 16" ); - } - - cleanup(); - usleep 200000; -} - -print "tests done OK\n"; - -unlink("config"); diff --git a/rsmtest_client.cc b/rsmtest_client.cc index 249f1a3..e9c8001 100644 --- a/rsmtest_client.cc +++ b/rsmtest_client.cc @@ -3,21 +3,21 @@ #include "rsmtest_client.h" #include -rsmtest_client::rsmtest_client(string dst) : cl(dst) { - if (cl.bind() < 0) +rsmtest_client::rsmtest_client(string dst) { + if (!(cl = rpcc::bind_cached(dst))) LOG << "rsmtest_client: call bind"; } rsm_test_protocol::status rsmtest_client::net_repair(int heal) { rsm_test_protocol::status r = rsm_test_protocol::ERR; - auto ret = (rsm_test_protocol::status)cl.call(rsm_test_protocol::net_repair, r, heal); + auto ret = (rsm_test_protocol::status)cl->call(rsm_test_protocol::net_repair, r, heal); VERIFY (ret == rsm_test_protocol::OK); return r; } rsm_test_protocol::status rsmtest_client::breakpoint(int b) { rsm_test_protocol::status r = rsm_test_protocol::ERR; - auto ret = (rsm_test_protocol::status)cl.call(rsm_test_protocol::breakpoint, r, b); + auto ret = (rsm_test_protocol::status)cl->call(rsm_test_protocol::breakpoint, r, b); VERIFY (ret == rsm_test_protocol::OK); return r; } diff --git a/rsmtest_client.h b/rsmtest_client.h index e71fede..e7add37 100644 --- a/rsmtest_client.h +++ b/rsmtest_client.h @@ -9,7 +9,7 @@ // Client interface to the rsmtest server class rsmtest_client { protected: - rpcc cl; + shared_ptr cl; public: rsmtest_client(string d); virtual ~rsmtest_client() {} diff --git a/types.h b/types.h index ce52a15..797ccf0 100644 --- a/types.h +++ b/types.h @@ -106,7 +106,8 @@ operator<<(std::ostream & o, const A & a) { return o << "[" << implode(a, ", ") << "]"; } -#include "verify.h" +#include +#define VERIFY(expr) { if (!(expr)) abort(); } // struct tuple adapter, useful for marshalling and endian swapping. usage: // diff --git a/verify.h b/verify.h deleted file mode 100644 index 622aaf2..0000000 --- a/verify.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef verify_client_h -#define verify_client_h - -#include -#include - -#ifdef NDEBUG -#define VERIFY(expr) { if (!(expr)) abort(); } -#else -#define VERIFY(expr) assert(expr) -#endif - -#endif