diff --git a/src/kvsrv1/client.go b/src/kvsrv1/client.go index 73f1d29..80c3dec 100644 --- a/src/kvsrv1/client.go +++ b/src/kvsrv1/client.go @@ -41,7 +41,7 @@ func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) { // performed at the server. If the server returns ErrVersion on a // resend RPC, then Put must return ErrMaybe to the application, since // its earlier RPC might have been processed by the server successfully -// but the response was lost, and the the Clerk doesn't know if +// but the response was lost, and the Clerk doesn't know if // the Put was performed or not. // // You can send an RPC with code like this: diff --git a/src/shardkv1/shardctrler/lock/lock.go b/src/shardkv1/shardctrler/lock/lock.go new file mode 100644 index 0000000..7b5ab5f --- /dev/null +++ b/src/shardkv1/shardctrler/lock/lock.go @@ -0,0 +1,31 @@ +package lock + +import ( + + "6.5840/kvsrv1" + "6.5840/kvsrv1/rpc" + "6.5840/shardkv1/shardctrler/param" +) + + +type Lock struct { + ck *kvsrv.Clerk + +} + +// Use l as the key to store the "lock state" (you would have to decide +// precisely what the lock state is). +func MakeLock(ck kvtest.IKVClerk, l string) *Lock { + lk := &Lock{ck: ck.(*kvsrv.Clerk)} + // You may add code here + return lk +} + + +func (lk *Lock) Acquire() { + // You may add code here. +} + +func (lk *Lock) Release() { + // You may add code here. +} diff --git a/src/shardkv1/shardctrler/lock/lock_test.go b/src/shardkv1/shardctrler/lock/lock_test.go new file mode 100644 index 0000000..92c9e67 --- /dev/null +++ b/src/shardkv1/shardctrler/lock/lock_test.go @@ -0,0 +1,89 @@ +package lock + +import ( + "fmt" + // "log" + "strconv" + "testing" + "time" + + "6.5840/kvsrv1" + "6.5840/kvsrv1/rpc" + "6.5840/kvtest1" +) + +const ( + NACQUIRE = 10 + NCLNT = 10 + NSEC = 2 +) + +func oneClient(t *testing.T, me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes { + lk := MakeLock(ck, "l") + ck.Put("l0", "", 0) + for i := 1; true; i++ { + select { + case <-done: + return kvtest.ClntRes{i, 0} + default: + lk.Acquire() + + // log.Printf("%d: acquired lock", me) + + b := strconv.Itoa(me) + val, ver, err := ck.Get("l0") + if err == rpc.OK { + if val != "" { + t.Fatalf("%d: two clients acquired lock %v", me, val) + } + } else { + t.Fatalf("%d: get failed %v", me, err) + } + + err = ck.Put("l0", string(b), ver) + if !(err == rpc.OK || err == rpc.ErrMaybe) { + t.Fatalf("%d: put failed %v", me, err) + } + + time.Sleep(10 * time.Millisecond) + + err = ck.Put("l0", "", ver+1) + if !(err == rpc.OK || err == rpc.ErrMaybe) { + t.Fatalf("%d: put failed %v", me, err) + } + + // log.Printf("%d: release lock", me) + + lk.Release() + } + } + return kvtest.ClntRes{} +} + +// Run test clients +func runClients(t *testing.T, nclnt int, reliable bool) { + ts := kvsrv.MakeTestKV(t, reliable) + defer ts.Cleanup() + + ts.Begin(fmt.Sprintf("Test: %d lock clients", nclnt)) + + ts.SpawnClientsAndWait(nclnt, NSEC*time.Second, func(me int, myck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes { + return oneClient(t, me, myck, done) + }) +} + +func TestOneClientReliable(t *testing.T) { + runClients(t, 1, true) +} + +func TestManyClientsReliable(t *testing.T) { + runClients(t, NCLNT, true) +} + +func TestOneClientUnreliable(t *testing.T) { + runClients(t, 1, false) +} + +func TestManyClientsUnreliable(t *testing.T) { + runClients(t, NCLNT, false) +} diff --git a/src/shardkv1/shardctrler/shardctrler.go b/src/shardkv1/shardctrler/shardctrler.go index 3d8743e..d59c0da 100644 --- a/src/shardkv1/shardctrler/shardctrler.go 
+++ b/src/shardkv1/shardctrler/shardctrler.go @@ -39,12 +39,8 @@ func MakeShardCtrler(clnt *tester.Clnt, leases bool) *ShardCtrler { // The tester calls InitController() before starting a new // controller. In part A, this method doesn't need to do anything. In // B and C, this method implements recovery (part B) and uses a lock -// to become leader (part C). InitController should return -// rpc.ErrVersion when another controller supersedes it (e.g., when -// this controller is partitioned during recovery); this happens only -// in Part C. Otherwise, it returns rpc.OK. -func (sck *ShardCtrler) InitController() rpc.Err { - return rpc.ErrVersion +// to become leader (part C). +func (sck *ShardCtrler) InitController() { } // The tester calls ExitController to exit a controller. In part B and @@ -61,12 +57,11 @@ func (sck *ShardCtrler) InitConfig(cfg *shardcfg.ShardConfig) { } // Called by the tester to ask the controller to change the -// configuration from the current one to new. It should return -// rpc.ErrVersion if this controller is superseded by another -// controller, as in part C. In all other cases, it should return -// rpc.OK. -func (sck *ShardCtrler) ChangeConfigTo(new *shardcfg.ShardConfig) rpc.Err { - return rpc.OK +// configuration from the current one to new. While the controller +// changes the configuration it may be superseded by another +// controller. +func (sck *ShardCtrler) ChangeConfigTo(new *shardcfg.ShardConfig) { + return } // Tester "kills" shardctrler by calling Kill(). For your diff --git a/src/shardkv1/shardgrp/server.go b/src/shardkv1/shardgrp/server.go index c6a8e38..a723a95 100644 --- a/src/shardkv1/shardgrp/server.go +++ b/src/shardkv1/shardgrp/server.go @@ -14,11 +14,10 @@ import ( type KVServer struct { - gid tester.Tgid - me int - dead int32 // set by Kill() - rsm *rsm.RSM - frozen bool // for testing purposes + gid tester.Tgid + me int + dead int32 // set by Kill() + rsm *rsm.RSM } diff --git a/src/shardkv1/shardkv_test.go b/src/shardkv1/shardkv_test.go index 78f0f58..ce37791 100644 --- a/src/shardkv1/shardkv_test.go +++ b/src/shardkv1/shardkv_test.go @@ -67,7 +67,7 @@ func TestStaticOneShardGroup5A(t *testing.T) { } // disconnect raft leader of shardgrp and check that keys are - // still avaialable + // still available ts.disconnectClntFromLeader(ck.(*kvtest.TestClerk).Clnt, shardcfg.Gid1) for i := 0; i < n; i++ { @@ -90,18 +90,17 @@ func TestJoinBasic5A(t *testing.T) { cfg, _ := sck.Query() gid2 := ts.newGid() - err := ts.joinGroups(sck, []tester.Tgid{gid2}) - if err != rpc.OK { - ts.t.Fatalf("joinGroups: err %v", err) + if ok := ts.joinGroups(sck, []tester.Tgid{gid2}); !ok { + ts.t.Fatalf("TestJoinBasic5A: joinGroups failed") } cfg1, _ := sck.Query() if cfg.Num+1 != cfg1.Num { - ts.t.Fatalf("wrong num %d expected %d ", cfg1.Num, cfg.Num+1) + ts.t.Fatalf("TestJoinBasic5A: wrong num %d expected %d ", cfg1.Num, cfg.Num+1) } if !cfg1.IsMember(gid2) { - ts.t.Fatalf("%d isn't a member of %v", gid2, cfg1) + ts.t.Fatalf("TestJoinBasic5A: %d isn't a member of %v", gid2, cfg1) } ts.checkShutdownSharding(gid1, ka, va) @@ -136,9 +135,8 @@ func TestDeleteBasic5A(t *testing.T) { sck := ts.ShardCtrler() gid2 := ts.newGid() - err := ts.joinGroups(sck, []tester.Tgid{gid2}) - if err != rpc.OK { - ts.t.Fatalf("joinGroups: err %v", err) + if ok := ts.joinGroups(sck, []tester.Tgid{gid2}); !ok { + ts.t.Fatalf("TestDeleteBasic5A: joinGroups failed") } // push more Get's through so that all peers snapshot @@ -150,7 +148,7 @@ func TestDeleteBasic5A(t *testing.T) { sz1 := 
ts.Group(gid1).SnapshotSize() sz2 := ts.Group(gid2).SnapshotSize() if sz1+sz2 > sz+10000 { - ts.t.Fatalf("gid1 %d + gid2 %d = %d use too much space %d", sz1, sz2, sz1+sz2, sz) + ts.t.Fatalf("TestDeleteBasic5A: gid1 %d + gid2 %d = %d use too much space %d", sz1, sz2, sz1+sz2, sz) } } @@ -165,9 +163,8 @@ func TestJoinLeaveBasic5A(t *testing.T) { sck := ts.ShardCtrler() gid2 := ts.newGid() - err := ts.joinGroups(sck, []tester.Tgid{gid2}) - if err != rpc.OK { - ts.t.Fatalf("joinGroups: err %v", err) + if ok := ts.joinGroups(sck, []tester.Tgid{gid2}); !ok { + ts.t.Fatalf("TestJoinLeaveBasic5A: joinGroups failed") } ts.checkShutdownSharding(gid1, ka, va) @@ -176,13 +173,9 @@ func TestJoinLeaveBasic5A(t *testing.T) { ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) } - err = ts.leave(sck, shardcfg.Gid1) - if err != rpc.OK { - ts.t.Fatalf("Leave: err %v", err) - } - cfg, _ := sck.Query() - if cfg.IsMember(shardcfg.Gid1) { - ts.t.Fatalf("%d is a member of %v", shardcfg.Gid1, cfg) + ts.leave(sck, shardcfg.Gid1) + if ok := ts.checkMember(sck, shardcfg.Gid1); ok { + ts.t.Fatalf("%d is a member after leave", shardcfg.Gid1) } ts.Group(shardcfg.Gid1).Shutdown() @@ -382,11 +375,11 @@ func TestProgressJoin(t *testing.T) { return default: //log.Printf("join/leave %v", newgid) - if err := ts.joinGroups(sck, []tester.Tgid{newgid}); err != rpc.OK { - t.Fatalf("joined err %v", err) + if ok := ts.joinGroups(sck, []tester.Tgid{newgid}); !ok { + t.Fatalf("TestProgressJoin: join failed") } - if err := ts.leaveGroups(sck, []tester.Tgid{newgid}); err != rpc.OK { - t.Fatalf("leave err %v", err) + if ok := ts.leaveGroups(sck, []tester.Tgid{newgid}); !ok { + t.Fatalf("TestProgressJoin: leave failed") } } } @@ -451,12 +444,12 @@ func concurrentClerk(t *testing.T, nclnt int, reliable bool, part string) { sck := ts.ShardCtrler() grps := ts.groups(NGRP) - if err := ts.joinGroups(sck, grps); err != rpc.OK { - t.Fatalf("joinGroups err %v", err) + if ok := ts.joinGroups(sck, grps); !ok { + t.Fatalf("concurrentClerk: joinGroups failed") } - if err := ts.leaveGroups(sck, grps); err != rpc.OK { - t.Fatalf("leaveGroups err %v", err) + if ok := ts.leaveGroups(sck, grps); !ok { + t.Fatalf("concurrentClerk: leaveGroups failed") } <-ch @@ -504,10 +497,9 @@ func TestJoinLeave5B(t *testing.T) { ts.Group(gid1).Shutdown() gid2 := ts.newGid() - ch := make(chan rpc.Err) + ch := make(chan bool) go func() { - err := ts.joinGroups(sck, []tester.Tgid{gid2}) - ch <- err + ch <- ts.joinGroups(sck, []tester.Tgid{gid2}) }() select { @@ -521,9 +513,9 @@ func TestJoinLeave5B(t *testing.T) { ts.Group(gid1).StartServers() select { - case err := <-ch: - if err != rpc.OK { - ts.Fatalf("Join returns err %v", err) + case ok := <-ch: + if !ok { + ts.Fatalf("TestJoinLeave5B: Join returned %t", ok) } case <-time.After(time.Second * NSEC): ts.Fatalf("Join didn't complete") @@ -536,15 +528,13 @@ func TestJoinLeave5B(t *testing.T) { ts.Group(gid2).Shutdown() - ch = make(chan rpc.Err) go func() { - err := ts.leave(sck, shardcfg.Gid1) - ch <- err + ch <- ts.leaveGroups(sck, []tester.Tgid{shardcfg.Gid1}) }() select { - case err := <-ch: - ts.Fatalf("Leave finished %v", err) + case <-ch: + ts.Fatalf("Leave finished") case <-time.After(NSEC * time.Second): // Give give some time to try to join } @@ -553,9 +543,9 @@ func TestJoinLeave5B(t *testing.T) { ts.Group(gid2).StartServers() select { - case err := <-ch: - if err != rpc.OK { - ts.Fatalf("Leave returns err %v", err) + case ok := <-ch: + if !ok { + ts.Fatalf("TestJoinLeave5B: Leave failed %t", ok) } case 
<-time.After(time.Second * NSEC): ts.Fatalf("Leave didn't complete") @@ -611,9 +601,7 @@ func TestLeaseBasicRelease5C(t *testing.T) { sck0, clnt0 := ts.makeShardCtrlerClnt() go func() { - if err := sck0.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck0.InitController() time.Sleep(200 * time.Millisecond) sck0.ExitController() }() @@ -624,9 +612,7 @@ func TestLeaseBasicRelease5C(t *testing.T) { sck1, clnt1 := ts.makeShardCtrlerClnt() ch := make(chan struct{}) go func() { - if err := sck1.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck1.InitController() time.Sleep(200 * time.Millisecond) sck1.ExitController() ch <- struct{}{} @@ -650,9 +636,7 @@ func TestLeaseBasicExpire5C(t *testing.T) { sck0, clnt0 := ts.makeShardCtrlerClnt() go func() { - if err := sck0.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck0.InitController() for { time.Sleep(10 * time.Millisecond) } @@ -667,9 +651,7 @@ func TestLeaseBasicExpire5C(t *testing.T) { sck1, clnt1 := ts.makeShardCtrlerClnt() ch := make(chan struct{}) go func() { - if err := sck1.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck1.InitController() time.Sleep(100 * time.Millisecond) sck1.ExitController() ch <- struct{}{} @@ -695,9 +677,7 @@ func TestLeaseBasicRefresh5C(t *testing.T) { sck0, clnt0 := ts.makeShardCtrlerClnt() go func() { - if err := sck0.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck0.InitController() time.Sleep(LEADERSEC * param.LEASETIMESEC * time.Second) sck0.ExitController() }() @@ -709,9 +689,7 @@ func TestLeaseBasicRefresh5C(t *testing.T) { sck1, clnt1 := ts.makeShardCtrlerClnt() ch := make(chan struct{}) go func() { - if err := sck1.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck1.InitController() time.Sleep(100 * time.Millisecond) sck1.ExitController() ch <- struct{}{} @@ -742,9 +720,7 @@ func TestPartitionControllerJoin5C(t *testing.T) { ka, va := ts.SpreadPuts(ck, NKEYS) sck, clnt := ts.makeShardCtrlerClnt() - if err := sck.InitController(); err != rpc.OK { - ts.Fatalf("failed to init controller %v", err) - } + sck.InitController() ch := make(chan rpc.Err) ngid := tester.Tgid(0) @@ -752,7 +728,8 @@ func TestPartitionControllerJoin5C(t *testing.T) { ngid = ts.newGid() ts.Config.MakeGroupStart(ngid, NSRV, ts.StartServerShardGrp) ts.Group(ngid).Shutdown() - ch <- ts.join(sck, ngid, ts.Group(ngid).SrvNames()) + ts.join(sck, ngid, ts.Group(ngid).SrvNames()) + ch <- rpc.OK }() // sleep for a while to get the chance for the controller to get @@ -769,9 +746,7 @@ func TestPartitionControllerJoin5C(t *testing.T) { // start new controller to supersede partitioned one, sck0 := ts.makeShardCtrler() - if err := sck0.InitController(); err != rpc.OK { - t.Fatalf("failed to init controller %v", err) - } + sck0.InitController() scfg, _ := sck0.Query() if !scfg.IsMember(ngid) { @@ -783,10 +758,8 @@ func TestPartitionControllerJoin5C(t *testing.T) { // reconnect old controller, which shouldn't finish ChangeConfigTo clnt.ConnectAll() - err := <-ch - if err == rpc.OK { - t.Fatalf("Old leader succeeded %v", err) - } + // wait for old controller to finish/exit + <-ch time.Sleep(1 * time.Second) @@ -797,19 +770,64 @@ func TestPartitionControllerJoin5C(t *testing.T) { // Make a leader controller loses its leadership during join/leave and // test if the next controller recovers correctly. 
-func TestPartitionRecovery5C(t *testing.T) { - const ( - // NPARTITION = 10 - NPARTITION = 5 - ) - - ts := MakeTestLeases(t, "Test (5C): controllers with leased leadership ...", true) +func partitionRecovery5C(t *testing.T, reliable bool, npart, nclnt int) { + const NSEC = 60 + ts := MakeTestLeases(t, "Test (5C): controllers with leased leadership ...", reliable) defer ts.Cleanup() gid := ts.setupKVService() ck := ts.MakeClerk() - ka, va := ts.SpreadPuts(ck, NKEYS) - for i := 0; i < NPARTITION; i++ { + ka := make([]string, 0) + va := make([]string, 0) + if nclnt <= 0 { + ka, va = ts.SpreadPuts(ck, NKEYS) + } + + ch := make(chan []kvtest.ClntRes) + if nclnt > 0 { + ka := kvtest.MakeKeys(1) + go func(ch chan []kvtest.ClntRes) { + rs := ts.SpawnClientsAndWait(nclnt, NSEC*time.Second, func(me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes { + return ts.OneClientPut(me, ck, ka, done) + }) + ch <- rs + }(ch) + } + + for i := 0; i < npart; i++ { ts.killCtrler(ck, gid, ka, va) } + + if nclnt > 0 { + <-ch + ts.CheckPorcupine() + } +} + +func TestPartitionRecoveryReliableNoClerk5C(t *testing.T) { + const ( + NPARTITION = 5 + ) + partitionRecovery5C(t, true, NPARTITION, 0) +} + +func TestPartitionRecoveryUnreliableNoClerk5C(t *testing.T) { + const ( + NPARTITION = 3 + ) + partitionRecovery5C(t, false, NPARTITION, 0) +} + +func TestPartitionRecoveryReliableClerks5C(t *testing.T) { + const ( + NPARTITION = 5 + ) + partitionRecovery5C(t, true, NPARTITION, 5) +} + +func TestPartitionRecoveryUnreliableClerks5C(t *testing.T) { + const ( + NPARTITION = 5 + ) + partitionRecovery5C(t, false, NPARTITION, 5) } diff --git a/src/shardkv1/test.go b/src/shardkv1/test.go index 2020f48..045bacf 100644 --- a/src/shardkv1/test.go +++ b/src/shardkv1/test.go @@ -50,6 +50,8 @@ func MakeTestMaxRaft(t *testing.T, part string, reliable, leases bool, maxraftst } cfg := tester.MakeConfig(t, 1, reliable, kvsrv.StartKVServer) ts.Test = kvtest.MakeTest(t, cfg, false, ts) + // XXX to avoid panic + tester.AnnotateTest(part, 1) ts.Begin(part) return ts } @@ -125,48 +127,56 @@ func (ts *Test) StartServerShardGrp(servers []*labrpc.ClientEnd, gid tester.Tgid return shardgrp.StartServerShardGrp(servers, gid, me, persister, ts.maxraftstate) } +func (ts *Test) checkMember(sck *shardctrler.ShardCtrler, gid tester.Tgid) bool { + cfg, _ := sck.Query() + ok := cfg.IsMember(gid) + return ok +} + // Add group gid -func (ts *Test) join(sck *shardctrler.ShardCtrler, gid tester.Tgid, srvs []string) rpc.Err { +func (ts *Test) join(sck *shardctrler.ShardCtrler, gid tester.Tgid, srvs []string) { cfg, _ := sck.Query() newcfg := cfg.Copy() ok := newcfg.JoinBalance(map[tester.Tgid][]string{gid: srvs}) if !ok { log.Fatalf("join: group %d is already present", gid) } - return sck.ChangeConfigTo(newcfg) + sck.ChangeConfigTo(newcfg) } -func (ts *Test) joinGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) rpc.Err { +func (ts *Test) joinGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) bool { for _, gid := range gids { ts.Config.MakeGroupStart(gid, NSRV, ts.StartServerShardGrp) - if err := ts.join(sck, gid, ts.Group(gid).SrvNames()); err != rpc.OK { - return err + ts.join(sck, gid, ts.Group(gid).SrvNames()) + if ok := ts.checkMember(sck, gid); !ok { + return false } time.Sleep(INTERGRPDELAY * time.Millisecond) } - return rpc.OK + return true } // Group gid leaves. 
-func (ts *Test) leave(sck *shardctrler.ShardCtrler, gid tester.Tgid) rpc.Err { +func (ts *Test) leave(sck *shardctrler.ShardCtrler, gid tester.Tgid) { cfg, _ := sck.Query() newcfg := cfg.Copy() ok := newcfg.LeaveBalance([]tester.Tgid{gid}) if !ok { log.Fatalf("leave: group %d is already not present", gid) } - return sck.ChangeConfigTo(newcfg) + sck.ChangeConfigTo(newcfg) } -func (ts *Test) leaveGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) rpc.Err { +func (ts *Test) leaveGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) bool { for _, gid := range gids { - if err := ts.leave(sck, gid); err != rpc.OK { - return err + ts.leave(sck, gid) + if ok := ts.checkMember(sck, gid); ok { + return false } ts.Config.ExitGroup(gid) time.Sleep(INTERGRPDELAY * time.Millisecond) } - return rpc.OK + return true } func (ts *Test) disconnectRaftLeader(gid tester.Tgid) (int, string) { @@ -257,9 +267,7 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) ) sck, clnt := ts.makeShardCtrlerClnt() - if err := sck.InitController(); err != rpc.OK { - ts.Fatalf("failed to init controller %v", err) - } + sck.InitController() cfg, _ := ts.ShardCtrler().Query() num := cfg.Num @@ -270,12 +278,12 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) for { ngid = ts.newGid() state = JOIN - err := ts.joinGroups(sck, []tester.Tgid{ngid}) - if err == rpc.OK { + ts.joinGroups(sck, []tester.Tgid{ngid}) + if ok := ts.checkMember(sck, ngid); ok { state = LEAVE - err = ts.leaveGroups(sck, []tester.Tgid{ngid}) + ts.leaveGroups(sck, []tester.Tgid{ngid}) } else { - //log.Printf("deposed err %v", err) + //log.Printf("deposed") return } } @@ -306,9 +314,8 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) // start new controler to pick up where sck left off sck0, clnt0 := ts.makeShardCtrlerClnt() - if err := sck0.InitController(); err != rpc.OK { - ts.Fatalf("failed to init controller %v", err) - } + + sck0.InitController() cfg, _ = sck0.Query() s := "join" if state == LEAVE { @@ -337,6 +344,8 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) sck0.ExitController() if ts.leases { + //log.Printf("reconnect old controller") + // reconnect old controller, which should bail out, because // it has been superseded. clnt.ConnectAll() @@ -366,12 +375,15 @@ func (ts *Test) electCtrler(ck kvtest.IKVClerk, ka, va []string) { default: ngid := ts.newGid() sck := ts.makeShardCtrler() - if err := sck.InitController(); err != rpc.OK { - ts.Fatalf("failed to init controller %v", err) - } + sck.InitController() //log.Printf("%d(%p): join/leave %v", i, sck, ngid) - if err := ts.joinGroups(sck, []tester.Tgid{ngid}); err == rpc.OK { - ts.leaveGroups(sck, []tester.Tgid{ngid}) + ts.joinGroups(sck, []tester.Tgid{ngid}) + if ok := ts.checkMember(sck, ngid); ok { + if ok := ts.leaveGroups(sck, []tester.Tgid{ngid}); !ok { + log.Fatalf("electCtrler: %d(%p): leave %v failed", i, sck, ngid) + } + } else { + log.Fatalf("electCtrler: %d(%p): join %v failed", i, sck, ngid) } sck.ExitController() } diff --git a/src/shardkv1/test.out b/src/shardkv1/test.out new file mode 100644 index 0000000..31d8045 --- /dev/null +++ b/src/shardkv1/test.out @@ -0,0 +1,169 @@ +=== RUN TestAcquireLockConcurrentReliable5C +Test (5C): Concurent ctrlers acquiring leadership ... (reliable network)... 
+2025/03/20 09:18:33 PecUxIPV: acquire success 1 l 2.999731394s +2025/03/20 09:18:35 PecUxIPV: Release 3 +2025/03/20 09:18:35 aKDBLFuF: acquire success 4 l 2.999504542s +2025/03/20 09:18:37 aKDBLFuF: Release 6 +2025/03/20 09:18:37 HxhaFlAP: acquire success 7 l 2.999622621s +2025/03/20 09:18:39 HxhaFlAP: Release 9 +2025/03/20 09:18:39 LpTmFCGC: acquire success 10 l 2.999747179s +2025/03/20 09:18:41 LpTmFCGC: Release 13 +2025/03/20 09:18:41 klmldUQn: acquire success 14 l 2.999558604s +2025/03/20 09:18:43 klmldUQn: Release 17 +2025/03/20 09:18:43 AWgiWKPZ: acquire success 18 l 2.999701903s +2025/03/20 09:18:46 AWgiWKPZ: Release 21 + ... Passed -- 16.4 1 2061 120 +--- PASS: TestAcquireLockConcurrentReliable5C (16.38s) +=== RUN TestAcquireLockConcurrentUnreliable5C +Test (5C): Concurent ctrlers acquiring leadership ... (unreliable network)... +2025/03/20 09:19:00 xulPPlwd: acquire success 2 l 2.768860613s +2025/03/20 09:19:05 xulPPlwd: Release 6 +2025/03/20 09:19:05 SGXgIJeR: acquire success 7 l 2.984694448s +2025/03/20 09:19:08 SGXgIJeR: Release 11 +2025/03/20 09:19:08 kNvktGla: acquire success 12 l 2.986135242s +2025/03/20 09:19:13 kNvktGla: Release 17 +2025/03/20 09:19:13 usGKuyeI: acquire success 18 l 2.97484218s +2025/03/20 09:19:19 usGKuyeI: Release 24 + ... Passed -- 38.4 1 2226 120 +--- PASS: TestAcquireLockConcurrentUnreliable5C (38.37s) +=== RUN TestLeaseBasicRelease5C +Test (5C): release lease ... (reliable network)... +2025/03/20 09:19:25 fWllyjFs: acquire success 1 l 2.999778852s +2025/03/20 09:19:25 fWllyjFs: Release 2 +2025/03/20 09:19:25 HqoctgYf: acquire success 3 l 2.999623311s +2025/03/20 09:19:26 HqoctgYf: Release 4 + ... Passed -- 0.4 1 17 0 +--- PASS: TestLeaseBasicRelease5C (0.42s) +=== RUN TestLeaseBasicExpire5C +Test (5C): lease expiring ... (reliable network)... +2025/03/20 09:19:26 MgmIiwHw: acquire success 1 l 2.999622077s +2025/03/20 09:19:29 PviuBaqZ: acquire: MgmIiwHw lease expired -31.512117ms +2025/03/20 09:19:29 PviuBaqZ: acquire success 2 l 2.9996929s +2025/03/20 09:19:29 PviuBaqZ: Release 3 + ... Passed -- 3.1 1 81 0 +--- PASS: TestLeaseBasicExpire5C (3.14s) +=== RUN TestLeaseBasicRefresh5C +Test (5C): lease refresh ... (reliable network)... +2025/03/20 09:19:29 CqhHcMdl: acquire success 1 l 2.999690343s + ... Passed -- 7.1 1 144 0 +--- PASS: TestLeaseBasicRefresh5C (7.10s) +=== RUN TestPartitionControllerJoin5C +Test (5C): partition controller in join... (reliable network)... +2025/03/20 09:19:38 CqhHcMdl: Release 9 +2025/03/20 09:19:38 QykadXGi: acquire success 1 l 2.999763148s +2025/03/20 09:19:43 YWktoCTH: acquire: QykadXGi lease expired -2.003411436s +2025/03/20 09:19:43 YWktoCTH: acquire success 2 l 2.999580573s +2025/03/20 09:19:45 YWktoCTH: Release 4 +2025/03/20 09:19:45 QykadXGi: refresher: exit expired -3.255782562s + ... Passed -- 11.2 1 1011 120 +--- PASS: TestPartitionControllerJoin5C (11.22s) +=== RUN TestPartitionRecoveryReliableNoClerk5C +Test (5C): controllers with leased leadership ... (reliable network)... 
+2025/03/20 09:19:50 nLqpPYYg: acquire success 1 l 2.999773699s +2025/03/20 09:19:56 Yauplngb: acquire: nLqpPYYg lease expired -1.030252686s +2025/03/20 09:19:56 Yauplngb: acquire success 4 l 2.999760357s +2025/03/20 09:19:58 Yauplngb: Release 7 +2025/03/20 09:19:58 nLqpPYYg: refresher: exit expired -3.848348135s +2025/03/20 09:20:00 dsvADejV: acquire success 8 l 2.999675453s +2025/03/20 09:20:07 jdcPVdvf: acquire: dsvADejV lease expired -1.56610473s +2025/03/20 09:20:07 jdcPVdvf: acquire success 11 l 2.999839821s +2025/03/20 09:20:10 jdcPVdvf: Release 15 +2025/03/20 09:20:10 dsvADejV: refresher: exit expired -4.604218577s +2025/03/20 09:20:12 vzVcVtTQ: acquire success 16 l 2.999743618s +2025/03/20 09:20:19 valCDRmB: acquire: vzVcVtTQ lease expired -1.988170854s +2025/03/20 09:20:19 valCDRmB: acquire success 19 l 2.999667662s +2025/03/20 09:20:22 valCDRmB: Release 22 +2025/03/20 09:20:22 vzVcVtTQ: refresher: exit expired -4.943386258s +2025/03/20 09:20:23 RJYqYuLF: acquire success 23 l 2.999774783s +2025/03/20 09:20:30 KaeJpVvL: acquire: RJYqYuLF lease expired -1.222157296s +2025/03/20 09:20:30 KaeJpVvL: acquire success 26 l 2.999897268s +2025/03/20 09:20:33 KaeJpVvL: Release 30 +2025/03/20 09:20:33 RJYqYuLF: refresher: exit expired -4.429889332s +2025/03/20 09:20:34 leVdobnP: acquire success 31 l 2.999770816s +2025/03/20 09:20:41 DFnmWean: acquire: leVdobnP lease expired -1.756292497s +2025/03/20 09:20:41 DFnmWean: acquire success 34 l 2.999905276s +2025/03/20 09:20:44 DFnmWean: Release 38 +2025/03/20 09:20:44 leVdobnP: refresher: exit expired -4.84260629s + ... Passed -- 59.3 1 5454 660 +--- PASS: TestPartitionRecoveryReliableNoClerk5C (59.30s) +=== RUN TestPartitionRecoveryUnreliableNoClerk5C +Test (5C): controllers with leased leadership ... (unreliable network)... +2025/03/20 09:21:01 oBRWPJFn: acquire success 1 l 2.999668901s +2025/03/20 09:21:08 WCfEtCSF: acquire: oBRWPJFn lease expired -1.960469635s +2025/03/20 09:21:08 WCfEtCSF: acquire success 4 l 2.989064006s +2025/03/20 09:21:19 WCfEtCSF: Release 15 +2025/03/20 09:21:20 oBRWPJFn: refresher: exit expired -13.623366094s +2025/03/20 09:21:25 BKOHUPgK: acquire success 16 l 2.974368151s +2025/03/20 09:21:32 dpZEDTAn: acquire: BKOHUPgK lease expired -1.266079689s +2025/03/20 09:21:32 dpZEDTAn: acquire success 19 l 2.986737971s +2025/03/20 09:21:42 dpZEDTAn: Release 29 +2025/03/20 09:21:43 BKOHUPgK: refresher: exit expired -12.408069097s +2025/03/20 09:21:50 TiapOztE: acquire: dpZEDTAn lease expired -4.992859225s +2025/03/20 09:21:50 TiapOztE: acquire success 30 l 2.972701594s +2025/03/20 09:21:57 aDyCYcpR: acquire: TiapOztE lease expired -1.338848496s +2025/03/20 09:21:57 aDyCYcpR: acquire success 33 l 2.99686939s +2025/03/20 09:22:07 aDyCYcpR: Release 43 +2025/03/20 09:22:07 TiapOztE: refresher: exit expired -12.147734461s + ... Passed -- 86.9 1 4985 420 +--- PASS: TestPartitionRecoveryUnreliableNoClerk5C (86.88s) +=== RUN TestPartitionRecoveryReliableClerks5C +Test (5C): controllers with leased leadership ... (reliable network)... 
+2025/03/20 09:22:13 vZrMwEsy: acquire success 1 l 2.999893567s +2025/03/20 09:22:20 AFHDpDYV: acquire: vZrMwEsy lease expired -1.657500925s +2025/03/20 09:22:20 AFHDpDYV: acquire success 4 l 2.999596975s +2025/03/20 09:22:22 AFHDpDYV: Release 6 +2025/03/20 09:22:22 vZrMwEsy: refresher: exit expired -3.627083489s +2025/03/20 09:22:23 tserHLNb: acquire success 7 l 2.999932478s +2025/03/20 09:22:29 msIfUgIC: acquire: tserHLNb lease expired -1.13789373s +2025/03/20 09:22:29 msIfUgIC: acquire success 10 l 2.999755401s +2025/03/20 09:22:31 msIfUgIC: Release 12 +2025/03/20 09:22:31 tserHLNb: refresher: exit expired -3.083945752s +2025/03/20 09:22:32 YLEIZyDn: acquire success 13 l 2.999940475s +2025/03/20 09:22:38 TIibzsMc: acquire: YLEIZyDn lease expired -1.017825561s +2025/03/20 09:22:38 TIibzsMc: acquire success 16 l 2.999907075s +2025/03/20 09:22:40 TIibzsMc: Release 18 +2025/03/20 09:22:40 YLEIZyDn: refresher: exit expired -2.789136907s +2025/03/20 09:22:41 knOnYtxW: acquire success 19 l 2.999891429s +2025/03/20 09:22:47 KyiPMsgB: acquire: knOnYtxW lease expired -1.534324297s +2025/03/20 09:22:47 KyiPMsgB: acquire success 22 l 2.999822725s +2025/03/20 09:22:49 KyiPMsgB: Release 24 +2025/03/20 09:22:49 knOnYtxW: refresher: exit expired -3.516354686s +2025/03/20 09:22:50 wHNCImkl: acquire success 25 l 2.999917928s +2025/03/20 09:22:56 CSBcxnyr: acquire: wHNCImkl lease expired -1.051161379s +2025/03/20 09:22:56 CSBcxnyr: acquire success 28 l 2.999745303s +2025/03/20 09:22:58 CSBcxnyr: Release 31 +2025/03/20 09:22:58 wHNCImkl: refresher: exit expired -3.241024197s + ... Passed -- 60.1 1 15934 5124 +--- PASS: TestPartitionRecoveryReliableClerks5C (60.14s) +=== RUN TestPartitionRecoveryUnreliableClerks5C +Test (5C): controllers with leased leadership ... (unreliable network)... +2025/03/20 09:23:14 ydfNYYir: acquire success 1 l 2.871807366s +2025/03/20 09:23:21 KmfOaYym: acquire: ydfNYYir lease expired -1.96910688s +2025/03/20 09:23:21 KmfOaYym: acquire success 4 l 2.976357121s +2025/03/20 09:23:25 KmfOaYym: Release 9 +2025/03/20 09:23:26 ydfNYYir: refresher: exit expired -6.960801287s +2025/03/20 09:23:27 XErxjiqb: acquire success 10 l 2.994288153s +2025/03/20 09:23:34 VQFBAKED: acquire: XErxjiqb lease expired -1.186993995s +2025/03/20 09:23:34 VQFBAKED: acquire success 14 l 2.978008397s +2025/03/20 09:23:40 VQFBAKED: Release 20 +2025/03/20 09:23:40 XErxjiqb: refresher: exit expired -7.422563867s +2025/03/20 09:23:41 IqJHVjsW: acquire success 21 l 2.984528802s +2025/03/20 09:23:47 NAaIOMcb: acquire: IqJHVjsW lease expired -1.19246442s +2025/03/20 09:23:48 NAaIOMcb: acquire success 25 l 2.521727902s +2025/03/20 09:23:53 NAaIOMcb: Release 30 +2025/03/20 09:23:53 IqJHVjsW: refresher: exit expired -7.130118022s +2025/03/20 09:23:54 pwTkolYO: acquire success 32 l 2.761741697s +2025/03/20 09:24:01 GAueeCFX: acquire: pwTkolYO lease expired -1.496813006s +2025/03/20 09:24:01 GAueeCFX: acquire success 34 l 2.977558093s +2025/03/20 09:24:06 GAueeCFX: Release 39 +2025/03/20 09:24:06 pwTkolYO: refresher: exit expired -6.843949894s +2025/03/20 09:24:09 FIDtQSlF: acquire: GAueeCFX lease expired -15.508321ms +2025/03/20 09:24:09 FIDtQSlF: acquire success 40 l 2.998464382s +2025/03/20 09:24:16 wExaLSov: acquire: FIDtQSlF lease expired -1.874162487s +2025/03/20 09:24:16 wExaLSov: acquire success 43 l 2.860519358s +2025/03/20 09:24:21 wExaLSov: Release 48 +2025/03/20 09:24:21 FIDtQSlF: refresher: exit expired -6.924846198s + ... 
Passed -- 68.4 1 10469 1186 +--- PASS: TestPartitionRecoveryUnreliableClerks5C (68.39s) +PASS +ok 6.5840/shardkv1 351.349s
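
The lock/lock.go skeleton added above leaves Acquire and Release empty. The following is a minimal sketch of one way to fill it in, assuming only the versioned Get/Put semantics documented in kvsrv1/client.go: the value stored under key l is "" when the lock is free and a random holder id while it is held, and a conditional Put with version 0 is assumed to create a missing key. The id field, kvtest.RandValue, and the retry delay are illustrative choices, and the sketch ignores the lease/expiry behavior that the param import and the 5C test log suggest the controller lock eventually needs.

package lock

import (
	"time"

	"6.5840/kvsrv1"
	"6.5840/kvsrv1/rpc"
	"6.5840/kvtest1"
)

type Lock struct {
	ck *kvsrv.Clerk
	l  string // key under which the lock state is stored
	id string // value identifying this lock holder (illustrative)
}

func MakeLock(ck kvtest.IKVClerk, l string) *Lock {
	return &Lock{ck: ck.(*kvsrv.Clerk), l: l, id: kvtest.RandValue(8)}
}

func (lk *Lock) Acquire() {
	for {
		val, ver, err := lk.ck.Get(lk.l)
		if err == rpc.ErrNoKey {
			val, ver = "", 0 // key not created yet; a Put with version 0 creates it
		}
		switch {
		case val == lk.id:
			return // an earlier ErrMaybe Put actually took effect
		case val == "":
			switch lk.ck.Put(lk.l, lk.id, ver) {
			case rpc.OK:
				return
			case rpc.ErrMaybe:
				// Reply was lost; re-read to learn whether our Put was applied.
				if cur, _, gerr := lk.ck.Get(lk.l); gerr == rpc.OK && cur == lk.id {
					return
				}
			}
			// ErrVersion (lost the race) or an ErrMaybe that didn't apply: retry.
		}
		time.Sleep(10 * time.Millisecond)
	}
}

func (lk *Lock) Release() {
	for {
		val, ver, err := lk.ck.Get(lk.l)
		if err != rpc.OK || val != lk.id {
			return // we do not hold the lock (or an earlier release took effect)
		}
		if lk.ck.Put(lk.l, "", ver) == rpc.OK {
			return
		}
		// ErrVersion or ErrMaybe: loop to re-read and retry until the
		// release is observed.
	}
}

The lock_test.go added above exercises exactly this interface: each client wraps its clerk with MakeLock(ck, "l"), and the test fails if two clients ever observe the guarded key "l0" non-empty at Acquire time.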
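
For part C, the shardctrler.go hunk above says InitController "uses a lock to become leader" and no longer returns an rpc.Err. The fragment below is only a hedged illustration of that flow under assumed names: the ctrler struct, its ck and lck fields, and the "leader" key are hypothetical stand-ins, not the ShardCtrler defined in the lab code.

package shardctrler_sketch

import (
	"6.5840/kvtest1"
	"6.5840/shardkv1/shardctrler/lock"
)

// ctrler is a stand-in for ShardCtrler that shows only the leadership flow.
type ctrler struct {
	ck  kvtest.IKVClerk // hypothetical clerk field
	lck *lock.Lock      // hypothetical lock field
}

func (sck *ctrler) InitController() {
	// Block until this controller holds the leadership lock; "leader" is an
	// illustrative key name.
	sck.lck = lock.MakeLock(sck.ck, "leader")
	sck.lck.Acquire()
	// Part B/C: after winning leadership, read the stored configurations and
	// finish any ChangeConfigTo that a previous controller left incomplete.
}

func (sck *ctrler) ExitController() {
	// Give up leadership so another controller can take over promptly.
	sck.lck.Release()
}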