This commit is contained in:
Frans Kaashoek 2025-03-20 10:56:55 -04:00
parent efc69b96f9
commit 958a4d45b7
8 changed files with 438 additions and 125 deletions

View File

@ -41,7 +41,7 @@ func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
// performed at the server. If the server returns ErrVersion on a // performed at the server. If the server returns ErrVersion on a
// resend RPC, then Put must return ErrMaybe to the application, since // resend RPC, then Put must return ErrMaybe to the application, since
// its earlier RPC might have been processed by the server successfully // its earlier RPC might have been processed by the server successfully
// but the response was lost, and the the Clerk doesn't know if // but the response was lost, and the Clerk doesn't know if
// the Put was performed or not. // the Put was performed or not.
// //
// You can send an RPC with code like this: // You can send an RPC with code like this:

View File

@ -0,0 +1,31 @@
package lock
import (
"6.5840/kvsrv1"
"6.5840/kvsrv1/rpc"
"6.5840/shardkv1/shardctrler/param"
)
// Lock is a distributed lock implemented on top of the kvsrv key/value
// clerk; the lock state itself lives in the key/value store.
type Lock struct {
	ck *kvsrv.Clerk // clerk used to read/write the lock's key
}
// Use l as the key to store the "lock state" (you would have to decide
// precisely what the lock state is).
//
// MakeLock returns a Lock that coordinates through the supplied clerk.
// NOTE(review): the unchecked type assertion assumes the tester always
// passes a *kvsrv.Clerk and will panic otherwise — confirm against the
// callers. The key l is not yet stored anywhere; the stub ignores it.
func MakeLock(ck kvtest.IKVClerk, l string) *Lock {
	lk := &Lock{ck: ck.(*kvsrv.Clerk)}
	// You may add code here
	return lk
}
// Acquire blocks until the calling client holds the lock.
// This is a lab skeleton: the body is intentionally empty.
func (lk *Lock) Acquire() {
	// You may add code here.
}
// Release gives up the lock so another client's Acquire can succeed.
// This is a lab skeleton: the body is intentionally empty.
func (lk *Lock) Release() {
	// You may add code here.
}

View File

@ -0,0 +1,89 @@
package lock
import (
"fmt"
// "log"
"strconv"
"testing"
"time"
"6.5840/kvsrv1"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
)
const (
	// NACQUIRE — NOTE(review): not referenced anywhere in this file;
	// presumably a per-client acquire budget — confirm before relying on it.
	NACQUIRE = 10
	NCLNT    = 10 // number of concurrent clients in the "many clients" tests
	NSEC     = 2  // seconds each SpawnClientsAndWait run lasts
)
// oneClient repeatedly acquires and releases the lock named "l" until
// the tester closes done, returning how many iterations it started.
//
// Mutual exclusion is checked through a second key "l0": while holding
// the lock, a client expects to read "" (no other holder), writes its
// own id, sleeps briefly, then clears the key again before releasing.
// If another client's id is ever visible here, two clients held the
// lock at once and the test fails.
func oneClient(t *testing.T, me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
	lk := MakeLock(ck, "l")
	// Initialize the check key. Only the first client's Put at version 0
	// succeeds; the others' errors are deliberately ignored.
	ck.Put("l0", "", 0)
	for i := 1; true; i++ {
		select {
		case <-done:
			return kvtest.ClntRes{i, 0}
		default:
			lk.Acquire()
			// log.Printf("%d: acquired lock", me)
			id := strconv.Itoa(me)
			val, ver, err := ck.Get("l0")
			if err == rpc.OK {
				if val != "" {
					t.Fatalf("%d: two clients acquired lock %v", me, val)
				}
			} else {
				t.Fatalf("%d: get failed %v", me, err)
			}
			// Mark the key with our id. ErrMaybe is acceptable: a lost
			// reply still means the Put may have been applied.
			// (fix: dropped the redundant string(...) conversion — id is
			// already a string from strconv.Itoa)
			err = ck.Put("l0", id, ver)
			if !(err == rpc.OK || err == rpc.ErrMaybe) {
				t.Fatalf("%d: put failed %v", me, err)
			}
			time.Sleep(10 * time.Millisecond)
			// Clear the key before releasing the lock so the next holder
			// sees "" again.
			err = ck.Put("l0", "", ver+1)
			if !(err == rpc.OK || err == rpc.ErrMaybe) {
				t.Fatalf("%d: put failed %v", me, err)
			}
			// log.Printf("%d: release lock", me)
			lk.Release()
		}
	}
	// Unreachable in practice (the loop only exits via the return above),
	// but required because a for-loop with a condition is not a
	// terminating statement in Go.
	return kvtest.ClntRes{}
}
// Run test clients
func runClients(t *testing.T, nclnt int, reliable bool) {
ts := kvsrv.MakeTestKV(t, reliable)
defer ts.Cleanup()
ts.Begin(fmt.Sprintf("Test: %d lock clients", nclnt))
ts.SpawnClientsAndWait(nclnt, NSEC*time.Second, func(me int, myck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
return oneClient(t, me, myck, done)
})
}
// TestOneClientReliable runs a single lock client over a reliable network.
func TestOneClientReliable(t *testing.T) {
	runClients(t, 1, true)
}
// TestManyClientsReliable runs NCLNT contending lock clients over a
// reliable network.
func TestManyClientsReliable(t *testing.T) {
	runClients(t, NCLNT, true)
}
// TestOneClientUnreliable runs a single lock client over an unreliable
// network (dropped/duplicated RPCs).
func TestOneClientUnreliable(t *testing.T) {
	runClients(t, 1, false)
}
// TestManyClientsUnreliable runs NCLNT contending lock clients over an
// unreliable network.
func TestManyClientsUnreliable(t *testing.T) {
	runClients(t, NCLNT, false)
}

View File

@ -39,12 +39,8 @@ func MakeShardCtrler(clnt *tester.Clnt, leases bool) *ShardCtrler {
// The tester calls InitController() before starting a new // The tester calls InitController() before starting a new
// controller. In part A, this method doesn't need to do anything. In // controller. In part A, this method doesn't need to do anything. In
// B and C, this method implements recovery (part B) and uses a lock // B and C, this method implements recovery (part B) and uses a lock
// to become leader (part C). InitController should return // to become leader (part C).
// rpc.ErrVersion when another controller supersedes it (e.g., when func (sck *ShardCtrler) InitController() {
// this controller is partitioned during recovery); this happens only
// in Part C. Otherwise, it returns rpc.OK.
func (sck *ShardCtrler) InitController() rpc.Err {
return rpc.ErrVersion
} }
// The tester calls ExitController to exit a controller. In part B and // The tester calls ExitController to exit a controller. In part B and
@ -61,12 +57,11 @@ func (sck *ShardCtrler) InitConfig(cfg *shardcfg.ShardConfig) {
} }
// Called by the tester to ask the controller to change the // Called by the tester to ask the controller to change the
// configuration from the current one to new. It should return // configuration from the current one to new. While the controller
// rpc.ErrVersion if this controller is superseded by another // changes the configuration it may be superseded by another
// controller, as in part C. In all other cases, it should return // controller.
// rpc.OK. func (sck *ShardCtrler) ChangeConfigTo(new *shardcfg.ShardConfig) {
func (sck *ShardCtrler) ChangeConfigTo(new *shardcfg.ShardConfig) rpc.Err { return
return rpc.OK
} }
// Tester "kills" shardctrler by calling Kill(). For your // Tester "kills" shardctrler by calling Kill(). For your

View File

@ -14,11 +14,10 @@ import (
type KVServer struct { type KVServer struct {
gid tester.Tgid gid tester.Tgid
me int me int
dead int32 // set by Kill() dead int32 // set by Kill()
rsm *rsm.RSM rsm *rsm.RSM
frozen bool // for testing purposes
} }

View File

@ -67,7 +67,7 @@ func TestStaticOneShardGroup5A(t *testing.T) {
} }
// disconnect raft leader of shardgrp and check that keys are // disconnect raft leader of shardgrp and check that keys are
// still avaialable // still available
ts.disconnectClntFromLeader(ck.(*kvtest.TestClerk).Clnt, shardcfg.Gid1) ts.disconnectClntFromLeader(ck.(*kvtest.TestClerk).Clnt, shardcfg.Gid1)
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
@ -90,18 +90,17 @@ func TestJoinBasic5A(t *testing.T) {
cfg, _ := sck.Query() cfg, _ := sck.Query()
gid2 := ts.newGid() gid2 := ts.newGid()
err := ts.joinGroups(sck, []tester.Tgid{gid2}) if ok := ts.joinGroups(sck, []tester.Tgid{gid2}); !ok {
if err != rpc.OK { ts.t.Fatalf("TestJoinBasic5A: joinGroups failed")
ts.t.Fatalf("joinGroups: err %v", err)
} }
cfg1, _ := sck.Query() cfg1, _ := sck.Query()
if cfg.Num+1 != cfg1.Num { if cfg.Num+1 != cfg1.Num {
ts.t.Fatalf("wrong num %d expected %d ", cfg1.Num, cfg.Num+1) ts.t.Fatalf("TestJoinBasic5A: wrong num %d expected %d ", cfg1.Num, cfg.Num+1)
} }
if !cfg1.IsMember(gid2) { if !cfg1.IsMember(gid2) {
ts.t.Fatalf("%d isn't a member of %v", gid2, cfg1) ts.t.Fatalf("TestJoinBasic5A: %d isn't a member of %v", gid2, cfg1)
} }
ts.checkShutdownSharding(gid1, ka, va) ts.checkShutdownSharding(gid1, ka, va)
@ -136,9 +135,8 @@ func TestDeleteBasic5A(t *testing.T) {
sck := ts.ShardCtrler() sck := ts.ShardCtrler()
gid2 := ts.newGid() gid2 := ts.newGid()
err := ts.joinGroups(sck, []tester.Tgid{gid2}) if ok := ts.joinGroups(sck, []tester.Tgid{gid2}); !ok {
if err != rpc.OK { ts.t.Fatalf("TestDeleteBasic5A: joinGroups failed")
ts.t.Fatalf("joinGroups: err %v", err)
} }
// push more Get's through so that all peers snapshot // push more Get's through so that all peers snapshot
@ -150,7 +148,7 @@ func TestDeleteBasic5A(t *testing.T) {
sz1 := ts.Group(gid1).SnapshotSize() sz1 := ts.Group(gid1).SnapshotSize()
sz2 := ts.Group(gid2).SnapshotSize() sz2 := ts.Group(gid2).SnapshotSize()
if sz1+sz2 > sz+10000 { if sz1+sz2 > sz+10000 {
ts.t.Fatalf("gid1 %d + gid2 %d = %d use too much space %d", sz1, sz2, sz1+sz2, sz) ts.t.Fatalf("TestDeleteBasic5A: gid1 %d + gid2 %d = %d use too much space %d", sz1, sz2, sz1+sz2, sz)
} }
} }
@ -165,9 +163,8 @@ func TestJoinLeaveBasic5A(t *testing.T) {
sck := ts.ShardCtrler() sck := ts.ShardCtrler()
gid2 := ts.newGid() gid2 := ts.newGid()
err := ts.joinGroups(sck, []tester.Tgid{gid2}) if ok := ts.joinGroups(sck, []tester.Tgid{gid2}); !ok {
if err != rpc.OK { ts.t.Fatalf("TestJoinLeaveBasic5A: joinGroups failed")
ts.t.Fatalf("joinGroups: err %v", err)
} }
ts.checkShutdownSharding(gid1, ka, va) ts.checkShutdownSharding(gid1, ka, va)
@ -176,13 +173,9 @@ func TestJoinLeaveBasic5A(t *testing.T) {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
} }
err = ts.leave(sck, shardcfg.Gid1) ts.leave(sck, shardcfg.Gid1)
if err != rpc.OK { if ok := ts.checkMember(sck, shardcfg.Gid1); ok {
ts.t.Fatalf("Leave: err %v", err) ts.t.Fatalf("%d is a member after leave", shardcfg.Gid1)
}
cfg, _ := sck.Query()
if cfg.IsMember(shardcfg.Gid1) {
ts.t.Fatalf("%d is a member of %v", shardcfg.Gid1, cfg)
} }
ts.Group(shardcfg.Gid1).Shutdown() ts.Group(shardcfg.Gid1).Shutdown()
@ -382,11 +375,11 @@ func TestProgressJoin(t *testing.T) {
return return
default: default:
//log.Printf("join/leave %v", newgid) //log.Printf("join/leave %v", newgid)
if err := ts.joinGroups(sck, []tester.Tgid{newgid}); err != rpc.OK { if ok := ts.joinGroups(sck, []tester.Tgid{newgid}); !ok {
t.Fatalf("joined err %v", err) t.Fatalf("TestProgressJoin: join failed")
} }
if err := ts.leaveGroups(sck, []tester.Tgid{newgid}); err != rpc.OK { if ok := ts.leaveGroups(sck, []tester.Tgid{newgid}); !ok {
t.Fatalf("leave err %v", err) t.Fatalf("TestProgressJoin: leave failed")
} }
} }
} }
@ -451,12 +444,12 @@ func concurrentClerk(t *testing.T, nclnt int, reliable bool, part string) {
sck := ts.ShardCtrler() sck := ts.ShardCtrler()
grps := ts.groups(NGRP) grps := ts.groups(NGRP)
if err := ts.joinGroups(sck, grps); err != rpc.OK { if ok := ts.joinGroups(sck, grps); !ok {
t.Fatalf("joinGroups err %v", err) t.Fatalf("concurrentClerk: joinGroups failed")
} }
if err := ts.leaveGroups(sck, grps); err != rpc.OK { if ok := ts.leaveGroups(sck, grps); !ok {
t.Fatalf("leaveGroups err %v", err) t.Fatalf("concurrentClerk: leaveGroups failed")
} }
<-ch <-ch
@ -504,10 +497,9 @@ func TestJoinLeave5B(t *testing.T) {
ts.Group(gid1).Shutdown() ts.Group(gid1).Shutdown()
gid2 := ts.newGid() gid2 := ts.newGid()
ch := make(chan rpc.Err) ch := make(chan bool)
go func() { go func() {
err := ts.joinGroups(sck, []tester.Tgid{gid2}) ch <- ts.joinGroups(sck, []tester.Tgid{gid2})
ch <- err
}() }()
select { select {
@ -521,9 +513,9 @@ func TestJoinLeave5B(t *testing.T) {
ts.Group(gid1).StartServers() ts.Group(gid1).StartServers()
select { select {
case err := <-ch: case ok := <-ch:
if err != rpc.OK { if !ok {
ts.Fatalf("Join returns err %v", err) ts.Fatalf("TestJoinLeave5B: Join returned %t", ok)
} }
case <-time.After(time.Second * NSEC): case <-time.After(time.Second * NSEC):
ts.Fatalf("Join didn't complete") ts.Fatalf("Join didn't complete")
@ -536,15 +528,13 @@ func TestJoinLeave5B(t *testing.T) {
ts.Group(gid2).Shutdown() ts.Group(gid2).Shutdown()
ch = make(chan rpc.Err)
go func() { go func() {
err := ts.leave(sck, shardcfg.Gid1) ch <- ts.leaveGroups(sck, []tester.Tgid{shardcfg.Gid1})
ch <- err
}() }()
select { select {
case err := <-ch: case <-ch:
ts.Fatalf("Leave finished %v", err) ts.Fatalf("Leave finished")
case <-time.After(NSEC * time.Second): case <-time.After(NSEC * time.Second):
// Give some time to try to join // Give some time to try to join
} }
@ -553,9 +543,9 @@ func TestJoinLeave5B(t *testing.T) {
ts.Group(gid2).StartServers() ts.Group(gid2).StartServers()
select { select {
case err := <-ch: case ok := <-ch:
if err != rpc.OK { if !ok {
ts.Fatalf("Leave returns err %v", err) ts.Fatalf("TestJoinLeave5B: Leave failed %t", ok)
} }
case <-time.After(time.Second * NSEC): case <-time.After(time.Second * NSEC):
ts.Fatalf("Leave didn't complete") ts.Fatalf("Leave didn't complete")
@ -611,9 +601,7 @@ func TestLeaseBasicRelease5C(t *testing.T) {
sck0, clnt0 := ts.makeShardCtrlerClnt() sck0, clnt0 := ts.makeShardCtrlerClnt()
go func() { go func() {
if err := sck0.InitController(); err != rpc.OK { sck0.InitController()
t.Fatalf("failed to init controller %v", err)
}
time.Sleep(200 * time.Millisecond) time.Sleep(200 * time.Millisecond)
sck0.ExitController() sck0.ExitController()
}() }()
@ -624,9 +612,7 @@ func TestLeaseBasicRelease5C(t *testing.T) {
sck1, clnt1 := ts.makeShardCtrlerClnt() sck1, clnt1 := ts.makeShardCtrlerClnt()
ch := make(chan struct{}) ch := make(chan struct{})
go func() { go func() {
if err := sck1.InitController(); err != rpc.OK { sck1.InitController()
t.Fatalf("failed to init controller %v", err)
}
time.Sleep(200 * time.Millisecond) time.Sleep(200 * time.Millisecond)
sck1.ExitController() sck1.ExitController()
ch <- struct{}{} ch <- struct{}{}
@ -650,9 +636,7 @@ func TestLeaseBasicExpire5C(t *testing.T) {
sck0, clnt0 := ts.makeShardCtrlerClnt() sck0, clnt0 := ts.makeShardCtrlerClnt()
go func() { go func() {
if err := sck0.InitController(); err != rpc.OK { sck0.InitController()
t.Fatalf("failed to init controller %v", err)
}
for { for {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
@ -667,9 +651,7 @@ func TestLeaseBasicExpire5C(t *testing.T) {
sck1, clnt1 := ts.makeShardCtrlerClnt() sck1, clnt1 := ts.makeShardCtrlerClnt()
ch := make(chan struct{}) ch := make(chan struct{})
go func() { go func() {
if err := sck1.InitController(); err != rpc.OK { sck1.InitController()
t.Fatalf("failed to init controller %v", err)
}
time.Sleep(100 * time.Millisecond) time.Sleep(100 * time.Millisecond)
sck1.ExitController() sck1.ExitController()
ch <- struct{}{} ch <- struct{}{}
@ -695,9 +677,7 @@ func TestLeaseBasicRefresh5C(t *testing.T) {
sck0, clnt0 := ts.makeShardCtrlerClnt() sck0, clnt0 := ts.makeShardCtrlerClnt()
go func() { go func() {
if err := sck0.InitController(); err != rpc.OK { sck0.InitController()
t.Fatalf("failed to init controller %v", err)
}
time.Sleep(LEADERSEC * param.LEASETIMESEC * time.Second) time.Sleep(LEADERSEC * param.LEASETIMESEC * time.Second)
sck0.ExitController() sck0.ExitController()
}() }()
@ -709,9 +689,7 @@ func TestLeaseBasicRefresh5C(t *testing.T) {
sck1, clnt1 := ts.makeShardCtrlerClnt() sck1, clnt1 := ts.makeShardCtrlerClnt()
ch := make(chan struct{}) ch := make(chan struct{})
go func() { go func() {
if err := sck1.InitController(); err != rpc.OK { sck1.InitController()
t.Fatalf("failed to init controller %v", err)
}
time.Sleep(100 * time.Millisecond) time.Sleep(100 * time.Millisecond)
sck1.ExitController() sck1.ExitController()
ch <- struct{}{} ch <- struct{}{}
@ -742,9 +720,7 @@ func TestPartitionControllerJoin5C(t *testing.T) {
ka, va := ts.SpreadPuts(ck, NKEYS) ka, va := ts.SpreadPuts(ck, NKEYS)
sck, clnt := ts.makeShardCtrlerClnt() sck, clnt := ts.makeShardCtrlerClnt()
if err := sck.InitController(); err != rpc.OK { sck.InitController()
ts.Fatalf("failed to init controller %v", err)
}
ch := make(chan rpc.Err) ch := make(chan rpc.Err)
ngid := tester.Tgid(0) ngid := tester.Tgid(0)
@ -752,7 +728,8 @@ func TestPartitionControllerJoin5C(t *testing.T) {
ngid = ts.newGid() ngid = ts.newGid()
ts.Config.MakeGroupStart(ngid, NSRV, ts.StartServerShardGrp) ts.Config.MakeGroupStart(ngid, NSRV, ts.StartServerShardGrp)
ts.Group(ngid).Shutdown() ts.Group(ngid).Shutdown()
ch <- ts.join(sck, ngid, ts.Group(ngid).SrvNames()) ts.join(sck, ngid, ts.Group(ngid).SrvNames())
ch <- rpc.OK
}() }()
// sleep for a while to get the chance for the controller to get // sleep for a while to get the chance for the controller to get
@ -769,9 +746,7 @@ func TestPartitionControllerJoin5C(t *testing.T) {
// start new controller to supersede partitioned one, // start new controller to supersede partitioned one,
sck0 := ts.makeShardCtrler() sck0 := ts.makeShardCtrler()
if err := sck0.InitController(); err != rpc.OK { sck0.InitController()
t.Fatalf("failed to init controller %v", err)
}
scfg, _ := sck0.Query() scfg, _ := sck0.Query()
if !scfg.IsMember(ngid) { if !scfg.IsMember(ngid) {
@ -783,10 +758,8 @@ func TestPartitionControllerJoin5C(t *testing.T) {
// reconnect old controller, which shouldn't finish ChangeConfigTo // reconnect old controller, which shouldn't finish ChangeConfigTo
clnt.ConnectAll() clnt.ConnectAll()
err := <-ch // wait for old controller to finish/exit
if err == rpc.OK { <-ch
t.Fatalf("Old leader succeeded %v", err)
}
time.Sleep(1 * time.Second) time.Sleep(1 * time.Second)
@ -797,19 +770,64 @@ func TestPartitionControllerJoin5C(t *testing.T) {
// Make a leader controller loses its leadership during join/leave and // Make a leader controller loses its leadership during join/leave and
// test if the next controller recovers correctly. // test if the next controller recovers correctly.
func TestPartitionRecovery5C(t *testing.T) { func partitionRecovery5C(t *testing.T, reliable bool, npart, nclnt int) {
const ( const NSEC = 60
// NPARTITION = 10 ts := MakeTestLeases(t, "Test (5C): controllers with leased leadership ...", reliable)
NPARTITION = 5
)
ts := MakeTestLeases(t, "Test (5C): controllers with leased leadership ...", true)
defer ts.Cleanup() defer ts.Cleanup()
gid := ts.setupKVService() gid := ts.setupKVService()
ck := ts.MakeClerk() ck := ts.MakeClerk()
ka, va := ts.SpreadPuts(ck, NKEYS)
for i := 0; i < NPARTITION; i++ { ka := make([]string, 0)
va := make([]string, 0)
if nclnt <= 0 {
ka, va = ts.SpreadPuts(ck, NKEYS)
}
ch := make(chan []kvtest.ClntRes)
if nclnt > 0 {
ka := kvtest.MakeKeys(1)
go func(ch chan []kvtest.ClntRes) {
rs := ts.SpawnClientsAndWait(nclnt, NSEC*time.Second, func(me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
return ts.OneClientPut(me, ck, ka, done)
})
ch <- rs
}(ch)
}
for i := 0; i < npart; i++ {
ts.killCtrler(ck, gid, ka, va) ts.killCtrler(ck, gid, ka, va)
} }
if nclnt > 0 {
<-ch
ts.CheckPorcupine()
}
}
func TestPartitionRecoveryReliableNoClerk5C(t *testing.T) {
const (
NPARTITION = 5
)
partitionRecovery5C(t, true, NPARTITION, 0)
}
func TestPartitionRecoveryUnreliableNoClerk5C(t *testing.T) {
const (
NPARTITION = 3
)
partitionRecovery5C(t, false, NPARTITION, 0)
}
func TestPartitionRecoveryReliableClerks5C(t *testing.T) {
const (
NPARTITION = 5
)
partitionRecovery5C(t, true, NPARTITION, 5)
}
func TestPartitionRecoveryUnreliableClerks5C(t *testing.T) {
const (
NPARTITION = 5
)
partitionRecovery5C(t, false, NPARTITION, 5)
} }

View File

@ -50,6 +50,8 @@ func MakeTestMaxRaft(t *testing.T, part string, reliable, leases bool, maxraftst
} }
cfg := tester.MakeConfig(t, 1, reliable, kvsrv.StartKVServer) cfg := tester.MakeConfig(t, 1, reliable, kvsrv.StartKVServer)
ts.Test = kvtest.MakeTest(t, cfg, false, ts) ts.Test = kvtest.MakeTest(t, cfg, false, ts)
// XXX to avoid panic
tester.AnnotateTest(part, 1)
ts.Begin(part) ts.Begin(part)
return ts return ts
} }
@ -125,48 +127,56 @@ func (ts *Test) StartServerShardGrp(servers []*labrpc.ClientEnd, gid tester.Tgid
return shardgrp.StartServerShardGrp(servers, gid, me, persister, ts.maxraftstate) return shardgrp.StartServerShardGrp(servers, gid, me, persister, ts.maxraftstate)
} }
func (ts *Test) checkMember(sck *shardctrler.ShardCtrler, gid tester.Tgid) bool {
cfg, _ := sck.Query()
ok := cfg.IsMember(gid)
return ok
}
// Add group gid // Add group gid
func (ts *Test) join(sck *shardctrler.ShardCtrler, gid tester.Tgid, srvs []string) rpc.Err { func (ts *Test) join(sck *shardctrler.ShardCtrler, gid tester.Tgid, srvs []string) {
cfg, _ := sck.Query() cfg, _ := sck.Query()
newcfg := cfg.Copy() newcfg := cfg.Copy()
ok := newcfg.JoinBalance(map[tester.Tgid][]string{gid: srvs}) ok := newcfg.JoinBalance(map[tester.Tgid][]string{gid: srvs})
if !ok { if !ok {
log.Fatalf("join: group %d is already present", gid) log.Fatalf("join: group %d is already present", gid)
} }
return sck.ChangeConfigTo(newcfg) sck.ChangeConfigTo(newcfg)
} }
func (ts *Test) joinGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) rpc.Err { func (ts *Test) joinGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) bool {
for _, gid := range gids { for _, gid := range gids {
ts.Config.MakeGroupStart(gid, NSRV, ts.StartServerShardGrp) ts.Config.MakeGroupStart(gid, NSRV, ts.StartServerShardGrp)
if err := ts.join(sck, gid, ts.Group(gid).SrvNames()); err != rpc.OK { ts.join(sck, gid, ts.Group(gid).SrvNames())
return err if ok := ts.checkMember(sck, gid); !ok {
return false
} }
time.Sleep(INTERGRPDELAY * time.Millisecond) time.Sleep(INTERGRPDELAY * time.Millisecond)
} }
return rpc.OK return true
} }
// Group gid leaves. // Group gid leaves.
func (ts *Test) leave(sck *shardctrler.ShardCtrler, gid tester.Tgid) rpc.Err { func (ts *Test) leave(sck *shardctrler.ShardCtrler, gid tester.Tgid) {
cfg, _ := sck.Query() cfg, _ := sck.Query()
newcfg := cfg.Copy() newcfg := cfg.Copy()
ok := newcfg.LeaveBalance([]tester.Tgid{gid}) ok := newcfg.LeaveBalance([]tester.Tgid{gid})
if !ok { if !ok {
log.Fatalf("leave: group %d is already not present", gid) log.Fatalf("leave: group %d is already not present", gid)
} }
return sck.ChangeConfigTo(newcfg) sck.ChangeConfigTo(newcfg)
} }
func (ts *Test) leaveGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) rpc.Err { func (ts *Test) leaveGroups(sck *shardctrler.ShardCtrler, gids []tester.Tgid) bool {
for _, gid := range gids { for _, gid := range gids {
if err := ts.leave(sck, gid); err != rpc.OK { ts.leave(sck, gid)
return err if ok := ts.checkMember(sck, gid); ok {
return false
} }
ts.Config.ExitGroup(gid) ts.Config.ExitGroup(gid)
time.Sleep(INTERGRPDELAY * time.Millisecond) time.Sleep(INTERGRPDELAY * time.Millisecond)
} }
return rpc.OK return true
} }
func (ts *Test) disconnectRaftLeader(gid tester.Tgid) (int, string) { func (ts *Test) disconnectRaftLeader(gid tester.Tgid) (int, string) {
@ -257,9 +267,7 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string)
) )
sck, clnt := ts.makeShardCtrlerClnt() sck, clnt := ts.makeShardCtrlerClnt()
if err := sck.InitController(); err != rpc.OK { sck.InitController()
ts.Fatalf("failed to init controller %v", err)
}
cfg, _ := ts.ShardCtrler().Query() cfg, _ := ts.ShardCtrler().Query()
num := cfg.Num num := cfg.Num
@ -270,12 +278,12 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string)
for { for {
ngid = ts.newGid() ngid = ts.newGid()
state = JOIN state = JOIN
err := ts.joinGroups(sck, []tester.Tgid{ngid}) ts.joinGroups(sck, []tester.Tgid{ngid})
if err == rpc.OK { if ok := ts.checkMember(sck, ngid); ok {
state = LEAVE state = LEAVE
err = ts.leaveGroups(sck, []tester.Tgid{ngid}) ts.leaveGroups(sck, []tester.Tgid{ngid})
} else { } else {
//log.Printf("deposed err %v", err) //log.Printf("deposed")
return return
} }
} }
@ -306,9 +314,8 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string)
// start new controller to pick up where sck left off // start new controller to pick up where sck left off
sck0, clnt0 := ts.makeShardCtrlerClnt() sck0, clnt0 := ts.makeShardCtrlerClnt()
if err := sck0.InitController(); err != rpc.OK {
ts.Fatalf("failed to init controller %v", err) sck0.InitController()
}
cfg, _ = sck0.Query() cfg, _ = sck0.Query()
s := "join" s := "join"
if state == LEAVE { if state == LEAVE {
@ -337,6 +344,8 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string)
sck0.ExitController() sck0.ExitController()
if ts.leases { if ts.leases {
//log.Printf("reconnect old controller")
// reconnect old controller, which should bail out, because // reconnect old controller, which should bail out, because
// it has been superseded. // it has been superseded.
clnt.ConnectAll() clnt.ConnectAll()
@ -366,12 +375,15 @@ func (ts *Test) electCtrler(ck kvtest.IKVClerk, ka, va []string) {
default: default:
ngid := ts.newGid() ngid := ts.newGid()
sck := ts.makeShardCtrler() sck := ts.makeShardCtrler()
if err := sck.InitController(); err != rpc.OK { sck.InitController()
ts.Fatalf("failed to init controller %v", err)
}
//log.Printf("%d(%p): join/leave %v", i, sck, ngid) //log.Printf("%d(%p): join/leave %v", i, sck, ngid)
if err := ts.joinGroups(sck, []tester.Tgid{ngid}); err == rpc.OK { ts.joinGroups(sck, []tester.Tgid{ngid})
ts.leaveGroups(sck, []tester.Tgid{ngid}) if ok := ts.checkMember(sck, ngid); ok {
if ok := ts.leaveGroups(sck, []tester.Tgid{ngid}); !ok {
log.Fatalf("electCtrler: %d(%p): leave %v failed", i, sck, ngid)
}
} else {
log.Fatalf("electCtrler: %d(%p): join %v failed", i, sck, ngid)
} }
sck.ExitController() sck.ExitController()
} }

169
src/shardkv1/test.out Normal file
View File

@ -0,0 +1,169 @@
=== RUN TestAcquireLockConcurrentReliable5C
Test (5C): Concurent ctrlers acquiring leadership ... (reliable network)...
2025/03/20 09:18:33 PecUxIPV: acquire success 1 l 2.999731394s
2025/03/20 09:18:35 PecUxIPV: Release 3
2025/03/20 09:18:35 aKDBLFuF: acquire success 4 l 2.999504542s
2025/03/20 09:18:37 aKDBLFuF: Release 6
2025/03/20 09:18:37 HxhaFlAP: acquire success 7 l 2.999622621s
2025/03/20 09:18:39 HxhaFlAP: Release 9
2025/03/20 09:18:39 LpTmFCGC: acquire success 10 l 2.999747179s
2025/03/20 09:18:41 LpTmFCGC: Release 13
2025/03/20 09:18:41 klmldUQn: acquire success 14 l 2.999558604s
2025/03/20 09:18:43 klmldUQn: Release 17
2025/03/20 09:18:43 AWgiWKPZ: acquire success 18 l 2.999701903s
2025/03/20 09:18:46 AWgiWKPZ: Release 21
... Passed -- 16.4 1 2061 120
--- PASS: TestAcquireLockConcurrentReliable5C (16.38s)
=== RUN TestAcquireLockConcurrentUnreliable5C
Test (5C): Concurent ctrlers acquiring leadership ... (unreliable network)...
2025/03/20 09:19:00 xulPPlwd: acquire success 2 l 2.768860613s
2025/03/20 09:19:05 xulPPlwd: Release 6
2025/03/20 09:19:05 SGXgIJeR: acquire success 7 l 2.984694448s
2025/03/20 09:19:08 SGXgIJeR: Release 11
2025/03/20 09:19:08 kNvktGla: acquire success 12 l 2.986135242s
2025/03/20 09:19:13 kNvktGla: Release 17
2025/03/20 09:19:13 usGKuyeI: acquire success 18 l 2.97484218s
2025/03/20 09:19:19 usGKuyeI: Release 24
... Passed -- 38.4 1 2226 120
--- PASS: TestAcquireLockConcurrentUnreliable5C (38.37s)
=== RUN TestLeaseBasicRelease5C
Test (5C): release lease ... (reliable network)...
2025/03/20 09:19:25 fWllyjFs: acquire success 1 l 2.999778852s
2025/03/20 09:19:25 fWllyjFs: Release 2
2025/03/20 09:19:25 HqoctgYf: acquire success 3 l 2.999623311s
2025/03/20 09:19:26 HqoctgYf: Release 4
... Passed -- 0.4 1 17 0
--- PASS: TestLeaseBasicRelease5C (0.42s)
=== RUN TestLeaseBasicExpire5C
Test (5C): lease expiring ... (reliable network)...
2025/03/20 09:19:26 MgmIiwHw: acquire success 1 l 2.999622077s
2025/03/20 09:19:29 PviuBaqZ: acquire: MgmIiwHw lease expired -31.512117ms
2025/03/20 09:19:29 PviuBaqZ: acquire success 2 l 2.9996929s
2025/03/20 09:19:29 PviuBaqZ: Release 3
... Passed -- 3.1 1 81 0
--- PASS: TestLeaseBasicExpire5C (3.14s)
=== RUN TestLeaseBasicRefresh5C
Test (5C): lease refresh ... (reliable network)...
2025/03/20 09:19:29 CqhHcMdl: acquire success 1 l 2.999690343s
... Passed -- 7.1 1 144 0
--- PASS: TestLeaseBasicRefresh5C (7.10s)
=== RUN TestPartitionControllerJoin5C
Test (5C): partition controller in join... (reliable network)...
2025/03/20 09:19:38 CqhHcMdl: Release 9
2025/03/20 09:19:38 QykadXGi: acquire success 1 l 2.999763148s
2025/03/20 09:19:43 YWktoCTH: acquire: QykadXGi lease expired -2.003411436s
2025/03/20 09:19:43 YWktoCTH: acquire success 2 l 2.999580573s
2025/03/20 09:19:45 YWktoCTH: Release 4
2025/03/20 09:19:45 QykadXGi: refresher: exit expired -3.255782562s
... Passed -- 11.2 1 1011 120
--- PASS: TestPartitionControllerJoin5C (11.22s)
=== RUN TestPartitionRecoveryReliableNoClerk5C
Test (5C): controllers with leased leadership ... (reliable network)...
2025/03/20 09:19:50 nLqpPYYg: acquire success 1 l 2.999773699s
2025/03/20 09:19:56 Yauplngb: acquire: nLqpPYYg lease expired -1.030252686s
2025/03/20 09:19:56 Yauplngb: acquire success 4 l 2.999760357s
2025/03/20 09:19:58 Yauplngb: Release 7
2025/03/20 09:19:58 nLqpPYYg: refresher: exit expired -3.848348135s
2025/03/20 09:20:00 dsvADejV: acquire success 8 l 2.999675453s
2025/03/20 09:20:07 jdcPVdvf: acquire: dsvADejV lease expired -1.56610473s
2025/03/20 09:20:07 jdcPVdvf: acquire success 11 l 2.999839821s
2025/03/20 09:20:10 jdcPVdvf: Release 15
2025/03/20 09:20:10 dsvADejV: refresher: exit expired -4.604218577s
2025/03/20 09:20:12 vzVcVtTQ: acquire success 16 l 2.999743618s
2025/03/20 09:20:19 valCDRmB: acquire: vzVcVtTQ lease expired -1.988170854s
2025/03/20 09:20:19 valCDRmB: acquire success 19 l 2.999667662s
2025/03/20 09:20:22 valCDRmB: Release 22
2025/03/20 09:20:22 vzVcVtTQ: refresher: exit expired -4.943386258s
2025/03/20 09:20:23 RJYqYuLF: acquire success 23 l 2.999774783s
2025/03/20 09:20:30 KaeJpVvL: acquire: RJYqYuLF lease expired -1.222157296s
2025/03/20 09:20:30 KaeJpVvL: acquire success 26 l 2.999897268s
2025/03/20 09:20:33 KaeJpVvL: Release 30
2025/03/20 09:20:33 RJYqYuLF: refresher: exit expired -4.429889332s
2025/03/20 09:20:34 leVdobnP: acquire success 31 l 2.999770816s
2025/03/20 09:20:41 DFnmWean: acquire: leVdobnP lease expired -1.756292497s
2025/03/20 09:20:41 DFnmWean: acquire success 34 l 2.999905276s
2025/03/20 09:20:44 DFnmWean: Release 38
2025/03/20 09:20:44 leVdobnP: refresher: exit expired -4.84260629s
... Passed -- 59.3 1 5454 660
--- PASS: TestPartitionRecoveryReliableNoClerk5C (59.30s)
=== RUN TestPartitionRecoveryUnreliableNoClerk5C
Test (5C): controllers with leased leadership ... (unreliable network)...
2025/03/20 09:21:01 oBRWPJFn: acquire success 1 l 2.999668901s
2025/03/20 09:21:08 WCfEtCSF: acquire: oBRWPJFn lease expired -1.960469635s
2025/03/20 09:21:08 WCfEtCSF: acquire success 4 l 2.989064006s
2025/03/20 09:21:19 WCfEtCSF: Release 15
2025/03/20 09:21:20 oBRWPJFn: refresher: exit expired -13.623366094s
2025/03/20 09:21:25 BKOHUPgK: acquire success 16 l 2.974368151s
2025/03/20 09:21:32 dpZEDTAn: acquire: BKOHUPgK lease expired -1.266079689s
2025/03/20 09:21:32 dpZEDTAn: acquire success 19 l 2.986737971s
2025/03/20 09:21:42 dpZEDTAn: Release 29
2025/03/20 09:21:43 BKOHUPgK: refresher: exit expired -12.408069097s
2025/03/20 09:21:50 TiapOztE: acquire: dpZEDTAn lease expired -4.992859225s
2025/03/20 09:21:50 TiapOztE: acquire success 30 l 2.972701594s
2025/03/20 09:21:57 aDyCYcpR: acquire: TiapOztE lease expired -1.338848496s
2025/03/20 09:21:57 aDyCYcpR: acquire success 33 l 2.99686939s
2025/03/20 09:22:07 aDyCYcpR: Release 43
2025/03/20 09:22:07 TiapOztE: refresher: exit expired -12.147734461s
... Passed -- 86.9 1 4985 420
--- PASS: TestPartitionRecoveryUnreliableNoClerk5C (86.88s)
=== RUN TestPartitionRecoveryReliableClerks5C
Test (5C): controllers with leased leadership ... (reliable network)...
2025/03/20 09:22:13 vZrMwEsy: acquire success 1 l 2.999893567s
2025/03/20 09:22:20 AFHDpDYV: acquire: vZrMwEsy lease expired -1.657500925s
2025/03/20 09:22:20 AFHDpDYV: acquire success 4 l 2.999596975s
2025/03/20 09:22:22 AFHDpDYV: Release 6
2025/03/20 09:22:22 vZrMwEsy: refresher: exit expired -3.627083489s
2025/03/20 09:22:23 tserHLNb: acquire success 7 l 2.999932478s
2025/03/20 09:22:29 msIfUgIC: acquire: tserHLNb lease expired -1.13789373s
2025/03/20 09:22:29 msIfUgIC: acquire success 10 l 2.999755401s
2025/03/20 09:22:31 msIfUgIC: Release 12
2025/03/20 09:22:31 tserHLNb: refresher: exit expired -3.083945752s
2025/03/20 09:22:32 YLEIZyDn: acquire success 13 l 2.999940475s
2025/03/20 09:22:38 TIibzsMc: acquire: YLEIZyDn lease expired -1.017825561s
2025/03/20 09:22:38 TIibzsMc: acquire success 16 l 2.999907075s
2025/03/20 09:22:40 TIibzsMc: Release 18
2025/03/20 09:22:40 YLEIZyDn: refresher: exit expired -2.789136907s
2025/03/20 09:22:41 knOnYtxW: acquire success 19 l 2.999891429s
2025/03/20 09:22:47 KyiPMsgB: acquire: knOnYtxW lease expired -1.534324297s
2025/03/20 09:22:47 KyiPMsgB: acquire success 22 l 2.999822725s
2025/03/20 09:22:49 KyiPMsgB: Release 24
2025/03/20 09:22:49 knOnYtxW: refresher: exit expired -3.516354686s
2025/03/20 09:22:50 wHNCImkl: acquire success 25 l 2.999917928s
2025/03/20 09:22:56 CSBcxnyr: acquire: wHNCImkl lease expired -1.051161379s
2025/03/20 09:22:56 CSBcxnyr: acquire success 28 l 2.999745303s
2025/03/20 09:22:58 CSBcxnyr: Release 31
2025/03/20 09:22:58 wHNCImkl: refresher: exit expired -3.241024197s
... Passed -- 60.1 1 15934 5124
--- PASS: TestPartitionRecoveryReliableClerks5C (60.14s)
=== RUN TestPartitionRecoveryUnreliableClerks5C
Test (5C): controllers with leased leadership ... (unreliable network)...
2025/03/20 09:23:14 ydfNYYir: acquire success 1 l 2.871807366s
2025/03/20 09:23:21 KmfOaYym: acquire: ydfNYYir lease expired -1.96910688s
2025/03/20 09:23:21 KmfOaYym: acquire success 4 l 2.976357121s
2025/03/20 09:23:25 KmfOaYym: Release 9
2025/03/20 09:23:26 ydfNYYir: refresher: exit expired -6.960801287s
2025/03/20 09:23:27 XErxjiqb: acquire success 10 l 2.994288153s
2025/03/20 09:23:34 VQFBAKED: acquire: XErxjiqb lease expired -1.186993995s
2025/03/20 09:23:34 VQFBAKED: acquire success 14 l 2.978008397s
2025/03/20 09:23:40 VQFBAKED: Release 20
2025/03/20 09:23:40 XErxjiqb: refresher: exit expired -7.422563867s
2025/03/20 09:23:41 IqJHVjsW: acquire success 21 l 2.984528802s
2025/03/20 09:23:47 NAaIOMcb: acquire: IqJHVjsW lease expired -1.19246442s
2025/03/20 09:23:48 NAaIOMcb: acquire success 25 l 2.521727902s
2025/03/20 09:23:53 NAaIOMcb: Release 30
2025/03/20 09:23:53 IqJHVjsW: refresher: exit expired -7.130118022s
2025/03/20 09:23:54 pwTkolYO: acquire success 32 l 2.761741697s
2025/03/20 09:24:01 GAueeCFX: acquire: pwTkolYO lease expired -1.496813006s
2025/03/20 09:24:01 GAueeCFX: acquire success 34 l 2.977558093s
2025/03/20 09:24:06 GAueeCFX: Release 39
2025/03/20 09:24:06 pwTkolYO: refresher: exit expired -6.843949894s
2025/03/20 09:24:09 FIDtQSlF: acquire: GAueeCFX lease expired -15.508321ms
2025/03/20 09:24:09 FIDtQSlF: acquire success 40 l 2.998464382s
2025/03/20 09:24:16 wExaLSov: acquire: FIDtQSlF lease expired -1.874162487s
2025/03/20 09:24:16 wExaLSov: acquire success 43 l 2.860519358s
2025/03/20 09:24:21 wExaLSov: Release 48
2025/03/20 09:24:21 FIDtQSlF: refresher: exit expired -6.924846198s
... Passed -- 68.4 1 10469 1186
--- PASS: TestPartitionRecoveryUnreliableClerks5C (68.39s)
PASS
ok 6.5840/shardkv1 351.349s