diff --git a/src/shardkv1/shardctrler/shardctrler.go b/src/shardkv1/shardctrler/shardctrler.go index d59c0da..27b555a 100644 --- a/src/shardkv1/shardctrler/shardctrler.go +++ b/src/shardkv1/shardctrler/shardctrler.go @@ -6,8 +6,6 @@ package shardctrler import ( - "sync/atomic" - "6.5840/kvsrv1" "6.5840/kvsrv1/rpc" "6.5840/kvtest1" @@ -22,14 +20,13 @@ type ShardCtrler struct { kvtest.IKVClerk killed int32 // set by Kill() - leases bool // Your data here. } // Make a ShardCltler, which stores its state in a kvsrv. -func MakeShardCtrler(clnt *tester.Clnt, leases bool) *ShardCtrler { - sck := &ShardCtrler{clnt: clnt, leases: leases} +func MakeShardCtrler(clnt *tester.Clnt) *ShardCtrler { + sck := &ShardCtrler{clnt: clnt} srv := tester.ServerName(tester.GRP0, 0) sck.IKVClerk = kvsrv.MakeClerk(clnt, srv) // Your code here. @@ -38,20 +35,15 @@ func MakeShardCtrler(clnt *tester.Clnt, leases bool) *ShardCtrler { // The tester calls InitController() before starting a new // controller. In part A, this method doesn't need to do anything. In -// B and C, this method implements recovery (part B) and uses a lock -// to become leader (part C). +// B and C, this method implements recovery. func (sck *ShardCtrler) InitController() { } -// The tester calls ExitController to exit a controller. In part B and -// C, release lock. -func (sck *ShardCtrler) ExitController() { -} - // Called once by the tester to supply the first configuration. You // can marshal ShardConfig into a string using shardcfg.String(), and // then Put it in the kvsrv for the controller at version 0. You can -// pick the key to name the configuration. +// pick the key to name the configuration. The initial configuration +// lists shardgrp shardcfg.Gid1 for all shards. func (sck *ShardCtrler) InitConfig(cfg *shardcfg.ShardConfig) { // Your code here } @@ -64,18 +56,6 @@ func (sck *ShardCtrler) ChangeConfigTo(new *shardcfg.ShardConfig) { return } -// Tester "kills" shardctrler by calling Kill(). For your -// convenience, we also supply isKilled() method to test killed in -// loops. -func (sck *ShardCtrler) Kill() { - atomic.StoreInt32(&sck.killed, 1) -} - -func (sck *ShardCtrler) isKilled() bool { - z := atomic.LoadInt32(&sck.killed) - return z == 1 -} - // Return the current configuration and its version number func (sck *ShardCtrler) Query() (*shardcfg.ShardConfig, rpc.Tversion) { diff --git a/src/shardkv1/shardgrp/client.go b/src/shardkv1/shardgrp/client.go index e99325e..9461fbf 100644 --- a/src/shardkv1/shardgrp/client.go +++ b/src/shardkv1/shardgrp/client.go @@ -19,14 +19,14 @@ func MakeClerk(clnt *tester.Clnt, servers []string) *Clerk { return ck } -func (ck *Clerk) Get(key string, n shardcfg.Tnum) (string, rpc.Tversion, rpc.Err) { +func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) { // Your code here return "", 0, "" } -func (ck *Clerk) Put(key string, value string, version rpc.Tversion, n shardcfg.Tnum) (bool, rpc.Err) { +func (ck *Clerk) Put(key string, value string, version rpc.Tversion) rpc.Err { // Your code here - return false, "" + return "" } func (ck *Clerk) Freeze(s shardcfg.Tshid, num shardcfg.Tnum) ([]byte, rpc.Err) { diff --git a/src/shardkv1/shardgrp/shardrpc/shardrpc.go b/src/shardkv1/shardgrp/shardrpc/shardrpc.go index 03f83a1..007f11c 100644 --- a/src/shardkv1/shardgrp/shardrpc/shardrpc.go +++ b/src/shardkv1/shardgrp/shardrpc/shardrpc.go @@ -5,18 +5,16 @@ import ( "6.5840/shardkv1/shardcfg" ) -// Same as Put in kvsrv1/rpc, but with a configuration number +// Same as Put in kvsrv1/rpc type PutArgs struct { Key string Value string Version rpc.Tversion - Num shardcfg.Tnum } -// Same as Get in kvsrv1/rpc, but with a configuration number. +// Same as Get in kvsrv1/rpc type GetArgs struct { Key string - Num shardcfg.Tnum } type FreezeArgs struct { diff --git a/src/shardkv1/shardkv_test.go b/src/shardkv1/shardkv_test.go index ce37791..8dc41bc 100644 --- a/src/shardkv1/shardkv_test.go +++ b/src/shardkv1/shardkv_test.go @@ -1,7 +1,7 @@ package shardkv import ( - "log" + //"log" "testing" "time" @@ -9,7 +9,6 @@ import ( "6.5840/kvtest1" "6.5840/shardkv1/shardcfg" "6.5840/shardkv1/shardctrler" - "6.5840/shardkv1/shardctrler/param" "6.5840/tester1" ) @@ -28,7 +27,7 @@ func TestInitQuery5A(t *testing.T) { defer ts.Cleanup() // Make a shard controller - sck := shardctrler.MakeShardCtrler(ts.Config.MakeClient(), ts.leases) + sck := shardctrler.MakeShardCtrler(ts.Config.MakeClient()) // Make an empty shard configuration scfg := shardcfg.MakeShardConfig() @@ -409,7 +408,7 @@ func TestProgressJoin(t *testing.T) { select { case cnt := <-ch1: - log.Printf("cnt %d", cnt) + //log.Printf("cnt %d", cnt) if cnt < NCNT { ts.Fatalf("Two few gets finished %d; expected more than %d", cnt, NCNT) } @@ -569,140 +568,28 @@ func TestRecoverCtrler5B(t *testing.T) { ka, va := ts.SpreadPuts(ck, NKEYS) for i := 0; i < NPARTITION; i++ { - ts.killCtrler(ck, gid, ka, va) + ts.partitionCtrler(ck, gid, ka, va) } } // Test concurrent ctrlers fighting for leadership reliable -func TestAcquireLockConcurrentReliable5C(t *testing.T) { - ts := MakeTestLeases(t, "Test (5C): Concurent ctrlers acquiring leadership ...", true) +func TestConcurrentReliable5C(t *testing.T) { + ts := MakeTestLeases(t, "Test (5C): Concurent ctrlers ...", true) defer ts.Cleanup() ts.setupKVService() ck := ts.MakeClerk() ka, va := ts.SpreadPuts(ck, NKEYS) - ts.electCtrler(ck, ka, va) + ts.concurCtrler(ck, ka, va) } // Test concurrent ctrlers fighting for leadership unreliable func TestAcquireLockConcurrentUnreliable5C(t *testing.T) { - ts := MakeTestLeases(t, "Test (5C): Concurent ctrlers acquiring leadership ...", false) + ts := MakeTestLeases(t, "Test (5C): Concurent ctrlers ...", false) defer ts.Cleanup() ts.setupKVService() ck := ts.MakeClerk() ka, va := ts.SpreadPuts(ck, NKEYS) - ts.electCtrler(ck, ka, va) -} - -// Test that ReleaseLock allows a new leader to start quickly -func TestLeaseBasicRelease5C(t *testing.T) { - ts := MakeTestLeases(t, "Test (5C): release lease ...", true) - defer ts.Cleanup() - ts.setupKVService() - - sck0, clnt0 := ts.makeShardCtrlerClnt() - go func() { - sck0.InitController() - time.Sleep(200 * time.Millisecond) - sck0.ExitController() - }() - - time.Sleep(10 * time.Millisecond) - - // start new controller - sck1, clnt1 := ts.makeShardCtrlerClnt() - ch := make(chan struct{}) - go func() { - sck1.InitController() - time.Sleep(200 * time.Millisecond) - sck1.ExitController() - ch <- struct{}{} - }() - - select { - case <-time.After(1 * time.Second): - ts.Fatalf("Release didn't give up leadership") - case <-ch: - } - - ts.Config.DeleteClient(clnt0) - ts.Config.DeleteClient(clnt1) -} - -// Test lease expiring -func TestLeaseBasicExpire5C(t *testing.T) { - ts := MakeTestLeases(t, "Test (5C): lease expiring ...", true) - defer ts.Cleanup() - ts.setupKVService() - - sck0, clnt0 := ts.makeShardCtrlerClnt() - go func() { - sck0.InitController() - for { - time.Sleep(10 * time.Millisecond) - } - }() - - time.Sleep(100 * time.Millisecond) - - // partition sck0 forever - clnt0.DisconnectAll() - - // start new controller - sck1, clnt1 := ts.makeShardCtrlerClnt() - ch := make(chan struct{}) - go func() { - sck1.InitController() - time.Sleep(100 * time.Millisecond) - sck1.ExitController() - ch <- struct{}{} - }() - - select { - case <-time.After((param.LEASETIMESEC + 1) * time.Second): - ts.Fatalf("Lease didn't expire") - case <-ch: - } - - ts.Config.DeleteClient(clnt0) - ts.Config.DeleteClient(clnt1) -} - -// Test lease is being extended -func TestLeaseBasicRefresh5C(t *testing.T) { - const LEADERSEC = 3 - - ts := MakeTestLeases(t, "Test (5C): lease refresh ...", true) - defer ts.Cleanup() - ts.setupKVService() - - sck0, clnt0 := ts.makeShardCtrlerClnt() - go func() { - sck0.InitController() - time.Sleep(LEADERSEC * param.LEASETIMESEC * time.Second) - sck0.ExitController() - }() - - // give sck0 time to become leader - time.Sleep(100 * time.Millisecond) - - // start new controller - sck1, clnt1 := ts.makeShardCtrlerClnt() - ch := make(chan struct{}) - go func() { - sck1.InitController() - time.Sleep(100 * time.Millisecond) - sck1.ExitController() - ch <- struct{}{} - }() - - select { - case <-time.After((LEADERSEC + param.LEASETIMESEC + 1) * time.Second): - case <-ch: - ts.Fatalf("Lease not refreshed") - } - - ts.Config.DeleteClient(clnt0) - ts.Config.DeleteClient(clnt1) + ts.concurCtrler(ck, ka, va) } // Test if old leader is fenced off when reconnecting while it is in @@ -710,6 +597,7 @@ func TestLeaseBasicRefresh5C(t *testing.T) { func TestPartitionControllerJoin5C(t *testing.T) { const ( NSLEEP = 2 + NSEC = 1 RAND = 1000 ) @@ -739,8 +627,8 @@ func TestPartitionControllerJoin5C(t *testing.T) { // partition sck clnt.DisconnectAll() - // wait until sck's lease expired before restarting shardgrp `ngid` - time.Sleep((param.LEASETIMESEC + 1) * time.Second) + // wait a while before restarting shardgrp `ngid` + time.Sleep(NSEC * time.Second) ts.Group(ngid).StartServers() @@ -753,8 +641,6 @@ func TestPartitionControllerJoin5C(t *testing.T) { t.Fatalf("Didn't recover gid %d", ngid) } - sck0.ExitController() - // reconnect old controller, which shouldn't finish ChangeConfigTo clnt.ConnectAll() @@ -795,7 +681,7 @@ func partitionRecovery5C(t *testing.T, reliable bool, npart, nclnt int) { } for i := 0; i < npart; i++ { - ts.killCtrler(ck, gid, ka, va) + ts.partitionCtrler(ck, gid, ka, va) } if nclnt > 0 { @@ -827,7 +713,7 @@ func TestPartitionRecoveryReliableClerks5C(t *testing.T) { func TestPartitionRecoveryUnreliableClerks5C(t *testing.T) { const ( - NPARTITION = 5 + NPARTITION = 3 ) partitionRecovery5C(t, false, NPARTITION, 5) } diff --git a/src/shardkv1/test.go b/src/shardkv1/test.go index 045bacf..3ee5ad1 100644 --- a/src/shardkv1/test.go +++ b/src/shardkv1/test.go @@ -16,7 +16,6 @@ import ( "6.5840/labrpc" "6.5840/shardkv1/shardcfg" "6.5840/shardkv1/shardctrler" - "6.5840/shardkv1/shardctrler/param" "6.5840/shardkv1/shardgrp" "6.5840/tester1" ) @@ -25,9 +24,9 @@ type Test struct { t *testing.T *kvtest.Test - sck *shardctrler.ShardCtrler - part string - leases bool + sck *shardctrler.ShardCtrler + part string + partition bool maxraftstate int mu sync.Mutex @@ -41,11 +40,11 @@ const ( ) // Setup kvserver for the shard controller and make the controller -func MakeTestMaxRaft(t *testing.T, part string, reliable, leases bool, maxraftstate int) *Test { +func MakeTestMaxRaft(t *testing.T, part string, reliable, partition bool, maxraftstate int) *Test { ts := &Test{ ngid: shardcfg.Gid1 + 1, // Gid1 is in use t: t, - leases: leases, + partition: partition, maxraftstate: maxraftstate, } cfg := tester.MakeConfig(t, 1, reliable, kvsrv.StartKVServer) @@ -86,7 +85,7 @@ func (ts *Test) makeShardCtrler() *shardctrler.ShardCtrler { func (ts *Test) makeShardCtrlerClnt() (*shardctrler.ShardCtrler, *tester.Clnt) { clnt := ts.Config.MakeClient() - return shardctrler.MakeShardCtrler(clnt, ts.leases), clnt + return shardctrler.MakeShardCtrler(clnt), clnt } func (ts *Test) makeKVClerk() *kvsrv.Clerk { @@ -252,15 +251,14 @@ func (ts *Test) checkShutdownSharding(down tester.Tgid, ka []string, va []string // Run one controler and then partition it after some time. Run // another cntrler that must finish the first ctrler's unfinished -// shard moves. To ensure first ctrler is in a join/leave the test -// shuts down shardgrp `gid`. After the second controller is done, -// heal the partition to test if Freeze,InstallShard, and Delete are -// are fenced. -func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) { +// shard moves. To make it likely that first ctrler is in a join/leave +// the test shuts down shardgrp `gid`. After the second controller is +// done, heal the partition. partitionCtrler returns if recovery +// happened. +func (ts *Test) partitionCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) { const ( - NSLEEP = 2 - - RAND = 1000 + RAND = 400 + NSEC = 1 JOIN = 1 LEAVE = 2 @@ -283,33 +281,39 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) state = LEAVE ts.leaveGroups(sck, []tester.Tgid{ngid}) } else { - //log.Printf("deposed") + //log.Printf("%v: deposed", sck.Id()) return } } }() + // let sck run for a little while + time.Sleep(1000 * time.Millisecond) + r := rand.Int() % RAND d := time.Duration(r) * time.Millisecond time.Sleep(d) - //log.Printf("shutdown gid %d after %dms", gid, r) + //log.Printf("shutdown gid %d after %dms %v", gid, r, time.Now().Sub(t)) + ts.Group(gid).Shutdown() - // sleep for a while to get the chance for the controler to get stuck - // in join or leave, because gid is down - time.Sleep(NSLEEP * time.Second) + // sleep for a while to get sck stuck in join or leave, because + // gid is down + time.Sleep(1000 * time.Millisecond) //log.Printf("disconnect sck %v ngid %d num %d state %d", d, ngid, num, state) // partition controller clnt.DisconnectAll() - if ts.leases { - // wait until sck's lease expired before restarting shardgrp `gid` - time.Sleep((param.LEASETIMESEC + 1) * time.Second) + if ts.partition { + // wait a while before restarting shardgrp `gid` + time.Sleep(NSEC * time.Second) } + //log.Printf("startservers %v lease expired %t", time.Now().Sub(t), ts.leases) + ts.Group(gid).StartServers() // start new controler to pick up where sck left off @@ -321,12 +325,13 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) if state == LEAVE { s = "leave" } - //log.Printf("%v cfg %v recovered %s", s, cfg, s) if cfg.Num <= num { ts.Fatalf("didn't recover; expected %d > %d", num, cfg.Num) } + //log.Printf("%v: recovered %v %v %v", sck0.Id(), time.Now().Sub(t), s, cfg) + present := cfg.IsMember(ngid) if (state == JOIN && !present) || (state == LEAVE && present) { ts.Fatalf("didn't recover %d correctly after %v", ngid, s) @@ -337,32 +342,30 @@ func (ts *Test) killCtrler(ck kvtest.IKVClerk, gid tester.Tgid, ka, va []string) ts.leaveGroups(sck0, []tester.Tgid{ngid}) } - for i := 0; i < len(ka); i++ { - ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) - } - - sck0.ExitController() - - if ts.leases { - //log.Printf("reconnect old controller") - + if ts.partition { // reconnect old controller, which should bail out, because // it has been superseded. clnt.ConnectAll() - time.Sleep(1 * time.Second) + time.Sleep(100 * time.Millisecond) - for i := 0; i < len(ka); i++ { - ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) - } } + + //log.Printf("reconnected %v", time.Now().Sub(t)) + + for i := 0; i < len(ka); i++ { + ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) + } + + //log.Printf("done get %v", time.Now().Sub(t)) + ts.Config.DeleteClient(clnt) ts.Config.DeleteClient(clnt0) } -func (ts *Test) electCtrler(ck kvtest.IKVClerk, ka, va []string) { +func (ts *Test) concurCtrler(ck kvtest.IKVClerk, ka, va []string) { const ( - NSEC = 5 + NSEC = 2 N = 4 ) @@ -376,16 +379,16 @@ func (ts *Test) electCtrler(ck kvtest.IKVClerk, ka, va []string) { ngid := ts.newGid() sck := ts.makeShardCtrler() sck.InitController() - //log.Printf("%d(%p): join/leave %v", i, sck, ngid) + //log.Printf("%v: electCtrler %d join/leave %v", sck.Id(), i, ngid) ts.joinGroups(sck, []tester.Tgid{ngid}) if ok := ts.checkMember(sck, ngid); ok { + //log.Printf("%v: electCtrler %d leave %d", sck.Id(), i, ngid) if ok := ts.leaveGroups(sck, []tester.Tgid{ngid}); !ok { - log.Fatalf("electCtrler: %d(%p): leave %v failed", i, sck, ngid) + //log.Printf("%v: electCtrler %d leave %v failed", sck.Id(), i, ngid) } } else { - log.Fatalf("electCtrler: %d(%p): join %v failed", i, sck, ngid) + //log.Printf("%v: electCtrler %d join %v failed", sck.Id(), i, ngid) } - sck.ExitController() } } } @@ -399,6 +402,7 @@ func (ts *Test) electCtrler(ck kvtest.IKVClerk, ka, va []string) { for i := 0; i < N; i++ { ch <- struct{}{} } + for i := 0; i < len(ka); i++ { ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) } diff --git a/src/tester1/config.go b/src/tester1/config.go index 1a37782..09ff9bd 100644 --- a/src/tester1/config.go +++ b/src/tester1/config.go @@ -138,7 +138,7 @@ func (cfg *Config) End() { ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls fmt.Printf(" ... Passed --") - fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops) + fmt.Printf(" time %4.1fs #peers %d #RPCs %5d #Ops %4d\n", t, npeers, nrpc, ops) } }