diff --git a/.check-build b/.check-build
index a38f07a..80e07fe 100755
--- a/.check-build
+++ b/.check-build
@@ -15,8 +15,11 @@ REFERENCE_FILES=(
     src/main/mrworker.go

     # lab 2
-    src/kvsrv/test_test.go
-    src/kvsrv/config.go
+    src/kvsrv1/lock/lock_test.go
+    src/kvsrv1/kvsrv_test.go
+    src/kvsrv1/test.go
+    src/kvtest1/kvtest.go
+    src/kvtest1/porcupine.go

     # lab 3
     src/raft/persister.go
diff --git a/.gitignore b/.gitignore
index a209ee8..23c290e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,2 @@
 pkg/
-api.key
-.api.key.trimmed
 *-handin.tar.gz
diff --git a/Makefile b/Makefile
index 181b826..71457a9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,5 @@
 # This is the Makefile helping you submit the labs.
-# Just create 6.5840/api.key with your API key in it,
-# and submit your lab with the following command:
+# Submit your lab with the following command:
 # $ make [lab1|lab2|lab3a|lab3b|lab3c|lab3d|lab4a|lab4b|lab5a|lab5b]

 LABS=" lab1 lab2 lab3a lab3b lab3c lab3d lab4a lab4b lab5a lab5b "
diff --git a/src/kvraft1/kvraft_test.go b/src/kvraft1/kvraft_test.go
index 13d8c29..8a77607 100644
--- a/src/kvraft1/kvraft_test.go
+++ b/src/kvraft1/kvraft_test.go
@@ -6,6 +6,7 @@ import (
     "testing"
     "time"

+    "6.5840/kvraft1/rsm"
     "6.5840/kvsrv1/rpc"
     "6.5840/kvtest1"
 )
@@ -147,38 +148,38 @@ func (ts *Test) GenericTestSpeed() {
     }
 }

-func TestBasic4A(t *testing.T) {
-    ts := MakeTest(t, "4A basic", 1, 5, true, false, false, -1, false)
+func TestBasic4B(t *testing.T) {
+    ts := MakeTest(t, "4B basic", 1, 5, true, false, false, -1, false)
     ts.GenericTest()
 }

-func TestSpeed4A(t *testing.T) {
-    ts := MakeTest(t, "4A speed", 1, 3, true, false, false, -1, false)
+func TestSpeed4B(t *testing.T) {
+    ts := MakeTest(t, "4B speed", 1, 3, true, false, false, -1, false)
     ts.GenericTestSpeed()
 }

-func TestConcurrent4A(t *testing.T) {
-    ts := MakeTest(t, "4A many clients", 5, 5, true, false, false, -1, false)
+func TestConcurrent4B(t *testing.T) {
+    ts := MakeTest(t, "4B many clients", 5, 5, true, false, false, -1, false)
     ts.GenericTest()
 }

-func TestUnreliable4A(t *testing.T) {
-    ts := MakeTest(t, "4A unreliable net, many clients", 5, 5, false, false, false, -1, false)
+func TestUnreliable4B(t *testing.T) {
+    ts := MakeTest(t, "4B unreliable net, many clients", 5, 5, false, false, false, -1, false)
     ts.GenericTest()
 }

 // Submit a request in the minority partition and check that the requests
 // doesn't go through until the partition heals. The leader in the original
 // network ends up in the minority partition.
-func TestOnePartition4A(t *testing.T) {
-    ts := MakeTest(t, "4A progress in majority", 0, 5, false, false, false, -1, false)
+func TestOnePartition4B(t *testing.T) {
+    ts := MakeTest(t, "4B progress in majority", 0, 5, false, false, false, -1, false)
     defer ts.Cleanup()

     ck := ts.MakeClerk()

     ver0 := ts.PutAtLeastOnce(ck, "1", "13", rpc.Tversion(0), -1)

-    _, l := ts.Leader()
+    _, l := rsm.Leader(ts.Config, Gid)
     p1, p2 := ts.Group(Gid).MakePartition(l)
     ts.Group(Gid).Partition(p1, p2)
@@ -194,7 +195,7 @@ func TestOnePartition4A(t *testing.T) {
     done0 := make(chan rpc.Tversion)
     done1 := make(chan rpc.Tversion)

-    ts.Begin("Test: no progress in minority (4A)")
+    ts.Begin("Test: no progress in minority (4B)")
     go func() {
         ver := ts.PutAtLeastOnce(ckp2a, "1", "15", ver1+1, -1)
         done0 <- ver
@@ -218,7 +219,7 @@ func TestOnePartition4A(t *testing.T) {

     ts.End()

-    ts.Begin("Test: completion after heal (4A)")
+    ts.Begin("Test: completion after heal (4B)")

     ts.Group(Gid).ConnectAll()
     ckp2a.(*kvtest.TestClerk).Clnt.ConnectAll()
@@ -242,43 +243,43 @@ func TestOnePartition4A(t *testing.T) {
     ts.CheckGet(ck, "1", "15", ver2+1)
 }

-func TestManyPartitionsOneClient4A(t *testing.T) {
-    ts := MakeTest(t, "4A partitions, one client", 1, 5, false, false, true, -1, false)
+func TestManyPartitionsOneClient4B(t *testing.T) {
+    ts := MakeTest(t, "4B partitions, one client", 1, 5, false, false, true, -1, false)
     ts.GenericTest()
 }

-func TestManyPartitionsManyClients4A(t *testing.T) {
-    ts := MakeTest(t, "4A partitions, many clients (4A)", 5, 5, false, false, true, -1, false)
+func TestManyPartitionsManyClients4B(t *testing.T) {
+    ts := MakeTest(t, "4B partitions, many clients (4B)", 5, 5, false, false, true, -1, false)
     ts.GenericTest()
 }

-func TestPersistOneClient4A(t *testing.T) {
-    ts := MakeTest(t, "4A restarts, one client 4A ", 1, 5, false, true, false, -1, false)
+func TestPersistOneClient4B(t *testing.T) {
+    ts := MakeTest(t, "4B restarts, one client 4B ", 1, 5, false, true, false, -1, false)
     ts.GenericTest()
 }

-func TestPersistConcurrent4A(t *testing.T) {
-    ts := MakeTest(t, "4A restarts, many clients", 5, 5, false, true, false, -1, false)
+func TestPersistConcurrent4B(t *testing.T) {
+    ts := MakeTest(t, "4B restarts, many clients", 5, 5, false, true, false, -1, false)
     ts.GenericTest()
 }

-func TestPersistConcurrentUnreliable4A(t *testing.T) {
-    ts := MakeTest(t, "4A unreliable net, restarts, many clients ", 5, 5, true, true, false, -1, false)
+func TestPersistConcurrentUnreliable4B(t *testing.T) {
+    ts := MakeTest(t, "4B unreliable net, restarts, many clients ", 5, 5, true, true, false, -1, false)
     ts.GenericTest()
 }

-func TestPersistPartition4A(t *testing.T) {
-    ts := MakeTest(t, "4A restarts, partitions, many clients", 5, 5, false, true, true, -1, false)
+func TestPersistPartition4B(t *testing.T) {
+    ts := MakeTest(t, "4B restarts, partitions, many clients", 5, 5, false, true, true, -1, false)
     ts.GenericTest()
 }

-func TestPersistPartitionUnreliable4A(t *testing.T) {
-    ts := MakeTest(t, "4A unreliable net, restarts, partitions, many clients", 5, 5, true, true, true, -1, false)
+func TestPersistPartitionUnreliable4B(t *testing.T) {
+    ts := MakeTest(t, "4B unreliable net, restarts, partitions, many clients", 5, 5, true, true, true, -1, false)
     ts.GenericTest()
 }

-func TestPersistPartitionUnreliableLinearizable4A(t *testing.T) {
-    ts := MakeTest(t, "4A unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true)
+func TestPersistPartitionUnreliableLinearizable4B(t *testing.T) {
+    ts := MakeTest(t, "4B unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true)
:= MakeTest(t, "4B unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true) ts.GenericTest() } @@ -286,13 +287,13 @@ func TestPersistPartitionUnreliableLinearizable4A(t *testing.T) { // recover by using the InstallSnapshot RPC? // also checks that majority discards committed log entries // even if minority doesn't respond. -func TestSnapshotRPC4B(t *testing.T) { - ts := MakeTest(t, "4B SnapshotsRPC", 0, 3, false, false, false, 1000, false) +func TestSnapshotRPC4C(t *testing.T) { + ts := MakeTest(t, "4C SnapshotsRPC", 0, 3, false, false, false, 1000, false) defer ts.Cleanup() ck := ts.MakeClerk() - ts.Begin("Test: InstallSnapshot RPC (4B)") + ts.Begin("Test: InstallSnapshot RPC (4C)") vera := ts.PutAtLeastOnce(ck, "a", "A", rpc.Tversion(0), -1) ts.CheckGet(ck, "a", "A", vera) @@ -340,8 +341,8 @@ func TestSnapshotRPC4B(t *testing.T) { // are the snapshots not too huge? 500 bytes is a generous bound for the // operations we're doing here. -func TestSnapshotSize4B(t *testing.T) { - ts := MakeTest(t, "4B snapshot size is reasonable", 0, 3, false, false, false, 1000, false) +func TestSnapshotSize4C(t *testing.T) { + ts := MakeTest(t, "4C snapshot size is reasonable", 0, 3, false, false, false, 1000, false) defer ts.Cleanup() maxsnapshotstate := 500 @@ -370,37 +371,37 @@ func TestSnapshotSize4B(t *testing.T) { } } -func TestSpeed4B(t *testing.T) { - ts := MakeTest(t, "4B speed", 1, 3, true, false, false, 1000, false) +func TestSpeed4C(t *testing.T) { + ts := MakeTest(t, "4C speed", 1, 3, true, false, false, 1000, false) ts.GenericTestSpeed() } -func TestSnapshotRecover4B(t *testing.T) { - ts := MakeTest(t, "4B restarts, snapshots, one client", 1, 5, true, true, false, 1000, false) +func TestSnapshotRecover4C(t *testing.T) { + ts := MakeTest(t, "4C restarts, snapshots, one client", 1, 5, true, true, false, 1000, false) ts.GenericTest() } -func TestSnapshotRecoverManyClients4B(t *testing.T) { - ts := MakeTest(t, "4B restarts, snapshots, many clients ", 20, 5, true, true, false, 1000, false) +func TestSnapshotRecoverManyClients4C(t *testing.T) { + ts := MakeTest(t, "4C restarts, snapshots, many clients ", 20, 5, true, true, false, 1000, false) ts.GenericTest() } -func TestSnapshotUnreliable4B(t *testing.T) { - ts := MakeTest(t, "4B unreliable net, snapshots, many clients", 5, 5, false, false, false, 1000, false) +func TestSnapshotUnreliable4C(t *testing.T) { + ts := MakeTest(t, "4C unreliable net, snapshots, many clients", 5, 5, false, false, false, 1000, false) ts.GenericTest() } -func TestSnapshotUnreliableRecover4B(t *testing.T) { - ts := MakeTest(t, "4B unreliable net, restarts, snapshots, many clients", 5, 5, false, true, false, 1000, false) +func TestSnapshotUnreliableRecover4C(t *testing.T) { + ts := MakeTest(t, "4C unreliable net, restarts, snapshots, many clients", 5, 5, false, true, false, 1000, false) ts.GenericTest() } -func TestSnapshotUnreliableRecoverConcurrentPartition4B(t *testing.T) { - ts := MakeTest(t, "4B unreliable net, restarts, partitions, snapshots, many clients", 5, 5, false, true, true, 1000, false) +func TestSnapshotUnreliableRecoverConcurrentPartition4C(t *testing.T) { + ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, many clients", 5, 5, false, true, true, 1000, false) ts.GenericTest() } -func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4B(t *testing.T) { - ts := MakeTest(t, "4B unreliable net, restarts, partitions, snapshots, random keys, many clients", 15, 7, false, true, true, 1000, 
+func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4C(t *testing.T) {
+    ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, random keys, many clients", 15, 7, false, true, true, 1000, true)
     ts.GenericTest()
 }
diff --git a/src/kvraft1/rsm/rsm.go b/src/kvraft1/rsm/rsm.go
index f7d711f..bd9cdc0 100644
--- a/src/kvraft1/rsm/rsm.go
+++ b/src/kvraft1/rsm/rsm.go
@@ -70,15 +70,11 @@ func (rsm *RSM) Raft() *raft.Raft {

 // submit a command to Raft,
 // and wait for it to be committed.
-// perform() will tell us via ClientStatus and lastApplied
-// when our command is either executed or not.
 //
 // returns (executeError, executeResult)
 // if executeError==ErrWrongLeader, client should find new leader
 // and try again.
 func (rsm *RSM) Submit(req any) (rpc.Err, any) {
-    rsm.mu.Lock()
-    defer rsm.mu.Unlock()

     // Submit creates an Op structure to run a command through Raft;
     // for example: op := Op{Id: rsm.nextId, Req: req}, where req is
diff --git a/src/kvraft1/rsm/rsm_test.go b/src/kvraft1/rsm/rsm_test.go
index cf87c00..fa56f1c 100644
--- a/src/kvraft1/rsm/rsm_test.go
+++ b/src/kvraft1/rsm/rsm_test.go
@@ -3,16 +3,19 @@ package rsm
 import (
     //"log"
     "testing"
+    "time"
+
+    "6.5840/kvsrv1/rpc"
 )

 // test that each server executes increments and updates its counter.
-func TestBasic(t *testing.T) {
+func TestBasic4A(t *testing.T) {
     ts := makeTest(t, -1)
     defer ts.cleanup()

     ts.Begin("Test RSM basic")
     for i := 0; i < 10; i++ {
-        r := ts.one()
+        r := ts.oneInc()
         if r.N != i+1 {
             ts.t.Fatalf("expected %d instead of %d", i, r.N)
         }
@@ -22,15 +25,17 @@

 // test that each server executes increments after disconnecting and
 // reconnecting leader
-func TestLeaderFailure(t *testing.T) {
+func TestLeaderFailure4A(t *testing.T) {
     ts := makeTest(t, -1)
     defer ts.cleanup()

-    r := ts.one()
+    ts.Begin("Test Leader Failure")
+
+    r := ts.oneInc()
     ts.checkCounter(r.N, NSRV)

     l := ts.disconnectLeader()
-    r = ts.one()
+    r = ts.oneInc()
     ts.checkCounter(r.N, NSRV-1)

     ts.connect(l)
@@ -38,26 +43,80 @@ func TestLeaderFailure(t *testing.T) {
     ts.checkCounter(r.N, NSRV)
 }

-// test snapshot and restore
-func TestSnapshot(t *testing.T) {
-    const N = 100
+func TestLeaderPartition4A(t *testing.T) {
+    ts := makeTest(t, -1)
+    defer ts.cleanup()

-    ts := makeTest(t, 1000)
+    ts.Begin("Test Leader Partition")
+
+    // submit an Inc
+    r := ts.oneInc()
+    ts.checkCounter(r.N, NSRV)
+
+    // partition leader
+    _, l := Leader(ts.Config, Gid)
+    p1, p2 := ts.Group(Gid).MakePartition(l)
+    ts.Group(Gid).Partition(p1, p2)
+
+    done := make(chan rpc.Err)
+    go func() {
+        err, _ := ts.srvs[l].rsm.Submit(Inc{})
+        done <- err
+    }()
+
+    // submit an Inc in the majority
+    rep := ts.oneIncPartition(p1)
+
+    select {
+    case ver := <-done:
+        ts.Fatalf("Inc in minority completed %v", ver)
+    case <-time.After(time.Second):
+    }
+
+    // reconnect leader
+    ts.connect(l)
+
+    select {
+    case err := <-done:
+        if err == rpc.OK {
+            ts.Fatalf("Inc in minority didn't fail")
+        }
+    case <-time.After(time.Second):
+        ts.Fatalf("Submit after healing didn't return")
+    }
+
+    // check that all replicas have the same value for counter
+    ts.checkCounter(rep.N, NSRV)
+}
+
+// test snapshot and restore
+func TestSnapshot4C(t *testing.T) {
+    const (
+        N            = 100
+        MAXRAFTSTATE = 1000
+    )
+
+    ts := makeTest(t, MAXRAFTSTATE)
     defer ts.cleanup()

     for i := 0; i < N; i++ {
-        ts.one()
+        ts.oneInc()
     }
     ts.checkCounter(N, NSRV)

-    // rsm must have made snapshots by now shutdown all servers and
+    sz := ts.Group(Gid).LogSize()
+    if sz > 2*MAXRAFTSTATE {
+ ts.Fatalf("logs were not trimmed (%v > 2 * %v)", sz, ts.maxraftstate) + } + + // rsm must have made snapshots by now; shutdown all servers and // restart them from a snapshot ts.g.Shutdown() ts.g.StartServers() // make restarted servers do one increment - ts.one() + ts.oneInc() ts.checkCounter(N+1, NSRV) } diff --git a/src/kvraft1/rsm/server.go b/src/kvraft1/rsm/server.go index 8d91f16..307c257 100644 --- a/src/kvraft1/rsm/server.go +++ b/src/kvraft1/rsm/server.go @@ -41,7 +41,13 @@ func makeRsmSrv(ts *Test, srv int, ends []*labrpc.ClientEnd, persister *tester.P func (rs *rsmSrv) DoOp(req any) any { //log.Printf("%d: DoOp: %v", rs.me, req) + if _, ok := req.(Inc); ok == false { + // wrong type! expecting an Inc. + log.Fatalf("DoOp called with the wrong type") + } + rs.mu.Lock() rs.counter += 1 + rs.mu.Unlock() return &Rep{rs.counter} } diff --git a/src/kvraft1/rsm/test.go b/src/kvraft1/rsm/test.go index 0f6040f..1ad62a9 100644 --- a/src/kvraft1/rsm/test.go +++ b/src/kvraft1/rsm/test.go @@ -7,7 +7,7 @@ import ( "6.5840/kvsrv1/rpc" "6.5840/labrpc" - //"6.5840/raft1" + "6.5840/raft1" "6.5840/tester1" ) @@ -23,6 +23,8 @@ type Test struct { const ( NSRV = 3 NSEC = 10 + + Gid = tester.GRP0 ) func makeTest(t *testing.T, maxraftstate int) *Test { @@ -48,7 +50,19 @@ func (ts *Test) mksrv(ends []*labrpc.ClientEnd, grp tester.Tgid, srv int, persis return []tester.IService{s.rsm.Raft()} } -func (ts *Test) one() *Rep { +func inPartition(s int, p []int) bool { + if p == nil { + return true + } + for _, i := range p { + if s == i { + return true + } + } + return false +} + +func (ts *Test) oneIncPartition(p []int) *Rep { // try all the servers, maybe one is the leader but give up after NSEC t0 := time.Now() for time.Since(t0).Seconds() < NSEC { @@ -56,7 +70,7 @@ func (ts *Test) one() *Rep { for range ts.srvs { if ts.g.IsConnected(index) { s := ts.srvs[index] - if s.rsm != nil { + if s.rsm != nil && inPartition(index, p) { err, rep := s.rsm.Submit(Inc{}) if err == rpc.OK { ts.leader = index @@ -70,11 +84,14 @@ func (ts *Test) one() *Rep { time.Sleep(50 * time.Millisecond) //log.Printf("try again: no leader") } - ts.Fatalf("one: took too long") return nil } +func (ts *Test) oneInc() *Rep { + return ts.oneIncPartition(nil) +} + func (ts *Test) checkCounter(v int, nsrv int) { to := 10 * time.Millisecond n := 0 @@ -94,9 +111,11 @@ func (ts *Test) checkCounter(v int, nsrv int) { func (ts *Test) countValue(v int) int { i := 0 for _, s := range ts.srvs { + s.mu.Lock() if s.counter == v { i += 1 } + s.mu.Unlock() } return i } @@ -111,3 +130,19 @@ func (ts *Test) connect(i int) { //log.Printf("connect %d", i) ts.g.ConnectOne(i) } + +func Leader(cfg *tester.Config, gid tester.Tgid) (bool, int) { + for i, ss := range cfg.Group(gid).Services() { + for _, s := range ss { + switch r := s.(type) { + case *raft.Raft: + _, isLeader := r.GetState() + if isLeader { + return true, i + } + default: + } + } + } + return false, 0 +} diff --git a/src/kvraft1/test.go b/src/kvraft1/test.go index ef1dbfa..3ef2787 100644 --- a/src/kvraft1/test.go +++ b/src/kvraft1/test.go @@ -5,7 +5,6 @@ import ( "6.5840/kvtest1" "6.5840/labrpc" - "6.5840/raft1" "6.5840/tester1" ) @@ -51,22 +50,6 @@ func (ts *Test) MakeClerk() kvtest.IKVClerk { return &kvtest.TestClerk{ck, clnt} } -func (ts *Test) Leader() (bool, int) { - for i, ss := range ts.Group(Gid).Services() { - for _, s := range ss { - switch r := s.(type) { - case *raft.Raft: - _, is_leader := r.GetState() - if is_leader { - return true, i - } - default: - } - } - } - return false, 0 -} - 
 func (ts *Test) DeleteClerk(ck kvtest.IKVClerk) {
     tck := ck.(*kvtest.TestClerk)
     ts.DeleteClient(tck.Clnt)
diff --git a/src/kvsrv1/client.go b/src/kvsrv1/client.go
index f15f839..c617677 100644
--- a/src/kvsrv1/client.go
+++ b/src/kvsrv1/client.go
@@ -25,32 +25,32 @@ func MakeClerk(clnt *tester.Clnt, server string) kvtest.IKVClerk {
 // You can send an RPC with code like this:
 // ok := ck.clnt.Call(ck.server, "KVServer.Get", &args, &reply)
 //
-// the types of args and reply (including whether they are pointers)
+// The types of args and reply (including whether they are pointers)
 // must match the declared types of the RPC handler function's
-// arguments. and reply must be passed as a pointer.
+// arguments. Additionally, reply must be passed as a pointer.
+//
 func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
-    // You will have to modify this function.
     return "", 0, rpc.ErrNoKey
 }

-// Put updates key with value only if version is the version in the
+// Put updates key with value only if the version in the
 // request matches the version of the key at the server. If the
 // versions numbers don't match, the server should return
-// ErrNoVersion. If Put receives an ErrVersion on its first RPC, Put
+// ErrVersion. If Put receives an ErrVersion on its first RPC, Put
 // should return ErrVersion, since the Put was definitely not
 // performed at the server. If the server returns ErrVersion on a
 // resend RPC, then Put must return ErrMaybe to the application, since
-// its earlier RPC might have een processed by the server successfully
+// its earlier RPC might have been processed by the server successfully
 // but the response was lost, and the the Clerk doesn't know if
 // the Put was performed or not.
 //
 // You can send an RPC with code like this:
 // ok := ck.clnt.Call(ck.server, "KVServer.Put", &args, &reply)
 //
-// the types of args and reply (including whether they are pointers)
+// The types of args and reply (including whether they are pointers)
 // must match the declared types of the RPC handler function's
-// arguments. and reply must be passed as a pointer.
+// arguments. Additionally, reply must be passed as a pointer.
 func (ck *Clerk) Put(key, value string, version rpc.Tversion) rpc.Err {
     // You will have to modify this function.
     return rpc.ErrNoKey
diff --git a/src/kvsrv1/lock/lock.go b/src/kvsrv1/lock/lock.go
index 96d398a..0a5adda 100644
--- a/src/kvsrv1/lock/lock.go
+++ b/src/kvsrv1/lock/lock.go
@@ -5,7 +5,7 @@ import (
 )

 type Lock struct {
-    // IKVClerk is a go interface for k/v clerks: the interfaces hides
+    // IKVClerk is a go interface for k/v clerks: the interface hides
     // the specific Clerk type of ck but promises that ck supports
     // Put and Get. The tester passes the clerk in when calling
     // MakeLock().
@@ -13,8 +13,10 @@ type Lock struct {
     // You may add code here
 }

-// The tester calls MakeLock() and passes in a k/v clerk; you code can
+// The tester calls MakeLock() and passes in a k/v clerk; your code can
 // perform a Put or Get by calling lk.ck.Put() or lk.ck.Get().
+//
+// l is the key protected by the lock to be created.
 func MakeLock(ck kvtest.IKVClerk, l string) *Lock {
     lk := &Lock{ck: ck}
     // You may add code here
diff --git a/src/kvsrv1/server.go b/src/kvsrv1/server.go
index 2201071..db4a08c 100644
--- a/src/kvsrv1/server.go
+++ b/src/kvsrv1/server.go
@@ -40,7 +40,7 @@ func (kv *KVServer) Get(args *rpc.GetArgs, reply *rpc.GetReply) {
 // Update the value for a key if args.Version matches the version of
 // the key on the server. If versions don't match, return ErrVersion.
 // If the key doesn't exist, Put installs the value if the
-// Args.Version is 0.
+// args.Version is 0, and returns ErrNoKey otherwise.
 func (kv *KVServer) Put(args *rpc.PutArgs, reply *rpc.PutReply) {
     // Your code here.
 }
diff --git a/src/kvtest1/kvtest.go b/src/kvtest1/kvtest.go
index 62c14b7..6e5562d 100644
--- a/src/kvtest1/kvtest.go
+++ b/src/kvtest1/kvtest.go
@@ -16,6 +16,7 @@ import (
 // (much more than the paper's range of timeouts).
 const ElectionTimeout = 1 * time.Second

+// n specifies the length of the string to be generated.
 func RandValue(n int) string {
     const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
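
For readers following the src/kvsrv1/client.go comments changed above, here is a minimal, illustrative sketch of the Put retry rule they describe: ErrVersion is returned only when the very first RPC reports a version mismatch, and ErrMaybe when a resend does. This is not part of the patch; it assumes it would live inside client.go next to the existing Clerk code, and the rpc.PutArgs/rpc.PutReply field names (Key, Value, Version, Err) and the helper name putSketch are assumptions that may differ from the real rpc package.

// Sketch only -- not part of this diff. The PutArgs/PutReply field names
// below are assumed; adapt them to the actual rpc package definitions.
func (ck *Clerk) putSketch(key, value string, version rpc.Tversion) rpc.Err {
    args := rpc.PutArgs{Key: key, Value: value, Version: version}
    first := true
    for {
        reply := rpc.PutReply{}
        ok := ck.clnt.Call(ck.server, "KVServer.Put", &args, &reply)
        if ok {
            if reply.Err == rpc.ErrVersion && !first {
                // A resent Put saw ErrVersion: an earlier attempt may
                // already have been applied, so report ErrMaybe.
                return rpc.ErrMaybe
            }
            return reply.Err
        }
        // The RPC was lost; retry the same request (a real client would
        // back off briefly before resending).
        first = false
    }
}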