Yun-Sheng Chang 2025-03-17 22:41:41 -04:00
parent ea04b4c78f
commit c48f28b300
10 changed files with 272 additions and 61 deletions

View File

@ -5,14 +5,15 @@ import (
"strconv" "strconv"
"testing" "testing"
"time" "time"
"fmt"
"6.5840/kvraft1/rsm" "6.5840/kvraft1/rsm"
"6.5840/kvsrv1/rpc" "6.5840/kvsrv1/rpc"
"6.5840/kvtest1" "6.5840/kvtest1"
tester "6.5840/tester1"
) )
const ( const (
NSEC = 1
NCLNT = 10 NCLNT = 10
) )
@ -29,6 +30,7 @@ const (
func (ts *Test) GenericTest() { func (ts *Test) GenericTest() {
const ( const (
NITER = 3 NITER = 3
NSEC = 1
T = NSEC * time.Second T = NSEC * time.Second
NKEYS = 100 NKEYS = 100
) )
@ -64,17 +66,21 @@ func (ts *Test) GenericTest() {
<-ch_spawn // wait for clients to be done <-ch_spawn // wait for clients to be done
if i == NITER-1 {
tester.SetAnnotationFinalized()
}
ts.CheckPorcupine() ts.CheckPorcupine()
if ts.partitions { if ts.partitions {
ch_partitioner <- true ch_partitioner <- true
// log.Printf("wait for partitioner\n") //log.Printf("wait for partitioner\n")
<-ch_partitioner <-ch_partitioner
// reconnect network and submit a request. A client may // reconnect network and submit a request. A client may
// have submitted a request in a minority. That request // have submitted a request in a minority. That request
// won't return until that server discovers a new term // won't return until that server discovers a new term
// has started. // has started.
ts.Group(Gid).ConnectAll() ts.Group(Gid).ConnectAll()
tester.AnnotateClearFailure()
// wait for a while so that we have a new term // wait for a while so that we have a new term
time.Sleep(kvtest.ElectionTimeout) time.Sleep(kvtest.ElectionTimeout)
} }
@ -84,6 +90,7 @@ func (ts *Test) GenericTest() {
for i := 0; i < ts.nservers; i++ { for i := 0; i < ts.nservers; i++ {
ts.Group(Gid).ShutdownServer(i) ts.Group(Gid).ShutdownServer(i)
} }
tester.AnnotateShutdownAll()
// Wait for a while for servers to shutdown, since // Wait for a while for servers to shutdown, since
// shutdown isn't a real crash and isn't instantaneous // shutdown isn't a real crash and isn't instantaneous
time.Sleep(kvtest.ElectionTimeout) time.Sleep(kvtest.ElectionTimeout)
@ -93,6 +100,7 @@ func (ts *Test) GenericTest() {
ts.Group(Gid).StartServer(i) ts.Group(Gid).StartServer(i)
} }
ts.Group(Gid).ConnectAll() ts.Group(Gid).ConnectAll()
tester.AnnotateClearFailure()
} }
if ts.maxraftstate > 0 { if ts.maxraftstate > 0 {
@ -100,14 +108,18 @@ func (ts *Test) GenericTest() {
// requests and had time to checkpoint. // requests and had time to checkpoint.
sz := ts.Config.Group(Gid).LogSize() sz := ts.Config.Group(Gid).LogSize()
if sz > 8*ts.maxraftstate { if sz > 8*ts.maxraftstate {
ts.t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate)
tester.AnnotateCheckerFailure(err, err)
ts.t.Fatalf(err)
} }
} }
if ts.maxraftstate < 0 { if ts.maxraftstate < 0 {
// Check that snapshots are not used // Check that snapshots are not used
ssz := ts.Group(Gid).SnapshotSize() ssz := ts.Group(Gid).SnapshotSize()
if ssz > 0 { if ssz > 0 {
ts.t.Fatalf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, ts.maxraftstate) err := fmt.Sprintf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, ts.maxraftstate)
tester.AnnotateCheckerFailure(err, err)
ts.t.Fatalf(err)
} }
} }
} }
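The hunk above establishes the failure-reporting pattern this commit applies at every checker: build the message once with fmt.Sprintf, record it with tester.AnnotateCheckerFailure so it appears in the Porcupine visualization, then fail the test as before. A minimal sketch of that pattern as a helper, assuming the imports already present in this file ("fmt", "testing", tester "6.5840/tester1"); annotateAndFatal is hypothetical and not part of the commit:

func annotateAndFatal(t *testing.T, format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	// Record the failure in the annotation timeline, then abort the test
	// exactly as the old bare Fatalf did.
	tester.AnnotateCheckerFailure(msg, msg)
	t.Fatalf("%s", msg)
}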
@ -150,21 +162,25 @@ func (ts *Test) GenericTestSpeed() {
func TestBasic4B(t *testing.T) { func TestBasic4B(t *testing.T) {
ts := MakeTest(t, "4B basic", 1, 5, true, false, false, -1, false) ts := MakeTest(t, "4B basic", 1, 5, true, false, false, -1, false)
tester.AnnotateTest("TestBasic4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestSpeed4B(t *testing.T) { func TestSpeed4B(t *testing.T) {
ts := MakeTest(t, "4B speed", 1, 3, true, false, false, -1, false) ts := MakeTest(t, "4B speed", 1, 3, true, false, false, -1, false)
tester.AnnotateTest("TestSpeed4B", ts.nservers)
ts.GenericTestSpeed() ts.GenericTestSpeed()
} }
func TestConcurrent4B(t *testing.T) { func TestConcurrent4B(t *testing.T) {
ts := MakeTest(t, "4B many clients", 5, 5, true, false, false, -1, false) ts := MakeTest(t, "4B many clients", 5, 5, true, false, false, -1, false)
tester.AnnotateTest("TestConcurrent4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestUnreliable4B(t *testing.T) { func TestUnreliable4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, many clients", 5, 5, false, false, false, -1, false) ts := MakeTest(t, "4B unreliable net, many clients", 5, 5, false, false, false, -1, false)
tester.AnnotateTest("TestUnreliable4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
@ -175,19 +191,29 @@ func TestOnePartition4B(t *testing.T) {
ts := MakeTest(t, "4B progress in majority", 0, 5, false, false, false, -1, false) ts := MakeTest(t, "4B progress in majority", 0, 5, false, false, false, -1, false)
defer ts.Cleanup() defer ts.Cleanup()
tester.AnnotateTest("TestOnePartition4B", ts.nservers)
ck := ts.MakeClerk() ck := ts.MakeClerk()
ver0 := ts.PutAtLeastOnce(ck, "1", "13", rpc.Tversion(0), -1) ver0 := ts.PutAtLeastOnce(ck, "1", "13", rpc.Tversion(0), -1)
-_, l := rsm.Leader(ts.Config, Gid)
+foundl, l := rsm.Leader(ts.Config, Gid)
+if foundl {
+text := fmt.Sprintf("leader found = %v", l)
+tester.AnnotateInfo(text, text)
+} else {
+text := "did not find a leader"
+tester.AnnotateInfo(text, text)
+}
p1, p2 := ts.Group(Gid).MakePartition(l) p1, p2 := ts.Group(Gid).MakePartition(l)
ts.Group(Gid).Partition(p1, p2) ts.Group(Gid).Partition(p1, p2)
tester.AnnotateTwoPartitions(p1, p2)
ckp1 := ts.MakeClerkTo(p1) // connect ckp1 to p1 ckp1 := ts.MakeClerkTo(p1) // connect ckp1 to p1
ckp2a := ts.MakeClerkTo(p2) // connect ckp2a to p2 ckp2a := ts.MakeClerkTo(p2) // connect ckp2a to p2
ckp2b := ts.MakeClerkTo(p2) // connect ckp2b to p2 ckp2b := ts.MakeClerkTo(p2) // connect ckp2b to p2
-ver1 := ts.PutAtLeastOnce(ckp1, "1", "14", ver0+1, -1)
+ver1 := ts.PutAtLeastOnce(ckp1, "1", "14", ver0, -1)
ts.CheckGet(ckp1, "1", "14", ver1) ts.CheckGet(ckp1, "1", "14", ver1)
ts.End() ts.End()
@ -196,8 +222,9 @@ func TestOnePartition4B(t *testing.T) {
done1 := make(chan rpc.Tversion) done1 := make(chan rpc.Tversion)
ts.Begin("Test: no progress in minority (4B)") ts.Begin("Test: no progress in minority (4B)")
tester.AnnotateCheckerBegin(fmt.Sprintf("submit Put(1, 15) and Get(1) to %v", p2))
go func() { go func() {
-ver := ts.PutAtLeastOnce(ckp2a, "1", "15", ver1+1, -1)
+ver := ts.PutAtLeastOnce(ckp2a, "1", "15", ver1, -1)
done0 <- ver done0 <- ver
}() }()
go func() { go func() {
@ -207,14 +234,20 @@ func TestOnePartition4B(t *testing.T) {
select { select {
case ver := <-done0: case ver := <-done0:
t.Fatalf("Put in minority completed %v", ver) err := fmt.Sprintf("Put in minority completed with version = %v", ver)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
case ver := <-done1: case ver := <-done1:
t.Fatalf("Get in minority completed %v", ver) err := fmt.Sprintf("Get in minority completed with version = %v", ver)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
case <-time.After(time.Second): case <-time.After(time.Second):
} }
tester.AnnotateCheckerSuccess(
"commands to minority partition not committed after 1 second", "not committed")
ts.CheckGet(ckp1, "1", "14", ver1) ts.CheckGet(ckp1, "1", "14", ver1)
-ver2 := ts.PutAtLeastOnce(ckp1, "1", "16", ver1+1, -1)
+ver2 := ts.PutAtLeastOnce(ckp1, "1", "16", ver1, -1)
ts.CheckGet(ckp1, "1", "16", ver2) ts.CheckGet(ckp1, "1", "16", ver2)
ts.End() ts.End()
@ -222,64 +255,82 @@ func TestOnePartition4B(t *testing.T) {
ts.Begin("Test: completion after heal (4B)") ts.Begin("Test: completion after heal (4B)")
ts.Group(Gid).ConnectAll() ts.Group(Gid).ConnectAll()
tester.AnnotateClearFailure()
ckp2a.(*kvtest.TestClerk).Clnt.ConnectAll() ckp2a.(*kvtest.TestClerk).Clnt.ConnectAll()
ckp2b.(*kvtest.TestClerk).Clnt.ConnectAll() ckp2b.(*kvtest.TestClerk).Clnt.ConnectAll()
time.Sleep(kvtest.ElectionTimeout) time.Sleep(kvtest.ElectionTimeout)
tester.AnnotateCheckerBegin("status of Put(1, 15)")
ver15 := rpc.Tversion(0)
select { select {
-case <-done0:
+case ver15 = <-done0:
case <-time.After(30 * 100 * time.Millisecond): case <-time.After(30 * 100 * time.Millisecond):
tester.AnnotateCheckerFailure(
"Put(1, 15) did not complete after partition resolved", "OK")
t.Fatalf("Put did not complete") t.Fatalf("Put did not complete")
} }
tester.AnnotateCheckerSuccess("Put(1, 15) completed after partition resolved", "OK")
tester.AnnotateCheckerBegin("status of Get(1)")
select { select {
case <-done1: case <-done1:
case <-time.After(30 * 100 * time.Millisecond): case <-time.After(30 * 100 * time.Millisecond):
tester.AnnotateCheckerFailure(
"Get(1) did not complete after partition resolved", "OK")
t.Fatalf("Get did not complete") t.Fatalf("Get did not complete")
default: default:
} }
tester.AnnotateCheckerSuccess("Get(1) completed after partition resolved", "OK")
ts.CheckGet(ck, "1", "15", ver2+1) ts.CheckGet(ck, "1", "15", ver15)
} }
func TestManyPartitionsOneClient4B(t *testing.T) { func TestManyPartitionsOneClient4B(t *testing.T) {
ts := MakeTest(t, "4B partitions, one client", 1, 5, false, false, true, -1, false) ts := MakeTest(t, "4B partitions, one client", 1, 5, false, false, true, -1, false)
tester.AnnotateTest("TestManyPartitionsOneClient4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestManyPartitionsManyClients4B(t *testing.T) { func TestManyPartitionsManyClients4B(t *testing.T) {
ts := MakeTest(t, "4B partitions, many clients (4B)", 5, 5, false, false, true, -1, false) ts := MakeTest(t, "4B partitions, many clients (4B)", 5, 5, false, false, true, -1, false)
tester.AnnotateTest("TestManyPartitionsManyClients4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestPersistOneClient4B(t *testing.T) { func TestPersistOneClient4B(t *testing.T) {
ts := MakeTest(t, "4B restarts, one client 4B ", 1, 5, false, true, false, -1, false) ts := MakeTest(t, "4B restarts, one client 4B ", 1, 5, false, true, false, -1, false)
tester.AnnotateTest("TestPersistOneClient4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestPersistConcurrent4B(t *testing.T) { func TestPersistConcurrent4B(t *testing.T) {
ts := MakeTest(t, "4B restarts, many clients", 5, 5, false, true, false, -1, false) ts := MakeTest(t, "4B restarts, many clients", 5, 5, false, true, false, -1, false)
tester.AnnotateTest("TestPersistConcurrent4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestPersistConcurrentUnreliable4B(t *testing.T) { func TestPersistConcurrentUnreliable4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, restarts, many clients ", 5, 5, true, true, false, -1, false) ts := MakeTest(t, "4B unreliable net, restarts, many clients ", 5, 5, true, true, false, -1, false)
tester.AnnotateTest("TestPersistConcurrentUnreliable4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestPersistPartition4B(t *testing.T) { func TestPersistPartition4B(t *testing.T) {
ts := MakeTest(t, "4B restarts, partitions, many clients", 5, 5, false, true, true, -1, false) ts := MakeTest(t, "4B restarts, partitions, many clients", 5, 5, false, true, true, -1, false)
tester.AnnotateTest("TestPersistPartition4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestPersistPartitionUnreliable4B(t *testing.T) { func TestPersistPartitionUnreliable4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, restarts, partitions, many clients", 5, 5, true, true, true, -1, false) ts := MakeTest(t, "4B unreliable net, restarts, partitions, many clients", 5, 5, true, true, true, -1, false)
tester.AnnotateTest("TestPersistPartitionUnreliable4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestPersistPartitionUnreliableLinearizable4B(t *testing.T) { func TestPersistPartitionUnreliableLinearizable4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true) ts := MakeTest(t, "4B unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true)
tester.AnnotateTest("TestPersistPartitionUnreliableLinearizable4B", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
@ -293,6 +344,7 @@ func TestSnapshotRPC4C(t *testing.T) {
MAXRAFTSTATE = 1000 MAXRAFTSTATE = 1000
) )
ts := MakeTest(t, "4C SnapshotsRPC", 0, NSRV, true, false, false, MAXRAFTSTATE, false) ts := MakeTest(t, "4C SnapshotsRPC", 0, NSRV, true, false, false, MAXRAFTSTATE, false)
tester.AnnotateTest("TestSnapshotRPC4C", ts.nservers)
defer ts.Cleanup() defer ts.Cleanup()
ck := ts.MakeClerk() ck := ts.MakeClerk()
@ -305,6 +357,7 @@ func TestSnapshotRPC4C(t *testing.T) {
verb := rpc.Tversion(0) verb := rpc.Tversion(0)
// a bunch of puts into the majority partition. // a bunch of puts into the majority partition.
ts.Group(Gid).Partition([]int{0, 1}, []int{2}) ts.Group(Gid).Partition([]int{0, 1}, []int{2})
tester.AnnotateTwoPartitions([]int{0, 1}, []int{2})
{ {
ck1 := ts.MakeClerkTo([]int{0, 1}) ck1 := ts.MakeClerkTo([]int{0, 1})
for i := 0; i < 50; i++ { for i := 0; i < 50; i++ {
@ -318,15 +371,19 @@ func TestSnapshotRPC4C(t *testing.T) {
// most of its log entries. // most of its log entries.
sz := ts.Group(Gid).LogSize() sz := ts.Group(Gid).LogSize()
if sz > 8*ts.maxraftstate { if sz > 8*ts.maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
} }
// now make group that requires participation of // now make group that requires participation of
// lagging server, so that it has to catch up. // lagging server, so that it has to catch up.
verc := rpc.Tversion(0)
ts.Group(Gid).Partition([]int{0, 2}, []int{1}) ts.Group(Gid).Partition([]int{0, 2}, []int{1})
tester.AnnotateTwoPartitions([]int{0, 2}, []int{1})
{ {
ck1 := ts.MakeClerkTo([]int{0, 2}) ck1 := ts.MakeClerkTo([]int{0, 2})
ts.PutAtLeastOnce(ck1, "c", "C", rpc.Tversion(0), -1) verc = ts.PutAtLeastOnce(ck1, "c", "C", rpc.Tversion(0), -1)
ts.PutAtLeastOnce(ck1, "d", "D", rpc.Tversion(0), -1) ts.PutAtLeastOnce(ck1, "d", "D", rpc.Tversion(0), -1)
ts.CheckGet(ck1, "a", "A", vera) ts.CheckGet(ck1, "a", "A", vera)
ts.CheckGet(ck1, "b", "B", verb) ts.CheckGet(ck1, "b", "B", verb)
@ -336,9 +393,10 @@ func TestSnapshotRPC4C(t *testing.T) {
// now everybody // now everybody
ts.Group(Gid).Partition([]int{0, 1, 2}, []int{}) ts.Group(Gid).Partition([]int{0, 1, 2}, []int{})
tester.AnnotateClearFailure()
vere := ts.PutAtLeastOnce(ck, "e", "E", rpc.Tversion(0), -1) vere := ts.PutAtLeastOnce(ck, "e", "E", rpc.Tversion(0), -1)
ts.CheckGet(ck, "c", "C", 1) ts.CheckGet(ck, "c", "C", verc)
ts.CheckGet(ck, "e", "E", vere) ts.CheckGet(ck, "e", "E", vere)
ts.CheckGet(ck, "1", "1", rpc.Tversion(1)) ts.CheckGet(ck, "1", "1", rpc.Tversion(1))
} }
@ -347,6 +405,7 @@ func TestSnapshotRPC4C(t *testing.T) {
// operations we're doing here. // operations we're doing here.
func TestSnapshotSize4C(t *testing.T) { func TestSnapshotSize4C(t *testing.T) {
ts := MakeTest(t, "4C snapshot size is reasonable", 0, 3, true, false, false, 1000, false) ts := MakeTest(t, "4C snapshot size is reasonable", 0, 3, true, false, false, 1000, false)
tester.AnnotateTest("TestSnapshotSize4C", ts.nservers)
defer ts.Cleanup() defer ts.Cleanup()
maxsnapshotstate := 500 maxsnapshotstate := 500
@ -357,55 +416,65 @@ func TestSnapshotSize4C(t *testing.T) {
for i := 0; i < 200; i++ { for i := 0; i < 200; i++ {
ver = ts.PutAtLeastOnce(ck, "x", "0", ver, -1) ver = ts.PutAtLeastOnce(ck, "x", "0", ver, -1)
ts.CheckGet(ck, "x", "0", ver) ts.CheckGet(ck, "x", "0", ver)
ver = ts.PutAtLeastOnce(ck, "x", "1", ver+1, -1) ver = ts.PutAtLeastOnce(ck, "x", "1", ver, -1)
ts.CheckGet(ck, "x", "1", ver) ts.CheckGet(ck, "x", "1", ver)
-ver += 1
} }
// check that servers have thrown away most of their log entries // check that servers have thrown away most of their log entries
sz := ts.Group(Gid).LogSize() sz := ts.Group(Gid).LogSize()
if sz > 8*ts.maxraftstate { if sz > 8*ts.maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
} }
// check that the snapshots are not unreasonably large // check that the snapshots are not unreasonably large
ssz := ts.Group(Gid).SnapshotSize() ssz := ts.Group(Gid).SnapshotSize()
if ssz > maxsnapshotstate { if ssz > maxsnapshotstate {
t.Fatalf("snapshot too large (%v > %v)", ssz, maxsnapshotstate) err := fmt.Sprintf("snapshot too large (%v > %v)", ssz, maxsnapshotstate)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
} }
} }
func TestSpeed4C(t *testing.T) { func TestSpeed4C(t *testing.T) {
ts := MakeTest(t, "4C speed", 1, 3, true, false, false, 1000, false) ts := MakeTest(t, "4C speed", 1, 3, true, false, false, 1000, false)
tester.AnnotateTest("TestSpeed4C", ts.nservers)
ts.GenericTestSpeed() ts.GenericTestSpeed()
} }
func TestSnapshotRecover4C(t *testing.T) { func TestSnapshotRecover4C(t *testing.T) {
ts := MakeTest(t, "4C restarts, snapshots, one client", 1, 5, true, true, false, 1000, false) ts := MakeTest(t, "4C restarts, snapshots, one client", 1, 5, true, true, false, 1000, false)
tester.AnnotateTest("TestSnapshotRecover4C", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestSnapshotRecoverManyClients4C(t *testing.T) { func TestSnapshotRecoverManyClients4C(t *testing.T) {
ts := MakeTest(t, "4C restarts, snapshots, many clients ", 20, 5, true, true, false, 1000, false) ts := MakeTest(t, "4C restarts, snapshots, many clients ", 20, 5, true, true, false, 1000, false)
tester.AnnotateTest("TestSnapshotRecoverManyClients4C", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestSnapshotUnreliable4C(t *testing.T) { func TestSnapshotUnreliable4C(t *testing.T) {
ts := MakeTest(t, "4C unreliable net, snapshots, many clients", 5, 5, false, false, false, 1000, false) ts := MakeTest(t, "4C unreliable net, snapshots, many clients", 5, 5, false, false, false, 1000, false)
tester.AnnotateTest("TestSnapshotUnreliable4C", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestSnapshotUnreliableRecover4C(t *testing.T) { func TestSnapshotUnreliableRecover4C(t *testing.T) {
ts := MakeTest(t, "4C unreliable net, restarts, snapshots, many clients", 5, 5, false, true, false, 1000, false) ts := MakeTest(t, "4C unreliable net, restarts, snapshots, many clients", 5, 5, false, true, false, 1000, false)
tester.AnnotateTest("TestSnapshotUnreliableRecover4C", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestSnapshotUnreliableRecoverConcurrentPartition4C(t *testing.T) { func TestSnapshotUnreliableRecoverConcurrentPartition4C(t *testing.T) {
ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, many clients", 5, 5, false, true, true, 1000, false) ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, many clients", 5, 5, false, true, true, 1000, false)
tester.AnnotateTest("TestSnapshotUnreliableRecoverConcurrentPartition4C", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }
func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4C(t *testing.T) { func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4C(t *testing.T) {
ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, random keys, many clients", 15, 7, false, true, true, 1000, true) ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, random keys, many clients", 15, 7, false, true, true, 1000, true)
tester.AnnotateTest("TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4C", ts.nservers)
ts.GenericTest() ts.GenericTest()
} }

View File

@ -2,11 +2,13 @@ package rsm
import ( import (
//"log" //"log"
"fmt"
"sync" "sync"
"testing" "testing"
"time" "time"
"6.5840/kvsrv1/rpc" "6.5840/kvsrv1/rpc"
"6.5840/tester1"
) )
// test that each server executes increments and updates its counter. // test that each server executes increments and updates its counter.
@ -15,11 +17,15 @@ func TestBasic4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestBasic4A", NSRV)
ts.Begin("Test RSM basic") ts.Begin("Test RSM basic")
for i := 0; i < NINC; i++ { for i := 0; i < NINC; i++ {
r := ts.oneInc() r := ts.oneInc()
if r.N != i+1 { if r.N != i+1 {
ts.t.Fatalf("expected %d instead of %d", i, r.N) err := fmt.Sprintf("expected %d instead of %d", i, r.N)
tester.AnnotateCheckerFailure(err, err)
ts.t.Fatalf(err)
} }
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
} }
@ -31,6 +37,7 @@ func TestConcurrent4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestConcurrent4A", NSRV)
ts.Begin("Test concurrent submit") ts.Begin("Test concurrent submit")
var wg sync.WaitGroup var wg sync.WaitGroup
@ -51,16 +58,19 @@ func TestLeaderFailure4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestLeaderFailure4A", NSRV)
ts.Begin("Test Leader Failure") ts.Begin("Test Leader Failure")
r := ts.oneInc() r := ts.oneInc()
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
l := ts.disconnectLeader() l := ts.disconnectLeader()
tester.AnnotateConnection(ts.g.GetConnected())
r = ts.oneInc() r = ts.oneInc()
ts.checkCounter(r.N, NSRV-1) ts.checkCounter(r.N, NSRV-1)
ts.connect(l) ts.connect(l)
tester.AnnotateConnection(ts.g.GetConnected())
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
} }
@ -72,6 +82,7 @@ func TestLeaderPartition4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestLeaderPartition4A", NSRV)
ts.Begin("Test Leader Partition") ts.Begin("Test Leader Partition")
// submit an Inc // submit an Inc
@ -79,10 +90,20 @@ func TestLeaderPartition4A(t *testing.T) {
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
// partition leader // partition leader
-_, l := Leader(ts.Config, Gid)
+foundl, l := Leader(ts.Config, Gid)
+if foundl {
+text := fmt.Sprintf("leader found = %v", l)
+tester.AnnotateInfo(text, text)
+} else {
+text := "did not find a leader"
+tester.AnnotateInfo(text, text)
+}
p1, p2 := ts.Group(Gid).MakePartition(l) p1, p2 := ts.Group(Gid).MakePartition(l)
ts.Group(Gid).Partition(p1, p2) ts.Group(Gid).Partition(p1, p2)
tester.AnnotateTwoPartitions(p1, p2)
text := fmt.Sprintf("concurrently submitting %v Dec to %v", NSUBMIT, l)
tester.AnnotateInfo(text, text)
done := make(chan struct{}) done := make(chan struct{})
go func() { go func() {
// Submit many Dec's concurrently, which will results in many // Submit many Dec's concurrently, which will results in many
@ -109,17 +130,24 @@ func TestLeaderPartition4A(t *testing.T) {
select { select {
case err := <-done: case err := <-done:
ts.Fatalf("Dec's in minority completed %v", err) text := fmt.Sprintf("Dec's in minority completed; Submit returns %v", err)
tester.AnnotateCheckerFailure(text, text)
ts.Fatalf(text)
case <-time.After(time.Second): case <-time.After(time.Second):
} }
// reconnect leader // reconnect leader
ts.connect(l) ts.connect(l)
// use the fact that there are only three servers, so connecting the leader
// resolves all partitions (the last line of test also relies on this)
tester.AnnotateClearFailure()
select { select {
case <-done: case <-done:
case <-time.After(time.Second): case <-time.After(time.Second):
ts.Fatalf("Submit after healing didn't return") text := "Submit after healing didn't return"
tester.AnnotateCheckerFailure(text, text)
ts.Fatalf(text)
} }
// check that all replicas have the same value for counter // check that all replicas have the same value for counter
@ -136,27 +164,34 @@ func TestRestartReplay4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestRestartReplay4A", NSRV)
ts.Begin("Test Restart") ts.Begin("Test Restart")
for i := 0; i < NINC; i++ { for i := 0; i < NINC; i++ {
r := ts.oneInc() r := ts.oneInc()
if r.N != i+1 { if r.N != i+1 {
ts.t.Fatalf("expected %d instead of %d", i, r.N) err := fmt.Sprintf("expected %d instead of %d", i, r.N)
tester.AnnotateCheckerFailure(err, err)
ts.t.Fatalf(err)
} }
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
} }
ts.Group(Gid).Shutdown() ts.Group(Gid).Shutdown()
tester.AnnotateShutdownAll()
time.Sleep(1 * time.Second) time.Sleep(1 * time.Second)
ts.Group(Gid).StartServers() ts.Group(Gid).StartServers()
tester.AnnotateRestartAll()
// submit an Inc // submit an Inc
r := ts.oneInc() r := ts.oneInc()
if r.N != NINC+1 { if r.N != NINC+1 {
t.Fatalf("Expected %d got %d", NINC+1, r.N) err := fmt.Sprintf("expected %d got %d", NINC+1, r.N)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
} }
time.Sleep(1 * time.Second) time.Sleep(1 * time.Second)
@ -176,6 +211,7 @@ func TestShutdown4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestShutdown4A", NSRV)
ts.Begin("Test Shutdown") ts.Begin("Test Shutdown")
// Submit many Null's concurrently // Submit many Null's concurrently
@ -197,11 +233,14 @@ func TestShutdown4A(t *testing.T) {
time.Sleep(20 * time.Millisecond) time.Sleep(20 * time.Millisecond)
ts.Group(Gid).Shutdown() ts.Group(Gid).Shutdown()
tester.AnnotateShutdownAll()
select { select {
case <-done: case <-done:
case <-time.After((NSEC + 1) * time.Second): case <-time.After((NSEC + 1) * time.Second):
ts.Fatalf("Submit didn't stop after shutdown") err := "Submit didn't stop after shutdown"
tester.AnnotateCheckerFailure(err, err)
ts.Fatalf(err)
} }
} }
@ -216,32 +255,41 @@ func TestRestartSubmit4A(t *testing.T) {
ts := makeTest(t, -1) ts := makeTest(t, -1)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestRestartSubmit4A", NSRV)
ts.Begin("Test Restart and submit") ts.Begin("Test Restart and submit")
for i := 0; i < NINC; i++ { for i := 0; i < NINC; i++ {
r := ts.oneInc() r := ts.oneInc()
if r.N != i+1 { if r.N != i+1 {
ts.t.Fatalf("expected %d instead of %d", i, r.N) err := fmt.Sprintf("expected %d instead of %d", i, r.N)
tester.AnnotateCheckerFailure(err, err)
ts.t.Fatalf(err)
} }
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
} }
ts.Group(Gid).Shutdown() ts.Group(Gid).Shutdown()
tester.AnnotateShutdownAll()
time.Sleep(1 * time.Second) time.Sleep(1 * time.Second)
ts.Group(Gid).StartServers() ts.Group(Gid).StartServers()
tester.AnnotateRestartAll()
// submit an Inc // submit an Inc
r := ts.oneInc() r := ts.oneInc()
if r.N != NINC+1 { if r.N != NINC+1 {
t.Fatalf("Expected %d got %d", NINC+1, r.N) err := fmt.Sprintf("Expected %d got %d", NINC+1, r.N)
tester.AnnotateCheckerFailure(err, err)
t.Fatalf(err)
} }
time.Sleep(1 * time.Second) time.Sleep(1 * time.Second)
// Submit many Null's concurrently // Submit many Null's concurrently
text := fmt.Sprintf("concurrently submitting %v Null", NSUBMIT)
tester.AnnotateInfo(text, text)
done := make(chan struct{}) done := make(chan struct{})
go func() { go func() {
var wg sync.WaitGroup var wg sync.WaitGroup
@ -260,14 +308,18 @@ func TestRestartSubmit4A(t *testing.T) {
time.Sleep(20 * time.Millisecond) time.Sleep(20 * time.Millisecond)
ts.Group(Gid).Shutdown() ts.Group(Gid).Shutdown()
tester.AnnotateShutdownAll()
select { select {
case <-done: case <-done:
case <-time.After((NSEC + 1) * time.Second): case <-time.After((NSEC + 1) * time.Second):
ts.Fatalf("Submit didn't stop after shutdown") err := "Submit didn't stop after shutdown"
tester.AnnotateCheckerFailure(err, err)
ts.Fatalf(err)
} }
ts.Group(Gid).StartServers() ts.Group(Gid).StartServers()
tester.AnnotateRestartAll()
r = ts.oneInc() r = ts.oneInc()
ts.checkCounter(r.N, NSRV) ts.checkCounter(r.N, NSRV)
@ -283,6 +335,9 @@ func TestSnapshot4C(t *testing.T) {
ts := makeTest(t, MAXRAFTSTATE) ts := makeTest(t, MAXRAFTSTATE)
defer ts.cleanup() defer ts.cleanup()
tester.AnnotateTest("TestSnapshot4C", NSRV)
ts.Begin("Test creating and restoring snapshot")
for i := 0; i < N; i++ { for i := 0; i < N; i++ {
ts.oneInc() ts.oneInc()
} }
@ -290,14 +345,18 @@ func TestSnapshot4C(t *testing.T) {
sz := ts.Group(Gid).LogSize() sz := ts.Group(Gid).LogSize()
if sz > 2*MAXRAFTSTATE { if sz > 2*MAXRAFTSTATE {
ts.Fatalf("logs were not trimmed (%v > 2 * %v)", sz, ts.maxraftstate) err := fmt.Sprintf("logs were not trimmed (%v > 2 * %v)", sz, ts.maxraftstate)
tester.AnnotateCheckerFailure(err, err)
ts.Fatalf(err)
} }
// rsm must have made snapshots by now; shutdown all servers and // rsm must have made snapshots by now; shutdown all servers and
// restart them from a snapshot // restart them from a snapshot
ts.g.Shutdown() ts.g.Shutdown()
tester.AnnotateShutdownAll()
ts.g.StartServers() ts.g.StartServers()
tester.AnnotateRestartAll()
// make restarted servers do one increment // make restarted servers do one increment
ts.oneInc() ts.oneInc()

View File

@ -5,6 +5,7 @@ import (
"sync" "sync"
"testing" "testing"
"time" "time"
"fmt"
"6.5840/kvsrv1/rpc" "6.5840/kvsrv1/rpc"
"6.5840/labrpc" "6.5840/labrpc"
@ -115,6 +116,8 @@ func (ts *Test) checkCounter(v int, nsrv int) {
for iters := 0; iters < 30; iters++ { for iters := 0; iters < 30; iters++ {
n = ts.countValue(v) n = ts.countValue(v)
if n >= nsrv { if n >= nsrv {
text := fmt.Sprintf("all %v servers have counter value %v", nsrv, v)
tester.AnnotateCheckerSuccess(text, text)
return return
} }
time.Sleep(to) time.Sleep(to)
@ -122,7 +125,9 @@ func (ts *Test) checkCounter(v int, nsrv int) {
to *= 2 to *= 2
} }
} }
ts.Fatalf("checkCounter: only %d srvs have %v instead of %d", n, v, nsrv) err := fmt.Sprintf("checkCounter: only %d srvs have %v instead of %d", n, v, nsrv)
tester.AnnotateCheckerFailure(err, err)
ts.Fatalf(err)
} }
func (ts *Test) countValue(v int) int { func (ts *Test) countValue(v int) int {

View File

@ -3,6 +3,7 @@ package kvtest
import ( import (
"encoding/json" "encoding/json"
//"log" //"log"
"fmt"
"math/rand" "math/rand"
"strconv" "strconv"
"testing" "testing"
@ -78,9 +79,11 @@ func (ts *Test) MakeClerk() IKVClerk {
// Assumes different ck's put to different keys // Assumes different ck's put to different keys
func (ts *Test) PutAtLeastOnce(ck IKVClerk, key, value string, ver rpc.Tversion, me int) rpc.Tversion { func (ts *Test) PutAtLeastOnce(ck IKVClerk, key, value string, ver rpc.Tversion, me int) rpc.Tversion {
verPrev := ver
for true { for true {
err := ts.Put(ck, key, value, ver, me) err := ts.Put(ck, key, value, ver, me)
if err == rpc.OK { if err == rpc.OK {
ver += 1
break break
} }
if err == rpc.ErrMaybe || err == rpc.ErrVersion { if err == rpc.ErrMaybe || err == rpc.ErrVersion {
@ -92,17 +95,27 @@ func (ts *Test) PutAtLeastOnce(ck IKVClerk, key, value string, ver rpc.Tversion,
} }
} }
} }
desp := fmt.Sprintf("Put(%v, %v) completes", key, value)
details := fmt.Sprintf("version: %v -> %v", verPrev, ver)
tester.AnnotateInfo(desp, details)
return ver return ver
} }
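With this change PutAtLeastOnce advances the version itself once the Put succeeds and returns the resulting version, which is why the call sites in the 4B/4C tests above drop their manual ver0+1/ver1+1 arithmetic. A short usage fragment in the style of the updated tests, assuming the ts/ck harness from this file:

ver := rpc.Tversion(0)
ver = ts.PutAtLeastOnce(ck, "x", "0", ver, -1) // returns the version the key holds after the successful Put
ts.CheckGet(ck, "x", "0", ver)
ver = ts.PutAtLeastOnce(ck, "x", "1", ver, -1) // pass the returned version directly; no more ver+1
ts.CheckGet(ck, "x", "1", ver)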
func (ts *Test) CheckGet(ck IKVClerk, key, value string, version rpc.Tversion) { func (ts *Test) CheckGet(ck IKVClerk, key, value string, version rpc.Tversion) {
tester.AnnotateCheckerBegin(fmt.Sprintf("checking Get(%v) = (%v, %v)", key, value, version))
val, ver, err := ts.Get(ck, key, 0) val, ver, err := ts.Get(ck, key, 0)
if err != rpc.OK { if err != rpc.OK {
ts.Fatalf("CheckGet err %v", err) text := fmt.Sprintf("Get(%v) returns error = %v", key, err)
tester.AnnotateCheckerFailure(text, text)
ts.Fatalf(text)
} }
-if val != value || ver != ver {
-ts.Fatalf("Get(%v): expected:\n%v %v\nreceived:\n%v %v", key, value, val, version, ver)
+if val != value || ver != version {
+text := fmt.Sprintf("Get(%v) returns (%v, %v) != (%v, %v)", key, val, ver, value, version)
+tester.AnnotateCheckerFailure(text, text)
+ts.Fatalf(text)
} }
text := fmt.Sprintf("Get(%v) returns (%v, %v) as expected", key, val, ver)
tester.AnnotateCheckerSuccess(text, "OK")
} }
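CheckGet above also shows the checker lifecycle used throughout the commit: tester.AnnotateCheckerBegin when a check starts, then exactly one of AnnotateCheckerFailure or AnnotateCheckerSuccess before the function fails or returns. A hedged sketch of that shape with placeholder messages; checkEqual is illustrative only and not part of the commit:

func (ts *Test) checkEqual(got, want string) {
	tester.AnnotateCheckerBegin(fmt.Sprintf("checking value = %v", want))
	if got != want {
		text := fmt.Sprintf("got %v, want %v", got, want)
		tester.AnnotateCheckerFailure(text, text)
		ts.Fatalf(text)
	}
	tester.AnnotateCheckerSuccess(fmt.Sprintf("got %v as expected", got), "OK")
}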
type ClntRes struct { type ClntRes struct {
@ -214,9 +227,10 @@ func (ts *Test) OnePut(me int, ck IKVClerk, key string, ver rpc.Tversion) (rpc.T
// repartition the servers periodically // repartition the servers periodically
func (ts *Test) Partitioner(gid tester.Tgid, ch chan bool) { func (ts *Test) Partitioner(gid tester.Tgid, ch chan bool) {
//log.Printf("partioner %v", gid)
defer func() { ch <- true }() defer func() { ch <- true }()
for true { for true {
-switch {
+select {
case <-ch: case <-ch:
return return
default: default:
@ -234,6 +248,7 @@ func (ts *Test) Partitioner(gid tester.Tgid, ch chan bool) {
} }
} }
ts.Group(gid).Partition(pa[0], pa[1]) ts.Group(gid).Partition(pa[0], pa[1])
tester.AnnotateTwoPartitions(pa[0], pa[1])
time.Sleep(ElectionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond) time.Sleep(ElectionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
} }
} }
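The switch-to-select change above is a behavioral fix, not a cleanup: with ch of type chan bool, case <-ch: inside a bare switch is an ordinary boolean case expression, so evaluating it blocks until something is sent on ch and the default branch never acts as a non-blocking fallthrough. select gives the intended semantics: return if a stop signal is pending, otherwise fall through and repartition. A self-contained illustration of the non-blocking check:

package main

import (
	"fmt"
	"time"
)

func worker(done chan bool) {
	for {
		select {
		case <-done:
			fmt.Println("stopping") // stop signal received
			return
		default:
			// No stop signal pending: keep doing periodic work, the way
			// Partitioner keeps repartitioning between checks.
			fmt.Println("working")
			time.Sleep(50 * time.Millisecond)
		}
	}
}

func main() {
	done := make(chan bool)
	go worker(done)
	time.Sleep(200 * time.Millisecond)
	done <- true
	time.Sleep(50 * time.Millisecond)
}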

View File

@ -83,9 +83,7 @@ func Put(cfg *tester.Config, ck IKVClerk, key string, value string, version rpc.
// Checks that the log of Clerk.Put's and Clerk.Get's is linearizable (see // Checks that the log of Clerk.Put's and Clerk.Get's is linearizable (see
// linearizability-faq.txt) // linearizability-faq.txt)
-func checkPorcupine(
-t *testing.T, opLog *OpLog, annotations []porcupine.Annotation, nsec time.Duration,
-) {
+func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) {
enabled := os.Getenv("VIS_ENABLE") enabled := os.Getenv("VIS_ENABLE")
fpath := os.Getenv("VIS_FILE") fpath := os.Getenv("VIS_FILE")
res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), nsec) res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), nsec)
@ -102,6 +100,7 @@ func checkPorcupine(
fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err) fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
} else if enabled != "never" { } else if enabled != "never" {
// Don't produce visualization file if VIS_ENABLE is set to "never". // Don't produce visualization file if VIS_ENABLE is set to "never".
annotations := tester.FinalizeAnnotations("test failed")
info.AddAnnotations(annotations) info.AddAnnotations(annotations)
err = porcupine.Visualize(models.KvModel, info, file) err = porcupine.Visualize(models.KvModel, info, file)
if err != nil { if err != nil {
@ -116,7 +115,7 @@ func checkPorcupine(
} }
// The result is either legal or unknown. // The result is either legal or unknown.
if enabled == "always" { if enabled == "always" && tester.GetAnnotationFinalized() {
var file *os.File var file *os.File
var err error var err error
if fpath == "" { if fpath == "" {
@ -129,6 +128,7 @@ func checkPorcupine(
fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err) fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
return return
} }
annotations := tester.FinalizeAnnotations("test passed")
info.AddAnnotations(annotations) info.AddAnnotations(annotations)
err = porcupine.Visualize(models.KvModel, info, file) err = porcupine.Visualize(models.KvModel, info, file)
if err != nil { if err != nil {
@ -180,9 +180,8 @@ func (ts *Test) CheckPorcupine() {
} }
func (ts *Test) CheckPorcupineT(nsec time.Duration) { func (ts *Test) CheckPorcupineT(nsec time.Duration) {
-// ts.RetrieveAnnotations() also clears the accumulated annotations so that
-// the vis file containing client operations (generated here) won't be
-// overridden by that without client operations (generated at cleanup time).
-annotations := ts.RetrieveAnnotations()
-checkPorcupine(ts.t, ts.oplog, annotations, nsec)
+// tester.RetrieveAnnotations() also clears the accumulated annotations so
+// that the vis file containing client operations (generated here) won't be
+// overridden by that without client operations (generated at cleanup time).
+checkPorcupine(ts.t, ts.oplog, nsec)
} }
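Taken together with the cleanup change further below, the policy in checkPorcupine is now: on a linearizability failure, write a vis file unless VIS_ENABLE is "never"; on a legal or unknown result, write one only when VIS_ENABLE is "always" and the test has been finalized, so the annotation-only file produced at cleanup time cannot clobber the one containing client operations. A condensed restatement of checkPorcupine's decision only; wantVisFile is illustrative, not part of the commit, and failed means the history was found non-linearizable:

func wantVisFile(enabled string, failed, finalized bool) bool {
	if failed {
		// Failures are always visualized unless explicitly disabled.
		return enabled != "never"
	}
	// Passing runs are visualized only on request, and only once the last
	// CheckPorcupine call has finalized the test's annotations.
	return enabled == "always" && finalized
}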

View File

@ -8,9 +8,9 @@ import (
"sync/atomic" "sync/atomic"
"6.5840/kvsrv1"
"6.5840/kvsrv1/rpc" "6.5840/kvsrv1/rpc"
"6.5840/kvtest1" "6.5840/kvtest1"
"6.5840/shardkv1/kvsrv1"
"6.5840/shardkv1/shardcfg" "6.5840/shardkv1/shardcfg"
"6.5840/tester1" "6.5840/tester1"
) )

View File

@ -22,8 +22,8 @@ const (
// kvsrv1 lab. // kvsrv1 lab.
func TestInitQuery5A(t *testing.T) { func TestInitQuery5A(t *testing.T) {
-// MakeTest starts a key/value server using `kvsrv.StartKVServer`,
-// which is defined in shardkv1/kvsrv1.
+// MakeTest starts your lab2 key/value server using
+// `kvsrv.StartKVServer`.
ts := MakeTest(t, "Test (5A): Init and Query ...", true) ts := MakeTest(t, "Test (5A): Init and Query ...", true)
defer ts.Cleanup() defer ts.Cleanup()

View File

@ -10,10 +10,10 @@ import (
"time" "time"
"6.5840/kvraft1/rsm" "6.5840/kvraft1/rsm"
"6.5840/kvsrv1"
"6.5840/kvsrv1/rpc" "6.5840/kvsrv1/rpc"
"6.5840/kvtest1" "6.5840/kvtest1"
"6.5840/labrpc" "6.5840/labrpc"
"6.5840/shardkv1/kvsrv1"
"6.5840/shardkv1/shardcfg" "6.5840/shardkv1/shardcfg"
"6.5840/shardkv1/shardctrler" "6.5840/shardkv1/shardctrler"
"6.5840/shardkv1/shardctrler/param" "6.5840/shardkv1/shardctrler/param"

View File

@ -20,6 +20,7 @@ type Annotation struct {
mu *sync.Mutex mu *sync.Mutex
annotations []porcupine.Annotation annotations []porcupine.Annotation
continuous map[string]Continuous continuous map[string]Continuous
finalized bool
} }
type Continuous struct { type Continuous struct {
@ -70,8 +71,22 @@ const (
TAG_INFO string = "$ Test Info" TAG_INFO string = "$ Test Info"
) )
-func (cfg *Config) RetrieveAnnotations() []porcupine.Annotation{
-annotations := annotation.retrieve()
+func FinalizeAnnotations(end string) []porcupine.Annotation {
+annotations := annotation.finalize()
// XXX: Make the last annotation an interval one to work around Porcupine's
// issue. Consider removing this once the issue is fixed.
t := timestamp()
aend := porcupine.Annotation{
Tag: TAG_INFO,
Start: t,
End: t + 1000,
Description: end,
Details: end,
BackgroundColor: COLOR_INFO,
}
annotations = append(annotations, aend)
return annotations return annotations
} }
@ -173,12 +188,39 @@ func AnnotateCheckerNeutral(desp, details string) {
AnnotateCheckerEnd(desp, details, COLOR_NEUTRAL) AnnotateCheckerEnd(desp, details, COLOR_NEUTRAL)
} }
func SetAnnotationFinalized() {
annotation.mu.Lock()
defer annotation.mu.Unlock()
annotation.finalized = true
}
func (an *Annotation) isFinalized() bool {
annotation.mu.Lock()
defer annotation.mu.Unlock()
return annotation.finalized
}
func GetAnnotationFinalized() bool {
return annotation.isFinalized()
}
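The finalized flag ties the pieces together: GenericTest (top of this commit) calls tester.SetAnnotationFinalized() just before its last CheckPorcupine, CheckPorcupine then emits the vis file through FinalizeAnnotations, and the cleanup path below sees isFinalized() and skips writing a second, operation-free file. A sketch of that call order in the shape of GenericTest; runWorkload stands in for the clients-plus-faults phase and is not a real function:

for i := 0; i < NITER; i++ {
	runWorkload() // hypothetical placeholder for the client/partition phase
	if i == NITER-1 {
		// Mark the final iteration so CheckPorcupine both writes the vis
		// file (when VIS_ENABLE=always) and stops cleanup from overwriting
		// it later with an annotation-only one.
		tester.SetAnnotationFinalized()
	}
	ts.CheckPorcupine()
}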
// Used before log.Fatalf // Used before log.Fatalf
func AnnotateCheckerFailureBeforeExit(desp, details string) { func AnnotateCheckerFailureBeforeExit(desp, details string) {
AnnotateCheckerFailure(desp, details) AnnotateCheckerFailure(desp, details)
annotation.cleanup(true, "test failed") annotation.cleanup(true, "test failed")
} }
// The current annotation API for failures is very hacky. We really should have
// just one function that reads the current network/server status. For network,
// we should be able to read whether an endname is enabled. However, if the
// endname is enabled from X to Y, but not Y to X, the annotation would be
// downright confusing. A better design (in the tester framework, not in the
// annotation layer) is to have a single boolean for each pair of servers; once
// we have such state, the annotation can then simply read the booleans to
// determine the network partitions.
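The comment above proposes replacing the per-call bookkeeping with one symmetric boolean per pair of servers, from which partition annotations could simply be derived. A rough sketch of what that tester-side state might look like, assuming a sync import; none of these names exist in the tester today:

type connMatrix struct {
	mu sync.Mutex
	ok map[[2]int]bool // ok[{i, j}] reports whether servers i and j can talk
}

func (m *connMatrix) set(i, j int, connected bool) {
	if i > j {
		i, j = j, i // store each unordered pair once so the relation stays symmetric
	}
	m.mu.Lock()
	defer m.mu.Unlock()
	m.ok[[2]int{i, j}] = connected
}

func (m *connMatrix) connected(i, j int) bool {
	if i > j {
		i, j = j, i
	}
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.ok[[2]int{i, j}]
}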
// Two functions to annotate partitions: AnnotateConnection and // Two functions to annotate partitions: AnnotateConnection and
// AnnotateTwoPartitions. The connected field of ServerGrp (in group.go) is // AnnotateTwoPartitions. The connected field of ServerGrp (in group.go) is
// precise if and only if the ServerGrp.Partition is not used. Thus, we use the // precise if and only if the ServerGrp.Partition is not used. Thus, we use the
@ -236,14 +278,31 @@ func annotateFault() {
AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT) AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT)
} }
// Currently this API does not work with failed servers, nor with the connected
// fields of ServerGrp (in group.go). It is used specifically for
// ServerGrp.Partition.
func AnnotateTwoPartitions(p1 []int, p2 []int) { func AnnotateTwoPartitions(p1 []int, p2 []int) {
// A bit hard to check whether the partition actually changes, so just // A bit hard to check whether the partition actually changes, so just
// annotate on every invocation. // annotate on every invocation.
-// TODO
-text := fmt.Sprintf("%v %v", p1, p2)
+text := fmt.Sprintf("partition = %v %v", p1, p2)
AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT) AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT)
} }
func AnnotateClearFailure() {
finfo.mu.Lock()
defer finfo.mu.Unlock()
for id := range(finfo.crashed) {
finfo.crashed[id] = false
}
for id := range(finfo.connected) {
finfo.connected[id] = true
}
AnnotateContinuousEnd(TAG_PARTITION)
}
func AnnotateShutdown(servers []int) { func AnnotateShutdown(servers []int) {
finfo.mu.Lock() finfo.mu.Lock()
defer finfo.mu.Unlock() defer finfo.mu.Unlock()
@ -316,9 +375,12 @@ func timestamp() int64 {
return int64(time.Since(time.Unix(0, 0))) return int64(time.Since(time.Unix(0, 0)))
} }
-func (an *Annotation) retrieve() []porcupine.Annotation {
+func (an *Annotation) finalize() []porcupine.Annotation {
an.mu.Lock() an.mu.Lock()
defer an.mu.Unlock()
x := an.annotations x := an.annotations
t := timestamp() t := timestamp()
for tag, cont := range(an.continuous) { for tag, cont := range(an.continuous) {
a := porcupine.Annotation{ a := porcupine.Annotation{
@ -331,9 +393,8 @@ func (an *Annotation) retrieve() []porcupine.Annotation {
} }
x = append(x, a) x = append(x, a)
} }
-an.annotations = make([]porcupine.Annotation, 0)
-an.continuous = make(map[string]Continuous)
-an.mu.Unlock()
+an.finalized = true
return x return x
} }
@ -341,6 +402,7 @@ func (an *Annotation) clear() {
an.mu.Lock() an.mu.Lock()
an.annotations = make([]porcupine.Annotation, 0) an.annotations = make([]porcupine.Annotation, 0)
an.continuous = make(map[string]Continuous) an.continuous = make(map[string]Continuous)
an.finalized = false
an.mu.Unlock() an.mu.Unlock()
} }
@ -445,21 +507,22 @@ func (an *Annotation) annotateContinuousEnd(tag string) {
func (an *Annotation) cleanup(failed bool, end string) { func (an *Annotation) cleanup(failed bool, end string) {
enabled := os.Getenv("VIS_ENABLE") enabled := os.Getenv("VIS_ENABLE")
if enabled == "never" || (!failed && enabled != "always") { if enabled == "never" || (!failed && enabled != "always") || an.isFinalized() {
// Simply clean up the annotations without producing the vis file if // Simply clean up the annotations without producing the vis file if
// VIS_ENABLE is set to "never", or if the test passes and VIS_ENABLE is // VIS_ENABLE is set to "never", OR if the test passes AND VIS_ENABLE is
// not set to "always". // not set to "always", OR the current test has already been finalized
// (because CheckPorcupine has already produced a vis file).
an.clear() an.clear()
return return
} }
-annotations := an.retrieve()
+annotations := an.finalize()
if len(annotations) == 0 { if len(annotations) == 0 {
// Skip empty annotations. // Skip empty annotations.
return return
} }
-// XXX: Make the last annotation a interval one to work around Porcupine's
+// XXX: Make the last annotation an interval one to work around Porcupine's
// issue. Consider removing this once the issue is fixed. // issue. Consider removing this once the issue is fixed.
t := timestamp() t := timestamp()
aend := porcupine.Annotation{ aend := porcupine.Annotation{
@ -499,6 +562,7 @@ func mkAnnotation() *Annotation {
mu: new(sync.Mutex), mu: new(sync.Mutex),
annotations: make([]porcupine.Annotation, 0), annotations: make([]porcupine.Annotation, 0),
continuous: make(map[string]Continuous), continuous: make(map[string]Continuous),
finalized: false,
} }
return &an return &an

View File

@ -313,7 +313,7 @@ func (sg *ServerGrp) AllowServersExcept(l int) []int {
} }
func (sg *ServerGrp) Partition(p1 []int, p2 []int) { func (sg *ServerGrp) Partition(p1 []int, p2 []int) {
// log.Printf("partition servers into: %v %v\n", p1, p2) //log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ { for i := 0; i < len(p1); i++ {
sg.disconnect(p1[i], p2) sg.disconnect(p1[i], p2)
sg.connect(p1[i], p1) sg.connect(p1[i], p1)