diff --git a/src/kvraft1/kvraft_test.go b/src/kvraft1/kvraft_test.go index 207d7c4..c9cdbcf 100644 --- a/src/kvraft1/kvraft_test.go +++ b/src/kvraft1/kvraft_test.go @@ -5,14 +5,15 @@ import ( "strconv" "testing" "time" + "fmt" "6.5840/kvraft1/rsm" "6.5840/kvsrv1/rpc" "6.5840/kvtest1" + tester "6.5840/tester1" ) const ( - NSEC = 1 NCLNT = 10 ) @@ -29,6 +30,7 @@ const ( func (ts *Test) GenericTest() { const ( NITER = 3 + NSEC = 1 T = NSEC * time.Second NKEYS = 100 ) @@ -64,17 +66,21 @@ func (ts *Test) GenericTest() { <-ch_spawn // wait for clients to be done + if i == NITER-1 { + tester.SetAnnotationFinalized() + } ts.CheckPorcupine() if ts.partitions { ch_partitioner <- true - // log.Printf("wait for partitioner\n") + //log.Printf("wait for partitioner\n") <-ch_partitioner // reconnect network and submit a request. A client may // have submitted a request in a minority. That request // won't return until that server discovers a new term // has started. ts.Group(Gid).ConnectAll() + tester.AnnotateClearFailure() // wait for a while so that we have a new term time.Sleep(kvtest.ElectionTimeout) } @@ -84,6 +90,7 @@ func (ts *Test) GenericTest() { for i := 0; i < ts.nservers; i++ { ts.Group(Gid).ShutdownServer(i) } + tester.AnnotateShutdownAll() // Wait for a while for servers to shutdown, since // shutdown isn't a real crash and isn't instantaneous time.Sleep(kvtest.ElectionTimeout) @@ -93,6 +100,7 @@ func (ts *Test) GenericTest() { ts.Group(Gid).StartServer(i) } ts.Group(Gid).ConnectAll() + tester.AnnotateClearFailure() } if ts.maxraftstate > 0 { @@ -100,14 +108,18 @@ func (ts *Test) GenericTest() { // requests and had time to checkpoint. sz := ts.Config.Group(Gid).LogSize() if sz > 8*ts.maxraftstate { - ts.t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) + err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) + tester.AnnotateCheckerFailure(err, err) + ts.t.Fatalf(err) } } if ts.maxraftstate < 0 { // Check that snapshots are not used ssz := ts.Group(Gid).SnapshotSize() if ssz > 0 { - ts.t.Fatalf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, ts.maxraftstate) + err := fmt.Sprintf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, ts.maxraftstate) + tester.AnnotateCheckerFailure(err, err) + ts.t.Fatalf(err) } } } @@ -150,21 +162,25 @@ func (ts *Test) GenericTestSpeed() { func TestBasic4B(t *testing.T) { ts := MakeTest(t, "4B basic", 1, 5, true, false, false, -1, false) + tester.AnnotateTest("TestBasic4B", ts.nservers) ts.GenericTest() } func TestSpeed4B(t *testing.T) { ts := MakeTest(t, "4B speed", 1, 3, true, false, false, -1, false) + tester.AnnotateTest("TestSpeed4B", ts.nservers) ts.GenericTestSpeed() } func TestConcurrent4B(t *testing.T) { ts := MakeTest(t, "4B many clients", 5, 5, true, false, false, -1, false) + tester.AnnotateTest("TestConcurrent4B", ts.nservers) ts.GenericTest() } func TestUnreliable4B(t *testing.T) { ts := MakeTest(t, "4B unreliable net, many clients", 5, 5, false, false, false, -1, false) + tester.AnnotateTest("TestUnreliable4B", ts.nservers) ts.GenericTest() } @@ -175,19 +191,29 @@ func TestOnePartition4B(t *testing.T) { ts := MakeTest(t, "4B progress in majority", 0, 5, false, false, false, -1, false) defer ts.Cleanup() + tester.AnnotateTest("TestOnePartition4B", ts.nservers) + ck := ts.MakeClerk() ver0 := ts.PutAtLeastOnce(ck, "1", "13", rpc.Tversion(0), -1) - _, l := rsm.Leader(ts.Config, Gid) + foundl, l := rsm.Leader(ts.Config, Gid) + if foundl { + text := 
fmt.Sprintf("leader found = %v", l) + tester.AnnotateInfo(text, text) + } else { + text := "did not find a leader" + tester.AnnotateInfo(text, text) + } p1, p2 := ts.Group(Gid).MakePartition(l) ts.Group(Gid).Partition(p1, p2) + tester.AnnotateTwoPartitions(p1, p2) ckp1 := ts.MakeClerkTo(p1) // connect ckp1 to p1 ckp2a := ts.MakeClerkTo(p2) // connect ckp2a to p2 ckp2b := ts.MakeClerkTo(p2) // connect ckp2b to p2 - ver1 := ts.PutAtLeastOnce(ckp1, "1", "14", ver0+1, -1) + ver1 := ts.PutAtLeastOnce(ckp1, "1", "14", ver0, -1) ts.CheckGet(ckp1, "1", "14", ver1) ts.End() @@ -196,8 +222,9 @@ func TestOnePartition4B(t *testing.T) { done1 := make(chan rpc.Tversion) ts.Begin("Test: no progress in minority (4B)") + tester.AnnotateCheckerBegin(fmt.Sprintf("submit Put(1, 15) and Get(1) to %v", p2)) go func() { - ver := ts.PutAtLeastOnce(ckp2a, "1", "15", ver1+1, -1) + ver := ts.PutAtLeastOnce(ckp2a, "1", "15", ver1, -1) done0 <- ver }() go func() { @@ -207,14 +234,20 @@ func TestOnePartition4B(t *testing.T) { select { case ver := <-done0: - t.Fatalf("Put in minority completed %v", ver) + err := fmt.Sprintf("Put in minority completed with version = %v", ver) + tester.AnnotateCheckerFailure(err, err) + t.Fatalf(err) case ver := <-done1: - t.Fatalf("Get in minority completed %v", ver) + err := fmt.Sprintf("Get in minority completed with version = %v", ver) + tester.AnnotateCheckerFailure(err, err) + t.Fatalf(err) case <-time.After(time.Second): } + tester.AnnotateCheckerSuccess( + "commands to minority partition not committed after 1 second", "not committed") ts.CheckGet(ckp1, "1", "14", ver1) - ver2 := ts.PutAtLeastOnce(ckp1, "1", "16", ver1+1, -1) + ver2 := ts.PutAtLeastOnce(ckp1, "1", "16", ver1, -1) ts.CheckGet(ckp1, "1", "16", ver2) ts.End() @@ -222,64 +255,82 @@ func TestOnePartition4B(t *testing.T) { ts.Begin("Test: completion after heal (4B)") ts.Group(Gid).ConnectAll() + tester.AnnotateClearFailure() ckp2a.(*kvtest.TestClerk).Clnt.ConnectAll() ckp2b.(*kvtest.TestClerk).Clnt.ConnectAll() time.Sleep(kvtest.ElectionTimeout) + tester.AnnotateCheckerBegin("status of Put(1, 15)") + ver15 := rpc.Tversion(0) select { - case <-done0: + case ver15 = <-done0: case <-time.After(30 * 100 * time.Millisecond): + tester.AnnotateCheckerFailure( + "Put(1, 15) did not complete after partition resolved", "OK") t.Fatalf("Put did not complete") } + tester.AnnotateCheckerSuccess("Put(1, 15) completed after partition resolved", "OK") + tester.AnnotateCheckerBegin("status of Get(1)") select { case <-done1: case <-time.After(30 * 100 * time.Millisecond): + tester.AnnotateCheckerFailure( + "Get(1) did not complete after partition resolved", "OK") t.Fatalf("Get did not complete") default: } + tester.AnnotateCheckerSuccess("Get(1) completed after partition resolved", "OK") - ts.CheckGet(ck, "1", "15", ver2+1) + ts.CheckGet(ck, "1", "15", ver15) } func TestManyPartitionsOneClient4B(t *testing.T) { ts := MakeTest(t, "4B partitions, one client", 1, 5, false, false, true, -1, false) + tester.AnnotateTest("TestManyPartitionsOneClient4B", ts.nservers) ts.GenericTest() } func TestManyPartitionsManyClients4B(t *testing.T) { ts := MakeTest(t, "4B partitions, many clients (4B)", 5, 5, false, false, true, -1, false) + tester.AnnotateTest("TestManyPartitionsManyClients4B", ts.nservers) ts.GenericTest() } func TestPersistOneClient4B(t *testing.T) { ts := MakeTest(t, "4B restarts, one client 4B ", 1, 5, false, true, false, -1, false) + tester.AnnotateTest("TestPersistOneClient4B", ts.nservers) ts.GenericTest() } func 
TestPersistConcurrent4B(t *testing.T) { ts := MakeTest(t, "4B restarts, many clients", 5, 5, false, true, false, -1, false) + tester.AnnotateTest("TestPersistConcurrent4B", ts.nservers) ts.GenericTest() } func TestPersistConcurrentUnreliable4B(t *testing.T) { ts := MakeTest(t, "4B unreliable net, restarts, many clients ", 5, 5, true, true, false, -1, false) + tester.AnnotateTest("TestPersistConcurrentUnreliable4B", ts.nservers) ts.GenericTest() } func TestPersistPartition4B(t *testing.T) { ts := MakeTest(t, "4B restarts, partitions, many clients", 5, 5, false, true, true, -1, false) + tester.AnnotateTest("TestPersistPartition4B", ts.nservers) ts.GenericTest() } func TestPersistPartitionUnreliable4B(t *testing.T) { ts := MakeTest(t, "4B unreliable net, restarts, partitions, many clients", 5, 5, true, true, true, -1, false) + tester.AnnotateTest("TestPersistPartitionUnreliable4B", ts.nservers) ts.GenericTest() } func TestPersistPartitionUnreliableLinearizable4B(t *testing.T) { ts := MakeTest(t, "4B unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true) + tester.AnnotateTest("TestPersistPartitionUnreliableLinearizable4B", ts.nservers) ts.GenericTest() } @@ -293,6 +344,7 @@ func TestSnapshotRPC4C(t *testing.T) { MAXRAFTSTATE = 1000 ) ts := MakeTest(t, "4C SnapshotsRPC", 0, NSRV, true, false, false, MAXRAFTSTATE, false) + tester.AnnotateTest("TestSnapshotRPC4C", ts.nservers) defer ts.Cleanup() ck := ts.MakeClerk() @@ -305,6 +357,7 @@ func TestSnapshotRPC4C(t *testing.T) { verb := rpc.Tversion(0) // a bunch of puts into the majority partition. ts.Group(Gid).Partition([]int{0, 1}, []int{2}) + tester.AnnotateTwoPartitions([]int{0, 1}, []int{2}) { ck1 := ts.MakeClerkTo([]int{0, 1}) for i := 0; i < 50; i++ { @@ -318,15 +371,19 @@ func TestSnapshotRPC4C(t *testing.T) { // most of its log entries. sz := ts.Group(Gid).LogSize() if sz > 8*ts.maxraftstate { - t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) + err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) + tester.AnnotateCheckerFailure(err, err) + t.Fatalf(err) } // now make group that requires participation of // lagging server, so that it has to catch up. + verc := rpc.Tversion(0) ts.Group(Gid).Partition([]int{0, 2}, []int{1}) + tester.AnnotateTwoPartitions([]int{0, 2}, []int{1}) { ck1 := ts.MakeClerkTo([]int{0, 2}) - ts.PutAtLeastOnce(ck1, "c", "C", rpc.Tversion(0), -1) + verc = ts.PutAtLeastOnce(ck1, "c", "C", rpc.Tversion(0), -1) ts.PutAtLeastOnce(ck1, "d", "D", rpc.Tversion(0), -1) ts.CheckGet(ck1, "a", "A", vera) ts.CheckGet(ck1, "b", "B", verb) @@ -336,9 +393,10 @@ func TestSnapshotRPC4C(t *testing.T) { // now everybody ts.Group(Gid).Partition([]int{0, 1, 2}, []int{}) + tester.AnnotateClearFailure() vere := ts.PutAtLeastOnce(ck, "e", "E", rpc.Tversion(0), -1) - ts.CheckGet(ck, "c", "C", 1) + ts.CheckGet(ck, "c", "C", verc) ts.CheckGet(ck, "e", "E", vere) ts.CheckGet(ck, "1", "1", rpc.Tversion(1)) } @@ -347,6 +405,7 @@ func TestSnapshotRPC4C(t *testing.T) { // operations we're doing here. 
func TestSnapshotSize4C(t *testing.T) { ts := MakeTest(t, "4C snapshot size is reasonable", 0, 3, true, false, false, 1000, false) + tester.AnnotateTest("TestSnapshotSize4C", ts.nservers) defer ts.Cleanup() maxsnapshotstate := 500 @@ -357,55 +416,65 @@ func TestSnapshotSize4C(t *testing.T) { for i := 0; i < 200; i++ { ver = ts.PutAtLeastOnce(ck, "x", "0", ver, -1) ts.CheckGet(ck, "x", "0", ver) - ver = ts.PutAtLeastOnce(ck, "x", "1", ver+1, -1) + ver = ts.PutAtLeastOnce(ck, "x", "1", ver, -1) ts.CheckGet(ck, "x", "1", ver) - ver += 1 } // check that servers have thrown away most of their log entries sz := ts.Group(Gid).LogSize() if sz > 8*ts.maxraftstate { - t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) + err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate) + tester.AnnotateCheckerFailure(err, err) + t.Fatalf(err) } // check that the snapshots are not unreasonably large ssz := ts.Group(Gid).SnapshotSize() if ssz > maxsnapshotstate { - t.Fatalf("snapshot too large (%v > %v)", ssz, maxsnapshotstate) + err := fmt.Sprintf("snapshot too large (%v > %v)", ssz, maxsnapshotstate) + tester.AnnotateCheckerFailure(err, err) + t.Fatalf(err) } } func TestSpeed4C(t *testing.T) { ts := MakeTest(t, "4C speed", 1, 3, true, false, false, 1000, false) + tester.AnnotateTest("TestSpeed4C", ts.nservers) ts.GenericTestSpeed() } func TestSnapshotRecover4C(t *testing.T) { ts := MakeTest(t, "4C restarts, snapshots, one client", 1, 5, true, true, false, 1000, false) + tester.AnnotateTest("TestSnapshotRecover4C", ts.nservers) ts.GenericTest() } func TestSnapshotRecoverManyClients4C(t *testing.T) { ts := MakeTest(t, "4C restarts, snapshots, many clients ", 20, 5, true, true, false, 1000, false) + tester.AnnotateTest("TestSnapshotRecoverManyClients4C", ts.nservers) ts.GenericTest() } func TestSnapshotUnreliable4C(t *testing.T) { ts := MakeTest(t, "4C unreliable net, snapshots, many clients", 5, 5, false, false, false, 1000, false) + tester.AnnotateTest("TestSnapshotUnreliable4C", ts.nservers) ts.GenericTest() } func TestSnapshotUnreliableRecover4C(t *testing.T) { ts := MakeTest(t, "4C unreliable net, restarts, snapshots, many clients", 5, 5, false, true, false, 1000, false) + tester.AnnotateTest("TestSnapshotUnreliableRecover4C", ts.nservers) ts.GenericTest() } func TestSnapshotUnreliableRecoverConcurrentPartition4C(t *testing.T) { ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, many clients", 5, 5, false, true, true, 1000, false) + tester.AnnotateTest("TestSnapshotUnreliableRecoverConcurrentPartition4C", ts.nservers) ts.GenericTest() } func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4C(t *testing.T) { ts := MakeTest(t, "4C unreliable net, restarts, partitions, snapshots, random keys, many clients", 15, 7, false, true, true, 1000, true) + tester.AnnotateTest("TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4C", ts.nservers) ts.GenericTest() } diff --git a/src/kvraft1/rsm/rsm_test.go b/src/kvraft1/rsm/rsm_test.go index 51fca2c..0a57adf 100644 --- a/src/kvraft1/rsm/rsm_test.go +++ b/src/kvraft1/rsm/rsm_test.go @@ -2,11 +2,13 @@ package rsm import ( //"log" + "fmt" "sync" "testing" "time" "6.5840/kvsrv1/rpc" + "6.5840/tester1" ) // test that each server executes increments and updates its counter. 
@@ -15,11 +17,15 @@ func TestBasic4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestBasic4A", NSRV) ts.Begin("Test RSM basic") + for i := 0; i < NINC; i++ { r := ts.oneInc() if r.N != i+1 { - ts.t.Fatalf("expected %d instead of %d", i, r.N) + err := fmt.Sprintf("expected %d instead of %d", i, r.N) + tester.AnnotateCheckerFailure(err, err) + ts.t.Fatalf(err) } ts.checkCounter(r.N, NSRV) } @@ -31,6 +37,7 @@ func TestConcurrent4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestConcurrent4A", NSRV) ts.Begin("Test concurrent submit") var wg sync.WaitGroup @@ -51,16 +58,19 @@ func TestLeaderFailure4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestLeaderFailure4A", NSRV) ts.Begin("Test Leader Failure") r := ts.oneInc() ts.checkCounter(r.N, NSRV) l := ts.disconnectLeader() + tester.AnnotateConnection(ts.g.GetConnected()) r = ts.oneInc() ts.checkCounter(r.N, NSRV-1) ts.connect(l) + tester.AnnotateConnection(ts.g.GetConnected()) ts.checkCounter(r.N, NSRV) } @@ -72,6 +82,7 @@ func TestLeaderPartition4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestLeaderPartition4A", NSRV) ts.Begin("Test Leader Partition") // submit an Inc @@ -79,10 +90,20 @@ func TestLeaderPartition4A(t *testing.T) { ts.checkCounter(r.N, NSRV) // partition leader - _, l := Leader(ts.Config, Gid) + foundl, l := Leader(ts.Config, Gid) + if foundl { + text := fmt.Sprintf("leader found = %v", l) + tester.AnnotateInfo(text, text) + } else { + text := "did not find a leader" + tester.AnnotateInfo(text, text) + } p1, p2 := ts.Group(Gid).MakePartition(l) ts.Group(Gid).Partition(p1, p2) + tester.AnnotateTwoPartitions(p1, p2) + text := fmt.Sprintf("concurrently submitting %v Dec to %v", NSUBMIT, l) + tester.AnnotateInfo(text, text) done := make(chan struct{}) go func() { // Submit many Dec's concurrently, which will results in many @@ -109,17 +130,24 @@ func TestLeaderPartition4A(t *testing.T) { select { case err := <-done: - ts.Fatalf("Dec's in minority completed %v", err) + text := fmt.Sprintf("Dec's in minority completed; Submit returns %v", err) + tester.AnnotateCheckerFailure(text, text) + ts.Fatalf(text) case <-time.After(time.Second): } // reconnect leader ts.connect(l) + // use the fact that there are only three servers, so connecting the leader + // resolves all partitions (the last line of test also relies on this) + tester.AnnotateClearFailure() select { case <-done: case <-time.After(time.Second): - ts.Fatalf("Submit after healing didn't return") + text := "Submit after healing didn't return" + tester.AnnotateCheckerFailure(text, text) + ts.Fatalf(text) } // check that all replicas have the same value for counter @@ -136,27 +164,34 @@ func TestRestartReplay4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestRestartReplay4A", NSRV) ts.Begin("Test Restart") for i := 0; i < NINC; i++ { r := ts.oneInc() if r.N != i+1 { - ts.t.Fatalf("expected %d instead of %d", i, r.N) + err := fmt.Sprintf("expected %d instead of %d", i, r.N) + tester.AnnotateCheckerFailure(err, err) + ts.t.Fatalf(err) } ts.checkCounter(r.N, NSRV) } ts.Group(Gid).Shutdown() + tester.AnnotateShutdownAll() time.Sleep(1 * time.Second) ts.Group(Gid).StartServers() + tester.AnnotateRestartAll() // submit an Inc r := ts.oneInc() if r.N != NINC+1 { - t.Fatalf("Expected %d got %d", NINC+1, r.N) + err := fmt.Sprintf("expected %d got %d", NINC+1, r.N) + tester.AnnotateCheckerFailure(err, err) + 
t.Fatalf(err) } time.Sleep(1 * time.Second) @@ -176,6 +211,7 @@ func TestShutdown4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestShutdown4A", NSRV) ts.Begin("Test Shutdown") // Submit many Null's concurrently @@ -197,11 +233,14 @@ func TestShutdown4A(t *testing.T) { time.Sleep(20 * time.Millisecond) ts.Group(Gid).Shutdown() + tester.AnnotateShutdownAll() select { case <-done: case <-time.After((NSEC + 1) * time.Second): - ts.Fatalf("Submit didn't stop after shutdown") + err := "Submit didn't stop after shutdown" + tester.AnnotateCheckerFailure(err, err) + ts.Fatalf(err) } } @@ -216,32 +255,41 @@ func TestRestartSubmit4A(t *testing.T) { ts := makeTest(t, -1) defer ts.cleanup() + tester.AnnotateTest("TestRestartSubmit4A", NSRV) ts.Begin("Test Restart and submit") for i := 0; i < NINC; i++ { r := ts.oneInc() if r.N != i+1 { - ts.t.Fatalf("expected %d instead of %d", i, r.N) + err := fmt.Sprintf("expected %d instead of %d", i, r.N) + tester.AnnotateCheckerFailure(err, err) + ts.t.Fatalf(err) } ts.checkCounter(r.N, NSRV) } ts.Group(Gid).Shutdown() + tester.AnnotateShutdownAll() time.Sleep(1 * time.Second) ts.Group(Gid).StartServers() + tester.AnnotateRestartAll() // submit an Inc r := ts.oneInc() if r.N != NINC+1 { - t.Fatalf("Expected %d got %d", NINC+1, r.N) + err := fmt.Sprintf("Expected %d got %d", NINC+1, r.N) + tester.AnnotateCheckerFailure(err, err) + t.Fatalf(err) } time.Sleep(1 * time.Second) // Submit many Null's concurrently + text := fmt.Sprintf("concurrently submitting %v Null", NSUBMIT) + tester.AnnotateInfo(text, text) done := make(chan struct{}) go func() { var wg sync.WaitGroup @@ -260,14 +308,18 @@ func TestRestartSubmit4A(t *testing.T) { time.Sleep(20 * time.Millisecond) ts.Group(Gid).Shutdown() + tester.AnnotateShutdownAll() select { case <-done: case <-time.After((NSEC + 1) * time.Second): - ts.Fatalf("Submit didn't stop after shutdown") + err := "Submit didn't stop after shutdown" + tester.AnnotateCheckerFailure(err, err) + ts.Fatalf(err) } ts.Group(Gid).StartServers() + tester.AnnotateRestartAll() r = ts.oneInc() ts.checkCounter(r.N, NSRV) @@ -283,6 +335,9 @@ func TestSnapshot4C(t *testing.T) { ts := makeTest(t, MAXRAFTSTATE) defer ts.cleanup() + tester.AnnotateTest("TestSnapshot4C", NSRV) + ts.Begin("Test creating and restoring snapshot") + for i := 0; i < N; i++ { ts.oneInc() } @@ -290,14 +345,18 @@ func TestSnapshot4C(t *testing.T) { sz := ts.Group(Gid).LogSize() if sz > 2*MAXRAFTSTATE { - ts.Fatalf("logs were not trimmed (%v > 2 * %v)", sz, ts.maxraftstate) + err := fmt.Sprintf("logs were not trimmed (%v > 2 * %v)", sz, ts.maxraftstate) + tester.AnnotateCheckerFailure(err, err) + ts.Fatalf(err) } // rsm must have made snapshots by now; shutdown all servers and // restart them from a snapshot ts.g.Shutdown() + tester.AnnotateShutdownAll() ts.g.StartServers() + tester.AnnotateRestartAll() // make restarted servers do one increment ts.oneInc() diff --git a/src/kvraft1/rsm/test.go b/src/kvraft1/rsm/test.go index 1fb3f8d..681b101 100644 --- a/src/kvraft1/rsm/test.go +++ b/src/kvraft1/rsm/test.go @@ -5,6 +5,7 @@ import ( "sync" "testing" "time" + "fmt" "6.5840/kvsrv1/rpc" "6.5840/labrpc" @@ -115,6 +116,8 @@ func (ts *Test) checkCounter(v int, nsrv int) { for iters := 0; iters < 30; iters++ { n = ts.countValue(v) if n >= nsrv { + text := fmt.Sprintf("all %v servers have counter value %v", nsrv, v) + tester.AnnotateCheckerSuccess(text, text) return } time.Sleep(to) @@ -122,7 +125,9 @@ func (ts *Test) checkCounter(v int, nsrv int) { to *= 
2 } } - ts.Fatalf("checkCounter: only %d srvs have %v instead of %d", n, v, nsrv) + err := fmt.Sprintf("checkCounter: only %d srvs have %v instead of %d", n, v, nsrv) + tester.AnnotateCheckerFailure(err, err) + ts.Fatalf(err) } func (ts *Test) countValue(v int) int { diff --git a/src/kvtest1/kvtest.go b/src/kvtest1/kvtest.go index f07edf3..ef38d35 100644 --- a/src/kvtest1/kvtest.go +++ b/src/kvtest1/kvtest.go @@ -3,6 +3,7 @@ package kvtest import ( "encoding/json" //"log" + "fmt" "math/rand" "strconv" "testing" @@ -78,9 +79,11 @@ func (ts *Test) MakeClerk() IKVClerk { // Assumes different ck's put to different keys func (ts *Test) PutAtLeastOnce(ck IKVClerk, key, value string, ver rpc.Tversion, me int) rpc.Tversion { + verPrev := ver for true { err := ts.Put(ck, key, value, ver, me) if err == rpc.OK { + ver += 1 break } if err == rpc.ErrMaybe || err == rpc.ErrVersion { @@ -92,17 +95,27 @@ func (ts *Test) PutAtLeastOnce(ck IKVClerk, key, value string, ver rpc.Tversion, } } } + desp := fmt.Sprintf("Put(%v, %v) completes", key, value) + details := fmt.Sprintf("version: %v -> %v", verPrev, ver) + tester.AnnotateInfo(desp, details) return ver } func (ts *Test) CheckGet(ck IKVClerk, key, value string, version rpc.Tversion) { + tester.AnnotateCheckerBegin(fmt.Sprintf("checking Get(%v) = (%v, %v)", key, value, version)) val, ver, err := ts.Get(ck, key, 0) if err != rpc.OK { - ts.Fatalf("CheckGet err %v", err) + text := fmt.Sprintf("Get(%v) returns error = %v", key, err) + tester.AnnotateCheckerFailure(text, text) + ts.Fatalf(text) } - if val != value || ver != ver { - ts.Fatalf("Get(%v): expected:\n%v %v\nreceived:\n%v %v", key, value, val, version, ver) + if val != value || ver != version { + text := fmt.Sprintf("Get(%v) returns (%v, %v) != (%v, %v)", key, val, ver, value, version) + tester.AnnotateCheckerFailure(text, text) + ts.Fatalf(text) } + text := fmt.Sprintf("Get(%v) returns (%v, %v) as expected", key, val, ver) + tester.AnnotateCheckerSuccess(text, "OK") } type ClntRes struct { @@ -214,9 +227,10 @@ func (ts *Test) OnePut(me int, ck IKVClerk, key string, ver rpc.Tversion) (rpc.T // repartition the servers periodically func (ts *Test) Partitioner(gid tester.Tgid, ch chan bool) { + //log.Printf("partioner %v", gid) defer func() { ch <- true }() for true { - switch { + select { case <-ch: return default: @@ -234,6 +248,7 @@ func (ts *Test) Partitioner(gid tester.Tgid, ch chan bool) { } } ts.Group(gid).Partition(pa[0], pa[1]) + tester.AnnotateTwoPartitions(pa[0], pa[1]) time.Sleep(ElectionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond) } } diff --git a/src/kvtest1/porcupine.go b/src/kvtest1/porcupine.go index 75a96b2..0b3b079 100644 --- a/src/kvtest1/porcupine.go +++ b/src/kvtest1/porcupine.go @@ -83,9 +83,7 @@ func Put(cfg *tester.Config, ck IKVClerk, key string, value string, version rpc. // Checks that the log of Clerk.Put's and Clerk.Get's is linearizable (see // linearizability-faq.txt) -func checkPorcupine( - t *testing.T, opLog *OpLog, annotations []porcupine.Annotation, nsec time.Duration, -) { +func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) { enabled := os.Getenv("VIS_ENABLE") fpath := os.Getenv("VIS_FILE") res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), nsec) @@ -102,6 +100,7 @@ func checkPorcupine( fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err) } else if enabled != "never" { // Don't produce visualization file if VIS_ENABLE is set to "never". 
+ annotations := tester.FinalizeAnnotations("test failed") info.AddAnnotations(annotations) err = porcupine.Visualize(models.KvModel, info, file) if err != nil { @@ -116,7 +115,7 @@ func checkPorcupine( } // The result is either legal or unknown. - if enabled == "always" { + if enabled == "always" && tester.GetAnnotationFinalized() { var file *os.File var err error if fpath == "" { @@ -129,6 +128,7 @@ func checkPorcupine( fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err) return } + annotations := tester.FinalizeAnnotations("test passed") info.AddAnnotations(annotations) err = porcupine.Visualize(models.KvModel, info, file) if err != nil { @@ -180,9 +180,8 @@ func (ts *Test) CheckPorcupine() { } func (ts *Test) CheckPorcupineT(nsec time.Duration) { - // ts.RetrieveAnnotations() also clears the accumulated annotations so that - // the vis file containing client operations (generated here) won't be + // tester.RetrieveAnnotations() also clears the accumulated annotations so + // that the vis file containing client operations (generated here) won't be // overridden by that without client operations (generated at cleanup time). - annotations := ts.RetrieveAnnotations() - checkPorcupine(ts.t, ts.oplog, annotations, nsec) + checkPorcupine(ts.t, ts.oplog, nsec) } diff --git a/src/shardkv1/shardctrler/shardctrler.go b/src/shardkv1/shardctrler/shardctrler.go index b0f351c..3d8743e 100644 --- a/src/shardkv1/shardctrler/shardctrler.go +++ b/src/shardkv1/shardctrler/shardctrler.go @@ -8,9 +8,9 @@ import ( "sync/atomic" + "6.5840/kvsrv1" "6.5840/kvsrv1/rpc" "6.5840/kvtest1" - "6.5840/shardkv1/kvsrv1" "6.5840/shardkv1/shardcfg" "6.5840/tester1" ) diff --git a/src/shardkv1/shardkv_test.go b/src/shardkv1/shardkv_test.go index 14d9ede..78f0f58 100644 --- a/src/shardkv1/shardkv_test.go +++ b/src/shardkv1/shardkv_test.go @@ -22,8 +22,8 @@ const ( // kvsrv1 lab. func TestInitQuery5A(t *testing.T) { - // MakeTest starts a key/value server using `kvsrv.StartKVServer`, - // which is defined in shardkv1/kvsrv1. + // MakeTest starts your lab2 key/value server using + // `kvsrv.StartKVServer`. ts := MakeTest(t, "Test (5A): Init and Query ...", true) defer ts.Cleanup() diff --git a/src/shardkv1/test.go b/src/shardkv1/test.go index 14974a8..2020f48 100644 --- a/src/shardkv1/test.go +++ b/src/shardkv1/test.go @@ -10,10 +10,10 @@ import ( "time" "6.5840/kvraft1/rsm" + "6.5840/kvsrv1" "6.5840/kvsrv1/rpc" "6.5840/kvtest1" "6.5840/labrpc" - "6.5840/shardkv1/kvsrv1" "6.5840/shardkv1/shardcfg" "6.5840/shardkv1/shardctrler" "6.5840/shardkv1/shardctrler/param" diff --git a/src/tester1/annotation.go b/src/tester1/annotation.go index b603ad2..dbbb19f 100644 --- a/src/tester1/annotation.go +++ b/src/tester1/annotation.go @@ -20,6 +20,7 @@ type Annotation struct { mu *sync.Mutex annotations []porcupine.Annotation continuous map[string]Continuous + finalized bool } type Continuous struct { @@ -70,8 +71,22 @@ const ( TAG_INFO string = "$ Test Info" ) -func (cfg *Config) RetrieveAnnotations() []porcupine.Annotation{ - annotations := annotation.retrieve() +func FinalizeAnnotations(end string) []porcupine.Annotation { + annotations := annotation.finalize() + + // XXX: Make the last annotation an interval one to work around Porcupine's + // issue. Consider removing this once the issue is fixed. 
+ t := timestamp() + aend := porcupine.Annotation{ + Tag: TAG_INFO, + Start: t, + End: t + 1000, + Description: end, + Details: end, + BackgroundColor: COLOR_INFO, + } + annotations = append(annotations, aend) + return annotations } @@ -173,12 +188,39 @@ func AnnotateCheckerNeutral(desp, details string) { AnnotateCheckerEnd(desp, details, COLOR_NEUTRAL) } +func SetAnnotationFinalized() { + annotation.mu.Lock() + defer annotation.mu.Unlock() + + annotation.finalized = true +} + +func (an *Annotation) isFinalized() bool { + annotation.mu.Lock() + defer annotation.mu.Unlock() + + return annotation.finalized +} + +func GetAnnotationFinalized() bool { + return annotation.isFinalized() +} + // Used before log.Fatalf func AnnotateCheckerFailureBeforeExit(desp, details string) { AnnotateCheckerFailure(desp, details) annotation.cleanup(true, "test failed") } +// The current annotation API for failures is very hacky. We really should have +// just one function that reads the current network/server status. For network, +// we should be able to read whether an endname is enabled. However, if the +// endname is enabled from X to Y, but not Y to X, the annotation would be +// downright confusing. A better design (in the tester framework, not in the +// annotation layer) is to have a single boolean for each pair of servers; once +// we have such state, the annotation can then simply read the booleans to +// determine the network partitions. + // Two functions to annotate partitions: AnnotateConnection and // AnnotateTwoPartitions. The connected field of ServerGrp (in group.go) is // precise if and only if the ServerGrp.Partition is not used. Thus, we use the @@ -236,14 +278,31 @@ func annotateFault() { AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT) } +// Currently this API does not work with failed servers, nor with the connected +// fields of ServerGrp (in group.go). It is used specifically for +// ServerGrp.Partition. func AnnotateTwoPartitions(p1 []int, p2 []int) { // A bit hard to check whether the partition actually changes, so just // annotate on every invocation. 
- // TODO - text := fmt.Sprintf("%v %v", p1, p2) + text := fmt.Sprintf("partition = %v %v", p1, p2) AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT) } +func AnnotateClearFailure() { + finfo.mu.Lock() + defer finfo.mu.Unlock() + + for id := range(finfo.crashed) { + finfo.crashed[id] = false + } + + for id := range(finfo.connected) { + finfo.connected[id] = true + } + + AnnotateContinuousEnd(TAG_PARTITION) +} + func AnnotateShutdown(servers []int) { finfo.mu.Lock() defer finfo.mu.Unlock() @@ -316,9 +375,12 @@ func timestamp() int64 { return int64(time.Since(time.Unix(0, 0))) } -func (an *Annotation) retrieve() []porcupine.Annotation { +func (an *Annotation) finalize() []porcupine.Annotation { an.mu.Lock() + defer an.mu.Unlock() + x := an.annotations + t := timestamp() for tag, cont := range(an.continuous) { a := porcupine.Annotation{ @@ -331,9 +393,8 @@ func (an *Annotation) retrieve() []porcupine.Annotation { } x = append(x, a) } - an.annotations = make([]porcupine.Annotation, 0) - an.continuous = make(map[string]Continuous) - an.mu.Unlock() + + an.finalized = true return x } @@ -341,6 +402,7 @@ func (an *Annotation) clear() { an.mu.Lock() an.annotations = make([]porcupine.Annotation, 0) an.continuous = make(map[string]Continuous) + an.finalized = false an.mu.Unlock() } @@ -445,21 +507,22 @@ func (an *Annotation) annotateContinuousEnd(tag string) { func (an *Annotation) cleanup(failed bool, end string) { enabled := os.Getenv("VIS_ENABLE") - if enabled == "never" || (!failed && enabled != "always") { + if enabled == "never" || (!failed && enabled != "always") || an.isFinalized() { // Simply clean up the annotations without producing the vis file if - // VIS_ENABLE is set to "never", or if the test passes and VIS_ENABLE is - // not set to "always". + // VIS_ENABLE is set to "never", OR if the test passes AND VIS_ENABLE is + // not set to "always", OR the current test has already been finalized + // (because CheckPorcupine has already produced a vis file). an.clear() return } - annotations := an.retrieve() + annotations := an.finalize() if len(annotations) == 0 { // Skip empty annotations. return } - // XXX: Make the last annotation a interval one to work around Porcupine's + // XXX: Make the last annotation an interval one to work around Porcupine's // issue. Consider removing this once the issue is fixed. t := timestamp() aend := porcupine.Annotation{ @@ -499,6 +562,7 @@ func mkAnnotation() *Annotation { mu: new(sync.Mutex), annotations: make([]porcupine.Annotation, 0), continuous: make(map[string]Continuous), + finalized: false, } return &an diff --git a/src/tester1/group.go b/src/tester1/group.go index 6f0488b..28849fa 100644 --- a/src/tester1/group.go +++ b/src/tester1/group.go @@ -313,7 +313,7 @@ func (sg *ServerGrp) AllowServersExcept(l int) []int { } func (sg *ServerGrp) Partition(p1 []int, p2 []int) { - // log.Printf("partition servers into: %v %v\n", p1, p2) + //log.Printf("partition servers into: %v %v\n", p1, p2) for i := 0; i < len(p1); i++ { sg.disconnect(p1[i], p2) sg.connect(p1[i], p1)
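Usage note (illustrative, not part of the patch): the checker annotations introduced above follow a begin / success-or-failure pattern around each assertion, with the failure annotation emitted just before the test aborts. A minimal sketch of that pattern, assuming a hypothetical checkLogSize helper and using only the tester calls that appear in the diff (AnnotateCheckerBegin, AnnotateCheckerFailure, AnnotateCheckerSuccess):

package kvraft_example // hypothetical package, for illustration only

import (
	"fmt"
	"testing"

	tester "6.5840/tester1"
)

// checkLogSize is a hypothetical helper mirroring the log-trimming checks in
// the patch: announce the check, annotate the failure before Fatalf, and
// annotate success otherwise.
func checkLogSize(t *testing.T, sz, maxraftstate int) {
	tester.AnnotateCheckerBegin(fmt.Sprintf("checking that logs are trimmed to <= 8*%v", maxraftstate))
	if sz > 8*maxraftstate {
		err := fmt.Sprintf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
		tester.AnnotateCheckerFailure(err, err)
		t.Fatalf("%s", err)
	}
	tester.AnnotateCheckerSuccess(fmt.Sprintf("log size %v within bound", sz), "OK")
}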