update

Parent: c8e7d779c2
Commit: eacb273f95
@@ -37,6 +37,7 @@ REFERENCE_FILES=(
     src/tester1/group.go
     src/tester1/persister.go
     src/tester1/srv.go
+    src/tester1/annotation.go

 # lab 4
     src/kvraft1/rsm/rsm_test.go
@@ -2,8 +2,7 @@ package kvtest

 import (
 	"fmt"
-	"io/ioutil"
-	//"log"
+	"os"
 	"sync"
 	"testing"
 	"time"
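The import swap tracks the deprecation of io/ioutil: since Go 1.16, ioutil.TempFile is documented as a thin wrapper around os.CreateTemp, so the helper can depend on os alone. A minimal sketch of the replacement call (the file name printed is illustrative):

package main

import (
	"fmt"
	"os"
)

func main() {
	// os.CreateTemp replaces the deprecated ioutil.TempFile; the "*" in the
	// pattern is substituted with a random string, yielding a name such as
	// /tmp/porcupine-123456789.html.
	f, err := os.CreateTemp("", "porcupine-*.html")
	if err != nil {
		fmt.Println("create temp:", err)
		return
	}
	defer os.Remove(f.Name()) // clean up the illustrative file
	fmt.Println("created", f.Name())
}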
@@ -46,7 +45,7 @@ func (log *OpLog) Read() []porcupine.Operation {
 // absolute timestamps with `time.Now().UnixNano()` (which uses the wall
 // clock), we measure time relative to `t0` using `time.Since(t0)`, which uses
 // the monotonic clock
-var t0 = time.Now()
+var t0 = time.Unix(0, 0)

 func Get(cfg *tester.Config, ck IKVClerk, key string, log *OpLog, cli int) (string, rpc.Tversion, rpc.Err) {
 	start := int64(time.Since(t0))
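There is a subtlety in this one-line change: time.Since(t0) subtracts monotonically only when t0 carries a monotonic clock reading, which time.Now() does and time.Unix(0, 0) does not. With the new t0, time.Since(t0) is plain wall-clock subtraction and yields nanoseconds since the Unix epoch, so operation timestamps and the commit's new annotation timestamps end up on one absolute time base; note that the surrounding comment, which the diff leaves unchanged, still describes the old monotonic behavior. A small sketch of the difference:

package main

import (
	"fmt"
	"time"
)

func main() {
	// time.Now() carries a monotonic reading; time.Unix(0, 0) carries none.
	monotonic := time.Now()
	epoch := time.Unix(0, 0)

	// Subtracting a monotonic-bearing t0 measures elapsed time; subtracting
	// the epoch yields absolute wall-clock nanoseconds, comparable across
	// timestamps taken elsewhere in the same run.
	fmt.Println(int64(time.Since(monotonic))) // ~0
	fmt.Println(int64(time.Since(epoch)))     // ~time.Now().UnixNano()
}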
@@ -84,14 +83,26 @@ func Put(cfg *tester.Config, ck IKVClerk, key string, value string, version rpc.

 // Checks that the log of Clerk.Put's and Clerk.Get's is linearizable (see
 // linearizability-faq.txt)
-func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) {
-	//log.Printf("oplog len %v %v", ts.oplog.Len(), ts.oplog)
+func checkPorcupine(
+	t *testing.T, opLog *OpLog, annotations []porcupine.Annotation, nsec time.Duration,
+) {
+	enabled := os.Getenv("VIS_ENABLE")
+	fpath := os.Getenv("VIS_FILE")
 	res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), nsec)
 	if res == porcupine.Illegal {
-		file, err := ioutil.TempFile("", "porcupine-*.html")
-		if err != nil {
-			fmt.Printf("info: failed to create temp file for visualization")
+		var file *os.File
+		var err error
+		if fpath == "" {
+			// Save the vis file in a temporary file.
+			file, err = os.CreateTemp("", "porcupine-*.html")
 		} else {
+			file, err = os.OpenFile(fpath, os.O_RDWR | os.O_CREATE | os.O_TRUNC, 0644)
+		}
+		if err != nil {
+			fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
+		} else if enabled != "never" {
+			// Don't produce visualization file if VIS_ENABLE is set to "never".
+			info.AddAnnotations(annotations)
 			err = porcupine.Visualize(models.KvModel, info, file)
 			if err != nil {
 				fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
@@ -103,6 +114,29 @@ func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) {
 	} else if res == porcupine.Unknown {
 		fmt.Println("info: linearizability check timed out, assuming history is ok")
 	}

+	// The result is either legal or unknown.
+	if enabled == "always" {
+		var file *os.File
+		var err error
+		if fpath == "" {
+			// Save the vis file in a temporary file.
+			file, err = os.CreateTemp("", "porcupine-*.html")
+		} else {
+			file, err = os.OpenFile(fpath, os.O_RDWR | os.O_CREATE | os.O_TRUNC, 0644)
+		}
+		if err != nil {
+			fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
+			return
+		}
+		info.AddAnnotations(annotations)
+		err = porcupine.Visualize(models.KvModel, info, file)
+		if err != nil {
+			fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
+		} else {
+			fmt.Printf("info: wrote history visualization to %s\n", file.Name())
+		}
+	}
 }

 // Porcupine
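The rewritten checkPorcupine is driven by two environment variables: VIS_FILE picks a fixed output path for the visualization (falling back to a temporary file when unset), and VIS_ENABLE gates when the file is written ("never" suppresses it even on a linearizability violation, "always" forces it even when the result is legal or unknown). The open-or-create logic now appears verbatim in both branches; a helper along these lines (hypothetical, not part of the diff) could factor it:

package kvtest

import "os"

// openVisFile is a hypothetical helper, not in the diff, capturing the
// duplicated choice between a caller-specified path and a temp file.
func openVisFile(fpath string) (*os.File, error) {
	if fpath == "" {
		// VIS_FILE unset: save the vis file in a temporary file.
		return os.CreateTemp("", "porcupine-*.html")
	}
	// VIS_FILE set: create or truncate the requested file.
	return os.OpenFile(fpath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
}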
@@ -142,9 +176,13 @@ func (ts *Test) Put(ck IKVClerk, key string, value string, version rpc.Tversion,
 }

 func (ts *Test) CheckPorcupine() {
-	checkPorcupine(ts.t, ts.oplog, linearizabilityCheckTimeout)
+	ts.CheckPorcupineT(linearizabilityCheckTimeout)
 }

 func (ts *Test) CheckPorcupineT(nsec time.Duration) {
-	checkPorcupine(ts.t, ts.oplog, nsec)
+	// ts.RetrieveAnnotations() also clears the accumulated annotations so that
+	// the vis file containing client operations (generated here) won't be
+	// overridden by that without client operations (generated at cleanup time).
+	annotations := ts.RetrieveAnnotations()
+	checkPorcupine(ts.t, ts.oplog, annotations, nsec)
 }
@@ -29,6 +29,7 @@ func TestInitialElection3A(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestInitialElection3A", servers)
 	ts.Begin("Test (3A): initial election")

 	// is a leader elected?
@@ -58,24 +59,28 @@ func TestReElection3A(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestReElection3A", servers)
 	ts.Begin("Test (3A): election after network failure")

 	leader1 := ts.checkOneLeader()

 	// if the leader disconnects, a new one should be elected.
 	ts.g.DisconnectAll(leader1)
+	tester.AnnotateConnection(ts.g.GetConnected())
 	ts.checkOneLeader()

 	// if the old leader rejoins, that shouldn't
 	// disturb the new leader. and the old leader
 	// should switch to follower.
 	ts.g.ConnectOne(leader1)
+	tester.AnnotateConnection(ts.g.GetConnected())
 	leader2 := ts.checkOneLeader()

 	// if there's no quorum, no new leader should
 	// be elected.
 	ts.g.DisconnectAll(leader2)
 	ts.g.DisconnectAll((leader2 + 1) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())
 	time.Sleep(2 * RaftElectionTimeout)

 	// check that the one connected server
@@ -84,10 +89,12 @@ func TestReElection3A(t *testing.T) {

 	// if a quorum arises, it should elect a leader.
 	ts.g.ConnectOne((leader2 + 1) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())
 	ts.checkOneLeader()

 	// re-join of last node shouldn't prevent leader from existing.
 	ts.g.ConnectOne(leader2)
+	tester.AnnotateConnection(ts.g.GetConnected())
 	ts.checkOneLeader()
 }

@@ -96,6 +103,7 @@ func TestManyElections3A(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestManyElection3A", servers)
 	ts.Begin("Test (3A): multiple elections")

 	ts.checkOneLeader()
@@ -109,6 +117,7 @@ func TestManyElections3A(t *testing.T) {
 		ts.g.DisconnectAll(i1)
 		ts.g.DisconnectAll(i2)
 		ts.g.DisconnectAll(i3)
+		tester.AnnotateConnection(ts.g.GetConnected())

 		// either the current leader should still be alive,
 		// or the remaining four should elect a new one.
@@ -117,6 +126,7 @@ func TestManyElections3A(t *testing.T) {
 		ts.g.ConnectOne(i1)
 		ts.g.ConnectOne(i2)
 		ts.g.ConnectOne(i3)
+		tester.AnnotateConnection(ts.g.GetConnected())
 	}
 	ts.checkOneLeader()
 }
@@ -126,6 +136,7 @@ func TestBasicAgree3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestBasicAgree3B", servers)
 	ts.Begin("Test (3B): basic agreement")

 	iters := 3
@@ -149,6 +160,7 @@ func TestRPCBytes3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestRPCBytes3B", servers)
 	ts.Begin("Test (3B): RPC byte count")

 	ts.one(99, servers, false)
@@ -180,6 +192,7 @@ func TestFollowerFailure3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestFollowerFailure3B", servers)
 	ts.Begin("Test (3B): test progressive failure of followers")

 	ts.one(101, servers, false)
@@ -187,6 +200,7 @@ func TestFollowerFailure3B(t *testing.T) {
 	// disconnect one follower from the network.
 	leader1 := ts.checkOneLeader()
 	ts.g.DisconnectAll((leader1 + 1) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// the leader and remaining follower should be
 	// able to agree despite the disconnected follower.
@@ -198,6 +212,7 @@ func TestFollowerFailure3B(t *testing.T) {
 	leader2 := ts.checkOneLeader()
 	ts.g.DisconnectAll((leader2 + 1) % servers)
 	ts.g.DisconnectAll((leader2 + 2) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// submit a command.
 	index, _, ok := ts.srvs[leader2].Raft().Start(104)
@@ -211,11 +226,7 @@ func TestFollowerFailure3B(t *testing.T) {
 	time.Sleep(2 * RaftElectionTimeout)

 	// check that command 104 did not commit.
-	n, _ := ts.nCommitted(index)
-	if n > 0 {
-		t.Fatalf("%v committed but no majority", n)
-	}
+	ts.checkNoAgreement(index)

 }

 // test just failure of leaders.
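The inlined no-majority check is folded into a ts.checkNoAgreement helper here and again in TestLeaderFailure3B below. The helper's body is not shown in this diff; judging from the lines it replaces, it presumably looks roughly like this sketch (reconstructed, not confirmed by the source):

// Sketch of checkNoAgreement, reconstructed from the removed inline code;
// the actual helper may also emit a tester annotation on failure.
func (ts *Test) checkNoAgreement(index int) {
	// nCommitted reports how many servers have committed the entry at index.
	n, _ := ts.nCommitted(index)
	if n > 0 {
		ts.t.Fatalf("%v committed but no majority", n)
	}
}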
@@ -224,6 +235,7 @@ func TestLeaderFailure3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestLeaderFailure3B", servers)
 	ts.Begin("Test (3B): test failure of leaders")

 	ts.one(101, servers, false)
@@ -231,6 +243,7 @@ func TestLeaderFailure3B(t *testing.T) {
 	// disconnect the first leader.
 	leader1 := ts.checkOneLeader()
 	ts.g.DisconnectAll(leader1)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// the remaining followers should elect
 	// a new leader.
@@ -241,6 +254,7 @@ func TestLeaderFailure3B(t *testing.T) {
 	// disconnect the new leader.
 	leader2 := ts.checkOneLeader()
 	ts.g.DisconnectAll(leader2)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// submit a command to each server.
 	for i := 0; i < servers; i++ {
@@ -250,11 +264,7 @@ func TestLeaderFailure3B(t *testing.T) {
 	time.Sleep(2 * RaftElectionTimeout)

 	// check that command 104 did not commit.
-	n, _ := ts.nCommitted(4)
-	if n > 0 {
-		t.Fatalf("%v committed but no majority", n)
-	}
+	ts.checkNoAgreement(4)

 }

 // test that a follower participates after
@@ -264,6 +274,7 @@ func TestFailAgree3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestFailAgree3B", servers)
 	ts.Begin("Test (3B): agreement after follower reconnects")

 	ts.one(101, servers, false)
@@ -271,6 +282,7 @@ func TestFailAgree3B(t *testing.T) {
 	// disconnect one follower from the network.
 	leader := ts.checkOneLeader()
 	ts.g.DisconnectAll((leader + 1) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// the leader and remaining follower should be
 	// able to agree despite the disconnected follower.
@@ -282,6 +294,7 @@ func TestFailAgree3B(t *testing.T) {

 	// re-connect
 	ts.g.ConnectOne((leader + 1) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// the full set of servers should preserve
 	// previous agreements, and be able to agree
@@ -289,7 +302,6 @@ func TestFailAgree3B(t *testing.T) {
 	ts.one(106, servers, true)
 	time.Sleep(RaftElectionTimeout)
 	ts.one(107, servers, true)
-
 }

 func TestFailNoAgree3B(t *testing.T) {
@@ -297,6 +309,7 @@ func TestFailNoAgree3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestFailNoAgree3B", servers)
 	ts.Begin("Test (3B): no agreement if too many followers disconnect")

 	ts.one(10, servers, false)
@@ -306,6 +319,7 @@ func TestFailNoAgree3B(t *testing.T) {
 	ts.g.DisconnectAll((leader + 1) % servers)
 	ts.g.DisconnectAll((leader + 2) % servers)
 	ts.g.DisconnectAll((leader + 3) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	index, _, ok := ts.srvs[leader].Raft().Start(20)
 	if ok != true {
@@ -326,6 +340,7 @@ func TestFailNoAgree3B(t *testing.T) {
 	ts.g.ConnectOne((leader + 1) % servers)
 	ts.g.ConnectOne((leader + 2) % servers)
 	ts.g.ConnectOne((leader + 3) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// the disconnected majority may have chosen a leader from
 	// among their own ranks, forgetting index 2.
@@ -339,7 +354,6 @@ func TestFailNoAgree3B(t *testing.T) {
 	}

 	ts.one(1000, servers, true)
-
 }

 func TestConcurrentStarts3B(t *testing.T) {
@@ -347,6 +361,7 @@ func TestConcurrentStarts3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestConcurrentStarts3B", servers)
 	ts.Begin("Test (3B): concurrent Start()s")

 	var success bool
@@ -358,9 +373,15 @@ loop:
 		}

 		leader := ts.checkOneLeader()
+		textb := fmt.Sprintf("checking concurrent submission of commands (attempt %v)", try)
+		tester.AnnotateCheckerBegin(textb)
 		_, term, ok := ts.srvs[leader].Raft().Start(1)

+		despretry := "concurrent submission failed; retry"
 		if !ok {
 			// leader moved on really quickly
+			details := fmt.Sprintf("%v is no longer a leader", leader)
+			tester.AnnotateCheckerNeutral(despretry, details)
 			continue
 		}

@@ -388,6 +409,9 @@ loop:
 		for j := 0; j < servers; j++ {
 			if t, _ := ts.srvs[j].Raft().GetState(); t != term {
 				// term changed -- can't expect low RPC counts
+				details := fmt.Sprintf("term of server %v changed from %v to %v",
+					j, term, t)
+				tester.AnnotateCheckerNeutral(despretry, details)
 				continue loop
 			}
 		}
@@ -402,11 +426,17 @@ loop:
 					// so we can't expect all Start()s to
 					// have succeeded
 					failed = true
+					details := fmt.Sprintf(
+						"term changed while waiting for %v servers to commit index %v",
+						servers, index)
+					tester.AnnotateCheckerNeutral(despretry, details)
 					break
 				}
 				cmds = append(cmds, ix)
 			} else {
-				t.Fatalf("value %v is not an int", cmd)
+				details := fmt.Sprintf("value %v is not an int", cmd)
+				tester.AnnotateCheckerFailure("read ill-typed value", details)
+				t.Fatalf(details)
 			}
 		}

@@ -428,7 +458,9 @@ loop:
 			}
 		}
 		if ok == false {
-			t.Fatalf("cmd %v missing in %v", x, cmds)
+			details := fmt.Sprintf("cmd %v missing in %v", x, cmds)
+			tester.AnnotateCheckerFailure("concurrent submission failed", details)
+			t.Fatalf(details)
 		}
 	}

@@ -437,9 +469,14 @@ loop:
 	}

 	if !success {
+		tester.AnnotateCheckerFailure(
+			"agreement failed under concurrent submission",
+			"unable to reach agreement after 5 attempts")
 		t.Fatalf("term changed too often")
 	}

+	text := "agreement reached under concurrent submission"
+	tester.AnnotateCheckerSuccess(text, "OK")
 }

 func TestRejoin3B(t *testing.T) {
@@ -447,6 +484,7 @@ func TestRejoin3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestRejoin3B", servers)
 	ts.Begin("Test (3B): rejoin of partitioned leader")

 	ts.one(101, servers, true)
@@ -454,11 +492,15 @@ func TestRejoin3B(t *testing.T) {
 	// leader network failure
 	leader1 := ts.checkOneLeader()
 	ts.g.DisconnectAll(leader1)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// make old leader try to agree on some entries
+	start := tester.GetAnnotateTimestamp()
 	ts.srvs[leader1].Raft().Start(102)
 	ts.srvs[leader1].Raft().Start(103)
 	ts.srvs[leader1].Raft().Start(104)
+	text := fmt.Sprintf("submitted commands [102 103 104] to %v", leader1)
+	tester.AnnotateInfoInterval(start, text, text)

 	// new leader commits, also for index=2
 	ts.one(103, 2, true)
@@ -469,14 +511,15 @@ func TestRejoin3B(t *testing.T) {

 	// old leader connected again
 	ts.g.ConnectOne(leader1)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	ts.one(104, 2, true)

 	// all together now
 	ts.g.ConnectOne(leader2)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	ts.one(105, servers, true)
 }

 func TestBackup3B(t *testing.T) {
@@ -484,6 +527,7 @@ func TestBackup3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestBackup3B", servers)
 	ts.Begin("Test (3B): leader backs up quickly over incorrect follower logs")

 	ts.one(rand.Int(), servers, true)
@@ -493,11 +537,15 @@ func TestBackup3B(t *testing.T) {
 	ts.g.DisconnectAll((leader1 + 2) % servers)
 	ts.g.DisconnectAll((leader1 + 3) % servers)
 	ts.g.DisconnectAll((leader1 + 4) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// submit lots of commands that won't commit
+	start := tester.GetAnnotateTimestamp()
 	for i := 0; i < 50; i++ {
 		ts.srvs[leader1].Raft().Start(rand.Int())
 	}
+	text := fmt.Sprintf("submitted 50 commands to %v", leader1)
+	tester.AnnotateInfoInterval(start, text, text)

 	time.Sleep(RaftElectionTimeout / 2)

@@ -508,6 +556,7 @@ func TestBackup3B(t *testing.T) {
 	ts.g.ConnectOne((leader1 + 2) % servers)
 	ts.g.ConnectOne((leader1 + 3) % servers)
 	ts.g.ConnectOne((leader1 + 4) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// lots of successful commands to new group.
 	for i := 0; i < 50; i++ {
@@ -521,11 +570,15 @@ func TestBackup3B(t *testing.T) {
 		other = (leader2 + 1) % servers
 	}
 	ts.g.DisconnectAll(other)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// lots more commands that won't commit
+	start = tester.GetAnnotateTimestamp()
 	for i := 0; i < 50; i++ {
 		ts.srvs[leader2].Raft().Start(rand.Int())
 	}
+	text = fmt.Sprintf("submitted 50 commands to %v", leader2)
+	tester.AnnotateInfoInterval(start, text, text)

 	time.Sleep(RaftElectionTimeout / 2)

@@ -536,6 +589,7 @@ func TestBackup3B(t *testing.T) {
 	ts.g.ConnectOne((leader1 + 0) % servers)
 	ts.g.ConnectOne((leader1 + 1) % servers)
 	ts.g.ConnectOne(other)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	// lots of successful commands to new group.
 	for i := 0; i < 50; i++ {
@@ -546,6 +600,7 @@ func TestBackup3B(t *testing.T) {
 	for i := 0; i < servers; i++ {
 		ts.g.ConnectOne(i)
 	}
+	tester.AnnotateConnection(ts.g.GetConnected())
 	ts.one(rand.Int(), servers, true)
 }

@@ -554,6 +609,7 @@ func TestCount3B(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestCount3B", servers)
 	ts.Begin("Test (3B): RPC counts aren't too high")

 	rpcs := func() (n int) {
@@ -568,7 +624,9 @@ func TestCount3B(t *testing.T) {
 	total1 := rpcs()

 	if total1 > 30 || total1 < 1 {
-		t.Fatalf("too many or few RPCs (%v) to elect initial leader\n", total1)
+		text := fmt.Sprintf("too many or few RPCs (%v) to elect initial leader", total1)
+		tester.AnnotateCheckerFailure(text, text)
+		t.Fatalf("%s", text)
 	}

 	var total2 int
@@ -581,14 +639,20 @@ loop:
 	}

 	leader = ts.checkOneLeader()
+	textb := fmt.Sprintf("checking reasonable RPC counts for agreement (attempt %v)", try)
+	tester.AnnotateCheckerBegin(textb)
 	total1 = rpcs()

 	iters := 10
 	starti, term, ok := ts.srvs[leader].Raft().Start(1)
+	despretry := "submission failed; retry"
 	if !ok {
 		// leader moved on really quickly
+		details := fmt.Sprintf("%v is no longer a leader", leader)
+		tester.AnnotateCheckerNeutral(despretry, details)
 		continue
 	}

 	cmds := []int{}
 	for i := 1; i < iters+2; i++ {
 		x := int(rand.Int31())
@@ -596,13 +660,23 @@ loop:
 		index1, term1, ok := ts.srvs[leader].Raft().Start(x)
 		if term1 != term {
 			// Term changed while starting
+			details := fmt.Sprintf("term of the leader (%v) changed from %v to %v",
+				leader, term, term1)
+			tester.AnnotateCheckerNeutral(despretry, details)
 			continue loop
 		}
 		if !ok {
 			// No longer the leader, so term has changed
+			details := fmt.Sprintf("%v is no longer a leader", leader)
+			tester.AnnotateCheckerNeutral(despretry, details)
 			continue loop
 		}
 		if starti+i != index1 {
+			desp := fmt.Sprintf("leader %v adds the command at the wrong index", leader)
+			details := fmt.Sprintf(
+				"the command should locate at index %v, but the leader puts it at %v",
+				starti + i, index1)
+			tester.AnnotateCheckerFailure(desp, details)
 			t.Fatalf("Start() failed")
 		}
 	}
@@ -612,8 +686,16 @@ loop:
 		if ix, ok := cmd.(int); ok == false || ix != cmds[i-1] {
 			if ix == -1 {
 				// term changed -- try again
+				details := fmt.Sprintf(
+					"term changed while waiting for %v servers to commit index %v",
+					servers, starti + i)
+				tester.AnnotateCheckerNeutral(despretry, details)
 				continue loop
 			}
+			details := fmt.Sprintf(
+				"the command submitted at index %v in term %v is %v, but read %v",
+				starti + i, term, cmds[i - 1], cmd)
+			tester.AnnotateCheckerFailure("incorrect command committed", details)
 			t.Fatalf("wrong value %v committed for index %v; expected %v\n", cmd, starti+i, cmds)
 		}
 	}
@@ -624,6 +706,8 @@ loop:
 		if t, _ := ts.srvs[j].Raft().GetState(); t != term {
 			// term changed -- can't expect low RPC counts
 			// need to keep going to update total2
+			details := fmt.Sprintf("term of server %v changed from %v to %v", j, term, t)
+			tester.AnnotateCheckerNeutral(despretry, details)
 			failed = true
 		}
 		total2 += ts.g.RpcCount(j)
@@ -634,17 +718,29 @@ loop:
 	}

 	if total2-total1 > (iters+1+3)*3 {
+		details := fmt.Sprintf("number of RPC used for %v entries = %v > %v",
+			iters, total2-total1, (iters+1+3)*3)
+		tester.AnnotateCheckerFailure("used too many RPCs for agreement", details)
 		t.Fatalf("too many RPCs (%v) for %v entries\n", total2-total1, iters)
 	}

+	details := fmt.Sprintf("number of RPC used for %v entries = %v <= %v",
+		iters, total2-total1, (iters+1+3)*3)
+	tester.AnnotateCheckerSuccess("used reasonable number of RPCs for agreement", details)

 	success = true
 	break
 }

 if !success {
+	tester.AnnotateCheckerFailure(
+		"agreement failed",
+		"unable to reach agreement after 5 attempts")
 	t.Fatalf("term changed too often")
 }

+tester.AnnotateCheckerBegin("checking reasonable RPC counts in idle")

 time.Sleep(RaftElectionTimeout)

 total3 := 0
@@ -653,9 +749,15 @@ loop:
 	}

 	if total3-total2 > 3*20 {
+		details := fmt.Sprintf("number of RPC used for 1 second of idleness = %v > %v",
+			total3-total2, 3 * 20)
+		tester.AnnotateCheckerFailure("used too many RPCs in idle", details)
 		t.Fatalf("too many RPCs (%v) for 1 second of idleness\n", total3-total2)
 	}
+	details := fmt.Sprintf("number of RPC used for 1 second of idleness = %v <= %v",
+		total3-total2, 3 * 20)
+	tester.AnnotateCheckerSuccess(
+		"used a reasonable number of RPCs in idle", details)
 }

 func TestPersist13C(t *testing.T) {
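Worked out, the limits these new annotations record are concrete: with iters = 10, the agreement check tolerates at most (10+1+3)*3 = 42 RPCs for the submitted entries plus slack, and the idle check tolerates at most 3*20 = 60 RPCs across the cluster during the one-election-timeout sleep. Failure and success annotations print the same bound, so a vis file shows how close a run came to each limit.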
@@ -663,36 +765,47 @@ func TestPersist13C(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestPersist13C", servers)
 	ts.Begin("Test (3C): basic persistence")

 	ts.one(11, servers, true)

 	ts.g.Shutdown()
+	tester.AnnotateShutdownAll()
 	ts.g.StartServers()
+	tester.AnnotateRestartAll()

 	ts.one(12, servers, true)

 	leader1 := ts.checkOneLeader()
 	ts.g.ShutdownServer(leader1)
+	tester.AnnotateShutdown([]int{leader1})
 	ts.restart(leader1)
+	tester.AnnotateRestart([]int{leader1})

 	ts.one(13, servers, true)

 	leader2 := ts.checkOneLeader()
 	ts.g.ShutdownServer(leader2)
+	tester.AnnotateShutdown([]int{leader2})

 	ts.one(14, servers-1, true)

 	ts.restart(leader2)
+	tester.AnnotateRestart([]int{leader2})

+	tester.AnnotateCheckerBegin("wait for all servers to commit until index 4")
 	ts.wait(4, servers, -1) // wait for leader2 to join before killing i3
+	tester.AnnotateCheckerSuccess("all committed until index 4", "OK")

 	i3 := (ts.checkOneLeader() + 1) % servers
 	ts.g.ShutdownServer(i3)
+	tester.AnnotateShutdown([]int{i3})

 	ts.one(15, servers-1, true)

 	ts.restart(i3)
+	tester.AnnotateRestart([]int{i3})

 	ts.one(16, servers, true)
 }
@@ -702,6 +815,7 @@ func TestPersist23C(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestPersist23C", servers)
 	ts.Begin("Test (3C): more persistence")

 	index := 1
@@ -713,6 +827,7 @@ func TestPersist23C(t *testing.T) {

 		ts.g.ShutdownServer((leader1 + 1) % servers)
 		ts.g.ShutdownServer((leader1 + 2) % servers)
+		tester.AnnotateShutdown([]int{(leader1 + 1) % servers, (leader1 + 2) % servers})

 		ts.one(10+index, servers-2, true)
 		index++
@@ -720,19 +835,25 @@ func TestPersist23C(t *testing.T) {
 		ts.g.ShutdownServer((leader1 + 0) % servers)
 		ts.g.ShutdownServer((leader1 + 3) % servers)
 		ts.g.ShutdownServer((leader1 + 4) % servers)
+		tester.AnnotateShutdown([]int{
+			(leader1 + 0) % servers, (leader1 + 3) % servers, (leader1 + 4) % servers,
+		})

 		ts.restart((leader1 + 1) % servers)
 		ts.restart((leader1 + 2) % servers)
+		tester.AnnotateRestart([]int{(leader1 + 1) % servers, (leader1 + 2) % servers})

 		time.Sleep(RaftElectionTimeout)

 		ts.restart((leader1 + 3) % servers)
+		tester.AnnotateRestart([]int{(leader1 + 3) % servers})

 		ts.one(10+index, servers-2, true)
 		index++

 		ts.restart((leader1 + 4) % servers)
 		ts.restart((leader1 + 0) % servers)
+		tester.AnnotateRestart([]int{(leader1 + 4) % servers, (leader1 + 0) % servers})
 	}

 	ts.one(1000, servers, true)
@@ -743,23 +864,29 @@ func TestPersist33C(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestPersist33C", servers)
 	ts.Begin("Test (3C): partitioned leader and one follower crash, leader restarts")

 	ts.one(101, 3, true)

 	leader := ts.checkOneLeader()
 	ts.g.DisconnectAll((leader + 2) % servers)
+	tester.AnnotateConnection(ts.g.GetConnected())

 	ts.one(102, 2, true)

 	ts.g.ShutdownServer((leader + 0) % servers)
 	ts.g.ShutdownServer((leader + 1) % servers)
+	tester.AnnotateShutdown([]int{(leader + 0) % servers, (leader + 1) % servers})
 	ts.restart((leader + 2) % servers)
 	ts.restart((leader + 0) % servers)
+	tester.AnnotateRestart([]int{(leader + 2) % servers, (leader + 0) % servers})
+	tester.AnnotateConnection(ts.g.GetConnected())

 	ts.one(103, 2, true)

 	ts.restart((leader + 1) % servers)
+	tester.AnnotateRestart([]int{(leader + 1) % servers})

 	ts.one(104, servers, true)
 }
@@ -777,6 +904,7 @@ func TestFigure83C(t *testing.T) {
 	ts := makeTest(t, servers, true, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestFigure83C", servers)
 	ts.Begin("Test (3C): Figure 8")

 	ts.one(rand.Int(), 1, true)
@@ -786,8 +914,11 @@ func TestFigure83C(t *testing.T) {
 		leader := -1
 		for i := 0; i < servers; i++ {
 			if ts.srvs[i].Raft() != nil {
-				_, _, ok := ts.srvs[i].Raft().Start(rand.Int())
+				cmd := rand.Int()
+				_, _, ok := ts.srvs[i].Raft().Start(cmd)
 				if ok {
+					text := fmt.Sprintf("submitted command %v to server %v", cmd, i)
+					tester.AnnotateInfo(text, text)
 					leader = i
 				}
 			}
@@ -803,6 +934,7 @@ func TestFigure83C(t *testing.T) {

 		if leader != -1 {
 			ts.g.ShutdownServer(leader)
+			tester.AnnotateShutdown([]int{leader})
 			nup -= 1
 		}

@@ -810,6 +942,7 @@ func TestFigure83C(t *testing.T) {
 			s := rand.Int() % servers
 			if ts.srvs[s].Raft() == nil {
 				ts.restart(s)
+				tester.AnnotateRestart([]int{s})
 				nup += 1
 			}
 		}
@@ -820,9 +953,9 @@ func TestFigure83C(t *testing.T) {
 			ts.restart(i)
 		}
 	}
+	tester.AnnotateRestartAll()

 	ts.one(rand.Int(), servers, true)
 }

 func TestUnreliableAgree3C(t *testing.T) {
@@ -830,6 +963,7 @@ func TestUnreliableAgree3C(t *testing.T) {
 	ts := makeTest(t, servers, false, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestUnreliableAgree3C", servers)
 	ts.Begin("Test (3C): unreliable agreement")

 	var wg sync.WaitGroup
@@ -858,6 +992,7 @@ func TestFigure8Unreliable3C(t *testing.T) {
 	ts := makeTest(t, servers, false, false)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestFigure8Unreliable3C", servers)
 	ts.Begin("Test (3C): Figure 8 (unreliable)")

 	ts.one(rand.Int()%10000, 1, true)
@@ -869,7 +1004,12 @@ func TestFigure8Unreliable3C(t *testing.T) {
 		}
 		leader := -1
 		for i := 0; i < servers; i++ {
-			_, _, ok := ts.srvs[i].Raft().Start(rand.Int() % 10000)
+			cmd := rand.Int() % 10000
+			_, _, ok := ts.srvs[i].Raft().Start(cmd)
+			if ok {
+				text := fmt.Sprintf("submitted command %v to server %v", cmd, i)
+				tester.AnnotateInfo(text, text)
+			}
 			if ok && ts.g.IsConnected(i) {
 				leader = i
 			}
@@ -885,6 +1025,7 @@ func TestFigure8Unreliable3C(t *testing.T) {

 		if leader != -1 && (rand.Int()%1000) < int(RaftElectionTimeout/time.Millisecond)/2 {
 			ts.g.DisconnectAll(leader)
+			tester.AnnotateConnection(ts.g.GetConnected())
 			nup -= 1
 		}

@@ -892,6 +1033,7 @@ func TestFigure8Unreliable3C(t *testing.T) {
 			s := rand.Int() % servers
 			if !ts.g.IsConnected(s) {
 				ts.g.ConnectOne(s)
+				tester.AnnotateConnection(ts.g.GetConnected())
 				nup += 1
 			}
 		}
@@ -902,9 +1044,9 @@ func TestFigure8Unreliable3C(t *testing.T) {
 			ts.g.ConnectOne(i)
 		}
 	}
+	tester.AnnotateConnection(ts.g.GetConnected())

 	ts.one(rand.Int()%10000, servers, true)
 }

 func internalChurn(t *testing.T, reliable bool) {
@@ -914,8 +1056,10 @@ func internalChurn(t *testing.T, reliable bool) {
 	defer ts.cleanup()

 	if ts.IsReliable() {
+		tester.AnnotateTest("TestReliableChurn3C", servers)
 		ts.Begin("Test (3C): churn")
 	} else {
+		tester.AnnotateTest("TestUnreliableChurn3C", servers)
 		ts.Begin("Test (3C): unreliable churn")
 	}

@@ -968,6 +1112,7 @@ func internalChurn(t *testing.T, reliable bool) {
 		ret = values
 	}

+	startcli := tester.GetAnnotateTimestamp()
 	ncli := 3
 	cha := []chan []int{}
 	for i := 0; i < ncli; i++ {
@@ -979,20 +1124,24 @@ func internalChurn(t *testing.T, reliable bool) {
 		if (rand.Int() % 1000) < 200 {
 			i := rand.Int() % servers
 			ts.g.DisconnectAll(i)
+			tester.AnnotateConnection(ts.g.GetConnected())
 		}

 		if (rand.Int() % 1000) < 500 {
 			i := rand.Int() % servers
 			if ts.srvs[i].raft == nil {
 				ts.restart(i)
+				tester.AnnotateRestart([]int{i})
 			}
 			ts.g.ConnectOne(i)
+			tester.AnnotateConnection(ts.g.GetConnected())
 		}

 		if (rand.Int() % 1000) < 200 {
 			i := rand.Int() % servers
 			if ts.srvs[i].raft != nil {
 				ts.g.ShutdownServer(i)
+				tester.AnnotateShutdown([]int{i})
 			}
 		}

@@ -1011,9 +1160,14 @@ func internalChurn(t *testing.T, reliable bool) {
 		}
 		ts.g.ConnectOne(i)
 	}
+	tester.AnnotateRestartAll()
+	tester.AnnotateConnection(ts.g.GetConnected())

 	atomic.StoreInt32(&stop, 1)
+	textcli := fmt.Sprintf("%v clients submitting commands concurrently", ncli)
+	tester.AnnotateInfoInterval(startcli, textcli, textcli)

+	tester.AnnotateCheckerBegin("checking if any client has failed")
 	values := []int{}
 	for i := 0; i < ncli; i++ {
 		vv := <-cha[i]
@@ -1022,6 +1176,7 @@ func internalChurn(t *testing.T, reliable bool) {
 		}
 		values = append(values, vv...)
 	}
+	tester.AnnotateCheckerSuccess("none of the clients have failed", "OK")

 	time.Sleep(RaftElectionTimeout)

@@ -1033,10 +1188,14 @@ func internalChurn(t *testing.T, reliable bool) {
 		if vi, ok := v.(int); ok {
 			really = append(really, vi)
 		} else {
+			text := fmt.Sprintf("committed value %v is not an integer", v)
+			tester.AnnotateCheckerFailure(text, text)
 			t.Fatalf("not an int")
 		}
 	}

+	tester.AnnotateCheckerBegin(
+		"checking if committed values observed by the clients remain in the log")
 	for _, v1 := range values {
 		ok := false
 		for _, v2 := range really {
@@ -1048,7 +1207,7 @@ func internalChurn(t *testing.T, reliable bool) {
 			ts.t.Fatalf("didn't find a value")
 		}
 	}
+	tester.AnnotateCheckerSuccess("committed values remain in the log", "OK")
 }

 func TestReliableChurn3C(t *testing.T) {
@@ -1069,6 +1228,8 @@ func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash
 	ts := makeTest(t, servers, reliable, true)
 	defer ts.cleanup()

+	// Inconsistent with other test cases, but don't want to change API.
+	tester.AnnotateTest(name, servers)
 	ts.Begin(name)

 	ts.one(rand.Int(), servers, true)
@@ -1084,18 +1245,23 @@ func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash

 	if disconnect {
 		ts.g.DisconnectAll(victim)
+		tester.AnnotateConnection(ts.g.GetConnected())
 		ts.one(rand.Int(), servers-1, true)
 	}
 	if crash {
 		ts.g.ShutdownServer(victim)
+		tester.AnnotateShutdown([]int{victim})
 		ts.one(rand.Int(), servers-1, true)
 	}

 	// perhaps send enough to get a snapshot
+	start := tester.GetAnnotateTimestamp()
 	nn := (SnapShotInterval / 2) + (rand.Int() % SnapShotInterval)
 	for i := 0; i < nn; i++ {
 		ts.srvs[sender].Raft().Start(rand.Int())
 	}
+	text := fmt.Sprintf("submitting %v commands to %v", nn, sender)
+	tester.AnnotateInfoInterval(start, text, text)

 	// let applier threads catch up with the Start()'s
 	if disconnect == false && crash == false {
@@ -1114,11 +1280,13 @@ func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash
 		// reconnect a follower, who maybe behind and
 		// needs to rceive a snapshot to catch up.
 		ts.g.ConnectOne(victim)
+		tester.AnnotateConnection(ts.g.GetConnected())
 		ts.one(rand.Int(), servers, true)
 		leader1 = ts.checkOneLeader()
 	}
 	if crash {
 		ts.restart(victim)
+		tester.AnnotateRestart([]int{victim})
 		ts.one(rand.Int(), servers, true)
 		leader1 = ts.checkOneLeader()
 	}
@@ -1155,6 +1323,7 @@ func TestSnapshotAllCrash3D(t *testing.T) {
 	ts := makeTest(t, servers, false, true)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestSnapshotAllCrash3D", servers)
 	ts.Begin("Test (3D): crash and restart all servers")

 	ts.one(rand.Int(), servers, true)
@@ -1170,11 +1339,15 @@ func TestSnapshotAllCrash3D(t *testing.T) {

 		// crash all
 		ts.g.Shutdown()
+		tester.AnnotateShutdownAll()
 		ts.g.StartServers()
+		tester.AnnotateRestartAll()

 		index2 := ts.one(rand.Int(), servers, true)
 		if index2 < index1+1 {
-			t.Fatalf("index decreased from %v to %v", index1, index2)
+			msg := fmt.Sprintf("index decreased from %v to %v", index1, index2)
+			tester.AnnotateCheckerFailure("incorrect behavior: index decreased", msg)
+			t.Fatalf(msg)
 		}
 	}
 }
@@ -1186,6 +1359,7 @@ func TestSnapshotInit3D(t *testing.T) {
 	ts := makeTest(t, servers, false, true)
 	defer ts.cleanup()

+	tester.AnnotateTest("TestSnapshotInit3D", servers)
 	ts.Begin("Test (3D): snapshot initialization after crash")
 	ts.one(rand.Int(), servers, true)

@@ -1196,13 +1370,17 @@ func TestSnapshotInit3D(t *testing.T) {
 	}

 	ts.g.Shutdown()
+	tester.AnnotateShutdownAll()
 	ts.g.StartServers()
+	tester.AnnotateRestartAll()

 	// a single op, to get something to be written back to persistent storage.
 	ts.one(rand.Int(), servers, true)

 	ts.g.Shutdown()
+	tester.AnnotateShutdownAll()
 	ts.g.StartServers()
+	tester.AnnotateRestartAll()

 	// do another op to trigger potential bug
 	ts.one(rand.Int(), servers, true)
@@ -51,6 +51,7 @@ func newRfsrv(ts *Test, srv int, ends []*labrpc.ClientEnd, persister *tester.Per
 		// ideally Raft should send it up on applyCh...
 		err := s.ingestSnap(snapshot, -1)
 		if err != "" {
+			tester.AnnotateCheckerFailureBeforeExit("failed to ingest snapshot", err)
 			ts.t.Fatal(err)
 		}
 	}
@@ -106,6 +107,7 @@ func (rs *rfsrv) applier(applyCh chan raftapi.ApplyMsg) {
 				err_msg = fmt.Sprintf("server %v apply out of order %v", rs.me, m.CommandIndex)
 			}
 			if err_msg != "" {
+				tester.AnnotateCheckerFailureBeforeExit("apply error", err_msg)
 				log.Fatalf("apply error: %v", err_msg)
 				rs.applyErr = err_msg
 				// keep reading after error so that Raft doesn't block
@@ -149,12 +151,18 @@ func (rs *rfsrv) applierSnap(applyCh chan raftapi.ApplyMsg) {
 					xlog = append(xlog, rs.logs[j])
 				}
 				e.Encode(xlog)
+				start := tester.GetAnnotateTimestamp()
 				rs.raft.Snapshot(m.CommandIndex, w.Bytes())
+				details := fmt.Sprintf(
+					"snapshot created after applying the command at index %v",
+					m.CommandIndex)
+				tester.AnnotateInfoInterval(start, "snapshot created", details)
 			}
 		} else {
 			// Ignore other types of ApplyMsg.
 		}
 		if err_msg != "" {
+			tester.AnnotateCheckerFailureBeforeExit("apply error", err_msg)
 			log.Fatalf("apply error: %v", err_msg)
 			rs.applyErr = err_msg
 			// keep reading after error so that Raft doesn't block
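`GetAnnotateTimestamp` plus `AnnotateInfoInterval` is the general recipe for annotating a span of time rather than an instant: capture the start, run the operation, then record the interval. A sketch of the same pattern, where `compact` is a hypothetical stand-in for any slow step worth an interval annotation (not part of this commit):

// timeCompaction is a sketch showing the interval-annotation idiom.
func timeCompaction() {
	start := tester.GetAnnotateTimestamp()

	compact()

	// The interval runs from start to the moment of this call and appears
	// in the "$ Test Info" lane of the visualization.
	tester.AnnotateInfoInterval(start, "compaction", "log compaction finished")
}

// compact is hypothetical; it stands in for the operation being timed.
func compact() { time.Sleep(10 * time.Millisecond) }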
@@ -169,6 +177,7 @@ func (rs *rfsrv) ingestSnap(snapshot []byte, index int) string {
 	defer rs.mu.Unlock()
 
 	if snapshot == nil {
+		tester.AnnotateCheckerFailureBeforeExit("failed to ingest snapshot", "nil snapshot")
 		log.Fatalf("nil snapshot")
 		return "nil snapshot"
 	}
@@ -178,6 +187,8 @@ func (rs *rfsrv) ingestSnap(snapshot []byte, index int) string {
 	var xlog []any
 	if d.Decode(&lastIncludedIndex) != nil ||
 		d.Decode(&xlog) != nil {
+		text := "failed to decode snapshot"
+		tester.AnnotateCheckerFailureBeforeExit(text, text)
 		log.Fatalf("snapshot decode error")
 		return "snapshot Decode() error"
 	}
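`AnnotateCheckerFailureBeforeExit` exists because `log.Fatalf` terminates the process immediately, before `Config.Cleanup` would normally flush annotations to the visualization file; the BeforeExit variant records the failure and writes the file first. A sketch of the idiom, with a hypothetical `validate` helper (neither function is part of this commit):

func mustValidate(state []byte) {
	if err := validate(state); err != nil {
		// Flush annotations before log.Fatalf kills the process.
		tester.AnnotateCheckerFailureBeforeExit("invalid state", err.Error())
		log.Fatalf("invalid state: %v", err)
	}
}

// validate is hypothetical; any check that may abort the test works the same way.
func validate(state []byte) error {
	if len(state) == 0 {
		return errors.New("empty state")
	}
	return nil
}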
@@ -62,6 +62,7 @@ func (ts *Test) restart(i int) {
 }
 
 func (ts *Test) checkOneLeader() int {
+	tester.AnnotateCheckerBegin("checking for a single leader")
 	for iters := 0; iters < 10; iters++ {
 		ms := 450 + (rand.Int63() % 100)
 		time.Sleep(time.Duration(ms) * time.Millisecond)
@@ -78,6 +79,8 @@ func (ts *Test) checkOneLeader() int {
 		lastTermWithLeader := -1
 		for term, leaders := range leaders {
 			if len(leaders) > 1 {
+				details := fmt.Sprintf("multiple leaders in term %v = %v", term, leaders)
+				tester.AnnotateCheckerFailure("multiple leaders", details)
 				ts.Fatalf("term %d has %d (>1) leaders", term, len(leaders))
 			}
 			if term > lastTermWithLeader {
@@ -86,14 +89,20 @@ func (ts *Test) checkOneLeader() int {
 		}
 
 		if len(leaders) != 0 {
+			details := fmt.Sprintf("leader in term %v = %v",
+				lastTermWithLeader, leaders[lastTermWithLeader][0])
+			tester.AnnotateCheckerSuccess(details, details)
 			return leaders[lastTermWithLeader][0]
 		}
 	}
+	details := fmt.Sprintf("unable to find a leader")
+	tester.AnnotateCheckerFailure("no leader", details)
 	ts.Fatalf("expected one leader, got none")
 	return -1
 }
 
 func (ts *Test) checkTerms() int {
+	tester.AnnotateCheckerBegin("checking term agreement")
 	term := -1
 	for i := 0; i < ts.n; i++ {
 		if ts.g.IsConnected(i) {
@@ -101,10 +110,15 @@ func (ts *Test) checkTerms() int {
 			if term == -1 {
 				term = xterm
 			} else if term != xterm {
+				details := fmt.Sprintf("node ids -> terms = { %v -> %v; %v -> %v }",
+					i - 1, term, i, xterm)
+				tester.AnnotateCheckerFailure("term disagreed", details)
 				ts.Fatalf("servers disagree on term")
 			}
 		}
 	}
+	details := fmt.Sprintf("term = %v", term)
+	tester.AnnotateCheckerSuccess("term agreed", details)
 	return term
 }
 
@@ -134,14 +148,32 @@ func (ts *Test) checkLogs(i int, m raftapi.ApplyMsg) (string, bool) {
 // check that none of the connected servers
 // thinks it is the leader.
 func (ts *Test) checkNoLeader() {
+	tester.AnnotateCheckerBegin("checking no unexpected leader among connected servers")
 	for i := 0; i < ts.n; i++ {
 		if ts.g.IsConnected(i) {
 			_, is_leader := ts.srvs[i].GetState()
 			if is_leader {
-				ts.Fatalf("expected no leader among connected servers, but %v claims to be leader", i)
+				details := fmt.Sprintf("leader = %v", i)
+				tester.AnnotateCheckerFailure("unexpected leader found", details)
+				ts.Fatalf(details)
 			}
 		}
 	}
+	tester.AnnotateCheckerSuccess("no unexpected leader", "no unexpected leader")
+}
+
+func (ts *Test) checkNoAgreement(index int) {
+	text := fmt.Sprintf("checking no unexpected agreement at index %v", index)
+	tester.AnnotateCheckerBegin(text)
+	n, _ := ts.nCommitted(index)
+	if n > 0 {
+		desp := fmt.Sprintf("unexpected agreement at index %v", index)
+		details := fmt.Sprintf("%v server(s) commit incorrectly index", n)
+		tester.AnnotateCheckerFailure(desp, details)
+		ts.Fatalf("%v committed but no majority", n)
+	}
+	desp := fmt.Sprintf("no unexpected agreement at index %v", index)
+	tester.AnnotateCheckerSuccess(desp, "OK")
 }
 
 // how many servers think a log entry is committed?
@@ -153,6 +185,7 @@ func (ts *Test) nCommitted(index int) (int, any) {
 	var cmd any = nil
 	for _, rs := range ts.srvs {
 		if rs.applyErr != "" {
+			tester.AnnotateCheckerFailure("apply error", rs.applyErr)
 			ts.t.Fatal(rs.applyErr)
 		}
 
@@ -160,8 +193,10 @@ func (ts *Test) nCommitted(index int) (int, any) {
 
 		if ok {
 			if count > 0 && cmd != cmd1 {
-				ts.Fatalf("committed values do not match: index %v, %v, %v",
+				text := fmt.Sprintf("committed values at index %v do not match (%v != %v)",
 					index, cmd, cmd1)
+				tester.AnnotateCheckerFailure("unmatched committed values", text)
+				ts.Fatalf(text)
 			}
 			count += 1
 			cmd = cmd1
@@ -183,6 +218,16 @@ func (ts *Test) nCommitted(index int) (int, any) {
 // if retry==false, calls Start() only once, in order
 // to simplify the early Lab 3B tests.
 func (ts *Test) one(cmd any, expectedServers int, retry bool) int {
+	var textretry string
+	if retry {
+		textretry = "with"
+	} else {
+		textretry = "without"
+	}
+	textcmd := fmt.Sprintf("%v", cmd)
+	textb := fmt.Sprintf("checking agreement of %.8s by at least %v servers %v retry",
+		textcmd, expectedServers, textretry)
+	tester.AnnotateCheckerBegin(textb)
 	t0 := time.Now()
 	starts := 0
 	for time.Since(t0).Seconds() < 10 && ts.checkFinished() == false {
@@ -214,12 +259,16 @@ func (ts *Test) one(cmd any, expectedServers int, retry bool) int {
 					// committed
 					if cmd1 == cmd {
 						// and it was the command we submitted.
+						desp := fmt.Sprintf("agreement of %.8s reached", textcmd)
+						tester.AnnotateCheckerSuccess(desp, "OK")
 						return index
 					}
 				}
 				time.Sleep(20 * time.Millisecond)
 			}
 			if retry == false {
+				desp := fmt.Sprintf("agreement of %.8s failed", textcmd)
+				tester.AnnotateCheckerFailure(desp, "failed after submitting command")
 				ts.Fatalf("one(%v) failed to reach agreement", cmd)
 			}
 		} else {
@@ -227,6 +276,8 @@ func (ts *Test) one(cmd any, expectedServers int, retry bool) int {
 		}
 	}
 	if ts.checkFinished() == false {
+		desp := fmt.Sprintf("agreement of %.8s failed", textcmd)
+		tester.AnnotateCheckerFailure(desp, "failed after 10-second timeout")
 		ts.Fatalf("one(%v) failed to reach agreement", cmd)
 	}
 	return -1
@@ -262,6 +313,10 @@ func (ts *Test) wait(index int, n int, startTerm int) any {
 	}
 	nd, cmd := ts.nCommitted(index)
 	if nd < n {
+		desp := fmt.Sprintf("less than %v servers commit index %v", n, index)
+		details := fmt.Sprintf(
+			"only %v (< %v) servers commit index %v at term %v", nd, n, index, startTerm)
+		tester.AnnotateCheckerFailure(desp, details)
 		ts.Fatalf("only %d decided for index %d; wanted %d",
 			nd, index, n)
 	}
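All of these checkers share one convention: `AnnotateCheckerBegin` stamps the start of a check, and exactly one of `AnnotateCheckerSuccess`, `AnnotateCheckerFailure`, or `AnnotateCheckerNeutral` closes it, so the whole check renders as a single colored interval. A sketch of a new checker written to that convention (the quorum rule is hypothetical; `ts.n`, `ts.g.IsConnected`, and `ts.Fatalf` are the real helpers used above):

func (ts *Test) checkQuorumSketch() {
	tester.AnnotateCheckerBegin("checking that a majority is connected")

	connected := 0
	for i := 0; i < ts.n; i++ {
		if ts.g.IsConnected(i) {
			connected++
		}
	}

	if connected <= ts.n/2 {
		details := fmt.Sprintf("only %v of %v servers connected", connected, ts.n)
		tester.AnnotateCheckerFailure("no majority", details)
		ts.Fatalf(details)
	}
	details := fmt.Sprintf("%v of %v servers connected", connected, ts.n)
	tester.AnnotateCheckerSuccess("majority connected", details)
}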
src/tester1/annotation.go (new file, 535 lines)
@@ -0,0 +1,535 @@
+package tester
+
+import (
+	"sync"
+	"os"
+	"os/signal"
+	"fmt"
+	"time"
+	"strings"
+	"slices"
+	"github.com/anishathalye/porcupine"
+	"6.5840/models1"
+)
+
+///
+/// Public interface.
+///
+
+type Annotation struct {
+	mu          *sync.Mutex
+	annotations []porcupine.Annotation
+	continuous  map[string]Continuous
+}
+
+type Continuous struct {
+	start   int64
+	desp    string
+	details string
+	bgcolor string
+}
+
+type FrameworkInfo struct {
+	mu        *sync.Mutex
+	nservers  int
+	connected []bool
+	crashed   []bool
+	ckbegin   CheckerBegin
+}
+
+type CheckerBegin struct {
+	ts      int64
+	details string
+}
+
+// Using a global variable feels disturbing, but we also can't figure out a
+// better way to support user-level annotations. An alternative would be
+// passing an Annotation object to the start-up function of servers and
+// clients, but that doesn't feel better.
+//
+// One potential problem with using a global Annotation object is that when
+// running multiple test cases, some zombie threads in previous test cases
+// could interfere with the current one. An ad-hoc fix at the user level would
+// be adding annotations only if the killed flag on the server is not set.
+var annotation *Annotation = mkAnnotation()
+var unit struct{} = captureSignal()
+var finfo *FrameworkInfo
+
+const (
+	COLOR_INFO    string = "#FAFAFA"
+	COLOR_NEUTRAL string = "#FFECB3"
+	COLOR_SUCCESS string = "#C8E6C9"
+	COLOR_FAILURE string = "#FFCDD2"
+	COLOR_FAULT   string = "#B3E5FC"
+	COLOR_USER    string = "#FFF176"
+)
+
+const (
+	TAG_CHECKER   string = "$ Checker"
+	TAG_PARTITION string = "$ Failure"
+	TAG_INFO      string = "$ Test Info"
+)
+
+func (cfg *Config) RetrieveAnnotations() []porcupine.Annotation {
+	annotations := annotation.retrieve()
+	return annotations
+}
+
+func AnnotatePointColor(
+	tag, desp, details, bgcolor string,
+) {
+	annotation.annotatePointColor(tag, desp, details, bgcolor)
+}
+
+func GetAnnotateTimestamp() int64 {
+	return timestamp()
+}
+
+func AnnotateIntervalColor(
+	tag string, start int64, desp, details, bgcolor string,
+) {
+	annotation.annotateIntervalColor(tag, start, desp, details, bgcolor)
+}
+
+func AnnotateContinuousColor(tag, desp, details, bgcolor string) {
+	annotation.annotateContinuousColor(tag, desp, details, bgcolor)
+}
+
+func AnnotateContinuousEnd(tag string) {
+	annotation.annotateContinuousEnd(tag)
+}
+
+// Used by users.
+
+func AnnotatePoint(tag, desp, details string) {
+	annotation.annotatePointColor(tag, desp, details, COLOR_USER)
+}
+
+func AnnotateInterval(tag string, start int64, desp, details string) {
+	annotation.annotateIntervalColor(tag, start, desp, details, COLOR_USER)
+}
+
+func AnnotateContinuous(tag, desp, details string) {
+	annotation.annotateContinuousColor(tag, desp, details, COLOR_USER)
+}
+
+// Used by the test framework.
+
+func AnnotateInfo(desp, details string) {
+	AnnotatePointColor(TAG_INFO, desp, details, COLOR_INFO)
+}
+
+func AnnotateInfoInterval(start int64, desp, details string) {
+	AnnotateIntervalColor(TAG_INFO, start, desp, details, COLOR_INFO)
+}
+
+func AnnotateTest(desp string, nservers int) {
+	details := fmt.Sprintf("%s (%d servers)", desp, nservers)
+	finfo = mkFrameworkInfo(nservers)
+	annotation.clear()
+
+	AnnotateInfo(details, details)
+}
+
+func AnnotateCheckerBegin(details string) {
+	finfo.mu.Lock()
+	defer finfo.mu.Unlock()
+
+	finfo.ckbegin = CheckerBegin{
+		ts:      timestamp(),
+		details: details,
+	}
+}
+
+func AnnotateCheckerEnd(desp, details, color string) {
+	finfo.mu.Lock()
+	defer finfo.mu.Unlock()
+
+	ckbegin := finfo.ckbegin
+
+	if ckbegin.ts == 0 {
+		// Annotate as a point-in-time if the begin timestamp is not set.
+		AnnotatePointColor(TAG_CHECKER, desp, details, color)
+		return
+	}
+
+	// Annotate as an interval if the begin timestamp is set.
+	d := fmt.Sprintf("%s: %s", ckbegin.details, details)
+	AnnotateIntervalColor(TAG_CHECKER, ckbegin.ts, desp, d, color)
+
+	// Reset the checker begin timestamp.
+	ckbegin.ts = 0
+}
+
+func AnnotateCheckerSuccess(desp, details string) {
+	AnnotateCheckerEnd(desp, details, COLOR_SUCCESS)
+}
+
+func AnnotateCheckerFailure(desp, details string) {
+	AnnotateCheckerEnd(desp, details, COLOR_FAILURE)
+}
+
+func AnnotateCheckerNeutral(desp, details string) {
+	AnnotateCheckerEnd(desp, details, COLOR_NEUTRAL)
+}
+
+// Used before log.Fatalf.
+func AnnotateCheckerFailureBeforeExit(desp, details string) {
+	AnnotateCheckerFailure(desp, details)
+	annotation.cleanup(true, "test failed")
+}
+
+// Two functions to annotate partitions: AnnotateConnection and
+// AnnotateTwoPartitions. The connected field of ServerGrp (in group.go) is
+// precise if and only if ServerGrp.Partition is not used. Thus, we use the
+// latter when ServerGrp.Partition is involved, and the former otherwise.
+func AnnotateConnection(connection []bool) {
+	finfo.mu.Lock()
+	defer finfo.mu.Unlock()
+
+	if slices.Equal(finfo.connected, connection) {
+		// Nothing to do if the connection is unchanged.
+		return
+	}
+
+	copy(finfo.connected, connection)
+
+	annotateFault()
+}
+
+func annotateFault() {
+	trues := make([]bool, finfo.nservers)
+	for id := range trues {
+		trues[id] = true
+	}
+	falses := make([]bool, finfo.nservers)
+	if slices.Equal(trues, finfo.connected) && slices.Equal(falses, finfo.crashed) {
+		// No annotation when no partitions and no crashes.
+		AnnotateContinuousEnd(TAG_PARTITION)
+		return
+	}
+
+	// Now, each disconnected server sits in its own partition, connected
+	// servers in one partition; crashed servers are indicated at the end.
+	conn := make([]int, 0)
+	crashes := make([]int, 0)
+	var builder strings.Builder
+	builder.WriteString("partition = ")
+	for id, connected := range finfo.connected {
+		if finfo.crashed[id] {
+			crashes = append(crashes, id)
+			continue
+		}
+		if connected {
+			conn = append(conn, id)
+		} else {
+			builder.WriteString(fmt.Sprintf("[%v] ", id))
+		}
+	}
+	if len(conn) > 0 {
+		builder.WriteString(fmt.Sprintf("%v", conn))
+	}
+	if len(crashes) > 0 {
+		builder.WriteString(fmt.Sprintf(" / crash = %v", crashes))
+	}
+	text := builder.String()
+	AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT)
+}
+
+func AnnotateTwoPartitions(p1 []int, p2 []int) {
+	// A bit hard to check whether the partition actually changes, so just
+	// annotate on every invocation.
+	// TODO
+	text := fmt.Sprintf("%v %v", p1, p2)
+	AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT)
+}
+
+func AnnotateShutdown(servers []int) {
+	finfo.mu.Lock()
+	defer finfo.mu.Unlock()
+
+	changed := false
+	for _, id := range servers {
+		if !finfo.crashed[id] {
+			changed = true
+		}
+		finfo.crashed[id] = true
+	}
+
+	if !changed {
+		// Nothing to do if the set of crashed servers is unchanged.
+		return
+	}
+
+	annotateFault()
+}
+
+func AnnotateShutdownAll() {
+	finfo.mu.Lock()
+	n := finfo.nservers
+	finfo.mu.Unlock()
+
+	servers := make([]int, n)
+	for i := range servers {
+		servers[i] = i
+	}
+	AnnotateShutdown(servers)
+}
+
+func AnnotateRestart(servers []int) {
+	finfo.mu.Lock()
+	defer finfo.mu.Unlock()
+
+	changed := false
+	for _, id := range servers {
+		if finfo.crashed[id] {
+			changed = true
+		}
+		finfo.crashed[id] = false
+	}
+
+	if !changed {
+		// Nothing to do if the set of crashed servers is unchanged.
+		return
+	}
+
+	annotateFault()
+}
+
+func AnnotateRestartAll() {
+	finfo.mu.Lock()
+	n := finfo.nservers
+	finfo.mu.Unlock()
+
+	servers := make([]int, n)
+	for i := range servers {
+		servers[i] = i
+	}
+	AnnotateRestart(servers)
+}
+
+///
+/// Internal.
+///
+
+func timestamp() int64 {
+	return int64(time.Since(time.Unix(0, 0)))
+}
+
+func (an *Annotation) retrieve() []porcupine.Annotation {
+	an.mu.Lock()
+	x := an.annotations
+	t := timestamp()
+	for tag, cont := range an.continuous {
+		a := porcupine.Annotation{
+			Tag:             tag,
+			Start:           cont.start,
+			End:             t,
+			Description:     cont.desp,
+			Details:         cont.details,
+			BackgroundColor: cont.bgcolor,
+		}
+		x = append(x, a)
+	}
+	an.annotations = make([]porcupine.Annotation, 0)
+	an.continuous = make(map[string]Continuous)
+	an.mu.Unlock()
+	return x
+}
+
+func (an *Annotation) clear() {
+	an.mu.Lock()
+	an.annotations = make([]porcupine.Annotation, 0)
+	an.continuous = make(map[string]Continuous)
+	an.mu.Unlock()
+}
+
+func (an *Annotation) annotatePointColor(
+	tag, desp, details, bgcolor string,
+) {
+	an.mu.Lock()
+	t := timestamp()
+	a := porcupine.Annotation{
+		Tag:             tag,
+		Start:           t,
+		Description:     desp,
+		Details:         details,
+		BackgroundColor: bgcolor,
+	}
+	an.annotations = append(an.annotations, a)
+	an.mu.Unlock()
+}
+
+func (an *Annotation) annotateIntervalColor(
+	tag string, start int64, desp, details, bgcolor string,
+) {
+	an.mu.Lock()
+	a := porcupine.Annotation{
+		Tag:             tag,
+		Start:           start,
+		End:             timestamp(),
+		Description:     desp,
+		Details:         details,
+		BackgroundColor: bgcolor,
+	}
+	an.annotations = append(an.annotations, a)
+	an.mu.Unlock()
+}
+
+func (an *Annotation) annotateContinuousColor(
+	tag, desp, details, bgcolor string,
+) {
+	an.mu.Lock()
+	defer an.mu.Unlock()
+
+	cont, ok := an.continuous[tag]
+	if !ok {
+		// The first continuous annotation for tag. Simply add it to the
+		// continuous map.
+		an.continuous[tag] = Continuous{
+			start:   timestamp(),
+			desp:    desp,
+			details: details,
+			bgcolor: bgcolor,
+		}
+		return
+	}
+
+	// Subsequent continuous annotation for tag. Concretize the previous
+	// annotation and add this one to the continuous map.
+	t := timestamp()
+	aprev := porcupine.Annotation{
+		Tag:             tag,
+		Start:           cont.start,
+		End:             t,
+		Description:     cont.desp,
+		Details:         cont.details,
+		BackgroundColor: cont.bgcolor,
+	}
+	an.annotations = append(an.annotations, aprev)
+	an.continuous[tag] = Continuous{
+		// XXX: If the start timestamp of an event is too close to the end
+		// timestamp of another event, Porcupine seems to overlap the two
+		// events. We add a delta (1000) as a workaround; remove this once
+		// the issue is resolved.
+		start:   t + 1000,
+		desp:    desp,
+		details: details,
+		bgcolor: bgcolor,
+	}
+}
+
+func (an *Annotation) annotateContinuousEnd(tag string) {
+	an.mu.Lock()
+	defer an.mu.Unlock()
+
+	cont, ok := an.continuous[tag]
+	if !ok {
+		// Nothing to end since there's no on-going continuous annotation for
+		// tag.
+		return
+	}
+
+	// End the on-going continuous annotation for tag.
+	t := timestamp()
+	aprev := porcupine.Annotation{
+		Tag:             tag,
+		Start:           cont.start,
+		End:             t,
+		Description:     cont.desp,
+		Details:         cont.details,
+		BackgroundColor: cont.bgcolor,
+	}
+	an.annotations = append(an.annotations, aprev)
+	delete(an.continuous, tag)
+}
+
+func (an *Annotation) cleanup(failed bool, end string) {
+	enabled := os.Getenv("VIS_ENABLE")
+	if enabled == "never" || (!failed && enabled != "always") {
+		// Simply clean up the annotations without producing the vis file if
+		// VIS_ENABLE is set to "never", or if the test passes and VIS_ENABLE
+		// is not set to "always".
+		an.clear()
+		return
+	}
+
+	annotations := an.retrieve()
+	if len(annotations) == 0 {
+		// Skip empty annotations.
+		return
+	}
+
+	// XXX: Make the last annotation an interval one to work around
+	// Porcupine's issue. Consider removing this once the issue is fixed.
+	t := timestamp()
+	aend := porcupine.Annotation{
+		Tag:             TAG_INFO,
+		Start:           t,
+		End:             t + 1000,
+		Description:     end,
+		Details:         end,
+		BackgroundColor: COLOR_INFO,
+	}
+	annotations = append(annotations, aend)
+
+	fpath := os.Getenv("VIS_FILE")
+	var file *os.File
+	var err error
+	if fpath == "" {
+		// Save the vis file in a temporary file.
+		file, err = os.CreateTemp("", "porcupine-*.html")
+	} else {
+		file, err = os.OpenFile(fpath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
+	}
+	if err != nil {
+		fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
+		return
+	}
+
+	// Create a fresh linearization info without any client operations and use
+	// models.KvModel simply as a placeholder.
+	info := porcupine.LinearizationInfo{}
+	info.AddAnnotations(annotations)
+	porcupine.Visualize(models.KvModel, info, file)
+	fmt.Printf("info: wrote visualization to %s\n", file.Name())
+}
+
+func mkAnnotation() *Annotation {
+	an := Annotation{
+		mu:          new(sync.Mutex),
+		annotations: make([]porcupine.Annotation, 0),
+		continuous:  make(map[string]Continuous),
+	}
+
+	return &an
+}
+
+func mkFrameworkInfo(nservers int) *FrameworkInfo {
+	conn := make([]bool, nservers)
+	for id := range conn {
+		conn[id] = true
+	}
+
+	finfo := FrameworkInfo{
+		mu:        new(sync.Mutex),
+		nservers:  nservers,
+		connected: conn,
+		crashed:   make([]bool, nservers),
+	}
+
+	return &finfo
+}
+
+func captureSignal() struct{} {
+	// Capture SIGINT to visualize on interruption.
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, os.Interrupt)
+	go func() {
+		for range c {
+			annotation.cleanup(true, "interrupted")
+			os.Exit(1)
+		}
+	}()
+
+	return struct{}{}
+}
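Beyond the framework hooks, `AnnotatePoint`, `AnnotateInterval`, and `AnnotateContinuous` are the entry points meant for lab code itself; they render with the yellow COLOR_USER background. A hedged sketch of how a lab's Raft implementation might call them (the `Raft` type and `killed` method are assumptions about typical lab code; per the comment in this file, gating on the killed flag keeps zombie goroutines from a finished test from annotating the next one):

// Sketch only: assumes a lab Raft struct with a killed() method.
func (rf *Raft) noteLeadership(term int) {
	if rf.killed() {
		return // avoid annotating from a zombie goroutine of an old test
	}
	// A point annotation marks a single instant in the tag's lane.
	tester.AnnotatePoint("Raft", "became leader",
		fmt.Sprintf("won election in term %v", term))
	// A continuous annotation stays open until the tag is annotated again
	// or ended with AnnotateContinuousEnd.
	tester.AnnotateContinuous("Raft role", "leader",
		fmt.Sprintf("leader since term %v", term))
}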
@@ -79,6 +79,11 @@ func (cfg *Config) Cleanup() {
 	cfg.Clnts.cleanup()
 	cfg.Groups.cleanup()
 	cfg.net.Cleanup()
+	if cfg.t.Failed() {
+		annotation.cleanup(true, "test failed")
+	} else {
+		annotation.cleanup(false, "test passed")
+	}
 	cfg.CheckTimeout()
 }
 
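With the Cleanup hook above, every test flushes its annotations exactly once, with `failed` derived from `t.Failed()`. Combined with the gating in `annotation.cleanup`, the effective policy is: `VIS_ENABLE=never` suppresses the file, unset writes it only on failure, and `always` writes it on every run, to `VIS_FILE` if set or a temporary `porcupine-*.html` otherwise. A hypothetical `TestMain` forcing visualization for a whole package run (the env vars can equally be set from the shell):

func TestMain(m *testing.M) {
	// Force a visualization file for every test, pass or fail.
	os.Setenv("VIS_ENABLE", "always")
	os.Setenv("VIS_FILE", "/tmp/vis.html")
	os.Exit(m.Run())
}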
@@ -194,6 +194,10 @@ func (sg *ServerGrp) IsConnected(i int) bool {
 	return sg.connected[i]
 }
 
+func (sg *ServerGrp) GetConnected() []bool {
+	return sg.connected
+}
+
 // Maximum log size across all servers
 func (sg *ServerGrp) LogSize() int {
 	logsize := 0
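`GetConnected` exists so tests can hand the group's live connectivity bitmap straight to the annotation layer. A sketch of the pairing, mirroring the `snapcommon` change earlier in this diff (`reconnectAndAnnotate` is a hypothetical helper, not part of this commit):

func (ts *Test) reconnectAndAnnotate(victim int) {
	ts.g.ConnectOne(victim)
	// AnnotateConnection compares against the previously recorded state and
	// only emits a new fault annotation when the topology actually changed.
	tester.AnnotateConnection(ts.g.GetConnected())
}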