commit eacb273f95 (parent c8e7d779c2): update
@@ -37,6 +37,7 @@ REFERENCE_FILES=(
    src/tester1/group.go
    src/tester1/persister.go
    src/tester1/srv.go
    src/tester1/annotation.go

    # lab 4
    src/kvraft1/rsm/rsm_test.go
@@ -2,8 +2,7 @@ package kvtest

import (
    "fmt"
    "io/ioutil"
    //"log"
    "os"
    "sync"
    "testing"
    "time"
@@ -46,7 +45,7 @@ func (log *OpLog) Read() []porcupine.Operation {
// absolute timestamps with `time.Now().UnixNano()` (which uses the wall
// clock), we measure time relative to `t0` using `time.Since(t0)`, which uses
// the monotonic clock
var t0 = time.Now()
var t0 = time.Unix(0, 0)

func Get(cfg *tester.Config, ck IKVClerk, key string, log *OpLog, cli int) (string, rpc.Tversion, rpc.Err) {
    start := int64(time.Since(t0))
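With a fixed t0, every timestamp in the operation log and in the tester's annotations is an offset from the same reference point. The following is a minimal sketch of how an operation might be timed against t0; it is not part of the commit, OpLog.Append is assumed to exist as in the rest of this file, and the Input/Output values are placeholders rather than the real models1 encoding.

// Sketch (assumption-labeled, not from the commit).
func recordOp(log *OpLog, cli int, do func() string) {
    call := int64(time.Since(t0)) // invocation timestamp, shared time base with annotations
    out := do()                   // the actual clerk RPC would happen here
    ret := int64(time.Since(t0))  // response timestamp
    log.Append(porcupine.Operation{ // Append assumed from the surrounding file
        ClientId: cli,
        Input:    nil, // placeholder encoding
        Output:   out, // placeholder encoding
        Call:     call,
        Return:   ret,
    })
}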
@@ -84,14 +83,26 @@ func Put(cfg *tester.Config, ck IKVClerk, key string, value string, version rpc.

// Checks that the log of Clerk.Put's and Clerk.Get's is linearizable (see
// linearizability-faq.txt)
func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) {
    //log.Printf("oplog len %v %v", ts.oplog.Len(), ts.oplog)
func checkPorcupine(
    t *testing.T, opLog *OpLog, annotations []porcupine.Annotation, nsec time.Duration,
) {
    enabled := os.Getenv("VIS_ENABLE")
    fpath := os.Getenv("VIS_FILE")
    res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), nsec)
    if res == porcupine.Illegal {
        file, err := ioutil.TempFile("", "porcupine-*.html")
        if err != nil {
            fmt.Printf("info: failed to create temp file for visualization")
        var file *os.File
        var err error
        if fpath == "" {
            // Save the vis file in a temporary file.
            file, err = os.CreateTemp("", "porcupine-*.html")
        } else {
            file, err = os.OpenFile(fpath, os.O_RDWR | os.O_CREATE | os.O_TRUNC, 0644)
        }
        if err != nil {
            fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
        } else if enabled != "never" {
            // Don't produce visualization file if VIS_ENABLE is set to "never".
            info.AddAnnotations(annotations)
            err = porcupine.Visualize(models.KvModel, info, file)
            if err != nil {
                fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
@@ -103,6 +114,29 @@ func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) {
    } else if res == porcupine.Unknown {
        fmt.Println("info: linearizability check timed out, assuming history is ok")
    }

    // The result is either legal or unknown.
    if enabled == "always" {
        var file *os.File
        var err error
        if fpath == "" {
            // Save the vis file in a temporary file.
            file, err = os.CreateTemp("", "porcupine-*.html")
        } else {
            file, err = os.OpenFile(fpath, os.O_RDWR | os.O_CREATE | os.O_TRUNC, 0644)
        }
        if err != nil {
            fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
            return
        }
        info.AddAnnotations(annotations)
        err = porcupine.Visualize(models.KvModel, info, file)
        if err != nil {
            fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
        } else {
            fmt.Printf("info: wrote history visualization to %s\n", file.Name())
        }
    }
}

// Porcupine
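The open-VIS_FILE-or-temp-file logic now appears twice above (in the Illegal branch and in the VIS_ENABLE="always" branch). A small helper along these lines could factor it out; this is only a sketch with a hypothetical name, not part of the commit.

// Sketch: select the visualization output file from the VIS_FILE environment variable.
func openVisFile() (*os.File, error) {
    fpath := os.Getenv("VIS_FILE")
    if fpath == "" {
        // No explicit path requested: fall back to a temporary file.
        return os.CreateTemp("", "porcupine-*.html")
    }
    return os.OpenFile(fpath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
}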
@@ -142,9 +176,13 @@ func (ts *Test) Put(ck IKVClerk, key string, value string, version rpc.Tversion,
}

func (ts *Test) CheckPorcupine() {
    checkPorcupine(ts.t, ts.oplog, linearizabilityCheckTimeout)
    ts.CheckPorcupineT(linearizabilityCheckTimeout)
}

func (ts *Test) CheckPorcupineT(nsec time.Duration) {
    checkPorcupine(ts.t, ts.oplog, nsec)
    // ts.RetrieveAnnotations() also clears the accumulated annotations so that
    // the vis file containing client operations (generated here) won't be
    // overridden by that without client operations (generated at cleanup time).
    annotations := ts.RetrieveAnnotations()
    checkPorcupine(ts.t, ts.oplog, annotations, nsec)
}
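Stripped of the environment-variable handling, the core of the new check is small. The sketch below is an assumption-labeled summary rather than code from the commit; it reuses only the porcupine and models calls already shown above, and the t.Fatal stands in for however the harness actually reports an illegal history.

// Sketch: check a history against the KV model and visualize it on failure.
func checkAndMaybeVisualize(t *testing.T, ops []porcupine.Operation,
    annotations []porcupine.Annotation, timeout time.Duration) {
    res, info := porcupine.CheckOperationsVerbose(models.KvModel, ops, timeout)
    if res == porcupine.Illegal {
        if f, err := os.CreateTemp("", "porcupine-*.html"); err == nil {
            info.AddAnnotations(annotations) // test annotations share the ops' time base
            porcupine.Visualize(models.KvModel, info, f)
            fmt.Printf("info: wrote history visualization to %s\n", f.Name())
        }
        t.Fatal("history is not linearizable")
    }
}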
@@ -29,6 +29,7 @@ func TestInitialElection3A(t *testing.T) {
    ts := makeTest(t, servers, true, false)
    defer ts.cleanup()

    tester.AnnotateTest("TestInitialElection3A", servers)
    ts.Begin("Test (3A): initial election")

    // is a leader elected?
@@ -58,24 +59,28 @@ func TestReElection3A(t *testing.T) {
    ts := makeTest(t, servers, true, false)
    defer ts.cleanup()

    tester.AnnotateTest("TestReElection3A", servers)
    ts.Begin("Test (3A): election after network failure")

    leader1 := ts.checkOneLeader()

    // if the leader disconnects, a new one should be elected.
    ts.g.DisconnectAll(leader1)
    tester.AnnotateConnection(ts.g.GetConnected())
    ts.checkOneLeader()

    // if the old leader rejoins, that shouldn't
    // disturb the new leader. and the old leader
    // should switch to follower.
    ts.g.ConnectOne(leader1)
    tester.AnnotateConnection(ts.g.GetConnected())
    leader2 := ts.checkOneLeader()

    // if there's no quorum, no new leader should
    // be elected.
    ts.g.DisconnectAll(leader2)
    ts.g.DisconnectAll((leader2 + 1) % servers)
    tester.AnnotateConnection(ts.g.GetConnected())
    time.Sleep(2 * RaftElectionTimeout)

    // check that the one connected server
@@ -84,10 +89,12 @@ func TestReElection3A(t *testing.T) {

    // if a quorum arises, it should elect a leader.
    ts.g.ConnectOne((leader2 + 1) % servers)
    tester.AnnotateConnection(ts.g.GetConnected())
    ts.checkOneLeader()

    // re-join of last node shouldn't prevent leader from existing.
    ts.g.ConnectOne(leader2)
    tester.AnnotateConnection(ts.g.GetConnected())
    ts.checkOneLeader()
}

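The pattern above, where every DisconnectAll/ConnectOne is immediately followed by tester.AnnotateConnection(ts.g.GetConnected()), recurs throughout the 3A/3B/3C hunks below. Hypothetical helpers like the following could keep the annotation in lockstep with the fault injection; this is a sketch, not part of the commit.

// Sketch: pair each connectivity change with its annotation (hypothetical helpers).
func (ts *Test) disconnectAnnotated(i int) {
    ts.g.DisconnectAll(i)
    tester.AnnotateConnection(ts.g.GetConnected())
}

func (ts *Test) connectAnnotated(i int) {
    ts.g.ConnectOne(i)
    tester.AnnotateConnection(ts.g.GetConnected())
}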
@ -96,6 +103,7 @@ func TestManyElections3A(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestManyElection3A", servers)
|
||||
ts.Begin("Test (3A): multiple elections")
|
||||
|
||||
ts.checkOneLeader()
|
||||
@ -109,6 +117,7 @@ func TestManyElections3A(t *testing.T) {
|
||||
ts.g.DisconnectAll(i1)
|
||||
ts.g.DisconnectAll(i2)
|
||||
ts.g.DisconnectAll(i3)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// either the current leader should still be alive,
|
||||
// or the remaining four should elect a new one.
|
||||
@ -117,6 +126,7 @@ func TestManyElections3A(t *testing.T) {
|
||||
ts.g.ConnectOne(i1)
|
||||
ts.g.ConnectOne(i2)
|
||||
ts.g.ConnectOne(i3)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
}
|
||||
ts.checkOneLeader()
|
||||
}
|
||||
@ -126,6 +136,7 @@ func TestBasicAgree3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestBasicAgree3B", servers)
|
||||
ts.Begin("Test (3B): basic agreement")
|
||||
|
||||
iters := 3
|
||||
@ -149,6 +160,7 @@ func TestRPCBytes3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestRPCBytes3B", servers)
|
||||
ts.Begin("Test (3B): RPC byte count")
|
||||
|
||||
ts.one(99, servers, false)
|
||||
@ -180,6 +192,7 @@ func TestFollowerFailure3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestFollowerFailure3B", servers)
|
||||
ts.Begin("Test (3B): test progressive failure of followers")
|
||||
|
||||
ts.one(101, servers, false)
|
||||
@ -187,6 +200,7 @@ func TestFollowerFailure3B(t *testing.T) {
|
||||
// disconnect one follower from the network.
|
||||
leader1 := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll((leader1 + 1) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// the leader and remaining follower should be
|
||||
// able to agree despite the disconnected follower.
|
||||
@ -198,6 +212,7 @@ func TestFollowerFailure3B(t *testing.T) {
|
||||
leader2 := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll((leader2 + 1) % servers)
|
||||
ts.g.DisconnectAll((leader2 + 2) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// submit a command.
|
||||
index, _, ok := ts.srvs[leader2].Raft().Start(104)
|
||||
@ -211,11 +226,7 @@ func TestFollowerFailure3B(t *testing.T) {
|
||||
time.Sleep(2 * RaftElectionTimeout)
|
||||
|
||||
// check that command 104 did not commit.
|
||||
n, _ := ts.nCommitted(index)
|
||||
if n > 0 {
|
||||
t.Fatalf("%v committed but no majority", n)
|
||||
}
|
||||
|
||||
ts.checkNoAgreement(index)
|
||||
}
|
||||
|
||||
// test just failure of leaders.
|
||||
@ -224,6 +235,7 @@ func TestLeaderFailure3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestLeaderFailure3B", servers)
|
||||
ts.Begin("Test (3B): test failure of leaders")
|
||||
|
||||
ts.one(101, servers, false)
|
||||
@ -231,6 +243,7 @@ func TestLeaderFailure3B(t *testing.T) {
|
||||
// disconnect the first leader.
|
||||
leader1 := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll(leader1)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// the remaining followers should elect
|
||||
// a new leader.
|
||||
@ -241,6 +254,7 @@ func TestLeaderFailure3B(t *testing.T) {
|
||||
// disconnect the new leader.
|
||||
leader2 := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll(leader2)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// submit a command to each server.
|
||||
for i := 0; i < servers; i++ {
|
||||
@ -250,11 +264,7 @@ func TestLeaderFailure3B(t *testing.T) {
|
||||
time.Sleep(2 * RaftElectionTimeout)
|
||||
|
||||
// check that command 104 did not commit.
|
||||
n, _ := ts.nCommitted(4)
|
||||
if n > 0 {
|
||||
t.Fatalf("%v committed but no majority", n)
|
||||
}
|
||||
|
||||
ts.checkNoAgreement(4)
|
||||
}
|
||||
|
||||
// test that a follower participates after
|
||||
@ -264,6 +274,7 @@ func TestFailAgree3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestFailAgree3B", servers)
|
||||
ts.Begin("Test (3B): agreement after follower reconnects")
|
||||
|
||||
ts.one(101, servers, false)
|
||||
@ -271,6 +282,7 @@ func TestFailAgree3B(t *testing.T) {
|
||||
// disconnect one follower from the network.
|
||||
leader := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll((leader + 1) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// the leader and remaining follower should be
|
||||
// able to agree despite the disconnected follower.
|
||||
@ -282,6 +294,7 @@ func TestFailAgree3B(t *testing.T) {
|
||||
|
||||
// re-connect
|
||||
ts.g.ConnectOne((leader + 1) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// the full set of servers should preserve
|
||||
// previous agreements, and be able to agree
|
||||
@ -289,7 +302,6 @@ func TestFailAgree3B(t *testing.T) {
|
||||
ts.one(106, servers, true)
|
||||
time.Sleep(RaftElectionTimeout)
|
||||
ts.one(107, servers, true)
|
||||
|
||||
}
|
||||
|
||||
func TestFailNoAgree3B(t *testing.T) {
|
||||
@ -297,6 +309,7 @@ func TestFailNoAgree3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestFailNoAgree3B", servers)
|
||||
ts.Begin("Test (3B): no agreement if too many followers disconnect")
|
||||
|
||||
ts.one(10, servers, false)
|
||||
@ -306,6 +319,7 @@ func TestFailNoAgree3B(t *testing.T) {
|
||||
ts.g.DisconnectAll((leader + 1) % servers)
|
||||
ts.g.DisconnectAll((leader + 2) % servers)
|
||||
ts.g.DisconnectAll((leader + 3) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
index, _, ok := ts.srvs[leader].Raft().Start(20)
|
||||
if ok != true {
|
||||
@ -326,6 +340,7 @@ func TestFailNoAgree3B(t *testing.T) {
|
||||
ts.g.ConnectOne((leader + 1) % servers)
|
||||
ts.g.ConnectOne((leader + 2) % servers)
|
||||
ts.g.ConnectOne((leader + 3) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// the disconnected majority may have chosen a leader from
|
||||
// among their own ranks, forgetting index 2.
|
||||
@ -339,7 +354,6 @@ func TestFailNoAgree3B(t *testing.T) {
|
||||
}
|
||||
|
||||
ts.one(1000, servers, true)
|
||||
|
||||
}
|
||||
|
||||
func TestConcurrentStarts3B(t *testing.T) {
|
||||
@ -347,6 +361,7 @@ func TestConcurrentStarts3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestConcurrentStarts3B", servers)
|
||||
ts.Begin("Test (3B): concurrent Start()s")
|
||||
|
||||
var success bool
|
||||
@ -358,9 +373,15 @@ loop:
|
||||
}
|
||||
|
||||
leader := ts.checkOneLeader()
|
||||
textb := fmt.Sprintf("checking concurrent submission of commands (attempt %v)", try)
|
||||
tester.AnnotateCheckerBegin(textb)
|
||||
_, term, ok := ts.srvs[leader].Raft().Start(1)
|
||||
|
||||
despretry := "concurrent submission failed; retry"
|
||||
if !ok {
|
||||
// leader moved on really quickly
|
||||
details := fmt.Sprintf("%v is no longer a leader", leader)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -388,6 +409,9 @@ loop:
|
||||
for j := 0; j < servers; j++ {
|
||||
if t, _ := ts.srvs[j].Raft().GetState(); t != term {
|
||||
// term changed -- can't expect low RPC counts
|
||||
details := fmt.Sprintf("term of server %v changed from %v to %v",
|
||||
j, term, t)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
@ -402,11 +426,17 @@ loop:
|
||||
// so we can't expect all Start()s to
|
||||
// have succeeded
|
||||
failed = true
|
||||
details := fmt.Sprintf(
|
||||
"term changed while waiting for %v servers to commit index %v",
|
||||
servers, index)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
break
|
||||
}
|
||||
cmds = append(cmds, ix)
|
||||
} else {
|
||||
t.Fatalf("value %v is not an int", cmd)
|
||||
details := fmt.Sprintf("value %v is not an int", cmd)
|
||||
tester.AnnotateCheckerFailure("read ill-typed value", details)
|
||||
t.Fatalf(details)
|
||||
}
|
||||
}
|
||||
|
||||
@ -428,7 +458,9 @@ loop:
|
||||
}
|
||||
}
|
||||
if ok == false {
|
||||
t.Fatalf("cmd %v missing in %v", x, cmds)
|
||||
details := fmt.Sprintf("cmd %v missing in %v", x, cmds)
|
||||
tester.AnnotateCheckerFailure("concurrent submission failed", details)
|
||||
t.Fatalf(details)
|
||||
}
|
||||
}
|
||||
|
||||
@ -437,9 +469,14 @@ loop:
|
||||
}
|
||||
|
||||
if !success {
|
||||
tester.AnnotateCheckerFailure(
|
||||
"agreement failed under concurrent submission",
|
||||
"unable to reach agreement after 5 attempts")
|
||||
t.Fatalf("term changed too often")
|
||||
}
|
||||
|
||||
text := "agreement reached under concurrent submission"
|
||||
tester.AnnotateCheckerSuccess(text, "OK")
|
||||
}
|
||||
|
||||
func TestRejoin3B(t *testing.T) {
|
||||
@ -447,6 +484,7 @@ func TestRejoin3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestRejoin3B", servers)
|
||||
ts.Begin("Test (3B): rejoin of partitioned leader")
|
||||
|
||||
ts.one(101, servers, true)
|
||||
@ -454,11 +492,15 @@ func TestRejoin3B(t *testing.T) {
|
||||
// leader network failure
|
||||
leader1 := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll(leader1)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// make old leader try to agree on some entries
|
||||
start := tester.GetAnnotateTimestamp()
|
||||
ts.srvs[leader1].Raft().Start(102)
|
||||
ts.srvs[leader1].Raft().Start(103)
|
||||
ts.srvs[leader1].Raft().Start(104)
|
||||
text := fmt.Sprintf("submitted commands [102 103 104] to %v", leader1)
|
||||
tester.AnnotateInfoInterval(start, text, text)
|
||||
|
||||
// new leader commits, also for index=2
|
||||
ts.one(103, 2, true)
|
||||
@ -469,14 +511,15 @@ func TestRejoin3B(t *testing.T) {
|
||||
|
||||
// old leader connected again
|
||||
ts.g.ConnectOne(leader1)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
ts.one(104, 2, true)
|
||||
|
||||
// all together now
|
||||
ts.g.ConnectOne(leader2)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
ts.one(105, servers, true)
|
||||
|
||||
}
|
||||
|
||||
func TestBackup3B(t *testing.T) {
|
||||
@ -484,6 +527,7 @@ func TestBackup3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestBackup3B", servers)
|
||||
ts.Begin("Test (3B): leader backs up quickly over incorrect follower logs")
|
||||
|
||||
ts.one(rand.Int(), servers, true)
|
||||
@ -493,11 +537,15 @@ func TestBackup3B(t *testing.T) {
|
||||
ts.g.DisconnectAll((leader1 + 2) % servers)
|
||||
ts.g.DisconnectAll((leader1 + 3) % servers)
|
||||
ts.g.DisconnectAll((leader1 + 4) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// submit lots of commands that won't commit
|
||||
start := tester.GetAnnotateTimestamp()
|
||||
for i := 0; i < 50; i++ {
|
||||
ts.srvs[leader1].Raft().Start(rand.Int())
|
||||
}
|
||||
text := fmt.Sprintf("submitted 50 commands to %v", leader1)
|
||||
tester.AnnotateInfoInterval(start, text, text)
|
||||
|
||||
time.Sleep(RaftElectionTimeout / 2)
|
||||
|
||||
@ -508,6 +556,7 @@ func TestBackup3B(t *testing.T) {
|
||||
ts.g.ConnectOne((leader1 + 2) % servers)
|
||||
ts.g.ConnectOne((leader1 + 3) % servers)
|
||||
ts.g.ConnectOne((leader1 + 4) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// lots of successful commands to new group.
|
||||
for i := 0; i < 50; i++ {
|
||||
@ -521,11 +570,15 @@ func TestBackup3B(t *testing.T) {
|
||||
other = (leader2 + 1) % servers
|
||||
}
|
||||
ts.g.DisconnectAll(other)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// lots more commands that won't commit
|
||||
start = tester.GetAnnotateTimestamp()
|
||||
for i := 0; i < 50; i++ {
|
||||
ts.srvs[leader2].Raft().Start(rand.Int())
|
||||
}
|
||||
text = fmt.Sprintf("submitted 50 commands to %v", leader2)
|
||||
tester.AnnotateInfoInterval(start, text, text)
|
||||
|
||||
time.Sleep(RaftElectionTimeout / 2)
|
||||
|
||||
@ -536,6 +589,7 @@ func TestBackup3B(t *testing.T) {
|
||||
ts.g.ConnectOne((leader1 + 0) % servers)
|
||||
ts.g.ConnectOne((leader1 + 1) % servers)
|
||||
ts.g.ConnectOne(other)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
// lots of successful commands to new group.
|
||||
for i := 0; i < 50; i++ {
|
||||
@ -546,6 +600,7 @@ func TestBackup3B(t *testing.T) {
|
||||
for i := 0; i < servers; i++ {
|
||||
ts.g.ConnectOne(i)
|
||||
}
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
ts.one(rand.Int(), servers, true)
|
||||
}
|
||||
|
||||
@ -554,6 +609,7 @@ func TestCount3B(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestCount3B", servers)
|
||||
ts.Begin("Test (3B): RPC counts aren't too high")
|
||||
|
||||
rpcs := func() (n int) {
|
||||
@ -568,7 +624,9 @@ func TestCount3B(t *testing.T) {
|
||||
total1 := rpcs()
|
||||
|
||||
if total1 > 30 || total1 < 1 {
|
||||
t.Fatalf("too many or few RPCs (%v) to elect initial leader\n", total1)
|
||||
text := fmt.Sprintf("too many or few RPCs (%v) to elect initial leader", total1)
|
||||
tester.AnnotateCheckerFailure(text, text)
|
||||
t.Fatalf("%s", text)
|
||||
}
|
||||
|
||||
var total2 int
|
||||
@ -581,14 +639,20 @@ loop:
|
||||
}
|
||||
|
||||
leader = ts.checkOneLeader()
|
||||
textb := fmt.Sprintf("checking reasonable RPC counts for agreement (attempt %v)", try)
|
||||
tester.AnnotateCheckerBegin(textb)
|
||||
total1 = rpcs()
|
||||
|
||||
iters := 10
|
||||
starti, term, ok := ts.srvs[leader].Raft().Start(1)
|
||||
despretry := "submission failed; retry"
|
||||
if !ok {
|
||||
// leader moved on really quickly
|
||||
details := fmt.Sprintf("%v is no longer a leader", leader)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
continue
|
||||
}
|
||||
|
||||
cmds := []int{}
|
||||
for i := 1; i < iters+2; i++ {
|
||||
x := int(rand.Int31())
|
||||
@ -596,13 +660,23 @@ loop:
|
||||
index1, term1, ok := ts.srvs[leader].Raft().Start(x)
|
||||
if term1 != term {
|
||||
// Term changed while starting
|
||||
details := fmt.Sprintf("term of the leader (%v) changed from %v to %v",
|
||||
leader, term, term1)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
continue loop
|
||||
}
|
||||
if !ok {
|
||||
// No longer the leader, so term has changed
|
||||
details := fmt.Sprintf("%v is no longer a leader", leader)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
continue loop
|
||||
}
|
||||
if starti+i != index1 {
|
||||
desp := fmt.Sprintf("leader %v adds the command at the wrong index", leader)
|
||||
details := fmt.Sprintf(
|
||||
"the command should locate at index %v, but the leader puts it at %v",
|
||||
starti + i, index1)
|
||||
tester.AnnotateCheckerFailure(desp, details)
|
||||
t.Fatalf("Start() failed")
|
||||
}
|
||||
}
|
||||
@ -612,8 +686,16 @@ loop:
|
||||
if ix, ok := cmd.(int); ok == false || ix != cmds[i-1] {
|
||||
if ix == -1 {
|
||||
// term changed -- try again
|
||||
details := fmt.Sprintf(
|
||||
"term changed while waiting for %v servers to commit index %v",
|
||||
servers, starti + i)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
continue loop
|
||||
}
|
||||
details := fmt.Sprintf(
|
||||
"the command submitted at index %v in term %v is %v, but read %v",
|
||||
starti + i, term, cmds[i - 1], cmd)
|
||||
tester.AnnotateCheckerFailure("incorrect command committed", details)
|
||||
t.Fatalf("wrong value %v committed for index %v; expected %v\n", cmd, starti+i, cmds)
|
||||
}
|
||||
}
|
||||
@ -624,6 +706,8 @@ loop:
|
||||
if t, _ := ts.srvs[j].Raft().GetState(); t != term {
|
||||
// term changed -- can't expect low RPC counts
|
||||
// need to keep going to update total2
|
||||
details := fmt.Sprintf("term of server %v changed from %v to %v", j, term, t)
|
||||
tester.AnnotateCheckerNeutral(despretry, details)
|
||||
failed = true
|
||||
}
|
||||
total2 += ts.g.RpcCount(j)
|
||||
@ -634,17 +718,29 @@ loop:
|
||||
}
|
||||
|
||||
if total2-total1 > (iters+1+3)*3 {
|
||||
details := fmt.Sprintf("number of RPC used for %v entries = %v > %v",
|
||||
iters, total2-total1, (iters+1+3)*3)
|
||||
tester.AnnotateCheckerFailure("used too many RPCs for agreement", details)
|
||||
t.Fatalf("too many RPCs (%v) for %v entries\n", total2-total1, iters)
|
||||
}
|
||||
|
||||
details := fmt.Sprintf("number of RPC used for %v entries = %v <= %v",
|
||||
iters, total2-total1, (iters+1+3)*3)
|
||||
tester.AnnotateCheckerSuccess("used reasonable number of RPCs for agreement", details)
|
||||
|
||||
success = true
|
||||
break
|
||||
}
|
||||
|
||||
if !success {
|
||||
tester.AnnotateCheckerFailure(
|
||||
"agreement failed",
|
||||
"unable to reach agreement after 5 attempts")
|
||||
t.Fatalf("term changed too often")
|
||||
}
|
||||
|
||||
tester.AnnotateCheckerBegin("checking reasonable RPC counts in idle")
|
||||
|
||||
time.Sleep(RaftElectionTimeout)
|
||||
|
||||
total3 := 0
|
||||
@ -653,9 +749,15 @@ loop:
|
||||
}
|
||||
|
||||
if total3-total2 > 3*20 {
|
||||
details := fmt.Sprintf("number of RPC used for 1 second of idleness = %v > %v",
|
||||
total3-total2, 3 * 20)
|
||||
tester.AnnotateCheckerFailure("used too many RPCs in idle", details)
|
||||
t.Fatalf("too many RPCs (%v) for 1 second of idleness\n", total3-total2)
|
||||
}
|
||||
|
||||
details := fmt.Sprintf("number of RPC used for 1 second of idleness = %v <= %v",
|
||||
total3-total2, 3 * 20)
|
||||
tester.AnnotateCheckerSuccess(
|
||||
"used a reasonable number of RPCs in idle", details)
|
||||
}
|
||||
|
||||
func TestPersist13C(t *testing.T) {
|
||||
@ -663,36 +765,47 @@ func TestPersist13C(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestPersist13C", servers)
|
||||
ts.Begin("Test (3C): basic persistence")
|
||||
|
||||
ts.one(11, servers, true)
|
||||
|
||||
ts.g.Shutdown()
|
||||
tester.AnnotateShutdownAll()
|
||||
ts.g.StartServers()
|
||||
tester.AnnotateRestartAll()
|
||||
|
||||
ts.one(12, servers, true)
|
||||
|
||||
leader1 := ts.checkOneLeader()
|
||||
ts.g.ShutdownServer(leader1)
|
||||
tester.AnnotateShutdown([]int{leader1})
|
||||
ts.restart(leader1)
|
||||
tester.AnnotateRestart([]int{leader1})
|
||||
|
||||
ts.one(13, servers, true)
|
||||
|
||||
leader2 := ts.checkOneLeader()
|
||||
ts.g.ShutdownServer(leader2)
|
||||
tester.AnnotateShutdown([]int{leader2})
|
||||
|
||||
ts.one(14, servers-1, true)
|
||||
|
||||
ts.restart(leader2)
|
||||
tester.AnnotateRestart([]int{leader2})
|
||||
|
||||
tester.AnnotateCheckerBegin("wait for all servers to commit until index 4")
|
||||
ts.wait(4, servers, -1) // wait for leader2 to join before killing i3
|
||||
tester.AnnotateCheckerSuccess("all committed until index 4", "OK")
|
||||
|
||||
i3 := (ts.checkOneLeader() + 1) % servers
|
||||
ts.g.ShutdownServer(i3)
|
||||
tester.AnnotateShutdown([]int{i3})
|
||||
|
||||
ts.one(15, servers-1, true)
|
||||
|
||||
ts.restart(i3)
|
||||
tester.AnnotateRestart([]int{i3})
|
||||
|
||||
ts.one(16, servers, true)
|
||||
}
|
||||
@ -702,6 +815,7 @@ func TestPersist23C(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestPersist23C", servers)
|
||||
ts.Begin("Test (3C): more persistence")
|
||||
|
||||
index := 1
|
||||
@ -713,6 +827,7 @@ func TestPersist23C(t *testing.T) {
|
||||
|
||||
ts.g.ShutdownServer((leader1 + 1) % servers)
|
||||
ts.g.ShutdownServer((leader1 + 2) % servers)
|
||||
tester.AnnotateShutdown([]int{(leader1 + 1) % servers, (leader1 + 2) % servers})
|
||||
|
||||
ts.one(10+index, servers-2, true)
|
||||
index++
|
||||
@ -720,19 +835,25 @@ func TestPersist23C(t *testing.T) {
|
||||
ts.g.ShutdownServer((leader1 + 0) % servers)
|
||||
ts.g.ShutdownServer((leader1 + 3) % servers)
|
||||
ts.g.ShutdownServer((leader1 + 4) % servers)
|
||||
tester.AnnotateShutdown([]int{
|
||||
(leader1 + 0) % servers, (leader1 + 3) % servers, (leader1 + 4) % servers,
|
||||
})
|
||||
|
||||
ts.restart((leader1 + 1) % servers)
|
||||
ts.restart((leader1 + 2) % servers)
|
||||
tester.AnnotateRestart([]int{(leader1 + 1) % servers, (leader1 + 2) % servers})
|
||||
|
||||
time.Sleep(RaftElectionTimeout)
|
||||
|
||||
ts.restart((leader1 + 3) % servers)
|
||||
tester.AnnotateRestart([]int{(leader1 + 3) % servers})
|
||||
|
||||
ts.one(10+index, servers-2, true)
|
||||
index++
|
||||
|
||||
ts.restart((leader1 + 4) % servers)
|
||||
ts.restart((leader1 + 0) % servers)
|
||||
tester.AnnotateRestart([]int{(leader1 + 4) % servers, (leader1 + 0) % servers})
|
||||
}
|
||||
|
||||
ts.one(1000, servers, true)
|
||||
@ -743,23 +864,29 @@ func TestPersist33C(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestPersist33C", servers)
|
||||
ts.Begin("Test (3C): partitioned leader and one follower crash, leader restarts")
|
||||
|
||||
ts.one(101, 3, true)
|
||||
|
||||
leader := ts.checkOneLeader()
|
||||
ts.g.DisconnectAll((leader + 2) % servers)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
ts.one(102, 2, true)
|
||||
|
||||
ts.g.ShutdownServer((leader + 0) % servers)
|
||||
ts.g.ShutdownServer((leader + 1) % servers)
|
||||
tester.AnnotateShutdown([]int{(leader + 0) % servers, (leader + 1) % servers})
|
||||
ts.restart((leader + 2) % servers)
|
||||
ts.restart((leader + 0) % servers)
|
||||
tester.AnnotateRestart([]int{(leader + 2) % servers, (leader + 0) % servers})
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
ts.one(103, 2, true)
|
||||
|
||||
ts.restart((leader + 1) % servers)
|
||||
tester.AnnotateRestart([]int{(leader + 1) % servers})
|
||||
|
||||
ts.one(104, servers, true)
|
||||
}
|
||||
@ -777,6 +904,7 @@ func TestFigure83C(t *testing.T) {
|
||||
ts := makeTest(t, servers, true, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestFigure83C", servers)
|
||||
ts.Begin("Test (3C): Figure 8")
|
||||
|
||||
ts.one(rand.Int(), 1, true)
|
||||
@ -786,8 +914,11 @@ func TestFigure83C(t *testing.T) {
|
||||
leader := -1
|
||||
for i := 0; i < servers; i++ {
|
||||
if ts.srvs[i].Raft() != nil {
|
||||
_, _, ok := ts.srvs[i].Raft().Start(rand.Int())
|
||||
cmd := rand.Int()
|
||||
_, _, ok := ts.srvs[i].Raft().Start(cmd)
|
||||
if ok {
|
||||
text := fmt.Sprintf("submitted command %v to server %v", cmd, i)
|
||||
tester.AnnotateInfo(text, text)
|
||||
leader = i
|
||||
}
|
||||
}
|
||||
@ -803,6 +934,7 @@ func TestFigure83C(t *testing.T) {
|
||||
|
||||
if leader != -1 {
|
||||
ts.g.ShutdownServer(leader)
|
||||
tester.AnnotateShutdown([]int{leader})
|
||||
nup -= 1
|
||||
}
|
||||
|
||||
@ -810,6 +942,7 @@ func TestFigure83C(t *testing.T) {
|
||||
s := rand.Int() % servers
|
||||
if ts.srvs[s].Raft() == nil {
|
||||
ts.restart(s)
|
||||
tester.AnnotateRestart([]int{s})
|
||||
nup += 1
|
||||
}
|
||||
}
|
||||
@ -820,9 +953,9 @@ func TestFigure83C(t *testing.T) {
|
||||
ts.restart(i)
|
||||
}
|
||||
}
|
||||
tester.AnnotateRestartAll()
|
||||
|
||||
ts.one(rand.Int(), servers, true)
|
||||
|
||||
}
|
||||
|
||||
func TestUnreliableAgree3C(t *testing.T) {
|
||||
@ -830,6 +963,7 @@ func TestUnreliableAgree3C(t *testing.T) {
|
||||
ts := makeTest(t, servers, false, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestUnreliableAgree3C", servers)
|
||||
ts.Begin("Test (3C): unreliable agreement")
|
||||
|
||||
var wg sync.WaitGroup
|
||||
@ -858,6 +992,7 @@ func TestFigure8Unreliable3C(t *testing.T) {
|
||||
ts := makeTest(t, servers, false, false)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestFigure8Unreliable3C", servers)
|
||||
ts.Begin("Test (3C): Figure 8 (unreliable)")
|
||||
|
||||
ts.one(rand.Int()%10000, 1, true)
|
||||
@ -869,7 +1004,12 @@ func TestFigure8Unreliable3C(t *testing.T) {
|
||||
}
|
||||
leader := -1
|
||||
for i := 0; i < servers; i++ {
|
||||
_, _, ok := ts.srvs[i].Raft().Start(rand.Int() % 10000)
|
||||
cmd := rand.Int() % 10000
|
||||
_, _, ok := ts.srvs[i].Raft().Start(cmd)
|
||||
if ok {
|
||||
text := fmt.Sprintf("submitted command %v to server %v", cmd, i)
|
||||
tester.AnnotateInfo(text, text)
|
||||
}
|
||||
if ok && ts.g.IsConnected(i) {
|
||||
leader = i
|
||||
}
|
||||
@ -885,6 +1025,7 @@ func TestFigure8Unreliable3C(t *testing.T) {
|
||||
|
||||
if leader != -1 && (rand.Int()%1000) < int(RaftElectionTimeout/time.Millisecond)/2 {
|
||||
ts.g.DisconnectAll(leader)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
nup -= 1
|
||||
}
|
||||
|
||||
@ -892,6 +1033,7 @@ func TestFigure8Unreliable3C(t *testing.T) {
|
||||
s := rand.Int() % servers
|
||||
if !ts.g.IsConnected(s) {
|
||||
ts.g.ConnectOne(s)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
nup += 1
|
||||
}
|
||||
}
|
||||
@ -902,9 +1044,9 @@ func TestFigure8Unreliable3C(t *testing.T) {
|
||||
ts.g.ConnectOne(i)
|
||||
}
|
||||
}
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
ts.one(rand.Int()%10000, servers, true)
|
||||
|
||||
}
|
||||
|
||||
func internalChurn(t *testing.T, reliable bool) {
|
||||
@ -914,8 +1056,10 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
defer ts.cleanup()
|
||||
|
||||
if ts.IsReliable() {
|
||||
tester.AnnotateTest("TestReliableChurn3C", servers)
|
||||
ts.Begin("Test (3C): churn")
|
||||
} else {
|
||||
tester.AnnotateTest("TestUnreliableChurn3C", servers)
|
||||
ts.Begin("Test (3C): unreliable churn")
|
||||
}
|
||||
|
||||
@ -968,6 +1112,7 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
ret = values
|
||||
}
|
||||
|
||||
startcli := tester.GetAnnotateTimestamp()
|
||||
ncli := 3
|
||||
cha := []chan []int{}
|
||||
for i := 0; i < ncli; i++ {
|
||||
@ -979,20 +1124,24 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
if (rand.Int() % 1000) < 200 {
|
||||
i := rand.Int() % servers
|
||||
ts.g.DisconnectAll(i)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
}
|
||||
|
||||
if (rand.Int() % 1000) < 500 {
|
||||
i := rand.Int() % servers
|
||||
if ts.srvs[i].raft == nil {
|
||||
ts.restart(i)
|
||||
tester.AnnotateRestart([]int{i})
|
||||
}
|
||||
ts.g.ConnectOne(i)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
}
|
||||
|
||||
if (rand.Int() % 1000) < 200 {
|
||||
i := rand.Int() % servers
|
||||
if ts.srvs[i].raft != nil {
|
||||
ts.g.ShutdownServer(i)
|
||||
tester.AnnotateShutdown([]int{i})
|
||||
}
|
||||
}
|
||||
|
||||
@ -1011,9 +1160,14 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
}
|
||||
ts.g.ConnectOne(i)
|
||||
}
|
||||
tester.AnnotateRestartAll()
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
|
||||
atomic.StoreInt32(&stop, 1)
|
||||
textcli := fmt.Sprintf("%v clients submitting commands concurrently", ncli)
|
||||
tester.AnnotateInfoInterval(startcli, textcli, textcli)
|
||||
|
||||
tester.AnnotateCheckerBegin("checking if any client has failed")
|
||||
values := []int{}
|
||||
for i := 0; i < ncli; i++ {
|
||||
vv := <-cha[i]
|
||||
@ -1022,6 +1176,7 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
}
|
||||
values = append(values, vv...)
|
||||
}
|
||||
tester.AnnotateCheckerSuccess("none of the clients have failed", "OK")
|
||||
|
||||
time.Sleep(RaftElectionTimeout)
|
||||
|
||||
@ -1033,10 +1188,14 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
if vi, ok := v.(int); ok {
|
||||
really = append(really, vi)
|
||||
} else {
|
||||
text := fmt.Sprintf("committed value %v is not an integer", v)
|
||||
tester.AnnotateCheckerFailure(text, text)
|
||||
t.Fatalf("not an int")
|
||||
}
|
||||
}
|
||||
|
||||
tester.AnnotateCheckerBegin(
|
||||
"checking if committed values observed by the clients remain in the log")
|
||||
for _, v1 := range values {
|
||||
ok := false
|
||||
for _, v2 := range really {
|
||||
@ -1048,7 +1207,7 @@ func internalChurn(t *testing.T, reliable bool) {
|
||||
ts.t.Fatalf("didn't find a value")
|
||||
}
|
||||
}
|
||||
|
||||
tester.AnnotateCheckerSuccess("committed values remain in the log", "OK")
|
||||
}
|
||||
|
||||
func TestReliableChurn3C(t *testing.T) {
|
||||
@ -1069,6 +1228,8 @@ func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash
|
||||
ts := makeTest(t, servers, reliable, true)
|
||||
defer ts.cleanup()
|
||||
|
||||
// Inconsistent with other test cases, but don't want to change API.
|
||||
tester.AnnotateTest(name, servers)
|
||||
ts.Begin(name)
|
||||
|
||||
ts.one(rand.Int(), servers, true)
|
||||
@ -1084,18 +1245,23 @@ func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash
|
||||
|
||||
if disconnect {
|
||||
ts.g.DisconnectAll(victim)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
ts.one(rand.Int(), servers-1, true)
|
||||
}
|
||||
if crash {
|
||||
ts.g.ShutdownServer(victim)
|
||||
tester.AnnotateShutdown([]int{victim})
|
||||
ts.one(rand.Int(), servers-1, true)
|
||||
}
|
||||
|
||||
// perhaps send enough to get a snapshot
|
||||
start := tester.GetAnnotateTimestamp()
|
||||
nn := (SnapShotInterval / 2) + (rand.Int() % SnapShotInterval)
|
||||
for i := 0; i < nn; i++ {
|
||||
ts.srvs[sender].Raft().Start(rand.Int())
|
||||
}
|
||||
text := fmt.Sprintf("submitting %v commands to %v", nn, sender)
|
||||
tester.AnnotateInfoInterval(start, text, text)
|
||||
|
||||
// let applier threads catch up with the Start()'s
|
||||
if disconnect == false && crash == false {
|
||||
@ -1114,11 +1280,13 @@ func snapcommon(t *testing.T, name string, disconnect bool, reliable bool, crash
|
||||
// reconnect a follower, who maybe behind and
|
||||
// needs to rceive a snapshot to catch up.
|
||||
ts.g.ConnectOne(victim)
|
||||
tester.AnnotateConnection(ts.g.GetConnected())
|
||||
ts.one(rand.Int(), servers, true)
|
||||
leader1 = ts.checkOneLeader()
|
||||
}
|
||||
if crash {
|
||||
ts.restart(victim)
|
||||
tester.AnnotateRestart([]int{victim})
|
||||
ts.one(rand.Int(), servers, true)
|
||||
leader1 = ts.checkOneLeader()
|
||||
}
|
||||
@ -1155,6 +1323,7 @@ func TestSnapshotAllCrash3D(t *testing.T) {
|
||||
ts := makeTest(t, servers, false, true)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestSnapshotAllCrash3D", servers)
|
||||
ts.Begin("Test (3D): crash and restart all servers")
|
||||
|
||||
ts.one(rand.Int(), servers, true)
|
||||
@ -1170,11 +1339,15 @@ func TestSnapshotAllCrash3D(t *testing.T) {
|
||||
|
||||
// crash all
|
||||
ts.g.Shutdown()
|
||||
tester.AnnotateShutdownAll()
|
||||
ts.g.StartServers()
|
||||
tester.AnnotateRestartAll()
|
||||
|
||||
index2 := ts.one(rand.Int(), servers, true)
|
||||
if index2 < index1+1 {
|
||||
t.Fatalf("index decreased from %v to %v", index1, index2)
|
||||
msg := fmt.Sprintf("index decreased from %v to %v", index1, index2)
|
||||
tester.AnnotateCheckerFailure("incorrect behavior: index decreased", msg)
|
||||
t.Fatalf(msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1186,6 +1359,7 @@ func TestSnapshotInit3D(t *testing.T) {
|
||||
ts := makeTest(t, servers, false, true)
|
||||
defer ts.cleanup()
|
||||
|
||||
tester.AnnotateTest("TestSnapshotInit3D", servers)
|
||||
ts.Begin("Test (3D): snapshot initialization after crash")
|
||||
ts.one(rand.Int(), servers, true)
|
||||
|
||||
@ -1196,13 +1370,17 @@ func TestSnapshotInit3D(t *testing.T) {
|
||||
}
|
||||
|
||||
ts.g.Shutdown()
|
||||
tester.AnnotateShutdownAll()
|
||||
ts.g.StartServers()
|
||||
tester.AnnotateRestartAll()
|
||||
|
||||
// a single op, to get something to be written back to persistent storage.
|
||||
ts.one(rand.Int(), servers, true)
|
||||
|
||||
ts.g.Shutdown()
|
||||
tester.AnnotateShutdownAll()
|
||||
ts.g.StartServers()
|
||||
tester.AnnotateRestartAll()
|
||||
|
||||
// do another op to trigger potential bug
|
||||
ts.one(rand.Int(), servers, true)
|
||||
|
@ -51,6 +51,7 @@ func newRfsrv(ts *Test, srv int, ends []*labrpc.ClientEnd, persister *tester.Per
|
||||
// ideally Raft should send it up on applyCh...
|
||||
err := s.ingestSnap(snapshot, -1)
|
||||
if err != "" {
|
||||
tester.AnnotateCheckerFailureBeforeExit("failed to ingest snapshot", err)
|
||||
ts.t.Fatal(err)
|
||||
}
|
||||
}
|
||||
@ -106,6 +107,7 @@ func (rs *rfsrv) applier(applyCh chan raftapi.ApplyMsg) {
|
||||
err_msg = fmt.Sprintf("server %v apply out of order %v", rs.me, m.CommandIndex)
|
||||
}
|
||||
if err_msg != "" {
|
||||
tester.AnnotateCheckerFailureBeforeExit("apply error", err_msg)
|
||||
log.Fatalf("apply error: %v", err_msg)
|
||||
rs.applyErr = err_msg
|
||||
// keep reading after error so that Raft doesn't block
|
||||
@ -149,12 +151,18 @@ func (rs *rfsrv) applierSnap(applyCh chan raftapi.ApplyMsg) {
|
||||
xlog = append(xlog, rs.logs[j])
|
||||
}
|
||||
e.Encode(xlog)
|
||||
start := tester.GetAnnotateTimestamp()
|
||||
rs.raft.Snapshot(m.CommandIndex, w.Bytes())
|
||||
details := fmt.Sprintf(
|
||||
"snapshot created after applying the command at index %v",
|
||||
m.CommandIndex)
|
||||
tester.AnnotateInfoInterval(start, "snapshot created", details)
|
||||
}
|
||||
} else {
|
||||
// Ignore other types of ApplyMsg.
|
||||
}
|
||||
if err_msg != "" {
|
||||
tester.AnnotateCheckerFailureBeforeExit("apply error", err_msg)
|
||||
log.Fatalf("apply error: %v", err_msg)
|
||||
rs.applyErr = err_msg
|
||||
// keep reading after error so that Raft doesn't block
|
||||
@ -169,6 +177,7 @@ func (rs *rfsrv) ingestSnap(snapshot []byte, index int) string {
|
||||
defer rs.mu.Unlock()
|
||||
|
||||
if snapshot == nil {
|
||||
tester.AnnotateCheckerFailureBeforeExit("failed to ingest snapshot", "nil snapshot")
|
||||
log.Fatalf("nil snapshot")
|
||||
return "nil snapshot"
|
||||
}
|
||||
@ -178,6 +187,8 @@ func (rs *rfsrv) ingestSnap(snapshot []byte, index int) string {
|
||||
var xlog []any
|
||||
if d.Decode(&lastIncludedIndex) != nil ||
|
||||
d.Decode(&xlog) != nil {
|
||||
text := "failed to decode snapshot"
|
||||
tester.AnnotateCheckerFailureBeforeExit(text, text)
|
||||
log.Fatalf("snapshot decode error")
|
||||
return "snapshot Decode() error"
|
||||
}
|
||||
|
@ -62,6 +62,7 @@ func (ts *Test) restart(i int) {
|
||||
}
|
||||
|
||||
func (ts *Test) checkOneLeader() int {
|
||||
tester.AnnotateCheckerBegin("checking for a single leader")
|
||||
for iters := 0; iters < 10; iters++ {
|
||||
ms := 450 + (rand.Int63() % 100)
|
||||
time.Sleep(time.Duration(ms) * time.Millisecond)
|
||||
@ -78,6 +79,8 @@ func (ts *Test) checkOneLeader() int {
|
||||
lastTermWithLeader := -1
|
||||
for term, leaders := range leaders {
|
||||
if len(leaders) > 1 {
|
||||
details := fmt.Sprintf("multiple leaders in term %v = %v", term, leaders)
|
||||
tester.AnnotateCheckerFailure("multiple leaders", details)
|
||||
ts.Fatalf("term %d has %d (>1) leaders", term, len(leaders))
|
||||
}
|
||||
if term > lastTermWithLeader {
|
||||
@ -86,14 +89,20 @@ func (ts *Test) checkOneLeader() int {
|
||||
}
|
||||
|
||||
if len(leaders) != 0 {
|
||||
details := fmt.Sprintf("leader in term %v = %v",
|
||||
lastTermWithLeader, leaders[lastTermWithLeader][0])
|
||||
tester.AnnotateCheckerSuccess(details, details)
|
||||
return leaders[lastTermWithLeader][0]
|
||||
}
|
||||
}
|
||||
details := fmt.Sprintf("unable to find a leader")
|
||||
tester.AnnotateCheckerFailure("no leader", details)
|
||||
ts.Fatalf("expected one leader, got none")
|
||||
return -1
|
||||
}
|
||||
|
||||
func (ts *Test) checkTerms() int {
|
||||
tester.AnnotateCheckerBegin("checking term agreement")
|
||||
term := -1
|
||||
for i := 0; i < ts.n; i++ {
|
||||
if ts.g.IsConnected(i) {
|
||||
@ -101,10 +110,15 @@ func (ts *Test) checkTerms() int {
|
||||
if term == -1 {
|
||||
term = xterm
|
||||
} else if term != xterm {
|
||||
details := fmt.Sprintf("node ids -> terms = { %v -> %v; %v -> %v }",
|
||||
i - 1, term, i, xterm)
|
||||
tester.AnnotateCheckerFailure("term disagreed", details)
|
||||
ts.Fatalf("servers disagree on term")
|
||||
}
|
||||
}
|
||||
}
|
||||
details := fmt.Sprintf("term = %v", term)
|
||||
tester.AnnotateCheckerSuccess("term agreed", details)
|
||||
return term
|
||||
}
|
||||
|
||||
@ -134,14 +148,32 @@ func (ts *Test) checkLogs(i int, m raftapi.ApplyMsg) (string, bool) {
|
||||
// check that none of the connected servers
|
||||
// thinks it is the leader.
|
||||
func (ts *Test) checkNoLeader() {
|
||||
tester.AnnotateCheckerBegin("checking no unexpected leader among connected servers")
|
||||
for i := 0; i < ts.n; i++ {
|
||||
if ts.g.IsConnected(i) {
|
||||
_, is_leader := ts.srvs[i].GetState()
|
||||
if is_leader {
|
||||
ts.Fatalf("expected no leader among connected servers, but %v claims to be leader", i)
|
||||
details := fmt.Sprintf("leader = %v", i)
|
||||
tester.AnnotateCheckerFailure("unexpected leader found", details)
|
||||
ts.Fatalf(details)
|
||||
}
|
||||
}
|
||||
}
|
||||
tester.AnnotateCheckerSuccess("no unexpected leader", "no unexpected leader")
|
||||
}
|
||||
|
||||
func (ts *Test) checkNoAgreement(index int) {
|
||||
text := fmt.Sprintf("checking no unexpected agreement at index %v", index)
|
||||
tester.AnnotateCheckerBegin(text)
|
||||
n, _ := ts.nCommitted(index)
|
||||
if n > 0 {
|
||||
desp := fmt.Sprintf("unexpected agreement at index %v", index)
|
||||
details := fmt.Sprintf("%v server(s) commit incorrectly index", n)
|
||||
tester.AnnotateCheckerFailure(desp, details)
|
||||
ts.Fatalf("%v committed but no majority", n)
|
||||
}
|
||||
desp := fmt.Sprintf("no unexpected agreement at index %v", index)
|
||||
tester.AnnotateCheckerSuccess(desp, "OK")
|
||||
}
|
||||
|
||||
// how many servers think a log entry is committed?
|
||||
@ -153,6 +185,7 @@ func (ts *Test) nCommitted(index int) (int, any) {
|
||||
var cmd any = nil
|
||||
for _, rs := range ts.srvs {
|
||||
if rs.applyErr != "" {
|
||||
tester.AnnotateCheckerFailure("apply error", rs.applyErr)
|
||||
ts.t.Fatal(rs.applyErr)
|
||||
}
|
||||
|
||||
@ -160,8 +193,10 @@ func (ts *Test) nCommitted(index int) (int, any) {
|
||||
|
||||
if ok {
|
||||
if count > 0 && cmd != cmd1 {
|
||||
ts.Fatalf("committed values do not match: index %v, %v, %v",
|
||||
text := fmt.Sprintf("committed values at index %v do not match (%v != %v)",
|
||||
index, cmd, cmd1)
|
||||
tester.AnnotateCheckerFailure("unmatched committed values", text)
|
||||
ts.Fatalf(text)
|
||||
}
|
||||
count += 1
|
||||
cmd = cmd1
|
||||
@ -183,6 +218,16 @@ func (ts *Test) nCommitted(index int) (int, any) {
|
||||
// if retry==false, calls Start() only once, in order
|
||||
// to simplify the early Lab 3B tests.
|
||||
func (ts *Test) one(cmd any, expectedServers int, retry bool) int {
|
||||
var textretry string
|
||||
if retry {
|
||||
textretry = "with"
|
||||
} else {
|
||||
textretry = "without"
|
||||
}
|
||||
textcmd := fmt.Sprintf("%v", cmd)
|
||||
textb := fmt.Sprintf("checking agreement of %.8s by at least %v servers %v retry",
|
||||
textcmd, expectedServers, textretry)
|
||||
tester.AnnotateCheckerBegin(textb)
|
||||
t0 := time.Now()
|
||||
starts := 0
|
||||
for time.Since(t0).Seconds() < 10 && ts.checkFinished() == false {
|
||||
@ -214,12 +259,16 @@ func (ts *Test) one(cmd any, expectedServers int, retry bool) int {
|
||||
// committed
|
||||
if cmd1 == cmd {
|
||||
// and it was the command we submitted.
|
||||
desp := fmt.Sprintf("agreement of %.8s reached", textcmd)
|
||||
tester.AnnotateCheckerSuccess(desp, "OK")
|
||||
return index
|
||||
}
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
if retry == false {
|
||||
desp := fmt.Sprintf("agreement of %.8s failed", textcmd)
|
||||
tester.AnnotateCheckerFailure(desp, "failed after submitting command")
|
||||
ts.Fatalf("one(%v) failed to reach agreement", cmd)
|
||||
}
|
||||
} else {
|
||||
@ -227,6 +276,8 @@ func (ts *Test) one(cmd any, expectedServers int, retry bool) int {
|
||||
}
|
||||
}
|
||||
if ts.checkFinished() == false {
|
||||
desp := fmt.Sprintf("agreement of %.8s failed", textcmd)
|
||||
tester.AnnotateCheckerFailure(desp, "failed after 10-second timeout")
|
||||
ts.Fatalf("one(%v) failed to reach agreement", cmd)
|
||||
}
|
||||
return -1
|
||||
@ -262,6 +313,10 @@ func (ts *Test) wait(index int, n int, startTerm int) any {
|
||||
}
|
||||
nd, cmd := ts.nCommitted(index)
|
||||
if nd < n {
|
||||
desp := fmt.Sprintf("less than %v servers commit index %v", n, index)
|
||||
details := fmt.Sprintf(
|
||||
"only %v (< %v) servers commit index %v at term %v", nd, n, index, startTerm)
|
||||
tester.AnnotateCheckerFailure(desp, details)
|
||||
ts.Fatalf("only %d decided for index %d; wanted %d",
|
||||
nd, index, n)
|
||||
}
src/tester1/annotation.go (new file, 535 lines)
@@ -0,0 +1,535 @@
package tester

import (
    "sync"
    "os"
    "os/signal"
    "fmt"
    "time"
    "strings"
    "slices"
    "github.com/anishathalye/porcupine"
    "6.5840/models1"
)

///
/// Public interface.
///

type Annotation struct {
    mu          *sync.Mutex
    annotations []porcupine.Annotation
    continuous  map[string]Continuous
}

type Continuous struct {
    start   int64
    desp    string
    details string
    bgcolor string
}

type FrameworkInfo struct {
    mu        *sync.Mutex
    nservers  int
    connected []bool
    crashed   []bool
    ckbegin   CheckerBegin
}

type CheckerBegin struct {
    ts      int64
    details string
}

// Using global variable feels disturbing, but also can't figure out a better
// way to support user-level annotations. An alternative would be passing an
// Annotation object to the start-up function of servers and clients, but that
// doesn't feel better.
//
// One potential problem with using a global Annotation object is that when
// running multiple test cases, some zombie threads in previous test cases could
// interfere the current one. An ad-hoc fix at the user level would be adding
// annotations only if the killed flag on the server is not set.
var annotation *Annotation = mkAnnotation()
var unit struct{} = captureSignal()
var finfo *FrameworkInfo

const (
    COLOR_INFO    string = "#FAFAFA"
    COLOR_NEUTRAL string = "#FFECB3"
    COLOR_SUCCESS string = "#C8E6C9"
    COLOR_FAILURE string = "#FFCDD2"
    COLOR_FAULT   string = "#B3E5FC"
    COLOR_USER    string = "#FFF176"
)

const (
    TAG_CHECKER   string = "$ Checker"
    TAG_PARTITION string = "$ Failure"
    TAG_INFO      string = "$ Test Info"
)

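The exported helpers defined below (AnnotatePoint, AnnotateInterval, GetAnnotateTimestamp) are meant for lab code outside this package. A sketch of such a call site follows; it is not from the commit, and the tags, events, and calling context are hypothetical.

// Sketch: user-level annotations from lab code (import path as used by the labs).
func exampleUserAnnotations() {
    // A point-in-time event on a per-server timeline.
    tester.AnnotatePoint("Server 0", "became leader", "won election in term 3")

    // An interval: capture the start timestamp, do the work, then annotate.
    start := tester.GetAnnotateTimestamp()
    // ... send a snapshot, apply a batch of entries, etc. ...
    tester.AnnotateInterval("Server 0", start, "sent snapshot", "snapshot through index 120")
}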
func (cfg *Config) RetrieveAnnotations() []porcupine.Annotation{
|
||||
annotations := annotation.retrieve()
|
||||
return annotations
|
||||
}
|
||||
|
||||
func AnnotatePointColor(
|
||||
tag, desp, details, bgcolor string,
|
||||
) {
|
||||
annotation.annotatePointColor(tag, desp, details, bgcolor)
|
||||
}
|
||||
|
||||
func GetAnnotateTimestamp() int64 {
|
||||
return timestamp()
|
||||
}
|
||||
|
||||
func AnnotateIntervalColor(
|
||||
tag string, start int64, desp, details, bgcolor string,
|
||||
) {
|
||||
annotation.annotateIntervalColor(tag, start, desp, details, bgcolor)
|
||||
}
|
||||
|
||||
func AnnotateContinuousColor(tag, desp, details, bgcolor string) {
|
||||
annotation.annotateContinuousColor(tag, desp, details, bgcolor)
|
||||
}
|
||||
|
||||
func AnnotateContinuousEnd(tag string) {
|
||||
annotation.annotateContinuousEnd(tag)
|
||||
}
|
||||
|
||||
// Used by users.
|
||||
|
||||
func AnnotatePoint(tag, desp, details string) {
|
||||
annotation.annotatePointColor(tag, desp, details, COLOR_USER)
|
||||
}
|
||||
|
||||
func AnnotateInterval(tag string, start int64, desp, details string) {
|
||||
annotation.annotateIntervalColor(tag, start, desp, details, COLOR_USER)
|
||||
}
|
||||
|
||||
func AnnotateContinuous(tag, desp, details string) {
|
||||
annotation.annotateContinuousColor(tag, desp, details, COLOR_USER)
|
||||
}
|
||||
|
||||
// Used by test framework.
|
||||
|
||||
func AnnotateInfo(desp, details string) {
|
||||
AnnotatePointColor(TAG_INFO, desp, details, COLOR_INFO)
|
||||
}
|
||||
|
||||
func AnnotateInfoInterval(start int64, desp, details string) {
|
||||
AnnotateIntervalColor(TAG_INFO, start, desp, details, COLOR_INFO)
|
||||
}
|
||||
|
||||
func AnnotateTest(desp string, nservers int) {
|
||||
details := fmt.Sprintf("%s (%d servers)", desp, nservers)
|
||||
finfo = mkFrameworkInfo(nservers)
|
||||
annotation.clear()
|
||||
|
||||
AnnotateInfo(details, details)
|
||||
}
|
||||
|
||||
func AnnotateCheckerBegin(details string) {
|
||||
finfo.mu.Lock()
|
||||
defer finfo.mu.Unlock()
|
||||
|
||||
finfo.ckbegin = CheckerBegin{
|
||||
ts: timestamp(),
|
||||
details: details,
|
||||
}
|
||||
}
|
||||
|
||||
func AnnotateCheckerEnd(desp, details, color string) {
|
||||
finfo.mu.Lock()
|
||||
defer finfo.mu.Unlock()
|
||||
|
||||
ckbegin := finfo.ckbegin
|
||||
|
||||
if ckbegin.ts == 0 {
|
||||
// Annotate as a point-in-time if the begin timestamp is not set.
|
||||
AnnotatePointColor(TAG_CHECKER, desp, details, color)
|
||||
return
|
||||
}
|
||||
|
||||
// Annotate as an interval if the begin timestamp is set.
|
||||
d := fmt.Sprintf("%s: %s", ckbegin.details, details)
|
||||
AnnotateIntervalColor(TAG_CHECKER, ckbegin.ts, desp, d, color)
|
||||
|
||||
// Reset the checker begin timestamp.
|
||||
ckbegin.ts = 0
|
||||
}
|
||||
|
||||
func AnnotateCheckerSuccess(desp, details string) {
|
||||
AnnotateCheckerEnd(desp, details, COLOR_SUCCESS)
|
||||
}
|
||||
|
||||
func AnnotateCheckerFailure(desp, details string) {
|
||||
AnnotateCheckerEnd(desp, details, COLOR_FAILURE)
|
||||
}
|
||||
|
||||
func AnnotateCheckerNeutral(desp, details string) {
|
||||
AnnotateCheckerEnd(desp, details, COLOR_NEUTRAL)
|
||||
}
|
||||
|
||||
// Used before log.Fatalf
|
||||
func AnnotateCheckerFailureBeforeExit(desp, details string) {
|
||||
AnnotateCheckerFailure(desp, details)
|
||||
annotation.cleanup(true, "test failed")
|
||||
}
|
||||
|
||||
// Two functions to annotate partitions: AnnotateConnection and
|
||||
// AnnotateTwoPartitions. The connected field of ServerGrp (in group.go) is
|
||||
// precise if and only if the ServerGrp.Partition is not used. Thus, we use the
|
||||
// latter when ServerGrp.Partition is involved, and the former otherwise.
|
||||
func AnnotateConnection(connection []bool) {
|
||||
finfo.mu.Lock()
|
||||
defer finfo.mu.Unlock()
|
||||
|
||||
if slices.Equal(finfo.connected, connection) {
|
||||
// Nothing to do if the connection is unchanged.
|
||||
return
|
||||
}
|
||||
|
||||
copy(finfo.connected, connection)
|
||||
|
||||
annotateFault()
|
||||
}
|
||||
|
||||
func annotateFault() {
|
||||
trues := make([]bool, finfo.nservers)
|
||||
for id := range(trues) {
|
||||
trues[id] = true
|
||||
}
|
||||
falses := make([]bool, finfo.nservers)
|
||||
if slices.Equal(trues, finfo.connected) && slices.Equal(falses, finfo.crashed) {
|
||||
// No annotation when no partitions and no crashes.
|
||||
AnnotateContinuousEnd(TAG_PARTITION)
|
||||
return
|
||||
}
|
||||
|
||||
// Now, each disconnected server sits in its own partition, connected
|
||||
// servers in one partition; crahsed servers indicated at the end.
|
||||
conn := make([]int, 0)
|
||||
crashes := make([]int, 0)
|
||||
var builder strings.Builder
|
||||
builder.WriteString("partition = ")
|
||||
for id, connected := range(finfo.connected) {
|
||||
if finfo.crashed[id] {
|
||||
crashes = append(crashes, id)
|
||||
continue
|
||||
}
|
||||
if connected {
|
||||
conn = append(conn, id)
|
||||
} else {
|
||||
builder.WriteString(fmt.Sprintf("[%v] ", id))
|
||||
}
|
||||
}
|
||||
if len(conn) > 0 {
|
||||
builder.WriteString(fmt.Sprintf("%v", conn))
|
||||
}
|
||||
if len(crashes) > 0 {
|
||||
builder.WriteString(fmt.Sprintf(" / crash = %v", crashes))
|
||||
}
|
||||
text := builder.String()
|
||||
AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT)
|
||||
}
|
||||
|
||||
func AnnotateTwoPartitions(p1 []int, p2 []int) {
|
||||
// A bit hard to check whether the partition actually changes, so just
|
||||
// annotate on every invocation.
|
||||
// TODO
|
||||
text := fmt.Sprintf("%v %v", p1, p2)
|
||||
AnnotateContinuousColor(TAG_PARTITION, text, text, COLOR_FAULT)
|
||||
}
|
||||
|
||||
func AnnotateShutdown(servers []int) {
|
||||
finfo.mu.Lock()
|
||||
defer finfo.mu.Unlock()
|
||||
|
||||
changed := false
|
||||
for _, id := range(servers) {
|
||||
if !finfo.crashed[id] {
|
||||
changed = true
|
||||
}
|
||||
finfo.crashed[id] = true
|
||||
}
|
||||
|
||||
if !changed {
|
||||
// Nothing to do if the set of crashed servers is unchanged.
|
||||
return
|
||||
}
|
||||
|
||||
annotateFault()
|
||||
}
|
||||
|
||||
func AnnotateShutdownAll() {
|
||||
finfo.mu.Lock()
|
||||
n := finfo.nservers
|
||||
finfo.mu.Unlock()
|
||||
|
||||
servers := make([]int, n)
|
||||
for i := range(servers) {
|
||||
servers[i] = i
|
||||
}
|
||||
AnnotateShutdown(servers)
|
||||
}
|
||||
|
||||
func AnnotateRestart(servers []int) {
|
||||
finfo.mu.Lock()
|
||||
defer finfo.mu.Unlock()
|
||||
|
||||
changed := false
|
||||
for _, id := range(servers) {
|
||||
if finfo.crashed[id] {
|
||||
changed = true
|
||||
}
|
||||
finfo.crashed[id] = false
|
||||
}
|
||||
|
||||
if !changed {
|
||||
// Nothing to do if the set of crashed servers is unchanged.
|
||||
return
|
||||
}
|
||||
|
||||
annotateFault()
|
||||
}
|
||||
|
||||
func AnnotateRestartAll() {
|
||||
finfo.mu.Lock()
|
||||
n := finfo.nservers
|
||||
finfo.mu.Unlock()
|
||||
|
||||
servers := make([]int, n)
|
||||
for i := range(servers) {
|
||||
servers[i] = i
|
||||
}
|
||||
AnnotateRestart(servers)
|
||||
}
|
||||
|
||||
///
|
||||
/// Internal.
|
||||
///
|
||||
|
||||
func timestamp() int64 {
|
||||
return int64(time.Since(time.Unix(0, 0)))
|
||||
}
|
||||
|
||||
func (an *Annotation) retrieve() []porcupine.Annotation {
|
||||
an.mu.Lock()
|
||||
x := an.annotations
|
||||
t := timestamp()
|
||||
for tag, cont := range(an.continuous) {
|
||||
a := porcupine.Annotation{
|
||||
Tag: tag,
|
||||
Start: cont.start,
|
||||
End: t,
|
||||
Description: cont.desp,
|
||||
Details: cont.details,
|
||||
BackgroundColor: cont.bgcolor,
|
||||
}
|
||||
x = append(x, a)
|
||||
}
|
||||
an.annotations = make([]porcupine.Annotation, 0)
|
||||
an.continuous = make(map[string]Continuous)
|
||||
an.mu.Unlock()
|
||||
return x
|
||||
}
|
||||
|
||||
func (an *Annotation) clear() {
|
||||
an.mu.Lock()
|
||||
an.annotations = make([]porcupine.Annotation, 0)
|
||||
an.continuous = make(map[string]Continuous)
|
||||
an.mu.Unlock()
|
||||
}
|
||||
|
||||
func (an *Annotation) annotatePointColor(
|
||||
tag, desp, details, bgcolor string,
|
||||
) {
|
||||
an.mu.Lock()
|
||||
t := timestamp()
|
||||
a := porcupine.Annotation{
|
||||
Tag: tag,
|
||||
Start: t,
|
||||
Description: desp,
|
||||
Details: details,
|
||||
BackgroundColor: bgcolor,
|
||||
}
|
||||
an.annotations = append(an.annotations, a)
|
||||
an.mu.Unlock()
|
||||
}
|
||||
|
||||
func (an *Annotation) annotateIntervalColor(
|
||||
tag string, start int64, desp, details, bgcolor string,
|
||||
) {
|
||||
an.mu.Lock()
|
||||
a := porcupine.Annotation{
|
||||
Tag: tag,
|
||||
Start: start,
|
||||
End: timestamp(),
|
||||
Description: desp,
|
||||
Details: details,
|
||||
BackgroundColor: bgcolor,
|
||||
}
|
||||
an.annotations = append(an.annotations, a)
|
||||
an.mu.Unlock()
|
||||
}
|
||||
|
||||
func (an *Annotation) annotateContinuousColor(
|
||||
tag, desp, details, bgcolor string,
|
||||
) {
|
||||
an.mu.Lock()
|
||||
defer an.mu.Unlock()
|
||||
|
||||
cont, ok := an.continuous[tag]
|
||||
if !ok {
|
||||
// The first continuous annotation for tag. Simply add it to the
|
||||
// continuous map.
|
||||
an.continuous[tag] = Continuous{
|
||||
start: timestamp(),
|
||||
desp: desp,
|
||||
details: details,
|
||||
bgcolor: bgcolor,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Subsequent continuous annotation for tag. Concretize the previous
|
||||
// annotation and add this one to the continuous map.
|
||||
t := timestamp()
|
||||
aprev := porcupine.Annotation{
|
||||
Tag: tag,
|
||||
Start: cont.start,
|
||||
End: t,
|
||||
Description: cont.desp,
|
||||
Details: cont.details,
|
||||
BackgroundColor: cont.bgcolor,
|
||||
}
|
||||
an.annotations = append(an.annotations, aprev)
|
||||
an.continuous[tag] = Continuous{
|
||||
// XXX: If the start timestamp of an event is too closer to the end
|
||||
// timestamp of another event, Porcupine seems to overlap the two
|
||||
// events. We add a delta (1000) as a workaround, but remove this once
|
||||
// this issue is resolved.
|
||||
start: t + 1000,
|
||||
desp: desp,
|
||||
details: details,
|
||||
bgcolor: bgcolor,
|
||||
}
|
||||
}
|
||||
|
||||
func (an *Annotation) annotateContinuousEnd(tag string) {
|
||||
an.mu.Lock()
|
||||
defer an.mu.Unlock()
|
||||
|
||||
cont, ok := an.continuous[tag]
|
||||
if !ok {
|
||||
// Nothing to end since there's no on-going continuous annotation for
|
||||
// tag.
|
||||
}
|
||||
|
||||
// End the on-going continuous annotation for tag.
|
||||
t := timestamp()
|
||||
aprev := porcupine.Annotation{
|
||||
Tag: tag,
|
||||
Start: cont.start,
|
||||
End: t,
|
||||
Description: cont.desp,
|
||||
Details: cont.details,
|
||||
BackgroundColor: cont.bgcolor,
|
||||
}
|
||||
an.annotations = append(an.annotations, aprev)
|
||||
delete(an.continuous, tag)
|
||||
}
|
||||
|
||||
func (an *Annotation) cleanup(failed bool, end string) {
|
||||
enabled := os.Getenv("VIS_ENABLE")
|
||||
if enabled == "never" || (!failed && enabled != "always") {
|
||||
// Simply clean up the annotations without producing the vis file if
|
||||
// VIS_ENABLE is set to "never", or if the test passes and VIS_ENABLE is
|
||||
// not set to "always".
|
||||
an.clear()
|
||||
return
|
||||
}
|
||||
|
||||
annotations := an.retrieve()
|
||||
if len(annotations) == 0 {
|
||||
// Skip empty annotations.
|
||||
return
|
||||
}
|
||||
|
||||
// XXX: Make the last annotation a interval one to work around Porcupine's
|
||||
// issue. Consider removing this once the issue is fixed.
|
||||
t := timestamp()
|
||||
aend := porcupine.Annotation{
|
||||
Tag: TAG_INFO,
|
||||
Start: t,
|
||||
End: t + 1000,
|
||||
Description: end,
|
||||
Details: end,
|
||||
BackgroundColor: COLOR_INFO,
|
||||
}
|
||||
annotations = append(annotations, aend)
|
||||
|
||||
fpath := os.Getenv("VIS_FILE")
|
||||
var file *os.File
|
||||
var err error
|
||||
if fpath == "" {
|
||||
// Save the vis file in a temporary file.
|
||||
file, err = os.CreateTemp("", "porcupine-*.html")
|
||||
} else {
|
||||
file, err = os.OpenFile(fpath, os.O_RDWR | os.O_CREATE | os.O_TRUNC, 0644)
|
||||
}
|
||||
if err != nil {
|
||||
fmt.Printf("info: failed to open visualization file %s (%v)\n", fpath, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Create a fresh linearization info without any client operations and use
|
||||
// models.KvModel simply as a placeholder.
|
||||
info := porcupine.LinearizationInfo{}
|
||||
info.AddAnnotations(annotations)
|
||||
porcupine.Visualize(models.KvModel, info, file)
|
||||
fmt.Printf("info: wrote visualization to %s\n", file.Name())
|
||||
}
|
||||
|
||||
func mkAnnotation() *Annotation {
|
||||
an := Annotation{
|
||||
mu: new(sync.Mutex),
|
||||
annotations: make([]porcupine.Annotation, 0),
|
||||
continuous: make(map[string]Continuous),
|
||||
}
|
||||
|
||||
return &an
|
||||
}
|
||||
|
||||
func mkFrameworkInfo(nservers int) *FrameworkInfo {
|
||||
conn := make([]bool, nservers)
|
||||
for id := range(conn) {
|
||||
conn[id] = true
|
||||
}
|
||||
|
||||
finfo := FrameworkInfo{
|
||||
mu: new(sync.Mutex),
|
||||
nservers: nservers,
|
||||
connected: conn,
|
||||
crashed: make([]bool, nservers),
|
||||
}
|
||||
|
||||
return &finfo
|
||||
}
|
||||
|
||||
func captureSignal() struct{} {
|
||||
// Capture SIGINT to visualize on interruption.
|
||||
c := make(chan os.Signal, 1)
|
||||
signal.Notify(c, os.Interrupt)
|
||||
go func(){
|
||||
for range c {
|
||||
annotation.cleanup(true, "interrupted")
|
||||
os.Exit(1)
|
||||
}
|
||||
}()
|
||||
|
||||
return struct{}{}
|
||||
}
@@ -79,6 +79,11 @@ func (cfg *Config) Cleanup() {
    cfg.Clnts.cleanup()
    cfg.Groups.cleanup()
    cfg.net.Cleanup()
    if cfg.t.Failed() {
        annotation.cleanup(true, "test failed")
    } else {
        annotation.cleanup(false, "test passed")
    }
    cfg.CheckTimeout()
}

@@ -194,6 +194,10 @@ func (sg *ServerGrp) IsConnected(i int) bool {
    return sg.connected[i]
}

func (sg *ServerGrp) GetConnected() []bool {
    return sg.connected
}

// Maximum log size across all servers
func (sg *ServerGrp) LogSize() int {
    logsize := 0