NetworkDB incorrect number of entries in networkNodes

A rapid (within networkReapTime 30min) leave/join network
can corrupt the list of nodes per network with multiple copies
of the same nodes.
The fix makes sure that each node is present only once

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
This commit is contained in:
Flavio Crisciani 2017-07-05 11:02:04 -07:00
parent f23721a4c7
commit 297c3d4ad2
No known key found for this signature in database
GPG Key ID: 28CAFCE754CF3A48
4 changed files with 92 additions and 5 deletions

View File

@ -310,12 +310,11 @@ func (nDB *NetworkDB) reapState() {
func (nDB *NetworkDB) reapNetworks() {
nDB.Lock()
for name, nn := range nDB.networks {
for _, nn := range nDB.networks {
for id, n := range nn {
if n.leaving {
if n.reapTime <= 0 {
delete(nn, id)
nDB.deleteNetworkNode(id, name)
continue
}
n.reapTime -= reapPeriod

View File

@ -179,7 +179,12 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
flushEntries = true
}
nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
if nEvent.Type == NetworkEventTypeLeave {
nDB.deleteNetworkNode(nEvent.NetworkID, nEvent.NodeName)
} else {
nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
}
return true
}

View File

@ -487,7 +487,7 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error {
},
RetransmitMult: 4,
}
nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName)
nDB.addNetworkNode(nid, nDB.config.NodeName)
networkNodes := nDB.networkNodes[nid]
nDB.Unlock()
@ -522,6 +522,8 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
entries []*entry
)
nDB.deleteNetworkNode(nid, nDB.config.NodeName)
nwWalker := func(path string, v interface{}) bool {
entry, ok := v.(*entry)
if !ok {
@ -580,7 +582,10 @@ func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) {
// passed network. Caller should hold the NetworkDB lock while calling
// this
func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
nodes := nDB.networkNodes[nid]
nodes, ok := nDB.networkNodes[nid]
if !ok || len(nodes) == 0 {
return
}
newNodes := make([]string, 0, len(nodes)-1)
for _, name := range nodes {
if name == nodeName {

View File

@ -436,3 +436,81 @@ func TestNetworkDBCRUDMediumCluster(t *testing.T) {
log.Print("Closing DB instances...")
closeNetworkDBInstances(dbs)
}
func TestNetworkDBNodeJoinLeaveIteration(t *testing.T) {
maxRetry := 5
dbs := createNetworkDBInstances(t, 2, "node")
// Single node Join/Leave
err := dbs[0].JoinNetwork("network1")
assert.NoError(t, err)
if len(dbs[0].networkNodes["network1"]) != 1 {
t.Fatalf("The networkNodes list has to have be 1 instead of %d", len(dbs[0].networkNodes["network1"]))
}
err = dbs[0].LeaveNetwork("network1")
assert.NoError(t, err)
if len(dbs[0].networkNodes["network1"]) != 0 {
t.Fatalf("The networkNodes list has to have be 0 instead of %d", len(dbs[0].networkNodes["network1"]))
}
// Multiple nodes Join/Leave
err = dbs[0].JoinNetwork("network1")
assert.NoError(t, err)
err = dbs[1].JoinNetwork("network1")
assert.NoError(t, err)
// Wait for the propagation on db[0]
for i := 0; i < maxRetry; i++ {
if len(dbs[0].networkNodes["network1"]) == 2 {
break
}
time.Sleep(1 * time.Second)
}
if len(dbs[0].networkNodes["network1"]) != 2 {
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[0].networkNodes["network1"]), dbs[0].networkNodes["network1"])
}
// Wait for the propagation on db[1]
for i := 0; i < maxRetry; i++ {
if len(dbs[1].networkNodes["network1"]) == 2 {
break
}
time.Sleep(1 * time.Second)
}
if len(dbs[1].networkNodes["network1"]) != 2 {
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[1].networkNodes["network1"]), dbs[1].networkNodes["network1"])
}
// Try a quick leave/join
err = dbs[0].LeaveNetwork("network1")
assert.NoError(t, err)
err = dbs[0].JoinNetwork("network1")
assert.NoError(t, err)
for i := 0; i < maxRetry; i++ {
if len(dbs[0].networkNodes["network1"]) == 2 {
break
}
time.Sleep(1 * time.Second)
}
if len(dbs[0].networkNodes["network1"]) != 2 {
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[0].networkNodes["network1"]), dbs[0].networkNodes["network1"])
}
for i := 0; i < maxRetry; i++ {
if len(dbs[1].networkNodes["network1"]) == 2 {
break
}
time.Sleep(1 * time.Second)
}
if len(dbs[1].networkNodes["network1"]) != 2 {
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[1].networkNodes["network1"]), dbs[1].networkNodes["network1"])
}
dbs[0].Close()
dbs[1].Close()
}