fn: common.WaitGroup improvements (#940)

* fn: common.WaitGroup improvements

*) Split the API into AddSession/DoneSession
*) Only wake up listeners when session count reaches zero.

* fn: WaitGroup go-routine blast test

* fn: test fix and rebase fixup
This commit is contained in:
Tolga Ceylan
2018-04-12 16:21:13 -07:00
committed by GitHub
parent e7dd095b92
commit 623aeb35b2
6 changed files with 94 additions and 56 deletions

View File

@@ -258,7 +258,7 @@ func (a *agent) scheduleCallEnd(fn func()) {
go func() { go func() {
fn() fn()
atomic.AddInt64(&a.callEndCount, -1) atomic.AddInt64(&a.callEndCount, -1)
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
}() }()
} }
@@ -266,7 +266,7 @@ func (a *agent) finalizeCallEnd(ctx context.Context, err error, isRetriable, isS
// if scheduled in background, let scheduleCallEnd() handle // if scheduled in background, let scheduleCallEnd() handle
// the shutWg group, otherwise decrement here. // the shutWg group, otherwise decrement here.
if !isScheduled { if !isScheduled {
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
} }
handleStatsEnd(ctx, err) handleStatsEnd(ctx, err)
return transformTimeout(err, isRetriable) return transformTimeout(err, isRetriable)
@@ -442,7 +442,7 @@ func (a *agent) checkLaunch(ctx context.Context, call *call) {
go func() { go func() {
// NOTE: runHot will not inherit the timeout from ctx (ignore timings) // NOTE: runHot will not inherit the timeout from ctx (ignore timings)
a.runHot(ctx, call, tok, state) a.runHot(ctx, call, tok, state)
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
}() }()
return return
} }

View File

@@ -23,7 +23,7 @@ func (a *agent) asyncDequeue() {
for { for {
select { select {
case <-a.shutWg.Closer(): case <-a.shutWg.Closer():
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
return return
case <-a.resources.WaitAsyncResource(ctx): case <-a.resources.WaitAsyncResource(ctx):
// TODO we _could_ return a token here to reserve the ram so that there's // TODO we _could_ return a token here to reserve the ram so that there's
@@ -35,13 +35,13 @@ func (a *agent) asyncDequeue() {
// we think we can get a cookie now, so go get a cookie // we think we can get a cookie now, so go get a cookie
select { select {
case <-a.shutWg.Closer(): case <-a.shutWg.Closer():
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
return return
case model, ok := <-a.asyncChew(ctx): case model, ok := <-a.asyncChew(ctx):
if ok { if ok {
go func(model *models.Call) { go func(model *models.Call) {
a.asyncRun(ctx, model) a.asyncRun(ctx, model)
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
}(model) }(model)
// WARNING: tricky. We reserve another session for next iteration of the loop // WARNING: tricky. We reserve another session for next iteration of the loop

View File

@@ -192,7 +192,7 @@ func (a *lbAgent) scheduleCallEnd(fn func()) {
go func() { go func() {
fn() fn()
atomic.AddInt64(&a.callEndCount, -1) atomic.AddInt64(&a.callEndCount, -1)
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
}() }()
} }
@@ -209,7 +209,7 @@ func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isSt
return transformTimeout(err, false) return transformTimeout(err, false)
} }
a.shutWg.AddSession(-1) a.shutWg.DoneSession()
handleStatsDequeue(ctx, err) handleStatsDequeue(ctx, err)
return transformTimeout(err, true) return transformTimeout(err, true)
} }

View File

@@ -94,7 +94,7 @@ func (r *gRPCRunner) TryExec(ctx context.Context, call pool.RunnerCall) (bool, e
if !r.shutWg.AddSession(1) { if !r.shutWg.AddSession(1) {
return true, ErrorRunnerClosed return true, ErrorRunnerClosed
} }
defer r.shutWg.AddSession(-1) defer r.shutWg.DoneSession()
// extract the call's model data to pass on to the pure runner // extract the call's model data to pass on to the pure runner
modelJSON, err := json.Marshal(call.Model()) modelJSON, err := json.Marshal(call.Model())

View File

@@ -9,20 +9,19 @@ import (
/* /*
WaitGroup is used to manage and wait for a collection of WaitGroup is used to manage and wait for a collection of
sessions. It is similar to sync.WaitGroup, but sessions. It is similar to sync.WaitGroup, but
AddSession/CloseGroup session is not only thread AddSession/DoneSession/CloseGroup session is not only thread
safe but can be executed in any order unlike sync.WaitGroup. safe but can be executed in any order unlike sync.WaitGroup.
Once a shutdown is initiated via CloseGroup(), add/rm Once a shutdown is initiated via CloseGroup(), add/done
operations will still function correctly, where operations will still function correctly, where
AddSession would return false. In this state, AddSession would return false. In this state,
CloseGroup() blocks until sessions get drained CloseGroup() blocks until sessions get drained
via remove operations. via DoneSession() operations.
It is an error to call AddSession() with invalid values. It is callers responsibility to make sure AddSessions
For example, if current session count is 1, AddSession and DoneSessions math adds up to >= 0. In other words,
can only add more or subtract 1 from this. Caller needs calling more DoneSessions() than sum of AddSessions()
to make sure addition/subtraction math is correct when will cause panic.
using WaitGroup.
Example usage: Example usage:
@@ -34,7 +33,7 @@ import (
// group may be closing or full // group may be closing or full
return return
} }
defer group.AddSession(-1) defer group.DoneSession()
// do stuff // do stuff
}(item) }(item)
@@ -65,45 +64,44 @@ func (r *WaitGroup) Closer() chan struct{} {
} }
// AddSession manipulates the session counter by // AddSession manipulates the session counter by
// adding or subtracting the delta value. Incrementing // adding the delta value. Incrementing
// the session counter is not possible and will set // the session counter is not possible and will set
// return value to false if a close was initiated. // return value to false if a close was initiated.
// It's callers responsibility to make sure addition and func (r *WaitGroup) AddSession(delta uint64) bool {
// subtraction math is correct.
func (r *WaitGroup) AddSession(delta int64) bool {
r.cond.L.Lock() r.cond.L.Lock()
defer r.cond.L.Unlock() defer r.cond.L.Unlock()
if delta >= 0 { // we cannot add if we are being shutdown
// we cannot add if we are being shutdown if r.isClosed {
if r.isClosed { return false
return false }
}
incr := uint64(delta) // we have maxed out
if r.sessions == math.MaxUint64-delta {
return false
}
// we have maxed out r.sessions += delta
if r.sessions == math.MaxUint64-incr { return true
return false }
}
r.sessions += incr // DoneSession decrements 1 from accumulated
} else { // sessions and wakes up listeners when this reaches
decr := uint64(-delta) // zero.
func (r *WaitGroup) DoneSession() {
r.cond.L.Lock()
defer r.cond.L.Unlock()
// illegal operation, it's callers responsibility // illegal operation, it's callers responsibility
// to make sure subtraction and addition math is correct. // to make sure subtraction and addition math is correct.
if r.sessions < decr { if r.sessions == 0 {
panic(fmt.Sprintf("common.WaitGroup misuse sum=%d decr=%d isClosed=%v", panic(fmt.Sprintf("common.WaitGroup misuse isClosed=%v", r.isClosed))
r.sessions, decr, r.isClosed)) }
}
r.sessions -= decr r.sessions -= 1
if r.sessions == 0 {
// subtractions need to notify CloseGroup
r.cond.Broadcast() r.cond.Broadcast()
} }
return true
} }
// CloseGroup initiates a close and blocks until // CloseGroup initiates a close and blocks until

View File

@@ -2,6 +2,7 @@ package common
import ( import (
"testing" "testing"
"time"
) )
func isClosed(ch chan struct{}) bool { func isClosed(ch chan struct{}) bool {
@@ -44,6 +45,52 @@ func TestWaitGroupEmpty(t *testing.T) {
} }
} }
func TestWaitGroupBlast(t *testing.T) {
wg := NewWaitGroup()
wg.AddSession(1)
blastRadius := 500
for i := 0; i < blastRadius; i++ {
go func(i int) {
if !wg.AddSession(1) {
// this is OK, we are creating a race
// and some cannot make it after main
// go-routine issues a CloseGroupNB()
return
}
if isClosed(wg.Closer()) {
t.Fatalf("Should not be closing state")
}
wg.DoneSession()
}(i)
}
if isClosed(wg.Closer()) {
t.Fatalf("Should not be closing state")
}
done := wg.CloseGroupNB()
if !isClosed(wg.Closer()) {
t.Fatalf("Should be closing state")
}
select {
case <-done:
t.Fatalf("NB Chan should not be closed yet, since sum is 1")
case <-time.After(time.Duration(1) * time.Second):
wg.DoneSession()
}
select {
case <-done:
case <-time.After(time.Duration(1) * time.Second):
t.Fatalf("NB Chan should be closed by now, since sum is 0")
}
}
func TestWaitGroupSingle(t *testing.T) { func TestWaitGroupSingle(t *testing.T) {
wg := NewWaitGroup() wg := NewWaitGroup()
@@ -60,10 +107,7 @@ func TestWaitGroupSingle(t *testing.T) {
t.Fatalf("Should not be closing state yet") t.Fatalf("Should not be closing state yet")
} }
if !wg.AddSession(-1) { wg.DoneSession()
t.Fatalf("Add -1 should not fail")
}
// sum should be zero now. // sum should be zero now.
if !wg.AddSession(2) { if !wg.AddSession(2) {
@@ -78,9 +122,8 @@ func TestWaitGroupSingle(t *testing.T) {
t.Fatalf("NB Chan should not be closed yet, since sum is 2") t.Fatalf("NB Chan should not be closed yet, since sum is 2")
} }
if !wg.AddSession(-1) { wg.DoneSession()
t.Fatalf("Add -1 should not fail")
}
if wg.AddSession(1) { if wg.AddSession(1) {
t.Fatalf("Add 1 should fail (we are shutting down)") t.Fatalf("Add 1 should fail (we are shutting down)")
} }
@@ -106,9 +149,7 @@ func TestWaitGroupSingle(t *testing.T) {
t.Fatalf("Should be closing state") t.Fatalf("Should be closing state")
} }
if !wg.AddSession(-1) { wg.DoneSession()
t.Fatalf("Add -1 should not fail")
}
// sum is 0 now // sum is 0 now
<-done <-done
@@ -120,5 +161,4 @@ func TestWaitGroupSingle(t *testing.T) {
if !isClosed(wg.Closer()) { if !isClosed(wg.Closer()) {
t.Fatalf("Should be closing state") t.Fatalf("Should be closing state")
} }
} }