mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: sync.WaitGroup replacement common.WaitGroup (#937)
* fn: sync.WaitGroup replacement common.WaitGroup. agent/lb_agent/pure_runner have been using sync.WaitGroup semantics incorrectly. This commit switches these components to the new common.WaitGroup, which provides some handy functionality for common graceful shutdown cases. From https://golang.org/pkg/sync/#WaitGroup, "Note that calls with a positive delta that occur when the counter is zero must happen before a Wait. Calls with a negative delta, or calls with a positive delta that start when the counter is greater than zero, may happen at any time. Typically this means the calls to Add should execute before the statement creating the goroutine or other event to be waited for. If a WaitGroup is reused to wait for several independent sets of events, new Add calls must happen after all previous Wait calls have returned." HandleCallEnd introduces some complexity to the shutdowns, but this is currently handled by calling AddSession(2) initially and letting HandleCallEnd() decide when to apply its -1 decrement, in addition to the -1 decrement in Submit(). The lb_agent shutdown sequence, and particularly its timeouts with the runner pool, need another look/revision, but this is outside the scope of this commit. * fn: lb-agent wg share * fn: no need to +2 in Submit with defer. Removed the defer since handleCallEnd already has this responsibility.
This commit is contained in:
@@ -2,12 +2,12 @@ package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"go.opencensus.io/trace"
|
||||
|
||||
"github.com/fnproject/fn/api/common"
|
||||
"github.com/fnproject/fn/api/models"
|
||||
pool "github.com/fnproject/fn/api/runnerpool"
|
||||
"github.com/fnproject/fn/fnext"
|
||||
@@ -28,19 +28,19 @@ type lbAgent struct {
|
||||
delegatedAgent Agent
|
||||
rp pool.RunnerPool
|
||||
placer pool.Placer
|
||||
|
||||
wg sync.WaitGroup // Needs a good name
|
||||
shutdown chan struct{}
|
||||
shutWg *common.WaitGroup
|
||||
}
|
||||
|
||||
// NewLBAgent creates an Agent that knows how to load-balance function calls
|
||||
// across a group of runner nodes.
|
||||
func NewLBAgent(da DataAccess, rp pool.RunnerPool, p pool.Placer) (Agent, error) {
|
||||
agent := createAgent(da, false)
|
||||
wg := common.NewWaitGroup()
|
||||
agent := createAgent(da, false, wg)
|
||||
a := &lbAgent{
|
||||
delegatedAgent: agent,
|
||||
rp: rp,
|
||||
placer: p,
|
||||
shutWg: wg,
|
||||
}
|
||||
return a, nil
|
||||
}
|
||||
@@ -63,18 +63,31 @@ func (a *lbAgent) GetCall(opts ...CallOpt) (Call, error) {
|
||||
}
|
||||
|
||||
func (a *lbAgent) Close() error {
|
||||
// we should really be passing the server's context here
|
||||
|
||||
// start closing the front gate first
|
||||
ch := a.shutWg.CloseGroupNB()
|
||||
|
||||
// delegated agent shutdown next, blocks here...
|
||||
err1 := a.delegatedAgent.Close()
|
||||
if err1 != nil {
|
||||
logrus.WithError(err1).Warn("Delegated agent shutdown error")
|
||||
}
|
||||
|
||||
// finally shutdown the runner pool
|
||||
ctx, cancel := context.WithTimeout(context.Background(), runnerPoolShutdownTimeout)
|
||||
defer cancel()
|
||||
|
||||
close(a.shutdown)
|
||||
a.rp.Shutdown(ctx)
|
||||
err := a.delegatedAgent.Close()
|
||||
a.wg.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
err2 := a.rp.Shutdown(ctx)
|
||||
if err2 != nil {
|
||||
logrus.WithError(err2).Warn("Runner pool shutdown error")
|
||||
}
|
||||
return nil
|
||||
|
||||
// gate-on front-gate, should be completed if delegated agent & runner pool is gone.
|
||||
<-ch
|
||||
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
return err2
|
||||
}
|
||||
|
||||
func GetGroupID(call *models.Call) string {
|
||||
@@ -90,13 +103,8 @@ func GetGroupID(call *models.Call) string {
|
||||
}
|
||||
|
||||
func (a *lbAgent) Submit(callI Call) error {
|
||||
a.wg.Add(1)
|
||||
defer a.wg.Done()
|
||||
|
||||
select {
|
||||
case <-a.shutdown:
|
||||
if !a.shutWg.AddSession(1) {
|
||||
return models.ErrCallTimeoutServerBusy
|
||||
default:
|
||||
}
|
||||
|
||||
call := callI.(*call)
|
||||
|
||||
Reference in New Issue
Block a user