mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: reduce lbagent and agent dependency (#938)
* fn: reduce lbagent and agent dependency. The lbagent and agent code is too tightly coupled, which causes any change in agent to break lbagent. In reality, for LB there should be no delegated agent. Splitting these two will cause some code duplication, but it reduces dependency and complexity (e.g. agent without docker) * fn: post rebase fixup * fn: runner/runnercall should use lbDeadline * fn: fixup lb agent test * fn: remove agent create option for common.WaitGroup
This commit is contained in:
@@ -2,6 +2,8 @@ package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -25,41 +27,89 @@ const (
|
||||
)
|
||||
|
||||
// lbAgent is the Agent implementation returned by NewLBAgent; it load-balances
// function calls across a group of runner nodes rather than running them.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The
// fields rp, placer and shutWg appear twice; the first group (together with
// delegatedAgent) looks like the pre-change side and the second group like
// the post-change side -- confirm against the repository.
type lbAgent struct {
|
||||
// delegatedAgent is the wrapped Agent that the delegating method variants
// in this listing forward to.
delegatedAgent Agent
|
||||
// rp is the runner pool handed to placer.PlaceCall and shut down in Close.
rp pool.RunnerPool
|
||||
// placer places a call onto the runner pool (see submit).
placer pool.Placer
|
||||
// shutWg tracks in-flight sessions; Close waits on it.
shutWg *common.WaitGroup
|
||||
// cfg supplies MaxCallEndStacking and CallEndTimeout.
cfg AgentConfig
|
||||
// da backs GetAppID, GetAppByID and GetRoute, and is attached to each call
// built by GetCall.
da DataAccess
|
||||
// callListeners is appended to by AddCallListener and passed to
// fireBeforeCallFun / fireAfterCallFun.
callListeners []fnext.CallListener
|
||||
rp pool.RunnerPool
|
||||
placer pool.Placer
|
||||
|
||||
shutWg *common.WaitGroup
|
||||
// callEndCount counts call.End() invocations currently scheduled by
// scheduleCallEnd; it is only accessed through sync/atomic.
callEndCount int64
|
||||
}
|
||||
|
||||
// NewLBAgent creates an Agent that knows how to load-balance function calls
|
||||
// across a group of runner nodes.
//
// da is stored for app/route lookups, rp is the runner pool calls are placed
// on, and p is the placer that chooses a runner. The returned error is nil on
// every path shown here: a configuration error is reported through
// logrus Fatalf instead of being returned.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so both
// the wait-group/createAgent lines and the later NewAgentConfig-based lines
// appear, and the composite literal repeats rp, placer and shutWg. The first
// set looks like the pre-change side -- confirm against the repository.
|
||||
func NewLBAgent(da DataAccess, rp pool.RunnerPool, p pool.Placer) (Agent, error) {
|
||||
wg := common.NewWaitGroup()
|
||||
agent := createAgent(da, false, wg)
|
||||
|
||||
// TODO: Move the constants above to Agent Config or an LB specific LBAgentConfig
|
||||
cfg, err := NewAgentConfig()
|
||||
if err != nil {
|
||||
// Fatalf logs the error and then terminates the process.
logrus.WithError(err).Fatalf("error in lb-agent config cfg=%+v", cfg)
|
||||
}
|
||||
logrus.Infof("lb-agent starting cfg=%+v", cfg)
|
||||
|
||||
a := &lbAgent{
|
||||
delegatedAgent: agent,
|
||||
rp: rp,
|
||||
placer: p,
|
||||
shutWg: wg,
|
||||
// The configuration is copied by value into the agent.
cfg: *cfg,
|
||||
da: da,
|
||||
rp: rp,
|
||||
placer: p,
|
||||
shutWg: common.NewWaitGroup(),
|
||||
}
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func (a *lbAgent) AddCallListener(listener fnext.CallListener) {
|
||||
a.callListeners = append(a.callListeners, listener)
|
||||
}
|
||||
|
||||
func (a *lbAgent) fireBeforeCall(ctx context.Context, call *models.Call) error {
|
||||
return fireBeforeCallFun(a.callListeners, ctx, call)
|
||||
}
|
||||
|
||||
func (a *lbAgent) fireAfterCall(ctx context.Context, call *models.Call) error {
|
||||
return fireAfterCallFun(a.callListeners, ctx, call)
|
||||
}
|
||||
|
||||
// GetAppID returns the ID of the app named appName.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The
// first return (via delegatedAgent) looks like the pre-change line and the
// second (via the DataAccess in a.da) like its replacement -- confirm against
// the repository. Read literally, the second return is unreachable.
|
||||
func (a *lbAgent) GetAppID(ctx context.Context, appName string) (string, error) {
|
||||
return a.delegatedAgent.GetAppID(ctx, appName)
|
||||
return a.da.GetAppID(ctx, appName)
|
||||
}
|
||||
|
||||
// GetAppByID returns the app identified by appID.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The
// first return (via delegatedAgent) looks like the pre-change line and the
// second (via the DataAccess in a.da) like its replacement -- confirm against
// the repository. Read literally, the second return is unreachable.
|
||||
func (a *lbAgent) GetAppByID(ctx context.Context, appID string) (*models.App, error) {
|
||||
return a.delegatedAgent.GetAppByID(ctx, appID)
|
||||
return a.da.GetAppByID(ctx, appID)
|
||||
}
|
||||
|
||||
func (a *lbAgent) GetRoute(ctx context.Context, appID string, path string) (*models.Route, error) {
|
||||
return a.da.GetRoute(ctx, appID, path)
|
||||
}
|
||||
|
||||
// GetCall delegates to the wrapped agent but disables the capacity check as
|
||||
// this agent isn't actually running the call.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The
// sentence above matches the first two statements (append
// WithoutPreemptiveCapacityCheck, then delegate); those look like the
// pre-change lines. The statements from "var c call" onward build the call
// directly: apply every option, require both a request and a call model,
// attach the agent's DataAccess, and set lbDeadline -- confirm which side is
// current against the repository.
|
||||
func (a *lbAgent) GetCall(opts ...CallOpt) (Call, error) {
|
||||
opts = append(opts, WithoutPreemptiveCapacityCheck())
|
||||
return a.delegatedAgent.GetCall(opts...)
|
||||
var c call
|
||||
|
||||
// Apply each option to the call under construction; the first option that
// fails aborts construction with its error.
for _, o := range opts {
|
||||
err := o(&c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// TODO typed errors to test
|
||||
if c.req == nil || c.Call == nil {
|
||||
return nil, errors.New("no model or request provided for call")
|
||||
}
|
||||
|
||||
c.da = a.da
|
||||
c.ct = a
|
||||
c.stderr = &nullReadWriter{}
|
||||
|
||||
// Tag the request context's logger with the call's id, app id and route.
ctx, _ := common.LoggerWithFields(c.req.Context(),
|
||||
logrus.Fields{"id": c.ID, "app_id": c.AppID, "route": c.Path})
|
||||
c.req = c.req.WithContext(ctx)
|
||||
|
||||
// lbDeadline is now plus the call's Timeout in seconds; Submit uses it as
// the deadline of the request context.
c.lbDeadline = time.Now().Add(time.Duration(c.Call.Timeout) * time.Second)
|
||||
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func (a *lbAgent) Close() error {
|
||||
@@ -67,27 +117,17 @@ func (a *lbAgent) Close() error {
|
||||
// start closing the front gate first
|
||||
ch := a.shutWg.CloseGroupNB()
|
||||
|
||||
// delegated agent shutdown next, blocks here...
|
||||
err1 := a.delegatedAgent.Close()
|
||||
if err1 != nil {
|
||||
logrus.WithError(err1).Warn("Delegated agent shutdown error")
|
||||
}
|
||||
|
||||
// finally shutdown the runner pool
|
||||
ctx, cancel := context.WithTimeout(context.Background(), runnerPoolShutdownTimeout)
|
||||
defer cancel()
|
||||
err2 := a.rp.Shutdown(ctx)
|
||||
if err2 != nil {
|
||||
logrus.WithError(err2).Warn("Runner pool shutdown error")
|
||||
err := a.rp.Shutdown(ctx)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("Runner pool shutdown error")
|
||||
}
|
||||
|
||||
// gate-on front-gate, should be completed if delegated agent & runner pool is gone.
|
||||
<-ch
|
||||
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
return err2
|
||||
return err
|
||||
}
|
||||
|
||||
func GetGroupID(call *models.Call) string {
|
||||
@@ -109,20 +149,20 @@ func (a *lbAgent) Submit(callI Call) error {
|
||||
|
||||
call := callI.(*call)
|
||||
|
||||
ctx, cancel := context.WithDeadline(call.req.Context(), call.execDeadline)
|
||||
ctx, cancel := context.WithDeadline(call.req.Context(), call.lbDeadline)
|
||||
call.req = call.req.WithContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
ctx, span := trace.StartSpan(ctx, "agent_submit")
|
||||
defer span.End()
|
||||
|
||||
err := a.submit(ctx, call)
|
||||
return err
|
||||
}
|
||||
|
||||
func (a *lbAgent) submit(ctx context.Context, call *call) error {
|
||||
statsEnqueue(ctx)
|
||||
|
||||
// first check any excess case of call.End() stacking.
|
||||
if atomic.LoadInt64(&a.callEndCount) >= int64(a.cfg.MaxCallEndStacking) {
|
||||
a.handleCallEnd(ctx, call, context.DeadlineExceeded, false)
|
||||
}
|
||||
|
||||
err := call.Start(ctx)
|
||||
if err != nil {
|
||||
return a.handleCallEnd(ctx, call, err, false)
|
||||
@@ -130,6 +170,10 @@ func (a *lbAgent) submit(ctx context.Context, call *call) error {
|
||||
|
||||
statsDequeueAndStart(ctx)
|
||||
|
||||
// WARNING: isStarted (handleCallEnd) semantics
|
||||
// need some consideration here. Similar to runner/agent
|
||||
// we consider isCommitted true if call.Start() succeeds.
|
||||
// isStarted=true means we will call Call.End().
|
||||
err = a.placer.PlaceCall(a.rp, ctx, call)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Error("Failed to place call")
|
||||
@@ -138,16 +182,34 @@ func (a *lbAgent) submit(ctx context.Context, call *call) error {
|
||||
return a.handleCallEnd(ctx, call, err, true)
|
||||
}
|
||||
|
||||
// AddCallListener forwards cl to the delegated agent's AddCallListener.
//
// NOTE(review): this is a second AddCallListener definition in this listing;
// it looks like the pre-change (removed) side of the rendered diff, replaced
// by the variant that appends to a.callListeners -- confirm against the
// repository.
func (a *lbAgent) AddCallListener(cl fnext.CallListener) {
|
||||
a.delegatedAgent.AddCallListener(cl)
|
||||
}
|
||||
|
||||
func (a *lbAgent) Enqueue(context.Context, *models.Call) error {
|
||||
logrus.Fatal("Enqueue not implemented. Panicking.")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isCommitted bool) error {
|
||||
delegatedAgent := a.delegatedAgent.(*agent)
|
||||
return delegatedAgent.handleCallEnd(ctx, call, nil, err, isCommitted)
|
||||
func (a *lbAgent) scheduleCallEnd(fn func()) {
|
||||
atomic.AddInt64(&a.callEndCount, 1)
|
||||
go func() {
|
||||
fn()
|
||||
atomic.AddInt64(&a.callEndCount, -1)
|
||||
a.shutWg.AddSession(-1)
|
||||
}()
|
||||
}
|
||||
|
||||
func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isStarted bool) error {
|
||||
if isStarted {
|
||||
a.scheduleCallEnd(func() {
|
||||
ctx = common.BackgroundContext(ctx)
|
||||
ctx, cancel := context.WithTimeout(ctx, a.cfg.CallEndTimeout)
|
||||
call.End(ctx, err)
|
||||
cancel()
|
||||
})
|
||||
|
||||
handleStatsEnd(ctx, err)
|
||||
return transformTimeout(err, false)
|
||||
}
|
||||
|
||||
a.shutWg.AddSession(-1)
|
||||
handleStatsDequeue(ctx, err)
|
||||
return transformTimeout(err, true)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user