mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: agent/lb/runner error handling adjustments (#1214)
1) Early call validation that fails because the cpu/mem requirements are impossible to meet (e.g. the request asks for more cpu/mem than max-cpu or max-mem on the server) now returns HTTP Bad Request (400) instead of 503. This case is most likely due to a client/service configuration and/or validation issue. 2) The 'failed' metric is now removed; 'failed' versus 'errors' was too confusing. 'errors' is now the catch-all error case. 3) New 'canceled' counter for client-side cancels. 4) 'server_busy' now covers more cases than it previously did.
This commit is contained in:
@@ -244,15 +244,17 @@ func (a *agent) Close() error {
|
||||
}
|
||||
|
||||
func (a *agent) Submit(callI Call) error {
|
||||
call := callI.(*call)
|
||||
ctx, span := trace.StartSpan(call.req.Context(), "agent_submit")
|
||||
defer span.End()
|
||||
|
||||
statsCalls(ctx)
|
||||
|
||||
if !a.shutWg.AddSession(1) {
|
||||
statsTooBusy(ctx)
|
||||
return models.ErrCallTimeoutServerBusy
|
||||
}
|
||||
|
||||
call := callI.(*call)
|
||||
|
||||
ctx := call.req.Context()
|
||||
ctx, span := trace.StartSpan(ctx, "agent_submit")
|
||||
defer span.End()
|
||||
defer a.shutWg.DoneSession()
|
||||
|
||||
err := a.submit(ctx, call)
|
||||
return err
|
||||
@@ -294,7 +296,8 @@ func (a *agent) submit(ctx context.Context, call *call) error {
|
||||
return a.handleCallEnd(ctx, call, slot, err, false)
|
||||
}
|
||||
|
||||
statsDequeueAndStart(ctx)
|
||||
statsDequeue(ctx)
|
||||
statsStartRun(ctx)
|
||||
|
||||
// We are about to execute the function, set container Exec Deadline (call.Timeout)
|
||||
slotCtx, cancel := context.WithTimeout(ctx, time.Duration(call.Timeout)*time.Second)
|
||||
@@ -314,52 +317,26 @@ func (a *agent) handleCallEnd(ctx context.Context, call *call, slot Slot, err er
|
||||
// This means call was routed (executed)
|
||||
if isStarted {
|
||||
call.End(ctx, err)
|
||||
}
|
||||
|
||||
handleStatsEnd(ctx, err)
|
||||
a.shutWg.DoneSession()
|
||||
return transformTimeout(err, !isStarted)
|
||||
}
|
||||
|
||||
func transformTimeout(e error, isRetriable bool) error {
|
||||
if e == context.DeadlineExceeded {
|
||||
if isRetriable {
|
||||
statsStopRun(ctx)
|
||||
if err == nil {
|
||||
statsComplete(ctx)
|
||||
}
|
||||
} else {
|
||||
if err == CapacityFull || err == context.DeadlineExceeded {
|
||||
statsTooBusy(ctx)
|
||||
return models.ErrCallTimeoutServerBusy
|
||||
}
|
||||
return models.ErrCallTimeout
|
||||
} else if e == CapacityFull {
|
||||
return models.ErrCallTimeoutServerBusy
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// handleStatsDequeue handles stats for dequeuing for early exit (getSlot or Start)
|
||||
// cases. Only timeouts can be a simple dequeue while other cases are actual errors.
|
||||
func handleStatsDequeue(ctx context.Context, err error) {
|
||||
if err == context.DeadlineExceeded {
|
||||
statsDequeue(ctx)
|
||||
statsTooBusy(ctx)
|
||||
} else {
|
||||
statsDequeueAndFail(ctx)
|
||||
statsTimedout(ctx)
|
||||
return models.ErrCallTimeout
|
||||
} else if err == context.Canceled {
|
||||
statsCanceled(ctx)
|
||||
} else if err != nil {
|
||||
statsErrors(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// handleStatsEnd handles stats for after a call is ran, depending on error.
|
||||
func handleStatsEnd(ctx context.Context, err error) {
|
||||
if err == nil {
|
||||
// decrement running count, increment completed count
|
||||
statsComplete(ctx)
|
||||
} else {
|
||||
// decrement running count, increment failed count
|
||||
statsFailed(ctx)
|
||||
// increment the timeout or errors count, as appropriate
|
||||
if err == context.DeadlineExceeded {
|
||||
statsTimedout(ctx)
|
||||
} else {
|
||||
statsErrors(ctx)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// getSlot returns a Slot (or error) for the request to run. Depending on hot/cold
|
||||
|
||||
Reference in New Issue
Block a user