mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: error handling updates (#1337)
* docker-pull timeout is now a 504 which classifies it as a service error. Avoid using 503 to make sure LB does not retry. * Only applicable to detached mode, a timeout on LB is now a ErrServiceReservationFailure (500). In detached mode, this is unlikely to make it back to a client and it is mostly for documentation/metrics purposes. * For Triggers, avoid scrubbing service code.
This commit is contained in:
@@ -304,6 +304,18 @@ func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isFo
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
statsComplete(ctx)
|
statsComplete(ctx)
|
||||||
recordCallLatency(ctx, call, completedMetricName)
|
recordCallLatency(ctx, call, completedMetricName)
|
||||||
|
} else if err == context.DeadlineExceeded {
|
||||||
|
// We are here because we were unable to service this request for the given
|
||||||
|
// reservation. In detached case, the reservation is calculated based on estimated
|
||||||
|
// total time to run a request. (See: spawnPlaceCall) Otherwise, there's no set
|
||||||
|
// deadline in the request context. This is also a bit more robust going forward
|
||||||
|
// if we start enforcing a maximum overall deadline for clients. For detached case, the
|
||||||
|
// error is unlikely to be delivered to the client since this is essentially an async
|
||||||
|
// operation.
|
||||||
|
statsTimedout(ctx)
|
||||||
|
recordCallLatency(ctx, call, timedoutMetricName)
|
||||||
|
// We have failed: http 500 Internal Server Error
|
||||||
|
return models.ErrServiceReservationFailure
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
statsDequeue(ctx)
|
statsDequeue(ctx)
|
||||||
@@ -318,10 +330,6 @@ func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isFo
|
|||||||
statsTooBusy(ctx)
|
statsTooBusy(ctx)
|
||||||
recordCallLatency(ctx, call, serverBusyMetricName)
|
recordCallLatency(ctx, call, serverBusyMetricName)
|
||||||
return models.ErrCallTimeoutServerBusy
|
return models.ErrCallTimeoutServerBusy
|
||||||
} else if err == context.DeadlineExceeded {
|
|
||||||
statsTimedout(ctx)
|
|
||||||
recordCallLatency(ctx, call, timedoutMetricName)
|
|
||||||
return models.ErrCallTimeout
|
|
||||||
} else if err == context.Canceled {
|
} else if err == context.Canceled {
|
||||||
statsCanceled(ctx)
|
statsCanceled(ctx)
|
||||||
recordCallLatency(ctx, call, canceledMetricName)
|
recordCallLatency(ctx, call, canceledMetricName)
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ var (
|
|||||||
error: errors.New("Timed out - server too busy"),
|
error: errors.New("Timed out - server too busy"),
|
||||||
}
|
}
|
||||||
ErrDockerPullTimeout = err{
|
ErrDockerPullTimeout = err{
|
||||||
code: http.StatusServiceUnavailable,
|
code: http.StatusGatewayTimeout,
|
||||||
error: errors.New("Docker pull timed out"),
|
error: errors.New("Docker pull timed out"),
|
||||||
}
|
}
|
||||||
ErrContainerInitTimeout = err{
|
ErrContainerInitTimeout = err{
|
||||||
@@ -188,7 +188,10 @@ var (
|
|||||||
code: http.StatusInternalServerError,
|
code: http.StatusInternalServerError,
|
||||||
error: errors.New("Unable to find the call handle"),
|
error: errors.New("Unable to find the call handle"),
|
||||||
}
|
}
|
||||||
|
ErrServiceReservationFailure = err{
|
||||||
|
code: http.StatusInternalServerError,
|
||||||
|
error: errors.New("Unable to service the request for the reservation period"),
|
||||||
|
}
|
||||||
ErrContainerInitFail = err{
|
ErrContainerInitFail = err{
|
||||||
code: http.StatusBadGateway,
|
code: http.StatusBadGateway,
|
||||||
error: errors.New("container failed to initialize, please ensure you are using the latest fdk / format and check the logs"),
|
error: errors.New("container failed to initialize, please ensure you are using the latest fdk / format and check the logs"),
|
||||||
|
|||||||
@@ -76,13 +76,13 @@ func (trw *triggerResponseWriter) Write(b []byte) (int, error) {
|
|||||||
return trw.inner.Write(b)
|
return trw.inner.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (trw *triggerResponseWriter) WriteHeader(statusCode int) {
|
func (trw *triggerResponseWriter) WriteHeader(serviceStatus int) {
|
||||||
if trw.committed {
|
if trw.committed {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
trw.committed = true
|
trw.committed = true
|
||||||
|
|
||||||
var fnStatus int
|
userStatus := 0
|
||||||
realHeaders := trw.Header()
|
realHeaders := trw.Header()
|
||||||
gwHeaders := make(http.Header, len(realHeaders))
|
gwHeaders := make(http.Header, len(realHeaders))
|
||||||
for k, vs := range realHeaders {
|
for k, vs := range realHeaders {
|
||||||
@@ -96,7 +96,7 @@ func (trw *triggerResponseWriter) WriteHeader(statusCode int) {
|
|||||||
if len(vs) > 0 {
|
if len(vs) > 0 {
|
||||||
statusInt, err := strconv.Atoi(vs[0])
|
statusInt, err := strconv.Atoi(vs[0])
|
||||||
if err == nil {
|
if err == nil {
|
||||||
fnStatus = statusInt
|
userStatus = statusInt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case k == "Content-Type", k == "Fn-Call-Id":
|
case k == "Content-Type", k == "Fn-Call-Id":
|
||||||
@@ -113,14 +113,14 @@ func (trw *triggerResponseWriter) WriteHeader(statusCode int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// XXX(reed): simplify / add tests for these behaviors...
|
// XXX(reed): simplify / add tests for these behaviors...
|
||||||
gatewayStatus := 200
|
finalStatus := 200
|
||||||
if statusCode >= 400 {
|
if serviceStatus >= 400 {
|
||||||
gatewayStatus = 502
|
finalStatus = serviceStatus
|
||||||
} else if fnStatus > 0 {
|
} else if userStatus > 0 {
|
||||||
gatewayStatus = fnStatus
|
finalStatus = userStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
trw.inner.WriteHeader(gatewayStatus)
|
trw.inner.WriteHeader(finalStatus)
|
||||||
}
|
}
|
||||||
|
|
||||||
var skipTriggerHeaders = map[string]bool{
|
var skipTriggerHeaders = map[string]bool{
|
||||||
|
|||||||
Reference in New Issue
Block a user