fn: error handling updates (#1337)

* docker-pull timeout is now a 504 which classifies it as a
service error. Avoid using 503 to make sure LB does not retry.

* Only applicable to detached mode, a timeout on LB is
now a ErrServiceReservationFailure (500). In detached mode,
this is unlikely to make it back to a client and it is mostly
for documentation/metrics purposes.
  
* For Triggers, avoid scrubbing service code.
This commit is contained in:
Tolga Ceylan
2018-12-03 14:58:48 -08:00
committed by GitHub
parent 3818046351
commit a9a828cc40
3 changed files with 26 additions and 15 deletions

View File

@@ -304,6 +304,18 @@ func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isFo
if err == nil {
statsComplete(ctx)
recordCallLatency(ctx, call, completedMetricName)
} else if err == context.DeadlineExceeded {
// We are here because we were unable to service this request for the given
// reservation. In detached case, the reservation is calculated based on estimated
// total time to run a request. (See: spawnPlaceCall) Otherwise, there's no set
// deadline in the request context. This is also a bit more robust going forward
// if we start enforcing a maximum overall deadline for clients. For detached case, the
// error is unlikely to be delivered to the client since this is essentially an async
// operation.
statsTimedout(ctx)
recordCallLatency(ctx, call, timedoutMetricName)
// We have failed: http 500 Internal Server Error
return models.ErrServiceReservationFailure
}
} else {
statsDequeue(ctx)
@@ -318,10 +330,6 @@ func (a *lbAgent) handleCallEnd(ctx context.Context, call *call, err error, isFo
statsTooBusy(ctx)
recordCallLatency(ctx, call, serverBusyMetricName)
return models.ErrCallTimeoutServerBusy
} else if err == context.DeadlineExceeded {
statsTimedout(ctx)
recordCallLatency(ctx, call, timedoutMetricName)
return models.ErrCallTimeout
} else if err == context.Canceled {
statsCanceled(ctx)
recordCallLatency(ctx, call, canceledMetricName)

View File

@@ -37,7 +37,7 @@ var (
error: errors.New("Timed out - server too busy"),
}
ErrDockerPullTimeout = err{
code: http.StatusServiceUnavailable,
code: http.StatusGatewayTimeout,
error: errors.New("Docker pull timed out"),
}
ErrContainerInitTimeout = err{
@@ -188,7 +188,10 @@ var (
code: http.StatusInternalServerError,
error: errors.New("Unable to find the call handle"),
}
ErrServiceReservationFailure = err{
code: http.StatusInternalServerError,
error: errors.New("Unable to service the request for the reservation period"),
}
ErrContainerInitFail = err{
code: http.StatusBadGateway,
error: errors.New("container failed to initialize, please ensure you are using the latest fdk / format and check the logs"),

View File

@@ -76,13 +76,13 @@ func (trw *triggerResponseWriter) Write(b []byte) (int, error) {
return trw.inner.Write(b)
}
func (trw *triggerResponseWriter) WriteHeader(statusCode int) {
func (trw *triggerResponseWriter) WriteHeader(serviceStatus int) {
if trw.committed {
return
}
trw.committed = true
var fnStatus int
userStatus := 0
realHeaders := trw.Header()
gwHeaders := make(http.Header, len(realHeaders))
for k, vs := range realHeaders {
@@ -96,7 +96,7 @@ func (trw *triggerResponseWriter) WriteHeader(statusCode int) {
if len(vs) > 0 {
statusInt, err := strconv.Atoi(vs[0])
if err == nil {
fnStatus = statusInt
userStatus = statusInt
}
}
case k == "Content-Type", k == "Fn-Call-Id":
@@ -113,14 +113,14 @@ func (trw *triggerResponseWriter) WriteHeader(statusCode int) {
}
// XXX(reed): simplify / add tests for these behaviors...
gatewayStatus := 200
if statusCode >= 400 {
gatewayStatus = 502
} else if fnStatus > 0 {
gatewayStatus = fnStatus
finalStatus := 200
if serviceStatus >= 400 {
finalStatus = serviceStatus
} else if userStatus > 0 {
finalStatus = userStatus
}
trw.inner.WriteHeader(gatewayStatus)
trw.inner.WriteHeader(finalStatus)
}
var skipTriggerHeaders = map[string]bool{