fn: introducing 503 responses for out of capacity case (#518)

* fn: introducing 503 responses for out of capacity case *) Add a 503 response with a Retry-After header for the case where a request fails while waiting for slots. *) TODO: return 503 without Retry-After if the request can never be met by this fn server. *) fn: fix up the docker pull in the runner test. *) fn: MaxMemory for routes is now a variable, to allow testing and to allow adjusting it according to fleet memory sizes.
2022-10-28 21:29:17 +03:00 · 2017-11-21 12:42:02 -08:00
parent 460e9d2dea
commit 2551be446a
6 changed files with 38 additions and 13 deletions
--- a/api/agent/agent.go
+++ b/api/agent/agent.go
@@ -170,7 +170,18 @@ func (a *agent) Close() error {
 	return nil
 }
 // transformTimeout maps context.DeadlineExceeded onto one of the API timeout
 // errors and passes every other error (including nil) through unchanged.
 //
 // When isRetriable is true the deadline is reported as
 // models.ErrCallTimeoutServerBusy; otherwise it is reported as
 // models.ErrCallTimeout.
 func transformTimeout(e error, isRetriable bool) error {
 	// Anything that is not a deadline expiry is returned untouched.
 	if e != context.DeadlineExceeded {
 		return e
 	}
 	if isRetriable {
 		return models.ErrCallTimeoutServerBusy
 	}
 	return models.ErrCallTimeout
 }
 func (a *agent) Submit(callI Call) error {
 	a.wg.Add(1)
 	defer a.wg.Done()
@@ -199,7 +210,7 @@ func (a *agent) Submit(callI Call) error {
 	slot, err := a.getSlot(ctx, call) // find ram available / running
 	if err != nil {
 		a.stats.Dequeue(callI.Model().Path)
-		return err
+		return transformTimeout(err, true)
 	}
 	// TODO if the call times out & container is created, we need
 	// to make this remove the container asynchronously?
@@ -209,7 +220,7 @@ func (a *agent) Submit(callI Call) error {
 	err = call.Start(ctx, a)
 	if err != nil {
 		a.stats.Dequeue(callI.Model().Path)
-		return err
+		return transformTimeout(err, true)
 	}
 	// decrement queued count, increment running count
@@ -231,7 +242,7 @@ func (a *agent) Submit(callI Call) error {
 	// but this could put us over the timeout if the call did not reply yet (need better policy).
 	ctx = opentracing.ContextWithSpan(context.Background(), span)
 	err = call.End(ctx, err, a)
-	return err
+	return transformTimeout(err, false)
 }
 // getSlot must ensure that if it receives a slot, it will be returned, otherwise
--- a/api/models/error.go
+++ b/api/models/error.go
@@ -20,6 +20,10 @@ var (
 		code:  http.StatusGatewayTimeout,
 		error: errors.New("Timed out"),
 	}
 	ErrCallTimeoutServerBusy = err{
 		code:  http.StatusServiceUnavailable,
 		error: errors.New("Timed out - server too busy"),
 	}
 	ErrAppsMissingName = err{
 		code:  http.StatusBadRequest,
 		error: errors.New("Missing app name"),
@@ -154,7 +158,7 @@ var (
 	}
 	ErrRoutesInvalidMemory = err{
 		code:  http.StatusBadRequest,
-		error: fmt.Errorf("memory value is invalid. 0 < memory < %d", MaxMemory),
+		error: fmt.Errorf("memory value is invalid. 0 < memory < %d", RouteMaxMemory),
 	}
 	ErrCallNotFound = err{
 		code:  http.StatusNotFound,
--- a/api/models/route.go
+++ b/api/models/route.go
@@ -17,9 +17,10 @@ const (
 	MaxSyncTimeout  = 120  // 2 minutes
 	MaxAsyncTimeout = 3600 // 1 hour
 	MaxIdleTimeout  = MaxAsyncTimeout
 	MaxMemory       = 1024 * 8 // 8GB TODO should probably be a var of machine max?
 )
 // RouteMaxMemory is the upper bound that Route.Validate accepts for a route's
 // Memory field. It is a variable rather than a constant so that it can be
 // overridden, e.g. by tests.
 // NOTE(review): the unit is presumably megabytes (8 * 1024 == "8GB") — confirm.
 var RouteMaxMemory = uint64(8 * 1024) // 8GB TODO should probably be a var of machine max?
 type Routes []*Route
 type Route struct {
@@ -112,7 +113,7 @@ func (r *Route) Validate() error {
 		return ErrRoutesInvalidIdleTimeout
 	}
-	if r.Memory < 1 || r.Memory > MaxMemory {
+	if r.Memory < 1 || r.Memory > RouteMaxMemory {
 		return ErrRoutesInvalidMemory
 	}
--- a/api/server/error_response.go
+++ b/api/server/error_response.go
@@ -32,6 +32,11 @@ func HandleErrorResponse(ctx context.Context, w http.ResponseWriter, err error)
 		if e.Code() >= 500 {
 			log.WithFields(logrus.Fields{"code": e.Code()}).WithError(e).Error("api error")
 		}
 		if err == models.ErrCallTimeoutServerBusy {
 			// TODO: Determine a better delay value here (perhaps ask Agent). For now 15 secs with
 			// the hopes that fnlb will land this on a better server immediately.
 			w.Header().Set("Retry-After", "15")
 		}
 		statuscode = e.Code()
 	} else {
 		log.WithError(err).WithFields(logrus.Fields{"stack": string(debug.Stack())}).Error("internal server error")
--- a/api/server/runner.go
+++ b/api/server/runner.go
@@ -2,7 +2,6 @@ package server
 import (
 	"bytes"
 	"context"
 	"net/http"
 	"path"
 	"strings"
@@ -90,12 +89,10 @@ func (s *Server) serve(c *gin.Context, appName, path string) {
 	if err != nil {
 		// NOTE if they cancel the request then it will stop the call (kind of cool),
 		// we could filter that error out here too as right now it yells a little
-		if err == context.DeadlineExceeded {
+		if err == models.ErrCallTimeoutServerBusy || err == models.ErrCallTimeout {
 			// TODO maneuver
 			// add this, since it means that start may not have been called [and it's relevant]
 			c.Writer.Header().Add("XXX-FXLB-WAIT", time.Now().Sub(time.Time(model.CreatedAt)).String())
 			err = models.ErrCallTimeout // 504 w/ friendly note
 		}
 		// NOTE: if the task wrote the headers already then this will fail to write
 		// a 5xx (and log about it to us) -- that's fine (nice, even!)
--- a/api/server/runner_test.go
+++ b/api/server/runner_test.go
@@ -232,15 +232,19 @@ func TestFailedEnqueue(t *testing.T) {
 }
 func TestRouteRunnerTimeout(t *testing.T) {
 	t.Skip("doesn't work on old Ubuntu")
 	buf := setLogBuffer()
 	models.RouteMaxMemory = uint64(1024 * 1024 * 1024) // 1024 TB
 	hugeMem := uint64(models.RouteMaxMemory - 1)
 	ds := datastore.NewMockInit(
 		[]*models.App{
 			{Name: "myapp", Config: models.Config{}},
 		},
 		[]*models.Route{
-			{Path: "/sleeper", AppName: "myapp", Image: "fnproject/sleeper", Timeout: 1},
+			{Path: "/pull", AppName: "myapp", Image: "fnproject/sleeper", Type: "sync", Memory: 128, Timeout: 30, IdleTimeout: 30},
 			{Path: "/sleeper", AppName: "myapp", Image: "fnproject/sleeper", Type: "sync", Memory: 128, Timeout: 1, IdleTimeout: 30},
 			{Path: "/waitmemory", AppName: "myapp", Image: "fnproject/sleeper", Type: "sync", Memory: hugeMem, Timeout: 1, IdleTimeout: 30},
 		}, nil,
 	)
@@ -257,8 +261,11 @@ func TestRouteRunnerTimeout(t *testing.T) {
 		expectedCode    int
 		expectedHeaders map[string][]string
 	}{
 		// first request with large timeout, we let the docker pull go through...
 		{"/r/myapp/pull", `{"sleep": 0}`, "POST", http.StatusOK, nil},
 		{"/r/myapp/sleeper", `{"sleep": 0}`, "POST", http.StatusOK, nil},
-		{"/r/myapp/sleeper", `{"sleep": 2}`, "POST", http.StatusGatewayTimeout, nil},
+		{"/r/myapp/sleeper", `{"sleep": 4}`, "POST", http.StatusGatewayTimeout, nil},
 		{"/r/myapp/waitmemory", `{"sleep": 0}`, "POST", http.StatusServiceUnavailable, map[string][]string{"Retry-After": {"15"}}},
 	} {
 		body := strings.NewReader(test.body)
 		_, rec := routerRequest(t, srv.Router, test.method, test.path, body)