Hybrid plumby (#585)

* fix configuration of agent and server to be future proof and plumb in the hybrid client agent
* fixes up the tests, turns off /r/ on api nodes
* fix up defaults for runner nodes
* shove the runner async push code down into agent land to use client
* plumb up async-age
* return full call from async dequeue endpoint, since we're storing a whole call in the MQ we don't need to worry about caching of app/route [for now]
* fast safe shutdown of dequeue looper in runner / tidying of agent
* nice errors for path not found against /r/, /v1/ or other path not found
* removed some stale TODO in agent
* mq backends are only loud mouths in debug mode now
* update tests
* Add caching to hybrid client
* Fix HTTP error handling in hybrid client. The type switch was on the value rather than a pointer.
* Gofmt.
* Better caching with a nice caching wrapper
* Remove datastore cache which is now unused
* Don't need to manually wrap interface methods
* Go fmt
2022-10-28 21:29:17 +03:00 · 2017-12-12 15:54:55 -08:00
parent 05ce2e3868
commit bb92547b95
18 changed files with 433 additions and 375 deletions
--- a/api/agent/async.go
+++ b/api/agent/async.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"time"

+	"github.com/fnproject/fn/api/models"
 	"github.com/sirupsen/logrus"
 )

@@ -11,6 +12,10 @@ func (a *agent) asyncDequeue() {
 	a.wg.Add(1)
 	defer a.wg.Done() // we can treat this thread like one big task and get safe shutdown fo free

+	// this is just so we can hang up the dequeue request if we get shut down
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
 	for {
 		select {
 		case <-a.shutdown:
@@ -22,42 +27,64 @@ func (a *agent) asyncDequeue() {
 			// out of RAM so..
 		}

-		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) // TODO ???
-		model, err := a.da.Dequeue(ctx)
-		cancel()
-		if err != nil || model == nil {
-			if err != nil {
-				logrus.WithError(err).Error("error fetching queued calls")
+		// we think we can get a cookie now, so go get a cookie
+		select {
+		case <-a.shutdown:
+			return
+		case model, ok := <-a.asyncChew(ctx):
+			if ok {
+				a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
+				go func(model *models.Call) {
+					a.asyncRun(model)
+					a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
+				}(model)
 			}
-			time.Sleep(1 * time.Second) // backoff a little
-			continue
 		}
-
-		// TODO output / logger should be here too...
-
-		a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
-		go func() {
-			defer a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
-
-			call, err := a.GetCall(FromModel(model))
-			if err != nil {
-				logrus.WithError(err).Error("error getting async call")
-				return
-			}
-
-			// TODO if the task is cold and doesn't require reading STDIN, it could
-			// run but we may not listen for output since the task timed out. these
-			// are at least once semantics, which is really preferable to at most
-			// once, so let's do it for now
-
-			err = a.Submit(call)
-			if err != nil {
-				// NOTE: these could be errors / timeouts from the call that we're
-				// logging here (i.e. not our fault), but it's likely better to log
-				// these than suppress them so...
-				id := call.Model().ID
-				logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
-			}
-		}()
+	}
+}
+
+func (a *agent) asyncChew(ctx context.Context) <-chan *models.Call {
+	ch := make(chan *models.Call, 1)
+	// fetch in the background; ch is buffered (1) so the lone send below cannot block if nobody is receiving anymore
+	go func() {
+		ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
+		defer cancel()
+		// exactly one dequeue attempt per invocation, capped at 60s or ended earlier if the caller's ctx is cancelled
+		call, err := a.da.Dequeue(ctx)
+		if call != nil {
+			ch <- call
+		} else { // no call came back; err may or may not be set
+			if err != nil && err != context.DeadlineExceeded { // a deadline expiry (presumably our own timeout above) is not logged
+				logrus.WithError(err).Error("error fetching queued calls")
+			}
+			// queue may be empty / unavailable; note that the sleep below does not observe ctx
+			time.Sleep(1 * time.Second) // back off a little, then close ch so the receiver sees ok == false (the "no cookie" message)
+			close(ch)
+		}
+	}()
+
+	return ch
+}
+
+func (a *agent) asyncRun(model *models.Call) {
+	// turns the dequeued model into a call via GetCall and hands it to Submit. TODO output / logger should be here too...
+	call, err := a.GetCall(FromModel(model))
+	if err != nil {
+		logrus.WithError(err).Error("error getting async call")
+		return // give up on this model; the error is only logged, nothing is returned to the caller
+	}
+
+	// TODO if the task is cold and doesn't require reading STDIN, it could
+	// run but we may not listen for output since the task timed out. these
+	// are at least once semantics, which is really preferable to at most
+	// once, so let's do it for now
+
+	err = a.Submit(call) // a failure here is only logged below, tagged with the call ID
+	if err != nil {
+		// NOTE: these could be errors / timeouts from the call that we're
+		// logging here (i.e. not our fault), but it's likely better to log
+		// these than suppress them so...
+		id := call.Model().ID
+		logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
 	}
 }