Hybrid plumby (#585)

* fix configuration of agent and server to be future proof and plumb in the hybrid client agent

* fixes up the tests, turns off /r/ on api nodes

* fix up defaults for runner nodes

* shove the runner async push code down into agent land to use client

* plumb up async-age

* return full call from async dequeue endpoint, since we're storing a whole
call in the MQ we don't need to worry about caching of app/route [for now]
* fast safe shutdown of dequeue looper in runner / tidying of agent
* nice errors for path not found against /r/, /v1/ or other path not found
* removed some stale TODO in agent
* mq backends are only loud mouths in debug mode now

* update tests

* Add caching to hybrid client

* Fix HTTP error handling in hybrid client.

The type switch was on the value rather than a pointer.

* Gofmt.

* Better caching with a nice caching wrapper

* Remove datastore cache which is now unused

* Don't need to manually wrap interface methods

* Go fmt
This commit is contained in:
Reed Allman
2017-12-12 15:54:55 -08:00
committed by GitHub
parent 05ce2e3868
commit bb92547b95
18 changed files with 433 additions and 375 deletions

View File

@@ -4,6 +4,7 @@ import (
"context"
"time"
"github.com/fnproject/fn/api/models"
"github.com/sirupsen/logrus"
)
@@ -11,6 +12,10 @@ func (a *agent) asyncDequeue() {
a.wg.Add(1)
defer a.wg.Done() // we can treat this thread like one big task and get safe shutdown fo free
// this is just so we can hang up the dequeue request if we get shut down
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
for {
select {
case <-a.shutdown:
@@ -22,42 +27,64 @@ func (a *agent) asyncDequeue() {
// out of RAM so..
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) // TODO ???
model, err := a.da.Dequeue(ctx)
cancel()
if err != nil || model == nil {
if err != nil {
logrus.WithError(err).Error("error fetching queued calls")
// we think we can get a cookie now, so go get a cookie
select {
case <-a.shutdown:
return
case model, ok := <-a.asyncChew(ctx):
if ok {
a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
go func(model *models.Call) {
a.asyncRun(model)
a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
}(model)
}
time.Sleep(1 * time.Second) // backoff a little
continue
}
// TODO output / logger should be here too...
a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
go func() {
defer a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
call, err := a.GetCall(FromModel(model))
if err != nil {
logrus.WithError(err).Error("error getting async call")
return
}
// TODO if the task is cold and doesn't require reading STDIN, it could
// run but we may not listen for output since the task timed out. these
// are at least once semantics, which is really preferable to at most
// once, so let's do it for now
err = a.Submit(call)
if err != nil {
// NOTE: these could be errors / timeouts from the call that we're
// logging here (i.e. not our fault), but it's likely better to log
// these than suppress them so...
id := call.Model().ID
logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
}
}()
}
}
// asyncChew performs a single dequeue attempt in a background goroutine and
// returns a channel that reports its outcome. If a call is dequeued it is sent
// on the channel; otherwise the channel is closed without a value after a
// short backoff, so a receiver using the two-value receive form sees ok ==
// false. The attempt is bounded by a 60 second timeout derived from ctx, and
// cancelling ctx abandons both the attempt and the backoff.
func (a *agent) asyncChew(ctx context.Context) <-chan *models.Call {
	// buffered with room for one value so the goroutine can hand off its
	// result and exit even when nobody is receiving any more
	ch := make(chan *models.Call, 1)

	go func() {
		pollCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
		defer cancel()

		call, err := a.da.Dequeue(pollCtx)
		if call != nil {
			ch <- call
			return
		}

		// call is nil: the queue may be empty / unavailable. only log errors
		// that were not caused by our own context ending (poll timeout or the
		// caller cancelling); checking pollCtx.Err() also covers the case
		// where the context error comes back wrapped in another error type.
		if err != nil && err != context.DeadlineExceeded && pollCtx.Err() == nil {
			logrus.WithError(err).Error("error fetching queued calls")
		}

		// backoff a little before sending the no cookie message, but don't
		// keep this goroutine around once the caller has cancelled. this
		// waits on the parent ctx rather than pollCtx so that an expired poll
		// timeout still gets the full backoff.
		select {
		case <-time.After(1 * time.Second):
		case <-ctx.Done():
		}
		close(ch)
	}()

	return ch
}
// asyncRun builds a call from the given queued model and submits it to the
// agent. Failures at either step are logged rather than returned, since there
// is no caller to hand an error back to.
func (a *agent) asyncRun(model *models.Call) {
	// TODO output / logger should be here too...

	call, getErr := a.GetCall(FromModel(model))
	if getErr != nil {
		logrus.WithError(getErr).Error("error getting async call")
		return
	}

	// TODO if the task is cold and doesn't require reading STDIN, it could
	// run but we may not listen for output since the task timed out. these
	// are at least once semantics, which is really preferable to at most
	// once, so let's do it for now
	runErr := a.Submit(call)
	if runErr == nil {
		return
	}

	// NOTE: these could be errors / timeouts from the call that we're
	// logging here (i.e. not our fault), but it's likely better to log
	// these than suppress them so...
	fields := logrus.Fields{"id": call.Model().ID}
	logrus.WithFields(fields).WithError(runErr).Error("error running async call")
}