Files
fn-serverless/api/agent/async.go
Reed Allman e13a6fd029 death to format (#1281)
* get rid of old format stuff, utils usage, fix up for fdk2.0 interface

* pure agent format removal, TODO remove format field, fix up all tests

* shitter's clogged

* fix agent tests

* start rolling through server tests

* tests compile, some failures

* remove json / content type detection on invoke/httptrigger, fix up tests

* remove hello, fixup system tests

the fucking status checker test just hangs and it's testing that it doesn't
work so the test passes but the test doesn't pass fuck life it's not worth it

* fix migration

* meh

* make dbhelper shut up about dbhelpers not being used

* move fail status at least into main thread, jfc

* fix status call to have FN_LISTENER

also turns off the stdout/stderr blocking between calls, because it's
impossible to debug without that (without syslog), now that stdout and stderr
go to the same place (either to host stderr or nowhere) and isn't used for
function output this shouldn't be a big fuss really

* remove stdin

* cleanup/remind: fixed bug where watcher would leak if container dies first

* silence system-test logs until fail, fix datastore tests

postgres does weird things with constraints when renaming tables, took the
easy way out

system-tests were loud as fuck and made you download a circleci text file of
the logs, made them only yell when they goof

* fix fdk-go dep for test image. fun

* fix swagger and remove test about format

* update all the gopkg files

* add back FN_FORMAT for fdks that assert things. pfft

* add useful error for functions that exit

this error is really confounding because containers can exit for all manner of
reason, we're just guessing that this is the most likely cause for now, and
this error message should very likely change or be removed from the client
path anyway (context.Canceled wasn't all that useful either, but anyway, I'd
been hunting for this... so found it). added a test to avoid being publicly
shamed for 1 line commits (beware...).
2018-10-26 10:43:04 -07:00

124 lines
3.2 KiB
Go

package agent
import (
"context"
"time"
"github.com/fnproject/fn/api/common"
"github.com/fnproject/fn/api/models"
"github.com/sirupsen/logrus"
"go.opencensus.io/tag"
"go.opencensus.io/trace"
)
func (a *agent) asyncDequeue(dqda DequeueDataAccess) {
// this is just so we can hang up the dequeue request if we get shut down
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// parent span here so that we can see how many async calls are running
ctx, span := trace.StartSpan(ctx, "agent_async_dequeue")
defer span.End()
for {
select {
case <-a.shutWg.Closer():
a.shutWg.DoneSession()
return
case <-a.resources.WaitAsyncResource(ctx):
// TODO we _could_ return a token here to reserve the ram so that there's
// not a race between here and Submit but we're single threaded
// dequeueing and retries handled gracefully inside of Submit if we run
// out of RAM so..
}
// we think we can get a cookie now, so go get a cookie
select {
case <-a.shutWg.Closer():
a.shutWg.DoneSession()
return
case model, ok := <-a.asyncChew(ctx, dqda):
if ok {
go func(model *models.Call) {
a.asyncRun(ctx, model)
a.shutWg.DoneSession()
}(model)
// WARNING: tricky. We reserve another session for next iteration of the loop
if !a.shutWg.AddSession(1) {
return
}
}
}
}
}
func (a *agent) asyncChew(ctx context.Context, dqda DequeueDataAccess) <-chan *models.Call {
ch := make(chan *models.Call, 1)
go func() {
ctx, cancel := context.WithTimeout(ctx, a.cfg.AsyncChewPoll)
defer cancel()
call, err := dqda.Dequeue(ctx)
if call != nil {
ch <- call
} else { // call is nil / error
if err != nil && err != context.DeadlineExceeded {
logrus.WithError(err).Error("error fetching queued calls")
}
// queue may be empty / unavailable
time.Sleep(1 * time.Second) // backoff a little before sending no cookie message
close(ch)
}
}()
return ch
}
func (a *agent) asyncRun(ctx context.Context, model *models.Call) {
// IMPORTANT: get a context that has a child span but NO timeout (Submit imposes timeout)
// TODO this is a 'FollowsFrom'
ctx = common.BackgroundContext(ctx)
// since async doesn't come in through the normal request path,
// we've gotta add tags here for stats to come out properly.
appKey, err := tag.NewKey("app_id")
if err != nil {
logrus.Fatal(err)
}
fnKey, err := tag.NewKey("fn_id")
if err != nil {
logrus.Fatal(err)
}
ctx, err = tag.New(ctx,
tag.Insert(appKey, model.AppID),
tag.Insert(fnKey, model.FnID),
)
if err != nil {
logrus.Fatal(err)
}
// additional enclosing context here since this isn't spawned from an http request
ctx, span := trace.StartSpan(ctx, "agent_async_run")
defer span.End()
call, err := a.GetCall(
FromModel(model),
WithContext(ctx), // NOTE: order is important
)
if err != nil {
logrus.WithError(err).Error("error getting async call")
return
}
err = a.Submit(call)
if err != nil {
// NOTE: these could be errors / timeouts from the call that we're
// logging here (i.e. not our fault), but it's likely better to log
// these than suppress them so...
id := call.Model().ID
logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
}
}