mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
* update vendor directory, add go.opencensus.io * update imports * oops * s/opentracing/opencensus/ & remove prometheus / zipkin stuff & remove old stats * the dep train rides again * fix gin build * deps from last guy * start in on the agent metrics * she builds * remove tags for now, cardinality error is fussing. subscribe instead of register * update to patched version of opencensus to proceed for now TODO switch to a release * meh fix imports * println debug the bad boys * lace it with the tags * update deps again * fix all inconsistent cardinality errors * add our own logger * fix init * fix oom measure * remove bugged removal code * fix s3 measures * fix prom handler nil
125 lines
3.5 KiB
Go
125 lines
3.5 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/fnproject/fn/api/common"
|
|
"github.com/fnproject/fn/api/models"
|
|
"github.com/sirupsen/logrus"
|
|
"go.opencensus.io/tag"
|
|
"go.opencensus.io/trace"
|
|
)
|
|
|
|
func (a *agent) asyncDequeue() {
|
|
defer a.wg.Done() // we can treat this thread like one big task and get safe shutdown fo free
|
|
|
|
// this is just so we can hang up the dequeue request if we get shut down
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
// parent span here so that we can see how many async calls are running
|
|
ctx, span := trace.StartSpan(ctx, "agent_async_dequeue")
|
|
defer span.End()
|
|
|
|
for {
|
|
select {
|
|
case <-a.shutdown:
|
|
return
|
|
case <-a.resources.WaitAsyncResource(ctx):
|
|
// TODO we _could_ return a token here to reserve the ram so that there's
|
|
// not a race between here and Submit but we're single threaded
|
|
// dequeueing and retries handled gracefully inside of Submit if we run
|
|
// out of RAM so..
|
|
}
|
|
|
|
// we think we can get a cookie now, so go get a cookie
|
|
select {
|
|
case <-a.shutdown:
|
|
return
|
|
case model, ok := <-a.asyncChew(ctx):
|
|
if ok {
|
|
a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
|
|
go func(model *models.Call) {
|
|
a.asyncRun(ctx, model)
|
|
a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
|
|
}(model)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a *agent) asyncChew(ctx context.Context) <-chan *models.Call {
|
|
ch := make(chan *models.Call, 1)
|
|
|
|
go func() {
|
|
ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
|
|
defer cancel()
|
|
|
|
call, err := a.da.Dequeue(ctx)
|
|
if call != nil {
|
|
ch <- call
|
|
} else { // call is nil / error
|
|
if err != nil && err != context.DeadlineExceeded {
|
|
logrus.WithError(err).Error("error fetching queued calls")
|
|
}
|
|
// queue may be empty / unavailable
|
|
time.Sleep(1 * time.Second) // backoff a little before sending no cookie message
|
|
close(ch)
|
|
}
|
|
}()
|
|
|
|
return ch
|
|
}
|
|
|
|
func (a *agent) asyncRun(ctx context.Context, model *models.Call) {
|
|
// IMPORTANT: get a context that has a child span but NO timeout (Submit imposes timeout)
|
|
// TODO this is a 'FollowsFrom'
|
|
ctx = common.BackgroundContext(ctx)
|
|
|
|
// since async doesn't come in through the normal request path,
|
|
// we've gotta add tags here for stats to come out properly.
|
|
appKey, err := tag.NewKey("fn_appname")
|
|
if err != nil {
|
|
logrus.Fatal(err)
|
|
}
|
|
pathKey, err := tag.NewKey("fn_path")
|
|
if err != nil {
|
|
logrus.Fatal(err)
|
|
}
|
|
ctx, err = tag.New(ctx,
|
|
tag.Insert(appKey, model.AppName),
|
|
tag.Insert(pathKey, model.Path),
|
|
)
|
|
if err != nil {
|
|
logrus.Fatal(err)
|
|
}
|
|
|
|
// additional enclosing context here since this isn't spawned from an http request
|
|
ctx, span := trace.StartSpan(ctx, "agent_async_run")
|
|
defer span.End()
|
|
|
|
call, err := a.GetCall(
|
|
FromModel(model),
|
|
WithContext(ctx), // NOTE: order is important
|
|
)
|
|
if err != nil {
|
|
logrus.WithError(err).Error("error getting async call")
|
|
return
|
|
}
|
|
|
|
// TODO if the task is cold and doesn't require reading STDIN, it could
|
|
// run but we may not listen for output since the task timed out. these
|
|
// are at least once semantics, which is really preferable to at most
|
|
// once, so let's do it for now
|
|
|
|
err = a.Submit(call)
|
|
if err != nil {
|
|
// NOTE: these could be errors / timeouts from the call that we're
|
|
// logging here (i.e. not our fault), but it's likely better to log
|
|
// these than suppress them so...
|
|
id := call.Model().ID
|
|
logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
|
|
}
|
|
}
|