mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
* fn: lb and pure-runner with non-blocking agent *) Removed pure-runner capacity tracking code. This did not play well with internal agent resource tracker. *) In LB and runner gRPC comm, removed ACK. Now, upon TryCall, pure-runner quickly proceeds to call Submit. This is good since at this stage pure-runner already has all relevant data to initiate the call. *) Unless pure-runner emits a NACK, LB immediately streams http body to runners. *) For retriable requests added a CachedReader for http.Request Body. *) Idempotenty/retry is similar to previous code. After initial success in Engament, after attempting a TryCall, unless we receive NACK, we cannot retry that call. *) ch and naive places now wraps each TryExec with a cancellable context to clean up gRPC contexts quicker. * fn: err for simpler one-time read GetBody approach This allows for a more flexible approach since we let users to define GetBody() to allow repetitive http body read. In default LB case, LB executes a one-time io.ReadAll and sets of GetBody, which is detected by RunnerCall.RequestBody(). * fn: additional check for non-nil req.body * fn: attempt to override IO errors with ctx for TryExec * fn: system-tests log dest * fn: LB: EOF send handling * fn: logging for partial IO * fn: use buffer pool for IO storage in lb agent * fn: pure runner should use chunks for data msgs * fn: required config validations and pass APIErrors * fn: additional tests and gRPC proto simplification *) remove ACK/NACK messages as Finish message type works OK for this purpose. *) return resp in api tests for check for status code *) empty body json test in api tests for lb & pure-runner * fn: buffer adjustments *) setRequestBody result handling correction *) switch to bytes.Reader for read-only safety *) io.EOF can be returned for non-nil Body in request. * fn: clarify detection of 503 / Server Too Busy
80 lines
1.7 KiB
Go
80 lines
1.7 KiB
Go
package runnerpool
|
|
|
|
import (
|
|
"context"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/fnproject/fn/api/common"
|
|
"github.com/fnproject/fn/api/models"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
const (
|
|
// sleep time to attempt placement across all runners before retrying
|
|
retryWaitInterval = 10 * time.Millisecond
|
|
)
|
|
|
|
type naivePlacer struct {
|
|
rrIndex uint64
|
|
}
|
|
|
|
func NewNaivePlacer() Placer {
|
|
rrIndex := uint64(time.Now().Nanosecond())
|
|
logrus.Infof("Creating new naive runnerpool placer rrIndex=%d", rrIndex)
|
|
return &naivePlacer{
|
|
rrIndex: rrIndex,
|
|
}
|
|
}
|
|
|
|
func (sp *naivePlacer) PlaceCall(rp RunnerPool, ctx context.Context, call RunnerCall) error {
|
|
timeout := time.After(call.LbDeadline().Sub(time.Now()))
|
|
|
|
for {
|
|
runners, err := rp.Runners(call)
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Failed to find runners for call")
|
|
} else {
|
|
for j := 0; j < len(runners); j++ {
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return models.ErrCallTimeoutServerBusy
|
|
case <-timeout:
|
|
return models.ErrCallTimeoutServerBusy
|
|
default:
|
|
}
|
|
|
|
i := atomic.AddUint64(&sp.rrIndex, uint64(1))
|
|
r := runners[int(i)%len(runners)]
|
|
|
|
tryCtx, tryCancel := context.WithCancel(ctx)
|
|
placed, err := r.TryExec(tryCtx, call)
|
|
tryCancel()
|
|
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Failed during call placement")
|
|
}
|
|
if placed {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
remaining := call.LbDeadline().Sub(time.Now())
|
|
if remaining <= 0 {
|
|
return models.ErrCallTimeoutServerBusy
|
|
}
|
|
|
|
// backoff
|
|
select {
|
|
case <-ctx.Done():
|
|
return models.ErrCallTimeoutServerBusy
|
|
case <-timeout:
|
|
return models.ErrCallTimeoutServerBusy
|
|
case <-time.After(common.MinDuration(retryWaitInterval, remaining)):
|
|
}
|
|
}
|
|
}
|