mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: user friendly timeout handling changes (#1021)
* fn: user friendly timeout handling changes Timeout setting in routes now means "maximum amount of time a function can run in a container". Total wait time for a given http request is now expected to be handled by the client. As long as the client waits, the LB, runner or agents will search for resources to schedule it.
This commit is contained in:
@@ -228,10 +228,7 @@ func (a *agent) Submit(callI Call) error {
|
||||
|
||||
call := callI.(*call)
|
||||
|
||||
ctx, cancel := context.WithDeadline(call.req.Context(), call.execDeadline)
|
||||
call.req = call.req.WithContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
ctx := call.req.Context()
|
||||
ctx, span := trace.StartSpan(ctx, "agent_submit")
|
||||
defer span.End()
|
||||
|
||||
@@ -277,7 +274,11 @@ func (a *agent) submit(ctx context.Context, call *call) error {
|
||||
|
||||
statsDequeueAndStart(ctx)
|
||||
|
||||
// pass this error (nil or otherwise) to end directly, to store status, etc
|
||||
// We are about to execute the function, set container Exec Deadline (call.Timeout)
|
||||
ctx, cancel := context.WithTimeout(ctx, time.Duration(call.Timeout)*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Pass this error (nil or otherwise) to end directly, to store status, etc.
|
||||
err = slot.exec(ctx, call)
|
||||
return a.handleCallEnd(ctx, call, slot, err, true)
|
||||
}
|
||||
@@ -383,9 +384,17 @@ func handleStatsEnd(ctx context.Context, err error) {
|
||||
// request type, this may launch a new container or wait for other containers to become idle
|
||||
// or it may wait for resources to become available to launch a new container.
|
||||
func (a *agent) getSlot(ctx context.Context, call *call) (Slot, error) {
|
||||
// start the deadline context for waiting for slots
|
||||
ctx, cancel := context.WithDeadline(ctx, call.slotDeadline)
|
||||
defer cancel()
|
||||
if call.Type == models.TypeAsync {
|
||||
// *) for async, slot deadline is also call.Timeout. This is because we would like to
|
||||
// allocate enough time for docker-pull, slot-wait, docker-start, etc.
|
||||
// and also make sure we have call.Timeout inside the container. Total time
|
||||
// to run an async becomes 2 * call.Timeout.
|
||||
// *) for sync, there's no slot deadline, the timeout is controlled by http-client
|
||||
// context (or runner gRPC context)
|
||||
tmp, cancel := context.WithTimeout(ctx, time.Duration(call.Timeout)*time.Second)
|
||||
ctx = tmp
|
||||
defer cancel()
|
||||
}
|
||||
|
||||
ctx, span := trace.StartSpan(ctx, "agent_get_slot")
|
||||
defer span.End()
|
||||
@@ -721,7 +730,7 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
|
||||
|
||||
errApp := make(chan error, 1)
|
||||
go func() {
|
||||
ci := protocol.NewCallInfo(call.IsCloudEvent, call.Call, call.req)
|
||||
ci := protocol.NewCallInfo(call.IsCloudEvent, call.Call, call.req.WithContext(ctx))
|
||||
errApp <- proto.Dispatch(ctx, ci, call.w)
|
||||
}()
|
||||
|
||||
@@ -752,8 +761,10 @@ func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch
|
||||
|
||||
call.containerState.UpdateState(ctx, ContainerStateStart, call.slots)
|
||||
|
||||
deadline := time.Now().Add(time.Duration(call.Timeout) * time.Second)
|
||||
|
||||
// add Fn-specific information to the config to shove everything into env vars for cold
|
||||
call.Config["FN_DEADLINE"] = strfmt.DateTime(call.execDeadline).String()
|
||||
call.Config["FN_DEADLINE"] = strfmt.DateTime(deadline).String()
|
||||
call.Config["FN_METHOD"] = call.Model().Method
|
||||
call.Config["FN_REQUEST_URL"] = call.Model().URL
|
||||
call.Config["FN_CALL_ID"] = call.Model().ID
|
||||
|
||||
@@ -254,40 +254,36 @@ func (a *agent) GetCall(opts ...CallOpt) (Call, error) {
|
||||
}
|
||||
|
||||
mem := c.Memory + uint64(c.TmpFsSize)
|
||||
|
||||
if !a.resources.IsResourcePossible(mem, uint64(c.CPUs), c.Type == models.TypeAsync) {
|
||||
// if we're not going to be able to run this call on this machine, bail here.
|
||||
return nil, models.ErrCallTimeoutServerBusy
|
||||
}
|
||||
|
||||
err := setMaxBodyLimit(&a.cfg, &c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
setupCtx(&c)
|
||||
|
||||
c.da = a.da
|
||||
c.ct = a
|
||||
|
||||
ctx, _ := common.LoggerWithFields(c.req.Context(),
|
||||
logrus.Fields{"id": c.ID, "app_id": c.AppID, "route": c.Path})
|
||||
c.req = c.req.WithContext(ctx)
|
||||
|
||||
c.stderr = setupLogger(ctx, a.cfg.MaxLogSize, c.Call)
|
||||
c.stderr = setupLogger(c.req.Context(), a.cfg.MaxLogSize, c.Call)
|
||||
if c.w == nil {
|
||||
// send STDOUT to logs if no writer given (async...)
|
||||
// TODO we could/should probably make this explicit to GetCall, ala 'WithLogger', but it's dupe code (who cares?)
|
||||
c.w = c.stderr
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
slotDeadline := now.Add(time.Duration(c.Call.Timeout) * time.Second / 2)
|
||||
execDeadline := now.Add(time.Duration(c.Call.Timeout) * time.Second)
|
||||
|
||||
c.slotDeadline = slotDeadline
|
||||
c.execDeadline = execDeadline
|
||||
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
func setupCtx(c *call) {
|
||||
ctx, _ := common.LoggerWithFields(c.req.Context(),
|
||||
logrus.Fields{"id": c.ID, "app_id": c.AppID, "route": c.Path})
|
||||
c.req = c.req.WithContext(ctx)
|
||||
}
|
||||
|
||||
func setMaxBodyLimit(cfg *AgentConfig, c *call) error {
|
||||
if cfg.MaxRequestSize > 0 && c.req.ContentLength > 0 && uint64(c.req.ContentLength) > cfg.MaxRequestSize {
|
||||
return models.ErrRequestContentTooBig
|
||||
@@ -310,16 +306,10 @@ type call struct {
|
||||
stderr io.ReadWriteCloser
|
||||
ct callTrigger
|
||||
slots *slotQueue
|
||||
slotDeadline time.Time
|
||||
lbDeadline time.Time
|
||||
execDeadline time.Time
|
||||
requestState RequestState
|
||||
containerState ContainerState
|
||||
slotHashId string
|
||||
}
|
||||
|
||||
func (c *call) LbDeadline() time.Time {
|
||||
return c.lbDeadline
|
||||
isLB bool
|
||||
}
|
||||
|
||||
func (c *call) SlotHashId() string {
|
||||
@@ -358,8 +348,7 @@ func (c *call) Start(ctx context.Context) error {
|
||||
c.StartedAt = strfmt.DateTime(time.Now())
|
||||
c.Status = "running"
|
||||
|
||||
// Do not write this header if lb-agent
|
||||
if c.lbDeadline.IsZero() {
|
||||
if !c.isLB {
|
||||
if rw, ok := c.w.(http.ResponseWriter); ok { // TODO need to figure out better way to wire response headers in
|
||||
rw.Header().Set("XXX-FXLB-WAIT", time.Time(c.StartedAt).Sub(time.Time(c.CreatedAt)).String())
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"go.opencensus.io/trace"
|
||||
@@ -104,20 +103,18 @@ func (a *lbAgent) GetCall(opts ...CallOpt) (Call, error) {
|
||||
if c.req == nil || c.Call == nil {
|
||||
return nil, errors.New("no model or request provided for call")
|
||||
}
|
||||
|
||||
err := setMaxBodyLimit(&a.cfg, &c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
setupCtx(&c)
|
||||
|
||||
c.isLB = true
|
||||
c.da = a.da
|
||||
c.ct = a
|
||||
c.stderr = &nullReadWriter{}
|
||||
|
||||
ctx, _ := common.LoggerWithFields(c.req.Context(),
|
||||
logrus.Fields{"id": c.ID, "app_id": c.AppID, "route": c.Path})
|
||||
c.req = c.req.WithContext(ctx)
|
||||
|
||||
c.lbDeadline = time.Now().Add(time.Duration(c.Call.Timeout) * time.Second)
|
||||
c.slotHashId = getSlotQueueKey(&c)
|
||||
|
||||
return &c, nil
|
||||
@@ -157,12 +154,7 @@ func (a *lbAgent) Submit(callI Call) error {
|
||||
}
|
||||
|
||||
call := callI.(*call)
|
||||
|
||||
ctx, cancel := context.WithDeadline(call.req.Context(), call.lbDeadline)
|
||||
call.req = call.req.WithContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
ctx, span := trace.StartSpan(ctx, "agent_submit")
|
||||
ctx, span := trace.StartSpan(call.req.Context(), "agent_submit")
|
||||
defer span.End()
|
||||
|
||||
statsEnqueue(ctx)
|
||||
|
||||
@@ -117,7 +117,6 @@ func (r *mockRunner) Address() string {
|
||||
}
|
||||
|
||||
type mockRunnerCall struct {
|
||||
lbDeadline time.Time
|
||||
r *http.Request
|
||||
rw http.ResponseWriter
|
||||
stdErr io.ReadWriteCloser
|
||||
@@ -125,10 +124,6 @@ type mockRunnerCall struct {
|
||||
slotHashId string
|
||||
}
|
||||
|
||||
func (c *mockRunnerCall) LbDeadline() time.Time {
|
||||
return c.lbDeadline
|
||||
}
|
||||
|
||||
func (c *mockRunnerCall) SlotHashId() string {
|
||||
return c.slotHashId
|
||||
}
|
||||
@@ -157,8 +152,10 @@ func setupMockRunnerPool(expectedRunners []string, execSleep time.Duration, maxC
|
||||
func TestOneRunner(t *testing.T) {
|
||||
placer := pool.NewNaivePlacer()
|
||||
rp := setupMockRunnerPool([]string{"171.19.0.1"}, 10*time.Millisecond, 5)
|
||||
call := &mockRunnerCall{lbDeadline: time.Now().Add(1 * time.Second)}
|
||||
err := placer.PlaceCall(rp, context.Background(), call)
|
||||
call := &mockRunnerCall{}
|
||||
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(1*time.Second))
|
||||
defer cancel()
|
||||
err := placer.PlaceCall(rp, ctx, call)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to place call on runner %v", err)
|
||||
}
|
||||
@@ -167,7 +164,7 @@ func TestOneRunner(t *testing.T) {
|
||||
func TestEnforceTimeoutFromContext(t *testing.T) {
|
||||
placer := pool.NewNaivePlacer()
|
||||
rp := setupMockRunnerPool([]string{"171.19.0.1"}, 10*time.Millisecond, 5)
|
||||
call := &mockRunnerCall{lbDeadline: time.Now().Add(1 * time.Second)}
|
||||
call := &mockRunnerCall{}
|
||||
ctx, cancel := context.WithDeadline(context.Background(), time.Now())
|
||||
defer cancel()
|
||||
err := placer.PlaceCall(rp, ctx, call)
|
||||
@@ -187,8 +184,10 @@ func TestRRRunner(t *testing.T) {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
call := &mockRunnerCall{lbDeadline: time.Now().Add(10 * time.Millisecond)}
|
||||
err := placer.PlaceCall(rp, context.Background(), call)
|
||||
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(10*time.Millisecond))
|
||||
defer cancel()
|
||||
call := &mockRunnerCall{}
|
||||
err := placer.PlaceCall(rp, ctx, call)
|
||||
if err != nil {
|
||||
failures <- fmt.Errorf("Timed out call %d", i)
|
||||
}
|
||||
@@ -218,8 +217,11 @@ func TestEnforceLbTimeout(t *testing.T) {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
call := &mockRunnerCall{lbDeadline: time.Now().Add(10 * time.Millisecond)}
|
||||
err := placer.PlaceCall(rp, context.Background(), call)
|
||||
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(10*time.Millisecond))
|
||||
defer cancel()
|
||||
|
||||
call := &mockRunnerCall{}
|
||||
err := placer.PlaceCall(rp, ctx, call)
|
||||
if err != nil {
|
||||
failures <- fmt.Errorf("Timed out call %d", i)
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/fnproject/fn/api/models"
|
||||
"github.com/go-openapi/strfmt"
|
||||
@@ -79,16 +78,8 @@ func (ci callInfoImpl) Input() io.Reader {
|
||||
func (ci callInfoImpl) Deadline() strfmt.DateTime {
|
||||
deadline, ok := ci.req.Context().Deadline()
|
||||
if !ok {
|
||||
// In theory deadline must have been set here, but if it wasn't then
|
||||
// at this point it is already too late to raise an error. Set it to
|
||||
// something meaningful.
|
||||
// This assumes StartedAt was set to something other than the default.
|
||||
// If that isn't set either, then how many things have gone wrong?
|
||||
if ci.call.StartedAt == strfmt.NewDateTime() {
|
||||
// We just panic if StartedAt is the default (i.e. not set)
|
||||
panic("No context deadline and zero-value StartedAt - this should never happen")
|
||||
}
|
||||
deadline = ((time.Time)(ci.call.StartedAt)).Add(time.Duration(ci.call.Timeout) * time.Second)
|
||||
// In theory deadline must have been set here
|
||||
panic("No context deadline is set in protocol, should never happen")
|
||||
}
|
||||
return strfmt.DateTime(deadline)
|
||||
}
|
||||
|
||||
@@ -2,12 +2,14 @@ package protocol
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/fnproject/fn/api/models"
|
||||
)
|
||||
@@ -16,7 +18,7 @@ type RequestData struct {
|
||||
A string `json:"a"`
|
||||
}
|
||||
|
||||
func setupRequest(data interface{}) *callInfoImpl {
|
||||
func setupRequest(data interface{}) (*callInfoImpl, context.CancelFunc) {
|
||||
req := &http.Request{
|
||||
Method: http.MethodPost,
|
||||
URL: &url.URL{
|
||||
@@ -46,12 +48,14 @@ func setupRequest(data interface{}) *callInfoImpl {
|
||||
// fixup URL in models.Call
|
||||
call.URL = req.URL.String()
|
||||
|
||||
ci := &callInfoImpl{call: call, req: req}
|
||||
return ci
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 1*time.Second)
|
||||
ci := &callInfoImpl{call: call, req: req.WithContext(ctx)}
|
||||
return ci, cancel
|
||||
}
|
||||
|
||||
func TestJSONProtocolwriteJSONInputRequestBasicFields(t *testing.T) {
|
||||
ci := setupRequest(nil)
|
||||
ci, cancel := setupRequest(nil)
|
||||
defer cancel()
|
||||
r, w := io.Pipe()
|
||||
proto := JSONProtocol{w, r}
|
||||
go func() {
|
||||
@@ -88,7 +92,8 @@ func TestJSONProtocolwriteJSONInputRequestBasicFields(t *testing.T) {
|
||||
|
||||
func TestJSONProtocolwriteJSONInputRequestWithData(t *testing.T) {
|
||||
rDataBefore := RequestData{A: "a"}
|
||||
ci := setupRequest(rDataBefore)
|
||||
ci, cancel := setupRequest(rDataBefore)
|
||||
defer cancel()
|
||||
r, w := io.Pipe()
|
||||
proto := JSONProtocol{w, r}
|
||||
go func() {
|
||||
@@ -133,7 +138,8 @@ func TestJSONProtocolwriteJSONInputRequestWithData(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestJSONProtocolwriteJSONInputRequestWithoutData(t *testing.T) {
|
||||
ci := setupRequest(nil)
|
||||
ci, cancel := setupRequest(nil)
|
||||
defer cancel()
|
||||
r, w := io.Pipe()
|
||||
proto := JSONProtocol{w, r}
|
||||
go func() {
|
||||
@@ -177,7 +183,8 @@ func TestJSONProtocolwriteJSONInputRequestWithoutData(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestJSONProtocolwriteJSONInputRequestWithQuery(t *testing.T) {
|
||||
ci := setupRequest(nil)
|
||||
ci, cancel := setupRequest(nil)
|
||||
defer cancel()
|
||||
r, w := io.Pipe()
|
||||
proto := JSONProtocol{w, r}
|
||||
go func() {
|
||||
|
||||
@@ -67,8 +67,7 @@ type callHandle struct {
|
||||
c *call // the agent's version of call
|
||||
|
||||
// Timings, for metrics:
|
||||
receivedTime strfmt.DateTime // When was the call received?
|
||||
allocatedTime strfmt.DateTime // When did we finish allocating capacity?
|
||||
receivedTime strfmt.DateTime // When was the call received?
|
||||
|
||||
// For implementing http.ResponseWriter:
|
||||
headers http.Header
|
||||
@@ -530,7 +529,7 @@ func (pr *pureRunner) handleTryCall(tc *runner.TryCall, state *callHandle) error
|
||||
return err
|
||||
}
|
||||
|
||||
agent_call, err := pr.a.GetCall(FromModelAndInput(&c, state.pipeToFnR), WithWriter(state))
|
||||
agent_call, err := pr.a.GetCall(FromModelAndInput(&c, state.pipeToFnR), WithWriter(state), WithContext(state.ctx))
|
||||
if err != nil {
|
||||
state.enqueueCallResponse(err)
|
||||
return err
|
||||
@@ -545,7 +544,6 @@ func (pr *pureRunner) handleTryCall(tc *runner.TryCall, state *callHandle) error
|
||||
}
|
||||
state.c.slotHashId = string(hashId[:])
|
||||
}
|
||||
state.allocatedTime = strfmt.DateTime(time.Now())
|
||||
pr.spawnSubmit(state)
|
||||
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user