fn: container initialization monitoring (#1288)

The container initialization phase holds a resource tracker token
while performing potentially lengthy operations. For agent stability
and liveness, this phase must be evictable, cancelable, and time
bounded.

This change introduces a new system-wide environment setting to bound
the time spent in the container initialization phase, which covers the
docker-pull, docker-create, docker-attach, docker-start and UDS wait
operations. The initialization period is now also considered
evictable.
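
For illustration, below is a minimal sketch of how these bounds can be set programmatically, closely mirroring the getAgent test helper added later in this diff; the helper name newBoundedAgent and the chosen durations are made up for the example. The same knobs are exposed via the FN_HOT_PULL_TIMEOUT_MSECS and FN_HOT_START_TIMEOUT_MSECS environment variables.

package agent

import (
	"time"

	"github.com/fnproject/fn/api/logs"
	"github.com/fnproject/fn/api/mqs"
)

// newBoundedAgent (hypothetical helper) builds an agent whose container
// initialization is time bounded: docker-pull by HotPullTimeout and the
// container start / UDS wait by HotStartTimeout.
func newBoundedAgent() (Agent, error) {
	cfg, err := NewConfig() // picks up FN_HOT_*_TIMEOUT_MSECS if set
	if err != nil {
		return nil, err
	}

	cfg.HotPullTimeout = 5 * time.Minute   // illustrative; default after this change is 10 minutes
	cfg.HotStartTimeout = 15 * time.Second // illustrative; default after this change is 10 seconds

	ls := logs.NewMock()
	return New(NewDirectCallDataAccess(ls, new(mqs.Mock)), WithConfig(cfg)), nil
}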
Author: Tolga Ceylan, 2018-11-15 13:37:43 -08:00 (committed by GitHub)
Parent: fe2b9fb53d
Commit: 6eaf1578e6
8 changed files with 700 additions and 165 deletions


@@ -359,17 +359,30 @@ func (a *agent) getSlot(ctx context.Context, call *call) (Slot, error) {
call.slots, isNew = a.slotMgr.getSlotQueue(call.slotHashId)
call.requestState.UpdateState(ctx, RequestStateWait, call.slots)
if isNew {
go a.hotLauncher(ctx, call)
// setup slot caller with a ctx that gets cancelled once waitHot() is completed.
// This allows runHot() to detect if original caller has been serviced by
// another container or if original caller was disconnected.
caller := &slotCaller{}
{
ctx, cancel := context.WithCancel(ctx)
defer cancel()
caller.done = ctx.Done()
caller.notify = make(chan error)
}
s, err := a.waitHot(ctx, call)
if isNew {
go a.hotLauncher(ctx, call, caller)
}
s, err := a.waitHot(ctx, call, caller)
return s, err
}
// hotLauncher is spawned in a go routine for each slot queue to monitor stats and launch hot
// containers if needed. Upon shutdown or activity timeout, hotLauncher exits and during exit,
// it destroys the slot queue.
func (a *agent) hotLauncher(ctx context.Context, call *call) {
func (a *agent) hotLauncher(ctx context.Context, call *call, caller *slotCaller) {
// Let's use 60 minutes or 2 * IdleTimeout as hot queue idle timeout, pick
// whichever is longer. If in this time, there's no activity, then
// we destroy the hot queue.
@@ -388,12 +401,9 @@ func (a *agent) hotLauncher(ctx context.Context, call *call) {
ctx, span := trace.StartSpan(ctx, "agent_hot_launcher")
defer span.End()
var notifyChan chan error
for {
ctx, cancel := context.WithTimeout(ctx, timeout)
a.checkLaunch(ctx, call, notifyChan)
notifyChan = nil
a.checkLaunch(ctx, call, *caller)
select {
case <-a.shutWg.Closer(): // server shutdown
@@ -405,7 +415,7 @@ func (a *agent) hotLauncher(ctx context.Context, call *call) {
logger.Debug("Hot function launcher timed out")
return
}
case notifyChan = <-call.slots.signaller:
case caller = <-call.slots.signaller:
cancel()
}
}
@@ -420,7 +430,7 @@ func tryNotify(notifyChan chan error, err error) {
}
}
func (a *agent) checkLaunch(ctx context.Context, call *call, notifyChan chan error) {
func (a *agent) checkLaunch(ctx context.Context, call *call, caller slotCaller) {
curStats := call.slots.getStats()
isNB := a.cfg.EnableNBResourceTracker
if !isNewContainerNeeded(&curStats) {
@@ -471,19 +481,19 @@ func (a *agent) checkLaunch(ctx context.Context, call *call, notifyChan chan err
if tok != nil {
if tok.Error() != nil {
if tok.Error() != CapacityFull {
tryNotify(notifyChan, tok.Error())
tryNotify(caller.notify, tok.Error())
} else {
needMem, needCpu := tok.NeededCapacity()
notifyChans = a.evictor.PerformEviction(call.slotHashId, needMem, uint64(needCpu))
// For Non-blocking mode, if there's nothing to evict, we emit 503.
if len(notifyChans) == 0 && isNB {
tryNotify(notifyChan, tok.Error())
tryNotify(caller.notify, tok.Error())
}
}
} else if a.shutWg.AddSession(1) {
go func() {
// NOTE: runHot will not inherit the timeout from ctx (ignore timings)
a.runHot(ctx, call, tok, state)
a.runHot(ctx, caller, call, tok, state)
a.shutWg.DoneSession()
}()
// early return (do not allow container state to switch to ContainerStateDone)
@@ -510,7 +520,7 @@ func (a *agent) checkLaunch(ctx context.Context, call *call, notifyChan chan err
}
// waitHot pings and waits for a hot container from the slot queue
func (a *agent) waitHot(ctx context.Context, call *call) (Slot, error) {
func (a *agent) waitHot(ctx context.Context, call *call, caller *slotCaller) (Slot, error) {
ctx, span := trace.StartSpan(ctx, "agent_wait_hot")
defer span.End()
@@ -519,15 +529,13 @@ func (a *agent) waitHot(ctx context.Context, call *call) (Slot, error) {
ch := call.slots.startDequeuer(ctx)
notifyChan := make(chan error)
// 1) if we can get a slot immediately, grab it.
// 2) if we don't, send a signaller every x msecs until we do.
sleep := 1 * time.Microsecond // pad, so time.After doesn't send immediately
for {
select {
case err := <-notifyChan:
case err := <-caller.notify:
return nil, err
case s := <-ch:
if call.slots.acquireSlot(s) {
@@ -550,7 +558,7 @@ func (a *agent) waitHot(ctx context.Context, call *call) (Slot, error) {
sleep = a.cfg.HotPoll
// send a notification to launchHot()
select {
case call.slots.signaller <- notifyChan:
case call.slots.signaller <- caller:
default:
}
}
@@ -671,7 +679,7 @@ func (s *hotSlot) dispatch(ctx context.Context, call *call) chan error {
resp, err := s.container.udsClient.Do(req)
if err != nil {
common.Logger(ctx).WithError(err).Error("Got error from UDS socket")
errApp <- models.NewAPIError(http.StatusBadGateway, errors.New("error receiving function response"))
errApp <- models.ErrFunctionResponse
return
}
common.Logger(ctx).WithField("resp", resp).Debug("Got resp from UDS socket")
@@ -737,114 +745,177 @@ func newSizerRespWriter(max uint64, rw http.ResponseWriter) http.ResponseWriter
func (s *sizerRespWriter) Write(b []byte) (int, error) { return s.w.Write(b) }
func (a *agent) runHot(ctx context.Context, call *call, tok ResourceToken, state ContainerState) {
// Try to queue an error to the error channel if possible.
func tryQueueErr(err error, ch chan error) error {
if err != nil {
select {
case ch <- err:
default:
}
}
return err
}
func (a *agent) runHot(ctx context.Context, caller slotCaller, call *call, tok ResourceToken, state ContainerState) {
// IMPORTANT: get a context that has a child span / logger but NO timeout
// TODO this is a 'FollowsFrom'
ctx = common.BackgroundContext(ctx)
ctx, span := trace.StartSpan(ctx, "agent_run_hot")
defer span.End()
// IMPORTANT: evict token is deleted *after* resource token in defer statements below.
// This ordering allows resource token to be freed first, which means once evict token
// is deleted, eviction is considered to be completed.
var container *container
var cookie drivers.Cookie
var err error
id := id.New().String()
logger := logrus.WithFields(logrus.Fields{"id": id, "app_id": call.AppID, "fn_id": call.FnID, "image": call.Image, "memory": call.Memory, "cpus": call.CPUs, "idle_timeout": call.IdleTimeout})
ctx, cancel := context.WithCancel(common.WithLogger(ctx, logger))
udsWait := make(chan error, 1) // track UDS state and errors
errQueue := make(chan error, 1) // errors to be reflected back to the slot queue
evictor := a.evictor.CreateEvictToken(call.slotHashId, call.Memory+uint64(call.TmpFsSize), uint64(call.CPUs))
defer a.evictor.DeleteEvictToken(evictor)
statsUtilization(ctx, a.resources.GetUtilization())
state.UpdateState(ctx, ContainerStateStart, call.slots)
// stack unwind spelled out with strict ordering below.
defer func() {
// IMPORTANT: we ignore any errors due to eviction and do not reflect these to clients.
if !evictor.isEvicted() {
select {
case err := <-udsWait:
tryQueueErr(err, errQueue)
default:
tryQueueErr(models.ErrContainerInitFail, errQueue)
}
select {
case err := <-errQueue:
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
default:
}
}
// shutdown the container and related I/O operations and go routines
cancel()
// IMPORTANT: for release cookie (remove container), make sure ctx below has no timeout.
if cookie != nil {
cookie.Close(common.BackgroundContext(ctx))
}
if container != nil {
container.Close()
}
lastState := state.GetState()
state.UpdateState(ctx, ContainerStateDone, call.slots)
tok.Close() // release cpu/mem
// IMPORTANT: evict token is deleted *after* resource token.
// This ordering allows resource token to be freed first, which means once evict token
// is deleted, eviction is considered to be completed.
a.evictor.DeleteEvictToken(evictor)
statsUtilization(ctx, a.resources.GetUtilization())
if evictor.isEvicted() {
logger.Debugf("Hot function evicted")
statsContainerEvicted(ctx, lastState)
}
}()
defer tok.Close() // IMPORTANT: this MUST get called
// Monitor initialization and evictability.
go func() {
for {
select {
case err := <-udsWait:
if tryQueueErr(err, errQueue) != nil {
cancel()
}
return
case <-ctx.Done(): // container shutdown
return
case <-a.shutWg.Closer(): // agent shutdown
cancel()
return
case <-caller.done: // original caller disconnected or serviced by another container?
evictor.SetEvictable(true)
caller.done = nil // block 'caller.done' after this point
case <-evictor.C: // eviction
cancel()
return
}
}
}()
state.UpdateState(ctx, ContainerStateStart, call.slots)
defer state.UpdateState(ctx, ContainerStateDone, call.slots)
container, err := newHotContainer(ctx, call, &a.cfg)
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
return
}
defer container.Close()
logger := logrus.WithFields(logrus.Fields{"id": container.id, "app_id": call.AppID, "fn_id": call.FnID, "image": call.Image, "memory": call.Memory, "cpus": call.CPUs, "idle_timeout": call.IdleTimeout})
ctx = common.WithLogger(ctx, logger)
ctx, cancel := context.WithTimeout(common.BackgroundContext(ctx), a.cfg.HotStartTimeout)
defer cancel()
cookie, err := a.driver.CreateCookie(ctx, container)
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
container = newHotContainer(ctx, call, &a.cfg, id, udsWait)
if container == nil {
return
}
// WARNING: we wait forever.
defer cookie.Close(common.BackgroundContext(ctx))
shouldPull, err := cookie.ValidateImage(ctx)
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
cookie, err = a.driver.CreateCookie(ctx, container)
if tryQueueErr(err, errQueue) != nil {
return
}
if shouldPull {
needsPull, err := cookie.ValidateImage(ctx)
if tryQueueErr(err, errQueue) != nil {
return
}
if needsPull {
ctx, cancel := context.WithTimeout(ctx, a.cfg.HotPullTimeout)
err = cookie.PullImage(ctx)
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
cancel()
if ctx.Err() == context.DeadlineExceeded {
err = models.ErrDockerPullTimeout
}
if tryQueueErr(err, errQueue) != nil {
return
}
}
err = cookie.CreateContainer(ctx)
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
if tryQueueErr(err, errQueue) != nil {
return
}
cancel()
ctx, shutdownContainer := context.WithCancel(common.BackgroundContext(ctx))
defer shutdownContainer() // close this if our waiter returns, to call off slots, needs to follow cookie.Close so the cookie crumbles
udsAwait := make(chan error)
// start our listener before starting the container, so we don't miss the pretty things whispered in our ears
// make sure this thread has the shutdownContainer context in case the container exits
go inotifyUDS(ctx, container.UDSAgentPath(), udsAwait)
waiter, err := cookie.Run(ctx)
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
if tryQueueErr(err, errQueue) != nil {
return
}
go func() {
defer shutdownContainer() // also close if we get an agent shutdown / idle timeout
defer cancel() // also close if we get an agent shutdown / idle timeout
// now we wait for the socket to be created before handing out any slots, need this
// here in case the container dies before making the sock we need to bail
// INIT BARRIER HERE.
select {
case err := <-udsAwait: // XXX(reed): need to leave a note about pairing ctx here?
// sends a nil error if all is good, we can proceed...
if err != nil {
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
case err := <-udsWait:
if tryQueueErr(err, errQueue) != nil {
return
}
case <-a.shutWg.Closer(): // agent shutdown
return
case <-ctx.Done():
// XXX(reed): this seems like a bad idea? why are we even handing out a
// bad slot? shouldn't we make the client wait for a valid one and maybe
// timeout? not in this PR, NOT TONIGHT!
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: models.ErrContainerExitedEarly})
return
case <-evictor.C: // eviction
return
case <-time.After(a.cfg.HotStartTimeout):
tryQueueErr(models.ErrContainerInitTimeout, errQueue)
return
}
for {
select { // make sure everything is up before trying to send slot
case <-ctx.Done(): // container shutdown
// Below we are rather defensive and poll on evictor/ctx
// to reduce the likelihood of attempting to queue a hotSlot when these
// two cases occur.
select {
case <-ctx.Done():
return
case <-a.shutWg.Closer(): // server shutdown
case <-evictor.C: // eviction
return
default: // ok
default:
}
slot := &hotSlot{
@@ -867,9 +938,9 @@ func (a *agent) runHot(ctx context.Context, call *call, tok ResourceToken, state
}
}()
res := waiter.Wait(ctx)
if res.Error() != context.Canceled {
logger.WithError(res.Error()).Info("hot function terminated")
runRes := waiter.Wait(ctx)
if runRes != nil && runRes.Error() != context.Canceled {
logger.WithError(runRes.Error()).Info("hot function terminated")
}
}
@@ -901,52 +972,69 @@ func checkSocketDestination(filename string) error {
return nil
}
func inotifyUDS(ctx context.Context, iofsDir string, awaitUDS chan<- error) {
// XXX(reed): I forgot how to plumb channels temporarily forgive me for this sin (inotify will timeout, this is just bad programming)
err := inotifyAwait(ctx, iofsDir)
if err == nil {
err = checkSocketDestination(filepath.Join(iofsDir, udsFilename))
}
select {
case awaitUDS <- err:
case <-ctx.Done():
}
}
func inotifyAwait(ctx context.Context, iofsDir string) error {
func inotifyAwait(ctx context.Context, iofsDir string, udsWait chan error) {
ctx, span := trace.StartSpan(ctx, "inotify_await")
defer span.End()
logger := common.Logger(ctx)
// Here we create the fs notify (inotify) synchronously and once that is
// setup, then fork off our async go-routine. Basically fsnotify should be enabled
// before we launch the container in order not to miss any events.
fsWatcher, err := fsnotify.NewWatcher()
if err != nil {
return fmt.Errorf("error getting fsnotify watcher: %v", err)
udsWait <- fmt.Errorf("error getting fsnotify watcher: %v", err)
return
}
defer func() {
if err := fsWatcher.Close(); err != nil {
common.Logger(ctx).WithError(err).Error("Failed to close inotify watcher")
}
}()
err = fsWatcher.Add(iofsDir)
if err != nil {
return fmt.Errorf("error adding iofs dir to fswatcher: %v", err)
if err := fsWatcher.Close(); err != nil {
logger.WithError(err).Error("Failed to close inotify watcher")
}
udsWait <- fmt.Errorf("error adding iofs dir to fswatcher: %v", err)
return
}
for {
select {
case <-ctx.Done():
// XXX(reed): damn it would sure be nice to tell users they didn't make a uds and that's why it timed out
return ctx.Err()
case err := <-fsWatcher.Errors:
return fmt.Errorf("error watching for iofs: %v", err)
case event := <-fsWatcher.Events:
common.Logger(ctx).WithField("event", event).Debug("fsnotify event")
if event.Op&fsnotify.Create == fsnotify.Create && event.Name == filepath.Join(iofsDir, udsFilename) {
// wait until the socket file is created by the container
return nil
go func() {
ctx, span := trace.StartSpan(ctx, "inotify_await_poller")
defer span.End()
defer func() {
if err := fsWatcher.Close(); err != nil {
logger.WithError(err).Error("Failed to close inotify watcher")
}
}()
for {
select {
case <-ctx.Done():
return
case err := <-fsWatcher.Errors:
// TODO: We do not know if these cases would be due to customer container/FDK
// fault or some kind of service/runner issue. As conservative choice,
// we reflect back a non API error, which means a 500 back to user.
logger.WithError(err).Error("error watching for iofs")
udsWait <- err
return
case event := <-fsWatcher.Events:
logger.WithField("event", event).Debug("fsnotify event")
if event.Op&fsnotify.Create == fsnotify.Create && event.Name == filepath.Join(iofsDir, udsFilename) {
err := checkSocketDestination(filepath.Join(iofsDir, udsFilename))
if err != nil {
// This case is more like a bad FDK/container, so let's reflect this back to
// clients as container init fail.
logger.WithError(err).Error("Failed to check socket destination")
udsWait <- models.ErrContainerInitFail
} else {
close(udsWait)
}
return
}
}
}
}
}()
}
// runHotReq enqueues a free slot to slot queue manager and watches various timers and the consumer until
@@ -956,7 +1044,6 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
var err error
isFrozen := false
isEvictEvent := false
freezeTimer := time.NewTimer(a.cfg.FreezeIdle)
idleTimer := time.NewTimer(time.Duration(call.IdleTimeout) * time.Second)
@@ -980,7 +1067,7 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
select {
case <-s.trigger: // slot already consumed
case <-ctx.Done(): // container shutdown
case <-a.shutWg.Closer(): // server shutdown
case <-a.shutWg.Closer(): // agent shutdown
case <-idleTimer.C:
case <-freezeTimer.C:
if !isFrozen {
@@ -995,8 +1082,6 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
}
continue
case <-evictor.C:
logger.Debug("attempting hot function eviction")
isEvictEvent = true
}
break
}
@@ -1008,9 +1093,6 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
// otherwise continue processing the request
if call.slots.acquireSlot(s) {
slot.Close()
if isEvictEvent {
statsContainerEvicted(ctx, state.GetState())
}
return false
}
@@ -1058,8 +1140,24 @@ type container struct {
}
//newHotContainer creates a container that can be used for multiple sequential events
func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container, error) {
id := id.New().String()
func newHotContainer(ctx context.Context, call *call, cfg *Config, id string, udsWait chan error) *container {
var iofs iofs
var err error
logger := common.Logger(ctx)
if cfg.IOFSEnableTmpfs {
iofs, err = newTmpfsIOFS(ctx, cfg)
} else {
iofs, err = newDirectoryIOFS(ctx, cfg)
}
if err != nil {
udsWait <- err
return nil
}
inotifyAwait(ctx, iofs.AgentPath(), udsWait)
stderr := common.NewGhostWriter()
stdout := common.NewGhostWriter()
@@ -1095,18 +1193,6 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
stdout.Swap(newLineWriterWithBuffer(buf1, soc))
stderr.Swap(newLineWriterWithBuffer(buf2, sec))
var iofs iofs
var err error
if cfg.IOFSEnableTmpfs {
iofs, err = newTmpfsIOFS(ctx, cfg)
} else {
iofs, err = newDirectoryIOFS(ctx, cfg)
}
if err != nil {
return nil, err
}
return &container{
id: id, // XXX we could just let docker generate ids...
image: call.Image,
@@ -1144,13 +1230,11 @@ func newHotContainer(ctx context.Context, call *call, cfg *Config) (*container,
for _, b := range bufs {
bufPool.Put(b)
}
// iofs.Close MUST be called here or we will leak directories and/or tmpfs mounts!
if err = iofs.Close(); err != nil {
// Note: This is logged with the context of the container creation
common.Logger(ctx).WithError(err).Error("Error closing IOFS")
if err := iofs.Close(); err != nil {
logger.WithError(err).Error("Error closing IOFS")
}
},
}, nil
}
}
func (c *container) swap(stdout, stderr io.Writer, cs *drivers.Stats) func() {


@@ -0,0 +1,369 @@
package agent
import (
"bytes"
"context"
"net/http"
"net/http/httptest"
"strconv"
"strings"
"sync"
"testing"
"time"
_ "github.com/fnproject/fn/api/agent/drivers/docker"
"github.com/fnproject/fn/api/id"
"github.com/fnproject/fn/api/logs"
"github.com/fnproject/fn/api/models"
"github.com/fnproject/fn/api/mqs"
)
// create a simple non-blocking agent. Non-blocking does not queue, so it's
// easier to test and see if evictions took place.
func getAgent() (Agent, error) {
ls := logs.NewMock()
cfg, err := NewConfig()
if err != nil {
return nil, err
}
// 160MB memory
cfg.EnableNBResourceTracker = true
cfg.HotPoll = 20
cfg.MaxTotalMemory = 160 * 1024 * 1024
cfg.HotPullTimeout = time.Duration(10000) * time.Millisecond
cfg.HotStartTimeout = time.Duration(10000) * time.Millisecond
a := New(NewDirectCallDataAccess(ls, new(mqs.Mock)), WithConfig(cfg))
return a, nil
}
func getHungDocker() (*httptest.Server, func()) {
hung, cancel := context.WithCancel(context.Background())
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// version check seems to have a sane timeout in docker, let's serve this, then stop
if r.URL.String() == "/v2/" {
w.WriteHeader(200)
return
}
<-hung.Done()
}))
closer := func() {
cancel()
srv.Close()
}
return srv, closer
}
func getApp() *models.App {
return &models.App{ID: id.New().String()}
}
func getFn(initDelayMsecs int) *models.Fn {
fn := &models.Fn{
ID: id.New().String(),
Image: "fnproject/fn-test-utils",
ResourceConfig: models.ResourceConfig{
Timeout: 10,
IdleTimeout: 60,
Memory: 128, // only 1 fit in 160MB
},
}
if initDelayMsecs > 0 {
fn.Config = models.Config{"ENABLE_INIT_DELAY_MSEC": strconv.FormatUint(uint64(initDelayMsecs), 10)}
}
return fn
}
// simple GetCall/Submit combo.
func execFn(input string, fn *models.Fn, app *models.App, a Agent, tmsec int) error {
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(tmsec)*time.Millisecond)
defer cancel()
url := "http://127.0.0.1:8080/invoke/" + fn.ID
req, err := http.NewRequest("GET", url, &dummyReader{Reader: strings.NewReader(input)})
if err != nil {
return err
}
req = req.WithContext(ctx)
var out bytes.Buffer
callI, err := a.GetCall(FromHTTPFnRequest(app, fn, req), WithWriter(&out))
if err != nil {
if ctx.Err() != nil {
return ctx.Err()
}
return err
}
err = a.Submit(callI)
if ctx.Err() != nil {
return ctx.Err()
}
return err
}
func TestBadContainer1(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
fn := getFn(0)
fn.Config = models.Config{"ENABLE_INIT_EXIT": "0"}
err = execFn(`{"sleepTime": 8000}`, fn, getApp(), a, 20000)
if err != models.ErrContainerInitFail {
t.Fatalf("submit unexpected error! %v", err)
}
}
func TestBadContainer2(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
fn := getFn(0)
fn.Config = models.Config{"ENABLE_INIT_EXIT": "0", "ENABLE_INIT_DELAY_MSEC": "200"}
err = execFn(`{"sleepTime": 8000}`, fn, getApp(), a, 20000)
if err != models.ErrContainerInitFail {
t.Fatalf("submit unexpected error! %v", err)
}
}
func TestBadContainer3(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
fn := getFn(0)
err = execFn(`{"isCrash": true }`, fn, getApp(), a, 20000)
if err != models.ErrFunctionResponse {
t.Fatalf("submit unexpected error! %v", err)
}
}
func TestBadContainer4(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
fn := getFn(0)
err = execFn(`{"isExit": true, "exitCode": 0 }`, fn, getApp(), a, 20000)
if err != models.ErrFunctionResponse {
t.Fatalf("submit unexpected error! %v", err)
}
}
// Eviction will NOT take place since the first container is busy
func TestPlainNoEvict(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
err := execFn(`{"sleepTime": 8000}`, getFn(0), getApp(), a, 20000)
if err != nil {
t.Fatalf("submit should not error! %v", err)
}
}()
go func() {
defer wg.Done()
time.Sleep(3000 * time.Millisecond)
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != models.ErrCallTimeoutServerBusy {
t.Fatalf("unexpected error %v", err)
}
}()
wg.Wait()
}
// Eviction will take place since the first container is idle
func TestPlainDoEvict(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != nil {
t.Fatalf("submit should not error! %v", err)
}
}()
go func() {
defer wg.Done()
time.Sleep(3000 * time.Millisecond)
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != nil {
t.Fatalf("submit should not error! %v", err)
}
}()
wg.Wait()
}
func TestHungFDKNoEvict(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
err := execFn(`{"sleepTime": 0}`, getFn(11000), getApp(), a, 20000)
if err != models.ErrContainerInitTimeout {
t.Fatalf("submit unexpected error! %v", err)
}
}()
go func() {
defer wg.Done()
time.Sleep(3000 * time.Millisecond)
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != models.ErrCallTimeoutServerBusy {
t.Fatalf("unexpected error %v", err)
}
}()
wg.Wait()
}
func TestHungFDKDoEvict(t *testing.T) {
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
err := execFn(`{"sleepTime": 0}`, getFn(11000), getApp(), a, 1000)
if err != context.DeadlineExceeded {
t.Fatalf("submit expected context.DeadlineExceeded %v", err)
}
}()
go func() {
defer wg.Done()
time.Sleep(3000 * time.Millisecond)
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != nil {
t.Fatalf("submit should not error! %v", err)
}
}()
wg.Wait()
}
func TestDockerPullHungDoEvict(t *testing.T) {
dockerSrv, dockerCancel := getHungDocker()
defer dockerCancel()
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
fn := getFn(0)
fn.Image = strings.TrimPrefix(dockerSrv.URL, "http://") + "/" + fn.Image
err := execFn(`{"sleepTime": 0}`, fn, getApp(), a, 1000)
if err != context.DeadlineExceeded {
t.Fatalf("submit expected context.DeadlineExceeded %v", err)
}
}()
go func() {
defer wg.Done()
time.Sleep(2000 * time.Millisecond)
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != nil {
t.Fatalf("submit should not error! %v", err)
}
}()
wg.Wait()
}
func TestDockerPullHungNoEvict(t *testing.T) {
dockerSrv, dockerCancel := getHungDocker()
defer dockerCancel()
a, err := getAgent()
if err != nil {
t.Fatal("cannot create agent")
}
defer checkClose(t, a)
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
fn := getFn(0)
fn.Image = strings.TrimPrefix(dockerSrv.URL, "http://") + "/" + fn.Image
err := execFn(`{"sleepTime": 0}`, fn, getApp(), a, 20000)
if err != models.ErrDockerPullTimeout {
t.Fatalf("unexpected error %v", err)
}
}()
go func() {
defer wg.Done()
time.Sleep(3000 * time.Millisecond)
err := execFn(`{"sleepTime": 0}`, getFn(0), getApp(), a, 20000)
if err != models.ErrCallTimeoutServerBusy {
t.Fatalf("unexpected error %v", err)
}
}()
wg.Wait()
}


@@ -7,9 +7,13 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"math"
"net"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
@@ -21,11 +25,8 @@ import (
"github.com/fnproject/fn/api/logs"
"github.com/fnproject/fn/api/models"
"github.com/fnproject/fn/api/mqs"
"github.com/sirupsen/logrus"
"io/ioutil"
"net"
"os"
"path/filepath"
)
func init() {
@@ -436,6 +437,48 @@ type dummyReader struct {
io.Reader
}
func TestHungFDK(t *testing.T) {
app := &models.App{ID: "app_id"}
fn := &models.Fn{
ID: "fn_id",
Image: "fnproject/fn-test-utils",
Config: models.Config{"ENABLE_INIT_DELAY_MSEC": "5000"},
ResourceConfig: models.ResourceConfig{
Timeout: 5,
IdleTimeout: 10,
Memory: 128,
},
}
url := "http://127.0.0.1:8080/invoke/" + fn.ID
ls := logs.NewMock()
cfg, err := NewConfig()
cfg.MaxDockerRetries = 1
cfg.HotStartTimeout = time.Duration(3) * time.Second
a := New(NewDirectCallDataAccess(ls, new(mqs.Mock)), WithConfig(cfg))
defer checkClose(t, a)
req, err := http.NewRequest("GET", url, &dummyReader{Reader: strings.NewReader(`{}`)})
if err != nil {
t.Fatal("unexpected error building request", err)
}
var out bytes.Buffer
callI, err := a.GetCall(FromHTTPFnRequest(app, fn, req), WithWriter(&out))
if err != nil {
t.Fatal(err)
}
err = a.Submit(callI)
if err == nil {
t.Fatal("submit should error!")
}
if err != models.ErrContainerInitTimeout {
t.Fatalf("unexpected error %v", err)
}
}
func TestDockerPullHungRepo(t *testing.T) {
hung, cancel := context.WithCancel(context.Background())
garbageServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -467,7 +510,7 @@ func TestDockerPullHungRepo(t *testing.T) {
ls := logs.NewMock()
cfg, err := NewConfig()
cfg.MaxDockerRetries = 1
cfg.HotStartTimeout = time.Duration(5) * time.Second
cfg.HotPullTimeout = time.Duration(5) * time.Second
a := New(NewDirectCallDataAccess(ls, new(mqs.Mock)), WithConfig(cfg))
defer checkClose(t, a)
@@ -486,8 +529,7 @@ func TestDockerPullHungRepo(t *testing.T) {
if err == nil {
t.Fatal("submit should error!")
}
errS := err.Error()
if !strings.HasPrefix(errS, "Failed to pull image ") || !strings.Contains(errS, "context deadline exceeded") {
if err != models.ErrDockerPullTimeout {
t.Fatalf("unexpected error %v", err)
}
}
@@ -535,7 +577,7 @@ func TestDockerPullBadRepo(t *testing.T) {
if err == nil {
t.Fatal("submit should error!")
}
if !strings.HasPrefix(err.Error(), "Failed to pull image ") {
if !models.IsAPIError(err) || !strings.HasPrefix(err.Error(), "Failed to pull image ") {
t.Fatalf("unexpected error %v", err)
}
}
@@ -1219,8 +1261,11 @@ func TestDockerAuthExtn(t *testing.T) {
ctx := context.TODO()
c, err := newHotContainer(ctx, call, cfg)
if err != nil {
errC := make(chan error, 10)
c := newHotContainer(ctx, call, cfg, id.New().String(), errC)
if c == nil {
err := <-errC
t.Fatal("got unexpected err: ", err)
}
da, err := c.DockerAuth()
@@ -1236,8 +1281,9 @@ func TestDockerAuthExtn(t *testing.T) {
extn["FN_REGISTRY_TOKEN"] = "TestRegistryToken"
call.extensions = extn
c, err = newHotContainer(ctx, call, cfg)
if err != nil {
c = newHotContainer(ctx, call, cfg, id.New().String(), errC)
if c == nil {
err := <-errC
t.Fatal("got unexpected err: ", err)
}
da, err = c.DockerAuth()


@@ -16,6 +16,7 @@ type Config struct {
FreezeIdle time.Duration `json:"freeze_idle_msecs"`
HotPoll time.Duration `json:"hot_poll_msecs"`
HotLauncherTimeout time.Duration `json:"hot_launcher_timeout_msecs"`
HotPullTimeout time.Duration `json:"hot_pull_timeout_msecs"`
HotStartTimeout time.Duration `json:"hot_start_timeout_msecs"`
AsyncChewPoll time.Duration `json:"async_chew_poll_msecs"`
DetachedHeadRoom time.Duration `json:"detached_head_room_msecs"`
@@ -52,7 +53,9 @@ const (
EnvHotPoll = "FN_HOT_POLL_MSECS"
// EnvHotLauncherTimeout is the timeout for a hot container queue to persist if idle
EnvHotLauncherTimeout = "FN_HOT_LAUNCHER_TIMEOUT_MSECS"
// EnvHotStartTimeout is the timeout for a hot container to become available for use including docker-pull
// EnvHotStartTimeout is the timeout for a hot container to be created including docker-pull
EnvHotPullTimeout = "FN_HOT_PULL_TIMEOUT_MSECS"
// EnvHotStartTimeout is the timeout for a hot container to become available for use for requests after EnvHotStartTimeout
EnvHotStartTimeout = "FN_HOT_START_TIMEOUT_MSECS"
// EnvAsyncChewPoll is the interval to poll the queue that contains async function invocations
EnvAsyncChewPoll = "FN_ASYNC_CHEW_POLL_MSECS"
@@ -130,7 +133,8 @@ func NewConfig() (*Config, error) {
err = setEnvMsecs(err, EnvFreezeIdle, &cfg.FreezeIdle, 50*time.Millisecond)
err = setEnvMsecs(err, EnvHotPoll, &cfg.HotPoll, DefaultHotPoll)
err = setEnvMsecs(err, EnvHotLauncherTimeout, &cfg.HotLauncherTimeout, time.Duration(60)*time.Minute)
err = setEnvMsecs(err, EnvHotStartTimeout, &cfg.HotStartTimeout, time.Duration(10)*time.Minute)
err = setEnvMsecs(err, EnvHotPullTimeout, &cfg.HotPullTimeout, time.Duration(10)*time.Minute)
err = setEnvMsecs(err, EnvHotStartTimeout, &cfg.HotStartTimeout, time.Duration(10)*time.Second)
err = setEnvMsecs(err, EnvAsyncChewPoll, &cfg.AsyncChewPoll, time.Duration(60)*time.Second)
err = setEnvMsecs(err, EnvDetachedHeadroom, &cfg.DetachedHeadRoom, time.Duration(360)*time.Second)
err = setEnvUint(err, EnvMaxResponseSize, &cfg.MaxResponseSize)


@@ -43,6 +43,11 @@ type slotToken struct {
isBusy uint32
}
type slotCaller struct {
notify chan error // notification to caller
done <-chan struct{} // caller done
}
// LIFO queue that exposes input/output channels along
// with runner/waiter tracking for agent
type slotQueue struct {
@@ -50,7 +55,7 @@ type slotQueue struct {
cond *sync.Cond
slots []*slotToken
nextId uint64
signaller chan chan error
signaller chan *slotCaller
statsLock sync.Mutex // protects stats below
stats slotQueueStats
}
@@ -67,7 +72,7 @@ func NewSlotQueue(key string) *slotQueue {
key: key,
cond: sync.NewCond(new(sync.Mutex)),
slots: make([]*slotToken, 0),
signaller: make(chan chan error, 1),
signaller: make(chan *slotCaller, 1),
}
return obj


@@ -31,7 +31,14 @@ var (
code: http.StatusServiceUnavailable,
error: errors.New("Timed out - server too busy"),
}
ErrDockerPullTimeout = err{
code: http.StatusServiceUnavailable,
error: errors.New("Docker pull timed out"),
}
ErrContainerInitTimeout = err{
code: http.StatusServiceUnavailable,
error: errors.New("Container initialization timed out, please ensure you are using the latest fdk / format and check the logs"),
}
ErrUnsupportedMediaType = err{
code: http.StatusUnsupportedMediaType,
error: errors.New("Content Type not supported")}
@@ -122,6 +129,10 @@ var (
code: http.StatusBadGateway,
error: fmt.Errorf("function response too large"),
}
ErrFunctionResponse = err{
code: http.StatusBadGateway,
error: fmt.Errorf("error receiving function response"),
}
ErrRequestContentTooBig = err{
code: http.StatusRequestEntityTooLarge,
error: fmt.Errorf("Request content too large"),
@@ -150,7 +161,6 @@ var (
code: http.StatusTooManyRequests,
error: errors.New("Too many requests submitted"),
}
ErrAsyncUnsupported = err{
code: http.StatusBadRequest,
error: errors.New("Async functions are not supported on this server"),
@@ -166,10 +176,9 @@ var (
error: errors.New("Unable to find the call handle"),
}
// TODO consider removal. see rationale at uses, or remove if none.
ErrContainerExitedEarly = err{
ErrContainerInitFail = err{
code: http.StatusBadGateway,
error: errors.New("container exited early, please ensure you are using the latest fdk / format and check the logs"),
error: errors.New("container failed to initialize, please ensure you are using the latest fdk / format and check the logs"),
}
)


@@ -210,7 +210,7 @@ func TestFnInvokeRunnerExecution(t *testing.T) {
{"/invoke/http_stream_fn_id", multiLog, "POST", http.StatusOK, nil, "", multiLogExpectHot},
// TODO consider removing this, see comment above the image
{"/invoke/fail_fn", ok, "POST", http.StatusBadGateway, nil, "container exited early, please ensure", nil},
{"/invoke/fail_fn", ok, "POST", http.StatusBadGateway, nil, "container failed to initialize", nil},
}
callIds := make([]string, len(testCases))


@@ -40,6 +40,10 @@ type AppRequest struct {
IsDebug bool `json:"isDebug,omitempty"`
// simulate crash
IsCrash bool `json:"isCrash,omitempty"`
// abrupt exit code
ExitCode int `json:"exitCode,omitempty"`
// enable abrupt exit
IsExit bool `json:"isExit,omitempty"`
// shutdown UDS after request
IsShutdown bool `json:"isShutdown,omitempty"`
// read a file from disk
@@ -232,6 +236,11 @@ func processRequest(ctx context.Context, in io.Reader) (*AppRequest, *AppRespons
log.Fatalln("Crash requested")
}
if request.IsExit {
log.Printf("Exit requested %+v", request.ExitCode)
os.Exit(request.ExitCode)
}
if request.ExpectHeaders != nil {
for name, header := range request.ExpectHeaders {
if h2 := fnctx.Header().Get(name); header[0] != h2 {
@@ -316,6 +325,15 @@ func main() {
time.Sleep(time.Millisecond * time.Duration(delay))
}
if initExit := os.Getenv("ENABLE_INIT_EXIT"); initExit != "" {
log.Printf("Container start exit %v", initExit)
exitCode, err := strconv.ParseInt(initExit, 10, 64)
if err != nil {
log.Fatalf("cannot parse ENABLE_INIT_EXIT %v", err)
}
os.Exit(int(exitCode))
}
ctx, cancel := context.WithCancel(context.Background())
GlobCancel = cancel
fdk.HandleContext(ctx, fdk.HandlerFunc(AppHandler)) // XXX(reed): can extract & instrument