slot tracking improvements (#562)

* fn: remove 100 msec sleep for hot containers *) moved slot management to its own file *) slots are now implemented with LIFO semantics, this is important since we do not want to round robin hot containers. Idle hot containers should timeout properly. *) each slot queue now stores a few basic stats such as avg time a call spent in a given state and number of running/launching containers, number of waiting calls in those states. *) first metrics in these basic stats are discarded to avoid initial docker pull/start spikes. *) agent now records/updates slot queue state and how much time a call stayed in that state. *) waitHotSlot() replaces the previous wait 100 msec logic where it sends a msg to hot slot go routine launchHot() and waits for a slot *) launchHot() is now a go routine for tracking containers in hot slots, it determines if a new containers is needed based on slot queue stats.
2022-10-28 21:29:17 +03:00 · 2017-12-15 15:50:07 -08:00
parent a9820cfbab
commit 25a72146f5
3 changed files with 512 additions and 174 deletions
--- a/api/agent/agent.go
+++ b/api/agent/agent.go
@@ -2,12 +2,9 @@ package agent

 import (
 	"context"
-	"crypto/sha1"
 	"errors"
-	"fmt"
 	"io"
 	"net/http"
-	"sort"
 	"sync"
 	"time"

@@ -115,9 +112,7 @@ type agent struct {

 	driver drivers.Driver

-	hMu sync.RWMutex // protects hot
-	hot map[string]chan slot
-
+	slotMgr *slotQueueMgr
 	// track usage
 	resources ResourceTracker

@@ -138,7 +133,7 @@ func New(da DataAccess) Agent {
 	a := &agent{
 		da:          da,
 		driver:      driver,
-		hot:         make(map[string]chan slot),
+		slotMgr:     NewSlotQueueMgr(),
 		resources:   NewResourceTracker(),
 		shutdown:    make(chan struct{}),
 		promHandler: promhttp.Handler(),
@@ -260,147 +255,138 @@ func (a *agent) Submit(callI Call) error {
 	return transformTimeout(err, false)
 }

-// getSlot must ensure that if it receives a slot, it will be returned, otherwise
-// a container will be locked up forever waiting for slot to free.
-func (a *agent) getSlot(ctx context.Context, call *call) (slot, error) {
+// getSlot returns a Slot (or error) for the request to run. Depending on hot/cold
+// request type, this may launch a new container or wait for other containers to become idle
+// or it may wait for resources to become available to launch a new container.
+func (a *agent) getSlot(ctx context.Context, call *call) (Slot, error) {
 	span, ctx := opentracing.StartSpanFromContext(ctx, "agent_get_slot")
 	defer span.Finish()

-	if protocol.IsStreamable(protocol.Protocol(call.Format)) {
-		return a.hotSlot(ctx, call)
+	isHot := protocol.IsStreamable(protocol.Protocol(call.Format))
+	if isHot {
+		// For hot requests, we use a long lived slot queue, which we use to manage hot containers
+		call.slots = a.slotMgr.getHotSlotQueue(call)
+		start := time.Now()
+
+		call.slots.enterState(SlotQueueWaiter)
+		s, err := a.launchHot(ctx, call)
+		call.slots.exitStateWithLatency(SlotQueueWaiter, uint64(time.Now().Sub(start).Seconds()*1000))
+
+		return s, err
 	}

-	// make new channel and launch 1 for cold
-	ch := make(chan slot)
-	return a.launchOrSlot(ctx, ch, call)
+	return a.launchCold(ctx, call)
 }

-// launchOrSlot will launch a container that will send slots on the provided channel when it
-// is free if no slots are available on that channel first. the returned slot may or may not
-// be from the launched container. if there is an error launching a new container (if necessary),
-// then that will be returned rather than a slot, if no slot is free first.
-func (a *agent) launchOrSlot(ctx context.Context, slots chan slot, call *call) (slot, error) {
-	var errCh <-chan error
+// launchHot checks with slot queue to see if a new container needs to be launched and waits
+// for available slots in the queue for hot request execution.
+func (a *agent) launchHot(ctx context.Context, call *call) (Slot, error) {

-	// check if any slot immediately without trying to get a ram token
-	select {
-	case s := <-slots:
-		return s, nil
-	case <-ctx.Done():
-		return nil, ctx.Err()
-	default:
-	}
-
-	// IMPORTANT: This means, if this request was submitted indirectly through fnlb or
-	// other proxy, we will continue classifying it as 'async' which is good as async
-	// regardless of origin should use the async resources.
 	isAsync := call.Type == models.TypeAsync

-	// add context cancel here to prevent ramToken/launch race, w/o this ramToken /
-	// launch won't know whether we are no longer receiving or not yet receiving.
-	ctx, launchCancel := context.WithCancel(ctx)
-	defer launchCancel()
+launchLoop:
+	for {
+		// Check/evaluate if we need to launch a new hot container
+		doLaunch, stats := call.slots.isNewContainerNeeded()
+		common.Logger(ctx).WithField("stats", stats).Debug("checking hot container launch ", doLaunch)
+
+		if doLaunch {
+			ctxToken, tokenCancel := context.WithCancel(context.Background())
+
+			// wait on token/slot/timeout whichever comes first
+			select {
+			case tok, isOpen := <-a.resources.GetResourceToken(ctxToken, call.Memory, isAsync):
+				tokenCancel()
+				if !isOpen {
+					return nil, models.ErrCallTimeoutServerBusy
+				}
+				go a.runHot(ctx, call, tok)
+			case s, ok := <-call.slots.getDequeueChan():
+				tokenCancel()
+				if !ok {
+					return nil, errors.New("slot shut down while waiting for hot slot")
+				}
+				if s.acquireSlot() {
+					if s.slot.Error() != nil {
+						s.slot.Close()
+						return nil, s.slot.Error()
+					}
+					return s.slot, nil
+				}
+
+				// we failed to take ownership of the token (eg. container idle timeout)
+				// try launching again
+				continue launchLoop
+			case <-ctx.Done():
+				tokenCancel()
+				return nil, ctx.Err()
+			}
+		}
+
+		// After launching (if it was necessary) a container, now wait for slot/timeout
+		// or periodically reevaluate the launchHot() logic from beginning.
+		select {
+		case s, ok := <-call.slots.getDequeueChan():
+			if !ok {
+				return nil, errors.New("slot shut down while waiting for hot slot")
+			}
+			if s.acquireSlot() {
+				if s.slot.Error() != nil {
+					s.slot.Close()
+					return nil, s.slot.Error()
+				}
+				return s.slot, nil
+			}
+
+			// we failed to take ownership of the token (eg. container idle timeout)
+			// try launching again
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-time.After(time.Duration(200) * time.Millisecond):
+			// reevaluate
+		}
+	}
+}
+
+// launchCold waits for necessary resources to launch a new container, then
+// returns the slot for that new container to run the request on.
+func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
+
+	isAsync := call.Type == models.TypeAsync
+	ch := make(chan Slot)

-	// if nothing free, wait for ram token or a slot
 	select {
-	case s := <-slots:
-		return s, nil
 	case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, isAsync):
 		if !isOpen {
 			return nil, models.ErrCallTimeoutServerBusy
 		}
-		errCh = a.launch(ctx, slots, call, tok) // TODO mangle
+		go a.prepCold(ctx, call, tok, ch)
 	case <-ctx.Done():
 		return nil, ctx.Err()
 	}

-	// wait for launch err or a slot to open up (possibly from launch)
+	// wait for launch err or a slot to open up
 	select {
-	case err := <-errCh:
-		// if we get a launch err, try to return to user (e.g. image not found)
-		return nil, err
-	case slot := <-slots:
-		return slot, nil
-	case <-ctx.Done():
-		return nil, ctx.Err()
+	case s := <-ch:
+		if s.Error() != nil {
+			s.Close()
+			return nil, s.Error()
 		}
-}
-
-func (a *agent) hotSlot(ctx context.Context, call *call) (slot, error) {
-	slots := a.slots(hotKey(call))
-
-	// TODO if we track avg run time we could know how long to wait or
-	// if we need to launch instead of waiting.
-
-	// if we can get a slot in a reasonable amount of time, use it
-	select {
-	case s := <-slots:
 		return s, nil
-	case <-time.After(100 * time.Millisecond): // XXX(reed): precise^
-		// TODO this means the first launched container if none are running eats
-		// this. yes it sucks but there are a lot of other fish to fry, opening a
-		// policy discussion...
+	case <-ctx.Done():
+		return nil, ctx.Err()
 	}
-
-	// then wait for a slot or try to launch...
-	return a.launchOrSlot(ctx, slots, call)
-}
-
-// TODO this should be a LIFO stack of channels, perhaps. a queue (channel)
-// will always send the least recently used, not ideal.
-func (a *agent) slots(key string) chan slot {
-	a.hMu.RLock()
-	slots, ok := a.hot[key]
-	a.hMu.RUnlock()
-	if !ok {
-		a.hMu.Lock()
-		slots, ok = a.hot[key]
-		if !ok {
-			slots = make(chan slot) // should not be buffered
-			a.hot[key] = slots
-		}
-		a.hMu.Unlock()
-	}
-	return slots
-}
-
-func hotKey(call *call) string {
-	// return a sha1 hash of a (hopefully) unique string of all the config
-	// values, to make map lookups quicker [than the giant unique string]
-
-	hash := sha1.New()
-	fmt.Fprint(hash, call.AppName, "\x00")
-	fmt.Fprint(hash, call.Path, "\x00")
-	fmt.Fprint(hash, call.Image, "\x00")
-	fmt.Fprint(hash, call.Timeout, "\x00")
-	fmt.Fprint(hash, call.IdleTimeout, "\x00")
-	fmt.Fprint(hash, call.Memory, "\x00")
-	fmt.Fprint(hash, call.Format, "\x00")
-
-	// we have to sort these before printing, yay. TODO do better
-	keys := make([]string, 0, len(call.BaseEnv))
-	for k := range call.BaseEnv {
-		keys = append(keys, k)
-	}
-
-	sort.Strings(keys)
-	for _, k := range keys {
-		fmt.Fprint(hash, k, "\x00", call.BaseEnv[k], "\x00")
-	}
-
-	var buf [sha1.Size]byte
-	return string(hash.Sum(buf[:0]))
-}
-
-type slot interface {
-	exec(ctx context.Context, call *call) error
-	io.Closer
 }

 // implements Slot
 type coldSlot struct {
 	cookie drivers.Cookie
 	tok    ResourceToken
+	err    error
+}
+
+func (s *coldSlot) Error() error {
+	return s.err
 }

 func (s *coldSlot) exec(ctx context.Context, call *call) error {
@@ -430,7 +416,9 @@ func (s *coldSlot) Close() error {
 		// removal latency
 		s.cookie.Close(context.Background()) // ensure container removal, separate ctx
 	}
+	if s.tok != nil {
 		s.tok.Close()
+	}
 	return nil
 }

@@ -440,9 +428,17 @@ type hotSlot struct {
 	proto     protocol.ContainerIO
 	errC      <-chan error // container error
 	container *container   // TODO mask this
+	err       error
 }

-func (s *hotSlot) Close() error { close(s.done); return nil }
+func (s *hotSlot) Close() error {
+	close(s.done)
+	return nil
+}
+
+func (s *hotSlot) Error() error {
+	return s.err
+}

 func (s *hotSlot) exec(ctx context.Context, call *call) error {
 	span, ctx := opentracing.StartSpanFromContext(ctx, "agent_hot_exec")
@@ -451,6 +447,11 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
 	// link the container id and id in the logs [for us!]
 	common.Logger(ctx).WithField("container_id", s.container.id).Info("starting call")

+	start := time.Now()
+	defer func() {
+		call.slots.recordLatency(SlotQueueRunner, uint64(time.Now().Sub(start).Seconds()*1000))
+	}()
+
 	// swap in the new stderr logger & stat accumulator
 	oldStderr := s.container.swap(call.stderr, &call.Stats)
 	defer s.container.swap(oldStderr, nil) // once we're done, swap out in this scope to prevent races
@@ -475,33 +476,7 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
 	// TODO we REALLY need to wait for dispatch to return before conceding our slot
 }

-// this will work for hot & cold (woo)
-// if launch encounters a non-nil error it will send it on the returned channel,
-// this can be useful if an image doesn't exist, e.g.
-func (a *agent) launch(ctx context.Context, slots chan<- slot, call *call, tok ResourceToken) <-chan error {
-	ch := make(chan error, 1)
-
-	if !protocol.IsStreamable(protocol.Protocol(call.Format)) {
-		// TODO no
-		go func() {
-			err := a.prepCold(ctx, slots, call, tok)
-			if err != nil {
-				ch <- err
-			}
-		}()
-		return ch
-	}
-
-	go func() {
-		err := a.runHot(ctx, slots, call, tok)
-		if err != nil {
-			ch <- err
-		}
-	}()
-	return ch
-}
-
-func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok ResourceToken) error {
+func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch chan Slot) {
 	container := &container{
 		id:      id.New().String(), // XXX we could just let docker generate ids...
 		image:   call.Image,
@@ -517,32 +492,21 @@ func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok
 	// pull & create container before we return a slot, so as to be friendly
 	// about timing out if this takes a while...
 	cookie, err := a.driver.Prepare(ctx, container)
-	if err != nil {
-		tok.Close()
-		return err
-	}
-
-	slot := &coldSlot{cookie, tok}
+	slot := &coldSlot{cookie, tok, err}
 	select {
-	case slots <- slot:
+	case ch <- slot:
 	case <-ctx.Done():
-		slot.Close() // if we can't send this slot, need to take care of it ourselves
+		slot.Close()
 	}
-	return nil
 }

-func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, tok ResourceToken) error {
+func (a *agent) runHot(ctxArg context.Context, call *call, tok ResourceToken) {
 	// We must be careful to only use ctxArg for logs/spans

 	// create a span from ctxArg but ignore the new Context
 	// instead we will create a new Context below and explicitly set its span
 	span, _ := opentracing.StartSpanFromContext(ctxArg, "docker_run_hot")
 	defer span.Finish()
-
-	if tok == nil {
-		// TODO we should panic, probably ;)
-		return errors.New("no token provided, not giving you a slot")
-	}
 	defer tok.Close()

 	// TODO we have to make sure we flush these pipes or we will deadlock
@@ -562,6 +526,9 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
 	// add the span we created above to the new Context
 	ctx = opentracing.ContextWithSpan(ctx, span)

+	start := time.Now()
+	call.slots.enterState(SlotQueueStarter)
+
 	cid := id.New().String()

 	// set up the stderr for the first one to capture any logs before the slot is
@@ -585,16 +552,23 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to

 	cookie, err := a.driver.Prepare(ctx, container)
 	if err != nil {
-		return err
+		call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
+		call.slots.queueSlot(&hotSlot{done: make(chan struct{}), err: err})
+		return
 	}
 	defer cookie.Close(context.Background()) // ensure container removal, separate ctx

 	waiter, err := cookie.Run(ctx)
 	if err != nil {
-		return err
+		call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
+		call.slots.queueSlot(&hotSlot{done: make(chan struct{}), err: err})
+		return
 	}

 	// container is running
+	call.slots.enterState(SlotQueueRunner)
+	call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
+	defer call.slots.exitState(SlotQueueRunner)

 	// buffered, in case someone has slot when waiter returns but isn't yet listening
 	errC := make(chan error, 1)
@@ -611,20 +585,31 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
 			}

 			done := make(chan struct{})
-			slot := &hotSlot{done, proto, errC, container}
+			s := call.slots.queueSlot(&hotSlot{done, proto, errC, container, nil})

 			select {
-			case slots <- slot:
+			case <-s.trigger:
 			case <-time.After(time.Duration(call.IdleTimeout) * time.Second):
+				if call.slots.ejectSlot(s) {
 					logger.Info("Canceling inactive hot function")
 					shutdownContainer()
 					return
+				}
 			case <-ctx.Done(): // container shutdown
+				if call.slots.ejectSlot(s) {
 					return
+				}
 			case <-a.shutdown: // server shutdown
+				if call.slots.ejectSlot(s) {
 					shutdownContainer()
 					return
 				}
+			}
+			// IMPORTANT: if we fail to eject the slot, it means that a consumer
+			// just dequeued this and acquired the slot. In other words, we were
+			// late in ejectSlots(), so we have to execute this request in this
+			// iteration. Beginning of for-loop will re-check ctx/shutdown case
+			// and terminate after this request is done.

 			// wait for this call to finish
 			// NOTE do NOT select with shutdown / other channels. slot handles this.
@@ -640,7 +625,6 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
 	}

 	logger.WithError(err).Info("hot function terminated")
-	return err
 }

 // container implements drivers.ContainerTask container is the execution of a
--- a/api/agent/call.go
+++ b/api/agent/call.go
@@ -300,6 +300,7 @@ type call struct {
 	req    *http.Request
 	stderr io.ReadWriteCloser
 	ct     callTrigger
+	slots  *slotQueue
 }

 func (c *call) Model() *models.Call { return c.Call }
--- a/api/agent/slots.go
+++ b/api/agent/slots.go
@@ -0,0 +1,353 @@
+package agent
+
+import (
+	"context"
+	"crypto/sha1"
+	"fmt"
+	"sort"
+	"sync"
+	"sync/atomic"
+)
+
+//
+// slotQueueMgr keeps track of hot container slotQueues where each slotQueue
+// provides for multiple consumers/producers. slotQueue also stores
+// a few basic stats in slotStats.
+//
+
+type Slot interface {
+	exec(ctx context.Context, call *call) error
+	Close() error
+	Error() error
+}
+
+// slotQueueMgr manages hot container slotQueues
+type slotQueueMgr struct {
+	hMu sync.RWMutex // protects hot
+	hot map[string]*slotQueue
+}
+
+type SlotQueueMetricType int
+
+const (
+	SlotQueueRunner SlotQueueMetricType = iota
+	SlotQueueStarter
+	SlotQueueWaiter
+	SlotQueueLast
+)
+
+// counters per state and moving avg of time spent in each state
+type slotQueueStats struct {
+	states       [SlotQueueLast]uint64
+	latencyCount [SlotQueueLast]uint64
+	latencies    [SlotQueueLast]uint64
+}
+
+type slotToken struct {
+	slot    Slot
+	trigger chan struct{}
+	id      uint64
+	isBusy  uint32
+}
+
+// LIFO queue that exposes input/output channels along
+// with runner/waiter tracking for agent
+type slotQueue struct {
+	key       string
+	cond      *sync.Cond
+	slots     []*slotToken
+	nextId    uint64
+	output    chan *slotToken
+	isClosed  bool
+	statsLock sync.Mutex // protects stats below
+	stats     slotQueueStats
+}
+
+func NewSlotQueueMgr() *slotQueueMgr {
+	obj := &slotQueueMgr{
+		hot: make(map[string]*slotQueue),
+	}
+	return obj
+}
+
+func NewSlotQueue(key string) *slotQueue {
+	obj := &slotQueue{
+		key:    key,
+		cond:   sync.NewCond(new(sync.Mutex)),
+		slots:  make([]*slotToken, 0),
+		output: make(chan *slotToken),
+	}
+
+	// producer go routine to pick LIFO slots and
+	// push them into output channel
+	go func() {
+		for {
+			obj.cond.L.Lock()
+			for len(obj.slots) <= 0 && !obj.isClosed {
+				obj.cond.Wait()
+			}
+
+			// cleanup and exit
+			if obj.isClosed {
+
+				purge := obj.slots
+				obj.slots = obj.slots[:0]
+				obj.cond.L.Unlock()
+
+				close(obj.output)
+
+				for _, val := range purge {
+					if val.acquireSlot() {
+						val.slot.Close()
+					}
+				}
+
+				return
+			}
+
+			// pop
+			item := obj.slots[len(obj.slots)-1]
+			obj.slots = obj.slots[:len(obj.slots)-1]
+			obj.cond.L.Unlock()
+
+			// block
+			obj.output <- item
+		}
+	}()
+
+	return obj
+}
+
+func (a *slotToken) acquireSlot() bool {
+	// let's get the lock
+	if !atomic.CompareAndSwapUint32(&a.isBusy, 0, 1) {
+		return false
+	}
+
+	// now we have the lock, push the trigger
+	close(a.trigger)
+	return true
+}
+
+func (a *slotQueue) ejectSlot(s *slotToken) bool {
+	// let's get the lock
+	if !atomic.CompareAndSwapUint32(&s.isBusy, 0, 1) {
+		return false
+	}
+
+	isFound := false
+
+	a.cond.L.Lock()
+	for idx, val := range a.slots {
+		if val.id == s.id {
+			a.slots[0], a.slots[idx] = a.slots[idx], a.slots[0]
+			isFound = true
+			break
+		}
+	}
+	if isFound {
+		a.slots = a.slots[1:]
+	}
+	a.cond.L.Unlock()
+
+	s.slot.Close()
+	// now we have the lock, push the trigger
+	close(s.trigger)
+	return true
+}
+
+func (a *slotQueue) destroySlotQueue() {
+	doSignal := false
+	a.cond.L.Lock()
+	if !a.isClosed {
+		a.isClosed = true
+		doSignal = true
+	}
+	a.cond.L.Unlock()
+	if doSignal {
+		a.cond.Signal()
+	}
+}
+
+func (a *slotQueue) getDequeueChan() chan *slotToken {
+	return a.output
+}
+
+func (a *slotQueue) queueSlot(slot Slot) *slotToken {
+
+	token := &slotToken{slot, make(chan struct{}), 0, 0}
+	isClosed := false
+
+	a.cond.L.Lock()
+	if !a.isClosed {
+		token.id = a.nextId
+		a.slots = append(a.slots, token)
+		a.nextId += 1
+	} else {
+		isClosed = true
+	}
+	a.cond.L.Unlock()
+
+	if !isClosed {
+		a.cond.Signal()
+		return token
+	}
+
+	return nil
+}
+
+func (a *slotQueue) getStats() slotQueueStats {
+	var out slotQueueStats
+	a.statsLock.Lock()
+	out = a.stats
+	a.statsLock.Unlock()
+	return out
+}
+
+func (a *slotQueue) isNewContainerNeeded() (bool, slotQueueStats) {
+
+	stats := a.getStats()
+
+	waiters := stats.states[SlotQueueWaiter]
+	if waiters == 0 {
+		return false, stats
+	}
+
+	// while a container is starting, do not start more than waiters
+	starters := stats.states[SlotQueueStarter]
+	if starters >= waiters {
+		return false, stats
+	}
+
+	// no executors? Start a container now.
+	executors := starters + stats.states[SlotQueueRunner]
+	if executors == 0 {
+		return true, stats
+	}
+
+	runLat := stats.latencies[SlotQueueRunner]
+	waitLat := stats.latencies[SlotQueueWaiter]
+	startLat := stats.latencies[SlotQueueStarter]
+
+	// no wait latency? No need to spin up new container
+	if waitLat == 0 {
+		return false, stats
+	}
+
+	// this determines the aggresiveness of the container launch.
+	if runLat/executors*2 < waitLat {
+		return true, stats
+	}
+	if runLat < waitLat {
+		return true, stats
+	}
+	if startLat < waitLat {
+		return true, stats
+	}
+
+	return false, stats
+}
+
+func (a *slotQueue) enterState(metricIdx SlotQueueMetricType) {
+	a.statsLock.Lock()
+	a.stats.states[metricIdx] += 1
+	a.statsLock.Unlock()
+}
+
+func (a *slotQueue) exitState(metricIdx SlotQueueMetricType) {
+	a.statsLock.Lock()
+	if a.stats.states[metricIdx] == 0 {
+		panic(fmt.Sprintf("BUG: metric tracking fault idx=%v", metricIdx))
+	}
+	a.stats.states[metricIdx] -= 1
+	a.statsLock.Unlock()
+}
+
+func (a *slotQueue) recordLatencyLocked(metricIdx SlotQueueMetricType, latency uint64) {
+	// exponentially weighted moving average with smoothing factor of 0.5
+	// 0.5 is a high value to age older observations fast while filtering
+	// some noise. For our purposes, newer observations are much more important
+	// than older, but we still would like to low pass some noise.
+	// first samples are ignored.
+	if a.stats.latencyCount[metricIdx] != 0 {
+		a.stats.latencies[metricIdx] = (a.stats.latencies[metricIdx]*5 + latency*5) / 10
+	}
+	a.stats.latencyCount[metricIdx] += 1
+	if a.stats.latencyCount[metricIdx] == 0 {
+		a.stats.latencyCount[metricIdx] += 1
+	}
+}
+
+func (a *slotQueue) recordLatency(metricIdx SlotQueueMetricType, latency uint64) {
+	a.statsLock.Lock()
+	a.recordLatencyLocked(metricIdx, latency)
+	a.statsLock.Unlock()
+}
+
+func (a *slotQueue) exitStateWithLatency(metricIdx SlotQueueMetricType, latency uint64) {
+	a.statsLock.Lock()
+	if a.stats.states[metricIdx] == 0 {
+		panic(fmt.Sprintf("BUG: metric tracking fault idx=%v", metricIdx))
+	}
+	a.stats.states[metricIdx] -= 1
+	a.recordLatencyLocked(metricIdx, latency)
+	a.statsLock.Unlock()
+}
+
+// getSlot must ensure that if it receives a slot, it will be returned, otherwise
+// a container will be locked up forever waiting for slot to free.
+func (a *slotQueueMgr) getHotSlotQueue(call *call) *slotQueue {
+
+	key := getSlotQueueKey(call)
+
+	a.hMu.RLock()
+	slots, ok := a.hot[key]
+	a.hMu.RUnlock()
+	if !ok {
+		a.hMu.Lock()
+		slots, ok = a.hot[key]
+		if !ok {
+			slots = NewSlotQueue(key)
+			a.hot[key] = slots
+		}
+		a.hMu.Unlock()
+	}
+	return slots
+}
+
+// currently unused. But at some point, we need to age/delete old
+// slotQueues.
+func (a *slotQueueMgr) destroySlotQueue(slots *slotQueue) {
+	slots.destroySlotQueue()
+	a.hMu.Lock()
+	delete(a.hot, slots.key)
+	a.hMu.Unlock()
+}
+
+func getSlotQueueKey(call *call) string {
+	// return a sha1 hash of a (hopefully) unique string of all the config
+	// values, to make map lookups quicker [than the giant unique string]
+
+	hash := sha1.New()
+	fmt.Fprint(hash, call.AppName, "\x00")
+	fmt.Fprint(hash, call.Path, "\x00")
+	fmt.Fprint(hash, call.Image, "\x00")
+	fmt.Fprint(hash, call.Timeout, "\x00")
+	fmt.Fprint(hash, call.IdleTimeout, "\x00")
+	fmt.Fprint(hash, call.Memory, "\x00")
+	fmt.Fprint(hash, call.Format, "\x00")
+
+	// we have to sort these before printing, yay. TODO do better
+	keys := make([]string, 0, len(call.BaseEnv))
+	for k := range call.BaseEnv {
+		keys = append(keys, k)
+	}
+
+	sort.Strings(keys)
+	for _, k := range keys {
+		fmt.Fprint(hash, k, "\x00", call.BaseEnv[k], "\x00")
+	}
+
+	var buf [sha1.Size]byte
+	return string(hash.Sum(buf[:0]))
+}