slot tracking improvements (#562)

* fn: remove 100 msec sleep for hot containers

*) moved slot management to its own file
*) slots are now implemented with LIFO semantics; this is important since we do
   not want to round-robin hot containers. Idle hot containers should time out properly
   (a minimal sketch of the LIFO idea follows this list).
*) each slot queue now stores a few basic stats, such as the average time a call spent in a given
   state, the number of running/launching containers, and the number of waiting calls in those states.
*) the first metrics in these stats are discarded to avoid skew from the initial docker pull/start spikes.
*) agent now records/updates slot queue state and how much time a call stayed in that state.
*) waitHotSlot() replaces the previous 100 msec wait logic; it sends a message to the
   hot-slot goroutine launchHot() and waits for a slot.
*) launchHot() is now a goroutine that tracks containers in hot slots; it determines
   whether a new container is needed based on slot queue stats.
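
For illustration, a minimal sketch of the LIFO idea above (not the actual slotQueueMgr implementation; the lifoSlotStack type and its methods are hypothetical placeholders):

package main

import (
	"fmt"
	"sync"
)

// lifoSlotStack is a hypothetical, minimal stand-in for the slot queue's LIFO
// behavior: the most recently freed slot is handed out first, so the least
// recently used hot containers stay idle long enough to hit their idle
// timeout instead of being kept warm by round-robin dispatch.
type lifoSlotStack struct {
	mu    sync.Mutex
	slots []string // slot/container ids, newest at the end
}

// push returns a slot to the stack when its container becomes idle.
func (s *lifoSlotStack) push(id string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.slots = append(s.slots, id)
}

// pop hands out the most recently freed slot, if any.
func (s *lifoSlotStack) pop() (string, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if len(s.slots) == 0 {
		return "", false
	}
	id := s.slots[len(s.slots)-1]
	s.slots = s.slots[:len(s.slots)-1]
	return id, true
}

func main() {
	var q lifoSlotStack
	q.push("container-A") // became idle first
	q.push("container-B") // became idle last
	id, _ := q.pop()
	fmt.Println(id) // prints container-B; container-A stays idle and can time out
}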
Tolga Ceylan
2017-12-15 15:50:07 -08:00
committed by GitHub
parent a9820cfbab
commit 25a72146f5
3 changed files with 512 additions and 174 deletions


@@ -2,12 +2,9 @@ package agent
import (
"context"
"crypto/sha1"
"errors"
"fmt"
"io"
"net/http"
"sort"
"sync"
"time"
@@ -115,9 +112,7 @@ type agent struct {
driver drivers.Driver
hMu sync.RWMutex // protects hot
hot map[string]chan slot
slotMgr *slotQueueMgr
// track usage
resources ResourceTracker
@@ -138,7 +133,7 @@ func New(da DataAccess) Agent {
a := &agent{
da: da,
driver: driver,
hot: make(map[string]chan slot),
slotMgr: NewSlotQueueMgr(),
resources: NewResourceTracker(),
shutdown: make(chan struct{}),
promHandler: promhttp.Handler(),
@@ -260,147 +255,138 @@ func (a *agent) Submit(callI Call) error {
return transformTimeout(err, false)
}
// getSlot must ensure that if it receives a slot, it will be returned, otherwise
// a container will be locked up forever waiting for slot to free.
func (a *agent) getSlot(ctx context.Context, call *call) (slot, error) {
// getSlot returns a Slot (or error) for the request to run. Depending on hot/cold
// request type, this may launch a new container or wait for other containers to become idle
// or it may wait for resources to become available to launch a new container.
func (a *agent) getSlot(ctx context.Context, call *call) (Slot, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "agent_get_slot")
defer span.Finish()
if protocol.IsStreamable(protocol.Protocol(call.Format)) {
return a.hotSlot(ctx, call)
isHot := protocol.IsStreamable(protocol.Protocol(call.Format))
if isHot {
// For hot requests, we use a long-lived slot queue to manage hot containers
call.slots = a.slotMgr.getHotSlotQueue(call)
start := time.Now()
call.slots.enterState(SlotQueueWaiter)
s, err := a.launchHot(ctx, call)
call.slots.exitStateWithLatency(SlotQueueWaiter, uint64(time.Now().Sub(start).Seconds()*1000))
return s, err
}
// make new channel and launch 1 for cold
ch := make(chan slot)
return a.launchOrSlot(ctx, ch, call)
return a.launchCold(ctx, call)
}
// launchOrSlot will launch a container that will send slots on the provided channel when it
// is free if no slots are available on that channel first. the returned slot may or may not
// be from the launched container. if there is an error launching a new container (if necessary),
// then that will be returned rather than a slot, if no slot is free first.
func (a *agent) launchOrSlot(ctx context.Context, slots chan slot, call *call) (slot, error) {
var errCh <-chan error
// launchHot checks with the slot queue to see if a new container needs to be launched, and waits
// for available slots in the queue for hot request execution.
func (a *agent) launchHot(ctx context.Context, call *call) (Slot, error) {
// check if any slot is immediately available, without trying to get a ram token
select {
case s := <-slots:
return s, nil
case <-ctx.Done():
return nil, ctx.Err()
default:
}
// IMPORTANT: This means that if this request was submitted indirectly through fnlb or
// another proxy, we will continue classifying it as 'async', which is good: async requests,
// regardless of origin, should use the async resources.
isAsync := call.Type == models.TypeAsync
// add a context cancel here to prevent a ramToken/launch race; without this, ramToken/
// launch won't know whether we are no longer receiving or not yet receiving.
ctx, launchCancel := context.WithCancel(ctx)
defer launchCancel()
launchLoop:
for {
// Check/evaluate if we need to launch a new hot container
doLaunch, stats := call.slots.isNewContainerNeeded()
common.Logger(ctx).WithField("stats", stats).Debug("checking hot container launch ", doLaunch)
if doLaunch {
ctxToken, tokenCancel := context.WithCancel(context.Background())
// wait on token/slot/timeout whichever comes first
select {
case tok, isOpen := <-a.resources.GetResourceToken(ctxToken, call.Memory, isAsync):
tokenCancel()
if !isOpen {
return nil, models.ErrCallTimeoutServerBusy
}
go a.runHot(ctx, call, tok)
case s, ok := <-call.slots.getDequeueChan():
tokenCancel()
if !ok {
return nil, errors.New("slot shut down while waiting for hot slot")
}
if s.acquireSlot() {
if s.slot.Error() != nil {
s.slot.Close()
return nil, s.slot.Error()
}
return s.slot, nil
}
// we failed to take ownership of the slot (e.g. container idle timeout)
// try launching again
continue launchLoop
case <-ctx.Done():
tokenCancel()
return nil, ctx.Err()
}
}
// After launching a container (if it was necessary), wait for a slot/timeout,
// or periodically reevaluate the launchHot() logic from the beginning.
select {
case s, ok := <-call.slots.getDequeueChan():
if !ok {
return nil, errors.New("slot shut down while waiting for hot slot")
}
if s.acquireSlot() {
if s.slot.Error() != nil {
s.slot.Close()
return nil, s.slot.Error()
}
return s.slot, nil
}
// we failed to take ownership of the slot (e.g. container idle timeout)
// try launching again
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(time.Duration(200) * time.Millisecond):
// reevaluate
}
}
}
// launchCold waits for necessary resources to launch a new container, then
// returns the slot for that new container to run the request on.
func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
isAsync := call.Type == models.TypeAsync
ch := make(chan Slot)
// if nothing free, wait for ram token or a slot
select {
case s := <-slots:
return s, nil
case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, isAsync):
if !isOpen {
return nil, models.ErrCallTimeoutServerBusy
}
errCh = a.launch(ctx, slots, call, tok) // TODO mangle
go a.prepCold(ctx, call, tok, ch)
case <-ctx.Done():
return nil, ctx.Err()
}
// wait for launch err or a slot to open up (possibly from launch)
// wait for launch err or a slot to open up
select {
case err := <-errCh:
// if we get a launch err, try to return to user (e.g. image not found)
return nil, err
case slot := <-slots:
return slot, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
func (a *agent) hotSlot(ctx context.Context, call *call) (slot, error) {
slots := a.slots(hotKey(call))
// TODO if we track avg run time we could know how long to wait or
// if we need to launch instead of waiting.
// if we can get a slot in a reasonable amount of time, use it
select {
case s := <-slots:
return s, nil
case <-time.After(100 * time.Millisecond): // XXX(reed): precise^
// TODO this means the first launched container if none are running eats
// this. yes it sucks but there are a lot of other fish to fry, opening a
// policy discussion...
}
// then wait for a slot or try to launch...
return a.launchOrSlot(ctx, slots, call)
}
// TODO this should be a LIFO stack of channels, perhaps. a queue (channel)
// will always send the least recently used, not ideal.
func (a *agent) slots(key string) chan slot {
a.hMu.RLock()
slots, ok := a.hot[key]
a.hMu.RUnlock()
if !ok {
a.hMu.Lock()
slots, ok = a.hot[key]
if !ok {
slots = make(chan slot) // should not be buffered
a.hot[key] = slots
case s := <-ch:
if s.Error() != nil {
s.Close()
return nil, s.Error()
}
a.hMu.Unlock()
return s, nil
case <-ctx.Done():
return nil, ctx.Err()
}
return slots
}
func hotKey(call *call) string {
// return a sha1 hash of a (hopefully) unique string of all the config
// values, to make map lookups quicker [than the giant unique string]
hash := sha1.New()
fmt.Fprint(hash, call.AppName, "\x00")
fmt.Fprint(hash, call.Path, "\x00")
fmt.Fprint(hash, call.Image, "\x00")
fmt.Fprint(hash, call.Timeout, "\x00")
fmt.Fprint(hash, call.IdleTimeout, "\x00")
fmt.Fprint(hash, call.Memory, "\x00")
fmt.Fprint(hash, call.Format, "\x00")
// we have to sort these before printing, yay. TODO do better
keys := make([]string, 0, len(call.BaseEnv))
for k := range call.BaseEnv {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprint(hash, k, "\x00", call.BaseEnv[k], "\x00")
}
var buf [sha1.Size]byte
return string(hash.Sum(buf[:0]))
}
type slot interface {
exec(ctx context.Context, call *call) error
io.Closer
}
// implements Slot
type coldSlot struct {
cookie drivers.Cookie
tok ResourceToken
err error
}
func (s *coldSlot) Error() error {
return s.err
}
func (s *coldSlot) exec(ctx context.Context, call *call) error {
@@ -430,7 +416,9 @@ func (s *coldSlot) Close() error {
// removal latency
s.cookie.Close(context.Background()) // ensure container removal, separate ctx
}
s.tok.Close()
if s.tok != nil {
s.tok.Close()
}
return nil
}
@@ -440,9 +428,17 @@ type hotSlot struct {
proto protocol.ContainerIO
errC <-chan error // container error
container *container // TODO mask this
err error
}
func (s *hotSlot) Close() error { close(s.done); return nil }
func (s *hotSlot) Close() error {
close(s.done)
return nil
}
func (s *hotSlot) Error() error {
return s.err
}
func (s *hotSlot) exec(ctx context.Context, call *call) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "agent_hot_exec")
@@ -451,6 +447,11 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
// link the container id and id in the logs [for us!]
common.Logger(ctx).WithField("container_id", s.container.id).Info("starting call")
start := time.Now()
defer func() {
call.slots.recordLatency(SlotQueueRunner, uint64(time.Now().Sub(start).Seconds()*1000))
}()
// swap in the new stderr logger & stat accumulator
oldStderr := s.container.swap(call.stderr, &call.Stats)
defer s.container.swap(oldStderr, nil) // once we're done, swap out in this scope to prevent races
@@ -475,33 +476,7 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
// TODO we REALLY need to wait for dispatch to return before conceding our slot
}
// this will work for hot & cold (woo)
// if launch encounters a non-nil error it will send it on the returned channel,
// this can be useful if an image doesn't exist, e.g.
func (a *agent) launch(ctx context.Context, slots chan<- slot, call *call, tok ResourceToken) <-chan error {
ch := make(chan error, 1)
if !protocol.IsStreamable(protocol.Protocol(call.Format)) {
// TODO no
go func() {
err := a.prepCold(ctx, slots, call, tok)
if err != nil {
ch <- err
}
}()
return ch
}
go func() {
err := a.runHot(ctx, slots, call, tok)
if err != nil {
ch <- err
}
}()
return ch
}
func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok ResourceToken) error {
func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch chan Slot) {
container := &container{
id: id.New().String(), // XXX we could just let docker generate ids...
image: call.Image,
@@ -517,32 +492,21 @@ func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok
// pull & create container before we return a slot, so as to be friendly
// about timing out if this takes a while...
cookie, err := a.driver.Prepare(ctx, container)
if err != nil {
tok.Close()
return err
}
slot := &coldSlot{cookie, tok}
slot := &coldSlot{cookie, tok, err}
select {
case slots <- slot:
case ch <- slot:
case <-ctx.Done():
slot.Close() // if we can't send this slot, need to take care of it ourselves
slot.Close()
}
return nil
}
func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, tok ResourceToken) error {
func (a *agent) runHot(ctxArg context.Context, call *call, tok ResourceToken) {
// We must be careful to only use ctxArg for logs/spans
// create a span from ctxArg but ignore the new Context
// instead we will create a new Context below and explicitly set its span
span, _ := opentracing.StartSpanFromContext(ctxArg, "docker_run_hot")
defer span.Finish()
if tok == nil {
// TODO we should panic, probably ;)
return errors.New("no token provided, not giving you a slot")
}
defer tok.Close()
// TODO we have to make sure we flush these pipes or we will deadlock
@@ -562,6 +526,9 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
// add the span we created above to the new Context
ctx = opentracing.ContextWithSpan(ctx, span)
start := time.Now()
call.slots.enterState(SlotQueueStarter)
cid := id.New().String()
// set up the stderr for the first one to capture any logs before the slot is
@@ -585,16 +552,23 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
cookie, err := a.driver.Prepare(ctx, container)
if err != nil {
return err
call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), err: err})
return
}
defer cookie.Close(context.Background()) // ensure container removal, separate ctx
waiter, err := cookie.Run(ctx)
if err != nil {
return err
call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), err: err})
return
}
// container is running
call.slots.enterState(SlotQueueRunner)
call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
defer call.slots.exitState(SlotQueueRunner)
// buffered, in case someone has slot when waiter returns but isn't yet listening
errC := make(chan error, 1)
@@ -611,20 +585,31 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
}
done := make(chan struct{})
slot := &hotSlot{done, proto, errC, container}
s := call.slots.queueSlot(&hotSlot{done, proto, errC, container, nil})
select {
case slots <- slot:
case <-s.trigger:
case <-time.After(time.Duration(call.IdleTimeout) * time.Second):
logger.Info("Canceling inactive hot function")
shutdownContainer()
return
if call.slots.ejectSlot(s) {
logger.Info("Canceling inactive hot function")
shutdownContainer()
return
}
case <-ctx.Done(): // container shutdown
return
if call.slots.ejectSlot(s) {
return
}
case <-a.shutdown: // server shutdown
shutdownContainer()
return
if call.slots.ejectSlot(s) {
shutdownContainer()
return
}
}
// IMPORTANT: if we fail to eject the slot, it means that a consumer
// just dequeued this and acquired the slot. In other words, we were
// too late in ejectSlot(), so we have to execute this request in this
// iteration. The beginning of the for-loop will re-check the ctx/shutdown cases
// and terminate after this request is done.
// wait for this call to finish
// NOTE do NOT select with shutdown / other channels. slot handles this.
@@ -640,7 +625,6 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
}
logger.WithError(err).Info("hot function terminated")
return err
}
// container implements drivers.ContainerTask container is the execution of a