slot tracking improvements (#562)

* fn: remove 100 msec sleep for hot containers

*) moved slot management to its own file
*) slots are now implemented with LIFO semantics; this is important since we do
   not want to round-robin hot containers. Idle hot containers should time out properly
   (a minimal sketch of the LIFO idea follows this list).
*) each slot queue now stores a few basic stats, such as the average time a call spent in a given
   state, the number of running/launching containers, and the number of waiting calls in those states.
*) the first metrics in these stats are discarded to avoid skew from the initial docker pull/start spikes.
*) agent now records/updates slot queue state and how much time a call stayed in that state.
*) waitHotSlot() replaces the previous 100 msec wait logic; it sends a message to the
   hot-slot goroutine launchHot() and waits for a slot.
*) launchHot() is now a goroutine that tracks containers in hot slots; it determines
   whether a new container is needed based on slot queue stats.
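
For illustration, a minimal sketch of the LIFO idea above (not the actual slotQueueMgr implementation; the lifoSlotStack type and its methods are hypothetical placeholders):

package main

import (
	"fmt"
	"sync"
)

// lifoSlotStack is a hypothetical, minimal stand-in for the slot queue's LIFO
// behavior: the most recently freed slot is handed out first, so the least
// recently used hot containers stay idle long enough to hit their idle
// timeout instead of being kept warm by round-robin dispatch.
type lifoSlotStack struct {
	mu    sync.Mutex
	slots []string // slot/container ids, newest at the end
}

// push returns a slot to the stack when its container becomes idle.
func (s *lifoSlotStack) push(id string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.slots = append(s.slots, id)
}

// pop hands out the most recently freed slot, if any.
func (s *lifoSlotStack) pop() (string, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if len(s.slots) == 0 {
		return "", false
	}
	id := s.slots[len(s.slots)-1]
	s.slots = s.slots[:len(s.slots)-1]
	return id, true
}

func main() {
	var q lifoSlotStack
	q.push("container-A") // became idle first
	q.push("container-B") // became idle last
	id, _ := q.pop()
	fmt.Println(id) // prints container-B; container-A stays idle and can time out
}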
Tolga Ceylan
2017-12-15 15:50:07 -08:00
committed by GitHub
parent a9820cfbab
commit 25a72146f5
3 changed files with 512 additions and 174 deletions


@@ -2,12 +2,9 @@ package agent
import (
"context"
"crypto/sha1"
"errors"
"fmt"
"io"
"net/http"
"sort"
"sync"
"time"
@@ -115,9 +112,7 @@ type agent struct {
driver drivers.Driver
hMu sync.RWMutex // protects hot
hot map[string]chan slot
slotMgr *slotQueueMgr
// track usage
resources ResourceTracker
@@ -138,7 +133,7 @@ func New(da DataAccess) Agent {
a := &agent{
da: da,
driver: driver,
hot: make(map[string]chan slot),
slotMgr: NewSlotQueueMgr(),
resources: NewResourceTracker(),
shutdown: make(chan struct{}),
promHandler: promhttp.Handler(),
@@ -260,147 +255,138 @@ func (a *agent) Submit(callI Call) error {
return transformTimeout(err, false)
}
// getSlot must ensure that if it receives a slot, it will be returned, otherwise
// a container will be locked up forever waiting for slot to free.
func (a *agent) getSlot(ctx context.Context, call *call) (slot, error) {
// getSlot returns a Slot (or error) for the request to run. Depending on hot/cold
// request type, this may launch a new container or wait for other containers to become idle
// or it may wait for resources to become available to launch a new container.
func (a *agent) getSlot(ctx context.Context, call *call) (Slot, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "agent_get_slot")
defer span.Finish()
if protocol.IsStreamable(protocol.Protocol(call.Format)) {
return a.hotSlot(ctx, call)
isHot := protocol.IsStreamable(protocol.Protocol(call.Format))
if isHot {
// For hot requests, we use a long-lived slot queue to manage hot containers
call.slots = a.slotMgr.getHotSlotQueue(call)
start := time.Now()
call.slots.enterState(SlotQueueWaiter)
s, err := a.launchHot(ctx, call)
call.slots.exitStateWithLatency(SlotQueueWaiter, uint64(time.Now().Sub(start).Seconds()*1000))
return s, err
}
// make new channel and launch 1 for cold
ch := make(chan slot)
return a.launchOrSlot(ctx, ch, call)
return a.launchCold(ctx, call)
}
// launchOrSlot will launch a container that will send slots on the provided channel when it
// is free if no slots are available on that channel first. the returned slot may or may not
// be from the launched container. if there is an error launching a new container (if necessary),
// then that will be returned rather than a slot, if no slot is free first.
func (a *agent) launchOrSlot(ctx context.Context, slots chan slot, call *call) (slot, error) {
var errCh <-chan error
// launchHot checks with the slot queue to see if a new container needs to be launched, and waits
// for available slots in the queue for hot request execution.
func (a *agent) launchHot(ctx context.Context, call *call) (Slot, error) {
// check if any slot is immediately available, without trying to get a ram token
select {
case s := <-slots:
return s, nil
case <-ctx.Done():
return nil, ctx.Err()
default:
}
// IMPORTANT: This means that if this request was submitted indirectly through fnlb or
// another proxy, we will continue classifying it as 'async', which is good: async requests,
// regardless of origin, should use the async resources.
isAsync := call.Type == models.TypeAsync
// add a context cancel here to prevent a ramToken/launch race; without this, ramToken/
// launch won't know whether we are no longer receiving or not yet receiving.
ctx, launchCancel := context.WithCancel(ctx)
defer launchCancel()
launchLoop:
for {
// Check/evaluate if we need to launch a new hot container
doLaunch, stats := call.slots.isNewContainerNeeded()
common.Logger(ctx).WithField("stats", stats).Debug("checking hot container launch ", doLaunch)
if doLaunch {
ctxToken, tokenCancel := context.WithCancel(context.Background())
// wait on token/slot/timeout whichever comes first
select {
case tok, isOpen := <-a.resources.GetResourceToken(ctxToken, call.Memory, isAsync):
tokenCancel()
if !isOpen {
return nil, models.ErrCallTimeoutServerBusy
}
go a.runHot(ctx, call, tok)
case s, ok := <-call.slots.getDequeueChan():
tokenCancel()
if !ok {
return nil, errors.New("slot shut down while waiting for hot slot")
}
if s.acquireSlot() {
if s.slot.Error() != nil {
s.slot.Close()
return nil, s.slot.Error()
}
return s.slot, nil
}
// we failed to take ownership of the slot (e.g. container idle timeout)
// try launching again
continue launchLoop
case <-ctx.Done():
tokenCancel()
return nil, ctx.Err()
}
}
// After launching a container (if it was necessary), wait for a slot/timeout,
// or periodically reevaluate the launchHot() logic from the beginning.
select {
case s, ok := <-call.slots.getDequeueChan():
if !ok {
return nil, errors.New("slot shut down while waiting for hot slot")
}
if s.acquireSlot() {
if s.slot.Error() != nil {
s.slot.Close()
return nil, s.slot.Error()
}
return s.slot, nil
}
// we failed to take ownership of the slot (e.g. container idle timeout)
// try launching again
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(time.Duration(200) * time.Millisecond):
// reevaluate
}
}
}
// launchCold waits for necessary resources to launch a new container, then
// returns the slot for that new container to run the request on.
func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
isAsync := call.Type == models.TypeAsync
ch := make(chan Slot)
// if nothing free, wait for ram token or a slot
select {
case s := <-slots:
return s, nil
case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, isAsync):
if !isOpen {
return nil, models.ErrCallTimeoutServerBusy
}
errCh = a.launch(ctx, slots, call, tok) // TODO mangle
go a.prepCold(ctx, call, tok, ch)
case <-ctx.Done():
return nil, ctx.Err()
}
// wait for launch err or a slot to open up (possibly from launch)
// wait for launch err or a slot to open up
select {
case err := <-errCh:
// if we get a launch err, try to return to user (e.g. image not found)
return nil, err
case slot := <-slots:
return slot, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
func (a *agent) hotSlot(ctx context.Context, call *call) (slot, error) {
slots := a.slots(hotKey(call))
// TODO if we track avg run time we could know how long to wait or
// if we need to launch instead of waiting.
// if we can get a slot in a reasonable amount of time, use it
select {
case s := <-slots:
return s, nil
case <-time.After(100 * time.Millisecond): // XXX(reed): precise^
// TODO this means the first launched container if none are running eats
// this. yes it sucks but there are a lot of other fish to fry, opening a
// policy discussion...
}
// then wait for a slot or try to launch...
return a.launchOrSlot(ctx, slots, call)
}
// TODO this should be a LIFO stack of channels, perhaps. a queue (channel)
// will always send the least recently used, not ideal.
func (a *agent) slots(key string) chan slot {
a.hMu.RLock()
slots, ok := a.hot[key]
a.hMu.RUnlock()
if !ok {
a.hMu.Lock()
slots, ok = a.hot[key]
if !ok {
slots = make(chan slot) // should not be buffered
a.hot[key] = slots
case s := <-ch:
if s.Error() != nil {
s.Close()
return nil, s.Error()
}
a.hMu.Unlock()
return s, nil
case <-ctx.Done():
return nil, ctx.Err()
}
return slots
}
func hotKey(call *call) string {
// return a sha1 hash of a (hopefully) unique string of all the config
// values, to make map lookups quicker [than the giant unique string]
hash := sha1.New()
fmt.Fprint(hash, call.AppName, "\x00")
fmt.Fprint(hash, call.Path, "\x00")
fmt.Fprint(hash, call.Image, "\x00")
fmt.Fprint(hash, call.Timeout, "\x00")
fmt.Fprint(hash, call.IdleTimeout, "\x00")
fmt.Fprint(hash, call.Memory, "\x00")
fmt.Fprint(hash, call.Format, "\x00")
// we have to sort these before printing, yay. TODO do better
keys := make([]string, 0, len(call.BaseEnv))
for k := range call.BaseEnv {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Fprint(hash, k, "\x00", call.BaseEnv[k], "\x00")
}
var buf [sha1.Size]byte
return string(hash.Sum(buf[:0]))
}
type slot interface {
exec(ctx context.Context, call *call) error
io.Closer
}
// implements Slot
type coldSlot struct {
cookie drivers.Cookie
tok ResourceToken
err error
}
func (s *coldSlot) Error() error {
return s.err
}
func (s *coldSlot) exec(ctx context.Context, call *call) error {
@@ -430,7 +416,9 @@ func (s *coldSlot) Close() error {
// removal latency
s.cookie.Close(context.Background()) // ensure container removal, separate ctx
}
s.tok.Close()
if s.tok != nil {
s.tok.Close()
}
return nil
}
@@ -440,9 +428,17 @@ type hotSlot struct {
proto protocol.ContainerIO
errC <-chan error // container error
container *container // TODO mask this
err error
}
func (s *hotSlot) Close() error { close(s.done); return nil }
func (s *hotSlot) Close() error {
close(s.done)
return nil
}
func (s *hotSlot) Error() error {
return s.err
}
func (s *hotSlot) exec(ctx context.Context, call *call) error {
span, ctx := opentracing.StartSpanFromContext(ctx, "agent_hot_exec")
@@ -451,6 +447,11 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
// link the container id and id in the logs [for us!]
common.Logger(ctx).WithField("container_id", s.container.id).Info("starting call")
start := time.Now()
defer func() {
call.slots.recordLatency(SlotQueueRunner, uint64(time.Now().Sub(start).Seconds()*1000))
}()
// swap in the new stderr logger & stat accumulator
oldStderr := s.container.swap(call.stderr, &call.Stats)
defer s.container.swap(oldStderr, nil) // once we're done, swap out in this scope to prevent races
@@ -475,33 +476,7 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
// TODO we REALLY need to wait for dispatch to return before conceding our slot
}
// this will work for hot & cold (woo)
// if launch encounters a non-nil error it will send it on the returned channel,
// this can be useful if an image doesn't exist, e.g.
func (a *agent) launch(ctx context.Context, slots chan<- slot, call *call, tok ResourceToken) <-chan error {
ch := make(chan error, 1)
if !protocol.IsStreamable(protocol.Protocol(call.Format)) {
// TODO no
go func() {
err := a.prepCold(ctx, slots, call, tok)
if err != nil {
ch <- err
}
}()
return ch
}
go func() {
err := a.runHot(ctx, slots, call, tok)
if err != nil {
ch <- err
}
}()
return ch
}
func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok ResourceToken) error {
func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch chan Slot) {
container := &container{
id: id.New().String(), // XXX we could just let docker generate ids...
image: call.Image,
@@ -517,32 +492,21 @@ func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok
// pull & create container before we return a slot, so as to be friendly
// about timing out if this takes a while...
cookie, err := a.driver.Prepare(ctx, container)
if err != nil {
tok.Close()
return err
}
slot := &coldSlot{cookie, tok}
slot := &coldSlot{cookie, tok, err}
select {
case slots <- slot:
case ch <- slot:
case <-ctx.Done():
slot.Close() // if we can't send this slot, need to take care of it ourselves
slot.Close()
}
return nil
}
func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, tok ResourceToken) error {
func (a *agent) runHot(ctxArg context.Context, call *call, tok ResourceToken) {
// We must be careful to only use ctxArg for logs/spans
// create a span from ctxArg but ignore the new Context
// instead we will create a new Context below and explicitly set its span
span, _ := opentracing.StartSpanFromContext(ctxArg, "docker_run_hot")
defer span.Finish()
if tok == nil {
// TODO we should panic, probably ;)
return errors.New("no token provided, not giving you a slot")
}
defer tok.Close()
// TODO we have to make sure we flush these pipes or we will deadlock
@@ -562,6 +526,9 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
// add the span we created above to the new Context
ctx = opentracing.ContextWithSpan(ctx, span)
start := time.Now()
call.slots.enterState(SlotQueueStarter)
cid := id.New().String()
// set up the stderr for the first one to capture any logs before the slot is
@@ -585,16 +552,23 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
cookie, err := a.driver.Prepare(ctx, container)
if err != nil {
return err
call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), err: err})
return
}
defer cookie.Close(context.Background()) // ensure container removal, separate ctx
waiter, err := cookie.Run(ctx)
if err != nil {
return err
call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), err: err})
return
}
// container is running
call.slots.enterState(SlotQueueRunner)
call.slots.exitStateWithLatency(SlotQueueStarter, uint64(time.Now().Sub(start).Seconds()*1000))
defer call.slots.exitState(SlotQueueRunner)
// buffered, in case someone has slot when waiter returns but isn't yet listening
errC := make(chan error, 1)
@@ -611,20 +585,31 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
}
done := make(chan struct{})
slot := &hotSlot{done, proto, errC, container}
s := call.slots.queueSlot(&hotSlot{done, proto, errC, container, nil})
select {
case slots <- slot:
case <-s.trigger:
case <-time.After(time.Duration(call.IdleTimeout) * time.Second):
logger.Info("Canceling inactive hot function")
shutdownContainer()
return
if call.slots.ejectSlot(s) {
logger.Info("Canceling inactive hot function")
shutdownContainer()
return
}
case <-ctx.Done(): // container shutdown
return
if call.slots.ejectSlot(s) {
return
}
case <-a.shutdown: // server shutdown
shutdownContainer()
return
if call.slots.ejectSlot(s) {
shutdownContainer()
return
}
}
// IMPORTANT: if we fail to eject the slot, it means that a consumer
// just dequeued this and acquired the slot. In other words, we were
// too late in ejectSlot(), so we have to execute this request in this
// iteration. The beginning of the for-loop will re-check the ctx/shutdown cases
// and terminate after this request is done.
// wait for this call to finish
// NOTE do NOT select with shutdown / other channels. slot handles this.
@@ -640,7 +625,6 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
}
logger.WithError(err).Info("hot function terminated")
return err
}
// container implements drivers.ContainerTask container is the execution of a