mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
gosec severity=medium passes, all severity=low errors are from unhandled errors, we have 107 of them. tbh it doesn't look worth it to me, but maybe there are a few assholes even itchier than mine out there. medium has some good stuff in it, and of course high makes sense if we're gonna do this at all. this adds some nosec annotations for some things like sql sprintfs where we know it's clean (we're constructing the strings with variables in them). fixed up other spots where we were sprinting without need. some stuff like filepath.Clean when opening a file from a variable, and file permissions, easy stuff... I can't get the CI build to shut up, but I can locally get it to be pretty quiet about imports and it just outputs the gosec output. fortunately, it still works as expected even when it's noisy. I got it to shut up by unsetting some of the go mod flags locally, but that doesn't seem to quite do it in circle, printed the env out and don't see them, so idk... i give up, this works closes #1303
1310 lines
40 KiB
Go
1310 lines
40 KiB
Go
package agent
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"path/filepath"
|
|
|
|
"github.com/fnproject/fn/api/agent/drivers"
|
|
"github.com/fnproject/fn/api/common"
|
|
"github.com/fnproject/fn/api/id"
|
|
"github.com/fnproject/fn/api/models"
|
|
"github.com/fnproject/fn/fnext"
|
|
"github.com/fsnotify/fsnotify"
|
|
docker "github.com/fsouza/go-dockerclient"
|
|
"github.com/sirupsen/logrus"
|
|
"go.opencensus.io/stats"
|
|
"go.opencensus.io/trace"
|
|
"os"
|
|
)
|
|
|
|
const (
|
|
pauseTimeout = 5 * time.Second // docker pause/unpause
|
|
)
|
|
|
|
// TODO we should prob store async calls in db immediately since we're returning id (will 404 until post-execution)
|
|
// TODO async calls need to add route.Headers as well
|
|
// TODO handle timeouts / no response in sync & async (sync is json+503 atm, not 504, async is empty log+status)
|
|
// see also: server/runner.go wrapping the response writer there, but need to handle async too (push down?)
|
|
// TODO storing logs / call can push call over the timeout
|
|
// TODO async is still broken, but way less so. we need to modify mq semantics
|
|
// to be much more robust. now we're at least running it if we delete the msg,
|
|
// but we may never store info about that execution so still broked (if fn
|
|
// dies). need coordination w/ db.
|
|
// TODO if async would store requests (or interchange format) it would be slick, but
|
|
// if we're going to store full calls in db maybe we should only queue pointers to ids?
|
|
// TODO examine cases where hot can't start a container and the user would never see an error
|
|
// about why that may be so (say, whatever it is takes longer than the timeout, e.g.)
|
|
// TODO if an image is not found or similar issues in getting a slot, then async should probably
|
|
// mark the call as errored rather than forever trying & failing to run it
|
|
// TODO it would be really nice if we made the ramToken wrap the driver cookie (less brittle,
|
|
// if those leak the container leaks too...) -- not the allocation, but the token.Close and cookie.Close
|
|
// TODO if machine is out of ram, just timeout immediately / wait for hot slot? (discuss policy)
|
|
|
|
// Agent exposes an api to create calls from various parameters and then submit
|
|
// those calls, it also exposes a 'safe' shutdown mechanism via its Close method.
|
|
// Agent has a few roles:
|
|
// * manage the memory pool for a given server
|
|
// * manage the container lifecycle for calls
|
|
// * execute calls against containers
|
|
// * invoke Start and End for each call appropriately
|
|
// * check the mq for any async calls, and submit them
|
|
//
|
|
// Overview:
|
|
// Upon submission of a call, Agent will start the call's timeout timer
|
|
// immediately. If the call is hot, Agent will attempt to find an active hot
|
|
// container for that route, and if necessary launch another container. calls
|
|
// will be able to read/write directly from/to a socket in the container. If
|
|
// it's necessary to launch a container, first an attempt will be made to try
|
|
// to reserve the ram required while waiting for any hot 'slot' to become
|
|
// available [if applicable]. If there is an error launching the container, an
|
|
// error will be returned provided the call has not yet timed out or found
|
|
// another hot 'slot' to execute in [if applicable]. call.Start will be called
|
|
// immediately before sending any input to a container. call.End will be called
|
|
// regardless of the timeout timer's status if the call was executed, and that
|
|
// error returned may be returned from Submit.
|
|
type Agent interface {
|
|
// GetCall will return a Call that is executable by the Agent, which
|
|
// can be built via various CallOpt's provided to the method.
|
|
GetCall(...CallOpt) (Call, error)
|
|
|
|
// Submit will attempt to execute a call locally, a Call may store information
|
|
// about itself in its Start and End methods, which will be called in Submit
|
|
// immediately before and after the Call is executed, respectively. An error
|
|
// will be returned if there is an issue executing the call or the error
|
|
// may be from the call's execution itself (if, say, the container dies,
|
|
// or the call times out).
|
|
Submit(Call) error
|
|
|
|
// Close will wait for any outstanding calls to complete and then exit.
|
|
// Closing the agent will invoke Close on the underlying DataAccess.
|
|
// Close is not safe to be called from multiple threads.
|
|
io.Closer
|
|
|
|
AddCallListener(fnext.CallListener)
|
|
}
|
|
|
|
type agent struct {
|
|
cfg Config
|
|
da CallHandler
|
|
callListeners []fnext.CallListener
|
|
|
|
driver drivers.Driver
|
|
|
|
slotMgr *slotQueueMgr
|
|
evictor Evictor
|
|
// track usage
|
|
resources ResourceTracker
|
|
|
|
// used to track running calls / safe shutdown
|
|
shutWg *common.WaitGroup
|
|
shutonce sync.Once
|
|
disableAsyncDequeue bool
|
|
|
|
callOverrider CallOverrider
|
|
// deferred actions to call at end of initialisation
|
|
onStartup []func()
|
|
}
|
|
|
|
// Option configures an agent at startup
|
|
type Option func(*agent) error
|
|
|
|
// RegistryToken is a reserved call extensions key to pass registry token
|
|
/* #nosec */
|
|
const RegistryToken = "FN_REGISTRY_TOKEN"
|
|
|
|
// New creates an Agent that executes functions locally as Docker containers.
|
|
func New(da CallHandler, options ...Option) Agent {
|
|
|
|
cfg, err := NewConfig()
|
|
if err != nil {
|
|
logrus.WithError(err).Fatalf("error in agent config cfg=%+v", cfg)
|
|
}
|
|
|
|
a := &agent{
|
|
cfg: *cfg,
|
|
}
|
|
|
|
a.shutWg = common.NewWaitGroup()
|
|
a.da = da
|
|
a.slotMgr = NewSlotQueueMgr()
|
|
a.evictor = NewEvictor()
|
|
|
|
// Allow overriding config
|
|
for _, option := range options {
|
|
err = option(a)
|
|
if err != nil {
|
|
logrus.WithError(err).Fatalf("error in agent options")
|
|
}
|
|
}
|
|
|
|
logrus.Infof("agent starting cfg=%+v", a.cfg)
|
|
|
|
if a.driver == nil {
|
|
d, err := NewDockerDriver(&a.cfg)
|
|
if err != nil {
|
|
logrus.WithError(err).Fatal("failed to create docker driver ")
|
|
}
|
|
a.driver = d
|
|
}
|
|
|
|
a.resources = NewResourceTracker(&a.cfg)
|
|
|
|
for _, sup := range a.onStartup {
|
|
sup()
|
|
}
|
|
return a
|
|
}
|
|
|
|
func (a *agent) addStartup(sup func()) {
|
|
a.onStartup = append(a.onStartup, sup)
|
|
|
|
}
|
|
|
|
// WithAsync Enables Async operations on the agent
|
|
func WithAsync(dqda DequeueDataAccess) Option {
|
|
return func(a *agent) error {
|
|
if !a.shutWg.AddSession(1) {
|
|
logrus.Fatalf("cannot start agent, unable to add session")
|
|
}
|
|
a.addStartup(func() {
|
|
go a.asyncDequeue(dqda) // safe shutdown can nanny this fine
|
|
})
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithConfig sets the agent config to the provided config
|
|
func WithConfig(cfg *Config) Option {
|
|
return func(a *agent) error {
|
|
a.cfg = *cfg
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithDockerDriver Provides a customer driver to agent
|
|
func WithDockerDriver(drv drivers.Driver) Option {
|
|
return func(a *agent) error {
|
|
if a.driver != nil {
|
|
return errors.New("cannot add driver to agent, driver already exists")
|
|
}
|
|
|
|
a.driver = drv
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// WithCallOverrider registers register a CallOverrider to modify a Call and extensions on call construction
|
|
func WithCallOverrider(fn CallOverrider) Option {
|
|
return func(a *agent) error {
|
|
if a.callOverrider != nil {
|
|
return errors.New("lb-agent call overriders already exists")
|
|
}
|
|
a.callOverrider = fn
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// NewDockerDriver creates a default docker driver from agent config
|
|
func NewDockerDriver(cfg *Config) (drivers.Driver, error) {
|
|
return drivers.New("docker", drivers.Config{
|
|
DockerNetworks: cfg.DockerNetworks,
|
|
DockerLoadFile: cfg.DockerLoadFile,
|
|
ServerVersion: cfg.MinDockerVersion,
|
|
PreForkPoolSize: cfg.PreForkPoolSize,
|
|
PreForkImage: cfg.PreForkImage,
|
|
PreForkCmd: cfg.PreForkCmd,
|
|
PreForkUseOnce: cfg.PreForkUseOnce,
|
|
PreForkNetworks: cfg.PreForkNetworks,
|
|
MaxTmpFsInodes: cfg.MaxTmpFsInodes,
|
|
EnableReadOnlyRootFs: !cfg.DisableReadOnlyRootFs,
|
|
MaxRetries: cfg.MaxDockerRetries,
|
|
})
|
|
}
|
|
|
|
func (a *agent) Close() error {
|
|
var err error
|
|
|
|
// wait for ongoing sessions
|
|
a.shutWg.CloseGroup()
|
|
|
|
a.shutonce.Do(func() {
|
|
// now close docker layer
|
|
if a.driver != nil {
|
|
err = a.driver.Close()
|
|
}
|
|
})
|
|
|
|
return err
|
|
}
|
|
|
|
func (a *agent) Submit(callI Call) error {
|
|
call := callI.(*call)
|
|
ctx, span := trace.StartSpan(call.req.Context(), "agent_submit")
|
|
defer span.End()
|
|
|
|
statsCalls(ctx)
|
|
|
|
if !a.shutWg.AddSession(1) {
|
|
statsTooBusy(ctx)
|
|
return models.ErrCallTimeoutServerBusy
|
|
}
|
|
defer a.shutWg.DoneSession()
|
|
|
|
err := a.submit(ctx, call)
|
|
return err
|
|
}
|
|
|
|
func (a *agent) startStateTrackers(ctx context.Context, call *call) {
|
|
call.requestState = NewRequestState()
|
|
}
|
|
|
|
func (a *agent) endStateTrackers(ctx context.Context, call *call) {
|
|
call.requestState.UpdateState(ctx, RequestStateDone, call.slots)
|
|
}
|
|
|
|
func (a *agent) submit(ctx context.Context, call *call) error {
|
|
statsEnqueue(ctx)
|
|
|
|
a.startStateTrackers(ctx, call)
|
|
defer a.endStateTrackers(ctx, call)
|
|
|
|
slot, err := a.getSlot(ctx, call)
|
|
if err != nil {
|
|
return a.handleCallEnd(ctx, call, slot, err, false)
|
|
}
|
|
|
|
err = call.Start(ctx)
|
|
if err != nil {
|
|
return a.handleCallEnd(ctx, call, slot, err, false)
|
|
}
|
|
|
|
statsDequeue(ctx)
|
|
statsStartRun(ctx)
|
|
|
|
// We are about to execute the function, set container Exec Deadline (call.Timeout)
|
|
slotCtx, cancel := context.WithTimeout(ctx, time.Duration(call.Timeout)*time.Second)
|
|
defer cancel()
|
|
|
|
// Pass this error (nil or otherwise) to end directly, to store status, etc.
|
|
err = slot.exec(slotCtx, call)
|
|
return a.handleCallEnd(ctx, call, slot, err, true)
|
|
}
|
|
|
|
func (a *agent) handleCallEnd(ctx context.Context, call *call, slot Slot, err error, isStarted bool) error {
|
|
|
|
if slot != nil {
|
|
slot.Close()
|
|
}
|
|
|
|
// This means call was routed (executed)
|
|
if isStarted {
|
|
call.End(ctx, err)
|
|
statsStopRun(ctx)
|
|
if err == nil {
|
|
statsComplete(ctx)
|
|
} else if err == context.DeadlineExceeded {
|
|
statsTimedout(ctx)
|
|
return models.ErrCallTimeout
|
|
}
|
|
} else {
|
|
statsDequeue(ctx)
|
|
if err == models.ErrCallTimeoutServerBusy || err == context.DeadlineExceeded {
|
|
statsTooBusy(ctx)
|
|
return models.ErrCallTimeoutServerBusy
|
|
}
|
|
}
|
|
|
|
if err == context.Canceled {
|
|
statsCanceled(ctx)
|
|
} else if err != nil {
|
|
statsErrors(ctx)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// getSlot returns a Slot (or error) for the request to run. This will wait
|
|
// for other containers to become idle or it may wait for resources to become
|
|
// available to launch a new container.
|
|
func (a *agent) getSlot(ctx context.Context, call *call) (Slot, error) {
|
|
if call.Type == models.TypeAsync {
|
|
// *) for async, slot deadline is also call.Timeout. This is because we would like to
|
|
// allocate enough time for docker-pull, slot-wait, docker-start, etc.
|
|
// and also make sure we have call.Timeout inside the container. Total time
|
|
// to run an async becomes 2 * call.Timeout.
|
|
// *) for sync, there's no slot deadline, the timeout is controlled by http-client
|
|
// context (or runner gRPC context)
|
|
tmp, cancel := context.WithTimeout(ctx, time.Duration(call.Timeout)*time.Second)
|
|
ctx = tmp
|
|
defer cancel()
|
|
}
|
|
|
|
ctx, span := trace.StartSpan(ctx, "agent_get_slot")
|
|
defer span.End()
|
|
|
|
// For hot requests, we use a long lived slot queue, which we use to manage hot containers
|
|
var isNew bool
|
|
|
|
if call.slotHashId == "" {
|
|
call.slotHashId = getSlotQueueKey(call)
|
|
}
|
|
|
|
call.slots, isNew = a.slotMgr.getSlotQueue(call.slotHashId)
|
|
call.requestState.UpdateState(ctx, RequestStateWait, call.slots)
|
|
|
|
// setup slot caller with a ctx that gets cancelled once waitHot() is completed.
|
|
// This allows runHot() to detect if original caller has been serviced by
|
|
// another container or if original caller was disconnected.
|
|
caller := &slotCaller{}
|
|
{
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
defer cancel()
|
|
|
|
caller.done = ctx.Done()
|
|
caller.notify = make(chan error)
|
|
}
|
|
|
|
if isNew {
|
|
go a.hotLauncher(ctx, call, caller)
|
|
}
|
|
s, err := a.waitHot(ctx, call, caller)
|
|
return s, err
|
|
}
|
|
|
|
// hotLauncher is spawned in a go routine for each slot queue to monitor stats and launch hot
|
|
// containers if needed. Upon shutdown or activity timeout, hotLauncher exits and during exit,
|
|
// it destroys the slot queue.
|
|
func (a *agent) hotLauncher(ctx context.Context, call *call, caller *slotCaller) {
|
|
// Let use 60 minutes or 2 * IdleTimeout as hot queue idle timeout, pick
|
|
// whichever is longer. If in this time, there's no activity, then
|
|
// we destroy the hot queue.
|
|
timeout := a.cfg.HotLauncherTimeout
|
|
idleTimeout := time.Duration(call.IdleTimeout) * time.Second * 2
|
|
if timeout < idleTimeout {
|
|
timeout = idleTimeout
|
|
}
|
|
|
|
logger := common.Logger(ctx)
|
|
logger.WithField("launcher_timeout", timeout).Debug("Hot function launcher starting")
|
|
|
|
// IMPORTANT: get a context that has a child span / logger but NO timeout
|
|
// TODO this is a 'FollowsFrom'
|
|
ctx = common.BackgroundContext(ctx)
|
|
ctx, span := trace.StartSpan(ctx, "agent_hot_launcher")
|
|
defer span.End()
|
|
|
|
for {
|
|
ctx, cancel := context.WithTimeout(ctx, timeout)
|
|
a.checkLaunch(ctx, call, *caller)
|
|
|
|
select {
|
|
case <-a.shutWg.Closer(): // server shutdown
|
|
cancel()
|
|
return
|
|
case <-ctx.Done(): // timed out
|
|
cancel()
|
|
if a.slotMgr.deleteSlotQueue(call.slots) {
|
|
logger.Debug("Hot function launcher timed out")
|
|
return
|
|
}
|
|
case caller = <-call.slots.signaller:
|
|
cancel()
|
|
}
|
|
}
|
|
}
|
|
|
|
func tryNotify(notifyChan chan error, err error) {
|
|
if notifyChan != nil && err != nil {
|
|
select {
|
|
case notifyChan <- err:
|
|
default:
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a *agent) checkLaunch(ctx context.Context, call *call, caller slotCaller) {
|
|
curStats := call.slots.getStats()
|
|
isNB := a.cfg.EnableNBResourceTracker
|
|
if !isNewContainerNeeded(&curStats) {
|
|
return
|
|
}
|
|
|
|
state := NewContainerState()
|
|
state.UpdateState(ctx, ContainerStateWait, call.slots)
|
|
|
|
mem := call.Memory + uint64(call.TmpFsSize)
|
|
|
|
var notifyChans []chan struct{}
|
|
var tok ResourceToken
|
|
|
|
// WARNING: Tricky flow below. We are here because: isNewContainerNeeded is true,
|
|
// in other words, we need to launch a new container at this time due to high load.
|
|
//
|
|
// For non-blocking mode, this means, if we cannot acquire resources (cpu+mem), then we need
|
|
// to notify the caller through notifyChan. This is not perfect as the callers and
|
|
// checkLaunch do not match 1-1. But this is OK, we can notify *any* waiter that
|
|
// has signalled us, this is because non-blocking mode is a system wide setting.
|
|
// The notifications are lossy, but callers will signal/poll again if this is the case
|
|
// or this may not matter if they've already acquired an empty slot.
|
|
//
|
|
// For Non-blocking mode, a.cfg.HotPoll should not be set to too high since a missed
|
|
// notify event from here will add a.cfg.HotPoll msec latency. Setting a.cfg.HotPoll may
|
|
// be an acceptable workaround for the short term since non-blocking mode likely to reduce
|
|
// the number of waiters which perhaps could compensate for more frequent polling.
|
|
//
|
|
// Non-blocking mode only applies to cpu+mem, and if isNewContainerNeeded decided that we do not
|
|
// need to start a new container, then waiters will wait.
|
|
select {
|
|
case tok = <-a.resources.GetResourceToken(ctx, mem, call.CPUs, isNB):
|
|
case <-time.After(a.cfg.HotPoll):
|
|
// Request routines are polling us with this a.cfg.HotPoll frequency. We can use this
|
|
// same timer to assume that we waited for cpu/mem long enough. Let's try to evict an
|
|
// idle container. We do this by submitting a non-blocking request and evicting required
|
|
// amount of resources.
|
|
select {
|
|
case tok = <-a.resources.GetResourceToken(ctx, mem, call.CPUs, true):
|
|
case <-ctx.Done(): // timeout
|
|
case <-a.shutWg.Closer(): // server shutdown
|
|
}
|
|
case <-ctx.Done(): // timeout
|
|
case <-a.shutWg.Closer(): // server shutdown
|
|
}
|
|
|
|
if tok != nil {
|
|
if tok.Error() != nil {
|
|
if tok.Error() != CapacityFull {
|
|
tryNotify(caller.notify, tok.Error())
|
|
} else {
|
|
needMem, needCpu := tok.NeededCapacity()
|
|
notifyChans = a.evictor.PerformEviction(call.slotHashId, needMem, uint64(needCpu))
|
|
// For Non-blocking mode, if there's nothing to evict, we emit 503.
|
|
if len(notifyChans) == 0 && isNB {
|
|
tryNotify(caller.notify, models.ErrCallTimeoutServerBusy)
|
|
}
|
|
}
|
|
} else if a.shutWg.AddSession(1) {
|
|
go func() {
|
|
// NOTE: runHot will not inherit the timeout from ctx (ignore timings)
|
|
a.runHot(ctx, caller, call, tok, state)
|
|
a.shutWg.DoneSession()
|
|
}()
|
|
// early return (do not allow container state to switch to ContainerStateDone)
|
|
return
|
|
}
|
|
statsUtilization(ctx, a.resources.GetUtilization())
|
|
tok.Close()
|
|
}
|
|
|
|
defer state.UpdateState(ctx, ContainerStateDone, call.slots)
|
|
|
|
// IMPORTANT: we wait here for any possible evictions to finalize. Otherwise
|
|
// hotLauncher could call checkLaunch again and cause a capacity full (http 503)
|
|
// error.
|
|
for _, wait := range notifyChans {
|
|
select {
|
|
case <-wait:
|
|
case <-ctx.Done(): // timeout
|
|
return
|
|
case <-a.shutWg.Closer(): // server shutdown
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// waitHot pings and waits for a hot container from the slot queue
|
|
func (a *agent) waitHot(ctx context.Context, call *call, caller *slotCaller) (Slot, error) {
|
|
ctx, span := trace.StartSpan(ctx, "agent_wait_hot")
|
|
defer span.End()
|
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
defer cancel() // shut down dequeuer if we grab a slot
|
|
|
|
ch := call.slots.startDequeuer(ctx)
|
|
|
|
// 1) if we can get a slot immediately, grab it.
|
|
// 2) if we don't, send a signaller every x msecs until we do.
|
|
|
|
sleep := 1 * time.Microsecond // pad, so time.After doesn't send immediately
|
|
for {
|
|
select {
|
|
case err := <-caller.notify:
|
|
return nil, err
|
|
case s := <-ch:
|
|
if call.slots.acquireSlot(s) {
|
|
if s.slot.Error() != nil {
|
|
s.slot.Close()
|
|
return nil, s.slot.Error()
|
|
}
|
|
return s.slot, nil
|
|
}
|
|
// we failed to take ownership of the token (eg. container idle timeout) => try again
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
case <-a.shutWg.Closer(): // server shutdown
|
|
return nil, models.ErrCallTimeoutServerBusy
|
|
case <-time.After(sleep):
|
|
// ping dequeuer again
|
|
}
|
|
|
|
// set sleep to x msecs after first iteration
|
|
sleep = a.cfg.HotPoll
|
|
// send a notification to launchHot()
|
|
select {
|
|
case call.slots.signaller <- caller:
|
|
default:
|
|
}
|
|
}
|
|
}
|
|
|
|
// implements Slot
|
|
type hotSlot struct {
|
|
done chan struct{} // signal we are done with slot
|
|
container *container // TODO mask this
|
|
cfg *Config
|
|
fatalErr error
|
|
containerSpan trace.SpanContext
|
|
}
|
|
|
|
func (s *hotSlot) Close() error {
|
|
close(s.done)
|
|
return nil
|
|
}
|
|
|
|
func (s *hotSlot) Error() error {
|
|
return s.fatalErr
|
|
}
|
|
|
|
func (s *hotSlot) trySetError(err error) {
|
|
if s.fatalErr == nil {
|
|
s.fatalErr = err
|
|
}
|
|
}
|
|
|
|
func (s *hotSlot) exec(ctx context.Context, call *call) error {
|
|
ctx, span := trace.StartSpan(ctx, "agent_hot_exec")
|
|
defer span.End()
|
|
|
|
call.requestState.UpdateState(ctx, RequestStateExec, call.slots)
|
|
|
|
// link the container id and id in the logs [for us!]
|
|
common.Logger(ctx).WithField("container_id", s.container.id).Info("starting call")
|
|
|
|
// link the container span to ours for additional context (start/freeze/etc.)
|
|
span.AddLink(trace.Link{
|
|
TraceID: s.containerSpan.TraceID,
|
|
SpanID: s.containerSpan.SpanID,
|
|
Type: trace.LinkTypeChild,
|
|
})
|
|
|
|
call.req = call.req.WithContext(ctx) // TODO this is funny biz reed is bad
|
|
return s.dispatch(ctx, call)
|
|
}
|
|
|
|
var removeHeaders = map[string]bool{
|
|
"connection": true,
|
|
"keep-alive": true,
|
|
"trailer": true,
|
|
"transfer-encoding": true,
|
|
"te": true,
|
|
"upgrade": true,
|
|
"authorization": true,
|
|
}
|
|
|
|
func createUDSRequest(ctx context.Context, call *call) *http.Request {
|
|
req, err := http.NewRequest("POST", "http://localhost/call", call.req.Body)
|
|
if err != nil {
|
|
common.Logger(ctx).WithError(err).Error("somebody put a bad url in the call http request. 10 lashes.")
|
|
panic(err)
|
|
}
|
|
// Set the context on the request to make sure transport and client handle
|
|
// it properly and close connections at the end, e.g. when using UDS.
|
|
req = req.WithContext(ctx)
|
|
|
|
req.Header = make(http.Header)
|
|
for k, vs := range call.req.Header {
|
|
if !removeHeaders[strings.ToLower(k)] {
|
|
for _, v := range vs {
|
|
req.Header.Add(k, v)
|
|
}
|
|
}
|
|
}
|
|
|
|
req.Header.Set("Fn-Call-Id", call.ID)
|
|
deadline, ok := ctx.Deadline()
|
|
if ok {
|
|
deadlineStr := deadline.Format(time.RFC3339)
|
|
req.Header.Set("Fn-Deadline", deadlineStr)
|
|
}
|
|
|
|
return req
|
|
}
|
|
|
|
func (s *hotSlot) dispatch(ctx context.Context, call *call) error {
|
|
ctx, span := trace.StartSpan(ctx, "agent_dispatch_httpstream")
|
|
defer span.End()
|
|
|
|
// TODO it's possible we can get rid of this (after getting rid of logs API) - may need for call id/debug mode still
|
|
// TODO there's a timeout race for swapping this back if the container doesn't get killed for timing out, and don't you forget it
|
|
swapBack := s.container.swap(call.stderr, &call.Stats)
|
|
defer swapBack()
|
|
|
|
resp, err := s.container.udsClient.Do(createUDSRequest(ctx, call))
|
|
if err != nil {
|
|
// IMPORTANT: Container contract: If http-uds errors/timeout, container cannot continue
|
|
s.trySetError(err)
|
|
// first filter out timeouts
|
|
if ctx.Err() == context.DeadlineExceeded {
|
|
return context.DeadlineExceeded
|
|
}
|
|
return models.ErrFunctionResponse
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
common.Logger(ctx).WithField("resp", resp).Debug("Got resp from UDS socket")
|
|
|
|
ioErrChan := make(chan error, 1)
|
|
go func() {
|
|
ioErrChan <- s.writeResp(ctx, s.cfg.MaxResponseSize, resp, call.respWriter)
|
|
}()
|
|
|
|
select {
|
|
case ioErr := <-ioErrChan:
|
|
return ioErr
|
|
case <-ctx.Done():
|
|
if ctx.Err() == context.DeadlineExceeded {
|
|
// IMPORTANT: Container contract: If http-uds timeout, container cannot continue
|
|
s.trySetError(ctx.Err())
|
|
}
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
|
|
func (s *hotSlot) writeResp(ctx context.Context, max uint64, resp *http.Response, w io.Writer) error {
|
|
rw, ok := w.(http.ResponseWriter)
|
|
if !ok {
|
|
// WARNING: this bypasses container contract translation. Assuming this is
|
|
// async mode, where we are storing response in call.stderr.
|
|
w = common.NewClampWriter(w, max, models.ErrFunctionResponseTooBig)
|
|
return resp.Write(w)
|
|
}
|
|
|
|
// IMPORTANT: Container contract: Enforce 200/502/504 expections
|
|
switch resp.StatusCode {
|
|
case http.StatusOK:
|
|
// FDK processed the request OK
|
|
case http.StatusBadGateway:
|
|
// FDK detected failure, container can continue
|
|
return models.ErrFunctionFailed
|
|
case http.StatusGatewayTimeout:
|
|
// FDK detected timeout, respond as if ctx expired, this gets translated & handled in handleCallEnd()
|
|
return context.DeadlineExceeded
|
|
default:
|
|
// Any other code. Possible FDK failure. We shutdown the container
|
|
s.trySetError(fmt.Errorf("FDK Error, invalid status code %d", resp.StatusCode))
|
|
return models.ErrFunctionInvalidResponse
|
|
}
|
|
|
|
rw = newSizerRespWriter(max, rw)
|
|
rw.WriteHeader(http.StatusOK)
|
|
|
|
// WARNING: is the following header copy safe?
|
|
// if we're writing directly to the response writer, we need to set headers
|
|
// and only copy the body. resp.Write would copy a full
|
|
// http request into the response body (not what we want).
|
|
for k, vs := range resp.Header {
|
|
for _, v := range vs {
|
|
rw.Header().Add(k, v)
|
|
}
|
|
}
|
|
|
|
_, ioErr := io.Copy(rw, resp.Body)
|
|
return ioErr
|
|
}
|
|
|
|
// XXX(reed): this is a remnant of old io.pipe plumbing, we need to get rid of
|
|
// the buffers from the front-end in actuality, but only after removing other formats... so here, eat this
|
|
type sizerRespWriter struct {
|
|
http.ResponseWriter
|
|
w io.Writer
|
|
}
|
|
|
|
var _ http.ResponseWriter = new(sizerRespWriter)
|
|
|
|
func newSizerRespWriter(max uint64, rw http.ResponseWriter) http.ResponseWriter {
|
|
return &sizerRespWriter{
|
|
ResponseWriter: rw,
|
|
w: common.NewClampWriter(rw, max, models.ErrFunctionResponseTooBig),
|
|
}
|
|
}
|
|
|
|
func (s *sizerRespWriter) Write(b []byte) (int, error) { return s.w.Write(b) }
|
|
|
|
// Try to queue an error to the error channel if possible.
|
|
func tryQueueErr(err error, ch chan error) error {
|
|
if err != nil {
|
|
select {
|
|
case ch <- err:
|
|
default:
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (a *agent) runHot(ctx context.Context, caller slotCaller, call *call, tok ResourceToken, state ContainerState) {
|
|
// IMPORTANT: get a context that has a child span / logger but NO timeout
|
|
// TODO this is a 'FollowsFrom'
|
|
ctx = common.BackgroundContext(ctx)
|
|
ctx, span := trace.StartSpan(ctx, "agent_run_hot")
|
|
defer span.End()
|
|
|
|
var container *container
|
|
var cookie drivers.Cookie
|
|
var err error
|
|
|
|
id := id.New().String()
|
|
logger := logrus.WithFields(logrus.Fields{"id": id, "app_id": call.AppID, "fn_id": call.FnID, "image": call.Image, "memory": call.Memory, "cpus": call.CPUs, "idle_timeout": call.IdleTimeout})
|
|
ctx, cancel := context.WithCancel(common.WithLogger(ctx, logger))
|
|
|
|
initialized := make(chan struct{}) // when closed, container is ready to handle requests
|
|
udsWait := make(chan error, 1) // track UDS state and errors
|
|
errQueue := make(chan error, 1) // errors to be reflected back to the slot queue
|
|
|
|
evictor := a.evictor.CreateEvictToken(call.slotHashId, call.Memory+uint64(call.TmpFsSize), uint64(call.CPUs))
|
|
|
|
statsUtilization(ctx, a.resources.GetUtilization())
|
|
state.UpdateState(ctx, ContainerStateStart, call.slots)
|
|
|
|
// stack unwind spelled out with strict ordering below.
|
|
defer func() {
|
|
// IMPORTANT: we ignore any errors due to eviction and do not reflect these to clients.
|
|
if !evictor.isEvicted() {
|
|
select {
|
|
case <-initialized:
|
|
default:
|
|
tryQueueErr(models.ErrContainerInitFail, errQueue)
|
|
}
|
|
select {
|
|
case err := <-errQueue:
|
|
call.slots.queueSlot(&hotSlot{done: make(chan struct{}), fatalErr: err})
|
|
default:
|
|
}
|
|
}
|
|
|
|
// shutdown the container and related I/O operations and go routines
|
|
cancel()
|
|
|
|
// IMPORTANT: for release cookie (remove container), make sure ctx below has no timeout.
|
|
if cookie != nil {
|
|
cookie.Close(common.BackgroundContext(ctx))
|
|
}
|
|
|
|
if container != nil {
|
|
container.Close()
|
|
}
|
|
|
|
lastState := state.GetState()
|
|
state.UpdateState(ctx, ContainerStateDone, call.slots)
|
|
|
|
tok.Close() // release cpu/mem
|
|
|
|
// IMPORTANT: evict token is deleted *after* resource token.
|
|
// This ordering allows resource token to be freed first, which means once evict token
|
|
// is deleted, eviction is considered to be completed.
|
|
a.evictor.DeleteEvictToken(evictor)
|
|
|
|
statsUtilization(ctx, a.resources.GetUtilization())
|
|
if evictor.isEvicted() {
|
|
logger.Debugf("Hot function evicted")
|
|
statsContainerEvicted(ctx, lastState)
|
|
}
|
|
}()
|
|
|
|
// Monitor initialization and evictability. Closes 'initialized' channel
|
|
// to hand over the processing to main request processing go-routine
|
|
go func() {
|
|
for {
|
|
select {
|
|
case err := <-udsWait:
|
|
if tryQueueErr(err, errQueue) != nil {
|
|
cancel()
|
|
} else {
|
|
close(initialized)
|
|
}
|
|
return
|
|
case <-ctx.Done(): // container shutdown
|
|
return
|
|
case <-a.shutWg.Closer(): // agent shutdown
|
|
cancel()
|
|
return
|
|
case <-caller.done: // original caller disconnected or serviced by another container?
|
|
evictor.SetEvictable(true)
|
|
caller.done = nil // block 'caller.done' after this point
|
|
case <-evictor.C: // eviction
|
|
cancel()
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
container = newHotContainer(ctx, call, &a.cfg, id, udsWait)
|
|
if container == nil {
|
|
return
|
|
}
|
|
|
|
cookie, err = a.driver.CreateCookie(ctx, container)
|
|
if tryQueueErr(err, errQueue) != nil {
|
|
return
|
|
}
|
|
|
|
needsPull, err := cookie.ValidateImage(ctx)
|
|
if tryQueueErr(err, errQueue) != nil {
|
|
return
|
|
}
|
|
|
|
if needsPull {
|
|
ctx, cancel := context.WithTimeout(ctx, a.cfg.HotPullTimeout)
|
|
err = cookie.PullImage(ctx)
|
|
cancel()
|
|
if ctx.Err() == context.DeadlineExceeded {
|
|
err = models.ErrDockerPullTimeout
|
|
}
|
|
if tryQueueErr(err, errQueue) != nil {
|
|
return
|
|
}
|
|
}
|
|
|
|
err = cookie.CreateContainer(ctx)
|
|
if tryQueueErr(err, errQueue) != nil {
|
|
return
|
|
}
|
|
|
|
waiter, err := cookie.Run(ctx)
|
|
if tryQueueErr(err, errQueue) != nil {
|
|
return
|
|
}
|
|
|
|
// Main request processing go-routine
|
|
go func() {
|
|
defer cancel() // also close if we get an agent shutdown / idle timeout
|
|
|
|
// We record init wait for three basic states below: "initialized", "canceled", "timedout"
|
|
// Notice how we do not distinguish between agent-shutdown, eviction, ctx.Done, etc. This is
|
|
// because monitoring go-routine may pick these events earlier and cancel the ctx.
|
|
initStart := time.Now()
|
|
|
|
// INIT BARRIER HERE. Wait for the initialization go-routine signal
|
|
select {
|
|
case <-initialized:
|
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "initialized")
|
|
case <-a.shutWg.Closer(): // agent shutdown
|
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "canceled")
|
|
return
|
|
case <-ctx.Done():
|
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "canceled")
|
|
return
|
|
case <-evictor.C: // eviction
|
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "canceled")
|
|
return
|
|
case <-time.After(a.cfg.HotStartTimeout):
|
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "timedout")
|
|
tryQueueErr(models.ErrContainerInitTimeout, errQueue)
|
|
return
|
|
}
|
|
|
|
for {
|
|
// Below we are rather defensive and poll on evictor/ctx
|
|
// to reduce the likelyhood of attempting to queue a hotSlot when these
|
|
// two cases occur.
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-evictor.C: // eviction
|
|
return
|
|
default:
|
|
}
|
|
|
|
slot := &hotSlot{
|
|
done: make(chan struct{}),
|
|
container: container,
|
|
cfg: &a.cfg,
|
|
containerSpan: trace.FromContext(ctx).SpanContext(),
|
|
}
|
|
if !a.runHotReq(ctx, call, state, logger, cookie, slot, evictor) {
|
|
return
|
|
}
|
|
// wait for this call to finish
|
|
// NOTE do NOT select with shutdown / other channels. slot handles this.
|
|
<-slot.done
|
|
|
|
if slot.fatalErr != nil {
|
|
logger.WithError(slot.fatalErr).Info("hot function terminating")
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
runRes := waiter.Wait(ctx)
|
|
if runRes != nil && runRes.Error() != context.Canceled {
|
|
logger.WithError(runRes.Error()).Info("hot function terminated")
|
|
}
|
|
}
|
|
|
|
//checkSocketDestination verifies that the socket file created by the FDK is valid and permitted - notably verifying that any symlinks are relative to the socket dir
|
|
func checkSocketDestination(filename string) error {
|
|
finfo, err := os.Lstat(filename)
|
|
if err != nil {
|
|
return fmt.Errorf("error statting unix socket link file %s", err)
|
|
}
|
|
|
|
if (finfo.Mode() & os.ModeSymlink) > 0 {
|
|
linkDest, err := os.Readlink(filename)
|
|
if err != nil {
|
|
return fmt.Errorf("error reading unix socket symlink destination %s", err)
|
|
}
|
|
if filepath.Dir(linkDest) != "." {
|
|
return fmt.Errorf("invalid unix socket symlink, symlinks must be relative within the unix socket directory")
|
|
}
|
|
}
|
|
|
|
// stat the absolute path and check it is a socket
|
|
absInfo, err := os.Stat(filename)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to stat unix socket file %s", err)
|
|
}
|
|
if absInfo.Mode()&os.ModeSocket == 0 {
|
|
return fmt.Errorf("listener file is not a socket")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func inotifyAwait(ctx context.Context, iofsDir string, udsWait chan error) {
|
|
ctx, span := trace.StartSpan(ctx, "inotify_await")
|
|
defer span.End()
|
|
|
|
logger := common.Logger(ctx)
|
|
|
|
// Here we create the fs notify (inotify) synchronously and once that is
|
|
// setup, then fork off our async go-routine. Basically fsnotify should be enabled
|
|
// before we launch the container in order not to miss any events.
|
|
fsWatcher, err := fsnotify.NewWatcher()
|
|
if err != nil {
|
|
udsWait <- fmt.Errorf("error getting fsnotify watcher: %v", err)
|
|
return
|
|
}
|
|
|
|
err = fsWatcher.Add(iofsDir)
|
|
if err != nil {
|
|
if err := fsWatcher.Close(); err != nil {
|
|
logger.WithError(err).Error("Failed to close inotify watcher")
|
|
}
|
|
udsWait <- fmt.Errorf("error adding iofs dir to fswatcher: %v", err)
|
|
return
|
|
}
|
|
|
|
go func() {
|
|
ctx, span := trace.StartSpan(ctx, "inotify_await_poller")
|
|
defer span.End()
|
|
|
|
defer func() {
|
|
if err := fsWatcher.Close(); err != nil {
|
|
logger.WithError(err).Error("Failed to close inotify watcher")
|
|
}
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case err := <-fsWatcher.Errors:
|
|
// TODO: We do not know if these cases would be due to customer container/FDK
|
|
// fault or some kind of service/runner issue. As conservative choice,
|
|
// we reflect back a non API error, which means a 500 back to user.
|
|
logger.WithError(err).Error("error watching for iofs")
|
|
udsWait <- err
|
|
return
|
|
case event := <-fsWatcher.Events:
|
|
logger.WithField("event", event).Debug("fsnotify event")
|
|
if event.Op&fsnotify.Create == fsnotify.Create && event.Name == filepath.Join(iofsDir, udsFilename) {
|
|
err := checkSocketDestination(filepath.Join(iofsDir, udsFilename))
|
|
if err != nil {
|
|
// This case is more like a bad FDK/container, so let's reflect this back to
|
|
// clients as container init fail.
|
|
logger.WithError(err).Error("Failed to check socket destination")
|
|
udsWait <- models.ErrContainerInitFail
|
|
} else {
|
|
close(udsWait)
|
|
}
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
// runHotReq enqueues a free slot to slot queue manager and watches various timers and the consumer until
|
|
// the slot is consumed. A return value of false means, the container should shutdown and no subsequent
|
|
// calls should be made to this function.
|
|
func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState, logger logrus.FieldLogger, cookie drivers.Cookie, slot *hotSlot, evictor *EvictToken) bool {
|
|
|
|
var err error
|
|
isFrozen := false
|
|
|
|
freezeTimer := time.NewTimer(a.cfg.FreezeIdle)
|
|
idleTimer := time.NewTimer(time.Duration(call.IdleTimeout) * time.Second)
|
|
|
|
defer func() {
|
|
evictor.SetEvictable(false)
|
|
freezeTimer.Stop()
|
|
idleTimer.Stop()
|
|
// log if any error is encountered
|
|
if err != nil {
|
|
logger.WithError(err).Error("hot function failure")
|
|
}
|
|
}()
|
|
|
|
evictor.SetEvictable(true)
|
|
state.UpdateState(ctx, ContainerStateIdle, call.slots)
|
|
|
|
s := call.slots.queueSlot(slot)
|
|
|
|
for {
|
|
select {
|
|
case <-s.trigger: // slot already consumed
|
|
case <-ctx.Done(): // container shutdown
|
|
case <-a.shutWg.Closer(): // agent shutdown
|
|
case <-idleTimer.C:
|
|
case <-freezeTimer.C:
|
|
if !isFrozen {
|
|
ctx, cancel := context.WithTimeout(ctx, pauseTimeout)
|
|
err = cookie.Freeze(ctx)
|
|
cancel()
|
|
if err != nil {
|
|
return false
|
|
}
|
|
isFrozen = true
|
|
state.UpdateState(ctx, ContainerStatePaused, call.slots)
|
|
}
|
|
continue
|
|
case <-evictor.C:
|
|
}
|
|
break
|
|
}
|
|
|
|
evictor.SetEvictable(false)
|
|
|
|
// if we can acquire token, that means we are here due to
|
|
// abort/shutdown/timeout, attempt to acquire and terminate,
|
|
// otherwise continue processing the request
|
|
if call.slots.acquireSlot(s) {
|
|
slot.Close()
|
|
return false
|
|
}
|
|
|
|
// In case, timer/acquireSlot failure landed us here, make
|
|
// sure to unfreeze.
|
|
if isFrozen {
|
|
ctx, cancel := context.WithTimeout(ctx, pauseTimeout)
|
|
err = cookie.Unfreeze(ctx)
|
|
cancel()
|
|
if err != nil {
|
|
return false
|
|
}
|
|
isFrozen = false
|
|
}
|
|
|
|
state.UpdateState(ctx, ContainerStateBusy, call.slots)
|
|
return true
|
|
}
|
|
|
|
// container implements drivers.ContainerTask container is the execution of a
|
|
// single container, which may run multiple functions [consecutively]. the id
|
|
// and stderr can be swapped out by new calls in the container. input and
|
|
// output must be copied in and out. stdout is sent to stderr.
|
|
type container struct {
|
|
id string // contrived
|
|
image string
|
|
env map[string]string
|
|
extensions map[string]string
|
|
memory uint64
|
|
cpus uint64
|
|
fsSize uint64
|
|
tmpFsSize uint64
|
|
iofs iofs
|
|
logCfg drivers.LoggerConfig
|
|
close func()
|
|
|
|
stderr io.Writer
|
|
|
|
udsClient http.Client
|
|
|
|
// swapMu protects the stats swapping
|
|
swapMu sync.Mutex
|
|
stats *drivers.Stats
|
|
}
|
|
|
|
// newHotContainer creates a container that can be used for multiple sequential events
|
|
func newHotContainer(ctx context.Context, call *call, cfg *Config, id string, udsWait chan error) *container {
|
|
|
|
var iofs iofs
|
|
var err error
|
|
|
|
logger := common.Logger(ctx)
|
|
|
|
if cfg.IOFSEnableTmpfs {
|
|
iofs, err = newTmpfsIOFS(ctx, cfg)
|
|
} else {
|
|
iofs, err = newDirectoryIOFS(ctx, cfg)
|
|
}
|
|
if err != nil {
|
|
udsWait <- err
|
|
return nil
|
|
}
|
|
|
|
inotifyAwait(ctx, iofs.AgentPath(), udsWait)
|
|
|
|
// IMPORTANT: we are not operating on a TTY allocated container. This means, stderr and stdout are multiplexed
|
|
// from the same stream internally via docker using a multiplexing protocol. Therefore, stderr/stdout *BOTH*
|
|
// have to be read or *BOTH* blocked consistently. In other words, we cannot block one and continue
|
|
// reading from the other one without risking head-of-line blocking.
|
|
|
|
// TODO(reed): we should let the syslog driver pick this up really but our
|
|
// default story sucks there
|
|
|
|
// disable container logs if they're disabled on the call (pure_runner) -
|
|
// users may use syslog to get container logs, unrelated to this writer.
|
|
// otherwise, make a line writer and allow logrus DEBUG logs to host stderr
|
|
// between function invocations from the container.
|
|
|
|
var bufs []*bytes.Buffer
|
|
var stderr io.WriteCloser = call.stderr
|
|
if _, ok := stderr.(common.NoopReadWriteCloser); !ok {
|
|
gw := common.NewGhostWriter()
|
|
buf1 := bufPool.Get().(*bytes.Buffer)
|
|
sec := &nopCloser{&logWriter{
|
|
logrus.WithFields(logrus.Fields{"tag": "stderr", "app_id": call.AppID, "fn_id": call.FnID, "image": call.Image, "container_id": id}),
|
|
}}
|
|
gw.Swap(newLineWriterWithBuffer(buf1, sec))
|
|
stderr = gw
|
|
bufs = append(bufs, buf1)
|
|
}
|
|
|
|
return &container{
|
|
id: id, // XXX we could just let docker generate ids...
|
|
image: call.Image,
|
|
env: map[string]string(call.Config),
|
|
extensions: call.extensions,
|
|
memory: call.Memory,
|
|
cpus: uint64(call.CPUs),
|
|
fsSize: cfg.MaxFsSize,
|
|
tmpFsSize: uint64(call.TmpFsSize),
|
|
iofs: iofs,
|
|
logCfg: drivers.LoggerConfig{
|
|
URL: strings.TrimSpace(call.SyslogURL),
|
|
Tags: []drivers.LoggerTag{
|
|
{Name: "app_id", Value: call.AppID},
|
|
{Name: "fn_id", Value: call.FnID},
|
|
},
|
|
},
|
|
stderr: stderr,
|
|
udsClient: http.Client{
|
|
Transport: &http.Transport{
|
|
MaxIdleConns: 1,
|
|
MaxIdleConnsPerHost: 1,
|
|
// XXX(reed): other settings ?
|
|
IdleConnTimeout: 1 * time.Second,
|
|
DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
|
|
var d net.Dialer
|
|
return d.DialContext(ctx, "unix", filepath.Join(iofs.AgentPath(), udsFilename))
|
|
},
|
|
},
|
|
},
|
|
close: func() {
|
|
stderr.Close()
|
|
for _, b := range bufs {
|
|
bufPool.Put(b)
|
|
}
|
|
if err := iofs.Close(); err != nil {
|
|
logger.WithError(err).Error("Error closing IOFS")
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
func (c *container) swap(stderr io.Writer, cs *drivers.Stats) func() {
|
|
// if they aren't using a ghost writer, the logs are disabled, we can skip swapping
|
|
gw, ok := c.stderr.(common.GhostWriter)
|
|
var ostderr io.Writer
|
|
if ok {
|
|
ostderr = gw.Swap(stderr)
|
|
}
|
|
c.swapMu.Lock()
|
|
ocs := c.stats
|
|
c.stats = cs
|
|
c.swapMu.Unlock()
|
|
|
|
return func() {
|
|
if ostderr != nil {
|
|
c.stderr.(common.GhostWriter).Swap(ostderr)
|
|
}
|
|
c.swapMu.Lock()
|
|
c.stats = ocs
|
|
c.swapMu.Unlock()
|
|
}
|
|
}
|
|
|
|
func (c *container) Id() string { return c.id }
|
|
func (c *container) Command() string { return "" }
|
|
func (c *container) Input() io.Reader { return common.NoopReadWriteCloser{} }
|
|
func (c *container) Logger() (io.Writer, io.Writer) { return c.stderr, c.stderr }
|
|
func (c *container) Volumes() [][2]string { return nil }
|
|
func (c *container) WorkDir() string { return "" }
|
|
func (c *container) Close() { c.close() }
|
|
func (c *container) Image() string { return c.image }
|
|
func (c *container) Timeout() time.Duration { return 0 } // context handles this
|
|
func (c *container) EnvVars() map[string]string { return c.env }
|
|
func (c *container) Memory() uint64 { return c.memory * 1024 * 1024 } // convert MB
|
|
func (c *container) CPUs() uint64 { return c.cpus }
|
|
func (c *container) FsSize() uint64 { return c.fsSize }
|
|
func (c *container) TmpFsSize() uint64 { return c.tmpFsSize }
|
|
func (c *container) Extensions() map[string]string { return c.extensions }
|
|
func (c *container) LoggerConfig() drivers.LoggerConfig { return c.logCfg }
|
|
func (c *container) UDSAgentPath() string { return c.iofs.AgentPath() }
|
|
func (c *container) UDSDockerPath() string { return c.iofs.DockerPath() }
|
|
func (c *container) UDSDockerDest() string { return iofsDockerMountDest }
|
|
|
|
// WriteStat publishes each metric in the specified Stats structure as a histogram metric
|
|
func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {
|
|
for key, value := range stat.Metrics {
|
|
if m, ok := dockerMeasures[key]; ok {
|
|
stats.Record(ctx, m.M(int64(value)))
|
|
}
|
|
}
|
|
|
|
c.swapMu.Lock()
|
|
if c.stats != nil {
|
|
*(c.stats) = append(*(c.stats), stat)
|
|
}
|
|
c.swapMu.Unlock()
|
|
}
|
|
|
|
// DockerAuth implements the docker.AuthConfiguration interface.
|
|
func (c *container) DockerAuth() (*docker.AuthConfiguration, error) {
|
|
registryToken := c.extensions[RegistryToken]
|
|
if registryToken != "" {
|
|
return &docker.AuthConfiguration{
|
|
RegistryToken: registryToken,
|
|
}, nil
|
|
}
|
|
return nil, nil
|
|
}
|