Repository: https://github.com/fnproject/fn.git
Idle Hot Container Freeze/Preempt Support (#733)
* fn: freeze/unfreeze and eject idle under resource contention
@@ -3,7 +3,10 @@ package agent
 import (
     "context"
     "io"
+    "math"
     "net/http"
+    "os"
+    "strconv"
     "strings"
     "sync"
     "time"
@@ -26,8 +29,6 @@ import (
 // TODO handle timeouts / no response in sync & async (sync is json+503 atm, not 504, async is empty log+status)
 // see also: server/runner.go wrapping the response writer there, but need to handle async too (push down?)
 // TODO storing logs / call can push call over the timeout
-// TODO if we don't cap the number of any one container we could get into a situation
-// where the machine is full but all the containers are idle up to the idle timeout. meh.
 // TODO async is still broken, but way less so. we need to modify mq semantics
 // to be much more robust. now we're at least running it if we delete the msg,
 // but we may never store info about that execution so still broked (if fn
@@ -115,6 +116,9 @@ type agent struct {
     shutonce sync.Once
     shutdown chan struct{}

+    freezeIdleMsecs time.Duration
+    ejectIdleMsecs time.Duration
+
     stats // TODO kill me

     // Prometheus HTTP handler
@@ -127,13 +131,25 @@ func New(da DataAccess) Agent {
         ServerVersion: "17.06.0",
     })

+    freezeIdleMsecs, err := getEnvMsecs("FN_FREEZE_IDLE_MSECS", 50*time.Millisecond)
+    if err != nil {
+        logrus.WithError(err).Fatal("error initializing freeze idle delay")
+    }
+
+    ejectIdleMsecs, err := getEnvMsecs("FN_EJECT_IDLE_MSECS", 1000*time.Millisecond)
+    if err != nil {
+        logrus.WithError(err).Fatal("error initializing eject idle delay")
+    }
+
     a := &agent{
         da: da,
         driver: driver,
         slotMgr: NewSlotQueueMgr(),
         resources: NewResourceTracker(),
         shutdown: make(chan struct{}),
-        promHandler: promhttp.Handler(),
+        freezeIdleMsecs: freezeIdleMsecs,
+        ejectIdleMsecs: ejectIdleMsecs,
+        promHandler: promhttp.Handler(),
     }

     // TODO assert that agent doesn't get started for API nodes up above ?
@@ -143,6 +159,26 @@ func New(da DataAccess) Agent {
     return a
 }

+func getEnvMsecs(name string, defaultVal time.Duration) (time.Duration, error) {
+
+    delay := defaultVal
+
+    if dur := os.Getenv(name); dur != "" {
+        durInt, err := strconv.ParseInt(dur, 10, 64)
+        if err != nil {
+            return defaultVal, err
+        }
+        // disable if negative or set to msecs specified.
+        if durInt < 0 || time.Duration(durInt) >= math.MaxInt64/time.Millisecond {
+            delay = math.MaxInt64
+        } else {
+            delay = time.Duration(durInt) * time.Millisecond
+        }
+    }
+
+    return delay, nil
+}
+
 // TODO shuffle this around somewhere else (maybe)
 func (a *agent) Enqueue(ctx context.Context, call *models.Call) error {
     return a.da.Enqueue(ctx, call)
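For illustration, here is a minimal sketch (not part of the commit) of how the getEnvMsecs helper above resolves the two new settings. It assumes the helper is in scope (e.g. inside package agent) and that fmt, math, os, and time are imported; the function name exampleGetEnvMsecs is made up for this sketch.

func exampleGetEnvMsecs() {
    os.Setenv("FN_FREEZE_IDLE_MSECS", "250")
    freeze, _ := getEnvMsecs("FN_FREEZE_IDLE_MSECS", 50*time.Millisecond)
    fmt.Println(freeze) // 250ms

    os.Setenv("FN_EJECT_IDLE_MSECS", "-1")
    eject, _ := getEnvMsecs("FN_EJECT_IDLE_MSECS", 1000*time.Millisecond)
    fmt.Println(eject == time.Duration(math.MaxInt64)) // true: a negative value means the eject timer effectively never fires

    os.Setenv("FN_FREEZE_IDLE_MSECS", "not-a-number")
    if _, err := getEnvMsecs("FN_FREEZE_IDLE_MSECS", 50*time.Millisecond); err != nil {
        fmt.Println("parse error:", err) // New() treats this as fatal
    }
}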
@@ -681,25 +717,68 @@ func (a *agent) runHot(ctx context.Context, call *call, tok ResourceToken, state
     default: // ok
     }

+    isFrozen := false
+    elapsed := time.Duration(0)
+    freezerTicker := a.freezeIdleMsecs
+    idleTimeout := time.Duration(call.IdleTimeout) * time.Second
+
     done := make(chan struct{})
     state.UpdateState(ctx, ContainerStateIdle, call.slots)
     s := call.slots.queueSlot(&hotSlot{done, errC, container, nil})

-    select {
-    case <-s.trigger:
-    case <-time.After(time.Duration(call.IdleTimeout) * time.Second):
-        if call.slots.ejectSlot(s) {
-            logger.Info("Canceling inactive hot function")
-            return
-        }
-    case <-ctx.Done(): // container shutdown
-        if call.slots.ejectSlot(s) {
-            return
-        }
-    case <-a.shutdown: // server shutdown
-        if call.slots.ejectSlot(s) {
-            return
-        }
-    }
+    for {
+        select {
+        case <-s.trigger: // slot already consumed
+        case <-ctx.Done(): // container shutdown
+        case <-a.shutdown: // server shutdown
+        case <-time.After(idleTimeout): // in case idleTimeout < a.freezeIdleMsecs or idleTimeout < a.ejectIdleMsecs
+        case <-time.After(freezerTicker):
+            elapsed += a.freezeIdleMsecs
+
+            freezerTicker = math.MaxInt64 // do not fire again
+
+            if elapsed < idleTimeout { // in case idleTimeout <= a.freezeIdleMsecs
+                if !isFrozen {
+                    err := cookie.Freeze(ctx)
+                    if err != nil {
+                        logger.WithError(err).Error("freeze error")
+                        return
+                    }
+                    isFrozen = true
+                }
+                continue
+            }
+        case <-time.After(a.ejectIdleMsecs):
+            elapsed += a.ejectIdleMsecs
+
+            if elapsed < idleTimeout {
+                // if someone is waiting for resource in our slot queue, we must not terminate,
+                // otherwise, see if other slot queues have resource waiters that are blocked.
+                stats := call.slots.getStats()
+                if stats.containerStates[ContainerStateWait] > 0 ||
+                    a.resources.GetResourceTokenWaiterCount() <= 0 {
+                    continue
+                }
+                logger.Debug("attempting hot function eject")
+            }
+        }
+        break
+    }
+
+    // if we can eject token, that means we are here due to
+    // abort/shutdown/timeout, attempt to eject and terminate,
+    // otherwise continue processing the request
+    if call.slots.ejectSlot(s) {
+        return
+    }
+
+    if isFrozen {
+        err := cookie.Unfreeze(ctx)
+        if err != nil {
+            logger.WithError(err).Error("unfreeze error")
+            return
+        }
+        isFrozen = false
+    }

     state.UpdateState(ctx, ContainerStateBusy, call.slots)
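To make the control flow of the loop above easier to follow, here is an illustrative timeline (commentary only, not part of the commit), assuming the default settings (FN_FREEZE_IDLE_MSECS=50, FN_EJECT_IDLE_MSECS=1000) and a 30-second call.IdleTimeout:

// t = 0           slot queued, container reported as ContainerStateIdle
// t = 50ms        freezerTicker fires once; cookie.Freeze pauses the container and
//                 freezerTicker is set to math.MaxInt64 so it never fires again
// t ~ 1s, 2s, ... the eject timer fires; the container is ejected only when nobody
//                 is waiting on this slot queue (ContainerStateWait == 0) and other
//                 calls are blocked on resource tokens (GetResourceTokenWaiterCount > 0)
// t = 30s         elapsed reaches idleTimeout; the loop breaks and the slot is ejected
//
// If s.trigger fires first, the loop breaks, ejectSlot fails because the slot is
// already taken, cookie.Unfreeze resumes the container (if it was frozen), and the
// call is served.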
@@ -212,6 +212,28 @@ func (c *cookie) Run(ctx context.Context) (drivers.WaitResult, error) {
     return c.drv.run(ctx, c.id, c.task)
 }

+func (c *cookie) Freeze(ctx context.Context) error {
+    ctx, log := common.LoggerWithFields(ctx, logrus.Fields{"stack": "Freeze"})
+    log.WithFields(logrus.Fields{"call_id": c.id}).Debug("docker pause")
+
+    err := c.drv.docker.PauseContainer(c.id, ctx)
+    if err != nil {
+        logrus.WithError(err).WithFields(logrus.Fields{"call_id": c.id}).Error("error pausing container")
+    }
+    return err
+}
+
+func (c *cookie) Unfreeze(ctx context.Context) error {
+    ctx, log := common.LoggerWithFields(ctx, logrus.Fields{"stack": "Unfreeze"})
+    log.WithFields(logrus.Fields{"call_id": c.id}).Debug("docker unpause")
+
+    err := c.drv.docker.UnpauseContainer(c.id, ctx)
+    if err != nil {
+        logrus.WithError(err).WithFields(logrus.Fields{"call_id": c.id}).Error("error unpausing container")
+    }
+    return err
+}
+
 func (drv *DockerDriver) removeContainer(ctx context.Context, container string) error {
     err := drv.docker.RemoveContainer(docker.RemoveContainerOptions{
         ID: container, Force: true, RemoveVolumes: true, Context: ctx})
@@ -19,6 +19,7 @@ import (

 const (
     retryTimeout = 10 * time.Minute
+    pauseTimeout = 5 * time.Second
 )

 // wrap docker client calls so we can retry 500s, kind of sucks but fsouza doesn't
@@ -34,6 +35,8 @@ type dockerClient interface {
     StartContainerWithContext(id string, hostConfig *docker.HostConfig, ctx context.Context) error
     CreateContainer(opts docker.CreateContainerOptions) (*docker.Container, error)
     RemoveContainer(opts docker.RemoveContainerOptions) error
+    PauseContainer(id string, ctx context.Context) error
+    UnpauseContainer(id string, ctx context.Context) error
     PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error
     InspectImage(ctx context.Context, name string) (*docker.Image, error)
     InspectContainerWithContext(container string, ctx context.Context) (*docker.Container, error)
@@ -260,6 +263,30 @@ func (d *dockerWrap) RemoveContainer(opts docker.RemoveContainerOptions) (err er
     return filterNoSuchContainer(ctx, err)
 }

+func (d *dockerWrap) PauseContainer(id string, ctx context.Context) (err error) {
+    span, _ := opentracing.StartSpanFromContext(ctx, "docker_pause_container")
+    defer span.Finish()
+    ctx, cancel := context.WithTimeout(ctx, pauseTimeout)
+    defer cancel()
+    err = d.retry(ctx, func() error {
+        err = d.docker.PauseContainer(id)
+        return err
+    })
+    return filterNoSuchContainer(ctx, err)
+}
+
+func (d *dockerWrap) UnpauseContainer(id string, ctx context.Context) (err error) {
+    span, _ := opentracing.StartSpanFromContext(ctx, "docker_unpause_container")
+    defer span.Finish()
+    ctx, cancel := context.WithTimeout(ctx, pauseTimeout)
+    defer cancel()
+    err = d.retry(ctx, func() error {
+        err = d.docker.UnpauseContainer(id)
+        return err
+    })
+    return filterNoSuchContainer(ctx, err)
+}
+
 func (d *dockerWrap) InspectImage(ctx context.Context, name string) (i *docker.Image, err error) {
     span, ctx := opentracing.StartSpanFromContext(ctx, "docker_inspect_image")
     defer span.Finish()
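One design note (commentary, not part of the commit):

// docker pause suspends every process in the container via the cgroups freezer:
// a frozen hot container stops consuming CPU immediately but keeps its memory
// allocation. That split is why the agent has two knobs: freezing is cheap and
// quick (FN_FREEZE_IDLE_MSECS), while reclaiming memory requires actually
// ejecting the container, which is only attempted under resource contention
// (FN_EJECT_IDLE_MSECS).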
@@ -34,6 +34,12 @@ type Cookie interface {
     // Run() MUST monitor the context. task cancellation is indicated by
     // cancelling the context.
     Run(ctx context.Context) (WaitResult, error)
+
+    // Freeze the container to pause running processes
+    Freeze(ctx context.Context) error
+
+    // Unfreeze a frozen container to unpause frozen processes
+    Unfreeze(ctx context.Context) error
 }

 type WaitResult interface {
@@ -24,6 +24,14 @@ type cookie struct {
     m *Mocker
 }

+func (c *cookie) Freeze(context.Context) error {
+    return nil
+}
+
+func (c *cookie) Unfreeze(context.Context) error {
+    return nil
+}
+
 func (c *cookie) Close(context.Context) error { return nil }

 func (c *cookie) Run(ctx context.Context) (drivers.WaitResult, error) {
@@ -40,6 +40,9 @@ type ResourceTracker interface {
     // machine. It must be called before GetResourceToken or GetResourceToken may hang.
     // Memory is expected to be provided in MB units.
     IsResourcePossible(memory, cpuQuota uint64, isAsync bool) bool
+
+    // returns number of waiters waiting for a resource token blocked on condition variable
+    GetResourceTokenWaiterCount() uint64
 }

 type resourceTracker struct {
@@ -66,6 +69,8 @@ type resourceTracker struct {
     cpuAsyncUsed uint64
     // cpu in use for async area in which agent stops dequeuing async jobs
     cpuAsyncHWMark uint64
+    // number of waiters waiting for a token blocked on the condition variable
+    tokenWaiterCount uint64
 }

 func NewResourceTracker() ResourceTracker {
@@ -123,6 +128,17 @@ func (a *resourceTracker) IsResourcePossible(memory uint64, cpuQuota uint64, isA
     }
 }

+// returns number of waiters waiting for a resource token blocked on condition variable
+func (a *resourceTracker) GetResourceTokenWaiterCount() uint64 {
+    var waiters uint64
+
+    a.cond.L.Lock()
+    waiters = a.tokenWaiterCount
+    a.cond.L.Unlock()
+
+    return waiters
+}
+
 // the received token should be passed directly to launch (unconditionally), launch
 // will close this token (i.e. the receiver should not call Close)
 func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken {
@@ -158,7 +174,9 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, c

         isWaiting = true
         for !a.isResourceAvailableLocked(memory, cpuQuota, isAsync) && ctx.Err() == nil {
+            a.tokenWaiterCount++
             c.Wait()
+            a.tokenWaiterCount--
         }
         isWaiting = false

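A brief note on the waiter counter (commentary, not part of the commit; it assumes c here is a.cond, as the lock/unlock in GetResourceTokenWaiterCount suggests):

// sync.Cond.Wait releases the Cond's lock while blocked and reacquires it before
// returning, so tokenWaiterCount++ and tokenWaiterCount-- above always execute
// with a.cond.L held. GetResourceTokenWaiterCount reads the counter under the
// same lock, so the value it returns is a consistent snapshot of how many calls
// are currently blocked waiting for a resource token.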
@@ -4,7 +4,7 @@ To get the best performance, you'll want to ensure that Docker is configured pro

 1. Linux 4.7 or newer with aufs or overlay2 module.
 2. Ubuntu 16.04 LTS or newer with aufs or overlay2 module.
-3. Docker 1.12 or newer to be available.
+3. Docker 17.06 or newer to be available.

 It is important to reconfigure host's Docker with this filesystem module. Thus, in your Docker start scripts you must do as following:

@@ -29,6 +29,8 @@ docker run -e VAR_NAME=VALUE ...
 | `FN_LOG_DEST` | Set a url to send logs to, instead of stderr. [scheme://][host][:port][/path]; default scheme to udp:// if none given, possible schemes: { udp, tcp, file }
 | `FN_LOG_PREFIX` | If supplying a syslog url in `FN_LOG_DEST`, a prefix to add to each log line
 | `FN_API_CORS` | A comma separated list of URLs to enable [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) for (or `*` for all domains). This corresponds to the allowed origins in the `Acccess-Control-Allow-Origin` header. | None |
+| `FN_FREEZE_IDLE_MSECS` | How long, in milliseconds, an idle hot container sits before it is paused/frozen. Set to 0 to freeze idle containers immediately; set to a negative value to disable freezing. | 50 |
+| `FN_EJECT_IDLE_MSECS` | How long, in milliseconds, an idle hot container sits before the agent attempts to terminate it when the system is starved for CPU and memory. Set to a negative value to disable ejection. | 1000 |
 | `DOCKER_HOST` | Docker remote API URL. | /var/run/docker.sock |
 | `DOCKER_API_VERSION` | Docker remote API version. | 1.24 |
 | `DOCKER_TLS_VERIFY` | Set this option to enable/disable Docker remote API over TLS/SSL. | 0 |
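For example, following the `docker run -e VAR_NAME=VALUE ...` pattern this table documents, an operator could freeze idle hot containers immediately and allow ejection after two seconds of idleness under contention (the trailing `...` and the image name below are placeholders, not literal values):

docker run -e FN_FREEZE_IDLE_MSECS=0 -e FN_EJECT_IDLE_MSECS=2000 ... <fn-server-image>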