fn: paused and evicted container stats (#1209)

* fn: paused and evicted container stats

With this change, now stats reports paused state
as well as incidents of container exit due to evictions.

* fn: update/document state transitions in state tracker

There's no case of a transition moving from done to waiting. This
must be deprecated behavior.
This commit is contained in:
Tolga Ceylan
2018-09-13 16:24:26 -07:00
committed by GitHub
parent ede5b93c34
commit 4dcdb7d982
4 changed files with 48 additions and 13 deletions

View File

@@ -898,13 +898,18 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
return false
}
isFrozen = true
state.UpdateState(ctx, ContainerStatePaused, call.slots)
}
evictor := a.evictor.GetEvictor(call.ID, call.slotHashId, call.Memory+uint64(call.TmpFsSize), uint64(call.CPUs))
state.UpdateState(ctx, ContainerStateIdle, call.slots)
if !isFrozen {
state.UpdateState(ctx, ContainerStateIdle, call.slots)
}
s := call.slots.queueSlot(slot)
isEvictEvent := false
for {
select {
case <-s.trigger: // slot already consumed
@@ -918,10 +923,12 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
return false
}
isFrozen = true
state.UpdateState(ctx, ContainerStatePaused, call.slots)
}
continue
case <-evictor.C:
logger.Debug("attempting hot function eject")
isEvictEvent = true
case <-ejectTimer.C:
// we've been idle too long, now we are ejectable
a.evictor.RegisterEvictor(evictor)
@@ -940,6 +947,9 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
// otherwise continue processing the request
if call.slots.acquireSlot(s) {
slot.Close(ctx)
if isEvictEvent {
statsContainerEvicted(ctx)
}
return false
}

View File

@@ -163,6 +163,7 @@ func (a *slotQueue) isIdle() bool {
a.stats.containerStates[ContainerStateWait] == 0 &&
a.stats.containerStates[ContainerStateStart] == 0 &&
a.stats.containerStates[ContainerStateIdle] == 0 &&
a.stats.containerStates[ContainerStatePaused] == 0 &&
a.stats.containerStates[ContainerStateBusy] == 0
a.statsLock.Unlock()
@@ -180,7 +181,7 @@ func (a *slotQueue) getStats() slotQueueStats {
func isNewContainerNeeded(cur *slotQueueStats) bool {
idleWorkers := cur.containerStates[ContainerStateIdle]
idleWorkers := cur.containerStates[ContainerStateIdle] + cur.containerStates[ContainerStatePaused]
starters := cur.containerStates[ContainerStateStart]
startWaiters := cur.containerStates[ContainerStateWait]

View File

@@ -47,12 +47,13 @@ const (
)
const (
ContainerStateNone ContainerStateType = iota // uninitialized
ContainerStateWait // resource (cpu + mem) waiting
ContainerStateStart // launching
ContainerStateIdle // running idle
ContainerStateBusy // running busy
ContainerStateDone // exited/failed/done
ContainerStateNone ContainerStateType = iota // uninitialized
ContainerStateWait // resource (cpu + mem) waiting
ContainerStateStart // launching
ContainerStateIdle // running: idle but not paused
ContainerStatePaused // running: idle but paused
ContainerStateBusy // running: busy
ContainerStateDone // exited/failed/done
ContainerStateMax
)
@@ -61,14 +62,16 @@ var containerGaugeKeys = [ContainerStateMax]string{
"container_wait_total",
"container_start_total",
"container_idle_total",
"container_paused_total",
"container_busy_total",
"container_done_total",
}
var containerTimeKeys = [ContainerStateMax]string{
"",
"container_wait_duration_seconds",
"container_start_duration_seconds",
"container_idle_duration_seconds",
"container_paused_duration_seconds",
"container_busy_duration_seconds",
}
@@ -101,6 +104,10 @@ func (c *requestState) UpdateState(ctx context.Context, newState RequestStateTyp
}
}
func isIdleState(state ContainerStateType) bool {
return state == ContainerStateIdle || state == ContainerStatePaused
}
func (c *containerState) UpdateState(ctx context.Context, newState ContainerStateType, slots *slotQueue) {
var now time.Time
@@ -109,11 +116,13 @@ func (c *containerState) UpdateState(ctx context.Context, newState ContainerStat
c.lock.Lock()
// except for 1) switching back to idle from busy (hot containers) or 2)
// to waiting from done, otherwise we can only move forward in states
// Only the following state transitions are allowed:
// 1) any move forward in states as per ContainerStateType order
// 2) move back: from paused to idle
// 3) move back: from busy to idle/paused
if c.state < newState ||
(c.state == ContainerStateBusy && newState == ContainerStateIdle) ||
(c.state == ContainerStateDone && newState == ContainerStateIdle) {
(c.state == ContainerStatePaused && newState == ContainerStateIdle) ||
(c.state == ContainerStateBusy && isIdleState(newState)) {
now = time.Now()
oldState = c.state

View File

@@ -66,6 +66,10 @@ func statsLBAgentRunnerExecLatency(ctx context.Context, dur time.Duration) {
stats.Record(ctx, runnerExecLatencyMeasure.M(int64(dur/time.Millisecond)))
}
func statsContainerEvicted(ctx context.Context) {
stats.Record(ctx, containerEvictedMeasure.M(0))
}
const (
// TODO we should probably prefix these with calls_ ?
queuedMetricName = "queued"
@@ -77,6 +81,8 @@ const (
errorsMetricName = "errors"
serverBusyMetricName = "server_busy"
containerEvictedMetricName = "container_evictions"
// Reported By LB
runnerSchedLatencyMetricName = "lb_runner_sched_latency"
runnerExecLatencyMetricName = "lb_runner_exec_latency"
@@ -96,6 +102,8 @@ var (
containerGaugeMeasures = initContainerGaugeMeasures()
containerTimeMeasures = initContainerTimeMeasures()
containerEvictedMeasure = common.MakeMeasure(containerEvictedMetricName, "containers evicted", "")
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
runnerSchedLatencyMeasure = common.MakeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
// Reported By LB: Function execution time inside a container.
@@ -179,6 +187,13 @@ func RegisterContainerViews(tagKeys []string, latencyDist []float64) {
logrus.WithError(err).Fatal("cannot register view")
}
}
err := view.Register(
common.CreateView(containerEvictedMeasure, view.Count(), tagKeys),
)
if err != nil {
logrus.WithError(err).Fatal("cannot register view")
}
}
// initDockerMeasures initializes Docker related measures