mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: uds init wait latency metric in prometheus (#1336)
* fn: uds init wait latency metric in prometheus Adding tracker for UDS initialization during container start. This complements our existing container state latency trackers and docker-api latency trackers.
This commit is contained in:
@@ -895,16 +895,26 @@ func (a *agent) runHot(ctx context.Context, caller slotCaller, call *call, tok R
|
|||||||
go func() {
|
go func() {
|
||||||
defer cancel() // also close if we get an agent shutdown / idle timeout
|
defer cancel() // also close if we get an agent shutdown / idle timeout
|
||||||
|
|
||||||
|
// We record init wait for three basic states below: "initialized", "canceled", "timedout"
|
||||||
|
// Notice how we do not distinguish between agent-shutdown, eviction, ctx.Done, etc. This is
|
||||||
|
// because monitoring go-routine may pick these events earlier and cancel the ctx.
|
||||||
|
initStart := time.Now()
|
||||||
|
|
||||||
// INIT BARRIER HERE. Wait for the initialization go-routine signal
|
// INIT BARRIER HERE. Wait for the initialization go-routine signal
|
||||||
select {
|
select {
|
||||||
case <-initialized:
|
case <-initialized:
|
||||||
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "initialized")
|
||||||
case <-a.shutWg.Closer(): // agent shutdown
|
case <-a.shutWg.Closer(): // agent shutdown
|
||||||
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "canceled")
|
||||||
return
|
return
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "canceled")
|
||||||
return
|
return
|
||||||
case <-evictor.C: // eviction
|
case <-evictor.C: // eviction
|
||||||
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "canceled")
|
||||||
return
|
return
|
||||||
case <-time.After(a.cfg.HotStartTimeout):
|
case <-time.After(a.cfg.HotStartTimeout):
|
||||||
|
statsContainerUDSInitLatency(ctx, initStart, time.Now(), "timedout")
|
||||||
tryQueueErr(models.ErrContainerInitTimeout, errQueue)
|
tryQueueErr(models.ErrContainerInitTimeout, errQueue)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,8 +14,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
containerStateKey = common.MakeKey("container_state")
|
containerStateKey = common.MakeKey("container_state")
|
||||||
callStatusKey = common.MakeKey("call_status")
|
callStatusKey = common.MakeKey("call_status")
|
||||||
|
containerUDSStateKey = common.MakeKey("container_uds_state")
|
||||||
)
|
)
|
||||||
|
|
||||||
func statsCalls(ctx context.Context) {
|
func statsCalls(ctx context.Context) {
|
||||||
@@ -66,6 +67,22 @@ func statsLBAgentRunnerExecLatency(ctx context.Context, dur time.Duration) {
|
|||||||
stats.Record(ctx, runnerExecLatencyMeasure.M(int64(dur/time.Millisecond)))
|
stats.Record(ctx, runnerExecLatencyMeasure.M(int64(dur/time.Millisecond)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func statsContainerUDSInitLatency(ctx context.Context, start time.Time, end time.Time, containerUDSState string) {
|
||||||
|
if end.Before(start) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, err := tag.New(ctx,
|
||||||
|
tag.Upsert(containerUDSStateKey, containerUDSState),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dur := end.Sub(start)
|
||||||
|
stats.Record(ctx, containerUDSInitLatencyMeasure.M(int64(dur/time.Millisecond)))
|
||||||
|
}
|
||||||
|
|
||||||
func statsContainerEvicted(ctx context.Context, containerState string) {
|
func statsContainerEvicted(ctx context.Context, containerState string) {
|
||||||
ctx, err := tag.New(ctx,
|
ctx, err := tag.New(ctx,
|
||||||
tag.Upsert(containerStateKey, containerState),
|
tag.Upsert(containerStateKey, containerState),
|
||||||
@@ -129,7 +146,8 @@ const (
|
|||||||
errorsMetricName = "errors"
|
errorsMetricName = "errors"
|
||||||
serverBusyMetricName = "server_busy"
|
serverBusyMetricName = "server_busy"
|
||||||
|
|
||||||
containerEvictedMetricName = "container_evictions"
|
containerEvictedMetricName = "container_evictions"
|
||||||
|
containerUDSInitLatencyMetricName = "container_uds_init_latency"
|
||||||
|
|
||||||
utilCpuUsedMetricName = "util_cpu_used"
|
utilCpuUsedMetricName = "util_cpu_used"
|
||||||
utilCpuAvailMetricName = "util_cpu_avail"
|
utilCpuAvailMetricName = "util_cpu_avail"
|
||||||
@@ -160,7 +178,8 @@ var (
|
|||||||
utilMemUsedMeasure = common.MakeMeasure(utilMemUsedMetricName, "agent memory in use", "By")
|
utilMemUsedMeasure = common.MakeMeasure(utilMemUsedMetricName, "agent memory in use", "By")
|
||||||
utilMemAvailMeasure = common.MakeMeasure(utilMemAvailMetricName, "agent memory available", "By")
|
utilMemAvailMeasure = common.MakeMeasure(utilMemAvailMetricName, "agent memory available", "By")
|
||||||
|
|
||||||
containerEvictedMeasure = common.MakeMeasure(containerEvictedMetricName, "containers evicted", "")
|
containerEvictedMeasure = common.MakeMeasure(containerEvictedMetricName, "containers evicted", "")
|
||||||
|
containerUDSInitLatencyMeasure = common.MakeMeasure(containerUDSInitLatencyMetricName, "container UDS Init-Wait Latency", "msecs")
|
||||||
|
|
||||||
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
|
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
|
||||||
runnerSchedLatencyMeasure = common.MakeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
|
runnerSchedLatencyMeasure = common.MakeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
|
||||||
@@ -271,8 +290,18 @@ func RegisterContainerViews(tagKeys []string, latencyDist []float64) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add container uds_state tag for uds-wait
|
||||||
|
udsInitTags := make([]string, 0, len(tagKeys)+1)
|
||||||
|
udsInitTags = append(udsInitTags, "container_uds_state")
|
||||||
|
for _, key := range tagKeys {
|
||||||
|
if key != "container_uds_state" {
|
||||||
|
udsInitTags = append(udsInitTags, key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
common.CreateView(containerEvictedMeasure, view.Count(), evictTags),
|
common.CreateView(containerEvictedMeasure, view.Count(), evictTags),
|
||||||
|
common.CreateView(containerUDSInitLatencyMeasure, view.Distribution(latencyDist...), udsInitTags),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
|
|||||||
Reference in New Issue
Block a user