package agent

import (
	"context"
	"strings"
	"time"

	"github.com/fnproject/fn/api/common"

	"github.com/sirupsen/logrus"
	"go.opencensus.io/stats"
	"go.opencensus.io/stats/view"
)

// TODO add some sugar:
// * hot containers active
// * memory used / available

// statsEnqueue records that a call has been accepted and queued against the agent.
func statsEnqueue(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(1))
	stats.Record(ctx, callsMeasure.M(1))
}

// statsDequeue is called when a function has been queued but cannot be started
// because of an error.
func statsDequeue(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(-1))
}

// statsDequeueAndStart moves a call from the queued gauge to the running gauge.
func statsDequeueAndStart(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(-1))
	stats.Record(ctx, runningMeasure.M(1))
}

// statsComplete records a successful call completion.
func statsComplete(ctx context.Context) {
	stats.Record(ctx, runningMeasure.M(-1))
	stats.Record(ctx, completedMeasure.M(1))
}

// statsFailed records a call that failed while running.
func statsFailed(ctx context.Context) {
	stats.Record(ctx, runningMeasure.M(-1))
	stats.Record(ctx, failedMeasure.M(1))
}

// statsDequeueAndFail records a call that failed before it started running.
func statsDequeueAndFail(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(-1))
	stats.Record(ctx, failedMeasure.M(1))
}

func statsTimedout(ctx context.Context) {
	stats.Record(ctx, timedoutMeasure.M(1))
}

func statsErrors(ctx context.Context) {
	stats.Record(ctx, errorsMeasure.M(1))
}

func statsTooBusy(ctx context.Context) {
	stats.Record(ctx, serverBusyMeasure.M(1))
}
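
// Taken together, these helpers maintain a simple call lifecycle across the
// gauges above: queued -> running -> completed/failed. A hedged sketch of the
// happy path from a caller's point of view (the surrounding orchestration is
// illustrative, not part of this file):
//
//	statsEnqueue(ctx)         // call accepted: queued+1, calls+1
//	statsDequeueAndStart(ctx) // slot acquired: queued-1, running+1
//	// ... execute the call ...
//	statsComplete(ctx)        // success: running-1, completed+1
//
// If the call never starts (for example resource acquisition fails),
// statsDequeue or statsDequeueAndFail undo the queued increment instead.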

// statsLBAgentRunnerSchedLatency records, as reported by the LB agent, how long
// the runner scheduler waited on a committed call (e.g. wait/launch/pull containers).
func statsLBAgentRunnerSchedLatency(ctx context.Context, dur time.Duration) {
	stats.Record(ctx, runnerSchedLatencyMeasure.M(int64(dur/time.Millisecond)))
}

// statsLBAgentRunnerExecLatency records, as reported by the LB agent, the
// function execution time inside a container.
func statsLBAgentRunnerExecLatency(ctx context.Context, dur time.Duration) {
	stats.Record(ctx, runnerExecLatencyMeasure.M(int64(dur/time.Millisecond)))
}

const (
	// TODO we should probably prefix these with calls_ ?
	queuedMetricName     = "queued"
	callsMetricName      = "calls" // TODO this is a dupe of sum {complete,failed} ?
	runningMetricName    = "running"
	completedMetricName  = "completed"
	failedMetricName     = "failed"
	timedoutMetricName   = "timeouts"
	errorsMetricName     = "errors"
	serverBusyMetricName = "server_busy"

	// Reported By LB
	runnerSchedLatencyMetricName = "lb_runner_sched_latency"
	runnerExecLatencyMetricName  = "lb_runner_exec_latency"
)

var (
	queuedMeasure = common.MakeMeasure(queuedMetricName, "calls currently queued against agent", "")
	// TODO this is a dupe of sum {complete,failed} ?
	callsMeasure      = common.MakeMeasure(callsMetricName, "calls created in agent", "")
	runningMeasure    = common.MakeMeasure(runningMetricName, "calls currently running in agent", "")
	completedMeasure  = common.MakeMeasure(completedMetricName, "calls completed in agent", "")
	failedMeasure     = common.MakeMeasure(failedMetricName, "calls failed in agent", "")
	timedoutMeasure   = common.MakeMeasure(timedoutMetricName, "calls timed out in agent", "")
	errorsMeasure     = common.MakeMeasure(errorsMetricName, "calls errored in agent", "")
	serverBusyMeasure = common.MakeMeasure(serverBusyMetricName, "calls where server was too busy in agent", "")

	dockerMeasures         = initDockerMeasures()
	containerGaugeMeasures = initContainerGaugeMeasures()
	containerTimeMeasures  = initContainerTimeMeasures()

	// Reported By LB: how long does a runner scheduler wait for a committed call? e.g. wait/launch/pull containers
	runnerSchedLatencyMeasure = common.MakeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
	// Reported By LB: function execution time inside a container.
	runnerExecLatencyMeasure = common.MakeMeasure(runnerExecLatencyMetricName, "Runner Container Execution Latency Reported By LBAgent", "msecs")
)
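
// The measures above come from helpers in api/common. As a rough, hedged sketch
// only (assuming MakeMeasure and CreateView are thin wrappers, which this file
// does not guarantee), the equivalent plain opencensus calls would look like:
//
//	m := stats.Int64("queued", "calls currently queued against agent", stats.UnitDimensionless)
//	v := &view.View{
//		Name:        m.Name(),
//		Description: m.Description(),
//		Measure:     m,
//		Aggregation: view.Sum(),
//	}
//	_ = view.Register(v)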

// RegisterLBAgentViews creates and registers the LB agent views (runner
// scheduling and execution latency) with the provided tag keys and latency
// distribution buckets.
func RegisterLBAgentViews(tagKeys []string, latencyDist []float64) {
	err := view.Register(
		common.CreateView(runnerSchedLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
		common.CreateView(runnerExecLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
	)
	if err != nil {
		logrus.WithError(err).Fatal("cannot register view")
	}
}

// RegisterAgentViews creates and registers all agent views
func RegisterAgentViews(tagKeys []string, latencyDist []float64) {
	err := view.Register(
		common.CreateView(queuedMeasure, view.Sum(), tagKeys),
		common.CreateView(callsMeasure, view.Sum(), tagKeys),
		common.CreateView(runningMeasure, view.Sum(), tagKeys),
		common.CreateView(completedMeasure, view.Sum(), tagKeys),
		common.CreateView(failedMeasure, view.Sum(), tagKeys),
		common.CreateView(timedoutMeasure, view.Sum(), tagKeys),
		common.CreateView(errorsMeasure, view.Sum(), tagKeys),
		common.CreateView(serverBusyMeasure, view.Sum(), tagKeys),
	)
	if err != nil {
		logrus.WithError(err).Fatal("cannot register view")
	}
}
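
// A minimal, hedged usage sketch (not part of this file): a node registers the
// views it needs once at startup, passing whatever tag keys it records with and
// the latency buckets (in milliseconds) it wants for distribution views. The
// tag key names below are illustrative only:
//
//	latencyDist := []float64{1, 10, 50, 100, 250, 500, 1000, 5000, 10000, 60000}
//	RegisterAgentViews([]string{"fn_appname", "fn_path"}, latencyDist)
//	RegisterLBAgentViews([]string{"fn_appname", "fn_path"}, latencyDist)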

// RegisterDockerViews creates and registers Docker views with the provided tag keys
func RegisterDockerViews(tagKeys []string, latencyDist []float64) {
	for _, m := range dockerMeasures {
		var dist *view.Aggregation

		// Remember these are sampled by docker in short intervals (approx 1 sec)
		// Bytes for net/disk/mem
		if m.Name() == "docker_stats_net_rx" || m.Name() == "docker_stats_net_tx" {
			// net IO: 8KiB to 32MiB
			dist = view.Distribution(0, 8192, 65536, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432)
		} else if m.Name() == "docker_stats_disk_read" || m.Name() == "docker_stats_disk_write" {
			// disk IO: 8KiB to 32MiB
			dist = view.Distribution(0, 8192, 65536, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432)
		} else if m.Name() == "docker_stats_mem_limit" || m.Name() == "docker_stats_mem_usage" {
			// memory: 128KiB to 32MiB
			dist = view.Distribution(0, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432)
		} else if m.Name() == "docker_stats_cpu_user" || m.Name() == "docker_stats_cpu_total" || m.Name() == "docker_stats_cpu_kernel" {
			// percentages
			dist = view.Distribution(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100)
		} else {
			// Not used yet.
			dist = view.Distribution(latencyDist...)
		}

		v := common.CreateView(m, dist, tagKeys)
		if err := view.Register(v); err != nil {
			logrus.WithError(err).Fatal("cannot register view")
		}
	}
}

// RegisterContainerViews creates and registers container views with the provided tag keys
func RegisterContainerViews(tagKeys []string, latencyDist []float64) {
	// Create views for container measures
	for i, key := range containerGaugeKeys {
		if key == "" {
			continue
		}
		v := common.CreateView(containerGaugeMeasures[i], view.Sum(), tagKeys)
		if err := view.Register(v); err != nil {
			logrus.WithError(err).Fatal("cannot register view")
		}
	}

	for i, key := range containerTimeKeys {
		if key == "" {
			continue
		}
		v := common.CreateView(containerTimeMeasures[i], view.Distribution(latencyDist...), tagKeys)
		if err := view.Register(v); err != nil {
			logrus.WithError(err).Fatal("cannot register view")
		}
	}
}

// initDockerMeasures initializes Docker-related measures
func initDockerMeasures() map[string]*stats.Int64Measure {
	// TODO this is nasty; figure out how to use opencensus to not have to declare these
	keys := []string{"net_rx", "net_tx", "mem_limit", "mem_usage", "disk_read", "disk_write", "cpu_user", "cpu_total", "cpu_kernel"}
	measures := make(map[string]*stats.Int64Measure, len(keys))
	for _, key := range keys {
		units := "bytes"
		if strings.Contains(key, "cpu") {
			units = "cpu"
		}
		measures[key] = common.MakeMeasure("docker_stats_"+key, "docker container stats for "+key, units)
	}
	return measures
}
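
// The map returned above is keyed by the short stat name ("cpu_user", "net_rx",
// ...) while the exported metric name carries the "docker_stats_" prefix. A
// hedged recording sketch (the sampled value is made up):
//
//	stats.Record(ctx, dockerMeasures["cpu_user"].M(42)) // shows up as docker_stats_cpu_user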

func initContainerGaugeMeasures() []*stats.Int64Measure {
	gaugeMeasures := make([]*stats.Int64Measure, len(containerGaugeKeys))
	for i, key := range containerGaugeKeys {
		if key == "" { // leave nil intentionally, let it panic
			continue
		}
		gaugeMeasures[i] = common.MakeMeasure(key, "containers in state "+key, "")
	}
	return gaugeMeasures
}

func initContainerTimeMeasures() []*stats.Int64Measure {
	timeMeasures := make([]*stats.Int64Measure, len(containerTimeKeys))
	for i, key := range containerTimeKeys {
		if key == "" {
			continue
		}
		timeMeasures[i] = common.MakeMeasure(key, "time spent in container state "+key, "ms")
	}

	return timeMeasures
}