Files
fn-serverless/api/agent/stats.go
Tolga Ceylan b6aeae3680 fn: moving opencensus distribution buckets out of agent (#1158)
Users can best pick the proper range for their operating
environment. Default cmd/fnserver uses some sensible
defaults.
2018-08-06 10:48:52 -07:00

221 lines
7.6 KiB
Go

package agent
import (
"context"
"strings"
"time"
"github.com/fnproject/fn/api/common"
"github.com/sirupsen/logrus"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
)
// TODO add some sugar:
// * hot containers active
// * memory used / available
// statsEnqueue marks a call as queued: it bumps the queued gauge and
// counts the call against the total calls measure in a single record.
func statsEnqueue(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(1), callsMeasure.M(1))
}
// statsDequeue decrements the queued gauge without marking a start.
// Call when a function has been queued but cannot be started because of an error.
func statsDequeue(ctx context.Context) {
stats.Record(ctx, queuedMeasure.M(-1))
}
// statsDequeueAndStart transitions a call from queued to running:
// the queued gauge goes down as the running gauge goes up.
func statsDequeueAndStart(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(-1), runningMeasure.M(1))
}
// statsComplete transitions a call from running to completed:
// the running gauge goes down and the completed counter goes up.
func statsComplete(ctx context.Context) {
	stats.Record(ctx, runningMeasure.M(-1), completedMeasure.M(1))
}
// statsFailed transitions a call from running to failed:
// the running gauge goes down and the failed counter goes up.
func statsFailed(ctx context.Context) {
	stats.Record(ctx, runningMeasure.M(-1), failedMeasure.M(1))
}
// statsDequeueAndFail transitions a call from queued straight to failed
// (it never started running): queued gauge down, failed counter up.
func statsDequeueAndFail(ctx context.Context) {
	stats.Record(ctx, queuedMeasure.M(-1), failedMeasure.M(1))
}
// statsTimedout counts a call that timed out in the agent.
func statsTimedout(ctx context.Context) {
stats.Record(ctx, timedoutMeasure.M(1))
}
// statsErrors counts a call that errored in the agent.
func statsErrors(ctx context.Context) {
stats.Record(ctx, errorsMeasure.M(1))
}
// statsTooBusy counts a call rejected because the server was too busy.
func statsTooBusy(ctx context.Context) {
stats.Record(ctx, serverBusyMeasure.M(1))
}
// statsLBAgentRunnerSchedLatency records, in milliseconds, how long the
// runner scheduler waited for a committed call (see the measure's
// declaration: wait/launch/pull containers). Reported by the LB agent.
func statsLBAgentRunnerSchedLatency(ctx context.Context, dur time.Duration) {
	ms := int64(dur / time.Millisecond)
	stats.Record(ctx, runnerSchedLatencyMeasure.M(ms))
}
// statsLBAgentRunnerExecLatency records, in milliseconds, the function's
// execution time inside a container. Reported by the LB agent.
func statsLBAgentRunnerExecLatency(ctx context.Context, dur time.Duration) {
	ms := int64(dur / time.Millisecond)
	stats.Record(ctx, runnerExecLatencyMeasure.M(ms))
}
// Metric names for the agent's opencensus measures. Each name pairs with
// a measure declared in the var block below and is aggregated into a view
// by RegisterAgentViews / RegisterLBAgentViews.
const (
// TODO we should probably prefix these with calls_ ?
queuedMetricName = "queued"
callsMetricName = "calls" // TODO this is a dupe of sum {complete,failed} ?
runningMetricName = "running"
completedMetricName = "completed"
failedMetricName = "failed"
timedoutMetricName = "timeouts"
errorsMetricName = "errors"
serverBusyMetricName = "server_busy"
// Reported By LB
runnerSchedLatencyMetricName = "lb_runner_sched_latency"
runnerExecLatencyMetricName = "lb_runner_exec_latency"
)
// Package-level measures recorded by the stats* helpers above. The
// counters/gauges are registered with view.Sum in RegisterAgentViews;
// the latency measures are registered as distributions in
// RegisterLBAgentViews; the docker/container measures are registered by
// their respective Register functions.
var (
queuedMeasure = common.MakeMeasure(queuedMetricName, "calls currently queued against agent", "")
// TODO this is a dupe of sum {complete,failed} ?
callsMeasure = common.MakeMeasure(callsMetricName, "calls created in agent", "")
runningMeasure = common.MakeMeasure(runningMetricName, "calls currently running in agent", "")
completedMeasure = common.MakeMeasure(completedMetricName, "calls completed in agent", "")
failedMeasure = common.MakeMeasure(failedMetricName, "calls failed in agent", "")
timedoutMeasure = common.MakeMeasure(timedoutMetricName, "calls timed out in agent", "")
errorsMeasure = common.MakeMeasure(errorsMetricName, "calls errored in agent", "")
serverBusyMeasure = common.MakeMeasure(serverBusyMetricName, "calls where server was too busy in agent", "")
dockerMeasures = initDockerMeasures()
containerGaugeMeasures = initContainerGaugeMeasures()
containerTimeMeasures = initContainerTimeMeasures()
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
runnerSchedLatencyMeasure = common.MakeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
// Reported By LB: Function execution time inside a container.
runnerExecLatencyMeasure = common.MakeMeasure(runnerExecLatencyMetricName, "Runner Container Execution Latency Reported By LBAgent", "msecs")
)
// RegisterLBAgentViews registers distribution views for the LB agent's
// runner scheduling and execution latency measures, tagged with tagKeys
// and bucketed by latencyDist (millisecond boundaries). Fatals on
// registration failure.
func RegisterLBAgentViews(tagKeys []string, latencyDist []float64) {
	views := []*view.View{
		common.CreateView(runnerSchedLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
		common.CreateView(runnerExecLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
	}
	if err := view.Register(views...); err != nil {
		logrus.WithError(err).Fatal("cannot register view")
	}
}
// RegisterAgentViews creates and registers all agent views
func RegisterAgentViews(tagKeys []string, latencyDist []float64) {
err := view.Register(
common.CreateView(queuedMeasure, view.Sum(), tagKeys),
common.CreateView(callsMeasure, view.Sum(), tagKeys),
common.CreateView(runningMeasure, view.Sum(), tagKeys),
common.CreateView(completedMeasure, view.Sum(), tagKeys),
common.CreateView(failedMeasure, view.Sum(), tagKeys),
common.CreateView(timedoutMeasure, view.Sum(), tagKeys),
common.CreateView(errorsMeasure, view.Sum(), tagKeys),
common.CreateView(serverBusyMeasure, view.Sum(), tagKeys),
)
if err != nil {
logrus.WithError(err).Fatal("cannot register view")
}
}
// RegisterDockerViews creates and registers Docker stat views with the
// provided tag keys, picking distribution buckets per measure: network,
// disk, memory, and cpu stats get their own boundaries, anything else
// falls back to latencyDist. Fatals on registration failure.
func RegisterDockerViews(tagKeys []string, latencyDist, ioNetDist, ioDiskDist, memoryDist, cpuDist []float64) {
	for _, m := range dockerMeasures {
		// Remember these are sampled by docker in short intervals (approx 1 sec)
		var agg *view.Aggregation
		switch m.Name() {
		case "docker_stats_net_rx", "docker_stats_net_tx":
			agg = view.Distribution(ioNetDist...)
		case "docker_stats_disk_read", "docker_stats_disk_write":
			agg = view.Distribution(ioDiskDist...)
		case "docker_stats_mem_limit", "docker_stats_mem_usage":
			agg = view.Distribution(memoryDist...)
		case "docker_stats_cpu_user", "docker_stats_cpu_total", "docker_stats_cpu_kernel":
			agg = view.Distribution(cpuDist...)
		default:
			// Not used yet.
			agg = view.Distribution(latencyDist...)
		}
		if err := view.Register(common.CreateView(m, agg, tagKeys)); err != nil {
			logrus.WithError(err).Fatal("cannot register view")
		}
	}
}
// RegisterContainerViews creates and register containers views with provided tag keys
func RegisterContainerViews(tagKeys []string, latencyDist []float64) {
// Create views for container measures
for i, key := range containerGaugeKeys {
if key == "" {
continue
}
v := common.CreateView(containerGaugeMeasures[i], view.Sum(), tagKeys)
if err := view.Register(v); err != nil {
logrus.WithError(err).Fatal("cannot register view")
}
}
for i, key := range containerTimeKeys {
if key == "" {
continue
}
v := common.CreateView(containerTimeMeasures[i], view.Distribution(latencyDist...), tagKeys)
if err := view.Register(v); err != nil {
logrus.WithError(err).Fatal("cannot register view")
}
}
}
// initDockerMeasures builds the per-stat Docker measures, keyed by the
// short stat name (e.g. "net_rx"); the measure itself is named with a
// "docker_stats_" prefix. cpu_* stats use "cpu" units, the rest "bytes".
func initDockerMeasures() map[string]*stats.Int64Measure {
	// TODO this is nasty figure out how to use opencensus to not have to declare these
	statNames := []string{"net_rx", "net_tx", "mem_limit", "mem_usage", "disk_read", "disk_write", "cpu_user", "cpu_total", "cpu_kernel"}
	out := make(map[string]*stats.Int64Measure, len(statNames))
	for _, name := range statNames {
		unit := "bytes"
		if strings.Contains(name, "cpu") {
			unit = "cpu"
		}
		out[name] = common.MakeMeasure("docker_stats_"+name, "docker container stats for "+name, unit)
	}
	return out
}
// initContainerGaugeMeasures builds one measure per container gauge key,
// index-aligned with containerGaugeKeys. Empty keys leave a nil slot on
// purpose so any accidental use panics.
func initContainerGaugeMeasures() []*stats.Int64Measure {
	measures := make([]*stats.Int64Measure, len(containerGaugeKeys))
	for i, name := range containerGaugeKeys {
		if name != "" {
			measures[i] = common.MakeMeasure(name, "containers in state "+name, "")
		}
	}
	return measures
}
// initContainerTimeMeasures builds one millisecond-unit measure per
// container time key, index-aligned with containerTimeKeys. Empty keys
// leave a nil slot, matching initContainerGaugeMeasures.
func initContainerTimeMeasures() []*stats.Int64Measure {
	measures := make([]*stats.Int64Measure, len(containerTimeKeys))
	for i, name := range containerTimeKeys {
		if name != "" {
			measures[i] = common.MakeMeasure(name, "time spent in container state "+name, "ms")
		}
	}
	return measures
}