mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: stats view/distribution improvements (#1154)
* fn: stats view/distribution improvements *) View latency distribution is now an argument in view creation functions. This makes it easier to override the defaults with custom buckets. It is simplistic and assumes all latency views use the same bucket set, but in practice this is already the case. *) Moved API view creation to main; it should not be enabled for all node types. This is consistent with the rest of the system. * fn: Docker samples of cpu/mem/disk with specific buckets
This commit is contained in:
@@ -12,7 +12,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"go.opencensus.io/stats/view"
|
"go.opencensus.io/stats/view"
|
||||||
"go.opencensus.io/tag"
|
|
||||||
|
|
||||||
"github.com/fnproject/fn/api/common"
|
"github.com/fnproject/fn/api/common"
|
||||||
"github.com/fsouza/go-dockerclient"
|
"github.com/fsouza/go-dockerclient"
|
||||||
@@ -104,56 +103,26 @@ type dockerWrap struct {
|
|||||||
dockerNoTimeout *docker.Client
|
dockerNoTimeout *docker.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
|
||||||
dockerRetriesMeasure = makeMeasure("docker_api_retries", "docker api retries", "")
|
|
||||||
dockerTimeoutMeasure = makeMeasure("docker_api_timeout", "docker api timeouts", "")
|
|
||||||
dockerErrorMeasure = makeMeasure("docker_api_error", "docker api errors", "")
|
|
||||||
dockerOOMMeasure = makeMeasure("docker_oom", "docker oom", "")
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// TODO it's either this or stats.FindMeasure("string").M() -- this is safer but painful
|
dockerRetriesMeasure = common.MakeMeasure("docker_api_retries", "docker api retries", "")
|
||||||
dockerRetriesMeasure *stats.Int64Measure
|
dockerTimeoutMeasure = common.MakeMeasure("docker_api_timeout", "docker api timeouts", "")
|
||||||
dockerTimeoutMeasure *stats.Int64Measure
|
dockerErrorMeasure = common.MakeMeasure("docker_api_error", "docker api errors", "")
|
||||||
dockerErrorMeasure *stats.Int64Measure
|
dockerOOMMeasure = common.MakeMeasure("docker_oom", "docker oom", "")
|
||||||
dockerOOMMeasure *stats.Int64Measure
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// RegisterViews creates and registers views with provided tag keys
|
// RegisterViews creates and registers views with provided tag keys
|
||||||
func RegisterViews(tagKeys []string) {
|
func RegisterViews(tagKeys []string, dist []float64) {
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
createView(dockerRetriesMeasure, view.Sum(), tagKeys),
|
common.CreateView(dockerRetriesMeasure, view.Sum(), tagKeys),
|
||||||
createView(dockerTimeoutMeasure, view.Count(), tagKeys),
|
common.CreateView(dockerTimeoutMeasure, view.Count(), tagKeys),
|
||||||
createView(dockerErrorMeasure, view.Count(), tagKeys),
|
common.CreateView(dockerErrorMeasure, view.Count(), tagKeys),
|
||||||
createView(dockerOOMMeasure, view.Count(), tagKeys),
|
common.CreateView(dockerOOMMeasure, view.Count(), tagKeys),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func createView(measure stats.Measure, agg *view.Aggregation, tagKeys []string) *view.View {
|
|
||||||
return &view.View{
|
|
||||||
Name: measure.Name(),
|
|
||||||
Description: measure.Description(),
|
|
||||||
Measure: measure,
|
|
||||||
TagKeys: makeKeys(tagKeys),
|
|
||||||
Aggregation: agg,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeKeys(names []string) []tag.Key {
|
|
||||||
tagKeys := make([]tag.Key, len(names))
|
|
||||||
for i, name := range names {
|
|
||||||
key, err := tag.NewKey(name)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Fatal(err)
|
|
||||||
}
|
|
||||||
tagKeys[i] = key
|
|
||||||
}
|
|
||||||
return tagKeys
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *dockerWrap) retry(ctx context.Context, logger logrus.FieldLogger, f func() error) error {
|
func (d *dockerWrap) retry(ctx context.Context, logger logrus.FieldLogger, f func() error) error {
|
||||||
var i int
|
var i int
|
||||||
var err error
|
var err error
|
||||||
@@ -413,6 +382,6 @@ func (d *dockerWrap) Stats(opts docker.StatsOptions) (err error) {
|
|||||||
//return err
|
//return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeMeasure(name string, desc string, unit string) *stats.Int64Measure {
|
func MakeMeasure(name string, desc string, unit string) *stats.Int64Measure {
|
||||||
return stats.Int64(name, desc, unit)
|
return stats.Int64(name, desc, unit)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,10 +5,11 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/fnproject/fn/api/common"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"go.opencensus.io/stats"
|
"go.opencensus.io/stats"
|
||||||
"go.opencensus.io/stats/view"
|
"go.opencensus.io/stats/view"
|
||||||
"go.opencensus.io/tag"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO add some suga:
|
// TODO add some suga:
|
||||||
@@ -82,29 +83,29 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
queuedMeasure = makeMeasure(queuedMetricName, "calls currently queued against agent", "")
|
queuedMeasure = common.MakeMeasure(queuedMetricName, "calls currently queued against agent", "")
|
||||||
// TODO this is a dupe of sum {complete,failed} ?
|
// TODO this is a dupe of sum {complete,failed} ?
|
||||||
callsMeasure = makeMeasure(callsMetricName, "calls created in agent", "")
|
callsMeasure = common.MakeMeasure(callsMetricName, "calls created in agent", "")
|
||||||
runningMeasure = makeMeasure(runningMetricName, "calls currently running in agent", "")
|
runningMeasure = common.MakeMeasure(runningMetricName, "calls currently running in agent", "")
|
||||||
completedMeasure = makeMeasure(completedMetricName, "calls completed in agent", "")
|
completedMeasure = common.MakeMeasure(completedMetricName, "calls completed in agent", "")
|
||||||
failedMeasure = makeMeasure(failedMetricName, "calls failed in agent", "")
|
failedMeasure = common.MakeMeasure(failedMetricName, "calls failed in agent", "")
|
||||||
timedoutMeasure = makeMeasure(timedoutMetricName, "calls timed out in agent", "")
|
timedoutMeasure = common.MakeMeasure(timedoutMetricName, "calls timed out in agent", "")
|
||||||
errorsMeasure = makeMeasure(errorsMetricName, "calls errored in agent", "")
|
errorsMeasure = common.MakeMeasure(errorsMetricName, "calls errored in agent", "")
|
||||||
serverBusyMeasure = makeMeasure(serverBusyMetricName, "calls where server was too busy in agent", "")
|
serverBusyMeasure = common.MakeMeasure(serverBusyMetricName, "calls where server was too busy in agent", "")
|
||||||
dockerMeasures = initDockerMeasures()
|
dockerMeasures = initDockerMeasures()
|
||||||
containerGaugeMeasures = initContainerGaugeMeasures()
|
containerGaugeMeasures = initContainerGaugeMeasures()
|
||||||
containerTimeMeasures = initContainerTimeMeasures()
|
containerTimeMeasures = initContainerTimeMeasures()
|
||||||
|
|
||||||
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
|
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
|
||||||
runnerSchedLatencyMeasure = makeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
|
runnerSchedLatencyMeasure = common.MakeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
|
||||||
// Reported By LB: Function execution time inside a container.
|
// Reported By LB: Function execution time inside a container.
|
||||||
runnerExecLatencyMeasure = makeMeasure(runnerExecLatencyMetricName, "Runner Container Execution Latency Reported By LBAgent", "msecs")
|
runnerExecLatencyMeasure = common.MakeMeasure(runnerExecLatencyMetricName, "Runner Container Execution Latency Reported By LBAgent", "msecs")
|
||||||
)
|
)
|
||||||
|
|
||||||
func RegisterLBAgentViews(tagKeys []string) {
|
func RegisterLBAgentViews(tagKeys []string, latencyDist []float64) {
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
createView(runnerSchedLatencyMeasure, view.Distribution(1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000), tagKeys),
|
common.CreateView(runnerSchedLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
|
||||||
createView(runnerExecLatencyMeasure, view.Distribution(1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000), tagKeys),
|
common.CreateView(runnerExecLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
@@ -112,16 +113,16 @@ func RegisterLBAgentViews(tagKeys []string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RegisterAgentViews creates and registers all agent views
|
// RegisterAgentViews creates and registers all agent views
|
||||||
func RegisterAgentViews(tagKeys []string) {
|
func RegisterAgentViews(tagKeys []string, latencyDist []float64) {
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
createView(queuedMeasure, view.Sum(), tagKeys),
|
common.CreateView(queuedMeasure, view.Sum(), tagKeys),
|
||||||
createView(callsMeasure, view.Sum(), tagKeys),
|
common.CreateView(callsMeasure, view.Sum(), tagKeys),
|
||||||
createView(runningMeasure, view.Sum(), tagKeys),
|
common.CreateView(runningMeasure, view.Sum(), tagKeys),
|
||||||
createView(completedMeasure, view.Sum(), tagKeys),
|
common.CreateView(completedMeasure, view.Sum(), tagKeys),
|
||||||
createView(failedMeasure, view.Sum(), tagKeys),
|
common.CreateView(failedMeasure, view.Sum(), tagKeys),
|
||||||
createView(timedoutMeasure, view.Sum(), tagKeys),
|
common.CreateView(timedoutMeasure, view.Sum(), tagKeys),
|
||||||
createView(errorsMeasure, view.Sum(), tagKeys),
|
common.CreateView(errorsMeasure, view.Sum(), tagKeys),
|
||||||
createView(serverBusyMeasure, view.Sum(), tagKeys),
|
common.CreateView(serverBusyMeasure, view.Sum(), tagKeys),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
@@ -129,9 +130,33 @@ func RegisterAgentViews(tagKeys []string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RegisterDockerViews creates and registers Docker views with provided tag keys
|
// RegisterDockerViews creates and registers Docker views with provided tag keys
|
||||||
func RegisterDockerViews(tagKeys []string) {
|
func RegisterDockerViews(tagKeys []string, latencyDist []float64) {
|
||||||
|
|
||||||
for _, m := range dockerMeasures {
|
for _, m := range dockerMeasures {
|
||||||
v := createView(m, view.Distribution(), tagKeys)
|
|
||||||
|
var dist *view.Aggregation
|
||||||
|
|
||||||
|
// Remember these are sampled by docker in short intervals (approx 1 sec)
|
||||||
|
|
||||||
|
// Bytes for net/disk/mem
|
||||||
|
if m.Name() == "docker_stats_net_rx" || m.Name() == "docker_stats_net_tx" {
|
||||||
|
// net IO: 8k to 32MB
|
||||||
|
dist = view.Distribution(0, 8192, 65536, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432)
|
||||||
|
} else if m.Name() == "docker_stats_disk_read" || m.Name() == "docker_stats_disk_write" {
|
||||||
|
// disk IO: 8k to 32MB
|
||||||
|
dist = view.Distribution(0, 8192, 65536, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432)
|
||||||
|
} else if m.Name() == "docker_stats_mem_limit" || m.Name() == "docker_stats_mem_usage" {
|
||||||
|
// memory: 128K to 32MB
|
||||||
|
dist = view.Distribution(0, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432)
|
||||||
|
} else if m.Name() == "docker_stats_cpu_user" || m.Name() == "docker_stats_cpu_total" || m.Name() == "docker_stats_cpu_kernel" {
|
||||||
|
// percentages
|
||||||
|
dist = view.Distribution(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100)
|
||||||
|
} else {
|
||||||
|
// Not used yet.
|
||||||
|
dist = view.Distribution(latencyDist...)
|
||||||
|
}
|
||||||
|
|
||||||
|
v := common.CreateView(m, dist, tagKeys)
|
||||||
if err := view.Register(v); err != nil {
|
if err := view.Register(v); err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
}
|
}
|
||||||
@@ -139,13 +164,13 @@ func RegisterDockerViews(tagKeys []string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RegisterContainerViews creates and registers container views with provided tag keys
|
// RegisterContainerViews creates and registers container views with provided tag keys
|
||||||
func RegisterContainerViews(tagKeys []string) {
|
func RegisterContainerViews(tagKeys []string, latencyDist []float64) {
|
||||||
// Create views for container measures
|
// Create views for container measures
|
||||||
for i, key := range containerGaugeKeys {
|
for i, key := range containerGaugeKeys {
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
v := createView(containerGaugeMeasures[i], view.Count(), tagKeys)
|
v := common.CreateView(containerGaugeMeasures[i], view.Sum(), tagKeys)
|
||||||
if err := view.Register(v); err != nil {
|
if err := view.Register(v); err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
}
|
}
|
||||||
@@ -155,7 +180,7 @@ func RegisterContainerViews(tagKeys []string) {
|
|||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
v := createView(containerTimeMeasures[i], view.Distribution(), tagKeys)
|
v := common.CreateView(containerTimeMeasures[i], view.Distribution(latencyDist...), tagKeys)
|
||||||
if err := view.Register(v); err != nil {
|
if err := view.Register(v); err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
}
|
}
|
||||||
@@ -172,7 +197,7 @@ func initDockerMeasures() map[string]*stats.Int64Measure {
|
|||||||
if strings.Contains(key, "cpu") {
|
if strings.Contains(key, "cpu") {
|
||||||
units = "cpu"
|
units = "cpu"
|
||||||
}
|
}
|
||||||
measures[key] = makeMeasure("docker_stats_"+key, "docker container stats for "+key, units)
|
measures[key] = common.MakeMeasure("docker_stats_"+key, "docker container stats for "+key, units)
|
||||||
}
|
}
|
||||||
return measures
|
return measures
|
||||||
}
|
}
|
||||||
@@ -183,48 +208,19 @@ func initContainerGaugeMeasures() []*stats.Int64Measure {
|
|||||||
if key == "" { // leave nil intentionally, let it panic
|
if key == "" { // leave nil intentionally, let it panic
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
gaugeMeasures[i] = makeMeasure(key, "containers in state "+key, "")
|
gaugeMeasures[i] = common.MakeMeasure(key, "containers in state "+key, "")
|
||||||
}
|
}
|
||||||
return gaugeMeasures
|
return gaugeMeasures
|
||||||
}
|
}
|
||||||
|
|
||||||
func initContainerTimeMeasures() []*stats.Int64Measure {
|
func initContainerTimeMeasures() []*stats.Int64Measure {
|
||||||
// TODO(reed): do we have to do this? the measurements will be tagged on the context, will they be propagated
|
|
||||||
// or we have to white list them in the view for them to show up? test...
|
|
||||||
|
|
||||||
timeMeasures := make([]*stats.Int64Measure, len(containerTimeKeys))
|
timeMeasures := make([]*stats.Int64Measure, len(containerTimeKeys))
|
||||||
for i, key := range containerTimeKeys {
|
for i, key := range containerTimeKeys {
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
timeMeasures[i] = makeMeasure(key, "time spent in container state "+key, "ms")
|
timeMeasures[i] = common.MakeMeasure(key, "time spent in container state "+key, "ms")
|
||||||
}
|
}
|
||||||
|
|
||||||
return timeMeasures
|
return timeMeasures
|
||||||
}
|
}
|
||||||
|
|
||||||
func createView(measure stats.Measure, agg *view.Aggregation, tagKeys []string) *view.View {
|
|
||||||
return &view.View{
|
|
||||||
Name: measure.Name(),
|
|
||||||
Description: measure.Description(),
|
|
||||||
Measure: measure,
|
|
||||||
TagKeys: makeKeys(tagKeys),
|
|
||||||
Aggregation: agg,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeMeasure(name string, desc string, unit string) *stats.Int64Measure {
|
|
||||||
return stats.Int64(name, desc, unit)
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeKeys(names []string) []tag.Key {
|
|
||||||
tagKeys := make([]tag.Key, len(names))
|
|
||||||
for i, name := range names {
|
|
||||||
key, err := tag.NewKey(name)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Fatal(err)
|
|
||||||
}
|
|
||||||
tagKeys[i] = key
|
|
||||||
}
|
|
||||||
return tagKeys
|
|
||||||
}
|
|
||||||
|
|||||||
34
api/common/stats_utils.go
Normal file
34
api/common/stats_utils.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"go.opencensus.io/stats"
|
||||||
|
"go.opencensus.io/stats/view"
|
||||||
|
"go.opencensus.io/tag"
|
||||||
|
)
|
||||||
|
|
||||||
|
func CreateView(measure stats.Measure, agg *view.Aggregation, tagKeys []string) *view.View {
|
||||||
|
return &view.View{
|
||||||
|
Name: measure.Name(),
|
||||||
|
Description: measure.Description(),
|
||||||
|
Measure: measure,
|
||||||
|
TagKeys: makeKeys(tagKeys),
|
||||||
|
Aggregation: agg,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeMeasure(name string, desc string, unit string) *stats.Int64Measure {
|
||||||
|
return stats.Int64(name, desc, unit)
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeKeys(names []string) []tag.Key {
|
||||||
|
tagKeys := make([]tag.Key, len(names))
|
||||||
|
for i, name := range names {
|
||||||
|
key, err := tag.NewKey(name)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatal(err)
|
||||||
|
}
|
||||||
|
tagKeys[i] = key
|
||||||
|
}
|
||||||
|
return tagKeys
|
||||||
|
}
|
||||||
@@ -26,7 +26,6 @@ import (
|
|||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"go.opencensus.io/stats"
|
"go.opencensus.io/stats"
|
||||||
"go.opencensus.io/stats/view"
|
"go.opencensus.io/stats/view"
|
||||||
"go.opencensus.io/tag"
|
|
||||||
"go.opencensus.io/trace"
|
"go.opencensus.io/trace"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -431,43 +430,21 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
uploadSizeMeasure = stats.Int64(uploadSizeMetricName, "uploaded log size", "byte")
|
uploadSizeMeasure = common.MakeMeasure(uploadSizeMetricName, "uploaded log size", "byte")
|
||||||
downloadSizeMeasure = stats.Int64(downloadSizeMetricName, "downloaded log size", "byte")
|
downloadSizeMeasure = common.MakeMeasure(downloadSizeMetricName, "downloaded log size", "byte")
|
||||||
)
|
)
|
||||||
|
|
||||||
// RegisterViews registers views for s3 measures
|
// RegisterViews registers views for s3 measures
|
||||||
func RegisterViews(tagKeys []string) {
|
func RegisterViews(tagKeys []string, dist []float64) {
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
createView(uploadSizeMeasure, view.Distribution(), tagKeys),
|
common.CreateView(uploadSizeMeasure, view.Distribution(dist...), tagKeys),
|
||||||
createView(downloadSizeMeasure, view.Distribution(), tagKeys),
|
common.CreateView(downloadSizeMeasure, view.Distribution(dist...), tagKeys),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot create view")
|
logrus.WithError(err).Fatal("cannot create view")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func createView(measure stats.Measure, agg *view.Aggregation, tagKeys []string) *view.View {
|
|
||||||
return &view.View{
|
|
||||||
Name: measure.Name(),
|
|
||||||
Description: measure.Description(),
|
|
||||||
Measure: measure,
|
|
||||||
TagKeys: makeKeys(tagKeys),
|
|
||||||
Aggregation: agg,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeKeys(names []string) []tag.Key {
|
|
||||||
tagKeys := make([]tag.Key, len(names))
|
|
||||||
for i, name := range names {
|
|
||||||
key, err := tag.NewKey(name)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Fatal(err)
|
|
||||||
}
|
|
||||||
tagKeys[i] = key
|
|
||||||
}
|
|
||||||
return tagKeys
|
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
logs.Register(s3StoreProvider(0))
|
logs.Register(s3StoreProvider(0))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,23 +5,24 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/fnproject/fn/api/common"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"go.opencensus.io/stats"
|
"go.opencensus.io/stats"
|
||||||
"go.opencensus.io/stats/view"
|
"go.opencensus.io/stats/view"
|
||||||
"go.opencensus.io/tag"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
attemptCountMeasure = stats.Int64("lb_placer_attempt_count", "LB Placer Number of Runners Attempted Count", "")
|
attemptCountMeasure = common.MakeMeasure("lb_placer_attempt_count", "LB Placer Number of Runners Attempted Count", "")
|
||||||
errorPoolCountMeasure = stats.Int64("lb_placer_rp_error_count", "LB Placer RunnerPool RunnerList Error Count", "")
|
errorPoolCountMeasure = common.MakeMeasure("lb_placer_rp_error_count", "LB Placer RunnerPool RunnerList Error Count", "")
|
||||||
emptyPoolCountMeasure = stats.Int64("lb_placer_rp_empty_count", "LB Placer RunnerPool RunnerList Empty Count", "")
|
emptyPoolCountMeasure = common.MakeMeasure("lb_placer_rp_empty_count", "LB Placer RunnerPool RunnerList Empty Count", "")
|
||||||
cancelCountMeasure = stats.Int64("lb_placer_client_cancelled_count", "LB Placer Client Cancel Count", "")
|
cancelCountMeasure = common.MakeMeasure("lb_placer_client_cancelled_count", "LB Placer Client Cancel Count", "")
|
||||||
placerTimeoutMeasure = stats.Int64("lb_placer_timeout_count", "LB Placer Timeout Count", "")
|
placerTimeoutMeasure = common.MakeMeasure("lb_placer_timeout_count", "LB Placer Timeout Count", "")
|
||||||
placedErrorCountMeasure = stats.Int64("lb_placer_placed_error_count", "LB Placer Placed Call Count With Errors", "")
|
placedErrorCountMeasure = common.MakeMeasure("lb_placer_placed_error_count", "LB Placer Placed Call Count With Errors", "")
|
||||||
placedOKCountMeasure = stats.Int64("lb_placer_placed_ok_count", "LB Placer Placed Call Count Without Errors", "")
|
placedOKCountMeasure = common.MakeMeasure("lb_placer_placed_ok_count", "LB Placer Placed Call Count Without Errors", "")
|
||||||
retryTooBusyCountMeasure = stats.Int64("lb_placer_retry_busy_count", "LB Placer Retry Count - Too Busy", "")
|
retryTooBusyCountMeasure = common.MakeMeasure("lb_placer_retry_busy_count", "LB Placer Retry Count - Too Busy", "")
|
||||||
retryErrorCountMeasure = stats.Int64("lb_placer_retry_error_count", "LB Placer Retry Count - Errors", "")
|
retryErrorCountMeasure = common.MakeMeasure("lb_placer_retry_error_count", "LB Placer Retry Count - Errors", "")
|
||||||
placerLatencyMeasure = stats.Int64("lb_placer_latency", "LB Placer Latency", "msecs")
|
placerLatencyMeasure = common.MakeMeasure("lb_placer_latency", "LB Placer Latency", "msecs")
|
||||||
)
|
)
|
||||||
|
|
||||||
// Helper struct for tracking LB Placer latency and attempt counts
|
// Helper struct for tracking LB Placer latency and attempt counts
|
||||||
@@ -64,40 +65,18 @@ func (data *attemptTracker) recordAttempt() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeKeys(names []string) []tag.Key {
|
func RegisterPlacerViews(tagKeys []string, latencyDist []float64) {
|
||||||
var tagKeys []tag.Key
|
|
||||||
for _, name := range names {
|
|
||||||
key, err := tag.NewKey(name)
|
|
||||||
if err != nil {
|
|
||||||
logrus.WithError(err).Fatal("cannot create tag key for %v", name)
|
|
||||||
}
|
|
||||||
tagKeys = append(tagKeys, key)
|
|
||||||
}
|
|
||||||
return tagKeys
|
|
||||||
}
|
|
||||||
|
|
||||||
func createView(measure stats.Measure, agg *view.Aggregation, tagKeys []string) *view.View {
|
|
||||||
return &view.View{
|
|
||||||
Name: measure.Name(),
|
|
||||||
Description: measure.Description(),
|
|
||||||
TagKeys: makeKeys(tagKeys),
|
|
||||||
Measure: measure,
|
|
||||||
Aggregation: agg,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func RegisterPlacerViews(tagKeys []string) {
|
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
createView(attemptCountMeasure, view.Distribution(0, 2, 3, 4, 8, 16, 32, 64, 128, 256), tagKeys),
|
common.CreateView(attemptCountMeasure, view.Distribution(0, 2, 3, 4, 8, 16, 32, 64, 128, 256), tagKeys),
|
||||||
createView(errorPoolCountMeasure, view.Count(), tagKeys),
|
common.CreateView(errorPoolCountMeasure, view.Count(), tagKeys),
|
||||||
createView(emptyPoolCountMeasure, view.Count(), tagKeys),
|
common.CreateView(emptyPoolCountMeasure, view.Count(), tagKeys),
|
||||||
createView(cancelCountMeasure, view.Count(), tagKeys),
|
common.CreateView(cancelCountMeasure, view.Count(), tagKeys),
|
||||||
createView(placerTimeoutMeasure, view.Count(), tagKeys),
|
common.CreateView(placerTimeoutMeasure, view.Count(), tagKeys),
|
||||||
createView(placedErrorCountMeasure, view.Count(), tagKeys),
|
common.CreateView(placedErrorCountMeasure, view.Count(), tagKeys),
|
||||||
createView(placedOKCountMeasure, view.Count(), tagKeys),
|
common.CreateView(placedOKCountMeasure, view.Count(), tagKeys),
|
||||||
createView(retryTooBusyCountMeasure, view.Count(), tagKeys),
|
common.CreateView(retryTooBusyCountMeasure, view.Count(), tagKeys),
|
||||||
createView(retryErrorCountMeasure, view.Count(), tagKeys),
|
common.CreateView(retryErrorCountMeasure, view.Count(), tagKeys),
|
||||||
createView(placerLatencyMeasure, view.Distribution(1, 10, 25, 50, 200, 1000, 1500, 2000, 2500, 3000, 10000, 60000), tagKeys),
|
common.CreateView(placerLatencyMeasure, view.Distribution(latencyDist...), tagKeys),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot create view")
|
logrus.WithError(err).Fatal("cannot create view")
|
||||||
|
|||||||
@@ -79,6 +79,19 @@ func traceWrap(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func apiMetricsWrap(s *Server) {
|
func apiMetricsWrap(s *Server) {
|
||||||
|
pathKey, err := tag.NewKey("path")
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatal(err)
|
||||||
|
}
|
||||||
|
methodKey, err := tag.NewKey("method")
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatal(err)
|
||||||
|
}
|
||||||
|
statusKey, err := tag.NewKey("status")
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
measure := func(engine *gin.Engine) func(*gin.Context) {
|
measure := func(engine *gin.Engine) func(*gin.Context) {
|
||||||
var routes gin.RoutesInfo
|
var routes gin.RoutesInfo
|
||||||
return func(c *gin.Context) {
|
return func(c *gin.Context) {
|
||||||
@@ -102,7 +115,7 @@ func apiMetricsWrap(s *Server) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Fatal(err)
|
logrus.Fatal(err)
|
||||||
}
|
}
|
||||||
stats.Record(ctx, apiRequestCount.M(1))
|
stats.Record(ctx, apiRequestCountMeasure.M(1))
|
||||||
c.Next()
|
c.Next()
|
||||||
|
|
||||||
status := strconv.Itoa(c.Writer.Status())
|
status := strconv.Itoa(c.Writer.Status())
|
||||||
@@ -112,7 +125,7 @@ func apiMetricsWrap(s *Server) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Fatal(err)
|
logrus.Fatal(err)
|
||||||
}
|
}
|
||||||
stats.Record(ctx, apiLatency.M(float64(time.Since(start))/float64(time.Millisecond)))
|
stats.Record(ctx, apiLatencyMeasure.M(int64(time.Since(start)/time.Millisecond)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -600,10 +600,6 @@ func WithAgentFromEnv() Option {
|
|||||||
placer = pool.NewNaivePlacer(&placerCfg)
|
placer = pool.NewNaivePlacer(&placerCfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
keys := []string{"fn_appname", "fn_path"}
|
|
||||||
pool.RegisterPlacerViews(keys)
|
|
||||||
agent.RegisterLBAgentViews(keys)
|
|
||||||
|
|
||||||
s.lbReadAccess = agent.NewCachedDataAccess(cl)
|
s.lbReadAccess = agent.NewCachedDataAccess(cl)
|
||||||
s.agent, err = agent.NewLBAgent(cl, runnerPool, placer)
|
s.agent, err = agent.NewLBAgent(cl, runnerPool, placer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -758,7 +754,7 @@ func WithPrometheus() Option {
|
|||||||
}
|
}
|
||||||
s.promExporter = exporter
|
s.promExporter = exporter
|
||||||
view.RegisterExporter(exporter)
|
view.RegisterExporter(exporter)
|
||||||
registerViews()
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,68 +1,26 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/fnproject/fn/api/common"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"go.opencensus.io/stats"
|
"go.opencensus.io/stats"
|
||||||
"go.opencensus.io/stats/view"
|
"go.opencensus.io/stats/view"
|
||||||
"go.opencensus.io/tag"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
apiRequestCount = stats.Int64("api/request_count", "Number of API requests", stats.UnitDimensionless)
|
apiRequestCountMeasure = common.MakeMeasure("api/request_count", "Count of API requests started", stats.UnitDimensionless)
|
||||||
apiLatency = stats.Float64("api/latency", "API latency", stats.UnitMilliseconds)
|
apiResponseCountMeasure = common.MakeMeasure("api/response_count", "API response count", stats.UnitDimensionless)
|
||||||
|
apiLatencyMeasure = common.MakeMeasure("api/latency", "Latency distribution of API requests", stats.UnitMilliseconds)
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
func RegisterAPIViews(tagKeys []string, dist []float64) {
|
||||||
pathKey = makeKey("path")
|
|
||||||
methodKey = makeKey("method")
|
|
||||||
statusKey = makeKey("status")
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
defaultLatencyDistribution = view.Distribution(0, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650, 800, 1000, 2000, 5000, 10000, 20000, 50000, 100000)
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
apiRequestCountView = &view.View{
|
|
||||||
Name: "api/request_count",
|
|
||||||
Description: "Count of API requests started",
|
|
||||||
Measure: apiRequestCount,
|
|
||||||
TagKeys: []tag.Key{pathKey, methodKey},
|
|
||||||
Aggregation: view.Count(),
|
|
||||||
}
|
|
||||||
|
|
||||||
apiResponseCountView = &view.View{
|
|
||||||
Name: "api/response_count",
|
|
||||||
Description: "API response count",
|
|
||||||
TagKeys: []tag.Key{pathKey, methodKey, statusKey},
|
|
||||||
Measure: apiLatency,
|
|
||||||
Aggregation: view.Count(),
|
|
||||||
}
|
|
||||||
|
|
||||||
apiLatencyView = &view.View{
|
|
||||||
Name: "api/latency",
|
|
||||||
Description: "Latency distribution of API requests",
|
|
||||||
Measure: apiLatency,
|
|
||||||
TagKeys: []tag.Key{pathKey, methodKey, statusKey},
|
|
||||||
Aggregation: defaultLatencyDistribution,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
func registerViews() {
|
|
||||||
err := view.Register(
|
err := view.Register(
|
||||||
apiRequestCountView,
|
common.CreateView(apiRequestCountMeasure, view.Count(), tagKeys),
|
||||||
apiResponseCountView,
|
common.CreateView(apiResponseCountMeasure, view.Count(), tagKeys),
|
||||||
apiLatencyView,
|
common.CreateView(apiLatencyMeasure, view.Distribution(dist...), tagKeys),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Fatal("cannot register view")
|
logrus.WithError(err).Fatal("cannot register view")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeKey(name string) tag.Key {
|
|
||||||
key, err := tag.NewKey(name)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Fatal(err)
|
|
||||||
}
|
|
||||||
return key
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -27,13 +27,20 @@ func main() {
|
|||||||
func registerViews() {
|
func registerViews() {
|
||||||
// Register views in agent package
|
// Register views in agent package
|
||||||
keys := []string{"fn_appname", "fn_path"}
|
keys := []string{"fn_appname", "fn_path"}
|
||||||
agent.RegisterAgentViews(keys)
|
dist := []float64{1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000}
|
||||||
agent.RegisterDockerViews(keys)
|
|
||||||
agent.RegisterContainerViews(keys)
|
agent.RegisterAgentViews(keys, dist)
|
||||||
|
agent.RegisterDockerViews(keys, dist)
|
||||||
|
agent.RegisterContainerViews(keys, dist)
|
||||||
|
|
||||||
// Register docker client views
|
// Register docker client views
|
||||||
docker.RegisterViews(keys)
|
docker.RegisterViews(keys, dist)
|
||||||
|
|
||||||
// Register s3 log views
|
// Register s3 log views
|
||||||
s3.RegisterViews(keys)
|
s3.RegisterViews(keys, dist)
|
||||||
|
|
||||||
|
apiKeys := []string{"path", "method", "status"}
|
||||||
|
apiDist := []float64{0, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650, 800, 1000, 2000, 5000, 10000, 20000, 50000, 100000}
|
||||||
|
|
||||||
|
server.RegisterAPIViews(apiKeys, apiDist)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -221,8 +221,9 @@ func SetUpLBNode(ctx context.Context) (*server.Server, error) {
|
|||||||
placer := pool.NewNaivePlacer(&placerCfg)
|
placer := pool.NewNaivePlacer(&placerCfg)
|
||||||
|
|
||||||
keys := []string{"fn_appname", "fn_path"}
|
keys := []string{"fn_appname", "fn_path"}
|
||||||
pool.RegisterPlacerViews(keys)
|
dist := []float64{1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000}
|
||||||
agent.RegisterLBAgentViews(keys)
|
pool.RegisterPlacerViews(keys, dist)
|
||||||
|
agent.RegisterLBAgentViews(keys, dist)
|
||||||
|
|
||||||
// Create an LB Agent with a Call Overrider to intercept calls in GetCall(). Overrider in this example
|
// Create an LB Agent with a Call Overrider to intercept calls in GetCall(). Overrider in this example
|
||||||
// scrubs CPU/TmpFsSize and adds FN_CHEESE key/value into extensions.
|
// scrubs CPU/TmpFsSize and adds FN_CHEESE key/value into extensions.
|
||||||
|
|||||||
Reference in New Issue
Block a user