Files
fn-serverless/api/runnerpool/placer_stats.go
Tolga Ceylan 0105f8321e fn: stats view/distribution improvements (#1154)
* fn: stats view/distribution improvements

*) View latency distribution is now an argument
in view creation functions. This allows easier
override to set custom buckets. It is simplistic
and assumes all latency views would use the same
set, but in practice this is already the case.
*) Moved API view creation to main; this should not
be enabled for all node types. This is consistent with
the rest of the system.

* fn: Docker samples of cpu/mem/disk with specific buckets
2018-08-03 11:06:54 -07:00

85 lines
3.5 KiB
Go

package runnerpool
import (
"context"
"math"
"time"
"github.com/fnproject/fn/api/common"
"github.com/sirupsen/logrus"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
)
// Measures recorded by the LB Placer. Each one is built with
// common.MakeMeasure from a metric name, a description and a unit string.
var (
	// Number of runners attempted.
	attemptCountMeasure = common.MakeMeasure("lb_placer_attempt_count", "LB Placer Number of Runners Attempted Count", "")

	// RunnerPool RunnerList outcomes: error and empty list.
	errorPoolCountMeasure = common.MakeMeasure("lb_placer_rp_error_count", "LB Placer RunnerPool RunnerList Error Count", "")
	emptyPoolCountMeasure = common.MakeMeasure("lb_placer_rp_empty_count", "LB Placer RunnerPool RunnerList Empty Count", "")

	// Client cancellations and placer timeouts.
	cancelCountMeasure   = common.MakeMeasure("lb_placer_client_cancelled_count", "LB Placer Client Cancel Count", "")
	placerTimeoutMeasure = common.MakeMeasure("lb_placer_timeout_count", "LB Placer Timeout Count", "")

	// Placed calls, with and without errors.
	placedErrorCountMeasure = common.MakeMeasure("lb_placer_placed_error_count", "LB Placer Placed Call Count With Errors", "")
	placedOKCountMeasure    = common.MakeMeasure("lb_placer_placed_ok_count", "LB Placer Placed Call Count Without Errors", "")

	// Retries, split by cause (too busy vs. errors).
	retryTooBusyCountMeasure = common.MakeMeasure("lb_placer_retry_busy_count", "LB Placer Retry Count - Too Busy", "")
	retryErrorCountMeasure   = common.MakeMeasure("lb_placer_retry_error_count", "LB Placer Retry Count - Errors", "")

	// Placer latency; the unit string is "msecs".
	placerLatencyMeasure = common.MakeMeasure("lb_placer_latency", "LB Placer Latency", "msecs")
)
// attemptTracker carries the state needed to report LB Placer latency and
// attempt counts.
type attemptTracker struct {
	// ctx is the context handed to stats.Record when metrics are emitted.
	ctx context.Context
	// startTime is set once, when the tracker is created.
	startTime time.Time
	// lastAttemptTime is refreshed on every recordAttempt call; it stays at
	// the zero time until the first attempt.
	lastAttemptTime time.Time
	// attemptCount is the number of recorded attempts.
	attemptCount int64
}
// newAttemptTracker returns a tracker bound to ctx whose startTime is the
// moment of the call. lastAttemptTime and attemptCount are left at their
// zero values.
func newAttemptTracker(ctx context.Context) *attemptTracker {
	tracker := new(attemptTracker)
	tracker.ctx = ctx
	tracker.startTime = time.Now()
	return tracker
}
// finalizeAttempts records the accumulated attempt count and the placer
// latency (in milliseconds) against the tracker's context.
//
// isCommited selects where the measured interval ends: when true, latency is
// measured up to the last recorded attempt so that the execution time of the
// committed TryExec is excluded; when false, it is measured up to now.
func (data *attemptTracker) finalizeAttempts(isCommited bool) {
	stats.Record(data.ctx, attemptCountMeasure.M(data.attemptCount))

	// IMPORTANT: here we use (lastAttemptTime - startTime). We want to exclude TryExec
	// latency *if* TryExec() goes through with commit. Placer latency metric only shows
	// how much time we are spending in Placer loop/retries. The metric includes rtt/latency of
	// *all* unsuccessful NACK (retriable) responses from runners as well. For example, if
	// Placer loop here retries 4 runners (which takes 5 msecs each) and then 5th runner
	// succeeds (but takes 35 seconds to finish execution), we report 20 msecs as our LB
	// latency.
	endTime := data.lastAttemptTime

	// If no attempt was ever recorded, lastAttemptTime is still the zero time
	// and subtracting startTime from it would yield a huge negative latency.
	// Fall back to the current time in that case, as for uncommitted calls.
	if !isCommited || endTime.IsZero() {
		endTime = time.Now()
	}

	stats.Record(data.ctx, placerLatencyMeasure.M(int64(endTime.Sub(data.startTime)/time.Millisecond)))
}
// recordAttempt stamps the current time as the latest attempt and bumps the
// attempt counter. The counter saturates at math.MaxInt64 rather than
// wrapping around.
func (data *attemptTracker) recordAttempt() {
	data.lastAttemptTime = time.Now()

	if data.attemptCount == math.MaxInt64 {
		// Already at the ceiling; leave the counter untouched.
		return
	}
	data.attemptCount++
}
// RegisterPlacerViews registers one view per LB Placer measure.
//
// tagKeys is passed to common.CreateView for every view. latencyDist supplies
// the bucket boundaries of the placer latency distribution. The attempt count
// view uses a fixed set of distribution buckets, and all remaining measures
// are registered with a count aggregation.
//
// If registration fails, the error is logged through logrus at Fatal level.
func RegisterPlacerViews(tagKeys []string, latencyDist []float64) {
	attemptBuckets := view.Distribution(0, 2, 3, 4, 8, 16, 32, 64, 128, 256)
	latencyBuckets := view.Distribution(latencyDist...)

	placerViews := []*view.View{
		common.CreateView(attemptCountMeasure, attemptBuckets, tagKeys),
		common.CreateView(errorPoolCountMeasure, view.Count(), tagKeys),
		common.CreateView(emptyPoolCountMeasure, view.Count(), tagKeys),
		common.CreateView(cancelCountMeasure, view.Count(), tagKeys),
		common.CreateView(placerTimeoutMeasure, view.Count(), tagKeys),
		common.CreateView(placedErrorCountMeasure, view.Count(), tagKeys),
		common.CreateView(placedOKCountMeasure, view.Count(), tagKeys),
		common.CreateView(retryTooBusyCountMeasure, view.Count(), tagKeys),
		common.CreateView(retryErrorCountMeasure, view.Count(), tagKeys),
		common.CreateView(placerLatencyMeasure, latencyBuckets, tagKeys),
	}

	if err := view.Register(placerViews...); err != nil {
		logrus.WithError(err).Fatal("cannot create view")
	}
}