mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
LB agent reports lb placer latency. It should also report how long it took for the runner to initiate the call as well as execution time inside the container if the runner has accepted (committed) to the call.
104 lines
3.8 KiB
Go
104 lines
3.8 KiB
Go
package runnerpool
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
"time"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
"go.opencensus.io/stats"
|
|
"go.opencensus.io/stats/view"
|
|
"go.opencensus.io/tag"
|
|
)
|
|
|
|
var (
|
|
attemptCountMeasure = stats.Int64("lb_placer_attempt_count", "LB Placer Number of Runners Attempted Count", "")
|
|
errorPoolCountMeasure = stats.Int64("lb_placer_rp_error_count", "LB Placer RunnerPool RunnerList Error Count", "")
|
|
emptyPoolCountMeasure = stats.Int64("lb_placer_rp_empty_count", "LB Placer RunnerPool RunnerList Empty Count", "")
|
|
cancelCountMeasure = stats.Int64("lb_placer_client_cancelled_count", "LB Placer Client Cancel Count", "")
|
|
placedErrorCountMeasure = stats.Int64("lb_placer_placed_error_count", "LB Placer Placed Call Count With Errors", "")
|
|
placedOKCountMeasure = stats.Int64("lb_placer_placed_ok_count", "LB Placer Placed Call Count Without Errors", "")
|
|
retryTooBusyCountMeasure = stats.Int64("lb_placer_retry_busy_count", "LB Placer Retry Count - Too Busy", "")
|
|
retryErrorCountMeasure = stats.Int64("lb_placer_retry_error_count", "LB Placer Retry Count - Errors", "")
|
|
placerLatencyMeasure = stats.Int64("lb_placer_latency", "LB Placer Latency", "msecs")
|
|
)
|
|
|
|
// attemptTracker is a helper that accumulates the data needed to report
// LB Placer latency and attempt counts for one placement.
type attemptTracker struct {
	// ctx is the context handed to stats.Record when metrics are emitted.
	ctx context.Context

	// startTime is the moment the tracker was created.
	startTime time.Time

	// lastAttemptTime is the moment of the most recent recorded attempt;
	// it stays at the zero time until an attempt is recorded.
	lastAttemptTime time.Time

	// attemptCount is the number of attempts recorded so far.
	attemptCount int64
}
|
|
|
|
func newAttemptTracker(ctx context.Context) *attemptTracker {
|
|
return &attemptTracker{
|
|
ctx: ctx,
|
|
startTime: time.Now(),
|
|
}
|
|
}
|
|
|
|
func (data *attemptTracker) finalizeAttempts(isCommited bool) {
|
|
stats.Record(data.ctx, attemptCountMeasure.M(data.attemptCount))
|
|
|
|
// IMPORTANT: here we use (lastAttemptTime - startTime). We want to exclude TryExec
|
|
// latency *if* TryExec() goes through with commit. Placer latency metric only shows
|
|
// how much time are spending in Placer loop/retries. The metric includes rtt/latency of
|
|
// *all* unsuccessful NACK (retriable) responses from runners as well. For example, if
|
|
// Placer loop here retries 4 runners (which takes 5 msecs each) and then 5th runner
|
|
// succeeds (but takes 35 seconds to finish execution), we report 20 msecs as our LB
|
|
// latency.
|
|
endTime := data.lastAttemptTime
|
|
if !isCommited {
|
|
endTime = time.Now()
|
|
}
|
|
|
|
stats.Record(data.ctx, placerLatencyMeasure.M(int64(endTime.Sub(data.startTime)/time.Millisecond)))
|
|
}
|
|
|
|
func (data *attemptTracker) recordAttempt() {
|
|
data.lastAttemptTime = time.Now()
|
|
if data.attemptCount != math.MaxInt64 {
|
|
data.attemptCount++
|
|
}
|
|
}
|
|
|
|
func makeKeys(names []string) []tag.Key {
|
|
var tagKeys []tag.Key
|
|
for _, name := range names {
|
|
key, err := tag.NewKey(name)
|
|
if err != nil {
|
|
logrus.WithError(err).Fatal("cannot create tag key for %v", name)
|
|
}
|
|
tagKeys = append(tagKeys, key)
|
|
}
|
|
return tagKeys
|
|
}
|
|
|
|
func createView(measure stats.Measure, agg *view.Aggregation, tagKeys []string) *view.View {
|
|
return &view.View{
|
|
Name: measure.Name(),
|
|
Description: measure.Description(),
|
|
TagKeys: makeKeys(tagKeys),
|
|
Measure: measure,
|
|
Aggregation: agg,
|
|
}
|
|
}
|
|
|
|
func RegisterPlacerViews(tagKeys []string) {
|
|
err := view.Register(
|
|
createView(attemptCountMeasure, view.Distribution(0, 1, 2, 4, 8, 32, 64, 256), tagKeys),
|
|
createView(errorPoolCountMeasure, view.Count(), tagKeys),
|
|
createView(emptyPoolCountMeasure, view.Count(), tagKeys),
|
|
createView(cancelCountMeasure, view.Count(), tagKeys),
|
|
createView(placedErrorCountMeasure, view.Count(), tagKeys),
|
|
createView(placedOKCountMeasure, view.Count(), tagKeys),
|
|
createView(retryTooBusyCountMeasure, view.Count(), tagKeys),
|
|
createView(retryErrorCountMeasure, view.Count(), tagKeys),
|
|
createView(placerLatencyMeasure, view.Distribution(1, 10, 25, 50, 200, 1000, 1500, 2000, 2500, 3000, 10000, 60000), tagKeys),
|
|
)
|
|
if err != nil {
|
|
logrus.WithError(err).Fatal("cannot create view")
|
|
}
|
|
}
|