Change basic stats to use opentracing rather than Prometheus API (#671)

* Change basic stats to use opentracing rather than Prometheus API directly

* Just ran gofmt

* Extract opentracing access for metrics to common/metrics.go

* Replace quoted strings with constants where possible
Author: Nigel Deakin
Date: 2018-01-11 17:34:51 +00:00
Committed by: GitHub
Parent: ba0aa3b1a9
Commit: ac2bfd3462
5 changed files with 361 additions and 118 deletions
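In outline, the change replaces direct use of the Prometheus client (the counter and gauge vectors previously registered in the agent's stats code) with small helpers that record each metric update as a logged field on a short-lived tracing span; the Zipkin-to-Prometheus collector then converts those fields back into Prometheus counters, gauges and histograms. A minimal sketch of the reporting side, simplified from the helpers the commit adds in api/common/metrics.go (the lowercase incrementCounter and the package name are illustrative, not part of the commit):

package sketch

import (
    "context"

    "github.com/opentracing/opentracing-go"
    "github.com/opentracing/opentracing-go/log"
)

// incrementCounter is a simplified stand-in for common.IncrementCounter: it opens a
// throwaway span, logs a single prefixed field on it, and finishes the span straight
// away so the collector picks up the update without waiting for a long-lived span.
func incrementCounter(ctx context.Context, metric string) {
    fieldname := "fn_counter_" + metric // the prefix tells the collector this is a counter
    span, _ := opentracing.StartSpanFromContext(ctx, fieldname+"_dummy")
    defer span.Finish()
    span.LogFields(log.Float64(fieldname, 1)) // counters are reported as float64 increments
}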


@@ -16,7 +16,6 @@ import (
"github.com/fnproject/fn/api/models" "github.com/fnproject/fn/api/models"
"github.com/fnproject/fn/fnext" "github.com/fnproject/fn/fnext"
"github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/log"
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@@ -174,11 +173,11 @@ func transformTimeout(e error, isRetriable bool) error {
 // handleStatsDequeue handles stats for dequeuing for early exit (getSlot or Start)
 // cases. Only timeouts can be a simple dequeue while other cases are actual errors.
-func (a *agent) handleStatsDequeue(err error, callI Call) {
+func (a *agent) handleStatsDequeue(ctx context.Context, err error, callI Call) {
     if err == context.DeadlineExceeded {
-        a.stats.Dequeue(callI.Model().AppName, callI.Model().Path)
+        a.stats.Dequeue(ctx, callI.Model().AppName, callI.Model().Path)
     } else {
-        a.stats.DequeueAndFail(callI.Model().AppName, callI.Model().Path)
+        a.stats.DequeueAndFail(ctx, callI.Model().AppName, callI.Model().Path)
     }
 }
@@ -192,9 +191,6 @@ func (a *agent) Submit(callI Call) error {
     default:
     }
-    // increment queued count
-    a.stats.Enqueue(callI.Model().AppName, callI.Model().Path)
     call := callI.(*call)
     ctx := call.req.Context()
@@ -219,9 +215,14 @@ func (a *agent) Submit(callI Call) error {
     call.req = call.req.WithContext(ctxSlotWait)
     defer cancelSlotWait()
+    // increment queued count
+    // this is done after setting "fn_appname" and "fn_path"
+    a.stats.Enqueue(ctx, callI.Model().AppName, callI.Model().Path)
     slot, err := a.getSlot(ctxSlotWait, call) // find ram available / running
     if err != nil {
-        a.handleStatsDequeue(err, call)
+        a.handleStatsDequeue(ctx, err, call)
         return transformTimeout(err, true)
     }
     // TODO if the call times out & container is created, we need
@@ -230,7 +231,7 @@ func (a *agent) Submit(callI Call) error {
     err = call.Start(ctxSlotWait)
     if err != nil {
-        a.handleStatsDequeue(err, call)
+        a.handleStatsDequeue(ctx, err, call)
         return transformTimeout(err, true)
     }
@@ -241,7 +242,7 @@ func (a *agent) Submit(callI Call) error {
     defer cancelExec()
     // decrement queued count, increment running count
-    a.stats.DequeueAndStart(callI.Model().AppName, callI.Model().Path)
+    a.stats.DequeueAndStart(ctx, callI.Model().AppName, callI.Model().Path)
     err = slot.exec(ctxExec, call)
     // pass this error (nil or otherwise) to end directly, to store status, etc
@@ -249,10 +250,10 @@ func (a *agent) Submit(callI Call) error {
     if err == nil {
         // decrement running count, increment completed count
-        a.stats.Complete(callI.Model().AppName, callI.Model().Path)
+        a.stats.Complete(ctx, callI.Model().AppName, callI.Model().Path)
     } else {
         // decrement running count, increment failed count
-        a.stats.Failed(callI.Model().AppName, callI.Model().Path)
+        a.stats.Failed(ctx, callI.Model().AppName, callI.Model().Path)
     }
     // TODO: we need to allocate more time to store the call + logs in case the call timed out,
@@ -726,16 +727,19 @@ func (c *container) Timeout() time.Duration { return c.timeout }
 func (c *container) EnvVars() map[string]string { return c.env }
 func (c *container) Memory() uint64 { return c.memory * 1024 * 1024 } // convert MB
-// Log the specified stats to a tracing span.
-// Spans are not processed by the collector until the span ends, so to prevent any delay
-// in processing the stats when the function is long-lived we create a new span for every call
+// WriteStat publishes each metric in the specified Stats structure as a histogram metric
 func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {
-    span, ctx := opentracing.StartSpanFromContext(ctx, "docker_stats")
-    defer span.Finish()
+    // Convert each metric value from uint64 to float64
+    // and, for backward compatibility reasons, prepend each metric name with "docker_stats_fn_"
+    // (if we don't care about compatibility then we can remove that)
+    var metrics = make(map[string]float64)
     for key, value := range stat.Metrics {
-        span.LogFields(log.Uint64("fn_"+key, value))
+        metrics["docker_stats_fn_"+key] = float64(value)
     }
+    common.PublishHistograms(ctx, metrics)
     c.Lock()
     defer c.Unlock()
     if c.stats != nil {
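To see what the rewritten WriteStat emits, take a hypothetical driver stat (the key cpu_user is illustrative, not from the commit): stat.Metrics["cpu_user"] = 250 becomes metrics["docker_stats_fn_cpu_user"] = 250.0; common.PublishHistograms logs that as the span field fn_histogram_docker_stats_fn_cpu_user, and the collector changes further down strip the fn_histogram_ prefix and re-prepend fn_, so Prometheus ends up with a histogram named fn_docker_stats_fn_cpu_user.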


@@ -1,9 +1,9 @@
 package agent
 import (
+    "context"
+    "github.com/fnproject/fn/api/common"
     "sync"
-    "github.com/prometheus/client_golang/prometheus"
 )
 // TODO this should expose:
@@ -30,8 +30,9 @@ type functionStats struct {
     failed uint64
 }
+// Stats hold the statistics for all functions combined
+// and the statistics for each individual function
 type Stats struct {
-    // statistics for all functions combined
     Queue uint64
     Running uint64
     Complete uint64
@@ -40,7 +41,7 @@ type Stats struct {
     FunctionStatsMap map[string]*FunctionStats
 }
-// statistics for an individual function
+// FunctionStats holds the statistics for an individual function
 type FunctionStats struct {
     Queue uint64
     Running uint64
@@ -48,52 +49,6 @@ type FunctionStats struct {
     Failed uint64
 }
-var (
-    fnCalls = prometheus.NewCounterVec(
-        prometheus.CounterOpts{
-            Name: "fn_api_calls",
-            Help: "Function calls by app and path",
-        },
-        [](string){"app", "path"},
-    )
-    fnQueued = prometheus.NewGaugeVec(
-        prometheus.GaugeOpts{
-            Name: "fn_api_queued",
-            Help: "Queued requests by app and path",
-        },
-        [](string){"app", "path"},
-    )
-    fnRunning = prometheus.NewGaugeVec(
-        prometheus.GaugeOpts{
-            Name: "fn_api_running",
-            Help: "Running requests by app and path",
-        },
-        [](string){"app", "path"},
-    )
-    fnCompleted = prometheus.NewCounterVec(
-        prometheus.CounterOpts{
-            Name: "fn_api_completed",
-            Help: "Completed requests by app and path",
-        },
-        [](string){"app", "path"},
-    )
-    fnFailed = prometheus.NewCounterVec(
-        prometheus.CounterOpts{
-            Name: "fn_api_failed",
-            Help: "Failed requests by path",
-        },
-        [](string){"app", "path"},
-    )
-)
-func init() {
-    prometheus.MustRegister(fnCalls)
-    prometheus.MustRegister(fnQueued)
-    prometheus.MustRegister(fnRunning)
-    prometheus.MustRegister(fnFailed)
-    prometheus.MustRegister(fnCompleted)
-}
 func (s *stats) getStatsForFunction(path string) *functionStats {
     if s.functionStatsMap == nil {
         s.functionStatsMap = make(map[string]*functionStats)
@@ -107,80 +62,81 @@ func (s *stats) getStatsForFunction(path string) *functionStats {
     return thisFunctionStats
 }
-func (s *stats) Enqueue(app string, path string) {
+func (s *stats) Enqueue(ctx context.Context, app string, path string) {
     s.mu.Lock()
     s.queue++
     s.getStatsForFunction(path).queue++
-    fnQueued.WithLabelValues(app, path).Inc()
-    fnCalls.WithLabelValues(app, path).Inc()
+    common.IncrementGauge(ctx, queuedMetricName)
+    common.IncrementCounter(ctx, callsMetricName)
     s.mu.Unlock()
 }
 // Call when a function has been queued but cannot be started because of an error
-func (s *stats) Dequeue(app string, path string) {
+func (s *stats) Dequeue(ctx context.Context, app string, path string) {
     s.mu.Lock()
     s.queue--
     s.getStatsForFunction(path).queue--
-    fnQueued.WithLabelValues(app, path).Dec()
+    common.DecrementGauge(ctx, queuedMetricName)
     s.mu.Unlock()
 }
-func (s *stats) DequeueAndStart(app string, path string) {
+func (s *stats) DequeueAndStart(ctx context.Context, app string, path string) {
     s.mu.Lock()
     s.queue--
     s.getStatsForFunction(path).queue--
-    fnQueued.WithLabelValues(app, path).Dec()
+    common.DecrementGauge(ctx, queuedMetricName)
     s.running++
     s.getStatsForFunction(path).running++
-    fnRunning.WithLabelValues(app, path).Inc()
+    common.IncrementGauge(ctx, runningSuffix)
     s.mu.Unlock()
 }
-func (s *stats) Complete(app string, path string) {
+func (s *stats) Complete(ctx context.Context, app string, path string) {
     s.mu.Lock()
     s.running--
     s.getStatsForFunction(path).running--
-    fnRunning.WithLabelValues(app, path).Dec()
+    common.DecrementGauge(ctx, runningSuffix)
     s.complete++
     s.getStatsForFunction(path).complete++
-    fnCompleted.WithLabelValues(app, path).Inc()
+    common.IncrementCounter(ctx, completedMetricName)
     s.mu.Unlock()
 }
-func (s *stats) Failed(app string, path string) {
+func (s *stats) Failed(ctx context.Context, app string, path string) {
     s.mu.Lock()
     s.running--
     s.getStatsForFunction(path).running--
-    fnRunning.WithLabelValues(app, path).Dec()
+    common.DecrementGauge(ctx, runningSuffix)
     s.failed++
     s.getStatsForFunction(path).failed++
-    fnFailed.WithLabelValues(app, path).Inc()
+    common.IncrementCounter(ctx, failedMetricName)
     s.mu.Unlock()
 }
-func (s *stats) DequeueAndFail(app string, path string) {
+func (s *stats) DequeueAndFail(ctx context.Context, app string, path string) {
     s.mu.Lock()
     s.queue--
     s.getStatsForFunction(path).queue--
-    fnQueued.WithLabelValues(app, path).Dec()
+    common.DecrementGauge(ctx, queuedMetricName)
     s.failed++
     s.getStatsForFunction(path).failed++
-    fnFailed.WithLabelValues(app, path).Inc()
+    common.IncrementCounter(ctx, failedMetricName)
     s.mu.Unlock()
 }
@@ -200,3 +156,11 @@ func (s *stats) Stats() Stats {
     s.mu.Unlock()
     return stats
 }
+const (
+    queuedMetricName = "queued"
+    callsMetricName = "calls"
+    runningSuffix = "running"
+    completedMetricName = "completed"
+    failedMetricName = "failed"
+)

api/common/metrics.go (new file, 101 lines)

@@ -0,0 +1,101 @@
+package common
+
+import (
+    "context"
+
+    "github.com/opentracing/opentracing-go"
+    "github.com/opentracing/opentracing-go/log"
+)
+
+// IncrementGauge increments the specified gauge metric
+// It does this by logging an appropriate field value to a tracing span.
+func IncrementGauge(ctx context.Context, metric string) {
+    // The field name we use is the specified metric name prepended with FieldnamePrefixGauge to designate that it is a Prometheus gauge metric
+    // The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+    fieldname := FieldnamePrefixGauge + metric
+
+    // Spans are not processed by the collector until the span ends, so to prevent any delay
+    // in processing the stats when the current span is long-lived we create a new span for every call
+    // suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+    span, ctx := opentracing.StartSpanFromContext(ctx, fieldname+SpannameSuffixDummy)
+    defer span.Finish()
+
+    // gauge metrics are actually float64; here we log that it should be increased by +1
+    span.LogFields(log.Float64(fieldname, 1.))
+}
+
+// DecrementGauge decrements the specified gauge metric
+// It does this by logging an appropriate field value to a tracing span.
+func DecrementGauge(ctx context.Context, metric string) {
+    // The field name we use is the specified metric name prepended with FieldnamePrefixGauge to designate that it is a Prometheus gauge metric
+    // The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+    fieldname := FieldnamePrefixGauge + metric
+
+    // Spans are not processed by the collector until the span ends, so to prevent any delay
+    // in processing the stats when the current span is long-lived we create a new span for every call.
+    // suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+    span, ctx := opentracing.StartSpanFromContext(ctx, fieldname+SpannameSuffixDummy)
+    defer span.Finish()
+
+    // gauge metrics are actually float64; here we log that it should be increased by -1
+    span.LogFields(log.Float64(fieldname, -1.))
+}
+
+// IncrementCounter increments the specified counter metric
+// It does this by logging an appropriate field value to a tracing span.
+func IncrementCounter(ctx context.Context, metric string) {
+    // The field name we use is the specified metric name prepended with FieldnamePrefixCounter to designate that it is a Prometheus counter metric
+    // The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+    fieldname := FieldnamePrefixCounter + metric
+
+    // Spans are not processed by the collector until the span ends, so to prevent any delay
+    // in processing the stats when the current span is long-lived we create a new span for every call.
+    // suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+    span, ctx := opentracing.StartSpanFromContext(ctx, fieldname+SpannameSuffixDummy)
+    defer span.Finish()
+
+    // counter metrics are actually float64; here we log that it should be increased by +1
+    span.LogFields(log.Float64(fieldname, 1.))
+}
+
+// If required, create a scalar version of PublishHistograms that publishes a single histogram metric
+
+// PublishHistograms publishes the specified histogram metrics
+// It does this by logging appropriate field values to a tracing span
+func PublishHistograms(ctx context.Context, metrics map[string]float64) {
+    // Spans are not processed by the collector until the span ends, so to prevent any delay
+    // in processing the stats when the current span is long-lived we create a new span for every call.
+    // suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+    span, ctx := opentracing.StartSpanFromContext(ctx, "histogram_metrics"+SpannameSuffixDummy)
+    defer span.Finish()
+
+    for key, value := range metrics {
+        // The field name we use is the metric name prepended with FieldnamePrefixHistogram to designate that it is a Prometheus histogram metric
+        // The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+        fieldname := FieldnamePrefixHistogram + key
+        span.LogFields(log.Float64(fieldname, value))
+    }
+}
+
+const (
+    // FnPrefix is a constant for "fn_", used as a prefix for span names, field names, Prometheus metric names and Prometheus label names
+    FnPrefix = "fn_"
+
+    // FieldnamePrefixHistogram is prefixed to the name of a logged field
+    // to denote that it corresponds to a histogram metric
+    FieldnamePrefixHistogram = FnPrefix + "histogram_"
+
+    // FieldnamePrefixCounter is prefixed to the name of a logged field
+    // to denote that it corresponds to a counter metric
+    FieldnamePrefixCounter = FnPrefix + "counter_"
+
+    // FieldnamePrefixGauge is prefixed to the name of a logged field
+    // to denote that it corresponds to a gauge metric
+    FieldnamePrefixGauge = FnPrefix + "gauge_"
+
+    // SpannameSuffixDummy is suffixed to the name of a tracing span
+    // to denote that it has been created solely for the purpose of carrying metric values
+    // and is not itself of any interest and should not be converted to a Prometheus duration metric
+    SpannameSuffixDummy = "_dummy"
+)
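An end-to-end example of how these helpers reach Prometheus (an illustrative walk-through derived from the code in this commit): common.IncrementGauge(ctx, "queued") logs the field fn_gauge_queued=1 on a throwaway span named fn_gauge_queued_dummy; the collector in the next file ignores that span's duration (because of the _dummy suffix), strips the fn_gauge_ prefix from the field, re-prepends fn_, and calls Add(1) on a GaugeVec named fn_queued. DecrementGauge logs -1 in the same way, so the gauge tracks the running sum of +1/-1 updates. This renaming is also why the Grafana dashboard queries at the end of the diff change from fn_api_queued to fn_queued.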


@@ -1,6 +1,7 @@
 package server
 import (
+    "github.com/fnproject/fn/api/common"
     "github.com/openzipkin/zipkin-go-opentracing"
     "github.com/openzipkin/zipkin-go-opentracing/thrift/gen-go/zipkincore"
     "github.com/prometheus/client_golang/prometheus"
@@ -21,6 +22,14 @@ type PrometheusCollector struct {
     // and the corresponding value is a HistogramVec metric used to report the duration of spans with this name to Prometheus
     histogramVecMap map[string]*prometheus.HistogramVec
+
+    // In this map, the key is the name of a Prometheus metric,
+    // and the corresponding value is a CounterVec used to report counter values extracted from spans to Prometheus
+    counterVecMap map[string]*prometheus.CounterVec
+
+    // In this map, the key is the name of a Prometheus metric,
+    // and the corresponding value is a GaugeVec used to report gauge values extracted from spans to Prometheus
+    gaugeVecMap map[string]*prometheus.GaugeVec
+
     // In this map, the key is the name of a tracing span,
     // and the corresponding value is an array containing the label keys that were specified when the HistogramVec metric was created
     registeredLabelKeysMap map[string][]string
@@ -30,6 +39,8 @@ type PrometheusCollector struct {
 func NewPrometheusCollector() (zipkintracer.Collector, error) {
     pc := &PrometheusCollector{
         histogramVecMap:        make(map[string]*prometheus.HistogramVec),
+        counterVecMap:          make(map[string]*prometheus.CounterVec),
+        gaugeVecMap:            make(map[string]*prometheus.GaugeVec),
         registeredLabelKeysMap: make(map[string][]string),
     }
     return pc, nil
@@ -43,22 +54,51 @@ func (pc *PrometheusCollector) Collect(span *zipkincore.Span) error {
     // extract any label values from the span
     labelKeysFromSpan, labelValuesFromSpan := getLabels(span)
-    // get the HistogramVec for this span name
-    histogramVec, labelValuesToUse := pc.getHistogramVec(
-        ("fn_span_" + spanName + "_duration_seconds"), ("Span " + spanName + " duration, by span name"), labelKeysFromSpan, labelValuesFromSpan)
-    // now report the span duration value
-    histogramVec.With(labelValuesToUse).Observe((time.Duration(span.GetDuration()) * time.Microsecond).Seconds())
-    // now extract any logged metric values from the span
-    for key, value := range getLoggedMetrics(span) {
+    // report the duration of this span as a histogram
+    // (unless the span name ends with SpannameSuffixDummy to denote it as being purely the carrier of a metric value and so of no interest in itself)
+    if !strings.HasSuffix(spanName, common.SpannameSuffixDummy) {
+        // get the HistogramVec for this span name
+        histogramVec, labelValuesToUse := pc.getHistogramVec(
+            ("fn_span_" + spanName + "_duration_seconds"), ("Span " + spanName + " duration, by span name"), labelKeysFromSpan, labelValuesFromSpan)
+        // now report the span duration value
+        histogramVec.With(labelValuesToUse).Observe((time.Duration(span.GetDuration()) * time.Microsecond).Seconds())
+    }
+    // now extract any logged histogram metric values from the span
+    for key, value := range getLoggedHistogramMetrics(span) {
         // get the HistogramVec for this metric
         thisMetricHistogramVec, labelValuesToUse := pc.getHistogramVec(
-            ("fn_" + spanName + "_" + key), (spanName + " metric " + key), labelKeysFromSpan, labelValuesFromSpan)
+            key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan)
         // now report the metric value
-        thisMetricHistogramVec.With(labelValuesToUse).Observe(float64(value))
+        thisMetricHistogramVec.With(labelValuesToUse).Observe(value)
+    }
+    // now extract any logged counter metric values from the span
+    for key, value := range getLoggedCounterMetrics(span) {
+        // get the CounterVec for this metric
+        thisMetricCounterVec, labelValuesToUse := pc.getCounterVec(
+            key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan)
+        // now report the metric value
+        thisMetricCounterVec.With(labelValuesToUse).Add(value)
+    }
+    // now extract any logged gauge metric values from the span
+    for key, value := range getLoggedGaugeMetrics(span) {
+        // get the GaugeVec for this metric
+        thisMetricGaugeVec, labelValuesToUse := pc.getGaugeVec(
+            key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan)
+        // now report the metric value
+        thisMetricGaugeVec.With(labelValuesToUse).Add(value)
     }
     return nil
@@ -106,6 +146,90 @@ func (pc *PrometheusCollector) getHistogramVec(
     return histogramVec, labelValuesToUse
 }
+
+// Return (and create, if necessary) a CounterVec for the specified Prometheus metric
+func (pc *PrometheusCollector) getCounterVec(
+    metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) (
+    *prometheus.CounterVec, map[string]string) {
+
+    var labelValuesToUse map[string]string
+
+    pc.lock.Lock()
+    defer pc.lock.Unlock()
+
+    counterVec, found := pc.counterVecMap[metricName]
+    if !found {
+        // create a new CounterVec
+        counterVec = prometheus.NewCounterVec(
+            prometheus.CounterOpts{
+                Name: metricName,
+                Help: metricHelp,
+            },
+            labelKeysFromSpan,
+        )
+        pc.counterVecMap[metricName] = counterVec
+        pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan
+        prometheus.MustRegister(counterVec)
+        labelValuesToUse = labelValuesFromSpan
+    } else {
+        // found an existing CounterVec
+        // need to be careful here, since we must supply the same label keys as when we first created the metric
+        // otherwise we will get an "inconsistent label cardinality" panic
+        // that's why we saved the original label keys in the registeredLabelKeysMap map
+        // so we can use that to construct a map of label key/value pairs to set on the metric
+        labelValuesToUse = make(map[string]string)
+        for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] {
+            if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found {
+                labelValuesToUse[thisRegisteredLabelKey] = value
+            } else {
+                labelValuesToUse[thisRegisteredLabelKey] = ""
+            }
+        }
+    }
+    return counterVec, labelValuesToUse
+}
+
+// Return (and create, if necessary) a GaugeVec for the specified Prometheus metric
+func (pc *PrometheusCollector) getGaugeVec(
+    metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) (
+    *prometheus.GaugeVec, map[string]string) {
+
+    var labelValuesToUse map[string]string
+
+    pc.lock.Lock()
+    defer pc.lock.Unlock()
+
+    gaugeVec, found := pc.gaugeVecMap[metricName]
+    if !found {
+        // create a new GaugeVec
+        gaugeVec = prometheus.NewGaugeVec(
+            prometheus.GaugeOpts{
+                Name: metricName,
+                Help: metricHelp,
+            },
+            labelKeysFromSpan,
+        )
+        pc.gaugeVecMap[metricName] = gaugeVec
+        pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan
+        prometheus.MustRegister(gaugeVec)
+        labelValuesToUse = labelValuesFromSpan
+    } else {
+        // found an existing GaugeVec
+        // need to be careful here, since we must supply the same label keys as when we first created the metric
+        // otherwise we will get an "inconsistent label cardinality" panic
+        // that's why we saved the original label keys in the registeredLabelKeysMap map
+        // so we can use that to construct a map of label key/value pairs to set on the metric
+        labelValuesToUse = make(map[string]string)
+        for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] {
+            if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found {
+                labelValuesToUse[thisRegisteredLabelKey] = value
+            } else {
+                labelValuesToUse[thisRegisteredLabelKey] = ""
+            }
+        }
+    }
+    return gaugeVec, labelValuesToUse
+}
+
 // extract from the specified span the key/value pairs that we want to add as labels to the Prometheus metric for this span
 // returns an array of keys, and a map of key-value pairs
 func getLabels(span *zipkincore.Span) ([]string, map[string]string) {
@@ -127,20 +251,70 @@ func getLabels(span *zipkincore.Span) ([]string, map[string]string) {
     return keys, labelMap
 }
-// extract from the span the logged metric values, which we assume as uint64 values
-func getLoggedMetrics(span *zipkincore.Span) map[string]uint64 {
-    keyValueMap := make(map[string]uint64)
-    // extract any annotations whose Value starts with "fn_"
+// extract from the span the logged histogram metric values.
+// These are the ones whose names start with FieldnamePrefixHistogram,
+// and whose values we assume are float64
+func getLoggedHistogramMetrics(span *zipkincore.Span) map[string]float64 {
+    keyValueMap := make(map[string]float64)
+    // extract any annotations whose Value starts with FieldnamePrefixHistogram
     annotations := span.GetAnnotations()
     for _, thisAnnotation := range annotations {
-        if strings.HasPrefix(thisAnnotation.GetValue(), "fn_") {
+        if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixHistogram) {
             keyvalue := strings.Split(thisAnnotation.GetValue(), "=")
             if len(keyvalue) == 2 {
-                if value, err := strconv.ParseUint(keyvalue[1], 10, 64); err == nil {
+                if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil {
                     key := strings.TrimSpace(keyvalue[0])
-                    key = key[3:] // strip off leading fn_
+                    key = common.FnPrefix + key[len(common.FieldnamePrefixHistogram):] // strip off fieldname prefix and then prepend "fn_" to the front
+                    keyValueMap[key] = value
+                }
+            }
+        }
+    }
+    return keyValueMap
+}
+
+// extract from the span the logged counter metric values.
+// These are the ones whose names start with FieldnamePrefixCounter,
+// and whose values we assume are float64
+func getLoggedCounterMetrics(span *zipkincore.Span) map[string]float64 {
+    keyValueMap := make(map[string]float64)
+    // extract any annotations whose Value starts with FieldnamePrefixCounter
+    annotations := span.GetAnnotations()
+    for _, thisAnnotation := range annotations {
+        if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixCounter) {
+            keyvalue := strings.Split(thisAnnotation.GetValue(), "=")
+            if len(keyvalue) == 2 {
+                if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil {
+                    key := strings.TrimSpace(keyvalue[0])
+                    key = common.FnPrefix + key[len(common.FieldnamePrefixCounter):] // strip off fieldname prefix and then prepend "fn_" to the front
+                    keyValueMap[key] = value
+                }
+            }
+        }
+    }
+    return keyValueMap
+}
+
+// extract from the span the logged gauge metric values.
+// These are the ones whose names start with FieldnamePrefixGauge,
+// and whose values we assume are float64
+func getLoggedGaugeMetrics(span *zipkincore.Span) map[string]float64 {
+    keyValueMap := make(map[string]float64)
+    // extract any annotations whose Value starts with FieldnamePrefixGauge
+    annotations := span.GetAnnotations()
+    for _, thisAnnotation := range annotations {
+        if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixGauge) {
+            keyvalue := strings.Split(thisAnnotation.GetValue(), "=")
+            if len(keyvalue) == 2 {
+                if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil {
+                    key := strings.TrimSpace(keyvalue[0])
+                    key = common.FnPrefix + key[len(common.FieldnamePrefixGauge):] // strip off fieldname prefix and then prepend "fn_" to the front
                     keyValueMap[key] = value
                 }
             }
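A note on the registeredLabelKeysMap handling above (an illustrative example using the label names that appear elsewhere in this commit): the label keys supplied when a metric is first registered are fixed for its lifetime. If the first span carrying fn_counter_calls has the tags fn_appname and fn_path, the fn_calls CounterVec is registered with exactly those two label keys; when a later span is missing one of them, the code looks up the registered keys and fills the missing label with an empty string rather than passing a mismatched label set, which would otherwise panic with "inconsistent label cardinality".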


@@ -116,7 +116,7 @@
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_queued)", "expr": "sum(fn_queued)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",
@@ -193,7 +193,7 @@
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_running)", "expr": "sum(fn_running)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",
@@ -270,7 +270,7 @@
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_completed)", "expr": "sum(fn_completed)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",
@@ -347,7 +347,7 @@
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_failed)", "expr": "sum(fn_failed)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",
@@ -413,7 +413,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_queued)", "expr": "sum(fn_queued)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Total queued", "legendFormat": "Total queued",
@@ -490,7 +490,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_running)", "expr": "sum(fn_running)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Total running", "legendFormat": "Total running",
@@ -567,7 +567,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_completed)", "expr": "sum(fn_completed)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Total completed", "legendFormat": "Total completed",
@@ -644,7 +644,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(fn_api_failed)", "expr": "sum(fn_failed)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Total failed", "legendFormat": "Total failed",
@@ -655,7 +655,7 @@
"thresholds": [], "thresholds": [],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "Total queued", "title": "Total failed",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 0, "sort": 0,
@@ -738,11 +738,11 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "fn_api_queued", "expr": "fn_queued",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{app}} {{path}}", "legendFormat": "{{fn_appname}} {{fn_path}}",
"refId": "A", "refId": "A",
"step": 1 "step": 1
} }
@@ -822,10 +822,10 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "fn_api_running", "expr": "fn_running",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{app}} {{path}}", "legendFormat": "{{fn_appname}} {{fn_path}}",
"refId": "A", "refId": "A",
"step": 2 "step": 2
} }
@@ -904,10 +904,10 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "fn_api_completed", "expr": "fn_completed",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{app}} {{path}}", "legendFormat": "{{fn_appname}} {{fn_path}}",
"refId": "A", "refId": "A",
"step": 2 "step": 2
} }
@@ -986,10 +986,10 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "fn_api_failed", "expr": "fn_failed",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{app}} {{path}}", "legendFormat": "{{fn_appname}} {{fn_path}}",
"refId": "A", "refId": "A",
"step": 2 "step": 2
} }