From ac2bfd346238d81ef9d246c1cea1b96eef043ad1 Mon Sep 17 00:00:00 2001 From: Nigel Deakin Date: Thu, 11 Jan 2018 17:34:51 +0000 Subject: [PATCH] Change basic stats to use opentracing rather than Prometheus API (#671) * Change basic stats to use opentracing rather than Prometheus API directly * Just ran gofmt * Extract opentracing access for metrics to common/metrics.go * Replace quotes strings with constants where possible --- api/agent/agent.go | 40 ++-- api/agent/stats.go | 98 ++++------ api/common/metrics.go | 101 ++++++++++ api/server/prom_zip_collector.go | 206 +++++++++++++++++++-- examples/grafana/fn_grafana_dashboard.json | 34 ++-- 5 files changed, 361 insertions(+), 118 deletions(-) create mode 100644 api/common/metrics.go diff --git a/api/agent/agent.go b/api/agent/agent.go index d57fbcb5c..9d3adea03 100644 --- a/api/agent/agent.go +++ b/api/agent/agent.go @@ -16,7 +16,6 @@ import ( "github.com/fnproject/fn/api/models" "github.com/fnproject/fn/fnext" "github.com/opentracing/opentracing-go" - "github.com/opentracing/opentracing-go/log" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/sirupsen/logrus" ) @@ -174,11 +173,11 @@ func transformTimeout(e error, isRetriable bool) error { // handleStatsDequeue handles stats for dequeuing for early exit (getSlot or Start) // cases. Only timeouts can be a simple dequeue while other cases are actual errors. -func (a *agent) handleStatsDequeue(err error, callI Call) { +func (a *agent) handleStatsDequeue(ctx context.Context, err error, callI Call) { if err == context.DeadlineExceeded { - a.stats.Dequeue(callI.Model().AppName, callI.Model().Path) + a.stats.Dequeue(ctx, callI.Model().AppName, callI.Model().Path) } else { - a.stats.DequeueAndFail(callI.Model().AppName, callI.Model().Path) + a.stats.DequeueAndFail(ctx, callI.Model().AppName, callI.Model().Path) } } @@ -192,9 +191,6 @@ func (a *agent) Submit(callI Call) error { default: } - // increment queued count - a.stats.Enqueue(callI.Model().AppName, callI.Model().Path) - call := callI.(*call) ctx := call.req.Context() @@ -219,9 +215,14 @@ func (a *agent) Submit(callI Call) error { call.req = call.req.WithContext(ctxSlotWait) defer cancelSlotWait() + // increment queued count + // this is done after setting "fn_appname" and "fn_path" + a.stats.Enqueue(ctx, callI.Model().AppName, callI.Model().Path) + slot, err := a.getSlot(ctxSlotWait, call) // find ram available / running + if err != nil { - a.handleStatsDequeue(err, call) + a.handleStatsDequeue(ctx, err, call) return transformTimeout(err, true) } // TODO if the call times out & container is created, we need @@ -230,7 +231,7 @@ func (a *agent) Submit(callI Call) error { err = call.Start(ctxSlotWait) if err != nil { - a.handleStatsDequeue(err, call) + a.handleStatsDequeue(ctx, err, call) return transformTimeout(err, true) } @@ -241,7 +242,7 @@ func (a *agent) Submit(callI Call) error { defer cancelExec() // decrement queued count, increment running count - a.stats.DequeueAndStart(callI.Model().AppName, callI.Model().Path) + a.stats.DequeueAndStart(ctx, callI.Model().AppName, callI.Model().Path) err = slot.exec(ctxExec, call) // pass this error (nil or otherwise) to end directly, to store status, etc @@ -249,10 +250,10 @@ func (a *agent) Submit(callI Call) error { if err == nil { // decrement running count, increment completed count - a.stats.Complete(callI.Model().AppName, callI.Model().Path) + a.stats.Complete(ctx, callI.Model().AppName, callI.Model().Path) } else { // decrement running count, increment failed count - 
a.stats.Failed(callI.Model().AppName, callI.Model().Path) + a.stats.Failed(ctx, callI.Model().AppName, callI.Model().Path) } // TODO: we need to allocate more time to store the call + logs in case the call timed out, @@ -726,16 +727,19 @@ func (c *container) Timeout() time.Duration { return c.timeout } func (c *container) EnvVars() map[string]string { return c.env } func (c *container) Memory() uint64 { return c.memory * 1024 * 1024 } // convert MB -// Log the specified stats to a tracing span. -// Spans are not processed by the collector until the span ends, so to prevent any delay -// in processing the stats when the function is long-lived we create a new span for every call +// WriteStat publishes each metric in the specified Stats structure as a histogram metric func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) { - span, ctx := opentracing.StartSpanFromContext(ctx, "docker_stats") - defer span.Finish() + + // Convert each metric value from uint64 to float64 + // and, for backward compatibility reasons, prepend each metric name with "docker_stats_fn_" + // (if we don't care about compatibility then we can remove that) + var metrics = make(map[string]float64) for key, value := range stat.Metrics { - span.LogFields(log.Uint64("fn_"+key, value)) + metrics["docker_stats_fn_"+key] = float64(value) } + common.PublishHistograms(ctx, metrics) + c.Lock() defer c.Unlock() if c.stats != nil { diff --git a/api/agent/stats.go b/api/agent/stats.go index b57ed6855..645a574f1 100644 --- a/api/agent/stats.go +++ b/api/agent/stats.go @@ -1,9 +1,9 @@ package agent import ( + "context" + "github.com/fnproject/fn/api/common" "sync" - - "github.com/prometheus/client_golang/prometheus" ) // TODO this should expose: @@ -30,8 +30,9 @@ type functionStats struct { failed uint64 } +// Stats hold the statistics for all functions combined +// and the statistics for each individual function type Stats struct { - // statistics for all functions combined Queue uint64 Running uint64 Complete uint64 @@ -40,7 +41,7 @@ type Stats struct { FunctionStatsMap map[string]*FunctionStats } -// statistics for an individual function +// FunctionStats holds the statistics for an individual function type FunctionStats struct { Queue uint64 Running uint64 @@ -48,52 +49,6 @@ type FunctionStats struct { Failed uint64 } -var ( - fnCalls = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "fn_api_calls", - Help: "Function calls by app and path", - }, - [](string){"app", "path"}, - ) - fnQueued = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "fn_api_queued", - Help: "Queued requests by app and path", - }, - [](string){"app", "path"}, - ) - fnRunning = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "fn_api_running", - Help: "Running requests by app and path", - }, - [](string){"app", "path"}, - ) - fnCompleted = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "fn_api_completed", - Help: "Completed requests by app and path", - }, - [](string){"app", "path"}, - ) - fnFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "fn_api_failed", - Help: "Failed requests by path", - }, - [](string){"app", "path"}, - ) -) - -func init() { - prometheus.MustRegister(fnCalls) - prometheus.MustRegister(fnQueued) - prometheus.MustRegister(fnRunning) - prometheus.MustRegister(fnFailed) - prometheus.MustRegister(fnCompleted) -} - func (s *stats) getStatsForFunction(path string) *functionStats { if s.functionStatsMap == nil { s.functionStatsMap = make(map[string]*functionStats) @@ 
-107,80 +62,81 @@ func (s *stats) getStatsForFunction(path string) *functionStats { return thisFunctionStats } -func (s *stats) Enqueue(app string, path string) { +func (s *stats) Enqueue(ctx context.Context, app string, path string) { s.mu.Lock() s.queue++ s.getStatsForFunction(path).queue++ - fnQueued.WithLabelValues(app, path).Inc() - fnCalls.WithLabelValues(app, path).Inc() + common.IncrementGauge(ctx, queuedMetricName) + + common.IncrementCounter(ctx, callsMetricName) s.mu.Unlock() } // Call when a function has been queued but cannot be started because of an error -func (s *stats) Dequeue(app string, path string) { +func (s *stats) Dequeue(ctx context.Context, app string, path string) { s.mu.Lock() s.queue-- s.getStatsForFunction(path).queue-- - fnQueued.WithLabelValues(app, path).Dec() + common.DecrementGauge(ctx, queuedMetricName) s.mu.Unlock() } -func (s *stats) DequeueAndStart(app string, path string) { +func (s *stats) DequeueAndStart(ctx context.Context, app string, path string) { s.mu.Lock() s.queue-- s.getStatsForFunction(path).queue-- - fnQueued.WithLabelValues(app, path).Dec() + common.DecrementGauge(ctx, queuedMetricName) s.running++ s.getStatsForFunction(path).running++ - fnRunning.WithLabelValues(app, path).Inc() + common.IncrementGauge(ctx, runningSuffix) s.mu.Unlock() } -func (s *stats) Complete(app string, path string) { +func (s *stats) Complete(ctx context.Context, app string, path string) { s.mu.Lock() s.running-- s.getStatsForFunction(path).running-- - fnRunning.WithLabelValues(app, path).Dec() + common.DecrementGauge(ctx, runningSuffix) s.complete++ s.getStatsForFunction(path).complete++ - fnCompleted.WithLabelValues(app, path).Inc() + common.IncrementCounter(ctx, completedMetricName) s.mu.Unlock() } -func (s *stats) Failed(app string, path string) { +func (s *stats) Failed(ctx context.Context, app string, path string) { s.mu.Lock() s.running-- s.getStatsForFunction(path).running-- - fnRunning.WithLabelValues(app, path).Dec() + common.DecrementGauge(ctx, runningSuffix) s.failed++ s.getStatsForFunction(path).failed++ - fnFailed.WithLabelValues(app, path).Inc() + common.IncrementCounter(ctx, failedMetricName) s.mu.Unlock() } -func (s *stats) DequeueAndFail(app string, path string) { +func (s *stats) DequeueAndFail(ctx context.Context, app string, path string) { s.mu.Lock() s.queue-- s.getStatsForFunction(path).queue-- - fnQueued.WithLabelValues(app, path).Dec() + common.DecrementGauge(ctx, queuedMetricName) s.failed++ s.getStatsForFunction(path).failed++ - fnFailed.WithLabelValues(app, path).Inc() + common.IncrementCounter(ctx, failedMetricName) s.mu.Unlock() } @@ -200,3 +156,11 @@ func (s *stats) Stats() Stats { s.mu.Unlock() return stats } + +const ( + queuedMetricName = "queued" + callsMetricName = "calls" + runningSuffix = "running" + completedMetricName = "completed" + failedMetricName = "failed" +) diff --git a/api/common/metrics.go b/api/common/metrics.go new file mode 100644 index 000000000..d939ac251 --- /dev/null +++ b/api/common/metrics.go @@ -0,0 +1,101 @@ +package common + +import ( + "context" + "github.com/opentracing/opentracing-go" + "github.com/opentracing/opentracing-go/log" +) + +// IncrementGauge increments the specified gauge metric +// It does this by logging an appropriate field value to a tracing span. 
+func IncrementGauge(ctx context.Context, metric string) {
+	// The field name we use is the specified metric name prepended with FieldnamePrefixGauge to designate that it is a Prometheus gauge metric
+	// The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+	fieldname := FieldnamePrefixGauge + metric
+
+	// Spans are not processed by the collector until the span ends, so to prevent any delay
+	// in processing the stats when the current span is long-lived we create a new span for every call.
+	// suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+	span, ctx := opentracing.StartSpanFromContext(ctx, fieldname+SpannameSuffixDummy)
+	defer span.Finish()
+
+	// gauge metrics are actually float64; here we log that it should be increased by +1
+	span.LogFields(log.Float64(fieldname, 1.))
+}
+
+// DecrementGauge decrements the specified gauge metric
+// It does this by logging an appropriate field value to a tracing span.
+func DecrementGauge(ctx context.Context, metric string) {
+	// The field name we use is the specified metric name prepended with FieldnamePrefixGauge to designate that it is a Prometheus gauge metric
+	// The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+	fieldname := FieldnamePrefixGauge + metric
+
+	// Spans are not processed by the collector until the span ends, so to prevent any delay
+	// in processing the stats when the current span is long-lived we create a new span for every call.
+	// suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+	span, ctx := opentracing.StartSpanFromContext(ctx, fieldname+SpannameSuffixDummy)
+	defer span.Finish()
+
+	// gauge metrics are actually float64; here we log that it should be decreased by 1, i.e. a delta of -1
+	span.LogFields(log.Float64(fieldname, -1.))
+}
+
+// IncrementCounter increments the specified counter metric
+// It does this by logging an appropriate field value to a tracing span.
+func IncrementCounter(ctx context.Context, metric string) {
+	// The field name we use is the specified metric name prepended with FieldnamePrefixCounter to designate that it is a Prometheus counter metric
+	// The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name.
+	fieldname := FieldnamePrefixCounter + metric
+
+	// Spans are not processed by the collector until the span ends, so to prevent any delay
+	// in processing the stats when the current span is long-lived we create a new span for every call.
+	// suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest
+	span, ctx := opentracing.StartSpanFromContext(ctx, fieldname+SpannameSuffixDummy)
+	defer span.Finish()
+
+	// counter metrics are actually float64; here we log that it should be increased by +1
+	span.LogFields(log.Float64(fieldname, 1.))
+}
+
+// If required, create a scalar version of PublishHistograms that publishes a single histogram metric
+
+// PublishHistograms publishes the specified histogram metrics
+// It does this by logging appropriate field values to a tracing span.
+func PublishHistograms(ctx context.Context, metrics map[string]float64) {
+
+	// Spans are not processed by the collector until the span ends, so to prevent any delay
+	// in processing the stats when the current span is long-lived we create a new span for every call.
+ // suffix the span name with SpannameSuffixDummy to denote that it is used only to hold a metric and isn't itself of any interest + span, ctx := opentracing.StartSpanFromContext(ctx, "histogram_metrics"+SpannameSuffixDummy) + defer span.Finish() + + for key, value := range metrics { + // The field name we use is the metric name prepended with FieldnamePrefixHistogram to designate that it is a Prometheus histogram metric + // The collector will replace that prefix with "fn_" and use the result as the Prometheus metric name. + fieldname := FieldnamePrefixHistogram + key + span.LogFields(log.Float64(fieldname, value)) + } +} + +const ( + + // FnPrefix is a constant for "fn_", used as a prefix for span names, field names, Prometheus metric names and Prometheus label names + FnPrefix = "fn_" + + // FieldnamePrefixHistogram is prefixed to the name of a logged field + // to denote that it corresponds to a histogram metric + FieldnamePrefixHistogram = FnPrefix + "histogram_" + + // FieldnamePrefixCounter is prefixed to the name of a logged field + // to denote that it corresponds to a counter metric + FieldnamePrefixCounter = FnPrefix + "counter_" + + // FieldnamePrefixGauge is prefixed to the name of a logged field + // to denote that it corresponds to a gauge metric + FieldnamePrefixGauge = FnPrefix + "gauge_" + + // SpannameSuffixDummy is suffixed to the name of a tracing span + // to denote that it has been created solely for the purpose of carrying metric values + // and is not itself of any interest and should not be converted to a Prometheus duration metric + SpannameSuffixDummy = "_dummy" +) diff --git a/api/server/prom_zip_collector.go b/api/server/prom_zip_collector.go index d11f9e9f0..cc8900e70 100644 --- a/api/server/prom_zip_collector.go +++ b/api/server/prom_zip_collector.go @@ -1,6 +1,7 @@ package server import ( + "github.com/fnproject/fn/api/common" "github.com/openzipkin/zipkin-go-opentracing" "github.com/openzipkin/zipkin-go-opentracing/thrift/gen-go/zipkincore" "github.com/prometheus/client_golang/prometheus" @@ -21,6 +22,14 @@ type PrometheusCollector struct { // and the corresponding value is a HistogramVec metric used to report the duration of spans with this name to Prometheus histogramVecMap map[string]*prometheus.HistogramVec + // In this map, the key is the name of a tracing span, + // and the corresponding value is a CounterVec metric used to report the duration of spans with this name to Prometheus + counterVecMap map[string]*prometheus.CounterVec + + // In this map, the key is the name of a tracing span, + // and the corresponding value is a GaugeVec metric used to report the duration of spans with this name to Prometheus + gaugeVecMap map[string]*prometheus.GaugeVec + // In this map, the key is the name of a tracing span, // and the corresponding value is an array containing the label keys that were specified when the HistogramVec metric was created registeredLabelKeysMap map[string][]string @@ -30,6 +39,8 @@ type PrometheusCollector struct { func NewPrometheusCollector() (zipkintracer.Collector, error) { pc := &PrometheusCollector{ histogramVecMap: make(map[string]*prometheus.HistogramVec), + counterVecMap: make(map[string]*prometheus.CounterVec), + gaugeVecMap: make(map[string]*prometheus.GaugeVec), registeredLabelKeysMap: make(map[string][]string), } return pc, nil @@ -43,22 +54,51 @@ func (pc *PrometheusCollector) Collect(span *zipkincore.Span) error { // extract any label values from the span labelKeysFromSpan, labelValuesFromSpan := getLabels(span) - // get 
the HistogramVec for this span name - histogramVec, labelValuesToUse := pc.getHistogramVec( - ("fn_span_" + spanName + "_duration_seconds"), ("Span " + spanName + " duration, by span name"), labelKeysFromSpan, labelValuesFromSpan) + // report the duration of this span as a histogram + // (unless the span name ends with SpannameSuffixDummy to denote it as being purely the carrier of a metric value and so of no interest in itself) + if !strings.HasSuffix(spanName, common.SpannameSuffixDummy) { - // now report the span duration value - histogramVec.With(labelValuesToUse).Observe((time.Duration(span.GetDuration()) * time.Microsecond).Seconds()) + // get the HistogramVec for this span name + histogramVec, labelValuesToUse := pc.getHistogramVec( + ("fn_span_" + spanName + "_duration_seconds"), ("Span " + spanName + " duration, by span name"), labelKeysFromSpan, labelValuesFromSpan) - // now extract any logged metric values from the span - for key, value := range getLoggedMetrics(span) { + // now report the span duration value + histogramVec.With(labelValuesToUse).Observe((time.Duration(span.GetDuration()) * time.Microsecond).Seconds()) + + } + + // now extract any logged histogram metric values from the span + for key, value := range getLoggedHistogramMetrics(span) { // get the HistogramVec for this metric thisMetricHistogramVec, labelValuesToUse := pc.getHistogramVec( - ("fn_" + spanName + "_" + key), (spanName + " metric " + key), labelKeysFromSpan, labelValuesFromSpan) + key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan) // now report the metric value - thisMetricHistogramVec.With(labelValuesToUse).Observe(float64(value)) + thisMetricHistogramVec.With(labelValuesToUse).Observe(value) + } + + // now extract any logged counter metric values from the span + for key, value := range getLoggedCounterMetrics(span) { + + // get the CounterVec for this metric + thisMetricCounterVec, labelValuesToUse := pc.getCounterVec( + key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan) + + // now report the metric value + thisMetricCounterVec.With(labelValuesToUse).Add(value) + } + + // now extract any logged gauge metric values from the span + for key, value := range getLoggedGaugeMetrics(span) { + + // get the GaugeVec for this metric + thisMetricGaugeVec, labelValuesToUse := pc.getGaugeVec( + key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan) + + // now report the metric value + thisMetricGaugeVec.With(labelValuesToUse).Add(value) + } return nil @@ -106,6 +146,90 @@ func (pc *PrometheusCollector) getHistogramVec( return histogramVec, labelValuesToUse } +// Return (and create, if necessary) a CounterVec for the specified Prometheus metric +func (pc *PrometheusCollector) getCounterVec( + metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) ( + *prometheus.CounterVec, map[string]string) { + + var labelValuesToUse map[string]string + + pc.lock.Lock() + defer pc.lock.Unlock() + + counterVec, found := pc.counterVecMap[metricName] + if !found { + // create a new CounterVec + counterVec = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: metricName, + Help: metricHelp, + }, + labelKeysFromSpan, + ) + pc.counterVecMap[metricName] = counterVec + pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan + prometheus.MustRegister(counterVec) + labelValuesToUse = labelValuesFromSpan + } else { + // found an existing CounterVec + // need to be careful here, since we must supply the same label keys as when we first created the 
metric + // otherwise we will get a "inconsistent label cardinality" panic + // that's why we saved the original label keys in the registeredLabelKeysMap map + // so we can use that to construct a map of label key/value pairs to set on the metric + labelValuesToUse = make(map[string]string) + for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] { + if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found { + labelValuesToUse[thisRegisteredLabelKey] = value + } else { + labelValuesToUse[thisRegisteredLabelKey] = "" + } + } + } + return counterVec, labelValuesToUse +} + +// Return (and create, if necessary) a GaugeVec for the specified Prometheus metric +func (pc *PrometheusCollector) getGaugeVec( + metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) ( + *prometheus.GaugeVec, map[string]string) { + + var labelValuesToUse map[string]string + + pc.lock.Lock() + defer pc.lock.Unlock() + + gaugeVec, found := pc.gaugeVecMap[metricName] + if !found { + // create a new GaugeVec + gaugeVec = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: metricName, + Help: metricHelp, + }, + labelKeysFromSpan, + ) + pc.gaugeVecMap[metricName] = gaugeVec + pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan + prometheus.MustRegister(gaugeVec) + labelValuesToUse = labelValuesFromSpan + } else { + // found an existing GaugeVec + // need to be careful here, since we must supply the same label keys as when we first created the metric + // otherwise we will get a "inconsistent label cardinality" panic + // that's why we saved the original label keys in the registeredLabelKeysMap map + // so we can use that to construct a map of label key/value pairs to set on the metric + labelValuesToUse = make(map[string]string) + for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] { + if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found { + labelValuesToUse[thisRegisteredLabelKey] = value + } else { + labelValuesToUse[thisRegisteredLabelKey] = "" + } + } + } + return gaugeVec, labelValuesToUse +} + // extract from the specified span the key/value pairs that we want to add as labels to the Prometheus metric for this span // returns an array of keys, and a map of key-value pairs func getLabels(span *zipkincore.Span) ([]string, map[string]string) { @@ -127,20 +251,70 @@ func getLabels(span *zipkincore.Span) ([]string, map[string]string) { return keys, labelMap } -// extract from the span the logged metric values, which we assume as uint64 values -func getLoggedMetrics(span *zipkincore.Span) map[string]uint64 { +// extract from the span the logged histogram metric values. 
+// These are the ones whose names start with FieldnamePrefixHistogram, +// and whose values we assume are float64 +func getLoggedHistogramMetrics(span *zipkincore.Span) map[string]float64 { - keyValueMap := make(map[string]uint64) + keyValueMap := make(map[string]float64) - // extract any annotations whose Value starts with "fn_" + // extract any annotations whose Value starts with FieldnamePrefixHistogram annotations := span.GetAnnotations() for _, thisAnnotation := range annotations { - if strings.HasPrefix(thisAnnotation.GetValue(), "fn_") { + if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixHistogram) { keyvalue := strings.Split(thisAnnotation.GetValue(), "=") if len(keyvalue) == 2 { - if value, err := strconv.ParseUint(keyvalue[1], 10, 64); err == nil { + if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil { key := strings.TrimSpace(keyvalue[0]) - key = key[3:] // strip off leading fn_ + key = common.FnPrefix + key[len(common.FieldnamePrefixHistogram):] // strip off fieldname prefix and then prepend "fn_" to the front + keyValueMap[key] = value + } + } + } + } + return keyValueMap +} + +// extract from the span the logged counter metric values. +// These are the ones whose names start with FieldnamePrefixCounter, +// and whose values we assume are float64 +func getLoggedCounterMetrics(span *zipkincore.Span) map[string]float64 { + + keyValueMap := make(map[string]float64) + + // extract any annotations whose Value starts with FieldnamePrefixCounter + annotations := span.GetAnnotations() + for _, thisAnnotation := range annotations { + if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixCounter) { + keyvalue := strings.Split(thisAnnotation.GetValue(), "=") + if len(keyvalue) == 2 { + if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil { + key := strings.TrimSpace(keyvalue[0]) + key = common.FnPrefix + key[len(common.FieldnamePrefixCounter):] // strip off fieldname prefix and then prepend "fn_" to the front + keyValueMap[key] = value + } + } + } + } + return keyValueMap +} + +// extract from the span the logged gauge metric values. 
+// These are the ones whose names start with FieldnamePrefixGauge, +// and whose values we assume are float64 +func getLoggedGaugeMetrics(span *zipkincore.Span) map[string]float64 { + + keyValueMap := make(map[string]float64) + + // extract any annotations whose Value starts with FieldnamePrefixGauge + annotations := span.GetAnnotations() + for _, thisAnnotation := range annotations { + if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixGauge) { + keyvalue := strings.Split(thisAnnotation.GetValue(), "=") + if len(keyvalue) == 2 { + if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil { + key := strings.TrimSpace(keyvalue[0]) + key = common.FnPrefix + key[len(common.FieldnamePrefixGauge):] // strip off fieldname prefix and then prepend "fn_" to the front keyValueMap[key] = value } } diff --git a/examples/grafana/fn_grafana_dashboard.json b/examples/grafana/fn_grafana_dashboard.json index 89aba2976..37c93c508 100644 --- a/examples/grafana/fn_grafana_dashboard.json +++ b/examples/grafana/fn_grafana_dashboard.json @@ -116,7 +116,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(fn_api_queued)", + "expr": "sum(fn_queued)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -193,7 +193,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(fn_api_running)", + "expr": "sum(fn_running)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -270,7 +270,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(fn_api_completed)", + "expr": "sum(fn_completed)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -347,7 +347,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(fn_api_failed)", + "expr": "sum(fn_failed)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -413,7 +413,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(fn_api_queued)", + "expr": "sum(fn_queued)", "format": "time_series", "intervalFactor": 2, "legendFormat": "Total queued", @@ -490,7 +490,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(fn_api_running)", + "expr": "sum(fn_running)", "format": "time_series", "intervalFactor": 2, "legendFormat": "Total running", @@ -567,7 +567,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(fn_api_completed)", + "expr": "sum(fn_completed)", "format": "time_series", "intervalFactor": 2, "legendFormat": "Total completed", @@ -644,7 +644,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(fn_api_failed)", + "expr": "sum(fn_failed)", "format": "time_series", "intervalFactor": 2, "legendFormat": "Total failed", @@ -655,7 +655,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Total queued", + "title": "Total failed", "tooltip": { "shared": true, "sort": 0, @@ -738,11 +738,11 @@ "steppedLine": false, "targets": [ { - "expr": "fn_api_queued", + "expr": "fn_queued", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{app}} {{path}}", + "legendFormat": "{{fn_appname}} {{fn_path}}", "refId": "A", "step": 1 } @@ -822,10 +822,10 @@ "steppedLine": false, "targets": [ { - "expr": "fn_api_running", + "expr": "fn_running", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{app}} {{path}}", + "legendFormat": "{{fn_appname}} {{fn_path}}", "refId": "A", "step": 2 } @@ -904,10 +904,10 @@ "steppedLine": false, "targets": [ { - "expr": "fn_api_completed", + "expr": "fn_completed", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{app}} {{path}}", + "legendFormat": "{{fn_appname}} {{fn_path}}", "refId": 
"A", "step": 2 } @@ -986,10 +986,10 @@ "steppedLine": false, "targets": [ { - "expr": "fn_api_failed", + "expr": "fn_failed", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{app}} {{path}}", + "legendFormat": "{{fn_appname}} {{fn_path}}", "refId": "A", "step": 2 }