Send tracing spans to Prometheus

This commit is contained in:
Nigel Deakin
2017-10-20 16:30:19 +01:00
parent de4a20624d
commit 39feaf8b69
7 changed files with 1856 additions and 11 deletions

View File

@@ -188,6 +188,8 @@ func (a *agent) Submit(callI Call) error {
ctx := call.req.Context() ctx := call.req.Context()
span, ctx := opentracing.StartSpanFromContext(ctx, "agent_submit") span, ctx := opentracing.StartSpanFromContext(ctx, "agent_submit")
span.SetBaggageItem("fn_appname", callI.Model().AppName)
span.SetBaggageItem("fn_path", callI.Model().Path)
defer span.Finish() defer span.Finish()
// start the timer STAT! TODO add some wiggle room // start the timer STAT! TODO add some wiggle room

130
api/server/fntracer.go Normal file
View File

@@ -0,0 +1,130 @@
package server
import (
"github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/log"
"strings"
)
// FnTracer is a custom Tracer which wraps another tracer.
// Its main purpose is to wrap the underlying Span in a FnSpan,
// which adds some extra behaviour required for sending tracing spans to prometheus.
type FnTracer struct {
// wrappedTracer is the underlying tracer that StartSpan, Inject and Extract delegate to
wrappedTracer opentracing.Tracer
}
// NewFnTracer builds a FnTracer around tracerToWrap and returns it as an
// opentracing.Tracer. Every call on the result is delegated to tracerToWrap.
func NewFnTracer(tracerToWrap opentracing.Tracer) opentracing.Tracer {
	return &FnTracer{wrappedTracer: tracerToWrap}
}
// StartSpan implements opentracing.Tracer.
// It starts a span on the wrapped tracer and returns that span wrapped in a FnSpan.
func (t FnTracer) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span {
	inner := t.wrappedTracer.StartSpan(operationName, opts...)
	return NewFnSpan(inner)
}
// Inject implements opentracing.Tracer.
// The call is passed straight through to the wrapped tracer.
func (t FnTracer) Inject(sm opentracing.SpanContext, format interface{}, carrier interface{}) error {
	err := t.wrappedTracer.Inject(sm, format, carrier)
	return err
}
// Extract implements opentracing.Tracer.
// The call is passed straight through to the wrapped tracer.
func (t FnTracer) Extract(format interface{}, carrier interface{}) (opentracing.SpanContext, error) {
	spanCtx, err := t.wrappedTracer.Extract(format, carrier)
	return spanCtx, err
}
// FnSpan is a custom Span that wraps another span.
// It adds some extra behaviour required for sending tracing spans to prometheus:
// when the span is finished, baggage items whose keys start with "fn" are copied to tags.
type FnSpan struct {
// wrappedSpan is the underlying span that every method delegates to
wrappedSpan opentracing.Span
}
// NewFnSpan builds a FnSpan around spanToWrap and returns it as an opentracing.Span.
func NewFnSpan(spanToWrap opentracing.Span) opentracing.Span {
	return &FnSpan{wrappedSpan: spanToWrap}
}
// Finish implements opentracing.Span.
// Before finishing the wrapped span it copies the "fn"-prefixed baggage items to tags.
func (s FnSpan) Finish() {
	s.copyBaggageItemsToTags()
	s.wrappedSpan.Finish()
}
// FinishWithOptions implements opentracing.Span.
// Before finishing the wrapped span with opts it copies the "fn"-prefixed baggage items to tags.
func (s FnSpan) FinishWithOptions(opts opentracing.FinishOptions) {
	s.copyBaggageItemsToTags()
	s.wrappedSpan.FinishWithOptions(opts)
}
// copyBaggageItemsToTags sets a tag on this span for every baggage item whose key starts with "fn".
//
// Baggage items are inherited from the parent span, so this is how values set on a parent
// reach this span's tags. The PrometheusCollector sends these tags to Prometheus: it can read
// tags but not baggage items, whereas here the parent's baggage items are available but its tags are not.
func (s FnSpan) copyBaggageItemsToTags() {
	promote := func(key, value string) bool {
		if strings.HasPrefix(key, "fn") {
			s.SetTag(key, value)
		}
		// always continue with the remaining baggage items
		return true
	}
	s.Context().ForeachBaggageItem(promote)
}
// Context implements opentracing.Span.
// It returns the SpanContext of the wrapped span.
func (s FnSpan) Context() opentracing.SpanContext {
	spanCtx := s.wrappedSpan.Context()
	return spanCtx
}
// SetOperationName implements opentracing.Span.
// It sets the operation name on the wrapped span.
//
// The returned span is a FnSpan around the same wrapped span rather than the
// wrapped span itself, so that a chained call such as
// span.SetOperationName(...).Finish() still goes through FnSpan.Finish and the
// "fn" baggage items are copied to tags.
func (s FnSpan) SetOperationName(operationName string) opentracing.Span {
	s.wrappedSpan.SetOperationName(operationName)
	// s is a copy of the receiver but shares the same wrapped span
	return &s
}
// SetTag implements opentracing.Span.
// It sets the tag on the wrapped span.
//
// The returned span is a FnSpan around the same wrapped span rather than the
// wrapped span itself, so that a chained call such as
// span.SetTag(...).Finish() still goes through FnSpan.Finish and the
// "fn" baggage items are copied to tags.
func (s FnSpan) SetTag(key string, value interface{}) opentracing.Span {
	s.wrappedSpan.SetTag(key, value)
	// s is a copy of the receiver but shares the same wrapped span
	return &s
}
// LogFields implements opentracing.Span.
// The fields are logged on the wrapped span.
func (s FnSpan) LogFields(fields ...log.Field) {
	inner := s.wrappedSpan
	inner.LogFields(fields...)
}
// LogKV implements opentracing.Span.
// The key/value pairs are logged on the wrapped span.
func (s FnSpan) LogKV(alternatingKeyValues ...interface{}) {
	inner := s.wrappedSpan
	inner.LogKV(alternatingKeyValues...)
}
// SetBaggageItem implements opentracing.Span.
// It sets the baggage item on the wrapped span.
//
// The returned span is a FnSpan around the same wrapped span rather than the
// wrapped span itself, so that a chained call such as
// span.SetBaggageItem(...).Finish() still goes through FnSpan.Finish and the
// "fn" baggage items are copied to tags.
func (s FnSpan) SetBaggageItem(restrictedKey, value string) opentracing.Span {
	s.wrappedSpan.SetBaggageItem(restrictedKey, value)
	// s is a copy of the receiver but shares the same wrapped span
	return &s
}
// BaggageItem implements opentracing.Span.
// It returns the wrapped span's baggage value for restrictedKey.
func (s FnSpan) BaggageItem(restrictedKey string) string {
	value := s.wrappedSpan.BaggageItem(restrictedKey)
	return value
}
// Tracer implements opentracing.Span.
// It returns the tracer reported by the wrapped span.
// NOTE(review): this is the wrapped span's tracer rather than a FnTracer, so spans
// started through it would presumably not be wrapped in a FnSpan - confirm this is intended.
func (s FnSpan) Tracer() opentracing.Tracer {
	inner := s.wrappedSpan.Tracer()
	return inner
}
// LogEvent implements opentracing.Span.
// The event is logged on the wrapped span.
func (s FnSpan) LogEvent(event string) {
	inner := s.wrappedSpan
	inner.LogEvent(event)
}
// LogEventWithPayload implements opentracing.Span.
// The event and its payload are logged on the wrapped span.
func (s FnSpan) LogEventWithPayload(event string, payload interface{}) {
	inner := s.wrappedSpan
	inner.LogEventWithPayload(event, payload)
}
// Log implements opentracing.Span.
// The log data is recorded on the wrapped span.
func (s FnSpan) Log(data opentracing.LogData) {
	inner := s.wrappedSpan
	inner.Log(data)
}

View File

@@ -0,0 +1,80 @@
package server
import (
	"strings"
	"sync"

	"github.com/fnproject/fn/api/agent"
	"github.com/openzipkin/zipkin-go-opentracing"
	"github.com/openzipkin/zipkin-go-opentracing/thrift/gen-go/zipkincore"
	"github.com/prometheus/client_golang/prometheus"
)
// Each span name is published as a separate Histogram metric,
// using metric names of the form fn_span_<span-name>_duration_seconds.
// histogramVecMap caches the HistogramVec created for each span name.
var histogramVecMap = make(map[string]*prometheus.HistogramVec)

// histogramVecMu guards histogramVecMap. Plain Go maps are not safe for
// concurrent use, and getHistogramVecForSpanName both reads and writes the map.
var histogramVecMu sync.Mutex

// getHistogramVecForSpanName returns the HistogramVec corresponding to the specified spanName.
//
// If a HistogramVec does not already exist for spanName then one is created with the
// specified labels, registered with the default Prometheus registry and cached.
// Otherwise the cached one is returned and the labels parameter is ignored.
//
// It is safe to call from multiple goroutines. prometheus.MustRegister panics if
// registration fails.
func getHistogramVecForSpanName(spanName string, labels []string) *prometheus.HistogramVec {
	histogramVecMu.Lock()
	defer histogramVecMu.Unlock()

	if cached, found := histogramVecMap[spanName]; found {
		return cached
	}

	created := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "fn_span_" + spanName + "_duration_seconds",
			Help: "Span " + spanName + " duration, by span name",
		},
		labels,
	)
	// Register before caching, so that a failed registration never leaves an
	// unregistered HistogramVec in the map.
	prometheus.MustRegister(created)
	histogramVecMap[spanName] = created
	return created
}
// PrometheusCollector is a custom Collector
// which sends ZipKin traces to Prometheus.
// Each collected span is observed in a histogram named after the span.
type PrometheusCollector struct {
// a is the agent supplied to NewPrometheusCollector
// NOTE(review): no method shown here reads this field - confirm whether it is still needed
a agent.Agent
}
// NewPrometheusCollector builds a PrometheusCollector holding the given agent
// and returns it as a zipkintracer.Collector. The returned error is always nil.
func NewPrometheusCollector(agent agent.Agent) (zipkintracer.Collector, error) {
	return &PrometheusCollector{a: agent}, nil
}
// Collect implements Collector.
//
// It records the duration of span in the histogram for the span's name, labelled with
// the "fn"-prefixed string tags found on the span.
//
// The histogram for a span name keeps the label names it was first created with. If this
// span carries a different set of labels the observation is dropped and an error is
// returned, rather than panicking as HistogramVec.With would.
func (pc PrometheusCollector) Collect(span *zipkincore.Span) error {
	// extract any label values from the span
	labelKeys, labelValues := getLabels(span)

	histogram, err := getHistogramVecForSpanName(span.GetName(), labelKeys).GetMetricWith(labelValues)
	if err != nil {
		return err
	}

	// The metric is published in seconds; dividing by 1000000 assumes the span
	// duration is reported in microseconds.
	histogram.Observe(float64(span.GetDuration()) / 1000000)
	return nil
}
// getLabels picks out of span the key/value pairs to attach as labels to the
// Prometheus metric for that span: every string-typed binary annotation (tag)
// whose key starts with "fn".
//
// It returns the selected keys in annotation order, and a map from each key to its value.
func getLabels(span *zipkincore.Span) ([]string, map[string]string) {
	var labelKeys []string
	labelValues := make(map[string]string)

	for _, annotation := range span.GetBinaryAnnotations() {
		name := annotation.GetKey()
		// skip anything that is not a string tag with an "fn" prefix
		if annotation.GetAnnotationType() != zipkincore.AnnotationType_STRING || !strings.HasPrefix(name, "fn") {
			continue
		}
		labelKeys = append(labelKeys, name)
		labelValues[name] = string(annotation.GetValue())
	}

	return labelKeys, labelValues
}
// Close implements Collector.
// There is nothing to release, so it always returns nil.
func (PrometheusCollector) Close() error {
	return nil
}

View File

@@ -86,7 +86,7 @@ func New(ctx context.Context, ds models.Datastore, mq models.MessageQueue, logDB
} }
setMachineId() setMachineId()
setTracer() s.setTracer()
s.Router.Use(loggerWrap, traceWrap, panicWrap) s.Router.Use(loggerWrap, traceWrap, panicWrap)
s.bindHandlers(ctx) s.bindHandlers(ctx)
@@ -117,7 +117,7 @@ func traceWrap(c *gin.Context) {
c.Next() c.Next()
} }
func setTracer() { func (s *Server) setTracer() {
var ( var (
debugMode = false debugMode = false
serviceName = "fn-server" serviceName = "fn-server"
@@ -126,25 +126,40 @@ func setTracer() {
// ex: "http://zipkin:9411/api/v1/spans" // ex: "http://zipkin:9411/api/v1/spans"
) )
if zipkinHTTPEndpoint == "" { var collector zipkintracer.Collector
return
// custom Zipkin collector to send tracing spans to Prometheus
promCollector, promErr := NewPrometheusCollector(s.Agent)
if promErr != nil {
logrus.WithError(promErr).Fatalln("couldn't start Prometheus trace collector")
} }
logger := zipkintracer.LoggerFunc(func(i ...interface{}) error { logrus.Error(i...); return nil }) logger := zipkintracer.LoggerFunc(func(i ...interface{}) error { logrus.Error(i...); return nil })
collector, err := zipkintracer.NewHTTPCollector(zipkinHTTPEndpoint, zipkintracer.HTTPLogger(logger)) if zipkinHTTPEndpoint != "" {
if err != nil { // Custom PrometheusCollector and Zipkin HTTPCollector
logrus.WithError(err).Fatalln("couldn't start trace collector") httpCollector, zipErr := zipkintracer.NewHTTPCollector(zipkinHTTPEndpoint, zipkintracer.HTTPLogger(logger))
if zipErr != nil {
logrus.WithError(zipErr).Fatalln("couldn't start Zipkin trace collector")
}
collector = zipkintracer.MultiCollector{httpCollector, promCollector}
} else {
// Custom PrometheusCollector only
collector = promCollector
} }
tracer, err := zipkintracer.NewTracer(zipkintracer.NewRecorder(collector, debugMode, serviceHostPort, serviceName),
ziptracer, err := zipkintracer.NewTracer(zipkintracer.NewRecorder(collector, debugMode, serviceHostPort, serviceName),
zipkintracer.ClientServerSameSpan(true), zipkintracer.ClientServerSameSpan(true),
zipkintracer.TraceID128Bit(true), zipkintracer.TraceID128Bit(true),
) )
if err != nil { if err != nil {
logrus.WithError(err).Fatalln("couldn't start tracer") logrus.WithError(err).Fatalln("couldn't start tracer")
} }
// wrap the Zipkin tracer in a FnTracer which will also send spans to Prometheus
fntracer := NewFnTracer(ziptracer)
opentracing.SetGlobalTracer(tracer) opentracing.SetGlobalTracer(fntracer)
logrus.WithFields(logrus.Fields{"url": zipkinHTTPEndpoint}).Info("started tracer") logrus.WithFields(logrus.Fields{"url": zipkinHTTPEndpoint}).Info("started tracer")
} }

BIN
docs/assets/GrafanaDashboard2.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 552 KiB

View File

@@ -90,11 +90,20 @@ Create a datasource to obtain metrics from Prometheus:
* Click **Add** and then **Save and test** * Click **Add** and then **Save and test**
Import the example dashboard that displays metrics from the Fn server: Import the example dashboard that displays metrics from the Fn server:
* Click on the main menu at the top left and choose **Dashboards** and then **Home** * Click on the main menu at the top left and choose **Dashboards** and then **Import**
* Click on **Home** at the top and then **Import dashboard**
* In the dialog that opens, click **Upload .json file** and specify `fn_grafana_dashboard.json` in this example's directory. * In the dialog that opens, click **Upload .json file** and specify `fn_grafana_dashboard.json` in this example's directory.
* Specify the Prometheus data source that you just created * Specify the Prometheus data source that you just created
* Click **Import** * Click **Import**
You should then see the dashboard shown above. Now execute some functions and see the graphs update. You should then see the dashboard shown above. Now execute some functions and see the graphs update.
## Tracing metrics
Tracing spans from the Fn server are available as Prometheus metrics. Each span has a name that describes the operation being performed (for example `docker_wait_container`), and its duration in seconds. Each span name is represented by a separate histogram metric, which has a name of the form `fn_span_<span-name>_duration_seconds`.
If the span is associated with a specific function invocation, the corresponding metric is given the labels `fn_appname` and `fn_path`, which are set to the application name and function path respectively.
A second example dashboard shows rate and duration data for a selection of tracing spans. This is `fn_grafana_dashboard2.json` in this example's directory.
<img src="../../docs/assets/GrafanaDashboard2.png" width="800">

File diff suppressed because it is too large Load Diff