opentracing -> opencensus (#802)

* update vendor directory, add go.opencensus.io

* update imports

* oops

* s/opentracing/opencensus/ & remove prometheus / zipkin stuff & remove old stats

* the dep train rides again

* fix gin build

* deps from last guy

* start in on the agent metrics

* she builds

* remove tags for now, cardinality error is fussing. subscribe instead of register

* update to patched version of opencensus to proceed for now TODO switch to a release

* meh

fix imports

* println debug the bad boys

* lace it with the tags

* update deps again

* fix all inconsistent cardinality errors

* add our own logger

* fix init

* fix oom measure

* remove bugged removal code

* fix s3 measures

* fix prom handler nil
Author: Reed Allman
Date: 2018-03-05 09:35:28 -08:00 (committed by GitHub)
Parent: 924d27559c
Commit: 206aa3c203
5975 changed files with 158755 additions and 566592 deletions
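Not part of the diff below, but for orientation: the core API shift this commit makes is from opentracing's span-in-context helpers to opencensus's, roughly as in this sketch (function and attribute values here are illustrative, not taken from the Fn code):

```go
package main

import (
	"context"

	"go.opencensus.io/trace"
)

// doWork shows the pattern the commit moves to: opencensus spans are started
// from and returned inside a context.Context and closed with End(), replacing
// opentracing's StartSpanFromContext / span.Finish() pairing.
func doWork(ctx context.Context) {
	ctx, span := trace.StartSpan(ctx, "do_work")
	defer span.End()

	// attributes roughly take the place of opentracing span tags
	span.AddAttributes(trace.StringAttribute("fn_appname", "myapp"))

	childStep(ctx) // child spans pick their parent up from ctx
}

func childStep(ctx context.Context) {
	_, span := trace.StartSpan(ctx, "child_step")
	defer span.End()
}

func main() {
	doWork(context.Background())
}
```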


@@ -6,10 +6,11 @@ import (
"net/http"
"errors"
"strings"
"github.com/fnproject/fn/api"
"github.com/fnproject/fn/api/models"
"github.com/gin-gonic/gin"
"strings"
)
// note: for backward compatibility, will go away later


@@ -1,60 +0,0 @@
package server
import (
"github.com/opentracing/opentracing-go"
"strings"
)
// FnTracer is a custom Tracer which wraps another tracer
// its main purpose is to wrap the underlying Span in a FnSpan,
// which adds some extra behaviour required for sending tracing spans to prometheus
type FnTracer struct {
opentracing.Tracer
}
// NewFnTracer returns a new FnTracer which wraps the specified Tracer
func NewFnTracer(t opentracing.Tracer) opentracing.Tracer {
return &FnTracer{t}
}
// FnTracer implements opentracing.Tracer
// Override StartSpan to wrap the returned Span in a FnSpan
func (fnt FnTracer) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span {
return NewFnSpan(fnt.Tracer.StartSpan(operationName, opts...))
}
// FnSpan is a custom Span that wraps another span
// which adds some extra behaviour required for sending tracing spans to prometheus
type FnSpan struct {
opentracing.Span
}
// NewFnSpan returns a new FnSpan which wraps the specified Span
func NewFnSpan(s opentracing.Span) opentracing.Span {
return &FnSpan{s}
}
// FnSpan implements opentracing.Span
func (fns FnSpan) Finish() {
fns.copyBaggageItemsToTags()
fns.Span.Finish()
}
// FnSpan implements opentracing.Span
func (fns FnSpan) FinishWithOptions(opts opentracing.FinishOptions) {
fns.copyBaggageItemsToTags()
fns.Span.FinishWithOptions(opts)
}
func (fns FnSpan) copyBaggageItemsToTags() {
// copy baggage items (which are inherited from the parent) with keys starting with "fn" to tags
// the PrometheusCollector will send these to Prometheus
// need to do this because the collector can't access baggage items, but it can access tags
// whereas here we can access the parent's baggage items, but not its tags
fns.Context().ForeachBaggageItem(func(k, v string) bool {
if strings.HasPrefix(k, "fn") {
fns.SetTag(k, v)
}
return true
})
}
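The FnTracer/FnSpan wrapper removed above existed only to copy "fn..." baggage items onto span tags so the Prometheus collector could read them. Under opencensus that propagation job is handled by tag maps carried in the context; a minimal sketch (illustrative values, not code from this commit):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"go.opencensus.io/tag"
)

func main() {
	appKey, err := tag.NewKey("fn_appname")
	if err != nil {
		log.Fatal(err)
	}

	// upstream (e.g. request middleware) inserts the tag once...
	ctx, err := tag.New(context.Background(), tag.Insert(appKey, "myapp"))
	if err != nil {
		log.Fatal(err)
	}

	// ...and anything downstream can read it back from the context, much as
	// FnSpan read inherited baggage items from its parent.
	if v, ok := tag.FromContext(ctx).Value(appKey); ok {
		fmt.Println("fn_appname =", v)
	}
}
```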


@@ -13,9 +13,9 @@ import (
"github.com/fnproject/fn/fnext"
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
opentracing "github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/ext"
"github.com/sirupsen/logrus"
"go.opencensus.io/tag"
"go.opencensus.io/trace"
)
func optionalCorsWrap(r *gin.Engine) {
@@ -37,20 +37,28 @@ func optionalCorsWrap(r *gin.Engine) {
// we should use http grr
func traceWrap(c *gin.Context) {
// try to grab a span from the request if made from another service, ignore err if not
wireContext, _ := opentracing.GlobalTracer().Extract(
opentracing.HTTPHeaders,
opentracing.HTTPHeadersCarrier(c.Request.Header))
appKey, err := tag.NewKey("fn_appname")
if err != nil {
logrus.Fatal(err)
}
pathKey, err := tag.NewKey("fn_path")
if err != nil {
logrus.Fatal(err)
}
ctx, err := tag.New(c.Request.Context(),
tag.Insert(appKey, c.Param(api.CApp)),
tag.Insert(pathKey, c.Param(api.CRoute)),
)
if err != nil {
logrus.Fatal(err)
}
// Create the span referring to the RPC client if available.
// If wireContext == nil, a root span will be created.
// TODO we should add more tags?
serverSpan := opentracing.StartSpan("serve_http", ext.RPCServerOption(wireContext), opentracing.Tag{Key: "path", Value: c.Request.URL.Path})
serverSpan.SetBaggageItem("fn_appname", c.Param(api.CApp))
serverSpan.SetBaggageItem("fn_path", c.Param(api.CRoute))
defer serverSpan.Finish()
// TODO inspect opencensus more and see if we need to define a header ourselves
// to trigger per-request spans (we will want this), we can set sampler here per request.
ctx, serverSpan := trace.StartSpan(ctx, "serve_http")
defer serverSpan.End()
ctx := opentracing.ContextWithSpan(c.Request.Context(), serverSpan)
c.Request = c.Request.WithContext(ctx)
c.Next()
}
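The fn_appname/fn_path tag keys created in traceWrap only become Prometheus labels once a view declares the same keys, and a mismatch between the keys recorded and the keys a view declares is a typical source of the "inconsistent cardinality" errors the commit messages mention. A minimal sketch of a matching view (the measure name is made up; view.Register is the current spelling of the register/subscribe API the commit messages refer to):

```go
package main

import (
	"context"
	"log"

	"go.opencensus.io/stats"
	"go.opencensus.io/stats/view"
	"go.opencensus.io/tag"
)

var (
	appKey, _  = tag.NewKey("fn_appname") // errors ignored for brevity
	pathKey, _ = tag.NewKey("fn_path")

	completed = stats.Int64("fn/completed", "completed requests", stats.UnitDimensionless)
)

func main() {
	// The view's TagKeys must line up with the keys present when recording;
	// Prometheus label sets are derived from them.
	err := view.Register(&view.View{
		Name:        "fn_completed",
		Description: "completed requests by app and path",
		Measure:     completed,
		TagKeys:     []tag.Key{appKey, pathKey},
		Aggregation: view.Count(),
	})
	if err != nil {
		log.Fatal(err)
	}

	ctx, _ := tag.New(context.Background(),
		tag.Insert(appKey, "myapp"), tag.Insert(pathKey, "/hello"))
	stats.Record(ctx, completed.M(1))
}
```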


@@ -1,327 +0,0 @@
package server
import (
"github.com/fnproject/fn/api/common"
"github.com/openzipkin/zipkin-go-opentracing"
"github.com/openzipkin/zipkin-go-opentracing/thrift/gen-go/zipkincore"
"github.com/prometheus/client_golang/prometheus"
"strconv"
"strings"
"sync"
"time"
)
// PrometheusCollector is a custom Collector
// which sends ZipKin traces to Prometheus
type PrometheusCollector struct {
lock sync.Mutex
// Each span name is published as a separate Histogram metric
// Using metric names of the form fn_span_<span-name>_duration_seconds
// In this map, the key is the name of a tracing span,
// and the corresponding value is a HistogramVec metric used to report the duration of spans with this name to Prometheus
histogramVecMap map[string]*prometheus.HistogramVec
// In this map, the key is the name of a tracing span,
// and the corresponding value is a CounterVec metric used to report counter values logged against spans with this name to Prometheus
counterVecMap map[string]*prometheus.CounterVec
// In this map, the key is the name of a tracing span,
// and the corresponding value is a GaugeVec metric used to report gauge values logged against spans with this name to Prometheus
gaugeVecMap map[string]*prometheus.GaugeVec
// In this map, the key is the name of a metric,
// and the corresponding value is a slice containing the label keys that were specified when the metric was created
registeredLabelKeysMap map[string][]string
}
// NewPrometheusCollector returns a new PrometheusCollector
func NewPrometheusCollector() (zipkintracer.Collector, error) {
pc := &PrometheusCollector{
histogramVecMap: make(map[string]*prometheus.HistogramVec),
counterVecMap: make(map[string]*prometheus.CounterVec),
gaugeVecMap: make(map[string]*prometheus.GaugeVec),
registeredLabelKeysMap: make(map[string][]string),
}
return pc, nil
}
// PrometheusCollector implements Collector.
func (pc *PrometheusCollector) Collect(span *zipkincore.Span) error {
spanName := span.GetName()
// extract any label values from the span
labelKeysFromSpan, labelValuesFromSpan := getLabels(span)
// report the duration of this span as a histogram
// (unless the span name ends with SpannameSuffixDummy to denote it as being purely the carrier of a metric value and so of no interest in itself)
if !strings.HasSuffix(spanName, common.SpannameSuffixDummy) {
// get the HistogramVec for this span name
histogramVec, labelValuesToUse := pc.getHistogramVec(
("fn_span_" + spanName + "_duration_seconds"), ("Span " + spanName + " duration, by span name"), labelKeysFromSpan, labelValuesFromSpan)
// now report the span duration value
histogramVec.With(labelValuesToUse).Observe((time.Duration(span.GetDuration()) * time.Microsecond).Seconds())
}
// now extract any logged histogram metric values from the span
for key, value := range getLoggedHistogramMetrics(span) {
// get the HistogramVec for this metric
thisMetricHistogramVec, labelValuesToUse := pc.getHistogramVec(
key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan)
// now report the metric value
thisMetricHistogramVec.With(labelValuesToUse).Observe(value)
}
// now extract any logged counter metric values from the span
for key, value := range getLoggedCounterMetrics(span) {
// get the CounterVec for this metric
thisMetricCounterVec, labelValuesToUse := pc.getCounterVec(
key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan)
// now report the metric value
thisMetricCounterVec.With(labelValuesToUse).Add(value)
}
// now extract any logged gauge metric values from the span
for key, value := range getLoggedGaugeMetrics(span) {
// get the GaugeVec for this metric
thisMetricGaugeVec, labelValuesToUse := pc.getGaugeVec(
key, ("Metric " + key), labelKeysFromSpan, labelValuesFromSpan)
// now report the metric value
thisMetricGaugeVec.With(labelValuesToUse).Add(value)
}
return nil
}
// Return (and create, if necessary) a HistogramVec for the specified Prometheus metric
func (pc *PrometheusCollector) getHistogramVec(
metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) (
*prometheus.HistogramVec, map[string]string) {
var labelValuesToUse map[string]string
pc.lock.Lock()
defer pc.lock.Unlock()
histogramVec, found := pc.histogramVecMap[metricName]
if !found {
// create a new HistogramVec
histogramVec = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: metricName,
Help: metricHelp,
},
labelKeysFromSpan,
)
pc.histogramVecMap[metricName] = histogramVec
pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan
prometheus.MustRegister(histogramVec)
labelValuesToUse = labelValuesFromSpan
} else {
// found an existing HistogramVec
// need to be careful here, since we must supply the same label keys as when we first created the metric
// otherwise we will get an "inconsistent label cardinality" panic
// that's why we saved the original label keys in the registeredLabelKeysMap map
// so we can use that to construct a map of label key/value pairs to set on the metric
labelValuesToUse = make(map[string]string)
for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] {
if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found {
labelValuesToUse[thisRegisteredLabelKey] = value
} else {
labelValuesToUse[thisRegisteredLabelKey] = ""
}
}
}
return histogramVec, labelValuesToUse
}
// Return (and create, if necessary) a CounterVec for the specified Prometheus metric
func (pc *PrometheusCollector) getCounterVec(
metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) (
*prometheus.CounterVec, map[string]string) {
var labelValuesToUse map[string]string
pc.lock.Lock()
defer pc.lock.Unlock()
counterVec, found := pc.counterVecMap[metricName]
if !found {
// create a new CounterVec
counterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: metricName,
Help: metricHelp,
},
labelKeysFromSpan,
)
pc.counterVecMap[metricName] = counterVec
pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan
prometheus.MustRegister(counterVec)
labelValuesToUse = labelValuesFromSpan
} else {
// found an existing CounterVec
// need to be careful here, since we must supply the same label keys as when we first created the metric
// otherwise we will get an "inconsistent label cardinality" panic
// that's why we saved the original label keys in the registeredLabelKeysMap map
// so we can use that to construct a map of label key/value pairs to set on the metric
labelValuesToUse = make(map[string]string)
for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] {
if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found {
labelValuesToUse[thisRegisteredLabelKey] = value
} else {
labelValuesToUse[thisRegisteredLabelKey] = ""
}
}
}
return counterVec, labelValuesToUse
}
// Return (and create, if necessary) a GaugeVec for the specified Prometheus metric
func (pc *PrometheusCollector) getGaugeVec(
metricName string, metricHelp string, labelKeysFromSpan []string, labelValuesFromSpan map[string]string) (
*prometheus.GaugeVec, map[string]string) {
var labelValuesToUse map[string]string
pc.lock.Lock()
defer pc.lock.Unlock()
gaugeVec, found := pc.gaugeVecMap[metricName]
if !found {
// create a new GaugeVec
gaugeVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: metricName,
Help: metricHelp,
},
labelKeysFromSpan,
)
pc.gaugeVecMap[metricName] = gaugeVec
pc.registeredLabelKeysMap[metricName] = labelKeysFromSpan
prometheus.MustRegister(gaugeVec)
labelValuesToUse = labelValuesFromSpan
} else {
// found an existing GaugeVec
// need to be careful here, since we must supply the same label keys as when we first created the metric
// otherwise we will get an "inconsistent label cardinality" panic
// that's why we saved the original label keys in the registeredLabelKeysMap map
// so we can use that to construct a map of label key/value pairs to set on the metric
labelValuesToUse = make(map[string]string)
for _, thisRegisteredLabelKey := range pc.registeredLabelKeysMap[metricName] {
if value, found := labelValuesFromSpan[thisRegisteredLabelKey]; found {
labelValuesToUse[thisRegisteredLabelKey] = value
} else {
labelValuesToUse[thisRegisteredLabelKey] = ""
}
}
}
return gaugeVec, labelValuesToUse
}
// extract from the specified span the key/value pairs that we want to add as labels to the Prometheus metric for this span
// returns an array of keys, and a map of key-value pairs
func getLabels(span *zipkincore.Span) ([]string, map[string]string) {
var keys []string
labelMap := make(map[string]string)
// extract any tags whose key starts with "fn" from the span
binaryAnnotations := span.GetBinaryAnnotations()
for _, thisBinaryAnnotation := range binaryAnnotations {
key := thisBinaryAnnotation.GetKey()
if thisBinaryAnnotation.GetAnnotationType() == zipkincore.AnnotationType_STRING && strings.HasPrefix(key, "fn") {
keys = append(keys, key)
value := string(thisBinaryAnnotation.GetValue()[:])
labelMap[key] = value
}
}
return keys, labelMap
}
// extract from the span the logged histogram metric values.
// These are the ones whose names start with FieldnamePrefixHistogram,
// and whose values we assume are float64
func getLoggedHistogramMetrics(span *zipkincore.Span) map[string]float64 {
keyValueMap := make(map[string]float64)
// extract any annotations whose Value starts with FieldnamePrefixHistogram
annotations := span.GetAnnotations()
for _, thisAnnotation := range annotations {
if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixHistogram) {
keyvalue := strings.Split(thisAnnotation.GetValue(), "=")
if len(keyvalue) == 2 {
if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil {
key := strings.TrimSpace(keyvalue[0])
key = common.FnPrefix + key[len(common.FieldnamePrefixHistogram):] // strip off fieldname prefix and then prepend "fn_" to the front
keyValueMap[key] = value
}
}
}
}
return keyValueMap
}
// extract from the span the logged counter metric values.
// These are the ones whose names start with FieldnamePrefixCounter,
// and whose values we assume are float64
func getLoggedCounterMetrics(span *zipkincore.Span) map[string]float64 {
keyValueMap := make(map[string]float64)
// extract any annotations whose Value starts with FieldnamePrefixCounter
annotations := span.GetAnnotations()
for _, thisAnnotation := range annotations {
if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixCounter) {
keyvalue := strings.Split(thisAnnotation.GetValue(), "=")
if len(keyvalue) == 2 {
if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil {
key := strings.TrimSpace(keyvalue[0])
key = common.FnPrefix + key[len(common.FieldnamePrefixCounter):] // strip off fieldname prefix and then prepend "fn_" to the front
keyValueMap[key] = value
}
}
}
}
return keyValueMap
}
// extract from the span the logged gauge metric values.
// These are the ones whose names start with FieldnamePrefixGauge,
// and whose values we assume are float64
func getLoggedGaugeMetrics(span *zipkincore.Span) map[string]float64 {
keyValueMap := make(map[string]float64)
// extract any annotations whose Value starts with FieldnamePrefixGauge
annotations := span.GetAnnotations()
for _, thisAnnotation := range annotations {
if strings.HasPrefix(thisAnnotation.GetValue(), common.FieldnamePrefixGauge) {
keyvalue := strings.Split(thisAnnotation.GetValue(), "=")
if len(keyvalue) == 2 {
if value, err := strconv.ParseFloat(keyvalue[1], 64); err == nil {
key := strings.TrimSpace(keyvalue[0])
key = common.FnPrefix + key[len(common.FieldnamePrefixGauge):] // strip off fieldname prefix and then prepend "fn_" to the front
keyValueMap[key] = value
}
}
}
}
return keyValueMap
}
// PrometheusCollector implements Collector.
func (*PrometheusCollector) Close() error { return nil }
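The collector deleted above hand-built HistogramVec/CounterVec/GaugeVec metrics out of span annotations. With opencensus the same three shapes come from view aggregations over measures, which the Prometheus exporter registered in server.go then serves; a rough sketch with made-up measure names:

```go
package main

import (
	"context"
	"log"

	"go.opencensus.io/stats"
	"go.opencensus.io/stats/view"
)

var (
	// one measure per signal; the three aggregations below play the roles of
	// the old HistogramVec, CounterVec and GaugeVec respectively.
	callTime  = stats.Float64("fn/call_time", "call duration", stats.UnitMilliseconds)
	callCount = stats.Int64("fn/calls", "completed calls", stats.UnitDimensionless)
	queueLen  = stats.Int64("fn/queue", "queued calls", stats.UnitDimensionless)

	views = []*view.View{
		{Name: "fn_call_time", Measure: callTime, Aggregation: view.Distribution(10, 100, 1000)}, // histogram
		{Name: "fn_calls", Measure: callCount, Aggregation: view.Count()},                        // counter
		{Name: "fn_queue", Measure: queueLen, Aggregation: view.LastValue()},                     // gauge
	}
)

func main() {
	if err := view.Register(views...); err != nil {
		log.Fatal(err)
	}
	ctx := context.Background()
	stats.Record(ctx, callTime.M(42.0), callCount.M(1), queueLen.M(3))
}
```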


@@ -1,9 +0,0 @@
package server
import (
"github.com/gin-gonic/gin"
)
func (s *Server) handlePrometheusMetrics(c *gin.Context) {
s.agent.PromHandler().ServeHTTP(c.Writer, c.Request)
}


@@ -24,9 +24,13 @@ import (
"github.com/fnproject/fn/api/version"
"github.com/fnproject/fn/fnext"
"github.com/gin-gonic/gin"
opentracing "github.com/opentracing/opentracing-go"
zipkintracer "github.com/openzipkin/zipkin-go-opentracing"
zipkinhttp "github.com/openzipkin/zipkin-go/reporter/http"
"github.com/sirupsen/logrus"
"go.opencensus.io/exporter/prometheus"
"go.opencensus.io/exporter/zipkin"
"go.opencensus.io/plugin/ochttp"
"go.opencensus.io/stats/view"
"go.opencensus.io/trace"
)
const (
@@ -79,6 +83,7 @@ type Server struct {
appListeners *appListeners
rootMiddlewares []fnext.Middleware
apiMiddlewares []fnext.Middleware
promExporter *prometheus.Exporter
}
func nodeTypeFromString(value string) ServerNodeType {
@@ -226,8 +231,8 @@ func WithAgent(agent agent.Agent) ServerOption {
// New creates a new Functions server with the opts given. For convenience, users may
// prefer to use NewFromEnv but New is more flexible if needed.
func New(ctx context.Context, opts ...ServerOption) *Server {
span, ctx := opentracing.StartSpanFromContext(ctx, "server_init")
defer span.Finish()
ctx, span := trace.StartSpan(ctx, "server_init")
defer span.End()
log := common.Logger(ctx)
s := &Server{
@@ -290,50 +295,35 @@ func New(ctx context.Context, opts ...ServerOption) *Server {
func WithTracer(zipkinURL string) ServerOption {
return func(ctx context.Context, s *Server) error {
var (
debugMode = false
serviceName = "fnserver"
serviceHostPort = "localhost:8080" // meh
// TODO add server identifier to this crap
//debugMode = false
//serviceName = "fnserver"
//serviceHostPort = "localhost:8080" // meh
zipkinHTTPEndpoint = zipkinURL
// ex: "http://zipkin:9411/api/v1/spans"
// ex: "http://zipkin:9411/api/v2/spans"
)
var collector zipkintracer.Collector
// custom Zipkin collector to send tracing spans to Prometheus
promCollector, promErr := NewPrometheusCollector()
if promErr != nil {
logrus.WithError(promErr).Fatalln("couldn't start Prometheus trace collector")
}
logger := zipkintracer.LoggerFunc(func(i ...interface{}) error { logrus.Error(i...); return nil })
if zipkinHTTPEndpoint != "" {
// Custom PrometheusCollector and Zipkin HTTPCollector
httpCollector, zipErr := zipkintracer.NewHTTPCollector(zipkinHTTPEndpoint,
zipkintracer.HTTPLogger(logger), zipkintracer.HTTPMaxBacklog(1000),
)
if zipErr != nil {
logrus.WithError(zipErr).Fatalln("couldn't start Zipkin trace collector")
}
collector = zipkintracer.MultiCollector{httpCollector, promCollector}
} else {
// Custom PrometheusCollector only
collector = promCollector
reporter := zipkinhttp.NewReporter(zipkinURL, zipkinhttp.MaxBacklog(10000))
exporter := zipkin.NewExporter(reporter, nil)
trace.RegisterExporter(exporter)
logrus.WithFields(logrus.Fields{"url": zipkinHTTPEndpoint}).Info("exporting spans to zipkin")
// TODO don't do this. testing parity.
trace.SetDefaultSampler(trace.AlwaysSample())
}
ziptracer, err := zipkintracer.NewTracer(zipkintracer.NewRecorder(collector, debugMode, serviceHostPort, serviceName),
zipkintracer.ClientServerSameSpan(true),
zipkintracer.TraceID128Bit(true),
)
// TODO we can keep this on *Server and unregister it in Close()... can finagle later. same for tracer
exporter, err := prometheus.NewExporter(prometheus.Options{
Namespace: "fn",
OnError: func(err error) { logrus.WithError(err).Error("opencensus prometheus exporter err") },
})
if err != nil {
logrus.WithError(err).Fatalln("couldn't start tracer")
logrus.Fatal(err)
}
s.promExporter = exporter
view.RegisterExporter(exporter)
// wrap the Zipkin tracer in a FnTracer which will also send spans to Prometheus
fntracer := NewFnTracer(ziptracer)
opentracing.SetGlobalTracer(fntracer)
logrus.WithFields(logrus.Fields{"url": zipkinHTTPEndpoint}).Info("started tracer")
return nil
}
}
@@ -404,7 +394,8 @@ func (s *Server) startGears(ctx context.Context, cancel context.CancelFunc) {
server := http.Server{
Addr: listen,
Handler: s.Router,
Handler: &ochttp.Handler{Handler: s.Router},
// TODO we should set read/write timeouts
}
@@ -438,8 +429,11 @@ func (s *Server) bindHandlers(ctx context.Context) {
engine.GET("/", handlePing)
engine.GET("/version", handleVersion)
// TODO: move the following under v1
engine.GET("/metrics", s.handlePrometheusMetrics)
// TODO: move under v1 ?
if s.promExporter != nil {
engine.GET("/metrics", gin.WrapH(s.promExporter))
}
profilerSetup(engine, "/debug")