opentracing -> opencensus (#802)

* update vendor directory, add go.opencensus.io

* update imports

* oops

* s/opentracing/opencensus/ & remove prometheus / zipkin stuff & remove old stats

* the dep train rides again

* fix gin build

* deps from last guy

* start in on the agent metrics

* she builds

* remove tags for now, cardinality error is fussing. subscribe instead of register

* update to patched version of opencensus to proceed for now TODO switch to a release

* meh

fix imports

* println debug the bad boys

* lace it with the tags

* update deps again

* fix all inconsistent cardinality errors

* add our own logger

* fix init

* fix oom measure

* remove bugged removal code

* fix s3 measures

* fix prom handler nil
This commit is contained in:
Reed Allman
2018-03-05 09:35:28 -08:00
committed by GitHub
parent 924d27559c
commit 206aa3c203
5975 changed files with 158755 additions and 566592 deletions

View File

@@ -2,54 +2,63 @@ package agent
import (
"context"
"github.com/fnproject/fn/api/common"
"github.com/sirupsen/logrus"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
)
func StatsEnqueue(ctx context.Context) {
common.IncrementGauge(ctx, queuedMetricName)
common.IncrementCounter(ctx, callsMetricName)
// TODO add some suga:
// * hot containers active
// * memory used / available
func statsEnqueue(ctx context.Context) {
stats.Record(ctx, queuedMeasure.M(1))
stats.Record(ctx, callsMeasure.M(1))
}
// Call when a function has been queued but cannot be started because of an error
func StatsDequeue(ctx context.Context) {
common.DecrementGauge(ctx, queuedMetricName)
func statsDequeue(ctx context.Context) {
stats.Record(ctx, queuedMeasure.M(-1))
}
func StatsDequeueAndStart(ctx context.Context) {
common.DecrementGauge(ctx, queuedMetricName)
common.IncrementGauge(ctx, runningMetricName)
func statsDequeueAndStart(ctx context.Context) {
stats.Record(ctx, queuedMeasure.M(-1))
stats.Record(ctx, runningMeasure.M(1))
}
func StatsComplete(ctx context.Context) {
common.DecrementGauge(ctx, runningMetricName)
common.IncrementCounter(ctx, completedMetricName)
func statsComplete(ctx context.Context) {
stats.Record(ctx, runningMeasure.M(-1))
stats.Record(ctx, completedMeasure.M(1))
}
func StatsFailed(ctx context.Context) {
common.DecrementGauge(ctx, runningMetricName)
common.IncrementCounter(ctx, failedMetricName)
func statsFailed(ctx context.Context) {
stats.Record(ctx, runningMeasure.M(-1))
stats.Record(ctx, failedMeasure.M(1))
}
func StatsDequeueAndFail(ctx context.Context) {
common.DecrementGauge(ctx, queuedMetricName)
common.IncrementCounter(ctx, failedMetricName)
func statsDequeueAndFail(ctx context.Context) {
stats.Record(ctx, queuedMeasure.M(-1))
stats.Record(ctx, failedMeasure.M(1))
}
func StatsIncrementTimedout(ctx context.Context) {
common.IncrementCounter(ctx, timedoutMetricName)
func statsTimedout(ctx context.Context) {
stats.Record(ctx, timedoutMeasure.M(1))
}
func StatsIncrementErrors(ctx context.Context) {
common.IncrementCounter(ctx, errorsMetricName)
func statsErrors(ctx context.Context) {
stats.Record(ctx, errorsMeasure.M(1))
}
func StatsIncrementTooBusy(ctx context.Context) {
common.IncrementCounter(ctx, serverBusyMetricName)
func statsTooBusy(ctx context.Context) {
stats.Record(ctx, serverBusyMeasure.M(1))
}
const (
// TODO we should probably prefix these with calls_ ?
queuedMetricName = "queued"
callsMetricName = "calls"
callsMetricName = "calls" // TODO this is a dupe of sum {complete,failed} ?
runningMetricName = "running"
completedMetricName = "completed"
failedMetricName = "failed"
@@ -57,3 +66,192 @@ const (
errorsMetricName = "errors"
serverBusyMetricName = "server_busy"
)
var (
queuedMeasure *stats.Int64Measure
callsMeasure *stats.Int64Measure // TODO this is a dupe of sum {complete,failed} ?
runningMeasure *stats.Int64Measure
completedMeasure *stats.Int64Measure
failedMeasure *stats.Int64Measure
timedoutMeasure *stats.Int64Measure
errorsMeasure *stats.Int64Measure
serverBusyMeasure *stats.Int64Measure
)
func init() {
// TODO(reed): doing this at each call site seems not the intention of the library since measurements
// need to be created and views registered. doing this up front seems painful but maybe there
// are benefits?
// TODO(reed): do we have to do this? the measurements will be tagged on the context, will they be propagated
// or we have to white list them in the view for them to show up? test...
var err error
appKey, err := tag.NewKey("fn_appname")
if err != nil {
logrus.Fatal(err)
}
pathKey, err := tag.NewKey("fn_path")
if err != nil {
logrus.Fatal(err)
}
{
queuedMeasure, err = stats.Int64(queuedMetricName, "calls currently queued against agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
queuedMetricName,
"calls currently queued to agent",
[]tag.Key{appKey, pathKey},
queuedMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
callsMeasure, err = stats.Int64(callsMetricName, "calls created in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
callsMetricName,
"calls created in agent",
[]tag.Key{appKey, pathKey},
callsMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
runningMeasure, err = stats.Int64(runningMetricName, "calls currently running in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
runningMetricName,
"calls currently running in agent",
[]tag.Key{appKey, pathKey},
runningMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
completedMeasure, err = stats.Int64(completedMetricName, "calls completed in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
completedMetricName,
"calls completed in agent",
[]tag.Key{appKey, pathKey},
completedMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
failedMeasure, err = stats.Int64(failedMetricName, "calls failed in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
failedMetricName,
"calls failed in agent",
[]tag.Key{appKey, pathKey},
failedMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
timedoutMeasure, err = stats.Int64(timedoutMetricName, "calls timed out in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
timedoutMetricName,
"calls timed out in agent",
[]tag.Key{appKey, pathKey},
timedoutMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
errorsMeasure, err = stats.Int64(errorsMetricName, "calls errored in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
errorsMetricName,
"calls errored in agent",
[]tag.Key{appKey, pathKey},
errorsMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
{
serverBusyMeasure, err = stats.Int64(serverBusyMetricName, "calls where server was too busy in agent", "")
if err != nil {
logrus.Fatal(err)
}
v, err := view.New(
serverBusyMetricName,
"calls where server was too busy in agent",
[]tag.Key{appKey, pathKey},
serverBusyMeasure,
view.SumAggregation{},
)
if err != nil {
logrus.Fatalf("cannot create view: %v", err)
}
if err := v.Subscribe(); err != nil {
logrus.Fatal(err)
}
}
}