Add Prometheus statistics and an example to showcase them using Grafana

This commit is contained in:
Nigel Deakin
2017-10-05 16:21:31 +01:00
parent 02715442f0
commit ae31944224
9 changed files with 1293 additions and 8 deletions

View File

@@ -6,6 +6,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"net/http"
"sort" "sort"
"sync" "sync"
"time" "time"
@@ -17,6 +18,7 @@ import (
"github.com/fnproject/fn/api/id" "github.com/fnproject/fn/api/id"
"github.com/fnproject/fn/api/models" "github.com/fnproject/fn/api/models"
"github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@@ -105,6 +107,9 @@ type Agent interface {
// Stats should be burned at the stake. adding so as to not ruffle feathers. // Stats should be burned at the stake. adding so as to not ruffle feathers.
// TODO this should be derived from our metrics // TODO this should be derived from our metrics
Stats() Stats Stats() Stats
// Return the http.Handler used to handle Prometheus metric requests
PromHandler() http.Handler
} }
type agent struct { type agent struct {
@@ -131,6 +136,9 @@ type agent struct {
shutdown chan struct{} shutdown chan struct{}
stats // TODO kill me stats // TODO kill me
// Prometheus HTTP handler
promHandler http.Handler
} }
func New(ds models.Datastore, mq models.MessageQueue) Agent { func New(ds models.Datastore, mq models.MessageQueue) Agent {
@@ -138,13 +146,14 @@ func New(ds models.Datastore, mq models.MessageQueue) Agent {
driver := docker.NewDocker(drivers.Config{}) driver := docker.NewDocker(drivers.Config{})
a := &agent{ a := &agent{
ds: ds, ds: ds,
mq: mq, mq: mq,
driver: driver, driver: driver,
hot: make(map[string]chan slot), hot: make(map[string]chan slot),
cond: sync.NewCond(new(sync.Mutex)), cond: sync.NewCond(new(sync.Mutex)),
ramTotal: getAvailableMemory(), ramTotal: getAvailableMemory(),
shutdown: make(chan struct{}), shutdown: make(chan struct{}),
promHandler: promhttp.Handler(),
} }
go a.asyncDequeue() // safe shutdown can nanny this fine go a.asyncDequeue() // safe shutdown can nanny this fine

View File

@@ -0,0 +1,9 @@
package agent
import (
"net/http"
)
// PromHandler returns the http.Handler stored on the agent for serving
// Prometheus metric requests. The handler is created once in New via
// promhttp.Handler() and the same value is returned on every call.
func (a *agent) PromHandler() http.Handler {
return a.promHandler
}

View File

@@ -1,6 +1,9 @@
package agent package agent
import "sync" import (
"github.com/prometheus/client_golang/prometheus"
"sync"
)
// TODO this should expose: // TODO this should expose:
// * hot containers active // * hot containers active
@@ -44,6 +47,44 @@ type FunctionStats struct {
Failed uint64 Failed uint64
} }
// Prometheus collectors mirroring the per-function counters kept in stats.
// Every series carries a single "path" label, so values are tracked per
// function path.
var (
	// fnQueued is a gauge: incremented when a request is enqueued and
	// decremented when it leaves the queue.
	fnQueued = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "fn_api_queued",
			Help: "Queued requests by path",
		},
		[]string{"path"},
	)

	// fnRunning is a gauge: incremented when a request starts running and
	// decremented when it completes or fails.
	fnRunning = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "fn_api_running",
			Help: "Running requests by path",
		},
		[]string{"path"},
	)

	// fnCompleted is a counter of requests that completed.
	fnCompleted = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "fn_api_completed",
			Help: "Completed requests by path",
		},
		[]string{"path"},
	)

	// fnFailed is a counter of requests that failed, whether while running
	// or while still queued.
	fnFailed = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "fn_api_failed",
			Help: "Failed requests by path",
		},
		[]string{"path"},
	)
)
// init registers the per-path request collectors with the default Prometheus
// registry at package load time. MustRegister panics if any registration
// fails (for example on a duplicate metric name), which surfaces
// misconfiguration at startup.
func init() {
	// Registration order matches the original one-call-per-collector form.
	prometheus.MustRegister(fnQueued, fnRunning, fnFailed, fnCompleted)
}
func (s *stats) getStatsForFunction(path string) *functionStats { func (s *stats) getStatsForFunction(path string) *functionStats {
if s.functionStatsMap == nil { if s.functionStatsMap == nil {
s.functionStatsMap = make(map[string]*functionStats) s.functionStatsMap = make(map[string]*functionStats)
@@ -59,52 +100,78 @@ func (s *stats) getStatsForFunction(path string) *functionStats {
func (s *stats) Enqueue(path string) { func (s *stats) Enqueue(path string) {
s.mu.Lock() s.mu.Lock()
s.queue++ s.queue++
s.getStatsForFunction(path).queue++ s.getStatsForFunction(path).queue++
fnQueued.WithLabelValues(path).Inc()
s.mu.Unlock() s.mu.Unlock()
} }
// Call when a function has been queued but cannot be started because of an error // Call when a function has been queued but cannot be started because of an error
func (s *stats) Dequeue(path string) { func (s *stats) Dequeue(path string) {
s.mu.Lock() s.mu.Lock()
s.queue-- s.queue--
s.getStatsForFunction(path).queue-- s.getStatsForFunction(path).queue--
fnQueued.WithLabelValues(path).Dec()
s.mu.Unlock() s.mu.Unlock()
} }
func (s *stats) DequeueAndStart(path string) { func (s *stats) DequeueAndStart(path string) {
s.mu.Lock() s.mu.Lock()
s.queue-- s.queue--
s.getStatsForFunction(path).queue-- s.getStatsForFunction(path).queue--
fnQueued.WithLabelValues(path).Dec()
s.running++ s.running++
s.getStatsForFunction(path).running++ s.getStatsForFunction(path).running++
fnRunning.WithLabelValues(path).Inc()
s.mu.Unlock() s.mu.Unlock()
} }
func (s *stats) Complete(path string) { func (s *stats) Complete(path string) {
s.mu.Lock() s.mu.Lock()
s.running-- s.running--
s.getStatsForFunction(path).running-- s.getStatsForFunction(path).running--
fnRunning.WithLabelValues(path).Dec()
s.complete++ s.complete++
s.getStatsForFunction(path).complete++ s.getStatsForFunction(path).complete++
fnCompleted.WithLabelValues(path).Inc()
s.mu.Unlock() s.mu.Unlock()
} }
func (s *stats) Failed(path string) { func (s *stats) Failed(path string) {
s.mu.Lock() s.mu.Lock()
s.running-- s.running--
s.getStatsForFunction(path).running-- s.getStatsForFunction(path).running--
fnRunning.WithLabelValues(path).Dec()
s.failed++ s.failed++
s.getStatsForFunction(path).failed++ s.getStatsForFunction(path).failed++
fnFailed.WithLabelValues(path).Inc()
s.mu.Unlock() s.mu.Unlock()
} }
func (s *stats) DequeueAndFail(path string) { func (s *stats) DequeueAndFail(path string) {
s.mu.Lock() s.mu.Lock()
s.queue-- s.queue--
s.getStatsForFunction(path).queue-- s.getStatsForFunction(path).queue--
fnQueued.WithLabelValues(path).Dec()
s.failed++ s.failed++
s.getStatsForFunction(path).failed++ s.getStatsForFunction(path).failed++
fnFailed.WithLabelValues(path).Inc()
s.mu.Unlock() s.mu.Unlock()
} }

View File

@@ -0,0 +1,9 @@
package server
import (
"github.com/gin-gonic/gin"
)
// handlePrometheusMetrics serves the GET /metrics route by delegating the
// request to the agent's Prometheus handler, passing gin's response writer
// and the underlying *http.Request straight through.
func (s *Server) handlePrometheusMetrics(c *gin.Context) {
s.Agent.PromHandler().ServeHTTP(c.Writer, c.Request)
}

View File

@@ -280,6 +280,7 @@ func (s *Server) bindHandlers(ctx context.Context) {
engine.GET("/", handlePing) engine.GET("/", handlePing)
engine.GET("/version", handleVersion) engine.GET("/version", handleVersion)
engine.GET("/stats", s.handleStats) engine.GET("/stats", s.handleStats)
engine.GET("/metrics", s.handlePrometheusMetrics)
{ {
v1 := engine.Group("/v1") v1 := engine.Group("/v1")

100
examples/grafana/README.md Normal file
View File

@@ -0,0 +1,100 @@
# Display runtime metrics using Prometheus and Grafana
The Fn server exports metrics using [Prometheus](https://prometheus.io/). This allows [Grafana](https://grafana.com/) to be used to display these metrics graphically.
-- screenshot
## Start a fn server and deploy some functions
This example requires a Fn server to be running and that you have deployed one or more functions.
See the [front page](/README.md) or any of the other examples for instructions.
The steps below assume that the Fn server is running at `localhost:8080`.
## Examine the endpoint used to export metrics to Prometheus
The Fn server exports metrics to Prometheus using the API endpoint `/metrics`.
Try pointing your browser at [http://localhost:8080/metrics](http://localhost:8080/metrics).
This will display the metrics in Prometheus format.
## Start Prometheus
Open a terminal window and navigate to the directory containing this example.
Examine the provided Prometheus configuration file:
```
cat prometheus.yml
```
This gives
``` yml
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'fn-monitor'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's the Fn server
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'functions'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
# Specify all the fn servers from which metrics will be scraped
- targets: ['localhost:8080'] # Uses /metrics by default
```
Note the last line. This specifies the host and port of the Fn server from which metrics will be obtained.
If you are running a cluster of Fn servers then you can specify them all here.
Now start Prometheus, specifying this config file:
```
docker run --name=prometheus -d -p 9090:9090 \
--mount type=bind,source=`pwd`/prometheus.yml,target=/etc/prometheus/prometheus.yml \
--add-host="localhost:`route | grep default | awk '{print $2}'`" prom/prometheus
```
Note: The parameter `` --add-host="localhost:`route | grep default | awk '{print $2}'`" `` means that Prometheus can use localhost to refer to the host. (The expression `` `route | grep default | awk '{print $2}'` `` returns the IP of the host).
Open a browser on Prometheus's graph tool at [http://localhost:9090/graph](http://localhost:9090/graph). If you wish, you can use this to query and graph metrics from the Fn server: see the [Prometheus](https://prometheus.io/) documentation for instructions. Alternatively, continue with the next step to view a ready-made set of graphs in Grafana.
## Start Grafana and load the example dashboard
[Grafana](https://grafana.com/) provides powerful and flexible facilities to create graphs of any metric available to Prometheus. This example provides a ready-made dashboard that displays the numbers of functions that are queued, running, completed and failed.
Open a terminal window and navigate to the directory containing this example.
Start Grafana on port 3000:
```
docker run --name=grafana -d -p 3000:3000 \
--add-host="localhost:`route | grep default | awk '{print $2}'`" grafana/grafana
```
Open a browser on Grafana at [http://localhost:3000](http://localhost:3000).
Login using the default user `admin` and default password `admin`.
Create a datasource to obtain metrics from Prometheus:
* Click on **Add data source**. In the form that opens:
* Set **Name** to `PromDS` (or whatever name you choose)
* Set **Type** to `Prometheus`
* Set **URL** to `http://localhost:9090`
* Set **Access** to `proxy`
* Click **Add** and then **Save and test**
Import the example dashboard that displays metrics from the Fn server:
* Click on the main menu at the top left and choose **Dashboards** and then **Home**
* Click on **Home** at the top and then **Import dashboard**
* In the dialog that opens, click **Upload .json file** and specify `fn_grafana_dashboard.json` in this example's directory.
* Specify the Prometheus data source that you just created
* Click **Import**
You should then see the dashboard shown above. Now execute some functions and see the graphs update.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'fn-monitor'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's the Fn server
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'functions'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
# Specify all the fn servers from which metrics will be scraped
- targets: ['localhost:8080'] # Uses /metrics by default

View File

@@ -67,6 +67,7 @@ import:
version: 19f72df4d05d31cbe1c56bfc8045c96babff6c7e version: 19f72df4d05d31cbe1c56bfc8045c96babff6c7e
- package: github.com/prometheus/common - package: github.com/prometheus/common
version: 2f17f4a9d485bf34b4bfaccc273805040e4f86c8 version: 2f17f4a9d485bf34b4bfaccc273805040e4f86c8
- package: github.com/prometheus/client_golang
testImport: testImport:
- package: github.com/patrickmn/go-cache - package: github.com/patrickmn/go-cache
branch: master branch: master