mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
Add Prometheus statistics and an example to showcase them using Grafana
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"github.com/fnproject/fn/api/id"
|
||||
"github.com/fnproject/fn/api/models"
|
||||
"github.com/opentracing/opentracing-go"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -105,6 +107,9 @@ type Agent interface {
|
||||
// Stats should be burned at the stake. adding so as to not ruffle feathers.
|
||||
// TODO this should be derived from our metrics
|
||||
Stats() Stats
|
||||
|
||||
// Return the http.Handler used to handle Prometheus metric requests
|
||||
PromHandler() http.Handler
|
||||
}
|
||||
|
||||
type agent struct {
|
||||
@@ -131,6 +136,9 @@ type agent struct {
|
||||
shutdown chan struct{}
|
||||
|
||||
stats // TODO kill me
|
||||
|
||||
// Prometheus HTTP handler
|
||||
promHandler http.Handler
|
||||
}
|
||||
|
||||
func New(ds models.Datastore, mq models.MessageQueue) Agent {
|
||||
@@ -138,13 +146,14 @@ func New(ds models.Datastore, mq models.MessageQueue) Agent {
|
||||
driver := docker.NewDocker(drivers.Config{})
|
||||
|
||||
a := &agent{
|
||||
ds: ds,
|
||||
mq: mq,
|
||||
driver: driver,
|
||||
hot: make(map[string]chan slot),
|
||||
cond: sync.NewCond(new(sync.Mutex)),
|
||||
ramTotal: getAvailableMemory(),
|
||||
shutdown: make(chan struct{}),
|
||||
ds: ds,
|
||||
mq: mq,
|
||||
driver: driver,
|
||||
hot: make(map[string]chan slot),
|
||||
cond: sync.NewCond(new(sync.Mutex)),
|
||||
ramTotal: getAvailableMemory(),
|
||||
shutdown: make(chan struct{}),
|
||||
promHandler: promhttp.Handler(),
|
||||
}
|
||||
|
||||
go a.asyncDequeue() // safe shutdown can nanny this fine
|
||||
|
||||
9
api/agent/prometheus_metrics.go
Normal file
9
api/agent/prometheus_metrics.go
Normal file
@@ -0,0 +1,9 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func (a *agent) PromHandler() http.Handler {
|
||||
return a.promHandler
|
||||
}
|
||||
@@ -1,6 +1,9 @@
|
||||
package agent
|
||||
|
||||
import "sync"
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// TODO this should expose:
|
||||
// * hot containers active
|
||||
@@ -44,6 +47,44 @@ type FunctionStats struct {
|
||||
Failed uint64
|
||||
}
|
||||
|
||||
var (
|
||||
fnQueued = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "fn_api_queued",
|
||||
Help: "Queued requests by path",
|
||||
},
|
||||
[](string){"path"},
|
||||
)
|
||||
fnRunning = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "fn_api_running",
|
||||
Help: "Running requests by path",
|
||||
},
|
||||
[](string){"path"},
|
||||
)
|
||||
fnCompleted = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "fn_api_completed",
|
||||
Help: "Completed requests by path",
|
||||
},
|
||||
[](string){"path"},
|
||||
)
|
||||
fnFailed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "fn_api_failed",
|
||||
Help: "Failed requests by path",
|
||||
},
|
||||
[](string){"path"},
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(fnQueued)
|
||||
prometheus.MustRegister(fnRunning)
|
||||
prometheus.MustRegister(fnFailed)
|
||||
prometheus.MustRegister(fnCompleted)
|
||||
}
|
||||
|
||||
func (s *stats) getStatsForFunction(path string) *functionStats {
|
||||
if s.functionStatsMap == nil {
|
||||
s.functionStatsMap = make(map[string]*functionStats)
|
||||
@@ -59,52 +100,78 @@ func (s *stats) getStatsForFunction(path string) *functionStats {
|
||||
|
||||
func (s *stats) Enqueue(path string) {
|
||||
s.mu.Lock()
|
||||
|
||||
s.queue++
|
||||
s.getStatsForFunction(path).queue++
|
||||
fnQueued.WithLabelValues(path).Inc()
|
||||
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
// Call when a function has been queued but cannot be started because of an error
|
||||
func (s *stats) Dequeue(path string) {
|
||||
s.mu.Lock()
|
||||
|
||||
s.queue--
|
||||
s.getStatsForFunction(path).queue--
|
||||
fnQueued.WithLabelValues(path).Dec()
|
||||
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
func (s *stats) DequeueAndStart(path string) {
|
||||
s.mu.Lock()
|
||||
|
||||
s.queue--
|
||||
s.getStatsForFunction(path).queue--
|
||||
fnQueued.WithLabelValues(path).Dec()
|
||||
|
||||
s.running++
|
||||
s.getStatsForFunction(path).running++
|
||||
fnRunning.WithLabelValues(path).Inc()
|
||||
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
func (s *stats) Complete(path string) {
|
||||
s.mu.Lock()
|
||||
|
||||
s.running--
|
||||
s.getStatsForFunction(path).running--
|
||||
fnRunning.WithLabelValues(path).Dec()
|
||||
|
||||
s.complete++
|
||||
s.getStatsForFunction(path).complete++
|
||||
fnCompleted.WithLabelValues(path).Inc()
|
||||
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
func (s *stats) Failed(path string) {
|
||||
s.mu.Lock()
|
||||
|
||||
s.running--
|
||||
s.getStatsForFunction(path).running--
|
||||
fnRunning.WithLabelValues(path).Dec()
|
||||
|
||||
s.failed++
|
||||
s.getStatsForFunction(path).failed++
|
||||
fnFailed.WithLabelValues(path).Inc()
|
||||
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
func (s *stats) DequeueAndFail(path string) {
|
||||
s.mu.Lock()
|
||||
|
||||
s.queue--
|
||||
s.getStatsForFunction(path).queue--
|
||||
fnQueued.WithLabelValues(path).Dec()
|
||||
|
||||
s.failed++
|
||||
s.getStatsForFunction(path).failed++
|
||||
fnFailed.WithLabelValues(path).Inc()
|
||||
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
|
||||
9
api/server/prometheus_metrics.go
Normal file
9
api/server/prometheus_metrics.go
Normal file
@@ -0,0 +1,9 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (s *Server) handlePrometheusMetrics(c *gin.Context) {
|
||||
s.Agent.PromHandler().ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
@@ -280,6 +280,7 @@ func (s *Server) bindHandlers(ctx context.Context) {
|
||||
engine.GET("/", handlePing)
|
||||
engine.GET("/version", handleVersion)
|
||||
engine.GET("/stats", s.handleStats)
|
||||
engine.GET("/metrics", s.handlePrometheusMetrics)
|
||||
|
||||
{
|
||||
v1 := engine.Group("/v1")
|
||||
|
||||
100
examples/grafana/README.md
Normal file
100
examples/grafana/README.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Display runtime metrics using Prometheus and Grafana
|
||||
|
||||
The Fn server exports metrics using [Prometheus](https://prometheus.io/). This allows [Grafana](https://grafana.com/) to be used to display these metrics graphically.
|
||||
|
||||
-- screenshot
|
||||
|
||||
## Start a fn server and deploy some functions
|
||||
|
||||
This example requires a running Fn server on which you have deployed one or more functions.
|
||||
See the [front page](/README.md) or any of the other examples for instructions.
|
||||
|
||||
The steps below assume that the Fn server is running at `localhost:8080`.
|
||||
|
||||
## Examine the endpoint used to export metrics to Prometheus
|
||||
|
||||
The Fn server exports metrics to Prometheus using the API endpoint `/metrics`.
|
||||
|
||||
Try pointing your browser at [http://localhost:8080/metrics](http://localhost:8080/metrics).
|
||||
This will display the metrics in the Prometheus text format.
|
||||
|
||||
## Start Prometheus
|
||||
|
||||
Open a terminal window and navigate to the directory containing this example.
|
||||
|
||||
Examine the provided Prometheus configuration file:
|
||||
|
||||
```
|
||||
cat prometheus.yml
|
||||
```
|
||||
|
||||
This gives
|
||||
|
||||
``` yml
|
||||
global:
|
||||
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
external_labels:
|
||||
monitor: 'fn-monitor'
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's the Fn server
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'functions'
|
||||
|
||||
# Override the global default and scrape targets from this job every 5 seconds.
|
||||
scrape_interval: 5s
|
||||
|
||||
static_configs:
|
||||
# Specify all the fn servers from which metrics will be scraped
|
||||
- targets: ['localhost:8080'] # Uses /metrics by default
|
||||
```
|
||||
Note the last line. This specifies the host and port of the Fn server from which metrics will be obtained.
|
||||
If you are running a cluster of Fn servers then you can specify them all here.
|
||||
|
||||
Now start Prometheus, specifying this config file:
|
||||
```
|
||||
docker run --name=prometheus -d -p 9090:9090 \
|
||||
--mount type=bind,source=`pwd`/prometheus.yml,target=/etc/prometheus/prometheus.yml \
|
||||
--add-host="localhost:`route | grep default | awk '{print $2}'`" prom/prometheus
|
||||
```
|
||||
Note: The parameter `` --add-host="localhost:`route | grep default | awk '{print $2}'`" `` means that Prometheus can use localhost to refer to the host. (The expression `` `route | grep default | awk '{print $2}'` `` returns the IP of the host).
|
||||
|
||||
Open a browser on Prometheus's graph tool at [http://localhost:9090/graph](http://localhost:9090/graph). If you wish, you can use this tool to query and graph metrics from the Fn server: see the [Prometheus](https://prometheus.io/) documentation for instructions. Alternatively, continue with the next step to view a ready-made set of graphs in Grafana.
|
||||
|
||||
## Start Grafana and load the example dashboard
|
||||
|
||||
[Grafana](https://grafana.com/) provides powerful and flexible facilities to create graphs of any metric available to Prometheus. This example provides a ready-made dashboard that displays the numbers of functions that are queued, running, completed and failed.
|
||||
|
||||
Open a terminal window and navigate to the directory containing this example.
|
||||
|
||||
Start Grafana on port 3000:
|
||||
```
|
||||
docker run --name=grafana -d -p 3000:3000 \
|
||||
--add-host="localhost:`route | grep default | awk '{print $2}'`" grafana/grafana
|
||||
```
|
||||
|
||||
Open a browser on Grafana at [http://localhost:3000](http://localhost:3000).
|
||||
|
||||
Login using the default user `admin` and default password `admin`.
|
||||
|
||||
Create a data source to obtain metrics from Prometheus:
|
||||
* Click on **Add data source**. In the form that opens:
|
||||
* Set **Name** to `PromDS` (or whatever name you choose)
|
||||
* Set **Type** to `Prometheus`
|
||||
* Set **URL** to `http://localhost:9090`
|
||||
* Set **Access** to `proxy`
|
||||
* Click **Add** and then **Save and test**
|
||||
|
||||
Import the example dashboard that displays metrics from the Fn server:
|
||||
* Click on the main menu at the top left and choose **Dashboards** and then **Home**
|
||||
* Click on **Home** at the top and then **Import dashboard**
|
||||
* In the dialog that opens, click **Upload .json file** and specify `fn_grafana_dashboard.json` in this example's directory.
|
||||
* Specify the Prometheus data source that you just created
|
||||
* Click **Import**
|
||||
|
||||
You should then see the dashboard shown above. Now execute some functions and see the graphs update.
|
||||
|
||||
1069
examples/grafana/fn_grafana_dashboard.json
Normal file
1069
examples/grafana/fn_grafana_dashboard.json
Normal file
File diff suppressed because it is too large
Load Diff
20
examples/grafana/prometheus.yml
Normal file
20
examples/grafana/prometheus.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
global:
|
||||
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
external_labels:
|
||||
monitor: 'fn-monitor'
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's the Fn server
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'functions'
|
||||
|
||||
# Override the global default and scrape targets from this job every 5 seconds.
|
||||
scrape_interval: 5s
|
||||
|
||||
static_configs:
|
||||
# Specify all the fn servers from which metrics will be scraped
|
||||
- targets: ['localhost:8080'] # Uses /metrics by default
|
||||
@@ -67,6 +67,7 @@ import:
|
||||
version: 19f72df4d05d31cbe1c56bfc8045c96babff6c7e
|
||||
- package: github.com/prometheus/common
|
||||
version: 2f17f4a9d485bf34b4bfaccc273805040e4f86c8
|
||||
- package: github.com/prometheus/client_golang
|
||||
testImport:
|
||||
- package: github.com/patrickmn/go-cache
|
||||
branch: master
|
||||
|
||||
Reference in New Issue
Block a user