mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
Merge pull request #396 from fnproject/add_prometheus_metrics
Add Prometheus statistics and an example to showcase them using Grafana
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"net/http"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -17,6 +18,7 @@ import (
|
|||||||
"github.com/fnproject/fn/api/id"
|
"github.com/fnproject/fn/api/id"
|
||||||
"github.com/fnproject/fn/api/models"
|
"github.com/fnproject/fn/api/models"
|
||||||
"github.com/opentracing/opentracing-go"
|
"github.com/opentracing/opentracing-go"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -105,6 +107,9 @@ type Agent interface {
|
|||||||
// Stats should be burned at the stake. adding so as to not ruffle feathers.
|
// Stats should be burned at the stake. adding so as to not ruffle feathers.
|
||||||
// TODO this should be derived from our metrics
|
// TODO this should be derived from our metrics
|
||||||
Stats() Stats
|
Stats() Stats
|
||||||
|
|
||||||
|
// Return the http.Handler used to handle Prometheus metric requests
|
||||||
|
PromHandler() http.Handler
|
||||||
}
|
}
|
||||||
|
|
||||||
type agent struct {
|
type agent struct {
|
||||||
@@ -131,6 +136,9 @@ type agent struct {
|
|||||||
shutdown chan struct{}
|
shutdown chan struct{}
|
||||||
|
|
||||||
stats // TODO kill me
|
stats // TODO kill me
|
||||||
|
|
||||||
|
// Prometheus HTTP handler
|
||||||
|
promHandler http.Handler
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(ds models.Datastore, mq models.MessageQueue) Agent {
|
func New(ds models.Datastore, mq models.MessageQueue) Agent {
|
||||||
@@ -145,6 +153,7 @@ func New(ds models.Datastore, mq models.MessageQueue) Agent {
|
|||||||
cond: sync.NewCond(new(sync.Mutex)),
|
cond: sync.NewCond(new(sync.Mutex)),
|
||||||
ramTotal: getAvailableMemory(),
|
ramTotal: getAvailableMemory(),
|
||||||
shutdown: make(chan struct{}),
|
shutdown: make(chan struct{}),
|
||||||
|
promHandler: promhttp.Handler(),
|
||||||
}
|
}
|
||||||
|
|
||||||
go a.asyncDequeue() // safe shutdown can nanny this fine
|
go a.asyncDequeue() // safe shutdown can nanny this fine
|
||||||
|
|||||||
9
api/agent/prometheus_metrics.go
Normal file
9
api/agent/prometheus_metrics.go
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (a *agent) PromHandler() http.Handler {
|
||||||
|
return a.promHandler
|
||||||
|
}
|
||||||
@@ -1,6 +1,9 @@
|
|||||||
package agent
|
package agent
|
||||||
|
|
||||||
import "sync"
|
import (
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
// TODO this should expose:
|
// TODO this should expose:
|
||||||
// * hot containers active
|
// * hot containers active
|
||||||
@@ -44,6 +47,44 @@ type FunctionStats struct {
|
|||||||
Failed uint64
|
Failed uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
fnQueued = prometheus.NewGaugeVec(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Name: "fn_api_queued",
|
||||||
|
Help: "Queued requests by path",
|
||||||
|
},
|
||||||
|
[](string){"path"},
|
||||||
|
)
|
||||||
|
fnRunning = prometheus.NewGaugeVec(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Name: "fn_api_running",
|
||||||
|
Help: "Running requests by path",
|
||||||
|
},
|
||||||
|
[](string){"path"},
|
||||||
|
)
|
||||||
|
fnCompleted = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "fn_api_completed",
|
||||||
|
Help: "Completed requests by path",
|
||||||
|
},
|
||||||
|
[](string){"path"},
|
||||||
|
)
|
||||||
|
fnFailed = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "fn_api_failed",
|
||||||
|
Help: "Failed requests by path",
|
||||||
|
},
|
||||||
|
[](string){"path"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
prometheus.MustRegister(fnQueued)
|
||||||
|
prometheus.MustRegister(fnRunning)
|
||||||
|
prometheus.MustRegister(fnFailed)
|
||||||
|
prometheus.MustRegister(fnCompleted)
|
||||||
|
}
|
||||||
|
|
||||||
func (s *stats) getStatsForFunction(path string) *functionStats {
|
func (s *stats) getStatsForFunction(path string) *functionStats {
|
||||||
if s.functionStatsMap == nil {
|
if s.functionStatsMap == nil {
|
||||||
s.functionStatsMap = make(map[string]*functionStats)
|
s.functionStatsMap = make(map[string]*functionStats)
|
||||||
@@ -59,52 +100,78 @@ func (s *stats) getStatsForFunction(path string) *functionStats {
|
|||||||
|
|
||||||
func (s *stats) Enqueue(path string) {
|
func (s *stats) Enqueue(path string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
|
||||||
s.queue++
|
s.queue++
|
||||||
s.getStatsForFunction(path).queue++
|
s.getStatsForFunction(path).queue++
|
||||||
|
fnQueued.WithLabelValues(path).Inc()
|
||||||
|
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Call when a function has been queued but cannot be started because of an error
|
// Call when a function has been queued but cannot be started because of an error
|
||||||
func (s *stats) Dequeue(path string) {
|
func (s *stats) Dequeue(path string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
|
||||||
s.queue--
|
s.queue--
|
||||||
s.getStatsForFunction(path).queue--
|
s.getStatsForFunction(path).queue--
|
||||||
|
fnQueued.WithLabelValues(path).Dec()
|
||||||
|
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *stats) DequeueAndStart(path string) {
|
func (s *stats) DequeueAndStart(path string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
|
||||||
s.queue--
|
s.queue--
|
||||||
s.getStatsForFunction(path).queue--
|
s.getStatsForFunction(path).queue--
|
||||||
|
fnQueued.WithLabelValues(path).Dec()
|
||||||
|
|
||||||
s.running++
|
s.running++
|
||||||
s.getStatsForFunction(path).running++
|
s.getStatsForFunction(path).running++
|
||||||
|
fnRunning.WithLabelValues(path).Inc()
|
||||||
|
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *stats) Complete(path string) {
|
func (s *stats) Complete(path string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
|
||||||
s.running--
|
s.running--
|
||||||
s.getStatsForFunction(path).running--
|
s.getStatsForFunction(path).running--
|
||||||
|
fnRunning.WithLabelValues(path).Dec()
|
||||||
|
|
||||||
s.complete++
|
s.complete++
|
||||||
s.getStatsForFunction(path).complete++
|
s.getStatsForFunction(path).complete++
|
||||||
|
fnCompleted.WithLabelValues(path).Inc()
|
||||||
|
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *stats) Failed(path string) {
|
func (s *stats) Failed(path string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
|
||||||
s.running--
|
s.running--
|
||||||
s.getStatsForFunction(path).running--
|
s.getStatsForFunction(path).running--
|
||||||
|
fnRunning.WithLabelValues(path).Dec()
|
||||||
|
|
||||||
s.failed++
|
s.failed++
|
||||||
s.getStatsForFunction(path).failed++
|
s.getStatsForFunction(path).failed++
|
||||||
|
fnFailed.WithLabelValues(path).Inc()
|
||||||
|
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *stats) DequeueAndFail(path string) {
|
func (s *stats) DequeueAndFail(path string) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
|
|
||||||
s.queue--
|
s.queue--
|
||||||
s.getStatsForFunction(path).queue--
|
s.getStatsForFunction(path).queue--
|
||||||
|
fnQueued.WithLabelValues(path).Dec()
|
||||||
|
|
||||||
s.failed++
|
s.failed++
|
||||||
s.getStatsForFunction(path).failed++
|
s.getStatsForFunction(path).failed++
|
||||||
|
fnFailed.WithLabelValues(path).Inc()
|
||||||
|
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
9
api/server/prometheus_metrics.go
Normal file
9
api/server/prometheus_metrics.go
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (s *Server) handlePrometheusMetrics(c *gin.Context) {
|
||||||
|
s.Agent.PromHandler().ServeHTTP(c.Writer, c.Request)
|
||||||
|
}
|
||||||
@@ -280,6 +280,7 @@ func (s *Server) bindHandlers(ctx context.Context) {
|
|||||||
engine.GET("/", handlePing)
|
engine.GET("/", handlePing)
|
||||||
engine.GET("/version", handleVersion)
|
engine.GET("/version", handleVersion)
|
||||||
engine.GET("/stats", s.handleStats)
|
engine.GET("/stats", s.handleStats)
|
||||||
|
engine.GET("/metrics", s.handlePrometheusMetrics)
|
||||||
|
|
||||||
{
|
{
|
||||||
v1 := engine.Group("/v1")
|
v1 := engine.Group("/v1")
|
||||||
|
|||||||
BIN
docs/assets/GrafanaDashboard.png
Executable file
BIN
docs/assets/GrafanaDashboard.png
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 496 KiB |
100
examples/grafana/README.md
Normal file
100
examples/grafana/README.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
# Display runtime metrics using Prometheus and Grafana
|
||||||
|
|
||||||
|
The Fn server exports metrics using [Prometheus](https://prometheus.io/). This allows [Grafana](https://grafana.com/) to be used to display these metrics graphically.
|
||||||
|
|
||||||
|
<img src="../../docs/assets/GrafanaDashboard.png" width="800">
|
||||||
|
|
||||||
|
## Start an Fn server and deploy some functions
|
||||||
|
|
||||||
|
This example requires an Fn server to be running and that you have deployed one or more functions.
|
||||||
|
See the [front page](/README.md) or any of the other examples for instructions.
|
||||||
|
|
||||||
|
The steps below assume that the Fn server is running at `localhost:8080`.
|
||||||
|
|
||||||
|
## Examine the endpoint used to export metrics to Prometheus
|
||||||
|
|
||||||
|
The Fn server exports metrics to Prometheus using the API endpoint `/metrics`.
|
||||||
|
|
||||||
|
Try pointing your browser at [http://localhost:8080/metrics](http://localhost:8080/metrics).
|
||||||
|
This will display the metrics in Prometheus format.
|
||||||
|
|
||||||
|
## Start Prometheus
|
||||||
|
|
||||||
|
Open a terminal window and navigate to the directory containing this example.
|
||||||
|
|
||||||
|
Examine the provided Prometheus configuration file:
|
||||||
|
|
||||||
|
```
|
||||||
|
cat prometheus.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
This gives
|
||||||
|
|
||||||
|
``` yml
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||||
|
|
||||||
|
# Attach these labels to any time series or alerts when communicating with
|
||||||
|
# external systems (federation, remote storage, Alertmanager).
|
||||||
|
external_labels:
|
||||||
|
monitor: 'fn-monitor'
|
||||||
|
|
||||||
|
# A scrape configuration containing exactly one endpoint to scrape:
|
||||||
|
# Here it's the Fn server
|
||||||
|
scrape_configs:
|
||||||
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
- job_name: 'functions'
|
||||||
|
|
||||||
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
|
scrape_interval: 5s
|
||||||
|
|
||||||
|
static_configs:
|
||||||
|
# Specify all the Fn servers from which metrics will be scraped
|
||||||
|
- targets: ['localhost:8080'] # Uses /metrics by default
|
||||||
|
```
|
||||||
|
Note the last line. This specifies the host and port of the Fn server from which metrics will be obtained.
|
||||||
|
If you are running a cluster of Fn servers then you can specify them all here.
|
||||||
|
|
||||||
|
Now start Prometheus, specifying this config file:
|
||||||
|
```
|
||||||
|
docker run --name=prometheus -d -p 9090:9090 \
|
||||||
|
--mount type=bind,source=`pwd`/prometheus.yml,target=/etc/prometheus/prometheus.yml \
|
||||||
|
--add-host="localhost:`route | grep default | awk '{print $2}'`" prom/prometheus
|
||||||
|
```
|
||||||
|
Note: The parameter `` --add-host="localhost:`route | grep default | awk '{print $2}'`" `` means that Prometheus can use localhost to refer to the host. (The expression `` `route | grep default | awk '{print $2}'` `` returns the IP of the host).
|
||||||
|
|
||||||
|
Open a browser on Prometheus's graph tool at [http://localhost:9090/graph](http://localhost:9090/graph). If you wish, you can use this to query and graph metrics from the Fn server: see the [Prometheus](https://prometheus.io/) documentation for instructions. Alternatively, continue with the next step to view a ready-made set of graphs in Grafana.
|
||||||
|
|
||||||
|
## Start Grafana and load the example dashboard
|
||||||
|
|
||||||
|
[Grafana](https://grafana.com/) provides powerful and flexible facilities to create graphs of any metric available to Prometheus. This example provides a ready-made dashboard that displays the numbers of functions that are queued, running, completed and failed.
|
||||||
|
|
||||||
|
Open a terminal window and navigate to the directory containing this example.
|
||||||
|
|
||||||
|
Start Grafana on port 3000:
|
||||||
|
```
|
||||||
|
docker run --name=grafana -d -p 3000:3000 \
|
||||||
|
--add-host="localhost:`route | grep default | awk '{print $2}'`" grafana/grafana
|
||||||
|
```
|
||||||
|
|
||||||
|
Open a browser on Grafana at [http://localhost:3000](http://localhost:3000).
|
||||||
|
|
||||||
|
Log in using the default user `admin` and default password `admin`.
|
||||||
|
|
||||||
|
Create a data source to obtain metrics from Prometheus:
|
||||||
|
* Click on **Add data source**. In the form that opens:
|
||||||
|
* Set **Name** to `PromDS` (or whatever name you choose)
|
||||||
|
* Set **Type** to `Prometheus`
|
||||||
|
* Set **URL** to `http://localhost:9090`
|
||||||
|
* Set **Access** to `proxy`
|
||||||
|
* Click **Add** and then **Save and test**
|
||||||
|
|
||||||
|
Import the example dashboard that displays metrics from the Fn server:
|
||||||
|
* Click on the main menu at the top left and choose **Dashboards** and then **Home**
|
||||||
|
* Click on **Home** at the top and then **Import dashboard**
|
||||||
|
* In the dialog that opens, click **Upload .json file** and specify `fn_grafana_dashboard.json` in this example's directory.
|
||||||
|
* Specify the Prometheus data source that you just created
|
||||||
|
* Click **Import**
|
||||||
|
|
||||||
|
You should then see the dashboard shown above. Now execute some functions and see the graphs update.
|
||||||
|
|
||||||
1069
examples/grafana/fn_grafana_dashboard.json
Normal file
1069
examples/grafana/fn_grafana_dashboard.json
Normal file
File diff suppressed because it is too large
Load Diff
20
examples/grafana/prometheus.yml
Normal file
20
examples/grafana/prometheus.yml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||||
|
|
||||||
|
# Attach these labels to any time series or alerts when communicating with
|
||||||
|
# external systems (federation, remote storage, Alertmanager).
|
||||||
|
external_labels:
|
||||||
|
monitor: 'fn-monitor'
|
||||||
|
|
||||||
|
# A scrape configuration containing exactly one endpoint to scrape:
|
||||||
|
# Here it's the Fn server
|
||||||
|
scrape_configs:
|
||||||
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
- job_name: 'functions'
|
||||||
|
|
||||||
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
|
scrape_interval: 5s
|
||||||
|
|
||||||
|
static_configs:
|
||||||
|
# Specify all the Fn servers from which metrics will be scraped
|
||||||
|
- targets: ['localhost:8080'] # Uses /metrics by default
|
||||||
@@ -67,6 +67,7 @@ import:
|
|||||||
version: 19f72df4d05d31cbe1c56bfc8045c96babff6c7e
|
version: 19f72df4d05d31cbe1c56bfc8045c96babff6c7e
|
||||||
- package: github.com/prometheus/common
|
- package: github.com/prometheus/common
|
||||||
version: 2f17f4a9d485bf34b4bfaccc273805040e4f86c8
|
version: 2f17f4a9d485bf34b4bfaccc273805040e4f86c8
|
||||||
|
- package: github.com/prometheus/client_golang
|
||||||
testImport:
|
testImport:
|
||||||
- package: github.com/patrickmn/go-cache
|
- package: github.com/patrickmn/go-cache
|
||||||
branch: master
|
branch: master
|
||||||
|
|||||||
Reference in New Issue
Block a user