Files
fn-serverless/api/server/server.go
Tolga Ceylan 9f29d824d6 fn: New timeout for LB Placer (#1137)
* fn: New timeout for LB Placer

Previously, LB Placers worked hard as long as
client contexts allowed for. Adding a Placer
config setting to bound this by 360 seconds by
default.

The new timeout is not accounted during actual
function execution and only applies to the amount
of wait time in Placers when the call is not
being executed.
2018-07-26 10:19:25 -07:00

1251 lines
35 KiB
Go

package server
import (
"bufio"
"bytes"
"context"
"encoding/base64"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"path"
"path/filepath"
"reflect"
"strconv"
"strings"
"syscall"
"unicode"
"github.com/fnproject/fn/api/agent"
"github.com/fnproject/fn/api/agent/hybrid"
"github.com/fnproject/fn/api/common"
"github.com/fnproject/fn/api/datastore"
"github.com/fnproject/fn/api/id"
"github.com/fnproject/fn/api/logs"
"github.com/fnproject/fn/api/models"
"github.com/fnproject/fn/api/mqs"
pool "github.com/fnproject/fn/api/runnerpool"
"github.com/fnproject/fn/api/version"
"github.com/fnproject/fn/fnext"
"github.com/gin-gonic/gin"
zipkinhttp "github.com/openzipkin/zipkin-go/reporter/http"
promclient "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"go.opencensus.io/exporter/jaeger"
"go.opencensus.io/exporter/prometheus"
"go.opencensus.io/exporter/zipkin"
"go.opencensus.io/plugin/ochttp"
"go.opencensus.io/stats/view"
"go.opencensus.io/trace"
)
const (
// TODO these are kind of redundant as exported values since the env vars
// have to be set using these values (hopefully from an env), consider
// forcing usage through WithXxx configuration methods and documenting there vs.
// expecting users to use os.SetEnv(EnvLogLevel, "debug") // why ?
// EnvLogLevel sets the stderr logging level
EnvLogLevel = "FN_LOG_LEVEL"
// EnvLogDest is a url of a log destination:
// possible schemes: { udp, tcp, file }
// file url must contain only a path, syslog must contain only a host[:port]
// expect: [scheme://][host][:port][/path]
// default scheme to udp:// if none given
EnvLogDest = "FN_LOG_DEST"
// EnvLogPrefix is a prefix to affix to each log line.
EnvLogPrefix = "FN_LOG_PREFIX"
// EnvMQURL is a url to an MQ service:
// possible out-of-the-box schemes: { memory, redis, bolt }
EnvMQURL = "FN_MQ_URL"
// EnvDBURL is a url to a db service:
// possible schemes: { postgres, sqlite3, mysql }
EnvDBURL = "FN_DB_URL"
// EnvLogDBURL is a url to a log storage service:
// possible schemes: { postgres, sqlite3, mysql, s3 }
EnvLogDBURL = "FN_LOGSTORE_URL"
// EnvRunnerURL is a url pointing to an Fn API service.
EnvRunnerURL = "FN_RUNNER_API_URL"
// EnvRunnerAddresses is a list of runner urls for an lb to use.
EnvRunnerAddresses = "FN_RUNNER_ADDRESSES"
// EnvPublicLoadBalancerURL is the url to inject into trigger responses to get a public url.
EnvPublicLoadBalancerURL = "FN_PUBLIC_LB_URL"
// EnvNodeType defines the runtime mode for fn to run in, options
// are one of: { full, api, lb, runner, pure-runner }
EnvNodeType = "FN_NODE_TYPE"
// EnvPort is the port to listen on for fn http server.
EnvPort = "FN_PORT" // be careful, Gin expects this variable to be "port"
// EnvGRPCPort is the port to run the grpc server on for a pure-runner node.
EnvGRPCPort = "FN_GRPC_PORT"
// EnvAPICORSOrigins is the list of CORS origins to allow.
EnvAPICORSOrigins = "FN_API_CORS_ORIGINS"
// EnvAPICORSHeaders is the list of CORS headers allowed.
EnvAPICORSHeaders = "FN_API_CORS_HEADERS"
// EnvZipkinURL is the url of a zipkin node to send traces to.
EnvZipkinURL = "FN_ZIPKIN_URL"
// EnvJaegerURL is the url of a jaeger node to send traces to.
EnvJaegerURL = "FN_JAEGER_URL"
// EnvCert is the certificate used to communicate with other fn nodes.
EnvCert = "FN_NODE_CERT"
// EnvCertKey is the key for the specified cert.
EnvCertKey = "FN_NODE_CERT_KEY"
// EnvCertAuth is the CA for the cert provided.
EnvCertAuth = "FN_NODE_CERT_AUTHORITY"
// EnvRIDHeader is the header name of the incoming request which holds the request ID
EnvRIDHeader = "FN_RID_HEADER"
// EnvProcessCollectorList is the list of procid's to collect metrics for.
EnvProcessCollectorList = "FN_PROCESS_COLLECTOR_LIST"
// EnvLBPlacementAlg is the algorithm to place fn calls to fn runners in lb.[0w
EnvLBPlacementAlg = "FN_PLACER"
// DefaultLogLevel is info
DefaultLogLevel = "info"
// DefaultLogDest is stderr
DefaultLogDest = "stderr"
// DefaultPort is 8080
DefaultPort = 8080
// DefaultGRPCPort is 9190
DefaultGRPCPort = 9190
)
// NodeType is the mode to run fn in.
type NodeType int32
const (
// ServerTypeFull runs all API endpoints, including executing tasks.
ServerTypeFull NodeType = iota
// ServerTypeAPI runs only /v1 endpoints, to manage resources.
ServerTypeAPI
// ServerTypeLB runs only /r/ endpoints, routing to runner nodes.
ServerTypeLB
// ServerTypeRunner runs only /r/ endpoints, to execute tasks.
ServerTypeRunner
// ServerTypePureRunner runs only grpc server, to execute tasks.
ServerTypePureRunner
)
func (s NodeType) String() string {
switch s {
case ServerTypeFull:
return "full"
case ServerTypeAPI:
return "api"
case ServerTypeLB:
return "lb"
case ServerTypeRunner:
return "runner"
case ServerTypePureRunner:
return "pure-runner"
default:
return fmt.Sprintf("unknown(%d)", s)
}
}
// Server is the object which ties together all the fn things, it is the entrypoint
// for managing fn resources and executing tasks.
type Server struct {
Router *gin.Engine
AdminRouter *gin.Engine
webListenPort int
adminListenPort int
grpcListenPort int
agent agent.Agent
datastore models.Datastore
mq models.MessageQueue
logstore models.LogStore
nodeType NodeType
// Agent enqueue and read stores
lbEnqueue agent.EnqueueDataAccess
lbReadAccess agent.ReadDataAccess
noHTTTPTriggerEndpoint bool
cert string
certKey string
certAuthority string
appListeners *appListeners
routeListeners *routeListeners
fnListeners *fnListeners
triggerListeners *triggerListeners
rootMiddlewares []fnext.Middleware
apiMiddlewares []fnext.Middleware
promExporter *prometheus.Exporter
triggerAnnotator TriggerAnnotator
// Extensions can append to this list of contexts so that cancellations are properly handled.
extraCtxs []context.Context
}
func nodeTypeFromString(value string) NodeType {
switch value {
case "api":
return ServerTypeAPI
case "lb":
return ServerTypeLB
case "runner":
return ServerTypeRunner
case "pure-runner":
return ServerTypePureRunner
default:
return ServerTypeFull
}
}
// NewFromEnv creates a new Functions server based on env vars.
func NewFromEnv(ctx context.Context, opts ...Option) *Server {
curDir := pwd()
var defaultDB, defaultMQ string
nodeType := nodeTypeFromString(getEnv(EnvNodeType, "")) // default to full
switch nodeType {
case ServerTypeLB: // nothing
case ServerTypeRunner: // nothing
case ServerTypePureRunner: // nothing
default:
// only want to activate these for full and api nodes
defaultDB = fmt.Sprintf("sqlite3://%s/data/fn.db", curDir)
defaultMQ = fmt.Sprintf("bolt://%s/data/fn.mq", curDir)
}
opts = append(opts, WithWebPort(getEnvInt(EnvPort, DefaultPort)))
opts = append(opts, WithGRPCPort(getEnvInt(EnvGRPCPort, DefaultGRPCPort)))
opts = append(opts, WithLogLevel(getEnv(EnvLogLevel, DefaultLogLevel)))
opts = append(opts, WithLogDest(getEnv(EnvLogDest, DefaultLogDest), getEnv(EnvLogPrefix, "")))
opts = append(opts, WithZipkin(getEnv(EnvZipkinURL, "")))
opts = append(opts, WithJaeger(getEnv(EnvJaegerURL, "")))
opts = append(opts, WithPrometheus()) // TODO option to turn this off?
opts = append(opts, WithDBURL(getEnv(EnvDBURL, defaultDB)))
opts = append(opts, WithMQURL(getEnv(EnvMQURL, defaultMQ)))
opts = append(opts, WithLogURL(getEnv(EnvLogDBURL, "")))
opts = append(opts, WithRunnerURL(getEnv(EnvRunnerURL, "")))
opts = append(opts, WithType(nodeType))
opts = append(opts, WithNodeCert(getEnv(EnvCert, "")))
opts = append(opts, WithNodeCertKey(getEnv(EnvCertKey, "")))
opts = append(opts, WithNodeCertAuthority(getEnv(EnvCertAuth, "")))
publicLBURL := getEnv(EnvPublicLoadBalancerURL, "")
if publicLBURL != "" {
logrus.Infof("using LB Base URL: '%s'", publicLBURL)
opts = append(opts, WithTriggerAnnotator(NewStaticURLTriggerAnnotator(publicLBURL)))
} else {
opts = append(opts, WithTriggerAnnotator(NewRequestBasedTriggerAnnotator()))
}
// Agent handling depends on node type and several other options so it must be the last processed option.
// Also we only need to create an agent if this is not an API node.
if nodeType != ServerTypeAPI {
opts = append(opts, WithAgentFromEnv())
} else {
// NOTE: ensures logstore is set or there will be troubles
opts = append(opts, WithLogstoreFromDatastore())
}
return New(ctx, opts...)
}
func pwd() string {
cwd, err := os.Getwd()
if err != nil {
logrus.WithError(err).Fatalln("couldn't get working directory, possibly unsupported platform?")
}
// Replace forward slashes in case this is windows, URL parser errors
return strings.Replace(cwd, "\\", "/", -1)
}
// WithWebPort maps EnvPort
func WithWebPort(port int) Option {
return func(ctx context.Context, s *Server) error {
if s.adminListenPort == s.webListenPort {
s.adminListenPort = port
}
s.webListenPort = port
return nil
}
}
// WithGRPCPort maps EnvGRPCPort
func WithGRPCPort(port int) Option {
return func(ctx context.Context, s *Server) error {
s.grpcListenPort = port
return nil
}
}
// WithLogLevel maps EnvLogLevel
func WithLogLevel(ll string) Option {
return func(ctx context.Context, s *Server) error {
common.SetLogLevel(ll)
return nil
}
}
// WithLogDest maps EnvLogDest
func WithLogDest(dst, prefix string) Option {
return func(ctx context.Context, s *Server) error {
common.SetLogDest(dst, prefix)
return nil
}
}
// WithDBURL maps EnvDBURL
func WithDBURL(dbURL string) Option {
return func(ctx context.Context, s *Server) error {
if dbURL != "" {
ds, err := datastore.New(ctx, dbURL)
if err != nil {
return err
}
s.datastore = ds
s.lbReadAccess = agent.NewCachedDataAccess(s.datastore)
}
return nil
}
}
// WithMQURL maps EnvMQURL
func WithMQURL(mqURL string) Option {
return func(ctx context.Context, s *Server) error {
if mqURL != "" {
mq, err := mqs.New(mqURL)
if err != nil {
return err
}
s.mq = mq
s.lbEnqueue = agent.NewDirectEnqueueAccess(mq)
}
return nil
}
}
// WithLogURL maps EnvLogURL
func WithLogURL(logstoreURL string) Option {
return func(ctx context.Context, s *Server) error {
if ldb := logstoreURL; ldb != "" {
logDB, err := logs.New(ctx, logstoreURL)
if err != nil {
return err
}
s.logstore = logDB
}
return nil
}
}
// WithRunnerURL maps EnvRunnerURL
func WithRunnerURL(runnerURL string) Option {
return func(ctx context.Context, s *Server) error {
if runnerURL != "" {
cl, err := hybrid.NewClient(runnerURL)
if err != nil {
return err
}
s.lbReadAccess = agent.NewCachedDataAccess(cl)
}
return nil
}
}
// WithType maps EnvNodeType
func WithType(t NodeType) Option {
return func(ctx context.Context, s *Server) error {
s.nodeType = t
return nil
}
}
// WithNodeCert maps EnvNodeCert
func WithNodeCert(cert string) Option {
return func(ctx context.Context, s *Server) error {
if cert != "" {
abscert, err := filepath.Abs(cert)
if err != nil {
return fmt.Errorf("Unable to resolve %v: please specify a valid and readable cert file", cert)
}
_, err = os.Stat(abscert)
if err != nil {
return fmt.Errorf("Cannot stat %v: please specify a valid and readable cert file", abscert)
}
s.cert = abscert
}
return nil
}
}
// WithNodeCertKey maps EnvNodeCertKey
func WithNodeCertKey(key string) Option {
return func(ctx context.Context, s *Server) error {
if key != "" {
abskey, err := filepath.Abs(key)
if err != nil {
return fmt.Errorf("Unable to resolve %v: please specify a valid and readable cert key file", key)
}
_, err = os.Stat(abskey)
if err != nil {
return fmt.Errorf("Cannot stat %v: please specify a valid and readable cert key file", abskey)
}
s.certKey = abskey
}
return nil
}
}
// WithNodeCertAuthority maps EnvNodeCertAuthority
func WithNodeCertAuthority(ca string) Option {
return func(ctx context.Context, s *Server) error {
if ca != "" {
absca, err := filepath.Abs(ca)
if err != nil {
return fmt.Errorf("Unable to resolve %v: please specify a valid and readable cert authority file", ca)
}
_, err = os.Stat(absca)
if err != nil {
return fmt.Errorf("Cannot stat %v: please specify a valid and readable cert authority file", absca)
}
s.certAuthority = absca
}
return nil
}
}
// WithReadDataAccess overrides the LB read DataAccess for a server
func WithReadDataAccess(ds agent.ReadDataAccess) Option {
return func(ctx context.Context, s *Server) error {
s.lbReadAccess = ds
return nil
}
}
// WithDatastore allows directly setting a datastore
func WithDatastore(ds models.Datastore) Option {
return func(ctx context.Context, s *Server) error {
s.datastore = ds
if s.lbReadAccess == nil {
s.lbReadAccess = agent.NewCachedDataAccess(ds)
}
return nil
}
}
// WithMQ allows directly setting an MQ
func WithMQ(mq models.MessageQueue) Option {
return func(ctx context.Context, s *Server) error {
s.mq = mq
s.lbEnqueue = agent.NewDirectEnqueueAccess(mq)
return nil
}
}
// WithLogstore allows directly setting a logstore
func WithLogstore(ls models.LogStore) Option {
return func(ctx context.Context, s *Server) error {
s.logstore = ls
return nil
}
}
// WithAgent allows directly setting an agent
func WithAgent(agent agent.Agent) Option {
return func(ctx context.Context, s *Server) error {
s.agent = agent
return nil
}
}
func (s *Server) defaultRunnerPool() (pool.RunnerPool, error) {
runnerAddresses := getEnv(EnvRunnerAddresses, "")
if runnerAddresses == "" {
return nil, errors.New("must provide FN_RUNNER_ADDRESSES when running in default load-balanced mode")
}
return agent.DefaultStaticRunnerPool(strings.Split(runnerAddresses, ",")), nil
}
// WithLogstoreFromDatastore sets the logstore to the datastore, iff
// the datastore implements the logstore interface.
func WithLogstoreFromDatastore() Option {
return func(ctx context.Context, s *Server) error {
if s.datastore == nil {
return errors.New("Need a datastore in order to use it as a logstore")
}
if s.logstore == nil {
if ls, ok := s.datastore.(models.LogStore); ok {
s.logstore = ls
} else {
return errors.New("datastore must implement logstore interface")
}
}
return nil
}
}
// WithFullAgent is a shorthand for WithAgent(... create a full agent here ...)
func WithFullAgent() Option {
return func(ctx context.Context, s *Server) error {
s.nodeType = ServerTypeFull
// ensure logstore is set (TODO compat only?)
if s.logstore == nil {
WithLogstoreFromDatastore()(ctx, s)
}
if s.datastore == nil || s.logstore == nil || s.mq == nil {
return errors.New("full nodes must configure FN_DB_URL, FN_LOG_URL, FN_MQ_URL")
}
da := agent.NewDirectCallDataAccess(s.logstore, s.mq)
dq := agent.NewDirectDequeueAccess(s.mq)
s.agent = agent.New(da, agent.WithAsync(dq))
return nil
}
}
// WithAgentFromEnv must be provided as the last server option because it relies
// on all other options being set first.
func WithAgentFromEnv() Option {
return func(ctx context.Context, s *Server) error {
switch s.nodeType {
case ServerTypeAPI:
return errors.New("should not initialize an agent for an Fn API node")
case ServerTypeRunner:
runnerURL := getEnv(EnvRunnerURL, "")
if runnerURL == "" {
return errors.New("no FN_RUNNER_API_URL provided for an Fn Runner node")
}
cl, err := hybrid.NewClient(runnerURL)
if err != nil {
return err
}
s.agent = agent.New(cl)
case ServerTypePureRunner:
if s.datastore != nil {
return errors.New("pure runner nodes must not be configured with a datastore (FN_DB_URL)")
}
if s.mq != nil {
return errors.New("pure runner nodes must not be configured with a message queue (FN_MQ_URL)")
}
ds, err := hybrid.NewNopDataStore()
if err != nil {
return err
}
grpcAddr := fmt.Sprintf(":%d", s.grpcListenPort)
cancelCtx, cancel := context.WithCancel(ctx)
prAgent, err := agent.DefaultPureRunner(cancel, grpcAddr, ds, s.cert, s.certKey, s.certAuthority)
if err != nil {
return err
}
s.agent = prAgent
s.extraCtxs = append(s.extraCtxs, cancelCtx)
case ServerTypeLB:
s.nodeType = ServerTypeLB
runnerURL := getEnv(EnvRunnerURL, "")
if runnerURL == "" {
return errors.New("no FN_RUNNER_API_URL provided for an Fn NuLB node")
}
if s.datastore != nil {
return errors.New("lb nodes must not be configured with a datastore (FN_DB_URL)")
}
if s.mq != nil {
return errors.New("lb nodes must not be configured with a message queue (FN_MQ_URL)")
}
cl, err := hybrid.NewClient(runnerURL)
if err != nil {
return err
}
runnerPool, err := s.defaultRunnerPool()
if err != nil {
return err
}
// Select the placement algorithm
placerCfg := pool.NewPlacerConfig()
var placer pool.Placer
switch getEnv(EnvLBPlacementAlg, "") {
case "ch":
placer = pool.NewCHPlacer(&placerCfg)
default:
placer = pool.NewNaivePlacer(&placerCfg)
}
keys := []string{"fn_appname", "fn_path"}
pool.RegisterPlacerViews(keys)
agent.RegisterLBAgentViews(keys)
s.lbReadAccess = agent.NewCachedDataAccess(cl)
s.agent, err = agent.NewLBAgent(cl, runnerPool, placer)
if err != nil {
return errors.New("LBAgent creation failed")
}
default:
WithFullAgent()(ctx, s)
}
return nil
}
}
// WithExtraCtx appends a context to the list of contexts the server will watch for cancellations / errors / signals.
func WithExtraCtx(extraCtx context.Context) Option {
return func(ctx context.Context, s *Server) error {
s.extraCtxs = append(s.extraCtxs, extraCtx)
return nil
}
}
//WithTriggerAnnotator adds a trigggerEndpoint provider to the server
func WithTriggerAnnotator(provider TriggerAnnotator) Option {
return func(ctx context.Context, s *Server) error {
s.triggerAnnotator = provider
return nil
}
}
// WithAdminServer starts the admin server on the specified port.
func WithAdminServer(port int) Option {
return func(ctx context.Context, s *Server) error {
s.AdminRouter = gin.New()
s.adminListenPort = port
return nil
}
}
// New creates a new Functions server with the opts given. For convenience, users may
// prefer to use NewFromEnv but New is more flexible if needed.
func New(ctx context.Context, opts ...Option) *Server {
ctx, span := trace.StartSpan(ctx, "server_init")
defer span.End()
log := common.Logger(ctx)
engine := gin.New()
s := &Server{
Router: engine,
AdminRouter: engine,
// Add default ports
webListenPort: DefaultPort,
adminListenPort: DefaultPort,
grpcListenPort: DefaultGRPCPort,
lbEnqueue: agent.NewUnsupportedAsyncEnqueueAccess(),
// Almost everything else is configured through opts (see NewFromEnv for ex.) or below
}
for _, opt := range opts {
if opt == nil {
continue
}
err := opt(ctx, s)
if err != nil {
log.WithError(err).Fatal("Error during server opt initialization.")
}
}
requireConfigSet := func(id string, val interface{}) {
if val == nil {
log.Fatalf("Invalid configuration for server type %s, %s must be configured during startup", s.nodeType, id)
}
}
requireConfigNotSet := func(id string, val interface{}) {
if val != nil {
log.Fatalf("Invalid configuration for server type %s, %s must not be configured during startup", s.nodeType, id)
}
}
// Check that WithAgent options have been processed correctly.
// Yuck the yuck - server should really be split into several interfaces (LB, Runner, API) and each should be instantiated separately
switch s.nodeType {
case ServerTypeAPI:
requireConfigNotSet("agent", s.agent)
requireConfigSet("datastore", s.datastore)
requireConfigSet("triggerAnnotator", s.triggerAnnotator)
case ServerTypeFull:
requireConfigSet("enqueue", s.lbEnqueue)
requireConfigSet("agent", s.agent)
requireConfigSet("lbReadAccess", s.lbReadAccess)
requireConfigSet("datastore", s.datastore)
requireConfigSet("triggerAnnotator", s.triggerAnnotator)
case ServerTypeLB:
requireConfigSet("lbReadAccess", s.lbReadAccess)
requireConfigSet("agent", s.agent)
requireConfigSet("lbEnqueue", s.lbEnqueue)
case ServerTypeRunner:
requireConfigSet("lbReadAccess", s.lbReadAccess)
requireConfigSet("agent", s.agent)
case ServerTypePureRunner:
requireConfigSet("agent", s.agent)
default:
log.Fatal("unknown server type %d", s.nodeType)
}
setMachineID()
s.Router.Use(loggerWrap, traceWrap, panicWrap) // TODO should be opts
optionalCorsWrap(s.Router) // TODO should be an opt
apiMetricsWrap(s)
s.bindHandlers(ctx)
s.appListeners = new(appListeners)
s.routeListeners = new(routeListeners)
s.fnListeners = new(fnListeners)
s.triggerListeners = new(triggerListeners)
// TODO it's not clear that this is always correct as the read store won't get wrapping
s.datastore = datastore.Wrap(s.datastore)
s.datastore = fnext.NewDatastore(s.datastore, s.appListeners, s.routeListeners, s.fnListeners, s.triggerListeners)
s.logstore = logs.Wrap(s.logstore)
return s
}
// WithPrometheus activates the prometheus collection and /metrics endpoint
func WithPrometheus() Option {
return func(ctx context.Context, s *Server) error {
reg := promclient.NewRegistry()
reg.MustRegister(promclient.NewProcessCollector(os.Getpid(), "fn"),
promclient.NewGoCollector(),
)
for _, exeName := range getMonitoredCmdNames() {
san := promSanitizeMetricName(exeName)
err := reg.Register(promclient.NewProcessCollectorPIDFn(getPidCmd(exeName), san))
if err != nil {
panic(err)
}
}
exporter, err := prometheus.NewExporter(prometheus.Options{
Namespace: "fn",
Registry: reg,
OnError: func(err error) { logrus.WithError(err).Error("opencensus prometheus exporter err") },
})
if err != nil {
return fmt.Errorf("error starting prometheus exporter: %v", err)
}
s.promExporter = exporter
view.RegisterExporter(exporter)
registerViews()
return nil
}
}
// WithoutHTTPTriggerEndpoints optionally disables the trigger and route endpoints from a LB -supporting server, allowing extensions to replace them with their own versions
func WithoutHTTPTriggerEndpoints() Option {
return func(ctx context.Context, s *Server) error {
s.noHTTTPTriggerEndpoint = true
return nil
}
}
// WithJaeger maps EnvJaegerURL
func WithJaeger(jaegerURL string) Option {
return func(ctx context.Context, s *Server) error {
// ex: "http://localhost:14268"
if jaegerURL == "" {
return nil
}
exporter, err := jaeger.NewExporter(jaeger.Options{
Endpoint: jaegerURL,
ServiceName: "fn",
})
if err != nil {
return fmt.Errorf("error connecting to jaeger: %v", err)
}
trace.RegisterExporter(exporter)
logrus.WithFields(logrus.Fields{"url": jaegerURL}).Info("exporting spans to jaeger")
// TODO don't do this. testing parity.
trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()})
return nil
}
}
// WithZipkin maps EnvZipkinURL
func WithZipkin(zipkinURL string) Option {
return func(ctx context.Context, s *Server) error {
// ex: "http://zipkin:9411/api/v2/spans"
if zipkinURL == "" {
return nil
}
reporter := zipkinhttp.NewReporter(zipkinURL, zipkinhttp.MaxBacklog(10000))
exporter := zipkin.NewExporter(reporter, nil)
trace.RegisterExporter(exporter)
logrus.WithFields(logrus.Fields{"url": zipkinURL}).Info("exporting spans to zipkin")
// TODO don't do this. testing parity.
trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()})
return nil
}
}
// prometheus only allows [a-zA-Z0-9:_] in metrics names.
func promSanitizeMetricName(name string) string {
res := make([]rune, 0, len(name))
for _, rVal := range name {
if unicode.IsDigit(rVal) || unicode.IsLetter(rVal) || rVal == ':' {
res = append(res, rVal)
} else {
res = append(res, '_')
}
}
return string(res)
}
// determine sidecar-monitored cmd names. But by default
// we track dockerd + containerd
func getMonitoredCmdNames() []string {
// override? empty variable to disable trackers
val, ok := os.LookupEnv(EnvProcessCollectorList)
if ok {
return strings.Fields(val)
}
// by default, we monitor dockerd and containerd
return []string{"dockerd", "docker-containerd"}
}
// TODO plumbing considerations, we've put the S pipe next to the chandalier...
func getPidCmd(cmdName string) func() (int, error) {
// prometheus' process collector only works on linux anyway. let them do the
// process detection, if we return an error here we just get 0 metrics and it
// does not log / blow up (that's fine!) it's also likely we hit permissions
// errors here for many installations, we want to do similar and ignore (we
// just want for prod).
var pid int
return func() (int, error) {
if pid != 0 {
// make sure it's our pid.
if isPidMatchCmd(cmdName, pid) {
return pid, nil
}
pid = 0 // reset to go search
}
if pids, err := getPidList(); err == nil {
for _, test := range pids {
if isPidMatchCmd(cmdName, test) {
pid = test
return pid, nil
}
}
}
return pid, io.EOF
}
}
func isPidMatchCmd(cmdName string, pid int) bool {
fs, err := os.Open("/proc/" + strconv.Itoa(pid) + "/cmdline")
if err != nil {
return false
}
defer fs.Close()
rd := bufio.NewReader(fs)
tok, err := rd.ReadSlice(0)
if err != nil || len(tok) < len(cmdName) {
return false
}
return filepath.Base(string(tok[:len(tok)-1])) == cmdName
}
func getPidList() ([]int, error) {
var pids []int
dir, err := os.Open("/proc")
if err != nil {
return pids, nil
}
defer dir.Close()
files, err := dir.Readdirnames(0)
if err != nil {
return pids, nil
}
pids = make([]int, 0, len(files))
for _, tok := range files {
if conv, err := strconv.ParseUint(tok, 10, 64); err == nil {
pids = append(pids, int(conv))
}
}
return pids, nil
}
func setMachineID() {
port := uint16(getEnvInt(EnvPort, DefaultPort))
addr := whoAmI().To4()
if addr == nil {
addr = net.ParseIP("127.0.0.1").To4()
logrus.Warn("could not find non-local ipv4 address to use, using '127.0.0.1' for ids, if this is a cluster beware of duplicate ids!")
}
id.SetMachineIdHost(addr, port)
}
// whoAmI searches for a non-local address on any network interface, returning
// the first one it finds. it could be expanded to search eth0 or en0 only but
// to date this has been unnecessary.
func whoAmI() net.IP {
ints, _ := net.Interfaces()
for _, i := range ints {
if i.Name == "docker0" || i.Name == "lo" {
// not perfect
continue
}
addrs, _ := i.Addrs()
for _, a := range addrs {
ip, _, err := net.ParseCIDR(a.String())
if a.Network() == "ip+net" && err == nil && ip.To4() != nil {
if !bytes.Equal(ip, net.ParseIP("127.0.0.1")) {
return ip
}
}
}
}
return nil
}
func extractFields(c *gin.Context) logrus.Fields {
fields := logrus.Fields{"action": path.Base(c.HandlerName())}
for _, param := range c.Params {
fields[param.Key] = param.Value
}
return fields
}
// Start runs any configured machinery, including the http server, agent, etc.
// Start will block until the context is cancelled or times out.
func (s *Server) Start(ctx context.Context) {
newctx, cancel := contextWithSignal(ctx, os.Interrupt, syscall.SIGTERM)
s.startGears(newctx, cancel)
}
func (s *Server) startGears(ctx context.Context, cancel context.CancelFunc) {
// By default it serves on :8080 unless a
// FN_PORT environment variable was defined.
listen := fmt.Sprintf(":%d", s.webListenPort)
const runHeader = `
______
/ ____/___
/ /_ / __ \
/ __/ / / / /
/_/ /_/ /_/`
fmt.Println(runHeader)
fmt.Printf(" v%s\n\n", version.Version)
logrus.WithField("type", s.nodeType).Infof("Fn serving on `%v`", listen)
installChildReaper()
server := http.Server{
Addr: listen,
Handler: &ochttp.Handler{Handler: s.Router},
// TODO we should set read/write timeouts
}
go func() {
err := server.ListenAndServe()
if err != nil && err != http.ErrServerClosed {
logrus.WithError(err).Error("server error")
cancel()
} else {
logrus.Info("server stopped")
}
}()
if s.webListenPort != s.adminListenPort {
adminListen := fmt.Sprintf(":%d", s.adminListenPort)
logrus.WithField("type", s.nodeType).Infof("Fn Admin serving on `%v`", adminListen)
adminServer := http.Server{
Addr: adminListen,
Handler: &ochttp.Handler{Handler: s.AdminRouter},
}
go func() {
err := adminServer.ListenAndServe()
if err != nil && err != http.ErrServerClosed {
logrus.WithError(err).Error("server error")
cancel()
} else {
logrus.Info("server stopped")
}
}()
defer func() {
if err := adminServer.Shutdown(context.Background()); err != nil {
logrus.WithError(err).Error("admin server shutdown error")
}
}()
}
// listening for signals or listener errors or cancellations on all registered contexts.
s.extraCtxs = append(s.extraCtxs, ctx)
cases := make([]reflect.SelectCase, len(s.extraCtxs))
for i, ctx := range s.extraCtxs {
cases[i] = reflect.SelectCase{Dir: reflect.SelectRecv, Chan: reflect.ValueOf(ctx.Done())}
}
nth, recv, wasSend := reflect.Select(cases)
if wasSend {
logrus.WithFields(logrus.Fields{
"ctxNumber": nth,
"receivedValue": recv.String(),
}).Debug("Stopping because of received value from done context.")
} else {
logrus.WithFields(logrus.Fields{
"ctxNumber": nth,
}).Debug("Stopping because of closed channel from done context.")
}
// TODO: do not wait forever during graceful shutdown (add graceful shutdown timeout)
if err := server.Shutdown(context.Background()); err != nil {
logrus.WithError(err).Error("server shutdown error")
}
if s.agent != nil {
err := s.agent.Close() // after we stop taking requests, wait for all tasks to finish
if err != nil {
logrus.WithError(err).Error("Fail to close the agent")
}
}
}
func (s *Server) bindHandlers(ctx context.Context) {
engine := s.Router
admin := s.AdminRouter
// now for extensible middleware
engine.Use(s.rootMiddlewareWrapper())
engine.GET("/", handlePing)
admin.GET("/version", handleVersion)
// TODO: move under v1 ?
if s.promExporter != nil {
admin.GET("/metrics", gin.WrapH(s.promExporter))
}
profilerSetup(admin, "/debug")
// Pure runners don't have any route, they have grpc
switch s.nodeType {
case ServerTypeFull, ServerTypeAPI:
clean := engine.Group("/v1")
v1 := clean.Group("")
v1.Use(setAppNameInCtx)
v1.Use(s.apiMiddlewareWrapper())
v1.GET("/apps", s.handleV1AppList)
v1.POST("/apps", s.handleV1AppCreate)
{
apps := v1.Group("/apps/:appName")
apps.Use(appNameCheck)
{
withAppCheck := apps.Group("")
withAppCheck.Use(s.checkAppPresenceByName())
withAppCheck.GET("", s.handleV1AppGetByIdOrName)
withAppCheck.PATCH("", s.handleV1AppUpdate)
withAppCheck.DELETE("", s.handleV1AppDelete)
withAppCheck.GET("/routes", s.handleRouteList)
withAppCheck.GET("/routes/:route", s.handleRouteGetAPI)
withAppCheck.PATCH("/routes/*route", s.handleRoutesPatch)
withAppCheck.DELETE("/routes/*route", s.handleRouteDelete)
withAppCheck.GET("/calls/:call", s.handleCallGet)
withAppCheck.GET("/calls/:call/log", s.handleCallLogGet)
withAppCheck.GET("/calls", s.handleCallList)
}
apps.POST("/routes", s.handleRoutesPostPut)
apps.PUT("/routes/*route", s.handleRoutesPostPut)
}
cleanv2 := engine.Group("/v2")
v2 := cleanv2.Group("")
v2.Use(s.apiMiddlewareWrapper())
{
v2.GET("/apps", s.handleAppList)
v2.POST("/apps", s.handleAppCreate)
v2.GET("/apps/:appID", s.handleAppGet)
v2.PUT("/apps/:appID", s.handleAppUpdate)
v2.DELETE("/apps/:appID", s.handleAppDelete)
v2.GET("/fns", s.handleFnList)
v2.POST("/fns", s.handleFnCreate)
v2.GET("/fns/:fnID", s.handleFnGet)
v2.PUT("/fns/:fnID", s.handleFnUpdate)
v2.DELETE("/fns/:fnID", s.handleFnDelete)
v2.GET("/triggers", s.handleTriggerList)
v2.POST("/triggers", s.handleTriggerCreate)
v2.GET("/triggers/:triggerID", s.handleTriggerGet)
v2.PUT("/triggers/:triggerID", s.handleTriggerUpdate)
v2.DELETE("/triggers/:triggerID", s.handleTriggerDelete)
}
{ // Hybrid API - this should only be enabled on API servers
runner := cleanv2.Group("/runner")
runner.PUT("/async", s.handleRunnerEnqueue)
runner.GET("/async", s.handleRunnerDequeue)
runner.POST("/start", s.handleRunnerStart)
runner.POST("/finish", s.handleRunnerFinish)
runnerAppAPI := runner.Group(
"/apps/:appID")
runnerAppAPI.Use(setAppIDInCtx)
// Both of these are somewhat odd -
// Deprecate, remove with routes
runnerAppAPI.GET("/routes/*route", s.handleRunnerGetRoute)
runnerAppAPI.GET("/triggerBySource/:triggerType/*triggerSource", s.handleRunnerGetTriggerBySource)
}
}
switch s.nodeType {
case ServerTypeFull, ServerTypeLB, ServerTypeRunner:
if !s.noHTTTPTriggerEndpoint {
lbTriggerGroup := engine.Group("/t")
lbTriggerGroup.Any("/:appName", s.handleHTTPTriggerCall)
lbTriggerGroup.Any("/:appName/*triggerSource", s.handleHTTPTriggerCall)
// TODO Deprecate with routes
lbRouteGroup := engine.Group("/r")
lbRouteGroup.Use(s.checkAppPresenceByNameAtLB())
lbRouteGroup.Any("/:appName", s.handleV1FunctionCall)
lbRouteGroup.Any("/:appName/*route", s.handleV1FunctionCall)
}
}
engine.NoRoute(func(c *gin.Context) {
var err error
switch {
case s.nodeType == ServerTypeAPI && strings.HasPrefix(c.Request.URL.Path, "/r/"):
err = models.ErrInvokeNotSupported
case s.nodeType == ServerTypeRunner && strings.HasPrefix(c.Request.URL.Path, "/v1/"):
err = models.ErrAPINotSupported
default:
var e models.APIError = models.ErrPathNotFound
err = models.NewAPIError(e.Code(), fmt.Errorf("%v: %s", e.Error(), c.Request.URL.Path))
}
handleV1ErrorResponse(c, err)
})
}
// Datastore implements fnext.ExtServer
func (s *Server) Datastore() models.Datastore {
return s.datastore
}
// Agent implements fnext.ExtServer
func (s *Server) Agent() agent.Agent {
return s.agent
}
// returns the unescaped ?cursor and ?perPage values
// pageParams clamps 0 < ?perPage <= 100 and defaults to 30 if 0
// ignores parsing errors and falls back to defaults.
func pageParams(c *gin.Context, base64d bool) (cursor string, perPage int) {
cursor = c.Query("cursor")
if base64d {
cbytes, _ := base64.RawURLEncoding.DecodeString(cursor)
cursor = string(cbytes)
}
perPage, _ = strconv.Atoi(c.Query("per_page"))
if perPage > 100 {
perPage = 100
} else if perPage <= 0 {
perPage = 30
}
return cursor, perPage
}
func pageParamsV2(c *gin.Context) (cursor string, perPage int) {
cursor = c.Query("cursor")
perPage, _ = strconv.Atoi(c.Query("per_page"))
if perPage > 100 {
perPage = 100
} else if perPage <= 0 {
perPage = 30
}
return cursor, perPage
}
type appResponse struct {
Message string `json:"message"`
App *models.App `json:"app"`
}
//TODO deprecate with V1
type appsV1Response struct {
Message string `json:"message"`
NextCursor string `json:"next_cursor"`
Apps []*models.App `json:"apps"`
}
type routeResponse struct {
Message string `json:"message"`
Route *models.Route `json:"route"`
}
type routesResponse struct {
Message string `json:"message"`
NextCursor string `json:"next_cursor"`
Routes []*models.Route `json:"routes"`
}
type callResponse struct {
Message string `json:"message"`
Call *models.Call `json:"call"`
}
type callsResponse struct {
Message string `json:"message"`
NextCursor string `json:"next_cursor"`
Calls []*models.Call `json:"calls"`
}