mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
Refactor controlplane into a go plugin (#833)
* Refactor controlplane into a go plugin * Move vbox to controlplane package
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## Motivation
|
## Motivation
|
||||||
|
|
||||||
By running Fn in multitenant mode, you can define independent pools of compute resources available to functions in the platform. By associating a function with a particular _load balancing group_, its invocations are guaranteed to execute on the compute resources assigned to that specific group. The pluggable _node pool manager_ abstraction provides a mechanism to scale compute resources dynamically, based on capacity requirements advertised by the load-balancing layer. Together with load balancer groups, it allows you to implement independent capacity and scaling policies for different sets of users or tenants.
|
By running Fn in multitenant mode, you can define independent pools of compute resources available to functions in the platform. By associating a function with a particular _load balancing group_, its invocations are guaranteed to execute on the compute resources assigned to that specific group. The pluggable _node pool manager_ abstraction provides a mechanism to scale compute resources dynamically, based on capacity requirements advertised by the load-balancing layer. Together with load balancer groups, it allows you to implement independent capacity and scaling policies for different sets of users or tenants.
|
||||||
|
|
||||||
## Create certificates
|
## Create certificates
|
||||||
|
|
||||||
@@ -56,6 +56,7 @@ but the GRPC port is 9190.
|
|||||||
Grap the runner address and put in as value for the `FN_RUNNER_ADDRESSES` env variable.
|
Grap the runner address and put in as value for the `FN_RUNNER_ADDRESSES` env variable.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
go build -o noop.so poolmanager/server/controlplane/plugin/noop.go
|
||||||
go build -o fnnpm poolmanager/server/main.go
|
go build -o fnnpm poolmanager/server/main.go
|
||||||
|
|
||||||
FN_LOG_LEVEL=DEBUG \
|
FN_LOG_LEVEL=DEBUG \
|
||||||
@@ -64,6 +65,7 @@ FN_NODE_CERT_KEY=key.pem \
|
|||||||
FN_NODE_CERT_AUTHORITY=cert.pem \
|
FN_NODE_CERT_AUTHORITY=cert.pem \
|
||||||
FN_PORT=8083 \
|
FN_PORT=8083 \
|
||||||
FN_RUNNER_ADDRESSES=<RUNNER_ADDRESS_HERE>:9190 \
|
FN_RUNNER_ADDRESSES=<RUNNER_ADDRESS_HERE>:9190 \
|
||||||
|
CONTROL_PLANE_SO=noop.so \
|
||||||
./fnnpm
|
./fnnpm
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
model "github.com/fnproject/fn/poolmanager/grpc"
|
model "github.com/fnproject/fn/poolmanager/grpc"
|
||||||
"github.com/fnproject/fn/poolmanager/server/cp"
|
"github.com/fnproject/fn/poolmanager/server/controlplane"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -32,12 +32,12 @@ type Predictor interface {
|
|||||||
type capacityManager struct {
|
type capacityManager struct {
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
mx sync.RWMutex
|
mx sync.RWMutex
|
||||||
cp cp.ControlPlane
|
cp controlplane.ControlPlane
|
||||||
lbg map[string]LBGroup
|
lbg map[string]LBGroup
|
||||||
predictorFactory func() Predictor
|
predictorFactory func() Predictor
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCapacityManager(ctx context.Context, cp cp.ControlPlane, opts ...func(*capacityManager) error) (CapacityManager, error) {
|
func NewCapacityManager(ctx context.Context, cp controlplane.ControlPlane, opts ...func(*capacityManager) error) (CapacityManager, error) {
|
||||||
cm := &capacityManager{
|
cm := &capacityManager{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
cp: cp,
|
cp: cp,
|
||||||
@@ -107,7 +107,7 @@ type lbGroup struct {
|
|||||||
|
|
||||||
// Attributes for managing runner pool membership
|
// Attributes for managing runner pool membership
|
||||||
run_mx sync.RWMutex
|
run_mx sync.RWMutex
|
||||||
cp cp.ControlPlane
|
cp controlplane.ControlPlane
|
||||||
|
|
||||||
current_capacity int64 // Of all active runners
|
current_capacity int64 // Of all active runners
|
||||||
target_capacity int64 // All active runners plus any we've already asked for
|
target_capacity int64 // All active runners plus any we've already asked for
|
||||||
@@ -140,7 +140,7 @@ type runner struct {
|
|||||||
kill_after time.Time
|
kill_after time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
func newLBGroup(lbgid string, ctx context.Context, cp cp.ControlPlane, predictorFactory func() Predictor) LBGroup {
|
func newLBGroup(lbgid string, ctx context.Context, cp controlplane.ControlPlane, predictorFactory func() Predictor) LBGroup {
|
||||||
lbg := &lbGroup{
|
lbg := &lbGroup{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
id: lbgid,
|
id: lbgid,
|
||||||
@@ -296,20 +296,20 @@ func (lbg *lbGroup) target(ts time.Time, target int64) {
|
|||||||
|
|
||||||
if desiredScale > lbg.target_capacity {
|
if desiredScale > lbg.target_capacity {
|
||||||
// We still need additional capacity
|
// We still need additional capacity
|
||||||
wanted := math.Min(math.Ceil(float64(target-lbg.target_capacity)/cp.CAPACITY_PER_RUNNER), LARGEST_REQUEST_AT_ONCE)
|
wanted := math.Min(math.Ceil(float64(target-lbg.target_capacity)/controlplane.CapacityPerRunner), LARGEST_REQUEST_AT_ONCE)
|
||||||
asked_for, err := lbg.cp.ProvisionRunners(lbg.Id(), int(wanted)) // Send the request; they'll show up later
|
asked_for, err := lbg.cp.ProvisionRunners(lbg.Id(), int(wanted)) // Send the request; they'll show up later
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Some kind of error during attempt to scale up
|
// Some kind of error during attempt to scale up
|
||||||
logrus.WithError(err).Error("Error occured during attempt to scale up")
|
logrus.WithError(err).Error("Error occured during attempt to scale up")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
lbg.target_capacity += int64(asked_for) * cp.CAPACITY_PER_RUNNER
|
lbg.target_capacity += int64(asked_for) * controlplane.CapacityPerRunner
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if desiredScale <= lbg.current_capacity-cp.CAPACITY_PER_RUNNER {
|
} else if desiredScale <= lbg.current_capacity-controlplane.CapacityPerRunner {
|
||||||
// Scale down.
|
// Scale down.
|
||||||
// We pick a node to turn off and move it to the draining pool.
|
// We pick a node to turn off and move it to the draining pool.
|
||||||
for target <= lbg.current_capacity-cp.CAPACITY_PER_RUNNER && len(lbg.active_runners) > 0 {
|
for target <= lbg.current_capacity-controlplane.CapacityPerRunner && len(lbg.active_runners) > 0 {
|
||||||
// Begin with the one we added last.
|
// Begin with the one we added last.
|
||||||
runner := lbg.active_runners[len(lbg.active_runners)-1]
|
runner := lbg.active_runners[len(lbg.active_runners)-1]
|
||||||
logrus.Infof("Marking runner %v at %v for draindown", runner.id, runner.address)
|
logrus.Infof("Marking runner %v at %v for draindown", runner.id, runner.address)
|
||||||
|
|||||||
20
poolmanager/server/controlplane/controlplane.go
Normal file
20
poolmanager/server/controlplane/controlplane.go
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
Interface between the Node Pool Manager and the Control Plane
|
||||||
|
*/
|
||||||
|
|
||||||
|
package controlplane
|
||||||
|
|
||||||
|
const CapacityPerRunner = 4096
|
||||||
|
|
||||||
|
type Runner struct {
|
||||||
|
Id string
|
||||||
|
Address string
|
||||||
|
// Other: certs etc here as managed and installed by CP
|
||||||
|
Capacity int64
|
||||||
|
}
|
||||||
|
|
||||||
|
type ControlPlane interface {
|
||||||
|
GetLBGRunners(lgbId string) ([]*Runner, error)
|
||||||
|
ProvisionRunners(lgbId string, n int) (int, error)
|
||||||
|
RemoveRunner(lbgId string, id string) error
|
||||||
|
}
|
||||||
@@ -1,56 +1,47 @@
|
|||||||
/*
|
/**
|
||||||
Interface between the Node Pool Manager and the Control Plane
|
* Dummy implementation for the controlplane that just adds delays
|
||||||
*/
|
*/
|
||||||
|
package main
|
||||||
package cp
|
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/fnproject/fn/poolmanager/server/controlplane"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Runner struct {
|
const (
|
||||||
Id string
|
EnvFixedRunners = "FN_RUNNER_ADDRESSES"
|
||||||
Address string
|
)
|
||||||
// Other: certs etc here as managed and installed by CP
|
|
||||||
Capacity int64
|
|
||||||
}
|
|
||||||
|
|
||||||
const CAPACITY_PER_RUNNER = 4096
|
|
||||||
|
|
||||||
type ControlPlane interface {
|
|
||||||
GetLBGRunners(lgbId string) ([]*Runner, error)
|
|
||||||
ProvisionRunners(lgbId string, n int) (int, error)
|
|
||||||
RemoveRunner(lbgId string, id string) error
|
|
||||||
}
|
|
||||||
|
|
||||||
type controlPlane struct {
|
|
||||||
mx sync.RWMutex
|
|
||||||
|
|
||||||
runners map[string][]*Runner
|
|
||||||
|
|
||||||
|
type noopControlPlane struct {
|
||||||
|
mx sync.RWMutex
|
||||||
|
runners map[string][]*controlplane.Runner
|
||||||
_fakeRunners []string
|
_fakeRunners []string
|
||||||
}
|
}
|
||||||
|
|
||||||
const REQUEST_DURATION = 5 * time.Second
|
const REQUEST_DURATION = 5 * time.Second
|
||||||
|
|
||||||
func NewControlPlane(fakeRunners []string) ControlPlane {
|
func init() {
|
||||||
cp := &controlPlane{
|
ControlPlane = noopControlPlane{
|
||||||
runners: make(map[string][]*Runner),
|
runners: make(map[string][]*controlplane.Runner),
|
||||||
|
_fakeRunners: strings.Split(getEnv(EnvFixedRunners), ","),
|
||||||
_fakeRunners: fakeRunners,
|
|
||||||
}
|
}
|
||||||
return cp
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cp *controlPlane) GetLBGRunners(lbgId string) ([]*Runner, error) {
|
func main() {
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cp *noopControlPlane) GetLBGRunners(lbgId string) ([]*controlplane.Runner, error) {
|
||||||
cp.mx.RLock()
|
cp.mx.RLock()
|
||||||
defer cp.mx.RUnlock()
|
defer cp.mx.RUnlock()
|
||||||
|
|
||||||
runners := make([]*Runner, 0)
|
runners := make([]*controlplane.Runner, 0)
|
||||||
if hosts, ok := cp.runners[lbgId]; ok {
|
if hosts, ok := cp.runners[lbgId]; ok {
|
||||||
for _, host := range hosts {
|
for _, host := range hosts {
|
||||||
runners = append(runners, host) // In this CP implementation, a Runner is an immutable thing, so passing the pointer is fine
|
runners = append(runners, host) // In this CP implementation, a Runner is an immutable thing, so passing the pointer is fine
|
||||||
@@ -60,7 +51,7 @@ func (cp *controlPlane) GetLBGRunners(lbgId string) ([]*Runner, error) {
|
|||||||
return runners, nil
|
return runners, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cp *controlPlane) ProvisionRunners(lbgId string, n int) (int, error) {
|
func (cp *noopControlPlane) ProvisionRunners(lbgId string, n int) (int, error) {
|
||||||
// Simulate some small amount of time for the CP to service this request
|
// Simulate some small amount of time for the CP to service this request
|
||||||
go func() {
|
go func() {
|
||||||
time.Sleep(REQUEST_DURATION)
|
time.Sleep(REQUEST_DURATION)
|
||||||
@@ -69,7 +60,7 @@ func (cp *controlPlane) ProvisionRunners(lbgId string, n int) (int, error) {
|
|||||||
|
|
||||||
runners, ok := cp.runners[lbgId]
|
runners, ok := cp.runners[lbgId]
|
||||||
if !ok {
|
if !ok {
|
||||||
runners = make([]*Runner, 0)
|
runners = make([]*controlplane.Runner, 0)
|
||||||
}
|
}
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
runners = append(runners, cp.makeRunners(lbgId)...)
|
runners = append(runners, cp.makeRunners(lbgId)...)
|
||||||
@@ -81,9 +72,9 @@ func (cp *controlPlane) ProvisionRunners(lbgId string, n int) (int, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Make runner(s)
|
// Make runner(s)
|
||||||
func (cp *controlPlane) makeRunners(lbg string) []*Runner {
|
func (cp *noopControlPlane) makeRunners(lbg string) []*controlplane.Runner {
|
||||||
|
|
||||||
var runners []*Runner
|
var runners []*controlplane.Runner
|
||||||
for _, fakeRunner := range cp._fakeRunners {
|
for _, fakeRunner := range cp._fakeRunners {
|
||||||
|
|
||||||
b := make([]byte, 16)
|
b := make([]byte, 16)
|
||||||
@@ -94,10 +85,10 @@ func (cp *controlPlane) makeRunners(lbg string) []*Runner {
|
|||||||
|
|
||||||
uuid := fmt.Sprintf("%X-%X-%X-%X-%X", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
|
uuid := fmt.Sprintf("%X-%X-%X-%X-%X", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
|
||||||
|
|
||||||
runners = append(runners, &Runner{
|
runners = append(runners, &controlplane.Runner{
|
||||||
Id: uuid,
|
Id: uuid,
|
||||||
Address: fakeRunner,
|
Address: fakeRunner,
|
||||||
Capacity: CAPACITY_PER_RUNNER,
|
Capacity: controlplane.CapacityPerRunner,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return runners
|
return runners
|
||||||
@@ -106,12 +97,12 @@ func (cp *controlPlane) makeRunners(lbg string) []*Runner {
|
|||||||
// Ditch a runner from the pool.
|
// Ditch a runner from the pool.
|
||||||
// We do this immediately - no point modelling a wait here
|
// We do this immediately - no point modelling a wait here
|
||||||
// note: if this actually took time, we'd want to detect this properly so as to not confuse the NPM
|
// note: if this actually took time, we'd want to detect this properly so as to not confuse the NPM
|
||||||
func (cp *controlPlane) RemoveRunner(lbgId string, id string) error {
|
func (cp *noopControlPlane) RemoveRunner(lbgId string, id string) error {
|
||||||
cp.mx.Lock()
|
cp.mx.Lock()
|
||||||
defer cp.mx.Unlock()
|
defer cp.mx.Unlock()
|
||||||
|
|
||||||
if runners, ok := cp.runners[lbgId]; ok {
|
if runners, ok := cp.runners[lbgId]; ok {
|
||||||
newRunners := make([]*Runner, 0)
|
newRunners := make([]*controlplane.Runner, 0)
|
||||||
for _, host := range runners {
|
for _, host := range runners {
|
||||||
if host.Id != id {
|
if host.Id != id {
|
||||||
newRunners = append(newRunners, host)
|
newRunners = append(newRunners, host)
|
||||||
@@ -121,3 +112,13 @@ func (cp *controlPlane) RemoveRunner(lbgId string, id string) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getEnv(key string) string {
|
||||||
|
value, ok := os.LookupEnv(key)
|
||||||
|
if !ok {
|
||||||
|
log.Panicf("Missing config key: %v", key)
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
var ControlPlane noopControlPlane
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package cp
|
package controlplane
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
@@ -9,7 +9,7 @@ import (
|
|||||||
|
|
||||||
"github.com/fnproject/fn/poolmanager"
|
"github.com/fnproject/fn/poolmanager"
|
||||||
model "github.com/fnproject/fn/poolmanager/grpc"
|
model "github.com/fnproject/fn/poolmanager/grpc"
|
||||||
"github.com/fnproject/fn/poolmanager/server/cp"
|
"github.com/fnproject/fn/poolmanager/server/controlplane"
|
||||||
|
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
@@ -19,7 +19,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"plugin"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"google.golang.org/grpc/credentials"
|
"google.golang.org/grpc/credentials"
|
||||||
@@ -30,7 +30,7 @@ type npmService struct {
|
|||||||
capMan poolmanager.CapacityManager
|
capMan poolmanager.CapacityManager
|
||||||
}
|
}
|
||||||
|
|
||||||
func newNPMService(ctx context.Context, cp cp.ControlPlane) *npmService {
|
func newNPMService(ctx context.Context, cp controlplane.ControlPlane) *npmService {
|
||||||
cm, err := poolmanager.NewCapacityManager(ctx, cp)
|
cm, err := poolmanager.NewCapacityManager(ctx, cp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Panic("Cannot construct capacity manager")
|
logrus.Panic("Cannot construct capacity manager")
|
||||||
@@ -66,11 +66,11 @@ func (npm *npmService) GetLBGroup(ctx context.Context, gid *model.LBGroupId) (*m
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
// Certificates to communicate with other FN nodes
|
// Certificates to communicate with other FN nodes
|
||||||
EnvCert = "FN_NODE_CERT"
|
EnvCert = "FN_NODE_CERT"
|
||||||
EnvCertKey = "FN_NODE_CERT_KEY"
|
EnvCertKey = "FN_NODE_CERT_KEY"
|
||||||
EnvCertAuth = "FN_NODE_CERT_AUTHORITY"
|
EnvCertAuth = "FN_NODE_CERT_AUTHORITY"
|
||||||
EnvPort = "FN_PORT"
|
EnvPort = "FN_PORT"
|
||||||
EnvFixedRunners = "FN_RUNNER_ADDRESSES"
|
ControlPlaneSO = "CONTROL_PLANE_SO"
|
||||||
)
|
)
|
||||||
|
|
||||||
func getAndCheckFile(envVar string) (string, error) {
|
func getAndCheckFile(envVar string) (string, error) {
|
||||||
@@ -116,6 +116,27 @@ func createGrpcCreds(cert string, key string, ca string) (grpc.ServerOption, err
|
|||||||
return grpc.Creds(creds), nil
|
return grpc.Creds(creds), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newPluggableControlPlane() controlplane.ControlPlane {
|
||||||
|
pluginLocation := getEnv(ControlPlaneSO)
|
||||||
|
controlPlanePlugin, err := plugin.Open(pluginLocation)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cpSymbol, err := controlPlanePlugin.Lookup("ControlPlane")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cp := cpSymbol.(controlplane.ControlPlane)
|
||||||
|
|
||||||
|
logrus.Infof("Started controlplane : %s", cp)
|
||||||
|
|
||||||
|
return cp
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
level, err := logrus.ParseLevel(getEnv("FN_LOG_LEVEL"))
|
level, err := logrus.ParseLevel(getEnv("FN_LOG_LEVEL"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -146,8 +167,7 @@ func main() {
|
|||||||
|
|
||||||
logrus.Info("Starting Node Pool Manager gRPC service")
|
logrus.Info("Starting Node Pool Manager gRPC service")
|
||||||
|
|
||||||
fakeRunners := strings.Split(getEnv(EnvFixedRunners), ",")
|
svc := newNPMService(context.Background(), newPluggableControlPlane())
|
||||||
svc := newNPMService(context.Background(), cp.NewControlPlane(fakeRunners))
|
|
||||||
model.RegisterNodePoolScalerServer(gRPCServer, svc)
|
model.RegisterNodePoolScalerServer(gRPCServer, svc)
|
||||||
model.RegisterRunnerManagerServer(gRPCServer, svc)
|
model.RegisterRunnerManagerServer(gRPCServer, svc)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user