Files
fn-serverless/poolmanager/manager.go
Matt Stephenson a787ccac36 Refactor controlplane into a go plugin (#833)
* Refactor controlplane into a go plugin

* Move vbox to controlplane package
2018-03-12 12:50:55 -07:00

427 lines
14 KiB
Go

package poolmanager
import (
"time"
"context"
"math"
"sync"
model "github.com/fnproject/fn/poolmanager/grpc"
"github.com/fnproject/fn/poolmanager/server/controlplane"
"github.com/sirupsen/logrus"
)
// CapacityManager aggregates capacity requirements reported by load
// balancers and manages one LBGroup per group id.
type CapacityManager interface {
	// LBGroup returns the group registered under lbgid, creating it on
	// first request.
	LBGroup(lbgid string) LBGroup
	// Merge folds a capacity snapshot list received from one LB into the
	// requirements of each group the list mentions.
	Merge(*model.CapacitySnapshotList)
}
// LBGroup is the per-group view: it accepts requirement updates from LBs,
// expires stale ones, and exposes the currently-active runner membership.
type LBGroup interface {
	// Id returns the group's identifier.
	Id() string
	// UpdateRequirements records lb's latest total capacity requirement.
	UpdateRequirements(lb string, total int64)
	Purge(time.Time, func(LBGroup, string)) int64 // Remove outdated requirements, return updated value
	GetMembers() []string                         // Return *ACTIVE* members
}
// Predictor turns a raw capacity requirement into the scale the pool should
// actually aim for (a hook for smoothing / over-provisioning policies).
type Predictor interface {
	// Given a series of requirements, return the target scale to attempt to reach
	GetScale(time.Time, int64) int64
}
// capacityManager is the default CapacityManager implementation. The lbg
// map is guarded by mx; each group is created at most once per id.
type capacityManager struct {
	ctx context.Context
	mx  sync.RWMutex
	cp  controlplane.ControlPlane
	lbg map[string]LBGroup // group id -> group
	// predictorFactory builds the Predictor handed to each new group;
	// overridable via WithPredictorFactory.
	predictorFactory func() Predictor
}
// NewCapacityManager builds a CapacityManager backed by the given control
// plane. Options may override defaults (e.g. the predictor factory); the
// first failing option aborts construction and its error is returned.
func NewCapacityManager(ctx context.Context, cp controlplane.ControlPlane, opts ...func(*capacityManager) error) (CapacityManager, error) {
	mgr := &capacityManager{
		ctx:              ctx,
		cp:               cp,
		lbg:              make(map[string]LBGroup),
		predictorFactory: newPredictor,
	}
	for _, opt := range opts {
		err := opt(mgr)
		if err != nil {
			logrus.WithError(err).Error("Error handling option for CapacityManager")
			return nil, err
		}
	}
	return mgr, nil
}
// WithPredictorFactory returns a NewCapacityManager option that installs a
// custom Predictor factory on the manager being constructed.
func WithPredictorFactory(pf func() Predictor) func(*capacityManager) error {
	setFactory := func(cm *capacityManager) error {
		cm.predictorFactory = pf
		return nil
	}
	return setFactory
}
// LBGroup returns the group registered under lbgid, lazily constructing it
// the first time the id is seen. A read-lock fast path handles the common
// case; on a miss we upgrade to the write lock and re-check before creating.
func (m *capacityManager) LBGroup(lbgid string) LBGroup {
	m.mx.RLock()
	group, found := m.lbg[lbgid]
	m.mx.RUnlock()
	if found {
		return group
	}
	m.mx.Lock()
	defer m.mx.Unlock()
	// Another goroutine may have created the group between the two locks.
	if group, found := m.lbg[lbgid]; found {
		return group
	}
	logrus.Infof("Making new LBG to handle %v", lbgid)
	group = newLBGroup(lbgid, m.ctx, m.cp, m.predictorFactory)
	m.lbg[lbgid] = group
	return group
}
// Merge applies every snapshot in the list to its corresponding LBGroup,
// recording the sending LB's latest total memory requirement (in MB).
func (m *capacityManager) Merge(list *model.CapacitySnapshotList) {
	sender := list.GetLbId()
	for _, snap := range list.Snapshots {
		groupID := snap.GetGroupId().GetId()
		logrus.Debugf("Merging snapshot %+v for %v from %v", snap, groupID, sender)
		group := m.LBGroup(groupID)
		group.UpdateRequirements(sender, int64(snap.GetMemMbTotal()))
	}
}
// lbGroup tracks the capacity requirements and runner pool for a single
// load-balancer group. Requirement state is guarded by cap_mx; runner-pool
// state is guarded by run_mx. Scaling decisions are made by the control()
// goroutine, which is fed through controlStream.
type lbGroup struct {
	ctx context.Context
	id  string
	// Attributes for managing incoming capacity requirements
	cap_mx        sync.RWMutex
	total_wanted  int64                   // Sum of all LBs' current requirements
	requirements  map[string]*requirement // NuLB id -> (ts, total_wanted)
	controlStream chan requirement        // Delivers new totals to the control loop
	// Attributes for managing runner pool membership
	run_mx           sync.RWMutex
	cp               controlplane.ControlPlane
	current_capacity int64              // Of all active runners
	target_capacity  int64              // All active runners plus any we've already asked for
	runners          map[string]*runner // A map of everything we know about
	active_runners   []*runner          // Everything currently in use
	draining_runners []*runner          // We keep tabs on these separately
	dead_runners     []*runner          // Waiting for control plane to remove
	predictor        Predictor          // Converts a requirement into a scale target
}
// requirement is one LB's most recent capacity assertion.
type requirement struct {
	ts           time.Time // Time of last update
	total_wanted int64     // Total capacity this LB currently wants
}
// Runner lifecycle states. Only the first constant needs `= iota`; the
// values (0, 1, 2) are unchanged from the original repeated-iota form.
const (
	RUNNER_ACTIVE   = iota // serving traffic; listed by GetMembers
	RUNNER_DRAINING        // pulled from the active pool, awaiting kill_after
	RUNNER_DEAD            // shutdown requested from the control plane
)
// runner is everything we track about a single runner host.
type runner struct {
	id       string // The same address may get recycled; we'll need to disambiguate somehow.
	address  string
	status   int   // One of RUNNER_ACTIVE / RUNNER_DRAINING / RUNNER_DEAD
	capacity int64 // Capacity this runner contributes to the pool
	// XXX: If we're draining, this is handy to simulate runner readiness for shutdown
	kill_after time.Time
}
// newLBGroup constructs the lbGroup for lbgid and launches its background
// control loop, which owns all scaling decisions for the group.
func newLBGroup(lbgid string, ctx context.Context, cp controlplane.ControlPlane, predictorFactory func() Predictor) LBGroup {
	group := &lbGroup{
		ctx:           ctx,
		id:            lbgid,
		requirements:  map[string]*requirement{},
		controlStream: make(chan requirement),
		cp:            cp,
		runners:       map[string]*runner{},
		predictor:     predictorFactory(),
	}
	go group.control()
	return group
}
// Id returns the LB group identifier this group was created with.
func (lbg *lbGroup) Id() string {
	return lbg.id
}
// UpdateRequirements records lb's latest total capacity requirement,
// replaces that LB's previous contribution in the group-wide total, and
// forwards the new total to the control loop.
//
// Fix: the original read lbg.total_wanted twice AFTER cap_mx.Unlock() (for
// the log line and the controlStream send), racing with concurrent
// updaters. The total is now snapshotted while the lock is still held.
func (lbg *lbGroup) UpdateRequirements(lb string, total int64) {
	logrus.Debugf("Updating capacity requirements for %v, lb=%v", lbg.Id(), lb)
	defer logrus.Debugf("Updated %v, lb=%v", lbg.Id(), lb)
	lbg.cap_mx.Lock()
	last, ok := lbg.requirements[lb]
	if !ok {
		// This is a new NuLB that we're just learning about.
		last = &requirement{}
		lbg.requirements[lb] = last
	}
	// Update totals: remove this LB's previous capacity assertion, then
	// add its new one.
	lbg.total_wanted -= last.total_wanted
	lbg.total_wanted += total
	// Keep a copy of this requirement.
	now := time.Now()
	last.ts = now
	last.total_wanted = total
	// Snapshot under the lock; reading lbg.total_wanted after Unlock
	// would be a data race.
	wanted := lbg.total_wanted
	// TODO: new_req also has a generation for the runner information that LB held. If that's out of date, signal that we need to readvertise
	lbg.cap_mx.Unlock()
	// Send a new signal to the capacity control loop.
	logrus.Debugf("Sending new capacity requirement of %v", wanted)
	lbg.controlStream <- requirement{ts: now, total_wanted: wanted}
}
// Purge drops every recorded requirement whose last update is older than
// `oldest`, invoking cb for each LB removed, and returns the group's
// updated total requirement.
func (lbg *lbGroup) Purge(oldest time.Time, cb func(LBGroup, string)) int64 {
	lbg.cap_mx.Lock()
	defer lbg.cap_mx.Unlock()
	for lb, req := range lbg.requirements {
		if !req.ts.Before(oldest) {
			continue
		}
		// This LB has gone quiet; retract its contribution entirely.
		lbg.total_wanted -= req.total_wanted
		delete(lbg.requirements, lb)
		cb(lbg, lb)
	}
	return lbg.total_wanted
}
// Tunables for the per-group control loop.
const PURGE_INTERVAL = 5 * time.Second                // How often to drop LBs that have stopped reporting
const VALID_REQUEST_LIFETIME = 500 * time.Millisecond // Requirements older than this are ignored by target()
const POLL_INTERVAL = time.Second                     // How often to sync runner state with the control plane
const LARGEST_REQUEST_AT_ONCE = 20                    // Cap on runners requested in a single scale-up
const MAX_DRAINDOWN_LIFETIME = 50 * time.Second       // Grace before a draining runner is killed. For the moment.
// control is the per-group background loop started by newLBGroup; it never
// returns. It multiplexes three events:
//   - periodic polling of the control plane for runner-pool changes (listed
//     first and run immediately so a freshly-restarted NPM relearns the
//     existing runners before it can emit spurious scaling requests);
//   - periodic purging of LBs that have stopped reporting requirements;
//   - fresh capacity requirements arriving on controlStream.
//
// Fix: the final Debugf passed lbg.Id() with no format verb (a go vet
// error that garbled the log line); the %v verb is now present.
func (lbg *lbGroup) control() {
	lastPurge := time.Now()
	nextPurge := lastPurge.Add(PURGE_INTERVAL)
	nextPoll := lastPurge // poll immediately on startup
	for {
		logrus.Debugf("In capacity management loop for %v", lbg.Id())
		select {
		// Poll CP for runners (this will change, it's a stub)
		case <-time.After(time.Until(nextPoll)):
			logrus.Debugf("Polling for runners for %v", lbg.Id())
			lbg.pollForRunners()
			nextPoll = time.Now().Add(POLL_INTERVAL)
			logrus.Debugf("Polled for %v", lbg.Id())
		// Manage capacity requests
		case <-time.After(time.Until(nextPurge)):
			logrus.Debugf("Purging for %v", lbg.Id())
			need := lbg.Purge(lastPurge, func(lbg LBGroup, lb string) {
				logrus.Warnf("Purging LB %v from %v - no communication received", lb, lbg.Id())
			})
			lastPurge = time.Now()
			nextPurge = lastPurge.Add(PURGE_INTERVAL)
			lbg.target(lastPurge, need)
			logrus.Debugf("Purged for %v", lbg.Id())
		case req := <-lbg.controlStream:
			logrus.Debugf("New requirement received by control loop for %v", req.total_wanted)
			lbg.target(req.ts, req.total_wanted)
			logrus.Debugf("New requirement handled for %v", lbg.Id())
		}
	}
}
// target reconciles the runner pool toward the capacity requirement
// `target` that was asserted at time ts. Stale requests are dropped.
// Scale-up first reclaims draining runners, then asks the control plane for
// more; scale-down moves active runners into the draining pool.
func (lbg *lbGroup) target(ts time.Time, target int64) {
	if time.Now().Sub(ts) > VALID_REQUEST_LIFETIME {
		// We have a request that's too old; drop it.
		logrus.Warnf("Request for capacity is too old: %v", ts)
		return
	}
	lbg.run_mx.Lock()
	defer lbg.run_mx.Unlock()
	desiredScale := lbg.predictor.GetScale(ts, target)
	logrus.Debugf("Targeting capacity requirement of %v gives desired scale of %v", target, desiredScale)
	// We have:
	// - total capacity in active runners
	// - required total capacity
	// - capacity per runner
	// - any additional capacity we've already asked for
	// We scale appropriately.
	if desiredScale > lbg.target_capacity {
		// Scale up.
		// Even including capacity we are expecting to come down the pipe, we don't have enough stuff.
		// Begin by reactivating any runners we're currently draining down.
		for desiredScale > lbg.target_capacity && len(lbg.draining_runners) > 0 {
			// Begin with the one we started draining last (LIFO keeps the
			// oldest drainer closest to its kill_after deadline).
			runner := lbg.draining_runners[len(lbg.draining_runners)-1]
			logrus.Infof("Recovering runner %v at %v from draindown", runner.id, runner.address)
			lbg.draining_runners = lbg.draining_runners[:len(lbg.draining_runners)-1]
			runner.status = RUNNER_ACTIVE
			lbg.active_runners = append(lbg.active_runners, runner)
			lbg.current_capacity += runner.capacity
			lbg.target_capacity += runner.capacity
		}
		if desiredScale > lbg.target_capacity {
			// We still need additional capacity.
			// NOTE(review): the shortfall here is computed from `target`,
			// while the surrounding conditions use `desiredScale`. With the
			// default conservativePredictor the two are equal, but a
			// non-trivial predictor would make this inconsistent — confirm
			// which value is intended.
			wanted := math.Min(math.Ceil(float64(target-lbg.target_capacity)/controlplane.CapacityPerRunner), LARGEST_REQUEST_AT_ONCE)
			asked_for, err := lbg.cp.ProvisionRunners(lbg.Id(), int(wanted)) // Send the request; they'll show up later
			if err != nil {
				// Some kind of error during attempt to scale up
				logrus.WithError(err).Error("Error occured during attempt to scale up")
				return
			}
			lbg.target_capacity += int64(asked_for) * controlplane.CapacityPerRunner
		}
	} else if desiredScale <= lbg.current_capacity-controlplane.CapacityPerRunner {
		// Scale down.
		// We pick a node to turn off and move it to the draining pool.
		// NOTE(review): this loop condition uses `target` where the guard
		// above used `desiredScale` — same inconsistency as in scale-up;
		// verify against the predictor semantics.
		for target <= lbg.current_capacity-controlplane.CapacityPerRunner && len(lbg.active_runners) > 0 {
			// Begin with the one we added last.
			runner := lbg.active_runners[len(lbg.active_runners)-1]
			logrus.Infof("Marking runner %v at %v for draindown", runner.id, runner.address)
			lbg.active_runners = lbg.active_runners[:len(lbg.active_runners)-1]
			runner.status = RUNNER_DRAINING
			runner.kill_after = time.Now().Add(MAX_DRAINDOWN_LIFETIME)
			lbg.draining_runners = append(lbg.draining_runners, runner)
			lbg.current_capacity -= runner.capacity
			lbg.target_capacity -= runner.capacity
		}
	}
}
// Pool membership management

// GetMembers returns the addresses of the currently-active runners only;
// draining and dead runners are excluded.
func (lbg *lbGroup) GetMembers() []string {
	lbg.run_mx.RLock()
	defer lbg.run_mx.RUnlock()
	addrs := make([]string, 0, len(lbg.active_runners))
	for _, r := range lbg.active_runners {
		addrs = append(addrs, r.address)
	}
	return addrs
}
// pollForRunners performs one synchronization pass against the control
// plane. Three things handled here:
//  1. drained runners past their kill_after deadline are asked to shut down;
//  2. new capacity the CP supplies joins the pool as active;
//  3. runners the CP no longer reports are removed from our bookkeeping.
//
// Fix: the two host log lines used %d for host.Address, which is a string
// (see runner.address) — go vet flags this and the output was garbled; %v
// is now used.
func (lbg *lbGroup) pollForRunners() {
	lbg.run_mx.Lock()
	defer lbg.run_mx.Unlock()
	now := time.Now()
	// The oldest draining runner will be in the front of the pipe.
	for len(lbg.draining_runners) > 0 && now.After(lbg.draining_runners[0].kill_after) {
		// Mark this runner as to be killed
		runner := lbg.draining_runners[0]
		logrus.Infof("Drain down for runner %v at %v complete: signalling shutdown", runner.id, runner.address)
		lbg.draining_runners = lbg.draining_runners[1:]
		runner.status = RUNNER_DEAD
		lbg.dead_runners = append(lbg.dead_runners, runner)
		if err := lbg.cp.RemoveRunner(lbg.Id(), runner.id); err != nil {
			logrus.WithError(err).Errorf("Error attempting to close down runner %v at %v", runner.id, runner.address)
		}
	}
	// Get CP status and process it. This might be smarter but for the moment we just loop over everything we're told.
	logrus.Debugf("Getting hosts from ControlPlane for %v", lbg.Id())
	latestHosts, err := lbg.cp.GetLBGRunners(lbg.Id())
	if err != nil {
		// On error, keep our existing view of the pool untouched.
		logrus.WithError(err).Errorf("Problem talking to the CP to fetch runner status")
		return
	}
	seen := make(map[string]bool)
	for _, host := range latestHosts {
		if _, ok := lbg.runners[host.Id]; ok {
			// We already know about this
			logrus.Debugf(" ... host %v at %v is known", host.Id, host.Address)
		} else {
			logrus.Infof(" ... host %v at %v is new", host.Id, host.Address)
			// This is a new runner. Bring it into the active pool.
			r := &runner{
				id:       host.Id,
				address:  host.Address,
				status:   RUNNER_ACTIVE,
				capacity: host.Capacity,
			}
			lbg.runners[host.Id] = r
			lbg.active_runners = append(lbg.active_runners, r)
			lbg.current_capacity += r.capacity // The total capacity is already computed, since we asked for this
		}
		seen[host.Id] = true
	}
	// Work out if runners that we asked to be killed have been shut down.
	// The control plane might pull active or draining hosts out from under us. Deal with that too.
	logrus.Debugf("Removing dead hosts for %v", lbg.Id())
	lbg.active_runners = removeDead(seen, lbg.runners, lbg.active_runners)
	lbg.draining_runners = removeDead(seen, lbg.runners, lbg.draining_runners)
	lbg.dead_runners = removeDead(seen, lbg.runners, lbg.dead_runners)
}
// removeDead filters runnerList in place, keeping only runners whose ids
// appear in seen. Every dropped runner is also deleted from runnerMap. The
// returned slice shares the original backing array.
func removeDead(seen map[string]bool, runnerMap map[string]*runner, runnerList []*runner) []*runner {
	kept := runnerList[:0]
	for _, r := range runnerList {
		if _, present := seen[r.id]; present {
			// Still reported by the control plane — keep it.
			kept = append(kept, r)
			continue
		}
		logrus.Infof("Removing runner %v at %v that has disappeared", r.id, r.address)
		delete(runnerMap, r.id)
	}
	return kept
}
// Predictions. Given a timestamp and an input total capacity requirement, return the scale we should attempt to reach

// newPredictor is the default Predictor factory installed by
// NewCapacityManager (overridable via WithPredictorFactory).
func newPredictor() Predictor {
	return &conservativePredictor{}
}
type conservativePredictor struct{}
func (p *conservativePredictor) GetScale(ts time.Time, target int64) int64 {
// This is the most conservative approach. If we have a target capacity, attempt to reach that capacity by
// asking for sufficient scale to satisfy it all at once.
return target
}