package poolmanager

import (
	"context"
	"math"
	"sync"
	"time"

	model "github.com/fnproject/fn/poolmanager/grpc"
	"github.com/fnproject/fn/poolmanager/server/controlplane"
	"github.com/sirupsen/logrus"
)

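// CapacityManager tracks the capacity requirements reported by load balancers,
// keyed by load-balancer group (LBG).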
type CapacityManager interface {
	LBGroup(lbgid string) LBGroup
	Merge(*model.CapacitySnapshotList)
}

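// LBGroup aggregates the capacity wanted by each load balancer for one group
// of runners and exposes the group's currently active members.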
type LBGroup interface {
	Id() string
	UpdateRequirements(lb string, total int64)
	Purge(time.Time, func(LBGroup, string)) int64 // Remove outdated requirements, return updated value
	GetMembers() []string                         // Return *ACTIVE* members
}

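// Predictor turns the raw capacity requirement reported at a point in time
// into the scale the pool should aim for.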
type Predictor interface {
	// Given a series of requirements, return the target scale to attempt to reach
	GetScale(time.Time, int64) int64
}

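// capacityManager is the default CapacityManager: it lazily creates one
// LBGroup per group id and guards the map with an RWMutex.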
type capacityManager struct {
	ctx              context.Context
	mx               sync.RWMutex
	cp               controlplane.ControlPlane
	lbg              map[string]LBGroup
	predictorFactory func() Predictor
}

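// NewCapacityManager builds a CapacityManager backed by the given control
// plane. Behaviour can be adjusted with functional options such as
// WithPredictorFactory.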
func NewCapacityManager(ctx context.Context, cp controlplane.ControlPlane, opts ...func(*capacityManager) error) (CapacityManager, error) {
	cm := &capacityManager{
		ctx:              ctx,
		cp:               cp,
		lbg:              make(map[string]LBGroup),
		predictorFactory: newPredictor,
	}
	for _, o := range opts {
		if err := o(cm); err != nil {
			logrus.WithError(err).Error("Error handling option for CapacityManager")
			return nil, err
		}
	}
	return cm, nil
}

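// WithPredictorFactory is a functional option for NewCapacityManager that
// swaps the default predictor factory (newPredictor) for a caller-supplied
// one. A minimal usage sketch, where customPredictor is a hypothetical type
// implementing Predictor:
//
//	cm, err := NewCapacityManager(ctx, cp, WithPredictorFactory(func() Predictor {
//		return &customPredictor{}
//	}))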
func WithPredictorFactory(pf func() Predictor) func(*capacityManager) error {
	return func(cm *capacityManager) error {
		cm.predictorFactory = pf
		return nil
	}
}

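// LBGroup returns the group for lbgid, creating it on first use. It takes the
// read lock for the common case and only upgrades to the write lock when a
// new group must be allocated, re-checking the map after the upgrade.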
func (m *capacityManager) LBGroup(lbgid string) LBGroup {
	m.mx.RLock()
	// Optimistic path
	if lbg, ok := m.lbg[lbgid]; ok {
		m.mx.RUnlock()
		return lbg
	}

	// We don't have one: upgrade the lock and allocate
	m.mx.RUnlock()
	m.mx.Lock()
	defer m.mx.Unlock()
	// Need to check again
	if lbg, ok := m.lbg[lbgid]; ok {
		return lbg
	}
	logrus.Infof("Making new LBG to handle %v", lbgid)
	lbg := newLBGroup(lbgid, m.ctx, m.cp, m.predictorFactory)
	m.lbg[lbgid] = lbg
	return lbg
}

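// Merge folds a capacity snapshot list received from one load balancer into
// the per-group requirements.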
func (m *capacityManager) Merge(list *model.CapacitySnapshotList) {
	lbid := list.GetLbId()
	for _, new_req := range list.Snapshots {
		lbg := new_req.GetGroupId().GetId()

		logrus.Debugf("Merging snapshot %+v for %v from %v", new_req, lbg, lbid)
		m.LBGroup(lbg).UpdateRequirements(lbid, int64(new_req.GetMemMbTotal()))
	}
}

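// lbGroup holds both sides of the picture for one group: the capacity the
// load balancers say they want (guarded by cap_mx) and the runners we
// actually have or have already asked for (guarded by run_mx).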
type lbGroup struct {
	ctx context.Context

	id string

	// Attributes for managing incoming capacity requirements
	cap_mx sync.RWMutex

	total_wanted int64
	requirements map[string]*requirement // NuLB id -> (ts, total_wanted)

	controlStream chan requirement

	// Attributes for managing runner pool membership
	run_mx sync.RWMutex
	cp     controlplane.ControlPlane

	current_capacity int64              // Of all active runners
	target_capacity  int64              // All active runners plus any we've already asked for
	runners          map[string]*runner // A map of everything we know about
	active_runners   []*runner          // Everything currently in use
	draining_runners []*runner          // We keep tabs on these separately
	dead_runners     []*runner          // Waiting for control plane to remove

	predictor Predictor
}

type requirement struct {
	ts           time.Time // Time of last update
	total_wanted int64
}

const (
	RUNNER_ACTIVE = iota
	RUNNER_DRAINING
	RUNNER_DEAD
)

type runner struct {
	id       string // The same address may get recycled; we'll need to disambiguate somehow.
	address  string
	status   int
	capacity int64

	// XXX: If we're draining, this is handy to simulate runner readiness for shutdown
	kill_after time.Time
}

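// newLBGroup wires up a group and starts its control goroutine.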
func newLBGroup(lbgid string, ctx context.Context, cp controlplane.ControlPlane, predictorFactory func() Predictor) LBGroup {
	lbg := &lbGroup{
		ctx:           ctx,
		id:            lbgid,
		requirements:  make(map[string]*requirement),
		controlStream: make(chan requirement),
		cp:            cp,
		runners:       make(map[string]*runner),
		predictor:     predictorFactory(),
	}
	go lbg.control()
	return lbg
}

func (lbg *lbGroup) Id() string {
	return lbg.id
}

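// UpdateRequirements records the latest total capacity wanted by a single
// load balancer, adjusts the group-wide total accordingly and notifies the
// control loop.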
func (lbg *lbGroup) UpdateRequirements(lb string, total int64) {
	logrus.Debugf("Updating capacity requirements for %v, lb=%v", lbg.Id(), lb)
	defer logrus.Debugf("Updated %v, lb=%v", lbg.Id(), lb)
	lbg.cap_mx.Lock()

	last, ok := lbg.requirements[lb]

	// Add in the new requirements, removing the old ones if required.
	if !ok {
		// This is a new NuLB that we're just learning about
		last = &requirement{}
		lbg.requirements[lb] = last
	}

	// Update totals: remove this LB's previous capacity assertions
	lbg.total_wanted -= last.total_wanted

	// Update totals: add this LB's new assertions and record them
	lbg.total_wanted += total

	// Keep a copy of this requirement
	now := time.Now()
	last.ts = now
	last.total_wanted = total

	// Capture the group-wide total while we still hold the lock so the send
	// below doesn't race with concurrent updates.
	wanted := lbg.total_wanted

	// TODO: new_req also has a generation for the runner information that LB held. If that's out of date, signal that we need to readvertise

	// Send a new signal to the capacity control loop
	lbg.cap_mx.Unlock()

	logrus.Debugf("Sending new capacity requirement of %v", wanted)
	lbg.controlStream <- requirement{ts: now, total_wanted: wanted}
}

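// Purge drops requirements from load balancers that have not reported since
// oldest, invoking cb for each one removed, and returns the adjusted total.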
func (lbg *lbGroup) Purge(oldest time.Time, cb func(LBGroup, string)) int64 {
	lbg.cap_mx.Lock()
	defer lbg.cap_mx.Unlock()

	for lb, req := range lbg.requirements {
		if req.ts.Before(oldest) {
			// We need to nix this entry, it's utterly out-of-date
			lbg.total_wanted -= req.total_wanted
			delete(lbg.requirements, lb)

			// Let the caller know that this LB has been dropped
			cb(lbg, lb)
		}
	}
	return lbg.total_wanted
}

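// Tuning knobs for the control loop: how often stale LB requirements are
// purged, how long a capacity request stays actionable, how often the control
// plane is polled, the largest number of runners requested in one go, and how
// long a draining runner is kept before shutdown.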
const PURGE_INTERVAL = 5 * time.Second
const VALID_REQUEST_LIFETIME = 500 * time.Millisecond
const POLL_INTERVAL = time.Second
const LARGEST_REQUEST_AT_ONCE = 20

const MAX_DRAINDOWN_LIFETIME = 50 * time.Second // For the moment.

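// control is the per-group control loop: it polls the control plane for the
// current runner set, periodically purges stale LB requirements, and retargets
// the pool whenever a new requirement arrives on controlStream.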
func (lbg *lbGroup) control() {
	// Control loop. This should receive a series of requirements.
	// Occasionally, we walk the set of LBs that have spoken to us, purging those that are out-of-date.
	lastPurge := time.Now()
	nextPurge := lastPurge.Add(PURGE_INTERVAL)

	nextPoll := lastPurge

	for {
		logrus.Debugf("In capacity management loop for %v", lbg.Id())
		select {
		// Poll CP for runners (this will change, it's a stub)
		// We put this first (and run it immediately) because if the NPM has just been restarted we want to
		// repopulate our knowledge of what runners are currently up, so we don't generate spurious scaling requests
		// to the CP.
		case <-time.After(time.Until(nextPoll)):
			logrus.Debugf("Polling for runners for %v", lbg.Id())
			lbg.pollForRunners()
			nextPoll = time.Now().Add(POLL_INTERVAL)
			logrus.Debugf("Polled for %v", lbg.Id())

		// Manage capacity requests
		case <-time.After(time.Until(nextPurge)):
			logrus.Debugf("Purging for %v", lbg.Id())
			need := lbg.Purge(lastPurge, func(lbg LBGroup, lb string) {
				logrus.Warnf("Purging LB %v from %v - no communication received", lb, lbg.Id())
			})
			lastPurge = time.Now()
			nextPurge = lastPurge.Add(PURGE_INTERVAL)
			lbg.target(lastPurge, need)
			logrus.Debugf("Purged for %v", lbg.Id())

		case req := <-lbg.controlStream:
			logrus.Debugf("New requirement of %v received by control loop", req.total_wanted)
			lbg.target(req.ts, req.total_wanted)
			logrus.Debugf("New requirement handled for %v", lbg.Id())
		}
	}
}

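// target reconciles the desired scale (as computed by the predictor) with the
// capacity we already have or have asked for: draining runners are reactivated
// first, extra runners are requested from the control plane if we are still
// short, and surplus active runners are moved into draindown.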
func (lbg *lbGroup) target(ts time.Time, target int64) {
	if time.Since(ts) > VALID_REQUEST_LIFETIME {
		// We have a request that's too old; drop it.
		logrus.Warnf("Request for capacity is too old: %v", ts)
		return
	}

	lbg.run_mx.Lock()
	defer lbg.run_mx.Unlock()

	desiredScale := lbg.predictor.GetScale(ts, target)

	logrus.Debugf("Targeting capacity requirement of %v gives desired scale of %v", target, desiredScale)
	// We have:
	// - total capacity in active runners
	// - required total capacity
	// - capacity per runner
	// - any additional capacity we've already asked for

	// We scale appropriately.
	if desiredScale > lbg.target_capacity {
		// Scale up.
		// Even including capacity we are expecting to come down the pipe, we don't have enough.

		// Begin by reactivating any runners we're currently draining down.
		for desiredScale > lbg.target_capacity && len(lbg.draining_runners) > 0 {
			// Begin with the one we started draining last.
			runner := lbg.draining_runners[len(lbg.draining_runners)-1]
			logrus.Infof("Recovering runner %v at %v from draindown", runner.id, runner.address)

			lbg.draining_runners = lbg.draining_runners[:len(lbg.draining_runners)-1]
			runner.status = RUNNER_ACTIVE
			lbg.active_runners = append(lbg.active_runners, runner)
			lbg.current_capacity += runner.capacity
			lbg.target_capacity += runner.capacity
		}

		if desiredScale > lbg.target_capacity {
			// We still need additional capacity
			wanted := math.Min(math.Ceil(float64(desiredScale-lbg.target_capacity)/controlplane.CapacityPerRunner), LARGEST_REQUEST_AT_ONCE)
			asked_for, err := lbg.cp.ProvisionRunners(lbg.Id(), int(wanted)) // Send the request; they'll show up later
			if err != nil {
				// Some kind of error during attempt to scale up
				logrus.WithError(err).Error("Error occurred during attempt to scale up")
				return
			}
			lbg.target_capacity += int64(asked_for) * controlplane.CapacityPerRunner
		}

	} else if desiredScale <= lbg.current_capacity-controlplane.CapacityPerRunner {
		// Scale down.
		// We pick a node to turn off and move it to the draining pool.
		for desiredScale <= lbg.current_capacity-controlplane.CapacityPerRunner && len(lbg.active_runners) > 0 {
			// Begin with the one we added last.
			runner := lbg.active_runners[len(lbg.active_runners)-1]
			logrus.Infof("Marking runner %v at %v for draindown", runner.id, runner.address)

			lbg.active_runners = lbg.active_runners[:len(lbg.active_runners)-1]
			runner.status = RUNNER_DRAINING
			runner.kill_after = time.Now().Add(MAX_DRAINDOWN_LIFETIME)
			lbg.draining_runners = append(lbg.draining_runners, runner)
			lbg.current_capacity -= runner.capacity
			lbg.target_capacity -= runner.capacity
		}
	}
}

// Pool membership management
func (lbg *lbGroup) GetMembers() []string {
	lbg.run_mx.RLock()
	defer lbg.run_mx.RUnlock()

	members := make([]string, len(lbg.active_runners))
	for i, runner := range lbg.active_runners {
		members[i] = runner.address
	}
	return members
}

// Three things are handled here.
// First, if any draining runners are due to die, shut them off.
// Secondly, if the CP supplies any new capacity, add it to the pool as active.
// Finally, if dead runners have been shut down, remove them.
func (lbg *lbGroup) pollForRunners() {
	lbg.run_mx.Lock()
	defer lbg.run_mx.Unlock()

	now := time.Now()
	// The oldest draining runner will be at the front of the queue.
	for len(lbg.draining_runners) > 0 && now.After(lbg.draining_runners[0].kill_after) {
		// Mark this runner as to be killed
		runner := lbg.draining_runners[0]
		logrus.Infof("Drain down for runner %v at %v complete: signalling shutdown", runner.id, runner.address)
		lbg.draining_runners = lbg.draining_runners[1:]
		runner.status = RUNNER_DEAD
		lbg.dead_runners = append(lbg.dead_runners, runner)
		if err := lbg.cp.RemoveRunner(lbg.Id(), runner.id); err != nil {
			logrus.WithError(err).Errorf("Error attempting to close down runner %v at %v", runner.id, runner.address)
		}
	}

	// Get CP status and process it. This might be smarter but for the moment we just loop over everything we're told.
	logrus.Debugf("Getting hosts from ControlPlane for %v", lbg.Id())
	latestHosts, err := lbg.cp.GetLBGRunners(lbg.Id())
	if err != nil {
		logrus.WithError(err).Error("Problem talking to the CP to fetch runner status")
		return
	}

	seen := make(map[string]bool)
	for _, host := range latestHosts {
		_, ok := lbg.runners[host.Id]
		if ok {
			// We already know about this one
			logrus.Debugf(" ... host %v at %v is known", host.Id, host.Address)
		} else {
			logrus.Infof(" ... host %v at %v is new", host.Id, host.Address)

			// This is a new runner. Bring it into the active pool
			runner := &runner{
				id:       host.Id,
				address:  host.Address,
				status:   RUNNER_ACTIVE,
				capacity: host.Capacity,
			}
			lbg.runners[host.Id] = runner
			lbg.active_runners = append(lbg.active_runners, runner)
			lbg.current_capacity += runner.capacity // The total capacity is already computed, since we asked for this
		}
		seen[host.Id] = true
	}

	// Work out whether runners that we asked to be killed have been shut down.
	logrus.Debugf("Removing dead hosts for %v", lbg.Id())
	// The control plane might pull active or draining hosts out from under us. Deal with that too.
	lbg.active_runners = removeDead(seen, lbg.runners, lbg.active_runners)
	lbg.draining_runners = removeDead(seen, lbg.runners, lbg.draining_runners)
	lbg.dead_runners = removeDead(seen, lbg.runners, lbg.dead_runners)
}

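// removeDead filters runnerList in place, keeping only runners that the
// control plane still reports, and drops the ones that have disappeared from
// runnerMap.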
func removeDead(seen map[string]bool, runnerMap map[string]*runner, runnerList []*runner) []*runner {
	i := 0
	for _, runner := range runnerList {
		if _, ok := seen[runner.id]; ok {
			// This runner isn't shut down yet
			runnerList[i] = runner
			i++
		} else {
			logrus.Infof("Removing runner %v at %v that has disappeared", runner.id, runner.address)
			delete(runnerMap, runner.id)
		}
	}
	return runnerList[:i]
}

// Predictions. Given a timestamp and an input total capacity requirement, return the scale we should attempt to reach.
func newPredictor() Predictor {
	return &conservativePredictor{}
}

type conservativePredictor struct{}

func (p *conservativePredictor) GetScale(ts time.Time, target int64) int64 {
	// This is the most conservative approach. If we have a target capacity, attempt to reach that capacity by
	// asking for sufficient scale to satisfy it all at once.
	return target
}

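// A sketch (not part of this package's behaviour) of what a less conservative
// Predictor could look like: keep a fixed headroom above the reported
// requirement so small spikes don't immediately trigger another provisioning
// round. The type and field names are illustrative only; it would be plugged
// in via WithPredictorFactory.
//
//	type headroomPredictor struct {
//		headroom int64 // extra capacity (MB) to hold above the requirement
//	}
//
//	func (p *headroomPredictor) GetScale(ts time.Time, target int64) int64 {
//		return target + p.headroom
//	}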