mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: new container lauch adjustments (#677)
*) revert executor wait queue size comparison. This is too aggresive and with stall check below, now unnecessary. *) new container logic now checks if stats are constant, if this is the case, then we assume the system is stalled (eg running functions that take long time), this means we need to make progress and spin up a new container.
This commit is contained in:
@@ -306,6 +306,7 @@ func (a *agent) hotLauncher(ctx context.Context, callObj *call) {
|
|||||||
logger := common.Logger(ctx)
|
logger := common.Logger(ctx)
|
||||||
logger.WithField("launcher_timeout", timeout).Info("Hot function launcher starting")
|
logger.WithField("launcher_timeout", timeout).Info("Hot function launcher starting")
|
||||||
isAsync := callObj.Type == models.TypeAsync
|
isAsync := callObj.Type == models.TypeAsync
|
||||||
|
prevStats := callObj.slots.getStats()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@@ -319,14 +320,22 @@ func (a *agent) hotLauncher(ctx context.Context, callObj *call) {
|
|||||||
case <-callObj.slots.signaller:
|
case <-callObj.slots.signaller:
|
||||||
}
|
}
|
||||||
|
|
||||||
isNeeded, stats := callObj.slots.isNewContainerNeeded()
|
curStats := callObj.slots.getStats()
|
||||||
logger.WithField("stats", stats).Debug("Hot function launcher stats")
|
isNeeded := isNewContainerNeeded(&curStats, &prevStats)
|
||||||
|
prevStats = curStats
|
||||||
|
logger.WithFields(logrus.Fields{
|
||||||
|
"currentStats": curStats,
|
||||||
|
"previousStats": curStats,
|
||||||
|
}).Debug("Hot function launcher stats")
|
||||||
if !isNeeded {
|
if !isNeeded {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
resourceCtx, cancel := context.WithCancel(context.Background())
|
resourceCtx, cancel := context.WithCancel(context.Background())
|
||||||
logger.WithField("stats", stats).Info("Hot function launcher starting hot container")
|
logger.WithFields(logrus.Fields{
|
||||||
|
"currentStats": curStats,
|
||||||
|
"previousStats": curStats,
|
||||||
|
}).Info("Hot function launcher starting hot container")
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, isAsync):
|
case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, isAsync):
|
||||||
|
|||||||
@@ -189,60 +189,59 @@ func (a *slotQueue) getStats() slotQueueStats {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *slotQueue) isNewContainerNeeded() (bool, slotQueueStats) {
|
func isNewContainerNeeded(cur, prev *slotQueueStats) bool {
|
||||||
|
|
||||||
stats := a.getStats()
|
waiters := cur.states[SlotQueueWaiter]
|
||||||
|
|
||||||
waiters := stats.states[SlotQueueWaiter]
|
|
||||||
if waiters == 0 {
|
if waiters == 0 {
|
||||||
return false, stats
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// while a container is starting, do not start more than waiters
|
// while a container is starting, do not start more than waiters
|
||||||
starters := stats.states[SlotQueueStarter]
|
starters := cur.states[SlotQueueStarter]
|
||||||
if starters >= waiters {
|
if starters >= waiters {
|
||||||
return false, stats
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is a bit aggresive and assumes that we only
|
// no executors? We need to spin up a container quickly
|
||||||
// want to queue as much as num of containers.
|
executors := starters + cur.states[SlotQueueRunner]
|
||||||
executors := starters + stats.states[SlotQueueRunner]
|
if executors == 0 {
|
||||||
if executors < waiters {
|
return true
|
||||||
return true, stats
|
}
|
||||||
|
|
||||||
|
// This means we are not making any progress and stats are
|
||||||
|
// not being refreshed quick enough. We err on side
|
||||||
|
// of new container here.
|
||||||
|
isEqual := true
|
||||||
|
for idx, _ := range cur.latencies {
|
||||||
|
if prev.latencies[idx] != cur.latencies[idx] {
|
||||||
|
isEqual = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if isEqual {
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// WARNING: Below is a few heuristics that are
|
// WARNING: Below is a few heuristics that are
|
||||||
// speculative, which may (and will) likely need
|
// speculative, which may (and will) likely need
|
||||||
// adjustments.
|
// adjustments.
|
||||||
|
|
||||||
// WARNING: latencies below are updated after a call
|
runLat := cur.latencies[SlotQueueRunner]
|
||||||
// switches to/from different states. Do not assume
|
waitLat := cur.latencies[SlotQueueWaiter]
|
||||||
// the metrics below are always up-to-date. For example,
|
startLat := cur.latencies[SlotQueueStarter]
|
||||||
// a sudden burst of incoming requests will increase
|
|
||||||
// waiter count but not necessarily wait latency until
|
|
||||||
// those requests switch from waiter state.
|
|
||||||
|
|
||||||
runLat := stats.latencies[SlotQueueRunner]
|
|
||||||
waitLat := stats.latencies[SlotQueueWaiter]
|
|
||||||
startLat := stats.latencies[SlotQueueStarter]
|
|
||||||
|
|
||||||
// no wait latency? No need to spin up new container
|
|
||||||
if waitLat == 0 {
|
|
||||||
return false, stats
|
|
||||||
}
|
|
||||||
|
|
||||||
// this determines the aggresiveness of the container launch.
|
// this determines the aggresiveness of the container launch.
|
||||||
if runLat/executors*2 < waitLat {
|
if executors > 0 && runLat/executors*2 < waitLat {
|
||||||
return true, stats
|
return true
|
||||||
}
|
}
|
||||||
if runLat < waitLat {
|
if runLat < waitLat {
|
||||||
return true, stats
|
return true
|
||||||
}
|
}
|
||||||
if startLat < waitLat {
|
if startLat < waitLat {
|
||||||
return true, stats
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
return false, stats
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *slotQueue) enterState(metricIdx SlotQueueMetricType) {
|
func (a *slotQueue) enterState(metricIdx SlotQueueMetricType) {
|
||||||
|
|||||||
@@ -152,18 +152,6 @@ func TestSlotQueueBasic1(t *testing.T) {
|
|||||||
}
|
}
|
||||||
case <-time.After(time.Duration(500) * time.Millisecond):
|
case <-time.After(time.Duration(500) * time.Millisecond):
|
||||||
}
|
}
|
||||||
|
|
||||||
stats1 := obj.getStats()
|
|
||||||
isNeeded, stats2 := obj.isNewContainerNeeded()
|
|
||||||
|
|
||||||
if stats1 != stats2 {
|
|
||||||
t.Fatalf("Faulty stats %#v != %#v", stats1, stats2)
|
|
||||||
}
|
|
||||||
|
|
||||||
// there are no waiters.
|
|
||||||
if isNeeded {
|
|
||||||
t.Fatalf("Shouldn't need a container")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSlotQueueBasic2(t *testing.T) {
|
func TestSlotQueueBasic2(t *testing.T) {
|
||||||
@@ -173,9 +161,6 @@ func TestSlotQueueBasic2(t *testing.T) {
|
|||||||
if !obj.isIdle() {
|
if !obj.isIdle() {
|
||||||
t.Fatalf("Should be idle")
|
t.Fatalf("Should be idle")
|
||||||
}
|
}
|
||||||
if ok, _ := obj.isNewContainerNeeded(); ok {
|
|
||||||
t.Fatalf("Should not need a new container")
|
|
||||||
}
|
|
||||||
|
|
||||||
outChan, cancel := obj.startDequeuer(context.Background())
|
outChan, cancel := obj.startDequeuer(context.Background())
|
||||||
select {
|
select {
|
||||||
@@ -187,6 +172,27 @@ func TestSlotQueueBasic2(t *testing.T) {
|
|||||||
cancel()
|
cancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSlotNewContainerLogic1(t *testing.T) {
|
||||||
|
|
||||||
|
cur := slotQueueStats{}
|
||||||
|
cur.states[SlotQueueRunner] = 10
|
||||||
|
cur.states[SlotQueueWaiter] = 1
|
||||||
|
|
||||||
|
prev := cur
|
||||||
|
|
||||||
|
if !isNewContainerNeeded(&cur, &prev) {
|
||||||
|
t.Fatalf("Should need a new container cur: %#v prev: %#v", cur, prev)
|
||||||
|
}
|
||||||
|
|
||||||
|
prev.latencies[SlotQueueRunner] = 1
|
||||||
|
prev.latencies[SlotQueueWaiter] = 1
|
||||||
|
prev.latencies[SlotQueueStarter] = 1
|
||||||
|
|
||||||
|
if isNewContainerNeeded(&cur, &prev) {
|
||||||
|
t.Fatalf("Should not need a new container cur: %#v prev: %#v", cur, prev)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSlotQueueBasic3(t *testing.T) {
|
func TestSlotQueueBasic3(t *testing.T) {
|
||||||
|
|
||||||
slotName := "test3"
|
slotName := "test3"
|
||||||
|
|||||||
Reference in New Issue
Block a user