mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
Adds wait-time-based scaling across nodes
This works by having the functions server return an FXLB-WAIT header on every response, carrying the wait time for that function to start. The lb then keeps, on a per node+function basis, an EWMA (exponentially weighted moving average) of the last 10 requests' wait times (to reduce jitter).

Now that we don't have max concurrency, it's actually pretty challenging to get the wait time stuff to tick. I expect that in the near future we will be throttling functions on a given node in order to induce this, but that is for another day, as that code needs a lot of reworking. I tested this by introducing some arbitrary throttling (not checked in), and load spreads over nodes correctly (see images).

We will also need to play with the intervals we want to use: if you have a func with a 50ms run time, then basically 10 of those will rev up another node (this was before removing max_c, with max_c=1). In any event, this wires in the basic plumbing.

* make docs great again; renamed the lb dir to fnlb
* added wait time to the dashboard
* wires in a ready channel to await the first pull for hot images, so that the pull counts in the wait time (should be otherwise useful)

Future:
TODO: rework the lb code API to be pluggable + wire in a data store
TODO: toss out the first data point containing the pull, so as not to jump onto another node immediately (maybe this is actually a good thing?)
This commit is contained in:
@@ -47,6 +47,7 @@ func getCfg(t *models.Task) *task.Config {
|
||||
ID: t.ID,
|
||||
AppName: t.AppName,
|
||||
Env: t.EnvVars,
|
||||
Ready: make(chan struct{}),
|
||||
}
|
||||
if t.Timeout == nil || *t.Timeout <= 0 {
|
||||
cfg.Timeout = DefaultTimeout
|
||||
|
||||
@@ -60,7 +60,8 @@ type Auther interface {
|
||||
|
||||
type runResult struct {
|
||||
error
|
||||
StatusValue string
|
||||
status string
|
||||
start time.Time
|
||||
}
|
||||
|
||||
func (r *runResult) Error() string {
|
||||
@@ -70,8 +71,9 @@ func (r *runResult) Error() string {
|
||||
return r.error.Error()
|
||||
}
|
||||
|
||||
func (r *runResult) Status() string { return r.StatusValue }
|
||||
func (r *runResult) UserVisible() bool { return common.IsUserVisibleError(r.error) }
|
||||
func (r *runResult) Status() string { return r.status }
|
||||
func (r *runResult) UserVisible() bool { return common.IsUserVisibleError(r.error) }
|
||||
func (r *runResult) StartTime() time.Time { return r.start }
|
||||
|
||||
type DockerDriver struct {
|
||||
conf drivers.Config
|
||||
@@ -409,6 +411,8 @@ func (drv *DockerDriver) run(ctx context.Context, container string, task drivers
|
||||
return nil, err
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
|
||||
err = drv.startTask(ctx, container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -429,8 +433,9 @@ func (drv *DockerDriver) run(ctx context.Context, container string, task drivers
|
||||
|
||||
status, err := drv.status(ctx, container)
|
||||
return &runResult{
|
||||
StatusValue: status,
|
||||
error: err,
|
||||
start: start,
|
||||
status: status,
|
||||
error: err,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -52,6 +52,11 @@ type RunResult interface {
|
||||
// Status should return the current status of the task.
|
||||
// Only valid options are {"error", "success", "timeout", "killed", "cancelled"}.
|
||||
Status() string
|
||||
|
||||
// StartTime returns the time just before beginning execution of a task,
|
||||
// for example including the time to pull a container image and doing any
|
||||
// other setup. This should not include a container's execution duration.
|
||||
StartTime() time.Time
|
||||
}
|
||||
|
||||
// The ContainerTask interface guides task execution across a wide variety of
|
||||
|
||||
@@ -3,6 +3,7 @@ package mock
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"gitlab-odx.oracle.com/odx/functions/api/runner/drivers"
|
||||
)
|
||||
@@ -31,16 +32,17 @@ func (c *cookie) Run(ctx context.Context) (drivers.RunResult, error) {
|
||||
return nil, fmt.Errorf("Mocker error! Bad.")
|
||||
}
|
||||
return &runResult{
|
||||
error: nil,
|
||||
StatusValue: "success",
|
||||
error: nil,
|
||||
status: "success",
|
||||
start: time.Now(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type runResult struct {
|
||||
error
|
||||
StatusValue string
|
||||
status string
|
||||
start time.Time
|
||||
}
|
||||
|
||||
func (runResult *runResult) Status() string {
|
||||
return runResult.StatusValue
|
||||
}
|
||||
func (r *runResult) Status() string { return r.status }
|
||||
func (r *runResult) StartTime() time.Time { return r.start }
|
||||
|
||||
@@ -211,6 +211,12 @@ func (r *Runner) run(ctx context.Context, cfg *task.Config) (drivers.RunResult,
|
||||
}
|
||||
defer cookie.Close()
|
||||
|
||||
select {
|
||||
case <-cfg.Ready:
|
||||
default:
|
||||
close(cfg.Ready)
|
||||
}
|
||||
|
||||
metricStart := time.Now()
|
||||
|
||||
result, err := cookie.Run(ctx)
|
||||
|
||||
@@ -37,6 +37,7 @@ func TestRunnerHello(t *testing.T) {
|
||||
ID: fmt.Sprintf("hello-%d-%d", i, time.Now().Unix()),
|
||||
Image: test.route.Image,
|
||||
Timeout: 10 * time.Second,
|
||||
Ready: make(chan struct{}),
|
||||
Stdin: strings.NewReader(test.payload),
|
||||
Stdout: &stdout,
|
||||
Stderr: &stderr,
|
||||
@@ -90,6 +91,7 @@ func TestRunnerError(t *testing.T) {
|
||||
ID: fmt.Sprintf("err-%d-%d", i, time.Now().Unix()),
|
||||
Image: test.route.Image,
|
||||
Timeout: 10 * time.Second,
|
||||
Ready: make(chan struct{}),
|
||||
Stdin: strings.NewReader(test.payload),
|
||||
Stdout: &stdout,
|
||||
Stderr: &stderr,
|
||||
|
||||
@@ -9,15 +9,18 @@ import (
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
ID string
|
||||
Path string
|
||||
Image string
|
||||
Timeout time.Duration
|
||||
IdleTimeout time.Duration
|
||||
AppName string
|
||||
Memory uint64
|
||||
Env map[string]string
|
||||
Format string
|
||||
ID string
|
||||
Path string
|
||||
Image string
|
||||
Timeout time.Duration
|
||||
IdleTimeout time.Duration
|
||||
AppName string
|
||||
Memory uint64
|
||||
Env map[string]string
|
||||
Format string
|
||||
ReceivedTime time.Time
|
||||
// Ready is used to await the first pull
|
||||
Ready chan struct{}
|
||||
|
||||
Stdin io.Reader
|
||||
Stdout io.Writer
|
||||
|
||||
@@ -94,6 +94,7 @@ func (rnr *Runner) RunTask(ctx context.Context, cfg *task.Config) (drivers.RunRe
|
||||
} else {
|
||||
tasks <- treq
|
||||
}
|
||||
|
||||
resp := <-treq.Response
|
||||
return resp.Result, resp.Err
|
||||
}
|
||||
@@ -256,6 +257,14 @@ func (hc *htfn) serve(ctx context.Context) {
|
||||
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-lctx.Done():
|
||||
case <-cfg.Ready:
|
||||
// on first execution, wait before starting idle timeout / stopping wait time clock,
|
||||
// since docker pull / container create need to happen.
|
||||
// XXX (reed): should we still obey the task timeout? docker image could be 8GB...
|
||||
}
|
||||
|
||||
select {
|
||||
case <-lctx.Done():
|
||||
return
|
||||
@@ -263,6 +272,7 @@ func (hc *htfn) serve(ctx context.Context) {
|
||||
logger.Info("Canceling inactive hot function")
|
||||
cancel()
|
||||
case t := <-hc.tasks:
|
||||
start := time.Now()
|
||||
err := hc.proto.Dispatch(lctx, t)
|
||||
status := "success"
|
||||
if err != nil {
|
||||
@@ -272,8 +282,8 @@ func (hc *htfn) serve(ctx context.Context) {
|
||||
hc.once()
|
||||
|
||||
t.Response <- task.Response{
|
||||
&runResult{StatusValue: status, error: err},
|
||||
err,
|
||||
Result: &runResult{start: start, status: status, error: err},
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -304,7 +314,8 @@ func runTaskReq(rnr *Runner, t task.Request) {
|
||||
|
||||
type runResult struct {
|
||||
error
|
||||
StatusValue string
|
||||
status string
|
||||
start time.Time
|
||||
}
|
||||
|
||||
func (r *runResult) Error() string {
|
||||
@@ -314,4 +325,5 @@ func (r *runResult) Error() string {
|
||||
return r.error.Error()
|
||||
}
|
||||
|
||||
func (r *runResult) Status() string { return r.StatusValue }
|
||||
func (r *runResult) Status() string { return r.status }
|
||||
func (r *runResult) StartTime() time.Time { return r.start }
|
||||
|
||||
Reference in New Issue
Block a user