Cpu resources (#642)

* fn: cpu quota implementation
This commit is contained in:
Tolga Ceylan
2018-01-12 11:38:28 -08:00
committed by GitHub
parent fa59400a97
commit 39b2cb2d9b
29 changed files with 856 additions and 91 deletions

View File

@@ -338,7 +338,7 @@ func (a *agent) hotLauncher(ctx context.Context, callObj *call) {
}).Info("Hot function launcher starting hot container")
select {
case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, isAsync):
case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, uint64(callObj.CPUs), isAsync):
cancel()
if isOpen {
a.wg.Add(1)
@@ -404,7 +404,7 @@ func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
ch := make(chan Slot)
select {
case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, isAsync):
case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, uint64(call.CPUs), isAsync):
if !isOpen {
return nil, models.ErrCallTimeoutServerBusy
}
@@ -544,6 +544,7 @@ func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch
image: call.Image,
env: map[string]string(call.Config),
memory: call.Memory,
cpus: uint64(call.CPUs),
timeout: time.Duration(call.Timeout) * time.Second, // this is unnecessary, but in case removal fails...
stdin: call.req.Body,
stdout: call.w,
@@ -604,12 +605,13 @@ func (a *agent) runHot(ctxArg context.Context, call *call, tok ResourceToken) {
image: call.Image,
env: map[string]string(call.Config),
memory: call.Memory,
cpus: uint64(call.CPUs),
stdin: stdinRead,
stdout: stdoutWrite,
stderr: &ghostWriter{inner: stderr},
}
logger := logrus.WithFields(logrus.Fields{"id": container.id, "app": call.AppName, "route": call.Path, "image": call.Image, "memory": call.Memory, "format": call.Format, "idle_timeout": call.IdleTimeout})
logger := logrus.WithFields(logrus.Fields{"id": container.id, "app": call.AppName, "route": call.Path, "image": call.Image, "memory": call.Memory, "cpus": call.CPUs, "format": call.Format, "idle_timeout": call.IdleTimeout})
ctx = common.WithLogger(ctx, logger)
cookie, err := a.driver.Prepare(ctx, container)
@@ -698,6 +700,7 @@ type container struct {
image string
env map[string]string
memory uint64
cpus uint64
timeout time.Duration // cold only (superfluous, but in case)
stdin io.Reader
@@ -735,6 +738,7 @@ func (c *container) Image() string { return c.image }
func (c *container) Timeout() time.Duration { return c.timeout }
func (c *container) EnvVars() map[string]string { return c.env }
func (c *container) Memory() uint64 { return c.memory * 1024 * 1024 } // convert MB
func (c *container) CPUs() uint64 { return c.cpus }
// WriteStat publishes each metric in the specified Stats structure as a histogram metric
func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {

View File

@@ -185,6 +185,7 @@ func TestCallConfigurationModel(t *testing.T) {
const timeout = 1
const idleTimeout = 20
const memory = 256
CPUs := models.MilliCPUs(1000)
method := "GET"
url := "http://127.0.0.1:8080/r/" + appName + path
payload := "payload"
@@ -195,6 +196,7 @@ func TestCallConfigurationModel(t *testing.T) {
"FN_APP_NAME": appName,
"FN_PATH": path,
"FN_MEMORY": strconv.Itoa(memory),
"FN_CPUS": CPUs.String(),
"FN_TYPE": typ,
"APP_VAR": "FOO",
"ROUTE_VAR": "BAR",
@@ -210,6 +212,7 @@ func TestCallConfigurationModel(t *testing.T) {
Timeout: timeout,
IdleTimeout: idleTimeout,
Memory: memory,
CPUs: CPUs,
Payload: payload,
URL: url,
Method: method,
@@ -243,6 +246,7 @@ func TestAsyncCallHeaders(t *testing.T) {
const timeout = 1
const idleTimeout = 20
const memory = 256
CPUs := models.MilliCPUs(200)
method := "GET"
url := "http://127.0.0.1:8080/r/" + appName + path
payload := "payload"
@@ -255,6 +259,7 @@ func TestAsyncCallHeaders(t *testing.T) {
"FN_APP_NAME": appName,
"FN_PATH": path,
"FN_MEMORY": strconv.Itoa(memory),
"FN_CPUS": CPUs.String(),
"FN_TYPE": typ,
"APP_VAR": "FOO",
"ROUTE_VAR": "BAR",
@@ -277,6 +282,7 @@ func TestAsyncCallHeaders(t *testing.T) {
Timeout: timeout,
IdleTimeout: idleTimeout,
Memory: memory,
CPUs: CPUs,
Payload: payload,
URL: url,
Method: method,
@@ -343,6 +349,7 @@ func TestSubmitError(t *testing.T) {
const timeout = 10
const idleTimeout = 20
const memory = 256
CPUs := models.MilliCPUs(200)
method := "GET"
url := "http://127.0.0.1:8080/r/" + appName + path
payload := "payload"
@@ -353,6 +360,7 @@ func TestSubmitError(t *testing.T) {
"FN_APP_NAME": appName,
"FN_PATH": path,
"FN_MEMORY": strconv.Itoa(memory),
"FN_CPUS": CPUs.String(),
"FN_TYPE": typ,
"APP_VAR": "FOO",
"ROUTE_VAR": "BAR",
@@ -369,6 +377,7 @@ func TestSubmitError(t *testing.T) {
Timeout: timeout,
IdleTimeout: idleTimeout,
Memory: memory,
CPUs: CPUs,
Payload: payload,
URL: url,
Method: method,

View File

@@ -103,6 +103,7 @@ func FromRequest(appName, path string, req *http.Request) CallOpt {
Timeout: route.Timeout,
IdleTimeout: route.IdleTimeout,
Memory: route.Memory,
CPUs: route.CPUs,
Config: buildConfig(app, route),
Headers: req.Header,
CreatedAt: strfmt.DateTime(time.Now()),
@@ -130,6 +131,11 @@ func buildConfig(app *models.App, route *models.Route) models.Config {
// TODO: might be a good idea to pass in: "FN_BASE_PATH" = fmt.Sprintf("/r/%s", appName) || "/" if using DNS entries per app
conf["FN_MEMORY"] = fmt.Sprintf("%d", route.Memory)
conf["FN_TYPE"] = route.Type
CPUs := route.CPUs.String()
if CPUs != "" {
conf["FN_CPUS"] = CPUs
}
return conf
}

View File

@@ -115,6 +115,15 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
Context: ctx,
}
// Translate milli cpus into CPUQuota & CPUPeriod (see Linux cGroups CFS cgroup v1 documentation)
// eg: task.CPUQuota() of 8000 means CPUQuota of 8 * 100000 usecs in 100000 usec period,
// which is approx 8 CPUS in CFS world.
// Also see docker run options --cpu-quota and --cpu-period
if task.CPUs() != 0 {
container.HostConfig.CPUQuota = int64(task.CPUs() * 100)
container.HostConfig.CPUPeriod = 100000
}
volumes := task.Volumes()
for _, mapping := range volumes {
hostDir := mapping[0]
@@ -140,7 +149,7 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
// since we retry under the hood, if the container gets created and retry fails, we can just ignore error
if err != docker.ErrContainerAlreadyExists {
log.WithFields(logrus.Fields{"call_id": task.Id(), "command": container.Config.Cmd, "memory": container.Config.Memory,
"cpu_shares": container.Config.CPUShares, "hostname": container.Config.Hostname, "name": container.Name,
"cpu_shares": container.Config.CPUShares, "cpu_quota": task.CPUs(), "hostname": container.Config.Hostname, "name": container.Name,
"image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": container.Name,
}).WithError(err).Error("Could not create container")

View File

@@ -31,6 +31,7 @@ func (f *taskDockerTest) Logger() (stdout, stderr io.Writer) { return f.out
func (f *taskDockerTest) WriteStat(context.Context, drivers.Stat) { /* TODO */ }
func (f *taskDockerTest) Volumes() [][2]string { return [][2]string{} }
func (f *taskDockerTest) Memory() uint64 { return 256 * 1024 * 1024 }
func (f *taskDockerTest) CPUs() uint64 { return 0 }
func (f *taskDockerTest) WorkDir() string { return "" }
func (f *taskDockerTest) Close() {}
func (f *taskDockerTest) Input() io.Reader { return f.input }

View File

@@ -104,6 +104,9 @@ type ContainerTask interface {
// 0 is unlimited.
Memory() uint64
// CPUs in milli CPU units
CPUs() uint64
// WorkDir returns the working directory to use for the task. Empty string
// leaves it unset.
WorkDir() string

View File

@@ -26,7 +26,7 @@ const (
type ResourceTracker interface {
WaitAsyncResource() chan struct{}
// returns a closed channel if the resource can never me met.
GetResourceToken(ctx context.Context, memory uint64, isAsync bool) <-chan ResourceToken
GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken
}
type resourceTracker struct {
@@ -42,6 +42,17 @@ type resourceTracker struct {
ramAsyncUsed uint64
// memory in use for async area in which agent stops dequeuing async jobs
ramAsyncHWMark uint64
// cpuTotal is the total usable cpu for sync functions
cpuSyncTotal uint64
// cpuSyncUsed is cpu reserved for running sync containers including hot/idle
cpuSyncUsed uint64
// cpuAsyncTotal is the total usable cpu for async + sync functions
cpuAsyncTotal uint64
// cpuAsyncUsed is cpu reserved for running async + sync containers including hot/idle
cpuAsyncUsed uint64
// cpu in use for async area in which agent stops dequeuing async jobs
cpuAsyncHWMark uint64
}
func NewResourceTracker() ResourceTracker {
@@ -51,6 +62,7 @@ func NewResourceTracker() ResourceTracker {
}
obj.initializeMemory()
obj.initializeCPU()
return obj
}
@@ -60,46 +72,52 @@ type ResourceToken interface {
}
type resourceToken struct {
once sync.Once
decrement func()
}
func (t *resourceToken) Close() error {
t.decrement()
t.once.Do(func() {
t.decrement()
})
return nil
}
func (a *resourceTracker) isResourceAvailableLocked(memory uint64, isAsync bool) bool {
func (a *resourceTracker) isResourceAvailableLocked(memory uint64, cpuQuota uint64, isAsync bool) bool {
asyncAvail := a.ramAsyncTotal - a.ramAsyncUsed
syncAvail := a.ramSyncTotal - a.ramSyncUsed
asyncAvailMem := a.ramAsyncTotal - a.ramAsyncUsed
syncAvailMem := a.ramSyncTotal - a.ramSyncUsed
asyncAvailCPU := a.cpuAsyncTotal - a.cpuAsyncUsed
syncAvailCPU := a.cpuSyncTotal - a.cpuSyncUsed
// For sync functions, we can steal from async pool. For async, we restrict it to sync pool
if isAsync {
return asyncAvail >= memory
return asyncAvailMem >= memory && asyncAvailCPU >= cpuQuota
} else {
return asyncAvail+syncAvail >= memory
return asyncAvailMem+syncAvailMem >= memory && asyncAvailCPU+syncAvailCPU >= cpuQuota
}
}
// is this request possible to meet? If no, fail quick
func (a *resourceTracker) isResourcePossible(memory uint64, isAsync bool) bool {
func (a *resourceTracker) isResourcePossible(memory uint64, cpuQuota uint64, isAsync bool) bool {
if isAsync {
return memory <= a.ramAsyncTotal
return memory <= a.ramAsyncTotal && cpuQuota <= a.cpuAsyncTotal
} else {
return memory <= a.ramSyncTotal+a.ramAsyncTotal
return memory <= a.ramSyncTotal+a.ramAsyncTotal && cpuQuota <= a.cpuSyncTotal+a.cpuAsyncTotal
}
}
// the received token should be passed directly to launch (unconditionally), launch
// will close this token (i.e. the receiver should not call Close)
func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, isAsync bool) <-chan ResourceToken {
func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken {
memory = memory * Mem1MB
c := a.cond
ch := make(chan ResourceToken)
if !a.isResourcePossible(memory, isAsync) {
if !a.isResourcePossible(memory, cpuQuota, isAsync) {
close(ch)
return ch
}
@@ -107,39 +125,44 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, i
go func() {
c.L.Lock()
for !a.isResourceAvailableLocked(memory, isAsync) {
select {
case <-ctx.Done():
c.L.Unlock()
return
default:
}
for !a.isResourceAvailableLocked(memory, cpuQuota, isAsync) && ctx.Err() == nil {
c.Wait()
}
if ctx.Err() != nil {
c.L.Unlock()
return
}
var asyncMem, syncMem uint64
var asyncCPU, syncCPU uint64
if isAsync {
// async uses async pool only
asyncMem = memory
} else if a.ramSyncTotal-a.ramSyncUsed >= memory {
// if sync fits in sync pool
syncMem = memory
asyncCPU = cpuQuota
} else {
// if sync fits async + sync pool
syncMem = a.ramSyncTotal - a.ramSyncUsed
syncMem = minUint64(a.ramSyncTotal-a.ramSyncUsed, memory)
syncCPU = minUint64(a.cpuSyncTotal-a.cpuSyncUsed, cpuQuota)
asyncMem = memory - syncMem
asyncCPU = cpuQuota - syncCPU
}
a.ramAsyncUsed += asyncMem
a.ramSyncUsed += syncMem
a.cpuAsyncUsed += asyncCPU
a.cpuSyncUsed += syncCPU
c.L.Unlock()
t := &resourceToken{decrement: func() {
c.L.Lock()
a.ramAsyncUsed -= asyncMem
a.ramSyncUsed -= syncMem
a.cpuAsyncUsed -= asyncCPU
a.cpuSyncUsed -= syncCPU
c.L.Unlock()
// WARNING: yes, we wake up everyone even async waiters when only sync pool has space, but
@@ -159,7 +182,7 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, i
return ch
}
// WaitAsyncResource will send a signal on the returned channel when RAM in-use
// WaitAsyncResource will send a signal on the returned channel when RAM and CPU in-use
// in the async area is less than high water mark
func (a *resourceTracker) WaitAsyncResource() chan struct{} {
ch := make(chan struct{})
@@ -167,7 +190,7 @@ func (a *resourceTracker) WaitAsyncResource() chan struct{} {
c := a.cond
go func() {
c.L.Lock()
for a.ramAsyncUsed >= a.ramAsyncHWMark {
for a.ramAsyncUsed >= a.ramAsyncHWMark || a.cpuAsyncUsed >= a.cpuAsyncHWMark {
c.Wait()
}
c.L.Unlock()
@@ -198,6 +221,71 @@ func clampUint64(val, min, max uint64) uint64 {
return val
}
func (a *resourceTracker) initializeCPU() {
var maxSyncCPU, maxAsyncCPU, cpuAsyncHWMark uint64
var totalCPU, availCPU uint64
if runtime.GOOS == "linux" {
// Why do we prefer /proc/cpuinfo for Linux and not just use runtime.NumCPU?
// This is because NumCPU is sched_getaffinity based and we prefer to check
// cgroup which will more likely be same cgroup for container runtime
numCPU, err := checkProcCPU()
if err != nil {
logrus.WithError(err).Error("Error checking for CPU, falling back to runtime CPU count.")
numCPU = uint64(runtime.NumCPU())
}
totalCPU = 1000 * numCPU
availCPU = totalCPU
// Clamp further if cgroups CFS quota/period limits are in place
cgroupCPU := checkCgroupCPU()
if cgroupCPU > 0 {
availCPU = minUint64(availCPU, cgroupCPU)
}
// TODO: check cgroup cpuset to clamp this further. We might be restricted into
// a subset of CPUs. (eg. /sys/fs/cgroup/cpuset/cpuset.effective_cpus)
// TODO: skip CPU headroom for ourselves for now
} else {
totalCPU = uint64(runtime.NumCPU() * 1000)
availCPU = totalCPU
}
logrus.WithFields(logrus.Fields{
"totalCPU": totalCPU,
"availCPU": availCPU,
}).Info("available cpu")
// %20 of cpu for sync only reserve
maxSyncCPU = uint64(availCPU * 2 / 10)
maxAsyncCPU = availCPU - maxSyncCPU
cpuAsyncHWMark = maxAsyncCPU * 8 / 10
logrus.WithFields(logrus.Fields{
"cpuSync": maxSyncCPU,
"cpuAsync": maxAsyncCPU,
"cpuAsyncHWMark": cpuAsyncHWMark,
}).Info("sync and async cpu reservations")
if maxSyncCPU == 0 || maxAsyncCPU == 0 {
logrus.Fatal("Cannot get the proper CPU information to size server")
}
if maxSyncCPU+maxAsyncCPU < 1000 {
logrus.Warn("Severaly Limited CPU: cpuSync + cpuAsync < 1000m (1 CPU)")
} else if maxAsyncCPU < 1000 {
logrus.Warn("Severaly Limited CPU: cpuAsync < 1000m (1 CPU)")
}
a.cpuAsyncHWMark = cpuAsyncHWMark
a.cpuSyncTotal = maxSyncCPU
a.cpuAsyncTotal = maxAsyncCPU
}
func (a *resourceTracker) initializeMemory() {
var maxSyncMemory, maxAsyncMemory, ramAsyncHWMark uint64
@@ -205,7 +293,7 @@ func (a *resourceTracker) initializeMemory() {
if runtime.GOOS == "linux" {
// system wide available memory
totalMemory, err := checkProc()
totalMemory, err := checkProcMem()
if err != nil {
logrus.WithError(err).Fatal("Cannot get the proper memory information to size server.")
}
@@ -213,7 +301,7 @@ func (a *resourceTracker) initializeMemory() {
availMemory := totalMemory
// cgroup limit restriction on memory usage
cGroupLimit, err := checkCgroup()
cGroupLimit, err := checkCgroupMem()
if err != nil {
logrus.WithError(err).Error("Error checking for cgroup memory limits, falling back to host memory available..")
} else {
@@ -251,7 +339,7 @@ func (a *resourceTracker) initializeMemory() {
"ramSync": maxSyncMemory,
"ramAsync": maxAsyncMemory,
"ramAsyncHWMark": ramAsyncHWMark,
}).Info("sync and async reservations")
}).Info("sync and async ram reservations")
if maxSyncMemory == 0 || maxAsyncMemory == 0 {
logrus.Fatal("Cannot get the proper memory pool information to size server")
@@ -286,24 +374,56 @@ func getMemoryHeadRoom(usableMemory uint64) (uint64, error) {
return headRoom, nil
}
func checkCgroup() (uint64, error) {
f, err := os.Open("/sys/fs/cgroup/memory/memory.limit_in_bytes")
func readString(fileName string) (string, error) {
b, err := ioutil.ReadFile(fileName)
if err != nil {
return "", err
}
value := string(b)
return strings.TrimSpace(value), nil
}
func checkCgroupMem() (uint64, error) {
value, err := readString("/sys/fs/cgroup/memory/memory.limit_in_bytes")
if err != nil {
return 0, err
}
defer f.Close()
b, err := ioutil.ReadAll(f)
return strconv.ParseUint(value, 10, 64)
}
func checkCgroupCPU() uint64 {
periodStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
if err != nil {
return 0, err
return 0
}
limBytes := string(b)
limBytes = strings.TrimSpace(limBytes)
return strconv.ParseUint(limBytes, 10, 64)
quotaStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
if err != nil {
return 0
}
period, err := strconv.ParseUint(periodStr, 10, 64)
if err != nil {
logrus.Warn("Cannot parse CFS period", err)
return 0
}
quota, err := strconv.ParseInt(quotaStr, 10, 64)
if err != nil {
logrus.Warn("Cannot parse CFS quota", err)
return 0
}
if quota <= 0 || period <= 0 {
return 0
}
return uint64(quota) * 1000 / period
}
var errCantReadMemInfo = errors.New("Didn't find MemAvailable in /proc/meminfo, kernel is probably < 3.14")
func checkProc() (uint64, error) {
func checkProcMem() (uint64, error) {
f, err := os.Open("/proc/meminfo")
if err != nil {
return 0, err
@@ -341,3 +461,30 @@ func checkProc() (uint64, error) {
return 0, errCantReadMemInfo
}
func checkProcCPU() (uint64, error) {
f, err := os.Open("/proc/cpuinfo")
if err != nil {
return 0, err
}
defer f.Close()
total := uint64(0)
scanner := bufio.NewScanner(f)
for scanner.Scan() {
b := scanner.Text()
// processor : 0
toks := strings.Fields(b)
if len(toks) == 3 && toks[0] == "processor" && toks[1] == ":" {
total += 1
}
}
if total == 0 {
return 0, errors.New("Could not parse cpuinfo")
}
return total, nil
}

View File

@@ -1,6 +1,8 @@
package agent
import (
"context"
"errors"
"testing"
"time"
)
@@ -8,11 +10,17 @@ import (
func setTrackerTestVals(tr *resourceTracker, vals *trackerVals) {
tr.cond.L.Lock()
tr.ramSyncTotal = vals.st
tr.ramSyncUsed = vals.su
tr.ramAsyncTotal = vals.at
tr.ramAsyncUsed = vals.au
tr.ramAsyncHWMark = vals.am
tr.ramSyncTotal = vals.mst
tr.ramSyncUsed = vals.msu
tr.ramAsyncTotal = vals.mat
tr.ramAsyncUsed = vals.mau
tr.ramAsyncHWMark = vals.mam
tr.cpuSyncTotal = vals.cst
tr.cpuSyncUsed = vals.csu
tr.cpuAsyncTotal = vals.cat
tr.cpuAsyncUsed = vals.cau
tr.cpuAsyncHWMark = vals.cam
tr.cond.L.Unlock()
tr.cond.Broadcast()
@@ -22,24 +30,72 @@ func getTrackerTestVals(tr *resourceTracker, vals *trackerVals) {
tr.cond.L.Lock()
vals.st = tr.ramSyncTotal
vals.su = tr.ramSyncUsed
vals.at = tr.ramAsyncTotal
vals.au = tr.ramAsyncUsed
vals.am = tr.ramAsyncHWMark
vals.mst = tr.ramSyncTotal
vals.msu = tr.ramSyncUsed
vals.mat = tr.ramAsyncTotal
vals.mau = tr.ramAsyncUsed
vals.mam = tr.ramAsyncHWMark
vals.cst = tr.cpuSyncTotal
vals.csu = tr.cpuSyncUsed
vals.cat = tr.cpuAsyncTotal
vals.cau = tr.cpuAsyncUsed
vals.cam = tr.cpuAsyncHWMark
tr.cond.L.Unlock()
}
// helper to debug print (fields correspond to resourceTracker CPU/MEM fields)
type trackerVals struct {
st uint64
su uint64
at uint64
au uint64
am uint64
mst uint64
msu uint64
mat uint64
mau uint64
mam uint64
cst uint64
csu uint64
cat uint64
cau uint64
cam uint64
}
func TestResourceAsyncMem(t *testing.T) {
func (vals *trackerVals) setDefaults() {
// set set these to known vals (4GB total: 1GB sync, 3 async)
vals.mst = 1 * Mem1GB
vals.msu = 0
vals.mat = 3 * Mem1GB
vals.mau = 0
vals.mam = 1 * Mem1GB
// let's assume 10 CPUs (2 CPU sync, 8 CPU async)
vals.cst = 2000
vals.csu = 0
vals.cat = 8000
vals.cau = 0
vals.cam = 6000
}
func fetchToken(ch <-chan ResourceToken) (ResourceToken, error) {
select {
case tok := <-ch:
return tok, nil
case <-time.After(time.Duration(500) * time.Millisecond):
return nil, errors.New("expected token")
}
}
func isClosed(ch <-chan ResourceToken) bool {
select {
case _, ok := <-ch:
if !ok {
return true
}
default:
}
return false
}
func TestResourceAsyncWait(t *testing.T) {
var vals trackerVals
@@ -48,36 +104,242 @@ func TestResourceAsyncMem(t *testing.T) {
tr := trI.(*resourceTracker)
getTrackerTestVals(tr, &vals)
if vals.st <= 0 || vals.su != 0 || vals.at <= 0 || vals.au != 0 || vals.am <= 0 {
t.Fatalf("faulty init %#v", vals)
if vals.mst <= 0 || vals.msu != 0 || vals.mat <= 0 || vals.mau != 0 || vals.mam <= 0 {
t.Fatalf("faulty init MEM %#v", vals)
}
if vals.cst <= 0 || vals.csu != 0 || vals.cat <= 0 || vals.cau != 0 || vals.cam <= 0 {
t.Fatalf("faulty init CPU %#v", vals)
}
// set set these to known vals
vals.st = 1 * 1024 * 1024
vals.su = 0
vals.at = 2 * 1024 * 1024
vals.au = 0
vals.am = 1 * 1024 * 1024
vals.setDefaults()
// should block & wait
vals.au = vals.am
vals.mau = vals.mam
setTrackerTestVals(tr, &vals)
ch := tr.WaitAsyncResource()
select {
case <-ch:
t.Fatal("high water mark over, should not trigger")
t.Fatal("high water mark MEM over, should not trigger")
case <-time.After(time.Duration(500) * time.Millisecond):
}
// should not block & wait
vals.au = 0
vals.mau = 0
setTrackerTestVals(tr, &vals)
select {
case <-ch:
case <-time.After(time.Duration(500) * time.Millisecond):
t.Fatal("high water mark not over, should trigger")
t.Fatal("high water mark MEM not over, should trigger")
}
// get a new channel to prevent previous test interference
ch = tr.WaitAsyncResource()
// should block & wait
vals.cau = vals.cam
setTrackerTestVals(tr, &vals)
select {
case <-ch:
t.Fatal("high water mark CPU over, should not trigger")
case <-time.After(time.Duration(500) * time.Millisecond):
}
// should not block & wait
vals.cau = 0
setTrackerTestVals(tr, &vals)
select {
case <-ch:
case <-time.After(time.Duration(500) * time.Millisecond):
t.Fatal("high water mark CPU not over, should trigger")
}
}
func TestResourceGetSimple(t *testing.T) {
var vals trackerVals
trI := NewResourceTracker()
tr := trI.(*resourceTracker)
vals.setDefaults()
// let's make it like CPU and MEM are 100% full
vals.mau = vals.mat
vals.cau = vals.cat
setTrackerTestVals(tr, &vals)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// ask for 4GB and 10 CPU
ch := trI.GetResourceToken(ctx, 4*1024, 1000, false)
_, err := fetchToken(ch)
if err == nil {
t.Fatalf("full system should not hand out token")
}
// reset back
vals.setDefaults()
setTrackerTestVals(tr, &vals)
tok, err := fetchToken(ch)
if err != nil {
t.Fatalf("empty system should hand out token")
}
// ask for another 4GB and 10 CPU
ch = trI.GetResourceToken(ctx, 4*1024, 1000, false)
_, err = fetchToken(ch)
if err == nil {
t.Fatalf("full system should not hand out token")
}
// close means, giant token resources released
tok.Close()
tok, err = fetchToken(ch)
if err != nil {
t.Fatalf("empty system should hand out token")
}
tok.Close()
// POOLS should all be empty now
getTrackerTestVals(tr, &vals)
if vals.msu != 0 || vals.mau != 0 {
t.Fatalf("faulty state MEM %#v", vals)
}
if vals.csu != 0 || vals.cau != 0 {
t.Fatalf("faulty state CPU %#v", vals)
}
}
func TestResourceGetCombo(t *testing.T) {
var vals trackerVals
trI := NewResourceTracker()
tr := trI.(*resourceTracker)
vals.setDefaults()
setTrackerTestVals(tr, &vals)
ctx, cancel := context.WithCancel(context.Background())
// impossible request
ch := trI.GetResourceToken(ctx, 20*1024, 20000, false)
if !isClosed(ch) {
t.Fatalf("impossible request should return closed channel")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// let's use up 2 GB of 3GB async pool
ch = trI.GetResourceToken(ctx, 2*1024, 10, true)
tok1, err := fetchToken(ch)
if err != nil {
t.Fatalf("empty async system should hand out token1")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// remaining 1 GB async
ch = trI.GetResourceToken(ctx, 1*1024, 11, true)
tok2, err := fetchToken(ch)
if err != nil {
t.Fatalf("empty async system should hand out token2")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// NOW ASYNC POOL IS FULL
// SYNC POOL HAS 1GB
// we no longer can get async token
ch = trI.GetResourceToken(ctx, 1*1024, 12, true)
_, err = fetchToken(ch)
if err == nil {
t.Fatalf("full async system should not hand out a token")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// but we should get 1GB sync token
ch = trI.GetResourceToken(ctx, 1*1024, 13, false)
tok3, err := fetchToken(ch)
if err != nil {
t.Fatalf("empty sync system should hand out token3")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// NOW ASYNC AND SYNC POOLS ARE FULL
// this should fail
ch = trI.GetResourceToken(ctx, 1*1024, 14, false)
_, err = fetchToken(ch)
if err == nil {
t.Fatalf("full system should not hand out a token")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// now let's free up some async pool, release tok2 (1GB)
tok2.Close()
// NOW ASYNC POOL HAS 1GB FREE
// SYNC POOL IS FULL
// async pool should provide this
ch = trI.GetResourceToken(ctx, 1*1024, 15, false)
tok4, err := fetchToken(ch)
if err != nil {
t.Fatalf("async system should hand out token4")
}
cancel()
ctx, cancel = context.WithCancel(context.Background())
// NOW ASYNC AND SYNC POOLS ARE FULL
tok4.Close()
tok3.Close()
// NOW ASYNC POOL HAS 1GB FREE
// SYNC POOL HAS 1GB FREE
// now, we ask for 2GB sync token, it should be provided from both async+sync pools
ch = trI.GetResourceToken(ctx, 2*1024, 16, false)
tok5, err := fetchToken(ch)
if err != nil {
t.Fatalf("async+sync system should hand out token5")
}
cancel()
// NOW ASYNC AND SYNC POOLS ARE FULL
tok1.Close()
tok5.Close()
// attempt to close tok2 twice.. This should be OK.
tok2.Close()
// POOLS should all be empty now
getTrackerTestVals(tr, &vals)
if vals.msu != 0 || vals.mau != 0 {
t.Fatalf("faulty state MEM %#v", vals)
}
if vals.csu != 0 || vals.cau != 0 {
t.Fatalf("faulty state CPU %#v", vals)
}
}

View File

@@ -334,6 +334,7 @@ func getSlotQueueKey(call *call) string {
fmt.Fprint(hash, call.Timeout, "\x00")
fmt.Fprint(hash, call.IdleTimeout, "\x00")
fmt.Fprint(hash, call.Memory, "\x00")
fmt.Fprint(hash, call.CPUs, "\x00")
fmt.Fprint(hash, call.Format, "\x00")
// we have to sort these before printing, yay. TODO do better