Mirror of https://github.com/fnproject/fn.git (synced 2022-10-28 21:29:17 +03:00)
@@ -338,7 +338,7 @@ func (a *agent) hotLauncher(ctx context.Context, callObj *call) {
     }).Info("Hot function launcher starting hot container")

     select {
-    case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, isAsync):
+    case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, uint64(callObj.CPUs), isAsync):
         cancel()
         if isOpen {
             a.wg.Add(1)
@@ -404,7 +404,7 @@ func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
     ch := make(chan Slot)

     select {
-    case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, isAsync):
+    case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, uint64(call.CPUs), isAsync):
         if !isOpen {
             return nil, models.ErrCallTimeoutServerBusy
         }
@@ -544,6 +544,7 @@ func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch
         image: call.Image,
         env: map[string]string(call.Config),
         memory: call.Memory,
+        cpus: uint64(call.CPUs),
         timeout: time.Duration(call.Timeout) * time.Second, // this is unnecessary, but in case removal fails...
         stdin: call.req.Body,
         stdout: call.w,
@@ -604,12 +605,13 @@ func (a *agent) runHot(ctxArg context.Context, call *call, tok ResourceToken) {
         image: call.Image,
         env: map[string]string(call.Config),
         memory: call.Memory,
+        cpus: uint64(call.CPUs),
         stdin: stdinRead,
         stdout: stdoutWrite,
         stderr: &ghostWriter{inner: stderr},
     }

-    logger := logrus.WithFields(logrus.Fields{"id": container.id, "app": call.AppName, "route": call.Path, "image": call.Image, "memory": call.Memory, "format": call.Format, "idle_timeout": call.IdleTimeout})
+    logger := logrus.WithFields(logrus.Fields{"id": container.id, "app": call.AppName, "route": call.Path, "image": call.Image, "memory": call.Memory, "cpus": call.CPUs, "format": call.Format, "idle_timeout": call.IdleTimeout})
     ctx = common.WithLogger(ctx, logger)

     cookie, err := a.driver.Prepare(ctx, container)
@@ -698,6 +700,7 @@ type container struct {
     image   string
     env     map[string]string
     memory  uint64
+    cpus    uint64
     timeout time.Duration // cold only (superfluous, but in case)

     stdin io.Reader
@@ -735,6 +738,7 @@ func (c *container) Image() string { return c.image }
 func (c *container) Timeout() time.Duration { return c.timeout }
 func (c *container) EnvVars() map[string]string { return c.env }
 func (c *container) Memory() uint64 { return c.memory * 1024 * 1024 } // convert MB
+func (c *container) CPUs() uint64 { return c.cpus }

 // WriteStat publishes each metric in the specified Stats structure as a histogram metric
 func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {
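For reference, here is a minimal sketch (not part of the diff) of how a caller consumes the widened GetResourceToken signature shown in the hunks above. It assumes only the identifiers visible in this changeset (ResourceTracker, ResourceToken, models.ErrCallTimeoutServerBusy); the helper name acquireResources is made up for illustration.

    // acquireResources mirrors the select pattern in hotLauncher/launchCold:
    // memory is in MB, cpus in milli-CPU units, as in the agent code above.
    func acquireResources(ctx context.Context, rt ResourceTracker, memoryMB, milliCPU uint64, isAsync bool) (ResourceToken, error) {
        select {
        case tok, isOpen := <-rt.GetResourceToken(ctx, memoryMB, milliCPU, isAsync):
            if !isOpen {
                // a closed channel means the request can never fit on this node
                return nil, models.ErrCallTimeoutServerBusy
            }
            // the token must eventually be Close()d to release RAM and CPU
            return tok, nil
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }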
@@ -185,6 +185,7 @@ func TestCallConfigurationModel(t *testing.T) {
     const timeout = 1
     const idleTimeout = 20
     const memory = 256
+    CPUs := models.MilliCPUs(1000)
     method := "GET"
     url := "http://127.0.0.1:8080/r/" + appName + path
     payload := "payload"
@@ -195,6 +196,7 @@ func TestCallConfigurationModel(t *testing.T) {
         "FN_APP_NAME": appName,
         "FN_PATH": path,
         "FN_MEMORY": strconv.Itoa(memory),
+        "FN_CPUS": CPUs.String(),
         "FN_TYPE": typ,
         "APP_VAR": "FOO",
         "ROUTE_VAR": "BAR",
@@ -210,6 +212,7 @@ func TestCallConfigurationModel(t *testing.T) {
         Timeout: timeout,
         IdleTimeout: idleTimeout,
         Memory: memory,
+        CPUs: CPUs,
         Payload: payload,
         URL: url,
         Method: method,
@@ -243,6 +246,7 @@ func TestAsyncCallHeaders(t *testing.T) {
     const timeout = 1
     const idleTimeout = 20
     const memory = 256
+    CPUs := models.MilliCPUs(200)
     method := "GET"
     url := "http://127.0.0.1:8080/r/" + appName + path
     payload := "payload"
@@ -255,6 +259,7 @@ func TestAsyncCallHeaders(t *testing.T) {
         "FN_APP_NAME": appName,
         "FN_PATH": path,
         "FN_MEMORY": strconv.Itoa(memory),
+        "FN_CPUS": CPUs.String(),
         "FN_TYPE": typ,
         "APP_VAR": "FOO",
         "ROUTE_VAR": "BAR",
@@ -277,6 +282,7 @@ func TestAsyncCallHeaders(t *testing.T) {
         Timeout: timeout,
         IdleTimeout: idleTimeout,
         Memory: memory,
+        CPUs: CPUs,
         Payload: payload,
         URL: url,
         Method: method,
@@ -343,6 +349,7 @@ func TestSubmitError(t *testing.T) {
     const timeout = 10
     const idleTimeout = 20
     const memory = 256
+    CPUs := models.MilliCPUs(200)
     method := "GET"
     url := "http://127.0.0.1:8080/r/" + appName + path
     payload := "payload"
@@ -353,6 +360,7 @@ func TestSubmitError(t *testing.T) {
         "FN_APP_NAME": appName,
         "FN_PATH": path,
         "FN_MEMORY": strconv.Itoa(memory),
+        "FN_CPUS": CPUs.String(),
         "FN_TYPE": typ,
         "APP_VAR": "FOO",
         "ROUTE_VAR": "BAR",
@@ -369,6 +377,7 @@ func TestSubmitError(t *testing.T) {
         Timeout: timeout,
         IdleTimeout: idleTimeout,
         Memory: memory,
+        CPUs: CPUs,
         Payload: payload,
         URL: url,
         Method: method,
@@ -103,6 +103,7 @@ func FromRequest(appName, path string, req *http.Request) CallOpt {
         Timeout: route.Timeout,
         IdleTimeout: route.IdleTimeout,
         Memory: route.Memory,
+        CPUs: route.CPUs,
         Config: buildConfig(app, route),
         Headers: req.Header,
         CreatedAt: strfmt.DateTime(time.Now()),
@@ -130,6 +131,11 @@ func buildConfig(app *models.App, route *models.Route) models.Config {
     // TODO: might be a good idea to pass in: "FN_BASE_PATH" = fmt.Sprintf("/r/%s", appName) || "/" if using DNS entries per app
     conf["FN_MEMORY"] = fmt.Sprintf("%d", route.Memory)
     conf["FN_TYPE"] = route.Type
+
+    CPUs := route.CPUs.String()
+    if CPUs != "" {
+        conf["FN_CPUS"] = CPUs
+    }
     return conf
 }
@@ -115,6 +115,15 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
         Context: ctx,
     }

+    // Translate milli cpus into CPUQuota & CPUPeriod (see Linux cGroups CFS cgroup v1 documentation)
+    // eg: task.CPUQuota() of 8000 means CPUQuota of 8 * 100000 usecs in 100000 usec period,
+    // which is approx 8 CPUS in CFS world.
+    // Also see docker run options --cpu-quota and --cpu-period
+    if task.CPUs() != 0 {
+        container.HostConfig.CPUQuota = int64(task.CPUs() * 100)
+        container.HostConfig.CPUPeriod = 100000
+    }
+
     volumes := task.Volumes()
     for _, mapping := range volumes {
         hostDir := mapping[0]
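To make the arithmetic in the comment above concrete (illustration only, not part of the change): the driver turns a milli-CPU value m into a CFS quota of m*100 microseconds against a fixed 100000 microsecond period, i.e. m/1000 of a CPU.

    // Sketch only: the same conversion applied to container.HostConfig above.
    func cfsSettings(milliCPU uint64) (quota int64, period int64) {
        return int64(milliCPU * 100), 100000
    }

    // cfsSettings(500)  -> quota  50000 / period 100000  (~0.5 CPU)
    // cfsSettings(2000) -> quota 200000 / period 100000  (~2 CPUs)
    // cfsSettings(8000) -> quota 800000 / period 100000  (~8 CPUs, the example in the comment)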
@@ -140,7 +149,7 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
     // since we retry under the hood, if the container gets created and retry fails, we can just ignore error
     if err != docker.ErrContainerAlreadyExists {
         log.WithFields(logrus.Fields{"call_id": task.Id(), "command": container.Config.Cmd, "memory": container.Config.Memory,
-            "cpu_shares": container.Config.CPUShares, "hostname": container.Config.Hostname, "name": container.Name,
+            "cpu_shares": container.Config.CPUShares, "cpu_quota": task.CPUs(), "hostname": container.Config.Hostname, "name": container.Name,
             "image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": container.Name,
         }).WithError(err).Error("Could not create container")
@@ -31,6 +31,7 @@ func (f *taskDockerTest) Logger() (stdout, stderr io.Writer) { return f.out
 func (f *taskDockerTest) WriteStat(context.Context, drivers.Stat) { /* TODO */ }
 func (f *taskDockerTest) Volumes() [][2]string { return [][2]string{} }
 func (f *taskDockerTest) Memory() uint64 { return 256 * 1024 * 1024 }
+func (f *taskDockerTest) CPUs() uint64 { return 0 }
 func (f *taskDockerTest) WorkDir() string { return "" }
 func (f *taskDockerTest) Close() {}
 func (f *taskDockerTest) Input() io.Reader { return f.input }
@@ -104,6 +104,9 @@ type ContainerTask interface {
     // 0 is unlimited.
     Memory() uint64

+    // CPUs in milli CPU units
+    CPUs() uint64
+
     // WorkDir returns the working directory to use for the task. Empty string
     // leaves it unset.
     WorkDir() string
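A note on units, since the two sizing methods now sit side by side in this interface: Memory() is bytes at the driver boundary (the agent's container type converts from MB, as shown earlier), while CPUs() is in milli-CPU units, with 0 meaning "no CPU limit" in the docker driver hunk above. The fragment below is illustrative only and does not implement the full ContainerTask interface; the names sizedTask, memMB and milliCPUs are made up.

    // Fragment for illustration; a real implementation must satisfy all of
    // drivers.ContainerTask (compare taskDockerTest above).
    type sizedTask struct {
        memMB     uint64 // e.g. 256  -> 256 MB
        milliCPUs uint64 // e.g. 1500 -> 1.5 CPUs; 0 leaves the CPU quota unset
    }

    func (t sizedTask) Memory() uint64 { return t.memMB * 1024 * 1024 } // MB -> bytes
    func (t sizedTask) CPUs() uint64   { return t.milliCPUs }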
@@ -26,7 +26,7 @@ const (
 type ResourceTracker interface {
     WaitAsyncResource() chan struct{}
     // returns a closed channel if the resource can never me met.
-    GetResourceToken(ctx context.Context, memory uint64, isAsync bool) <-chan ResourceToken
+    GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken
 }

 type resourceTracker struct {
@@ -42,6 +42,17 @@ type resourceTracker struct {
     ramAsyncUsed uint64
     // memory in use for async area in which agent stops dequeuing async jobs
     ramAsyncHWMark uint64
+
+    // cpuTotal is the total usable cpu for sync functions
+    cpuSyncTotal uint64
+    // cpuSyncUsed is cpu reserved for running sync containers including hot/idle
+    cpuSyncUsed uint64
+    // cpuAsyncTotal is the total usable cpu for async + sync functions
+    cpuAsyncTotal uint64
+    // cpuAsyncUsed is cpu reserved for running async + sync containers including hot/idle
+    cpuAsyncUsed uint64
+    // cpu in use for async area in which agent stops dequeuing async jobs
+    cpuAsyncHWMark uint64
 }

 func NewResourceTracker() ResourceTracker {
@@ -51,6 +62,7 @@ func NewResourceTracker() ResourceTracker {
     }

     obj.initializeMemory()
+    obj.initializeCPU()
     return obj
 }

@@ -60,46 +72,52 @@ type ResourceToken interface {
 }

 type resourceToken struct {
+    once      sync.Once
     decrement func()
 }

 func (t *resourceToken) Close() error {
-    t.decrement()
+    t.once.Do(func() {
+        t.decrement()
+    })
     return nil
 }

-func (a *resourceTracker) isResourceAvailableLocked(memory uint64, isAsync bool) bool {
+func (a *resourceTracker) isResourceAvailableLocked(memory uint64, cpuQuota uint64, isAsync bool) bool {

-    asyncAvail := a.ramAsyncTotal - a.ramAsyncUsed
-    syncAvail := a.ramSyncTotal - a.ramSyncUsed
+    asyncAvailMem := a.ramAsyncTotal - a.ramAsyncUsed
+    syncAvailMem := a.ramSyncTotal - a.ramSyncUsed
+
+    asyncAvailCPU := a.cpuAsyncTotal - a.cpuAsyncUsed
+    syncAvailCPU := a.cpuSyncTotal - a.cpuSyncUsed

     // For sync functions, we can steal from async pool. For async, we restrict it to sync pool
     if isAsync {
-        return asyncAvail >= memory
+        return asyncAvailMem >= memory && asyncAvailCPU >= cpuQuota
     } else {
-        return asyncAvail+syncAvail >= memory
+        return asyncAvailMem+syncAvailMem >= memory && asyncAvailCPU+syncAvailCPU >= cpuQuota
     }
 }

 // is this request possible to meet? If no, fail quick
-func (a *resourceTracker) isResourcePossible(memory uint64, isAsync bool) bool {
+func (a *resourceTracker) isResourcePossible(memory uint64, cpuQuota uint64, isAsync bool) bool {
     if isAsync {
-        return memory <= a.ramAsyncTotal
+        return memory <= a.ramAsyncTotal && cpuQuota <= a.cpuAsyncTotal
     } else {
-        return memory <= a.ramSyncTotal+a.ramAsyncTotal
+        return memory <= a.ramSyncTotal+a.ramAsyncTotal && cpuQuota <= a.cpuSyncTotal+a.cpuAsyncTotal
     }
 }

 // the received token should be passed directly to launch (unconditionally), launch
 // will close this token (i.e. the receiver should not call Close)
-func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, isAsync bool) <-chan ResourceToken {
+func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken {

     memory = memory * Mem1MB

     c := a.cond
     ch := make(chan ResourceToken)

-    if !a.isResourcePossible(memory, isAsync) {
+    if !a.isResourcePossible(memory, cpuQuota, isAsync) {
         close(ch)
         return ch
     }
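The sync.Once added to resourceToken above makes Close idempotent: the decrement that returns memory and CPU to the pools runs at most once even if a token is closed twice (the new TestResourceGetCombo further down exercises exactly that). A stripped-down version of the pattern, for illustration only:

    // minimal sketch of the once-guarded Close used above
    type token struct {
        once      sync.Once
        decrement func()
    }

    func (t *token) Close() error {
        t.once.Do(t.decrement) // safe to call Close repeatedly
        return nil
    }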
@@ -107,39 +125,44 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, i
     go func() {
         c.L.Lock()

-        for !a.isResourceAvailableLocked(memory, isAsync) {
-            select {
-            case <-ctx.Done():
-                c.L.Unlock()
-                return
-            default:
-            }
-
+        for !a.isResourceAvailableLocked(memory, cpuQuota, isAsync) && ctx.Err() == nil {
             c.Wait()
         }

+        if ctx.Err() != nil {
+            c.L.Unlock()
+            return
+        }
+
         var asyncMem, syncMem uint64
+        var asyncCPU, syncCPU uint64
+
         if isAsync {
             // async uses async pool only
             asyncMem = memory
-        } else if a.ramSyncTotal-a.ramSyncUsed >= memory {
-            // if sync fits in sync pool
-            syncMem = memory
+            asyncCPU = cpuQuota
         } else {
             // if sync fits async + sync pool
-            syncMem = a.ramSyncTotal - a.ramSyncUsed
+            syncMem = minUint64(a.ramSyncTotal-a.ramSyncUsed, memory)
+            syncCPU = minUint64(a.cpuSyncTotal-a.cpuSyncUsed, cpuQuota)
+
             asyncMem = memory - syncMem
+            asyncCPU = cpuQuota - syncCPU
         }

         a.ramAsyncUsed += asyncMem
         a.ramSyncUsed += syncMem
+        a.cpuAsyncUsed += asyncCPU
+        a.cpuSyncUsed += syncCPU
         c.L.Unlock()

         t := &resourceToken{decrement: func() {

            c.L.Lock()
            a.ramAsyncUsed -= asyncMem
            a.ramSyncUsed -= syncMem
+           a.cpuAsyncUsed -= asyncCPU
+           a.cpuSyncUsed -= syncCPU
            c.L.Unlock()

            // WARNING: yes, we wake up everyone even async waiters when only sync pool has space, but
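A worked example of the split performed above for a sync request (illustration only; minUint64 is the helper already used in resource.go): with 1 GB and 2000m free in the sync pool, a 2 GB / 3000m sync request charges 1 GB + 2000m to the sync pool and borrows the remaining 1 GB + 1000m from the async pool. An async request never borrows from the sync pool.

    // sketch of the sync-side accounting above
    func splitSyncRequest(memReq, cpuReq, syncMemFree, syncCPUFree uint64) (syncMem, asyncMem, syncCPU, asyncCPU uint64) {
        syncMem = minUint64(syncMemFree, memReq)
        syncCPU = minUint64(syncCPUFree, cpuReq)
        return syncMem, memReq - syncMem, syncCPU, cpuReq - syncCPU
    }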
@@ -159,7 +182,7 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, i
     return ch
 }

-// WaitAsyncResource will send a signal on the returned channel when RAM in-use
+// WaitAsyncResource will send a signal on the returned channel when RAM and CPU in-use
 // in the async area is less than high water mark
 func (a *resourceTracker) WaitAsyncResource() chan struct{} {
     ch := make(chan struct{})
@@ -167,7 +190,7 @@ func (a *resourceTracker) WaitAsyncResource() chan struct{} {
     c := a.cond
     go func() {
         c.L.Lock()
-        for a.ramAsyncUsed >= a.ramAsyncHWMark {
+        for a.ramAsyncUsed >= a.ramAsyncHWMark || a.cpuAsyncUsed >= a.cpuAsyncHWMark {
             c.Wait()
         }
         c.L.Unlock()
@@ -198,6 +221,71 @@ func clampUint64(val, min, max uint64) uint64 {
     return val
 }

+func (a *resourceTracker) initializeCPU() {
+
+    var maxSyncCPU, maxAsyncCPU, cpuAsyncHWMark uint64
+    var totalCPU, availCPU uint64
+
+    if runtime.GOOS == "linux" {
+
+        // Why do we prefer /proc/cpuinfo for Linux and not just use runtime.NumCPU?
+        // This is because NumCPU is sched_getaffinity based and we prefer to check
+        // cgroup which will more likely be same cgroup for container runtime
+        numCPU, err := checkProcCPU()
+        if err != nil {
+            logrus.WithError(err).Error("Error checking for CPU, falling back to runtime CPU count.")
+            numCPU = uint64(runtime.NumCPU())
+        }
+
+        totalCPU = 1000 * numCPU
+        availCPU = totalCPU
+
+        // Clamp further if cgroups CFS quota/period limits are in place
+        cgroupCPU := checkCgroupCPU()
+        if cgroupCPU > 0 {
+            availCPU = minUint64(availCPU, cgroupCPU)
+        }
+
+        // TODO: check cgroup cpuset to clamp this further. We might be restricted into
+        // a subset of CPUs. (eg. /sys/fs/cgroup/cpuset/cpuset.effective_cpus)
+
+        // TODO: skip CPU headroom for ourselves for now
+    } else {
+        totalCPU = uint64(runtime.NumCPU() * 1000)
+        availCPU = totalCPU
+    }
+
+    logrus.WithFields(logrus.Fields{
+        "totalCPU": totalCPU,
+        "availCPU": availCPU,
+    }).Info("available cpu")
+
+    // %20 of cpu for sync only reserve
+    maxSyncCPU = uint64(availCPU * 2 / 10)
+    maxAsyncCPU = availCPU - maxSyncCPU
+    cpuAsyncHWMark = maxAsyncCPU * 8 / 10
+
+    logrus.WithFields(logrus.Fields{
+        "cpuSync": maxSyncCPU,
+        "cpuAsync": maxAsyncCPU,
+        "cpuAsyncHWMark": cpuAsyncHWMark,
+    }).Info("sync and async cpu reservations")
+
+    if maxSyncCPU == 0 || maxAsyncCPU == 0 {
+        logrus.Fatal("Cannot get the proper CPU information to size server")
+    }
+
+    if maxSyncCPU+maxAsyncCPU < 1000 {
+        logrus.Warn("Severaly Limited CPU: cpuSync + cpuAsync < 1000m (1 CPU)")
+    } else if maxAsyncCPU < 1000 {
+        logrus.Warn("Severaly Limited CPU: cpuAsync < 1000m (1 CPU)")
+    }
+
+    a.cpuAsyncHWMark = cpuAsyncHWMark
+    a.cpuSyncTotal = maxSyncCPU
+    a.cpuAsyncTotal = maxAsyncCPU
+}
+
 func (a *resourceTracker) initializeMemory() {

     var maxSyncMemory, maxAsyncMemory, ramAsyncHWMark uint64
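To make the sizing above concrete, take a 4-core host with no CFS limit (numbers are an example, not from the diff):

    availCPU       = 4 * 1000      = 4000m
    maxSyncCPU     = 4000 * 2 / 10 =  800m   (20% reserved for sync-only work)
    maxAsyncCPU    = 4000 - 800    = 3200m
    cpuAsyncHWMark = 3200 * 8 / 10 = 2560m   (async dequeuing pauses above this)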
@@ -205,7 +293,7 @@ func (a *resourceTracker) initializeMemory() {
     if runtime.GOOS == "linux" {

         // system wide available memory
-        totalMemory, err := checkProc()
+        totalMemory, err := checkProcMem()
         if err != nil {
             logrus.WithError(err).Fatal("Cannot get the proper memory information to size server.")
         }
@@ -213,7 +301,7 @@ func (a *resourceTracker) initializeMemory() {
         availMemory := totalMemory

         // cgroup limit restriction on memory usage
-        cGroupLimit, err := checkCgroup()
+        cGroupLimit, err := checkCgroupMem()
         if err != nil {
             logrus.WithError(err).Error("Error checking for cgroup memory limits, falling back to host memory available..")
         } else {
@@ -251,7 +339,7 @@ func (a *resourceTracker) initializeMemory() {
         "ramSync": maxSyncMemory,
         "ramAsync": maxAsyncMemory,
         "ramAsyncHWMark": ramAsyncHWMark,
-    }).Info("sync and async reservations")
+    }).Info("sync and async ram reservations")

     if maxSyncMemory == 0 || maxAsyncMemory == 0 {
         logrus.Fatal("Cannot get the proper memory pool information to size server")
@@ -286,24 +374,56 @@ func getMemoryHeadRoom(usableMemory uint64) (uint64, error) {
     return headRoom, nil
 }

-func checkCgroup() (uint64, error) {
-    f, err := os.Open("/sys/fs/cgroup/memory/memory.limit_in_bytes")
-    if err != nil {
-        return 0, err
-    }
-    defer f.Close()
-    b, err := ioutil.ReadAll(f)
-    if err != nil {
-        return 0, err
-    }
-    limBytes := string(b)
-    limBytes = strings.TrimSpace(limBytes)
-    return strconv.ParseUint(limBytes, 10, 64)
-}
+func readString(fileName string) (string, error) {
+    b, err := ioutil.ReadFile(fileName)
+    if err != nil {
+        return "", err
+    }
+    value := string(b)
+    return strings.TrimSpace(value), nil
+}
+
+func checkCgroupMem() (uint64, error) {
+    value, err := readString("/sys/fs/cgroup/memory/memory.limit_in_bytes")
+    if err != nil {
+        return 0, err
+    }
+    return strconv.ParseUint(value, 10, 64)
+}
+
+func checkCgroupCPU() uint64 {
+
+    periodStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
+    if err != nil {
+        return 0
+    }
+    quotaStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
+    if err != nil {
+        return 0
+    }
+
+    period, err := strconv.ParseUint(periodStr, 10, 64)
+    if err != nil {
+        logrus.Warn("Cannot parse CFS period", err)
+        return 0
+    }
+
+    quota, err := strconv.ParseInt(quotaStr, 10, 64)
+    if err != nil {
+        logrus.Warn("Cannot parse CFS quota", err)
+        return 0
+    }
+
+    if quota <= 0 || period <= 0 {
+        return 0
+    }
+
+    return uint64(quota) * 1000 / period
+}

 var errCantReadMemInfo = errors.New("Didn't find MemAvailable in /proc/meminfo, kernel is probably < 3.14")

-func checkProc() (uint64, error) {
+func checkProcMem() (uint64, error) {
     f, err := os.Open("/proc/meminfo")
     if err != nil {
         return 0, err
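A quick worked example for checkCgroupCPU above (values are illustrative): with cpu.cfs_quota_us = 150000 and cpu.cfs_period_us = 100000, the function returns 150000 * 1000 / 100000 = 1500 milli-CPUs (1.5 CPUs). With cpu.cfs_quota_us = -1 (no cgroup limit), the quota <= 0 branch returns 0 and initializeCPU keeps the /proc/cpuinfo-based count.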
@@ -341,3 +461,30 @@ func checkProc() (uint64, error) {

     return 0, errCantReadMemInfo
 }
+
+func checkProcCPU() (uint64, error) {
+    f, err := os.Open("/proc/cpuinfo")
+    if err != nil {
+        return 0, err
+    }
+    defer f.Close()
+
+    total := uint64(0)
+
+    scanner := bufio.NewScanner(f)
+    for scanner.Scan() {
+        b := scanner.Text()
+
+        // processor : 0
+        toks := strings.Fields(b)
+        if len(toks) == 3 && toks[0] == "processor" && toks[1] == ":" {
+            total += 1
+        }
+    }
+
+    if total == 0 {
+        return 0, errors.New("Could not parse cpuinfo")
+    }
+
+    return total, nil
+}
@@ -1,6 +1,8 @@
 package agent

 import (
+    "context"
+    "errors"
     "testing"
     "time"
 )
@@ -8,11 +10,17 @@ import (
|
||||
func setTrackerTestVals(tr *resourceTracker, vals *trackerVals) {
|
||||
tr.cond.L.Lock()
|
||||
|
||||
tr.ramSyncTotal = vals.st
|
||||
tr.ramSyncUsed = vals.su
|
||||
tr.ramAsyncTotal = vals.at
|
||||
tr.ramAsyncUsed = vals.au
|
||||
tr.ramAsyncHWMark = vals.am
|
||||
tr.ramSyncTotal = vals.mst
|
||||
tr.ramSyncUsed = vals.msu
|
||||
tr.ramAsyncTotal = vals.mat
|
||||
tr.ramAsyncUsed = vals.mau
|
||||
tr.ramAsyncHWMark = vals.mam
|
||||
|
||||
tr.cpuSyncTotal = vals.cst
|
||||
tr.cpuSyncUsed = vals.csu
|
||||
tr.cpuAsyncTotal = vals.cat
|
||||
tr.cpuAsyncUsed = vals.cau
|
||||
tr.cpuAsyncHWMark = vals.cam
|
||||
|
||||
tr.cond.L.Unlock()
|
||||
tr.cond.Broadcast()
|
||||
@@ -22,24 +30,72 @@ func getTrackerTestVals(tr *resourceTracker, vals *trackerVals) {

     tr.cond.L.Lock()

-    vals.st = tr.ramSyncTotal
-    vals.su = tr.ramSyncUsed
-    vals.at = tr.ramAsyncTotal
-    vals.au = tr.ramAsyncUsed
-    vals.am = tr.ramAsyncHWMark
+    vals.mst = tr.ramSyncTotal
+    vals.msu = tr.ramSyncUsed
+    vals.mat = tr.ramAsyncTotal
+    vals.mau = tr.ramAsyncUsed
+    vals.mam = tr.ramAsyncHWMark
+
+    vals.cst = tr.cpuSyncTotal
+    vals.csu = tr.cpuSyncUsed
+    vals.cat = tr.cpuAsyncTotal
+    vals.cau = tr.cpuAsyncUsed
+    vals.cam = tr.cpuAsyncHWMark

     tr.cond.L.Unlock()
 }

 // helper to debug print (fields correspond to resourceTracker CPU/MEM fields)
 type trackerVals struct {
-    st uint64
-    su uint64
-    at uint64
-    au uint64
-    am uint64
+    mst uint64
+    msu uint64
+    mat uint64
+    mau uint64
+    mam uint64
+    cst uint64
+    csu uint64
+    cat uint64
+    cau uint64
+    cam uint64
 }

-func TestResourceAsyncMem(t *testing.T) {
+func (vals *trackerVals) setDefaults() {
+    // set set these to known vals (4GB total: 1GB sync, 3 async)
+    vals.mst = 1 * Mem1GB
+    vals.msu = 0
+    vals.mat = 3 * Mem1GB
+    vals.mau = 0
+    vals.mam = 1 * Mem1GB
+
+    // let's assume 10 CPUs (2 CPU sync, 8 CPU async)
+    vals.cst = 2000
+    vals.csu = 0
+    vals.cat = 8000
+    vals.cau = 0
+    vals.cam = 6000
+}
+
+func fetchToken(ch <-chan ResourceToken) (ResourceToken, error) {
+    select {
+    case tok := <-ch:
+        return tok, nil
+    case <-time.After(time.Duration(500) * time.Millisecond):
+        return nil, errors.New("expected token")
+    }
+}
+
+func isClosed(ch <-chan ResourceToken) bool {
+    select {
+    case _, ok := <-ch:
+        if !ok {
+            return true
+        }
+    default:
+    }
+    return false
+}
+
+func TestResourceAsyncWait(t *testing.T) {

     var vals trackerVals

@@ -48,36 +104,242 @@ func TestResourceAsyncMem(t *testing.T) {
     tr := trI.(*resourceTracker)

     getTrackerTestVals(tr, &vals)
-    if vals.st <= 0 || vals.su != 0 || vals.at <= 0 || vals.au != 0 || vals.am <= 0 {
-        t.Fatalf("faulty init %#v", vals)
+    if vals.mst <= 0 || vals.msu != 0 || vals.mat <= 0 || vals.mau != 0 || vals.mam <= 0 {
+        t.Fatalf("faulty init MEM %#v", vals)
     }
+    if vals.cst <= 0 || vals.csu != 0 || vals.cat <= 0 || vals.cau != 0 || vals.cam <= 0 {
+        t.Fatalf("faulty init CPU %#v", vals)
+    }

-    // set set these to known vals
-    vals.st = 1 * 1024 * 1024
-    vals.su = 0
-    vals.at = 2 * 1024 * 1024
-    vals.au = 0
-    vals.am = 1 * 1024 * 1024
+    vals.setDefaults()

     // should block & wait
-    vals.au = vals.am
+    vals.mau = vals.mam
     setTrackerTestVals(tr, &vals)
     ch := tr.WaitAsyncResource()

     select {
     case <-ch:
-        t.Fatal("high water mark over, should not trigger")
+        t.Fatal("high water mark MEM over, should not trigger")
     case <-time.After(time.Duration(500) * time.Millisecond):
     }

     // should not block & wait
-    vals.au = 0
+    vals.mau = 0
     setTrackerTestVals(tr, &vals)

     select {
     case <-ch:
     case <-time.After(time.Duration(500) * time.Millisecond):
-        t.Fatal("high water mark not over, should trigger")
+        t.Fatal("high water mark MEM not over, should trigger")
     }

+    // get a new channel to prevent previous test interference
+    ch = tr.WaitAsyncResource()
+
+    // should block & wait
+    vals.cau = vals.cam
+    setTrackerTestVals(tr, &vals)
+
+    select {
+    case <-ch:
+        t.Fatal("high water mark CPU over, should not trigger")
+    case <-time.After(time.Duration(500) * time.Millisecond):
+    }
+
+    // should not block & wait
+    vals.cau = 0
+    setTrackerTestVals(tr, &vals)
+
+    select {
+    case <-ch:
+    case <-time.After(time.Duration(500) * time.Millisecond):
+        t.Fatal("high water mark CPU not over, should trigger")
+    }
+
 }

+func TestResourceGetSimple(t *testing.T) {
+
+    var vals trackerVals
+    trI := NewResourceTracker()
+    tr := trI.(*resourceTracker)
+
+    vals.setDefaults()
+
+    // let's make it like CPU and MEM are 100% full
+    vals.mau = vals.mat
+    vals.cau = vals.cat
+
+    setTrackerTestVals(tr, &vals)
+
+    ctx, cancel := context.WithCancel(context.Background())
+    defer cancel()
+
+    // ask for 4GB and 10 CPU
+    ch := trI.GetResourceToken(ctx, 4*1024, 1000, false)
+
+    _, err := fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full system should not hand out token")
+    }
+
+    // reset back
+    vals.setDefaults()
+    setTrackerTestVals(tr, &vals)
+
+    tok, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty system should hand out token")
+    }
+
+    // ask for another 4GB and 10 CPU
+    ch = trI.GetResourceToken(ctx, 4*1024, 1000, false)
+
+    _, err = fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full system should not hand out token")
+    }
+
+    // close means, giant token resources released
+    tok.Close()
+
+    tok, err = fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty system should hand out token")
+    }
+
+    tok.Close()
+
+    // POOLS should all be empty now
+    getTrackerTestVals(tr, &vals)
+    if vals.msu != 0 || vals.mau != 0 {
+        t.Fatalf("faulty state MEM %#v", vals)
+    }
+    if vals.csu != 0 || vals.cau != 0 {
+        t.Fatalf("faulty state CPU %#v", vals)
+    }
+}
+
+func TestResourceGetCombo(t *testing.T) {
+
+    var vals trackerVals
+    trI := NewResourceTracker()
+    tr := trI.(*resourceTracker)
+
+    vals.setDefaults()
+    setTrackerTestVals(tr, &vals)
+
+    ctx, cancel := context.WithCancel(context.Background())
+
+    // impossible request
+    ch := trI.GetResourceToken(ctx, 20*1024, 20000, false)
+    if !isClosed(ch) {
+        t.Fatalf("impossible request should return closed channel")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // let's use up 2 GB of 3GB async pool
+    ch = trI.GetResourceToken(ctx, 2*1024, 10, true)
+    tok1, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty async system should hand out token1")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // remaining 1 GB async
+    ch = trI.GetResourceToken(ctx, 1*1024, 11, true)
+    tok2, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty async system should hand out token2")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // NOW ASYNC POOL IS FULL
+    // SYNC POOL HAS 1GB
+
+    // we no longer can get async token
+    ch = trI.GetResourceToken(ctx, 1*1024, 12, true)
+    _, err = fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full async system should not hand out a token")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // but we should get 1GB sync token
+    ch = trI.GetResourceToken(ctx, 1*1024, 13, false)
+    tok3, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty sync system should hand out token3")
+    }
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // NOW ASYNC AND SYNC POOLS ARE FULL
+
+    // this should fail
+    ch = trI.GetResourceToken(ctx, 1*1024, 14, false)
+    _, err = fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full system should not hand out a token")
+    }
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // now let's free up some async pool, release tok2 (1GB)
+    tok2.Close()
+
+    // NOW ASYNC POOL HAS 1GB FREE
+    // SYNC POOL IS FULL
+
+    // async pool should provide this
+    ch = trI.GetResourceToken(ctx, 1*1024, 15, false)
+    tok4, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("async system should hand out token4")
+    }
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // NOW ASYNC AND SYNC POOLS ARE FULL
+
+    tok4.Close()
+    tok3.Close()
+
+    // NOW ASYNC POOL HAS 1GB FREE
+    // SYNC POOL HAS 1GB FREE
+
+    // now, we ask for 2GB sync token, it should be provided from both async+sync pools
+    ch = trI.GetResourceToken(ctx, 2*1024, 16, false)
+    tok5, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("async+sync system should hand out token5")
+    }
+    cancel()
+
+    // NOW ASYNC AND SYNC POOLS ARE FULL
+
+    tok1.Close()
+    tok5.Close()
+
+    // attempt to close tok2 twice.. This should be OK.
+    tok2.Close()
+
+    // POOLS should all be empty now
+    getTrackerTestVals(tr, &vals)
+    if vals.msu != 0 || vals.mau != 0 {
+        t.Fatalf("faulty state MEM %#v", vals)
+    }
+    if vals.csu != 0 || vals.cau != 0 {
+        t.Fatalf("faulty state CPU %#v", vals)
+    }
+
+}
@@ -334,6 +334,7 @@ func getSlotQueueKey(call *call) string {
     fmt.Fprint(hash, call.Timeout, "\x00")
     fmt.Fprint(hash, call.IdleTimeout, "\x00")
     fmt.Fprint(hash, call.Memory, "\x00")
+    fmt.Fprint(hash, call.CPUs, "\x00")
    fmt.Fprint(hash, call.Format, "\x00")

     // we have to sort these before printing, yay. TODO do better