Mirror of https://github.com/fnproject/fn.git (synced 2022-10-28 21:29:17 +03:00)
@@ -338,7 +338,7 @@ func (a *agent) hotLauncher(ctx context.Context, callObj *call) {
     }).Info("Hot function launcher starting hot container")

     select {
-    case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, isAsync):
+    case tok, isOpen := <-a.resources.GetResourceToken(resourceCtx, callObj.Memory, uint64(callObj.CPUs), isAsync):
         cancel()
         if isOpen {
             a.wg.Add(1)
@@ -404,7 +404,7 @@ func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
     ch := make(chan Slot)

     select {
-    case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, isAsync):
+    case tok, isOpen := <-a.resources.GetResourceToken(ctx, call.Memory, uint64(call.CPUs), isAsync):
         if !isOpen {
             return nil, models.ErrCallTimeoutServerBusy
         }
@@ -544,6 +544,7 @@ func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch
         image: call.Image,
         env: map[string]string(call.Config),
         memory: call.Memory,
+        cpus: uint64(call.CPUs),
         timeout: time.Duration(call.Timeout) * time.Second, // this is unnecessary, but in case removal fails...
         stdin: call.req.Body,
         stdout: call.w,
@@ -604,12 +605,13 @@ func (a *agent) runHot(ctxArg context.Context, call *call, tok ResourceToken) {
         image: call.Image,
         env: map[string]string(call.Config),
         memory: call.Memory,
+        cpus: uint64(call.CPUs),
         stdin: stdinRead,
         stdout: stdoutWrite,
         stderr: &ghostWriter{inner: stderr},
     }

-    logger := logrus.WithFields(logrus.Fields{"id": container.id, "app": call.AppName, "route": call.Path, "image": call.Image, "memory": call.Memory, "format": call.Format, "idle_timeout": call.IdleTimeout})
+    logger := logrus.WithFields(logrus.Fields{"id": container.id, "app": call.AppName, "route": call.Path, "image": call.Image, "memory": call.Memory, "cpus": call.CPUs, "format": call.Format, "idle_timeout": call.IdleTimeout})
     ctx = common.WithLogger(ctx, logger)

     cookie, err := a.driver.Prepare(ctx, container)
@@ -698,6 +700,7 @@ type container struct {
     image   string
     env     map[string]string
     memory  uint64
+    cpus    uint64
     timeout time.Duration // cold only (superfluous, but in case)

     stdin io.Reader
@@ -735,6 +738,7 @@ func (c *container) Image() string { return c.image }
 func (c *container) Timeout() time.Duration { return c.timeout }
 func (c *container) EnvVars() map[string]string { return c.env }
 func (c *container) Memory() uint64 { return c.memory * 1024 * 1024 } // convert MB
+func (c *container) CPUs() uint64 { return c.cpus }

 // WriteStat publishes each metric in the specified Stats structure as a histogram metric
 func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {
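For reference, here is a minimal sketch (not part of the diff) of how a caller consumes the widened GetResourceToken signature shown in the hunks above. It assumes only the identifiers visible in this changeset (ResourceTracker, ResourceToken, models.ErrCallTimeoutServerBusy); the helper name acquireResources is made up for illustration.

    // acquireResources mirrors the select pattern in hotLauncher/launchCold:
    // memory is in MB, cpus in milli-CPU units, as in the agent code above.
    func acquireResources(ctx context.Context, rt ResourceTracker, memoryMB, milliCPU uint64, isAsync bool) (ResourceToken, error) {
        select {
        case tok, isOpen := <-rt.GetResourceToken(ctx, memoryMB, milliCPU, isAsync):
            if !isOpen {
                // a closed channel means the request can never fit on this node
                return nil, models.ErrCallTimeoutServerBusy
            }
            // the token must eventually be Close()d to release RAM and CPU
            return tok, nil
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }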
@@ -185,6 +185,7 @@ func TestCallConfigurationModel(t *testing.T) {
     const timeout = 1
     const idleTimeout = 20
     const memory = 256
+    CPUs := models.MilliCPUs(1000)
     method := "GET"
     url := "http://127.0.0.1:8080/r/" + appName + path
     payload := "payload"
@@ -195,6 +196,7 @@ func TestCallConfigurationModel(t *testing.T) {
         "FN_APP_NAME": appName,
         "FN_PATH": path,
         "FN_MEMORY": strconv.Itoa(memory),
+        "FN_CPUS": CPUs.String(),
         "FN_TYPE": typ,
         "APP_VAR": "FOO",
         "ROUTE_VAR": "BAR",
@@ -210,6 +212,7 @@ func TestCallConfigurationModel(t *testing.T) {
         Timeout: timeout,
         IdleTimeout: idleTimeout,
         Memory: memory,
+        CPUs: CPUs,
         Payload: payload,
         URL: url,
         Method: method,
@@ -243,6 +246,7 @@ func TestAsyncCallHeaders(t *testing.T) {
     const timeout = 1
     const idleTimeout = 20
     const memory = 256
+    CPUs := models.MilliCPUs(200)
     method := "GET"
     url := "http://127.0.0.1:8080/r/" + appName + path
     payload := "payload"
@@ -255,6 +259,7 @@ func TestAsyncCallHeaders(t *testing.T) {
         "FN_APP_NAME": appName,
         "FN_PATH": path,
         "FN_MEMORY": strconv.Itoa(memory),
+        "FN_CPUS": CPUs.String(),
         "FN_TYPE": typ,
         "APP_VAR": "FOO",
         "ROUTE_VAR": "BAR",
@@ -277,6 +282,7 @@ func TestAsyncCallHeaders(t *testing.T) {
         Timeout: timeout,
         IdleTimeout: idleTimeout,
         Memory: memory,
+        CPUs: CPUs,
         Payload: payload,
         URL: url,
         Method: method,
@@ -343,6 +349,7 @@ func TestSubmitError(t *testing.T) {
     const timeout = 10
     const idleTimeout = 20
     const memory = 256
+    CPUs := models.MilliCPUs(200)
     method := "GET"
     url := "http://127.0.0.1:8080/r/" + appName + path
     payload := "payload"
@@ -353,6 +360,7 @@ func TestSubmitError(t *testing.T) {
         "FN_APP_NAME": appName,
         "FN_PATH": path,
         "FN_MEMORY": strconv.Itoa(memory),
+        "FN_CPUS": CPUs.String(),
         "FN_TYPE": typ,
         "APP_VAR": "FOO",
         "ROUTE_VAR": "BAR",
@@ -369,6 +377,7 @@ func TestSubmitError(t *testing.T) {
         Timeout: timeout,
         IdleTimeout: idleTimeout,
         Memory: memory,
+        CPUs: CPUs,
         Payload: payload,
         URL: url,
         Method: method,
@@ -103,6 +103,7 @@ func FromRequest(appName, path string, req *http.Request) CallOpt {
         Timeout: route.Timeout,
         IdleTimeout: route.IdleTimeout,
         Memory: route.Memory,
+        CPUs: route.CPUs,
         Config: buildConfig(app, route),
         Headers: req.Header,
         CreatedAt: strfmt.DateTime(time.Now()),
@@ -130,6 +131,11 @@ func buildConfig(app *models.App, route *models.Route) models.Config {
     // TODO: might be a good idea to pass in: "FN_BASE_PATH" = fmt.Sprintf("/r/%s", appName) || "/" if using DNS entries per app
     conf["FN_MEMORY"] = fmt.Sprintf("%d", route.Memory)
     conf["FN_TYPE"] = route.Type
+
+    CPUs := route.CPUs.String()
+    if CPUs != "" {
+        conf["FN_CPUS"] = CPUs
+    }
     return conf
 }
@@ -115,6 +115,15 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
         Context: ctx,
     }

+    // Translate milli cpus into CPUQuota & CPUPeriod (see Linux cGroups CFS cgroup v1 documentation)
+    // eg: task.CPUQuota() of 8000 means CPUQuota of 8 * 100000 usecs in 100000 usec period,
+    // which is approx 8 CPUS in CFS world.
+    // Also see docker run options --cpu-quota and --cpu-period
+    if task.CPUs() != 0 {
+        container.HostConfig.CPUQuota = int64(task.CPUs() * 100)
+        container.HostConfig.CPUPeriod = 100000
+    }
+
     volumes := task.Volumes()
     for _, mapping := range volumes {
         hostDir := mapping[0]
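To make the arithmetic in the comment above concrete (illustration only, not part of the change): the driver turns a milli-CPU value m into a CFS quota of m*100 microseconds against a fixed 100000 microsecond period, i.e. m/1000 of a CPU.

    // Sketch only: the same conversion applied to container.HostConfig above.
    func cfsSettings(milliCPU uint64) (quota int64, period int64) {
        return int64(milliCPU * 100), 100000
    }

    // cfsSettings(500)  -> quota  50000 / period 100000  (~0.5 CPU)
    // cfsSettings(2000) -> quota 200000 / period 100000  (~2 CPUs)
    // cfsSettings(8000) -> quota 800000 / period 100000  (~8 CPUs, the example in the comment)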
@@ -140,7 +149,7 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
     // since we retry under the hood, if the container gets created and retry fails, we can just ignore error
     if err != docker.ErrContainerAlreadyExists {
         log.WithFields(logrus.Fields{"call_id": task.Id(), "command": container.Config.Cmd, "memory": container.Config.Memory,
-            "cpu_shares": container.Config.CPUShares, "hostname": container.Config.Hostname, "name": container.Name,
+            "cpu_shares": container.Config.CPUShares, "cpu_quota": task.CPUs(), "hostname": container.Config.Hostname, "name": container.Name,
             "image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": container.Name,
         }).WithError(err).Error("Could not create container")
@@ -31,6 +31,7 @@ func (f *taskDockerTest) Logger() (stdout, stderr io.Writer) { return f.out
 func (f *taskDockerTest) WriteStat(context.Context, drivers.Stat) { /* TODO */ }
 func (f *taskDockerTest) Volumes() [][2]string { return [][2]string{} }
 func (f *taskDockerTest) Memory() uint64 { return 256 * 1024 * 1024 }
+func (f *taskDockerTest) CPUs() uint64 { return 0 }
 func (f *taskDockerTest) WorkDir() string { return "" }
 func (f *taskDockerTest) Close() {}
 func (f *taskDockerTest) Input() io.Reader { return f.input }
@@ -104,6 +104,9 @@ type ContainerTask interface {
     // 0 is unlimited.
     Memory() uint64

+    // CPUs in milli CPU units
+    CPUs() uint64
+
     // WorkDir returns the working directory to use for the task. Empty string
     // leaves it unset.
     WorkDir() string
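A note on units, since the two sizing methods now sit side by side in this interface: Memory() is bytes at the driver boundary (the agent's container type converts from MB, as shown earlier), while CPUs() is in milli-CPU units, with 0 meaning "no CPU limit" in the docker driver hunk above. The fragment below is illustrative only and does not implement the full ContainerTask interface; the names sizedTask, memMB and milliCPUs are made up.

    // Fragment for illustration; a real implementation must satisfy all of
    // drivers.ContainerTask (compare taskDockerTest above).
    type sizedTask struct {
        memMB     uint64 // e.g. 256  -> 256 MB
        milliCPUs uint64 // e.g. 1500 -> 1.5 CPUs; 0 leaves the CPU quota unset
    }

    func (t sizedTask) Memory() uint64 { return t.memMB * 1024 * 1024 } // MB -> bytes
    func (t sizedTask) CPUs() uint64   { return t.milliCPUs }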
@@ -26,7 +26,7 @@ const (
 type ResourceTracker interface {
     WaitAsyncResource() chan struct{}
     // returns a closed channel if the resource can never me met.
-    GetResourceToken(ctx context.Context, memory uint64, isAsync bool) <-chan ResourceToken
+    GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken
 }

 type resourceTracker struct {
@@ -42,6 +42,17 @@ type resourceTracker struct {
     ramAsyncUsed uint64
     // memory in use for async area in which agent stops dequeuing async jobs
     ramAsyncHWMark uint64
+
+    // cpuTotal is the total usable cpu for sync functions
+    cpuSyncTotal uint64
+    // cpuSyncUsed is cpu reserved for running sync containers including hot/idle
+    cpuSyncUsed uint64
+    // cpuAsyncTotal is the total usable cpu for async + sync functions
+    cpuAsyncTotal uint64
+    // cpuAsyncUsed is cpu reserved for running async + sync containers including hot/idle
+    cpuAsyncUsed uint64
+    // cpu in use for async area in which agent stops dequeuing async jobs
+    cpuAsyncHWMark uint64
 }

 func NewResourceTracker() ResourceTracker {
@@ -51,6 +62,7 @@ func NewResourceTracker() ResourceTracker {
     }

     obj.initializeMemory()
+    obj.initializeCPU()
     return obj
 }

@@ -60,46 +72,52 @@ type ResourceToken interface {
 }

 type resourceToken struct {
+    once      sync.Once
     decrement func()
 }

 func (t *resourceToken) Close() error {
-    t.decrement()
+    t.once.Do(func() {
+        t.decrement()
+    })
     return nil
 }

-func (a *resourceTracker) isResourceAvailableLocked(memory uint64, isAsync bool) bool {
+func (a *resourceTracker) isResourceAvailableLocked(memory uint64, cpuQuota uint64, isAsync bool) bool {

-    asyncAvail := a.ramAsyncTotal - a.ramAsyncUsed
-    syncAvail := a.ramSyncTotal - a.ramSyncUsed
+    asyncAvailMem := a.ramAsyncTotal - a.ramAsyncUsed
+    syncAvailMem := a.ramSyncTotal - a.ramSyncUsed
+
+    asyncAvailCPU := a.cpuAsyncTotal - a.cpuAsyncUsed
+    syncAvailCPU := a.cpuSyncTotal - a.cpuSyncUsed

     // For sync functions, we can steal from async pool. For async, we restrict it to sync pool
     if isAsync {
-        return asyncAvail >= memory
+        return asyncAvailMem >= memory && asyncAvailCPU >= cpuQuota
     } else {
-        return asyncAvail+syncAvail >= memory
+        return asyncAvailMem+syncAvailMem >= memory && asyncAvailCPU+syncAvailCPU >= cpuQuota
     }
 }

 // is this request possible to meet? If no, fail quick
-func (a *resourceTracker) isResourcePossible(memory uint64, isAsync bool) bool {
+func (a *resourceTracker) isResourcePossible(memory uint64, cpuQuota uint64, isAsync bool) bool {
     if isAsync {
-        return memory <= a.ramAsyncTotal
+        return memory <= a.ramAsyncTotal && cpuQuota <= a.cpuAsyncTotal
     } else {
-        return memory <= a.ramSyncTotal+a.ramAsyncTotal
+        return memory <= a.ramSyncTotal+a.ramAsyncTotal && cpuQuota <= a.cpuSyncTotal+a.cpuAsyncTotal
     }
 }

 // the received token should be passed directly to launch (unconditionally), launch
 // will close this token (i.e. the receiver should not call Close)
-func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, isAsync bool) <-chan ResourceToken {
+func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, cpuQuota uint64, isAsync bool) <-chan ResourceToken {

     memory = memory * Mem1MB

     c := a.cond
     ch := make(chan ResourceToken)

-    if !a.isResourcePossible(memory, isAsync) {
+    if !a.isResourcePossible(memory, cpuQuota, isAsync) {
         close(ch)
         return ch
     }
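The sync.Once added to resourceToken above makes Close idempotent: the decrement that returns memory and CPU to the pools runs at most once even if a token is closed twice (the new TestResourceGetCombo further down exercises exactly that). A stripped-down version of the pattern, for illustration only:

    // minimal sketch of the once-guarded Close used above
    type token struct {
        once      sync.Once
        decrement func()
    }

    func (t *token) Close() error {
        t.once.Do(t.decrement) // safe to call Close repeatedly
        return nil
    }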
@@ -107,39 +125,44 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, i
     go func() {
         c.L.Lock()

-        for !a.isResourceAvailableLocked(memory, isAsync) {
-            select {
-            case <-ctx.Done():
-                c.L.Unlock()
-                return
-            default:
-            }
-
+        for !a.isResourceAvailableLocked(memory, cpuQuota, isAsync) && ctx.Err() == nil {
             c.Wait()
         }

+        if ctx.Err() != nil {
+            c.L.Unlock()
+            return
+        }
+
         var asyncMem, syncMem uint64
+        var asyncCPU, syncCPU uint64
+
         if isAsync {
             // async uses async pool only
             asyncMem = memory
-        } else if a.ramSyncTotal-a.ramSyncUsed >= memory {
-            // if sync fits in sync pool
-            syncMem = memory
+            asyncCPU = cpuQuota
         } else {
             // if sync fits async + sync pool
-            syncMem = a.ramSyncTotal - a.ramSyncUsed
+            syncMem = minUint64(a.ramSyncTotal-a.ramSyncUsed, memory)
+            syncCPU = minUint64(a.cpuSyncTotal-a.cpuSyncUsed, cpuQuota)
+
             asyncMem = memory - syncMem
+            asyncCPU = cpuQuota - syncCPU
         }

         a.ramAsyncUsed += asyncMem
         a.ramSyncUsed += syncMem
+        a.cpuAsyncUsed += asyncCPU
+        a.cpuSyncUsed += syncCPU
         c.L.Unlock()

         t := &resourceToken{decrement: func() {

            c.L.Lock()
            a.ramAsyncUsed -= asyncMem
            a.ramSyncUsed -= syncMem
+           a.cpuAsyncUsed -= asyncCPU
+           a.cpuSyncUsed -= syncCPU
            c.L.Unlock()

            // WARNING: yes, we wake up everyone even async waiters when only sync pool has space, but
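A worked example of the split performed above for a sync request (illustration only; minUint64 is the helper already used in resource.go): with 1 GB and 2000m free in the sync pool, a 2 GB / 3000m sync request charges 1 GB + 2000m to the sync pool and borrows the remaining 1 GB + 1000m from the async pool. An async request never borrows from the sync pool.

    // sketch of the sync-side accounting above
    func splitSyncRequest(memReq, cpuReq, syncMemFree, syncCPUFree uint64) (syncMem, asyncMem, syncCPU, asyncCPU uint64) {
        syncMem = minUint64(syncMemFree, memReq)
        syncCPU = minUint64(syncCPUFree, cpuReq)
        return syncMem, memReq - syncMem, syncCPU, cpuReq - syncCPU
    }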
@@ -159,7 +182,7 @@ func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, i
     return ch
 }

-// WaitAsyncResource will send a signal on the returned channel when RAM in-use
+// WaitAsyncResource will send a signal on the returned channel when RAM and CPU in-use
 // in the async area is less than high water mark
 func (a *resourceTracker) WaitAsyncResource() chan struct{} {
     ch := make(chan struct{})
@@ -167,7 +190,7 @@ func (a *resourceTracker) WaitAsyncResource() chan struct{} {
     c := a.cond
     go func() {
         c.L.Lock()
-        for a.ramAsyncUsed >= a.ramAsyncHWMark {
+        for a.ramAsyncUsed >= a.ramAsyncHWMark || a.cpuAsyncUsed >= a.cpuAsyncHWMark {
             c.Wait()
         }
         c.L.Unlock()
@@ -198,6 +221,71 @@ func clampUint64(val, min, max uint64) uint64 {
     return val
 }

+func (a *resourceTracker) initializeCPU() {
+
+    var maxSyncCPU, maxAsyncCPU, cpuAsyncHWMark uint64
+    var totalCPU, availCPU uint64
+
+    if runtime.GOOS == "linux" {
+
+        // Why do we prefer /proc/cpuinfo for Linux and not just use runtime.NumCPU?
+        // This is because NumCPU is sched_getaffinity based and we prefer to check
+        // cgroup which will more likely be same cgroup for container runtime
+        numCPU, err := checkProcCPU()
+        if err != nil {
+            logrus.WithError(err).Error("Error checking for CPU, falling back to runtime CPU count.")
+            numCPU = uint64(runtime.NumCPU())
+        }
+
+        totalCPU = 1000 * numCPU
+        availCPU = totalCPU
+
+        // Clamp further if cgroups CFS quota/period limits are in place
+        cgroupCPU := checkCgroupCPU()
+        if cgroupCPU > 0 {
+            availCPU = minUint64(availCPU, cgroupCPU)
+        }
+
+        // TODO: check cgroup cpuset to clamp this further. We might be restricted into
+        // a subset of CPUs. (eg. /sys/fs/cgroup/cpuset/cpuset.effective_cpus)
+
+        // TODO: skip CPU headroom for ourselves for now
+    } else {
+        totalCPU = uint64(runtime.NumCPU() * 1000)
+        availCPU = totalCPU
+    }
+
+    logrus.WithFields(logrus.Fields{
+        "totalCPU": totalCPU,
+        "availCPU": availCPU,
+    }).Info("available cpu")
+
+    // %20 of cpu for sync only reserve
+    maxSyncCPU = uint64(availCPU * 2 / 10)
+    maxAsyncCPU = availCPU - maxSyncCPU
+    cpuAsyncHWMark = maxAsyncCPU * 8 / 10
+
+    logrus.WithFields(logrus.Fields{
+        "cpuSync": maxSyncCPU,
+        "cpuAsync": maxAsyncCPU,
+        "cpuAsyncHWMark": cpuAsyncHWMark,
+    }).Info("sync and async cpu reservations")
+
+    if maxSyncCPU == 0 || maxAsyncCPU == 0 {
+        logrus.Fatal("Cannot get the proper CPU information to size server")
+    }
+
+    if maxSyncCPU+maxAsyncCPU < 1000 {
+        logrus.Warn("Severaly Limited CPU: cpuSync + cpuAsync < 1000m (1 CPU)")
+    } else if maxAsyncCPU < 1000 {
+        logrus.Warn("Severaly Limited CPU: cpuAsync < 1000m (1 CPU)")
+    }
+
+    a.cpuAsyncHWMark = cpuAsyncHWMark
+    a.cpuSyncTotal = maxSyncCPU
+    a.cpuAsyncTotal = maxAsyncCPU
+}
+
 func (a *resourceTracker) initializeMemory() {

     var maxSyncMemory, maxAsyncMemory, ramAsyncHWMark uint64
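To make the sizing above concrete, take a 4-core host with no CFS limit (numbers are an example, not from the diff):

    availCPU       = 4 * 1000      = 4000m
    maxSyncCPU     = 4000 * 2 / 10 =  800m   (20% reserved for sync-only work)
    maxAsyncCPU    = 4000 - 800    = 3200m
    cpuAsyncHWMark = 3200 * 8 / 10 = 2560m   (async dequeuing pauses above this)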
@@ -205,7 +293,7 @@ func (a *resourceTracker) initializeMemory() {
     if runtime.GOOS == "linux" {

         // system wide available memory
-        totalMemory, err := checkProc()
+        totalMemory, err := checkProcMem()
         if err != nil {
             logrus.WithError(err).Fatal("Cannot get the proper memory information to size server.")
         }
@@ -213,7 +301,7 @@ func (a *resourceTracker) initializeMemory() {
         availMemory := totalMemory

         // cgroup limit restriction on memory usage
-        cGroupLimit, err := checkCgroup()
+        cGroupLimit, err := checkCgroupMem()
         if err != nil {
             logrus.WithError(err).Error("Error checking for cgroup memory limits, falling back to host memory available..")
         } else {
@@ -251,7 +339,7 @@ func (a *resourceTracker) initializeMemory() {
         "ramSync": maxSyncMemory,
         "ramAsync": maxAsyncMemory,
         "ramAsyncHWMark": ramAsyncHWMark,
-    }).Info("sync and async reservations")
+    }).Info("sync and async ram reservations")

     if maxSyncMemory == 0 || maxAsyncMemory == 0 {
         logrus.Fatal("Cannot get the proper memory pool information to size server")
@@ -286,24 +374,56 @@ func getMemoryHeadRoom(usableMemory uint64) (uint64, error) {
     return headRoom, nil
 }

-func checkCgroup() (uint64, error) {
-    f, err := os.Open("/sys/fs/cgroup/memory/memory.limit_in_bytes")
-    if err != nil {
-        return 0, err
-    }
-    defer f.Close()
-    b, err := ioutil.ReadAll(f)
-    if err != nil {
-        return 0, err
-    }
-    limBytes := string(b)
-    limBytes = strings.TrimSpace(limBytes)
-    return strconv.ParseUint(limBytes, 10, 64)
-}
+func readString(fileName string) (string, error) {
+    b, err := ioutil.ReadFile(fileName)
+    if err != nil {
+        return "", err
+    }
+    value := string(b)
+    return strings.TrimSpace(value), nil
+}
+
+func checkCgroupMem() (uint64, error) {
+    value, err := readString("/sys/fs/cgroup/memory/memory.limit_in_bytes")
+    if err != nil {
+        return 0, err
+    }
+    return strconv.ParseUint(value, 10, 64)
+}
+
+func checkCgroupCPU() uint64 {
+
+    periodStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
+    if err != nil {
+        return 0
+    }
+    quotaStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
+    if err != nil {
+        return 0
+    }
+
+    period, err := strconv.ParseUint(periodStr, 10, 64)
+    if err != nil {
+        logrus.Warn("Cannot parse CFS period", err)
+        return 0
+    }
+
+    quota, err := strconv.ParseInt(quotaStr, 10, 64)
+    if err != nil {
+        logrus.Warn("Cannot parse CFS quota", err)
+        return 0
+    }
+
+    if quota <= 0 || period <= 0 {
+        return 0
+    }
+
+    return uint64(quota) * 1000 / period
+}

 var errCantReadMemInfo = errors.New("Didn't find MemAvailable in /proc/meminfo, kernel is probably < 3.14")

-func checkProc() (uint64, error) {
+func checkProcMem() (uint64, error) {
     f, err := os.Open("/proc/meminfo")
     if err != nil {
         return 0, err
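A quick worked example for checkCgroupCPU above (values are illustrative): with cpu.cfs_quota_us = 150000 and cpu.cfs_period_us = 100000, the function returns 150000 * 1000 / 100000 = 1500 milli-CPUs (1.5 CPUs). With cpu.cfs_quota_us = -1 (no cgroup limit), the quota <= 0 branch returns 0 and initializeCPU keeps the /proc/cpuinfo-based count.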
@@ -341,3 +461,30 @@ func checkProc() (uint64, error) {

     return 0, errCantReadMemInfo
 }
+
+func checkProcCPU() (uint64, error) {
+    f, err := os.Open("/proc/cpuinfo")
+    if err != nil {
+        return 0, err
+    }
+    defer f.Close()
+
+    total := uint64(0)
+
+    scanner := bufio.NewScanner(f)
+    for scanner.Scan() {
+        b := scanner.Text()
+
+        // processor : 0
+        toks := strings.Fields(b)
+        if len(toks) == 3 && toks[0] == "processor" && toks[1] == ":" {
+            total += 1
+        }
+    }
+
+    if total == 0 {
+        return 0, errors.New("Could not parse cpuinfo")
+    }
+
+    return total, nil
+}
@@ -1,6 +1,8 @@
 package agent

 import (
+    "context"
+    "errors"
     "testing"
     "time"
 )
@@ -8,11 +10,17 @@ import (
|
||||
func setTrackerTestVals(tr *resourceTracker, vals *trackerVals) {
|
||||
tr.cond.L.Lock()
|
||||
|
||||
tr.ramSyncTotal = vals.st
|
||||
tr.ramSyncUsed = vals.su
|
||||
tr.ramAsyncTotal = vals.at
|
||||
tr.ramAsyncUsed = vals.au
|
||||
tr.ramAsyncHWMark = vals.am
|
||||
tr.ramSyncTotal = vals.mst
|
||||
tr.ramSyncUsed = vals.msu
|
||||
tr.ramAsyncTotal = vals.mat
|
||||
tr.ramAsyncUsed = vals.mau
|
||||
tr.ramAsyncHWMark = vals.mam
|
||||
|
||||
tr.cpuSyncTotal = vals.cst
|
||||
tr.cpuSyncUsed = vals.csu
|
||||
tr.cpuAsyncTotal = vals.cat
|
||||
tr.cpuAsyncUsed = vals.cau
|
||||
tr.cpuAsyncHWMark = vals.cam
|
||||
|
||||
tr.cond.L.Unlock()
|
||||
tr.cond.Broadcast()
|
||||
@@ -22,24 +30,72 @@ func getTrackerTestVals(tr *resourceTracker, vals *trackerVals) {

     tr.cond.L.Lock()

-    vals.st = tr.ramSyncTotal
-    vals.su = tr.ramSyncUsed
-    vals.at = tr.ramAsyncTotal
-    vals.au = tr.ramAsyncUsed
-    vals.am = tr.ramAsyncHWMark
+    vals.mst = tr.ramSyncTotal
+    vals.msu = tr.ramSyncUsed
+    vals.mat = tr.ramAsyncTotal
+    vals.mau = tr.ramAsyncUsed
+    vals.mam = tr.ramAsyncHWMark
+
+    vals.cst = tr.cpuSyncTotal
+    vals.csu = tr.cpuSyncUsed
+    vals.cat = tr.cpuAsyncTotal
+    vals.cau = tr.cpuAsyncUsed
+    vals.cam = tr.cpuAsyncHWMark

     tr.cond.L.Unlock()
 }

 // helper to debug print (fields correspond to resourceTracker CPU/MEM fields)
 type trackerVals struct {
-    st uint64
-    su uint64
-    at uint64
-    au uint64
-    am uint64
+    mst uint64
+    msu uint64
+    mat uint64
+    mau uint64
+    mam uint64
+    cst uint64
+    csu uint64
+    cat uint64
+    cau uint64
+    cam uint64
 }

-func TestResourceAsyncMem(t *testing.T) {
+func (vals *trackerVals) setDefaults() {
+    // set set these to known vals (4GB total: 1GB sync, 3 async)
+    vals.mst = 1 * Mem1GB
+    vals.msu = 0
+    vals.mat = 3 * Mem1GB
+    vals.mau = 0
+    vals.mam = 1 * Mem1GB
+
+    // let's assume 10 CPUs (2 CPU sync, 8 CPU async)
+    vals.cst = 2000
+    vals.csu = 0
+    vals.cat = 8000
+    vals.cau = 0
+    vals.cam = 6000
+}
+
+func fetchToken(ch <-chan ResourceToken) (ResourceToken, error) {
+    select {
+    case tok := <-ch:
+        return tok, nil
+    case <-time.After(time.Duration(500) * time.Millisecond):
+        return nil, errors.New("expected token")
+    }
+}
+
+func isClosed(ch <-chan ResourceToken) bool {
+    select {
+    case _, ok := <-ch:
+        if !ok {
+            return true
+        }
+    default:
+    }
+    return false
+}
+
+func TestResourceAsyncWait(t *testing.T) {

     var vals trackerVals

@@ -48,36 +104,242 @@ func TestResourceAsyncMem(t *testing.T) {
     tr := trI.(*resourceTracker)

     getTrackerTestVals(tr, &vals)
-    if vals.st <= 0 || vals.su != 0 || vals.at <= 0 || vals.au != 0 || vals.am <= 0 {
-        t.Fatalf("faulty init %#v", vals)
+    if vals.mst <= 0 || vals.msu != 0 || vals.mat <= 0 || vals.mau != 0 || vals.mam <= 0 {
+        t.Fatalf("faulty init MEM %#v", vals)
     }
+    if vals.cst <= 0 || vals.csu != 0 || vals.cat <= 0 || vals.cau != 0 || vals.cam <= 0 {
+        t.Fatalf("faulty init CPU %#v", vals)
+    }

-    // set set these to known vals
-    vals.st = 1 * 1024 * 1024
-    vals.su = 0
-    vals.at = 2 * 1024 * 1024
-    vals.au = 0
-    vals.am = 1 * 1024 * 1024
+    vals.setDefaults()

     // should block & wait
-    vals.au = vals.am
+    vals.mau = vals.mam
     setTrackerTestVals(tr, &vals)
     ch := tr.WaitAsyncResource()

     select {
     case <-ch:
-        t.Fatal("high water mark over, should not trigger")
+        t.Fatal("high water mark MEM over, should not trigger")
     case <-time.After(time.Duration(500) * time.Millisecond):
     }

     // should not block & wait
-    vals.au = 0
+    vals.mau = 0
     setTrackerTestVals(tr, &vals)

     select {
     case <-ch:
     case <-time.After(time.Duration(500) * time.Millisecond):
-        t.Fatal("high water mark not over, should trigger")
+        t.Fatal("high water mark MEM not over, should trigger")
     }

+    // get a new channel to prevent previous test interference
+    ch = tr.WaitAsyncResource()
+
+    // should block & wait
+    vals.cau = vals.cam
+    setTrackerTestVals(tr, &vals)
+
+    select {
+    case <-ch:
+        t.Fatal("high water mark CPU over, should not trigger")
+    case <-time.After(time.Duration(500) * time.Millisecond):
+    }
+
+    // should not block & wait
+    vals.cau = 0
+    setTrackerTestVals(tr, &vals)
+
+    select {
+    case <-ch:
+    case <-time.After(time.Duration(500) * time.Millisecond):
+        t.Fatal("high water mark CPU not over, should trigger")
+    }
+
 }

+func TestResourceGetSimple(t *testing.T) {
+
+    var vals trackerVals
+    trI := NewResourceTracker()
+    tr := trI.(*resourceTracker)
+
+    vals.setDefaults()
+
+    // let's make it like CPU and MEM are 100% full
+    vals.mau = vals.mat
+    vals.cau = vals.cat
+
+    setTrackerTestVals(tr, &vals)
+
+    ctx, cancel := context.WithCancel(context.Background())
+    defer cancel()
+
+    // ask for 4GB and 10 CPU
+    ch := trI.GetResourceToken(ctx, 4*1024, 1000, false)
+
+    _, err := fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full system should not hand out token")
+    }
+
+    // reset back
+    vals.setDefaults()
+    setTrackerTestVals(tr, &vals)
+
+    tok, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty system should hand out token")
+    }
+
+    // ask for another 4GB and 10 CPU
+    ch = trI.GetResourceToken(ctx, 4*1024, 1000, false)
+
+    _, err = fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full system should not hand out token")
+    }
+
+    // close means, giant token resources released
+    tok.Close()
+
+    tok, err = fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty system should hand out token")
+    }
+
+    tok.Close()
+
+    // POOLS should all be empty now
+    getTrackerTestVals(tr, &vals)
+    if vals.msu != 0 || vals.mau != 0 {
+        t.Fatalf("faulty state MEM %#v", vals)
+    }
+    if vals.csu != 0 || vals.cau != 0 {
+        t.Fatalf("faulty state CPU %#v", vals)
+    }
+}
+
+func TestResourceGetCombo(t *testing.T) {
+
+    var vals trackerVals
+    trI := NewResourceTracker()
+    tr := trI.(*resourceTracker)
+
+    vals.setDefaults()
+    setTrackerTestVals(tr, &vals)
+
+    ctx, cancel := context.WithCancel(context.Background())
+
+    // impossible request
+    ch := trI.GetResourceToken(ctx, 20*1024, 20000, false)
+    if !isClosed(ch) {
+        t.Fatalf("impossible request should return closed channel")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // let's use up 2 GB of 3GB async pool
+    ch = trI.GetResourceToken(ctx, 2*1024, 10, true)
+    tok1, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty async system should hand out token1")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // remaining 1 GB async
+    ch = trI.GetResourceToken(ctx, 1*1024, 11, true)
+    tok2, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty async system should hand out token2")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // NOW ASYNC POOL IS FULL
+    // SYNC POOL HAS 1GB
+
+    // we no longer can get async token
+    ch = trI.GetResourceToken(ctx, 1*1024, 12, true)
+    _, err = fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full async system should not hand out a token")
+    }
+
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // but we should get 1GB sync token
+    ch = trI.GetResourceToken(ctx, 1*1024, 13, false)
+    tok3, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("empty sync system should hand out token3")
+    }
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // NOW ASYNC AND SYNC POOLS ARE FULL
+
+    // this should fail
+    ch = trI.GetResourceToken(ctx, 1*1024, 14, false)
+    _, err = fetchToken(ch)
+    if err == nil {
+        t.Fatalf("full system should not hand out a token")
+    }
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // now let's free up some async pool, release tok2 (1GB)
+    tok2.Close()
+
+    // NOW ASYNC POOL HAS 1GB FREE
+    // SYNC POOL IS FULL
+
+    // async pool should provide this
+    ch = trI.GetResourceToken(ctx, 1*1024, 15, false)
+    tok4, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("async system should hand out token4")
+    }
+    cancel()
+    ctx, cancel = context.WithCancel(context.Background())
+
+    // NOW ASYNC AND SYNC POOLS ARE FULL
+
+    tok4.Close()
+    tok3.Close()
+
+    // NOW ASYNC POOL HAS 1GB FREE
+    // SYNC POOL HAS 1GB FREE
+
+    // now, we ask for 2GB sync token, it should be provided from both async+sync pools
+    ch = trI.GetResourceToken(ctx, 2*1024, 16, false)
+    tok5, err := fetchToken(ch)
+    if err != nil {
+        t.Fatalf("async+sync system should hand out token5")
+    }
+    cancel()
+
+    // NOW ASYNC AND SYNC POOLS ARE FULL
+
+    tok1.Close()
+    tok5.Close()
+
+    // attempt to close tok2 twice.. This should be OK.
+    tok2.Close()
+
+    // POOLS should all be empty now
+    getTrackerTestVals(tr, &vals)
+    if vals.msu != 0 || vals.mau != 0 {
+        t.Fatalf("faulty state MEM %#v", vals)
+    }
+    if vals.csu != 0 || vals.cau != 0 {
+        t.Fatalf("faulty state CPU %#v", vals)
+    }
+
+}
@@ -334,6 +334,7 @@ func getSlotQueueKey(call *call) string {
     fmt.Fprint(hash, call.Timeout, "\x00")
     fmt.Fprint(hash, call.IdleTimeout, "\x00")
     fmt.Fprint(hash, call.Memory, "\x00")
+    fmt.Fprint(hash, call.CPUs, "\x00")
    fmt.Fprint(hash, call.Format, "\x00")

     // we have to sort these before printing, yay. TODO do better