mirror of https://github.com/fnproject/fn.git (synced 2022-10-28 21:29:17 +03:00)
gosec: severity=medium now passes; all the severity=low findings are unhandled errors (107 of them), which don't seem worth chasing right now, though someone more meticulous is welcome to. The medium findings include some genuinely useful checks, and high obviously makes sense if we're doing this at all. This adds #nosec annotations where we know the input is clean (e.g. SQL built with Sprintf from our own variables), drops Sprintf calls that weren't needed, and fixes the easy stuff: filepath.Clean when opening a file from a variable path, tighter file permissions, and similar. I can't get the CI build to quiet down; locally it's reasonably quiet about imports and just prints the gosec output, and it still works as expected even when it's noisy. Unsetting some of the go mod flags locally silences it, but that doesn't seem to carry over to CircleCI (printed the env there and don't see them set), so I'm leaving it as is. closes #1303
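A rough sketch of the kinds of changes described above (the table name, file path, and helper names below are illustrative placeholders, not lines from the actual diff):

	package main

	import (
		"fmt"
		"os"
		"path/filepath"
	)

	// buildQuery formats a SQL string from one of our own identifiers rather
	// than user input, so the gosec SQL string-formatting finding (G201) is
	// annotated away instead of rewritten.
	func buildQuery(table string) string {
		return fmt.Sprintf("SELECT id FROM %s", table) // #nosec G201
	}

	// openLog cleans a path that arrives in a variable (addresses the G304
	// file-inclusion finding) and creates the file with restrictive 0600
	// permissions rather than something world-readable.
	func openLog(path string) (*os.File, error) {
		return os.OpenFile(filepath.Clean(path), os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600)
	}

	func main() {
		fmt.Println(buildQuery("calls"))
	}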
610 lines
15 KiB
Go
package agent

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"

	"github.com/fnproject/fn/api/models"

	"github.com/sirupsen/logrus"
	"go.opencensus.io/trace"
)

const (
	Mem1MB = 1024 * 1024
	Mem1GB = 1024 * 1024 * 1024

	// Assume 2GB RAM on non-linux systems
	DefaultNonLinuxMemory = 2048 * Mem1MB
)

var CapacityFull = errors.New("max capacity reached")

type ResourceUtilization struct {
	// CPU in use
	CpuUsed models.MilliCPUs
	// CPU available
	CpuAvail models.MilliCPUs
	// Memory in use in bytes
	MemUsed uint64
	// Memory available in bytes
	MemAvail uint64
}

// A simple resource (memory, cpu, disk, etc.) tracker for scheduling.
// TODO: disk, network IO for future
type ResourceTracker interface {
	// WaitAsyncResource returns a channel that will send once when there seem to be sufficient
	// resource levels to run an async task, it is up to the implementer to create policy here.
	WaitAsyncResource(ctx context.Context) chan struct{}

	// GetResourceToken returns a channel to wait for a resource token on. If the provided context is canceled,
	// the channel will never receive anything. If it is not possible to fulfill this resource, the channel
	// will never receive anything (use IsResourcePossible). If a resource token is available for the provided
	// resource parameters, it will otherwise be sent once on the returned channel. The channel is never closed.
	// if isNB is set, resource check is done and error token is returned without blocking.
	// Memory is expected to be provided in MB units.
	GetResourceToken(ctx context.Context, memory uint64, cpuQuota models.MilliCPUs, isNB bool) <-chan ResourceToken

	// IsResourcePossible returns whether it's possible to fulfill the requested resources on this
	// machine. It must be called before GetResourceToken or GetResourceToken may hang.
	// Memory is expected to be provided in MB units.
	IsResourcePossible(memory uint64, cpuQuota models.MilliCPUs) bool

	// Retrieve current stats/usage
	GetUtilization() ResourceUtilization
}
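
// Illustrative usage sketch (not from the original source; rt, memMB, cpu and
// isNB are hypothetical caller-side names): check IsResourcePossible before
// waiting on GetResourceToken, and make sure whoever ends up owning the token
// closes it to release the reservation.
//
//	if !rt.IsResourcePossible(memMB, cpu) {
//		return CapacityFull // GetResourceToken would never send for this request
//	}
//	select {
//	case tok := <-rt.GetResourceToken(ctx, memMB, cpu, isNB):
//		if tok.Error() != nil { // non-blocking mode may hand back an error token
//			tok.Close()
//			return tok.Error()
//		}
//		defer tok.Close()
//		// ... run the call with the reserved memory and cpu ...
//	case <-ctx.Done():
//		return ctx.Err()
//	}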

type resourceTracker struct {
	// cond protects access to ram variables below
	cond *sync.Cond
	// ramTotal is the total usable memory for functions
	ramTotal uint64
	// ramUsed is ram reserved for running containers including hot/idle
	ramUsed uint64
	// memory in use in which agent stops dequeuing async jobs
	ramAsyncHWMark uint64
	// cpuTotal is the total usable cpu for functions
	cpuTotal uint64
	// cpuUsed is cpu reserved for running containers including hot/idle
	cpuUsed uint64
	// cpu in use in which agent stops dequeuing async jobs
	cpuAsyncHWMark uint64
}

func NewResourceTracker(cfg *Config) ResourceTracker {

	obj := &resourceTracker{
		cond: sync.NewCond(new(sync.Mutex)),
	}

	obj.initializeMemory(cfg)
	obj.initializeCPU(cfg)
	return obj
}

type ResourceToken interface {
	// Close must be called by any thread that receives a token.
	io.Closer
	Error() error
	NeededCapacity() (uint64, models.MilliCPUs)
}

type resourceToken struct {
	once      sync.Once
	err       error
	needCpu   models.MilliCPUs
	needMem   uint64
	decrement func()
}

func (t *resourceToken) Error() error {
	return t.err
}

func (t *resourceToken) NeededCapacity() (uint64, models.MilliCPUs) {
	return t.needMem, t.needCpu
}
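
// Close releases the token's memory and CPU reservation back to the tracker.
// It is safe to call more than once: the decrement below is guarded by
// sync.Once and runs at most once.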
func (t *resourceToken) Close() error {
	t.once.Do(func() {
		if t.decrement != nil {
			t.decrement()
		}
	})
	return nil
}

func (a *resourceTracker) isResourceAvailableLocked(memory uint64, cpuQuota models.MilliCPUs) bool {

	availMem := a.ramTotal - a.ramUsed
	availCPU := a.cpuTotal - a.cpuUsed

	return availMem >= memory && availCPU >= uint64(cpuQuota)
}

func (a *resourceTracker) GetUtilization() ResourceUtilization {
	var util ResourceUtilization

	a.cond.L.Lock()

	util.CpuUsed = models.MilliCPUs(a.cpuUsed)
	util.MemUsed = a.ramUsed

	a.cond.L.Unlock()

	util.CpuAvail = models.MilliCPUs(a.cpuTotal) - util.CpuUsed
	util.MemAvail = a.ramTotal - util.MemUsed

	return util
}

// is this request possible to meet? If no, fail quick
func (a *resourceTracker) IsResourcePossible(memory uint64, cpuQuota models.MilliCPUs) bool {
	memory = memory * Mem1MB
	return memory <= a.ramTotal && uint64(cpuQuota) <= a.cpuTotal
}

func (a *resourceTracker) allocResourcesLocked(memory uint64, cpuQuota models.MilliCPUs) ResourceToken {

	a.ramUsed += memory
	a.cpuUsed += uint64(cpuQuota)

	return &resourceToken{decrement: func() {

		a.cond.L.Lock()
		a.ramUsed -= memory
		a.cpuUsed -= uint64(cpuQuota)
		a.cond.L.Unlock()

		// WARNING: yes, we wake up everyone even async waiters when only sync pool has space, but
		// the cost of this spurious wake up is unlikely to impact much performance. Simpler
		// to use one cond variable for the time being.
		a.cond.Broadcast()
	}}
}

func (a *resourceTracker) getResourceTokenNB(memory uint64, cpuQuota models.MilliCPUs) ResourceToken {
	if !a.IsResourcePossible(memory, cpuQuota) {
		return &resourceToken{err: CapacityFull, needCpu: cpuQuota, needMem: memory}
	}
	memory = memory * Mem1MB

	var t ResourceToken
	var needMem uint64
	var needCpu models.MilliCPUs

	a.cond.L.Lock()

	availMem := a.ramTotal - a.ramUsed
	availCPU := a.cpuTotal - a.cpuUsed

	if availMem >= memory && availCPU >= uint64(cpuQuota) {
		t = a.allocResourcesLocked(memory, cpuQuota)
	} else {
		if availMem < memory {
			needMem = (memory - availMem) / Mem1MB
		}
		if availCPU < uint64(cpuQuota) {
			needCpu = models.MilliCPUs(uint64(cpuQuota) - availCPU)
		}
		t = &resourceToken{err: CapacityFull, needCpu: needCpu, needMem: needMem}
	}

	a.cond.L.Unlock()
	return t
}

func (a *resourceTracker) getResourceTokenNBChan(ctx context.Context, memory uint64, cpuQuota models.MilliCPUs) <-chan ResourceToken {
	ctx, span := trace.StartSpan(ctx, "agent_get_resource_token_nbio_chan")

	ch := make(chan ResourceToken)
	go func() {
		defer span.End()
		t := a.getResourceTokenNB(memory, cpuQuota)

		select {
		case ch <- t:
		case <-ctx.Done():
			// if we can't send b/c nobody is waiting anymore, need to decrement here
			t.Close()
		}
	}()

	return ch
}

// the received token should be passed directly to launch (unconditionally), launch
// will close this token (i.e. the receiver should not call Close)
func (a *resourceTracker) GetResourceToken(ctx context.Context, memory uint64, cpuQuota models.MilliCPUs, isNB bool) <-chan ResourceToken {
	if isNB {
		return a.getResourceTokenNBChan(ctx, memory, cpuQuota)
	}

	ch := make(chan ResourceToken)

	if !a.IsResourcePossible(memory, cpuQuota) {
		// return the channel, but never send anything.
		return ch
	}

	c := a.cond
	isWaiting := false

	memory = memory * Mem1MB

	// if we find a resource token, shut down the thread waiting on ctx finish.
	// alternatively, if the ctx is done, wake up the cond loop.
	ctx, cancel := context.WithCancel(ctx)

	go func() {
		<-ctx.Done()
		c.L.Lock()
		if isWaiting {
			c.Broadcast()
		}
		c.L.Unlock()
	}()

	ctx, span := trace.StartSpan(ctx, "agent_get_resource_token")
	go func() {
		defer span.End()
		defer cancel()
		c.L.Lock()

		isWaiting = true
		for !a.isResourceAvailableLocked(memory, cpuQuota) && ctx.Err() == nil {
			c.Wait()
		}
		isWaiting = false

		if ctx.Err() != nil {
			c.L.Unlock()
			return
		}

		t := a.allocResourcesLocked(memory, cpuQuota)
		c.L.Unlock()

		select {
		case ch <- t:
		case <-ctx.Done():
			// if we can't send b/c nobody is waiting anymore, need to decrement here
			t.Close()
		}
	}()

	return ch
}

// WaitAsyncResource will send a signal on the returned channel when RAM and CPU in-use
// in the async area is less than high water mark
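// For example (an illustrative sketch, with hypothetical caller-side names),
// an async dequeue loop might gate itself on this channel alongside its context:
//
//	select {
//	case <-rt.WaitAsyncResource(ctx):
//		// usage is below the async high-water marks, safe to dequeue another job
//	case <-ctx.Done():
//		return ctx.Err()
//	}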
func (a *resourceTracker) WaitAsyncResource(ctx context.Context) chan struct{} {
	ch := make(chan struct{}, 1)

	isWaiting := false
	c := a.cond

	// if we find a resource token, shut down the thread waiting on ctx finish.
	// alternatively, if the ctx is done, wake up the cond loop.
	ctx, cancel := context.WithCancel(ctx)

	go func() {
		<-ctx.Done()
		c.L.Lock()
		if isWaiting {
			c.Broadcast()
		}
		c.L.Unlock()
	}()

	ctx, span := trace.StartSpan(ctx, "agent_wait_async_resource")
	go func() {
		defer span.End()
		defer cancel()
		c.L.Lock()
		isWaiting = true
		for (a.ramUsed >= a.ramAsyncHWMark || a.cpuUsed >= a.cpuAsyncHWMark) && ctx.Err() == nil {
			c.Wait()
		}
		isWaiting = false
		c.L.Unlock()

		if ctx.Err() == nil {
			ch <- struct{}{}
		}
	}()

	return ch
}

func minUint64(a, b uint64) uint64 {
	if a <= b {
		return a
	}
	return b
}

func maxUint64(a, b uint64) uint64 {
	if a >= b {
		return a
	}
	return b
}

func clampUint64(val, min, max uint64) uint64 {
	val = minUint64(val, max)
	val = maxUint64(val, min)
	return val
}

func (a *resourceTracker) initializeCPU(cfg *Config) {

	// Use all available CPU from go.runtime in non-linux systems. We ignore
	// non-linux container implementations and their limits on CPU if there's any.
	// (This is also the default if we cannot determine limits from proc or sysfs)
	totalCPU := uint64(runtime.NumCPU() * 1000)
	availCPU := totalCPU

	if runtime.GOOS == "linux" {

		// Why do we prefer /proc/cpuinfo for Linux and not just use runtime.NumCPU?
		// This is because NumCPU is sched_getaffinity based and we prefer to check
		// cgroup which will more likely be same cgroup for container runtime
		numCPU, err := checkProcCPU()
		if err != nil {
			logrus.WithError(err).Error("Error checking for CPU, falling back to runtime CPU count.")
		} else {
			totalCPU = 1000 * numCPU
			availCPU = totalCPU
		}

		// Clamp further if cgroups CFS quota/period limits are in place
		cgroupCPU := checkCgroupCPU()
		if cgroupCPU > 0 {
			availCPU = minUint64(availCPU, cgroupCPU)
		}

		// TODO: check cgroup cpuset to clamp this further. We might be restricted into
		// a subset of CPUs. (eg. /sys/fs/cgroup/cpuset/cpuset.effective_cpus)

		// TODO: skip CPU headroom for ourselves for now
	}

	// now based on cfg, further clamp on calculated values
	if cfg != nil && cfg.MaxTotalCPU != 0 {
		availCPU = minUint64(cfg.MaxTotalCPU, availCPU)
	}

	logrus.WithFields(logrus.Fields{
		"total_cpu": totalCPU,
		"avail_cpu": availCPU,
	}).Info("available cpu")

	a.cpuTotal = availCPU
	a.cpuAsyncHWMark = availCPU * 8 / 10

	logrus.WithFields(logrus.Fields{
		"cpu": a.cpuTotal,
		"cpu_async_hw_mark": a.cpuAsyncHWMark,
	}).Info("cpu reservations")

	if a.cpuTotal == 0 {
		logrus.Fatal("Cannot get the proper CPU information to size server")
	}

	if a.cpuTotal < 1000 {
		logrus.Warn("Severely Limited CPU: cpu < 1000m (1 CPU)")
	}
}

func (a *resourceTracker) initializeMemory(cfg *Config) {

	availMemory := uint64(DefaultNonLinuxMemory)

	if runtime.GOOS == "linux" {

		// system wide available memory
		totalMemory, err := checkProcMem()
		if err != nil {
			logrus.WithError(err).Fatal("Cannot get the proper memory information to size server.")
		}

		availMemory = totalMemory

		// cgroup limit restriction on memory usage
		cGroupLimit, err := checkCgroupMem()
		if err != nil {
			logrus.WithError(err).Error("Error checking for cgroup memory limits, falling back to host memory available.")
		} else {
			availMemory = minUint64(cGroupLimit, availMemory)
		}

		// clamp the available memory by head room (for docker, ourselves, other processes)
		headRoom, err := getMemoryHeadRoom(availMemory, cfg)
		if err != nil {
			logrus.WithError(err).Fatal("Out of memory")
		}
		availMemory = availMemory - headRoom

		logrus.WithFields(logrus.Fields{
			"total_memory": totalMemory,
			"head_room": headRoom,
			"cgroup_limit": cGroupLimit,
		}).Info("available memory")
	}

	// now based on cfg, further clamp on calculated values
	if cfg != nil && cfg.MaxTotalMemory != 0 {
		availMemory = minUint64(cfg.MaxTotalMemory, availMemory)
	}

	a.ramTotal = availMemory
	a.ramAsyncHWMark = availMemory * 8 / 10

	// For non-linux OS, we expect these (or their defaults) properly configured from command-line/env
	logrus.WithFields(logrus.Fields{
		"avail_memory": a.ramTotal,
		"ram_async_hw_mark": a.ramAsyncHWMark,
	}).Info("ram reservations")

	if a.ramTotal == 0 {
		logrus.Fatal("Cannot get the proper memory pool information to size server")
	}

	if a.ramTotal < 256*Mem1MB {
		logrus.Warn("Severely Limited memory: ram < 256MB")
	}
}

// headroom estimation in order not to consume entire RAM if possible
func getMemoryHeadRoom(usableMemory uint64, cfg *Config) (uint64, error) {
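	// For example (illustrative numbers only): with 16GB of usable memory and
	// no pre-fork pool, the estimate below is 1.6GB, which already sits inside
	// the 256MB-5GB clamp, leaving roughly 14.4GB for functions.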

	// get 10% of the RAM
	headRoom := uint64(usableMemory / 10)

	// TODO: improve this pre-fork calculation, we should fetch/query this
	// instead of estimate below.
	// if pre-fork pool is enabled, add 1 MB per pool-item
	if cfg != nil && cfg.PreForkPoolSize != 0 {
		headRoom += Mem1MB * cfg.PreForkPoolSize
	}

	// TODO: improve these calculations.
	// clamp this with 256MB min -- 5GB max
	maxHeadRoom := uint64(5 * Mem1GB)
	minHeadRoom := uint64(256 * Mem1MB)

	if minHeadRoom >= usableMemory {
		return 0, fmt.Errorf("Not enough memory: %v", usableMemory)
	}

	headRoom = clampUint64(headRoom, minHeadRoom, maxHeadRoom)
	return headRoom, nil
}

func readString(fileName string) (string, error) {
	b, err := ioutil.ReadFile(filepath.Clean(fileName))
	if err != nil {
		return "", err
	}
	value := string(b)
	return strings.TrimSpace(value), nil
}

func checkCgroupMem() (uint64, error) {
	value, err := readString("/sys/fs/cgroup/memory/memory.limit_in_bytes")
	if err != nil {
		return 0, err
	}
	return strconv.ParseUint(value, 10, 64)
}

func checkCgroupCPU() uint64 {
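	// Convert the cgroup CFS quota/period pair into milli-CPUs; for example,
	// quota=50000us with period=100000us yields 500 (half a CPU). Returns 0
	// when no limit is set or the cgroup files cannot be read or parsed.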

	periodStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
	if err != nil {
		return 0
	}
	quotaStr, err := readString("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
	if err != nil {
		return 0
	}

	period, err := strconv.ParseUint(periodStr, 10, 64)
	if err != nil {
		logrus.Warn("Cannot parse CFS period", err)
		return 0
	}

	quota, err := strconv.ParseInt(quotaStr, 10, 64)
	if err != nil {
		logrus.Warn("Cannot parse CFS quota", err)
		return 0
	}

	if quota <= 0 || period <= 0 {
		return 0
	}

	return uint64(quota) * 1000 / period
}

var errCantReadMemInfo = errors.New("Didn't find MemAvailable in /proc/meminfo, kernel is probably < 3.14")

func checkProcMem() (uint64, error) {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		b := scanner.Text()
		if !strings.HasPrefix(b, "MemAvailable") {
			continue
		}

		// expect form:
		// MemAvailable: 1234567890 kB
		tri := strings.Fields(b)
		if len(tri) != 3 {
			return 0, fmt.Errorf("MemAvailable line has unexpected format: %v", b)
		}

		c, err := strconv.ParseUint(tri[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("Could not parse MemAvailable: %v", b)
		}
		switch tri[2] { // convert units to bytes
		case "kB":
			c *= 1024
		case "MB":
			c *= 1024 * 1024
		default:
			return 0, fmt.Errorf("Unexpected units for MemAvailable in /proc/meminfo, need kB or MB, got: %v", tri[2])
		}
		return c, nil
	}

	return 0, errCantReadMemInfo
}

func checkProcCPU() (uint64, error) {
	f, err := os.Open("/proc/cpuinfo")
	if err != nil {
		return 0, err
	}
	defer f.Close()

	total := uint64(0)

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		b := scanner.Text()

		// processor : 0
		toks := strings.Fields(b)
		if len(toks) == 3 && toks[0] == "processor" && toks[1] == ":" {
			total += 1
		}
	}

	if total == 0 {
		return 0, errors.New("Could not parse cpuinfo")
	}

	return total, nil
}