fn: new agent resource tracker metrics (#1215)

Adds new metrics for the agent resource tracker: CpuUsed, CpuAvail,
MemUsed, and MemAvail.
This commit is contained in:
Tolga Ceylan
2018-09-17 10:31:17 -07:00
committed by GitHub
parent dd727dfd12
commit b0c93dbd82
6 changed files with 109 additions and 38 deletions

View File

@@ -313,7 +313,7 @@ func (a *agent) submit(ctx context.Context, call *call) error {
func (a *agent) handleCallEnd(ctx context.Context, call *call, slot Slot, err error, isStarted bool) error {
if slot != nil {
slot.Close(common.BackgroundContext(ctx))
slot.Close()
}
// This means call was routed (executed)
@@ -466,7 +466,7 @@ func (a *agent) checkLaunch(ctx context.Context, call *call, notifyChan chan err
// Non-blocking mode only applies to cpu+mem, and if isNewContainerNeeded decided that we do not
// need to start a new container, then waiters will wait.
select {
case tok := <-a.resources.GetResourceToken(ctx, mem, uint64(call.CPUs), isAsync, isNB):
case tok := <-a.resources.GetResourceToken(ctx, mem, call.CPUs, isAsync, isNB):
if tok != nil && tok.Error() != nil {
// before returning error response, as a last resort, try evicting idle containers.
if tok.Error() != CapacityFull || !a.evictor.PerformEviction(call.slotHashId, mem, uint64(call.CPUs)) {
@@ -482,6 +482,7 @@ func (a *agent) checkLaunch(ctx context.Context, call *call, notifyChan chan err
return
}
if tok != nil {
statsUtilization(ctx, a.resources.GetUtilization())
tok.Close()
}
// Request routines are polling us with this a.cfg.HotPoll frequency. We can use this
@@ -519,7 +520,7 @@ func (a *agent) waitHot(ctx context.Context, call *call) (Slot, error) {
case s := <-ch:
if call.slots.acquireSlot(s) {
if s.slot.Error() != nil {
s.slot.Close(ctx)
s.slot.Close()
return nil, s.slot.Error()
}
return s.slot, nil
@@ -559,7 +560,7 @@ func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
mem := call.Memory + uint64(call.TmpFsSize)
select {
case tok := <-a.resources.GetResourceToken(ctx, mem, uint64(call.CPUs), isAsync, isNB):
case tok := <-a.resources.GetResourceToken(ctx, mem, call.CPUs, isAsync, isNB):
if tok.Error() != nil {
return nil, tok.Error()
}
@@ -573,7 +574,7 @@ func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
select {
case s := <-ch:
if s.Error() != nil {
s.Close(ctx)
s.Close()
return nil, s.Error()
}
return s, nil
@@ -586,6 +587,7 @@ func (a *agent) launchCold(ctx context.Context, call *call) (Slot, error) {
// coldSlot is the Slot value that prepCold builds and sends on its slot
// channel. Cleanup is funnelled through the closer callback, which Close
// invokes.
type coldSlot struct {
// cookie is the driver cookie for this slot; the closer built in prepCold
// calls cookie.Close on it when it is non-nil.
cookie drivers.Cookie
// tok is the resource token obtained for this call; the closer built in
// prepCold calls tok.Close on it when it is non-nil.
tok ResourceToken
// closer performs the slot's cleanup. Close calls it if it is non-nil and
// then sets it to nil, so a later Close does not run it again.
closer func()
// fatalErr holds the err value prepCold had when it constructed the slot.
// NOTE(review): presumably what the slot's Error() reports — confirm.
fatalErr error
}
@@ -615,12 +617,10 @@ func (s *coldSlot) exec(ctx context.Context, call *call) error {
return ctx.Err()
}
func (s *coldSlot) Close(ctx context.Context) error {
if s.cookie != nil {
s.cookie.Close(ctx)
}
if s.tok != nil {
s.tok.Close()
func (s *coldSlot) Close() error {
if s.closer != nil {
s.closer()
s.closer = nil
}
return nil
}
@@ -636,7 +636,7 @@ type hotSlot struct {
containerSpan trace.SpanContext
}
func (s *hotSlot) Close(ctx context.Context) error {
// Close closes the s.done channel and always returns nil.
// NOTE(review): close panics on an already-closed channel, so this must run
// at most once per slot. Some call sites in view only call it after
// call.slots.acquireSlot succeeds; confirm every other path is guarded too.
func (s *hotSlot) Close() error {
close(s.done)
return nil
}
@@ -809,6 +809,7 @@ func (s *hotSlot) dispatchOldFormats(ctx context.Context, call *call) chan error
func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch chan Slot) {
ctx, span := trace.StartSpan(ctx, "agent_prep_cold")
defer span.End()
statsUtilization(ctx, a.resources.GetUtilization())
call.containerState.UpdateState(ctx, ContainerStateStart, call.slots)
@@ -856,11 +857,21 @@ func (a *agent) prepCold(ctx context.Context, call *call, tok ResourceToken, ch
call.containerState.UpdateState(ctx, ContainerStateIdle, call.slots)
slot := &coldSlot{cookie, tok, err}
closer := func() {
if cookie != nil {
cookie.Close(ctx)
}
if tok != nil {
tok.Close()
}
statsUtilization(ctx, a.resources.GetUtilization())
}
slot := &coldSlot{cookie: cookie, tok: tok, closer: closer, fatalErr: err}
select {
case ch <- slot:
case <-ctx.Done():
slot.Close(ctx)
slot.Close()
}
}
@@ -870,6 +881,12 @@ func (a *agent) runHot(ctx context.Context, call *call, tok ResourceToken, state
ctx = common.BackgroundContext(ctx)
ctx, span := trace.StartSpan(ctx, "agent_run_hot")
defer span.End()
statsUtilization(ctx, a.resources.GetUtilization())
defer func() {
statsUtilization(ctx, a.resources.GetUtilization())
}()
defer tok.Close() // IMPORTANT: this MUST get called
state.UpdateState(ctx, ContainerStateStart, call.slots)
@@ -1154,7 +1171,7 @@ func (a *agent) runHotReq(ctx context.Context, call *call, state ContainerState,
// abort/shutdown/timeout, attempt to acquire and terminate,
// otherwise continue processing the request
if call.slots.acquireSlot(s) {
slot.Close(ctx)
slot.Close()
if isEvictEvent {
statsContainerEvicted(ctx)
}