add per call stats field as histogram (#528)

* add per call stats field as histogram

this adds a histogram of up to 240 data points of call data, produced
every second and stored in the db at the end of a call invocation. the same
metrics are also still shipped to prometheus (prometheus has the
not-potentially-reduced version). for the API reference, see the updates to
the swagger spec; this is just added onto the get call endpoint.

this does not add any extra db calls, and the field for stats in call is a json
blob, which is easily modified to add / omit future fields. this is just
tacked on to the call we're making to InsertCall, and we expect it to add very
little overhead: we are bounding the set to be relatively small, planning to
clean out the db of calls periodically, functions will generally be short, and
the same code used at a previous firm did not cause a notable db size increase
under a production workload that is worse wrt histogram size (I checked). the
code changes are really small aside from changing to strfmt.DateTime,
adding a migration and implementing sql.Valuer; we needed to slightly modify the
swap function so that we can safely read the `call.Stats` field to upload at end.

with the full histogram in hand, we can compute max/min/average/median/growth
rate/bernoulli distributions/whatever very easily in a UI or tooling. in
particular, this data is easily chartable [for a UI], which is beneficial.

* adds swagger spec of api update to calls endpoint
* adds migration for call.stats field
* adds call.stats field to sql queries
* move swapping of the hot logger into exec, so we know that call.Stats is no
longer being modified after `exec` returns [in call.End]
* throws out docker stats between function invocations in hot functions (no
call to store them on, we could change this later for debug; they're in prom)
* tested in tests and API

closes #19

* add format of ints to swag
This commit is contained in:
Reed Allman
2017-11-27 08:52:53 -06:00
committed by GitHub
parent d8c41aac9d
commit c9198b8525
12 changed files with 202 additions and 32 deletions

View File

@@ -428,8 +428,9 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
// link the container id and id in the logs [for us!]
common.Logger(ctx).WithField("container_id", s.container.id).Info("starting call")
// swap in the new stderr logger
s.container.swap(call.stderr)
// swap in the new stderr logger & stat accumulator
oldStderr := s.container.swap(call.stderr, &call.Stats)
defer s.container.swap(oldStderr, nil) // once we're done, swap out in this scope to prevent races
errApp := make(chan error, 1)
go func() {
@@ -442,8 +443,7 @@ func (s *hotSlot) exec(ctx context.Context, call *call) error {
select {
case err := <-s.errC: // error from container
return err
case err := <-errApp:
// would be great to be able to decipher what error is returning from here so we can show better messages
case err := <-errApp: // from dispatch
return err
case <-ctx.Done(): // call timeout
return ctx.Err()
@@ -488,6 +488,7 @@ func (a *agent) prepCold(ctx context.Context, slots chan<- slot, call *call, tok
stdin: call.req.Body,
stdout: call.w,
stderr: call.stderr,
stats: &call.Stats,
}
// pull & create container before we return a slot, so as to be friendly
@@ -605,7 +606,6 @@ func (a *agent) runHot(ctxArg context.Context, slots chan<- slot, call *call, to
// wait for this call to finish
// NOTE do NOT select with shutdown / other channels. slot handles this.
<-done
container.swap(stderr) // log between tasks
}
}()
@@ -634,14 +634,25 @@ type container struct {
stdin io.Reader
stdout io.Writer
stderr io.Writer
// lock protects the swap and any fields that need to be swapped
sync.Mutex
stats *drivers.Stats
}
func (c *container) swap(stderr io.Writer) {
func (c *container) swap(stderr io.Writer, cs *drivers.Stats) (old io.Writer) {
c.Lock()
defer c.Unlock()
// TODO meh, maybe shouldn't bury this
old = c.stderr
gw, ok := c.stderr.(*ghostWriter)
if ok {
gw.swap(stderr)
old = gw.swap(stderr)
}
c.stats = cs
return old
}
func (c *container) Id() string { return c.id }
@@ -665,6 +676,12 @@ func (c *container) WriteStat(ctx context.Context, stat drivers.Stat) {
for key, value := range stat.Metrics {
span.LogFields(log.Uint64("fn_"+key, value))
}
c.Lock()
defer c.Unlock()
if c.stats != nil {
*(c.stats) = append(*(c.stats), stat)
}
}
//func (c *container) DockerAuth() (docker.AuthConfiguration, error) {
@@ -679,10 +696,12 @@ type ghostWriter struct {
inner io.Writer
}
func (g *ghostWriter) swap(w io.Writer) {
func (g *ghostWriter) swap(w io.Writer) (old io.Writer) {
g.Lock()
old = g.inner
g.inner = w
g.Unlock()
return old
}
func (g *ghostWriter) Write(b []byte) (int, error) {