mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
add opentracing spans for metrics
This commit is contained in:
committed by
Travis Reeder
parent
1cc1a5ad49
commit
dc5e67b6d2
@@ -17,8 +17,8 @@ import (
|
||||
manifest "github.com/docker/distribution/manifest/schema1"
|
||||
"github.com/fsouza/go-dockerclient"
|
||||
"github.com/heroku/docker-registry-client/registry"
|
||||
"github.com/opentracing/opentracing-go"
|
||||
"gitlab-odx.oracle.com/odx/functions/api/runner/common"
|
||||
"gitlab-odx.oracle.com/odx/functions/api/runner/common/stats"
|
||||
"gitlab-odx.oracle.com/odx/functions/api/runner/drivers"
|
||||
)
|
||||
|
||||
@@ -268,9 +268,7 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
|
||||
return nil, err
|
||||
}
|
||||
|
||||
createTimer := drv.NewTimer("docker", "create_container", 1.0)
|
||||
_, err = drv.docker.CreateContainer(container)
|
||||
createTimer.Measure()
|
||||
if err != nil {
|
||||
// since we retry under the hood, the container may already have been created by an earlier attempt whose retry then failed; in that case we can just ignore the error
|
||||
if err != docker.ErrContainerAlreadyExists {
|
||||
@@ -296,17 +294,15 @@ type cookie struct {
|
||||
drv *DockerDriver
|
||||
}
|
||||
|
||||
func (c *cookie) Close() error { return c.drv.removeContainer(c.id) }
|
||||
// Close removes the container identified by c.id by delegating to the driver's removeContainer, passing ctx through.
func (c *cookie) Close(ctx context.Context) error { return c.drv.removeContainer(ctx, c.id) }
|
||||
|
||||
// Run delegates to the driver's run method with this cookie's container id and task, returning its result and error unchanged.
func (c *cookie) Run(ctx context.Context) (drivers.RunResult, error) {
|
||||
return c.drv.run(ctx, c.id, c.task)
|
||||
}
|
||||
|
||||
func (drv *DockerDriver) removeContainer(container string) error {
|
||||
removeTimer := drv.NewTimer("docker", "remove_container", 1.0)
|
||||
defer removeTimer.Measure()
|
||||
func (drv *DockerDriver) removeContainer(ctx context.Context, container string) error {
|
||||
err := drv.docker.RemoveContainer(docker.RemoveContainerOptions{
|
||||
ID: container, Force: true, RemoveVolumes: true})
|
||||
ID: container, Force: true, RemoveVolumes: true, Context: ctx})
|
||||
|
||||
if err != nil {
|
||||
logrus.WithError(err).WithFields(logrus.Fields{"container": container}).Error("error removing container")
|
||||
@@ -323,7 +319,9 @@ func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.Container
|
||||
var config docker.AuthConfiguration // default, tries docker hub w/o user/pass
|
||||
if task, ok := task.(Auther); ok {
|
||||
var err error
|
||||
span, _ := opentracing.StartSpanFromContext(ctx, "docker_auth")
|
||||
config, err = task.DockerAuth()
|
||||
span.Finish()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -334,7 +332,7 @@ func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.Container
|
||||
}
|
||||
|
||||
// see if we already have it, if not, pull it
|
||||
_, err := drv.docker.InspectImage(task.Image())
|
||||
_, err := drv.docker.InspectImage(ctx, task.Image())
|
||||
if err == docker.ErrNoSuchImage {
|
||||
err = drv.pullImage(ctx, task, config)
|
||||
}
|
||||
@@ -344,15 +342,8 @@ func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.Container
|
||||
|
||||
func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTask, config docker.AuthConfiguration) error {
|
||||
log := common.Logger(ctx)
|
||||
|
||||
reg, repo, tag := drivers.ParseImage(task.Image())
|
||||
globalRepo := path.Join(reg, repo)
|
||||
|
||||
pullTimer := drv.NewTimer("docker", "pull_image", 1.0)
|
||||
defer pullTimer.Measure()
|
||||
|
||||
drv.Inc("docker", "pull_image_count."+stats.AsStatField(task.Image()), 1, 1)
|
||||
|
||||
if reg != "" {
|
||||
config.ServerAddress = reg
|
||||
}
|
||||
@@ -367,7 +358,6 @@ func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTa
|
||||
|
||||
err = drv.docker.PullImage(docker.PullImageOptions{Repository: globalRepo, Tag: tag, Context: ctx}, config)
|
||||
if err != nil {
|
||||
drv.Inc("task", "error.pull."+stats.AsStatField(task.Image()), 1, 1)
|
||||
log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).WithError(err).Error("Failed to pull image")
|
||||
|
||||
// TODO need to inspect for hub or network errors and pick.
|
||||
@@ -397,12 +387,10 @@ func (drv *DockerDriver) run(ctx context.Context, container string, task drivers
|
||||
|
||||
mwOut, mwErr := task.Logger()
|
||||
|
||||
timer := drv.NewTimer("docker", "attach_container", 1)
|
||||
waiter, err := drv.docker.AttachToContainerNonBlocking(docker.AttachToContainerOptions{
|
||||
waiter, err := drv.docker.AttachToContainerNonBlocking(ctx, docker.AttachToContainerOptions{
|
||||
Container: container, OutputStream: mwOut, ErrorStream: mwErr,
|
||||
Stream: true, Logs: true, Stdout: true, Stderr: true,
|
||||
Stdin: true, InputStream: task.Input()})
|
||||
timer.Measure()
|
||||
if err != nil && ctx.Err() == nil {
|
||||
// ignore the error if ctx has already errored; the status is rewritten further below
|
||||
return nil, err
|
||||
@@ -416,10 +404,7 @@ func (drv *DockerDriver) run(ctx context.Context, container string, task drivers
|
||||
return nil, err
|
||||
}
|
||||
|
||||
taskTimer := drv.NewTimer("docker", "container_runtime", 1)
|
||||
|
||||
defer func() {
|
||||
taskTimer.Measure()
|
||||
waiter.Close()
|
||||
waiter.Wait() // make sure we gather all logs
|
||||
}()
|
||||
@@ -528,10 +513,8 @@ func newContainerID(task drivers.ContainerTask) string {
|
||||
|
||||
func (drv *DockerDriver) startTask(ctx context.Context, container string) error {
|
||||
log := common.Logger(ctx)
|
||||
startTimer := drv.NewTimer("docker", "start_container", 1.0)
|
||||
log.WithFields(logrus.Fields{"container": container}).Debug("Starting container execution")
|
||||
err := drv.docker.StartContainerWithContext(container, nil, ctx)
|
||||
startTimer.Measure()
|
||||
if err != nil {
|
||||
dockerErr, ok := err.(*docker.Error)
|
||||
_, containerAlreadyRunning := err.(*docker.ContainerAlreadyRunning)
|
||||
|
||||
@@ -14,6 +14,8 @@ import (
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/fsouza/go-dockerclient"
|
||||
"github.com/opentracing/opentracing-go"
|
||||
"github.com/opentracing/opentracing-go/log"
|
||||
"gitlab-odx.oracle.com/odx/functions/api/runner/common"
|
||||
)
|
||||
|
||||
@@ -29,13 +31,13 @@ const (
|
||||
type dockerClient interface {
|
||||
// Each of these corresponds to a github.com/fsouza/go-dockerclient method (some take an additional context.Context)
|
||||
|
||||
AttachToContainerNonBlocking(opts docker.AttachToContainerOptions) (docker.CloseWaiter, error)
|
||||
AttachToContainerNonBlocking(ctx context.Context, opts docker.AttachToContainerOptions) (docker.CloseWaiter, error)
|
||||
WaitContainerWithContext(id string, ctx context.Context) (int, error)
|
||||
StartContainerWithContext(id string, hostConfig *docker.HostConfig, ctx context.Context) error
|
||||
CreateContainer(opts docker.CreateContainerOptions) (*docker.Container, error)
|
||||
RemoveContainer(opts docker.RemoveContainerOptions) error
|
||||
PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error
|
||||
InspectImage(name string) (*docker.Image, error)
|
||||
InspectImage(ctx context.Context, name string) (*docker.Image, error)
|
||||
Stats(opts docker.StatsOptions) error
|
||||
}
|
||||
|
||||
@@ -95,20 +97,24 @@ type dockerWrap struct {
|
||||
}
|
||||
|
||||
func (d *dockerWrap) retry(ctx context.Context, f func() error) error {
|
||||
log := common.Logger(ctx)
|
||||
var i int
|
||||
span := opentracing.SpanFromContext(ctx)
|
||||
defer func() { span.LogFields(log.Int("docker_call_retries", i)) }()
|
||||
|
||||
logger := common.Logger(ctx)
|
||||
var b common.Backoff
|
||||
for {
|
||||
for ; ; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
d.Inc("task", "fail.docker", 1, 1)
|
||||
log.WithError(ctx.Err()).Warnf("retrying on docker errors timed out, restart docker or rotate this instance?")
|
||||
logger.WithError(ctx.Err()).Warnf("retrying on docker errors timed out, restart docker or rotate this instance?")
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
err := filter(ctx, f())
|
||||
if common.IsTemporary(err) || isDocker50x(err) {
|
||||
log.WithError(err).Warn("docker temporary error, retrying")
|
||||
logger.WithError(err).Warn("docker temporary error, retrying")
|
||||
b.Sleep()
|
||||
d.Inc("task", "error.docker", 1, 1)
|
||||
continue
|
||||
@@ -183,24 +189,11 @@ func filterNoSuchContainer(ctx context.Context, err error) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func filterNotRunning(ctx context.Context, err error) error {
|
||||
log := common.Logger(ctx)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
func (d *dockerWrap) AttachToContainerNonBlocking(ctx context.Context, opts docker.AttachToContainerOptions) (w docker.CloseWaiter, err error) {
|
||||
span, ctx := opentracing.StartSpanFromContext(ctx, "docker_attach_container")
|
||||
defer span.Finish()
|
||||
|
||||
_, containerNotRunning := err.(*docker.ContainerNotRunning)
|
||||
dockerErr, ok := err.(*docker.Error)
|
||||
if containerNotRunning || (ok && dockerErr.Status == 304) {
|
||||
log.WithError(err).Error("filtering error")
|
||||
return nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *dockerWrap) AttachToContainerNonBlocking(opts docker.AttachToContainerOptions) (w docker.CloseWaiter, err error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), retryTimeout)
|
||||
ctx, cancel := context.WithTimeout(ctx, retryTimeout)
|
||||
defer cancel()
|
||||
err = d.retry(ctx, func() error {
|
||||
w, err = d.docker.AttachToContainerNonBlocking(opts)
|
||||
@@ -214,6 +207,8 @@ func (d *dockerWrap) AttachToContainerNonBlocking(opts docker.AttachToContainerO
|
||||
}
|
||||
|
||||
func (d *dockerWrap) WaitContainerWithContext(id string, ctx context.Context) (code int, err error) {
|
||||
span, ctx := opentracing.StartSpanFromContext(ctx, "docker_wait_container")
|
||||
defer span.Finish()
|
||||
err = d.retry(ctx, func() error {
|
||||
code, err = d.dockerNoTimeout.WaitContainerWithContext(id, ctx)
|
||||
return err
|
||||
@@ -222,6 +217,8 @@ func (d *dockerWrap) WaitContainerWithContext(id string, ctx context.Context) (c
|
||||
}
|
||||
|
||||
func (d *dockerWrap) StartContainerWithContext(id string, hostConfig *docker.HostConfig, ctx context.Context) (err error) {
|
||||
span, ctx := opentracing.StartSpanFromContext(ctx, "docker_start_container")
|
||||
defer span.Finish()
|
||||
err = d.retry(ctx, func() error {
|
||||
err = d.dockerNoTimeout.StartContainerWithContext(id, hostConfig, ctx)
|
||||
if _, ok := err.(*docker.NoSuchContainer); ok {
|
||||
@@ -234,7 +231,9 @@ func (d *dockerWrap) StartContainerWithContext(id string, hostConfig *docker.Hos
|
||||
}
|
||||
|
||||
func (d *dockerWrap) CreateContainer(opts docker.CreateContainerOptions) (c *docker.Container, err error) {
|
||||
err = d.retry(opts.Context, func() error {
|
||||
span, ctx := opentracing.StartSpanFromContext(opts.Context, "docker_create_container")
|
||||
defer span.Finish()
|
||||
err = d.retry(ctx, func() error {
|
||||
c, err = d.dockerNoTimeout.CreateContainer(opts)
|
||||
return err
|
||||
})
|
||||
@@ -242,7 +241,9 @@ func (d *dockerWrap) CreateContainer(opts docker.CreateContainerOptions) (c *doc
|
||||
}
|
||||
|
||||
func (d *dockerWrap) PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) (err error) {
|
||||
err = d.retry(opts.Context, func() error {
|
||||
span, ctx := opentracing.StartSpanFromContext(opts.Context, "docker_pull_image")
|
||||
defer span.Finish()
|
||||
err = d.retry(ctx, func() error {
|
||||
err = d.dockerNoTimeout.PullImage(opts, auth)
|
||||
return err
|
||||
})
|
||||
@@ -250,7 +251,13 @@ func (d *dockerWrap) PullImage(opts docker.PullImageOptions, auth docker.AuthCon
|
||||
}
|
||||
|
||||
func (d *dockerWrap) RemoveContainer(opts docker.RemoveContainerOptions) (err error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), retryTimeout)
|
||||
// start a span from opts.Context, but do not keep that context, since the enclosing context
|
||||
// may be timed out, and we still want to remove the container. TODO: should the caller handle this instead?
|
||||
span, _ := opentracing.StartSpanFromContext(opts.Context, "docker_remove_container")
|
||||
defer span.Finish()
|
||||
ctx := opentracing.ContextWithSpan(context.Background(), span)
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, retryTimeout)
|
||||
defer cancel()
|
||||
err = d.retry(ctx, func() error {
|
||||
err = d.docker.RemoveContainer(opts)
|
||||
@@ -259,8 +266,10 @@ func (d *dockerWrap) RemoveContainer(opts docker.RemoveContainerOptions) (err er
|
||||
return filterNoSuchContainer(ctx, err)
|
||||
}
|
||||
|
||||
func (d *dockerWrap) InspectImage(name string) (i *docker.Image, err error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), retryTimeout)
|
||||
func (d *dockerWrap) InspectImage(ctx context.Context, name string) (i *docker.Image, err error) {
|
||||
span, ctx := opentracing.StartSpanFromContext(ctx, "docker_inspect_image")
|
||||
defer span.Finish()
|
||||
ctx, cancel := context.WithTimeout(ctx, retryTimeout)
|
||||
defer cancel()
|
||||
err = d.retry(ctx, func() error {
|
||||
i, err = d.docker.InspectImage(name)
|
||||
|
||||
@@ -47,7 +47,7 @@ func TestRunnerDocker(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatal("Couldn't prepare task test")
|
||||
}
|
||||
defer cookie.Close()
|
||||
defer cookie.Close(ctx)
|
||||
|
||||
result, err := cookie.Run(ctx)
|
||||
if err != nil {
|
||||
@@ -73,7 +73,7 @@ func TestRunnerDockerStdin(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatal("Couldn't prepare task test")
|
||||
}
|
||||
defer cookie.Close()
|
||||
defer cookie.Close(ctx)
|
||||
|
||||
result, err := cookie.Run(ctx)
|
||||
if err != nil {
|
||||
|
||||
@@ -17,7 +17,8 @@ import (
|
||||
// Clients should always call Close on a Cookie after they are done
|
||||
// with it.
|
||||
type Cookie interface {
|
||||
io.Closer
|
||||
// Close should clean up any resources the cookie was using, or was going to use.
|
||||
Close(ctx context.Context) error
|
||||
|
||||
// Run should execute task on the implementation.
|
||||
// RunResult captures the result of task execution. This means if task
|
||||
|
||||
@@ -24,7 +24,7 @@ type cookie struct {
|
||||
m *Mocker
|
||||
}
|
||||
|
||||
func (c *cookie) Close() error { return nil }
|
||||
// Close is a no-op for this cookie: it ignores the context and always returns nil.
func (c *cookie) Close(context.Context) error { return nil }
|
||||
|
||||
func (c *cookie) Run(ctx context.Context) (drivers.RunResult, error) {
|
||||
c.m.count++
|
||||
|
||||
Reference in New Issue
Block a user