diff --git a/api/agent/drivers/docker/docker.go b/api/agent/drivers/docker/docker.go
index 7009bc0e1..0c3a00f84 100644
--- a/api/agent/drivers/docker/docker.go
+++ b/api/agent/drivers/docker/docker.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"path"
 	"strings"
+	"time"
 
 	"github.com/fnproject/fn/api/agent/drivers"
 	"github.com/fnproject/fn/api/common"
@@ -440,6 +441,45 @@ func (drv *DockerDriver) startTask(ctx context.Context, container string) error
 			return err
 		}
 	}
+
+	// see if there's any healthcheck, and if so, wait for it to complete
+	return drv.awaitHealthcheck(ctx, container)
+}
+
+// awaitHealthcheck polls the container's state until its health check passes.
+// It returns nil once the reported health status is "" or "healthy", the
+// inspect error if inspecting the container fails, or ctx.Err() if ctx is
+// done first. Any other status (e.g. "starting", "unhealthy") keeps polling.
+func (drv *DockerDriver) awaitHealthcheck(ctx context.Context, container string) error {
+	// pause between inspects, to avoid a spin loop in case docker is actually fast
+	const pollInterval = 100 * time.Millisecond
+
+	for {
+		// bail out before issuing another inspect if the caller has given up
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		cont, err := drv.docker.InspectContainerWithContext(container, ctx)
+		if err != nil {
+			// TODO unknown fiddling to be had
+			return err
+		}
+
+		// if no health check for this image (""), or it's healthy, then stop waiting.
+		// state machine is "starting" -> "healthy" | "unhealthy"
+		if status := cont.State.Health.Status; status == "" || status == "healthy" {
+			break
+		}
+
+		// wait out the interval, but wake up immediately on cancellation
+		// rather than sleeping through it
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-time.After(pollInterval):
+		}
+	}
 	return nil
 }
 
diff --git a/api/agent/drivers/docker/docker_client.go b/api/agent/drivers/docker/docker_client.go
index ee94fd430..82b24f9bc 100644
--- a/api/agent/drivers/docker/docker_client.go
+++ b/api/agent/drivers/docker/docker_client.go
@@ -36,6 +36,7 @@ type dockerClient interface {
 	RemoveContainer(opts docker.RemoveContainerOptions) error
 	PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error
 	InspectImage(ctx context.Context, name string) (*docker.Image, error)
+	InspectContainerWithContext(container string, ctx context.Context) (*docker.Container, error)
 	Stats(opts docker.StatsOptions) error
 }
 
@@ -257,6 +258,20 @@ func (d *dockerWrap) InspectImage(ctx context.Context, name string) (i *docker.I
 	return i, err
 }
 
+// InspectContainerWithContext inspects the given container inside a
+// "docker_inspect_container" span, going through d.retry with a retryTimeout deadline.
+func (d *dockerWrap) InspectContainerWithContext(container string, ctx context.Context) (c *docker.Container, err error) {
+	span, ctx := opentracing.StartSpanFromContext(ctx, "docker_inspect_container")
+	defer span.Finish()
+	ctx, cancel := context.WithTimeout(ctx, retryTimeout)
+	defer cancel()
+	err = d.retry(ctx, func() error {
+		c, err = d.docker.InspectContainerWithContext(container, ctx)
+		return err
+	})
+	return c, err
+}
+
 func (d *dockerWrap) Stats(opts docker.StatsOptions) (err error) {
 	// we can't retry this one this way since the callee closes the
 	// stats chan, need a fancier retry mechanism where we can swap out