Merge branch 'rm-inspect' into 'master'

remove docker inspect container

See merge request !126
Reed Allman
2017-07-24 15:44:44 -07:00
2 changed files with 28 additions and 71 deletions
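In short, the driver no longer calls docker's InspectContainer to learn how a task finished: the exit code already returned by WaitContainerWithContext, plus a check of the request context, now decides the task status, dropping one Docker API round trip per task. The key substitution, condensed from the diff below (a readability sketch, not the literal hunks):

    // before: a second Docker API call fetched the exit code and container state
    cinfo, err := drv.docker.InspectContainer(container)
    exitCode := cinfo.State.ExitCode

    // after: the exit code comes straight from the wait call; the request context
    // is consulted to distinguish timeout/cancellation, so no inspect is needed
    exitCode, _ := drv.docker.WaitContainerWithContext(container, ctx)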


@@ -3,7 +3,6 @@ package docker
 import (
 	"context"
 	"crypto/tls"
-	"errors"
 	"fmt"
 	"io"
 	"net"
@@ -385,7 +384,6 @@ func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTa
 // Run executes the docker container. If task runs, drivers.RunResult will be returned. If something fails outside the task (ie: Docker), it will return error.
 // The docker driver will attempt to cast the task to a Auther. If that succeeds, private image support is available. See the Auther interface for how to implement this.
 func (drv *DockerDriver) run(ctx context.Context, container string, task drivers.ContainerTask) (drivers.RunResult, error) {
-	log := common.Logger(ctx)
 	timeout := task.Timeout()
 	var cancel context.CancelFunc
@@ -405,35 +403,28 @@ func (drv *DockerDriver) run(ctx context.Context, container string, task drivers
 		Stream: true, Logs: true, Stdout: true, Stderr: true,
 		Stdin: true, InputStream: task.Input()})
 	timer.Measure()
-	if err != nil {
+	if err != nil && ctx.Err() == nil {
+		// ignore if ctx has errored, rewrite status lay below
 		return nil, err
 	}
 	start := time.Now()
 	err = drv.startTask(ctx, container)
-	if err != nil {
-		if err == context.DeadlineExceeded {
-			// if there's just a timeout making the docker calls, rewrite it as such
-			return &runResult{start: start, status: drivers.StatusTimeout}, nil
-		}
+	if err != nil && ctx.Err() == nil {
+		// if there's just a timeout making the docker calls, drv.wait below will rewrite it to timeout
 		return nil, err
 	}
 	taskTimer := drv.NewTimer("docker", "container_runtime", 1)
-	// can discard error, inspect will tell us about the task and wait will retry under the hood
-	drv.docker.WaitContainerWithContext(container, ctx)
-	taskTimer.Measure()
+	defer func() {
+		taskTimer.Measure()
+		waiter.Close()
+		waiter.Wait() // make sure we gather all logs
+	}()
-	waiter.Close()
-	err = waiter.Wait()
-	if err != nil {
-		// TODO need to make sure this error isn't just a context error or something we can ignore
-		log.WithError(err).Error("attach to container returned error, task may be missing logs")
-	}
-	status, err := drv.status(ctx, container)
+	status, err := drv.wait(ctx, container)
 	return &runResult{
 		start: start,
 		status: status,
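Stripped of diff markers, the tail of run() now reads roughly as below. This is a sketch condensed from the added and context lines in the hunk above (creation of the waiter, the timer, and the attach call are unchanged and omitted); the point of the new defer is that the runtime timer is measured and the log waiter is drained however drv.wait resolves:

    start := time.Now()
    err = drv.startTask(ctx, container)
    if err != nil && ctx.Err() == nil {
        // a genuine startup error; timeouts are rewritten by drv.wait below
        return nil, err
    }

    taskTimer := drv.NewTimer("docker", "container_runtime", 1)
    defer func() {
        taskTimer.Measure()
        waiter.Close()
        waiter.Wait() // make sure we gather all logs
    }()

    status, err := drv.wait(ctx, container)
    // the runResult is then built from start, status, and err as in the
    // context lines above (fields outside this hunk are unchanged)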
@@ -553,40 +544,26 @@ func (drv *DockerDriver) startTask(ctx context.Context, container string) error
 	return nil
 }
-func (drv *DockerDriver) status(ctx context.Context, container string) (status string, err error) {
-	log := common.Logger(ctx)
+func (drv *DockerDriver) wait(ctx context.Context, container string) (status string, err error) {
+	// wait retries internally until ctx is up, so we can ignore the error and
+	// just say it was a timeout if we have [fatal] errors talking to docker, etc.
+	// a more prevalent case is calling wait & container already finished, so again ignore err.
+	exitCode, _ := drv.docker.WaitContainerWithContext(container, ctx)
-	cinfo, err := drv.docker.InspectContainer(container)
-	if err != nil {
-		// this is pretty sad, but better to say we had an error than to not.
-		// task has run to completion and logs will be uploaded, user can decide
-		log.WithFields(logrus.Fields{"container": container}).WithError(err).Error("Inspecting container")
-		return drivers.StatusError, err
-	}
-	exitCode := cinfo.State.ExitCode
-	log.WithFields(logrus.Fields{
-		"exit_code": exitCode,
-		"container_running": cinfo.State.Running,
-		"container_status": cinfo.State.Status,
-		"container_finished": cinfo.State.FinishedAt,
-		"container_error": cinfo.State.Error,
-	}).Info("container status")
-	select { // do this after inspect so we can see exit code
-	case <-ctx.Done(): // check if task was canceled or timed out
-		switch ctx.Err() {
-		case context.DeadlineExceeded:
-			return drivers.StatusTimeout, nil
-		case context.Canceled:
-			return drivers.StatusCancelled, nil
+	// check the context first, if it's done then exitCode is invalid iff zero
+	// (can't know 100% without inspecting, but that's expensive and this is a good guess)
+	// if exitCode is non-zero, we prefer that since it proves termination.
+	if exitCode == 0 {
+		select {
+		case <-ctx.Done(): // check if task was canceled or timed out
+			switch ctx.Err() {
+			case context.DeadlineExceeded:
+				return drivers.StatusTimeout, nil
+			case context.Canceled:
+				return drivers.StatusCancelled, nil
+			}
+		default:
 		}
-	default:
 	}
-	if cinfo.State.Running {
-		log.Warn("getting status of task that is still running, need to fix this")
-		return drivers.StatusError, errors.New("task in running state but not timed out. weird")
-	}
 	switch exitCode {
@@ -596,15 +573,6 @@ func (drv *DockerDriver) status(ctx context.Context, container string) (status s
 		return drivers.StatusSuccess, nil
 	case 137: // OOM
 		drv.Inc("docker", "oom", 1, 1)
-		if !cinfo.State.OOMKilled {
-			// It is possible that the host itself is running out of memory and
-			// the host kernel killed one of the container processes.
-			// See: https://github.com/moby/moby/issues/15621
-			// TODO reed: isn't an OOM an OOM? this is wasting space imo
-			log.WithFields(logrus.Fields{"container": container}).Info("Setting task as OOM killed, but docker disagreed.")
-			drv.Inc("docker", "possible_oom_false_alarm", 1, 1.0)
-		}
 		return drivers.StatusKilled, drivers.ErrOutOfMemory
 	}
 }
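Assembled from the added lines above, the replacement wait helper reads roughly like this. The default branch of the exit-code switch lies between the hunks shown and is untouched by this commit, so its body below is a labeled placeholder rather than the repository's actual code:

    func (drv *DockerDriver) wait(ctx context.Context, container string) (status string, err error) {
        // WaitContainerWithContext retries internally until ctx is done, so the error
        // can be ignored; most of the time the container has already finished anyway.
        exitCode, _ := drv.docker.WaitContainerWithContext(container, ctx)

        // A zero exit code with a done context most likely means the task was cut short,
        // so report timeout/cancellation; a non-zero code proves the task terminated.
        if exitCode == 0 {
            select {
            case <-ctx.Done():
                switch ctx.Err() {
                case context.DeadlineExceeded:
                    return drivers.StatusTimeout, nil
                case context.Canceled:
                    return drivers.StatusCancelled, nil
                }
            default:
            }
        }

        switch exitCode {
        case 0:
            return drivers.StatusSuccess, nil
        case 137: // 128 + SIGKILL, the signal the kernel OOM killer sends
            drv.Inc("docker", "oom", 1, 1)
            return drivers.StatusKilled, drivers.ErrOutOfMemory
        default:
            // placeholder: the real default branch sits outside the hunks shown above
            return drivers.StatusError, fmt.Errorf("exit code %d", exitCode)
        }
    }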


@@ -36,7 +36,6 @@ type dockerClient interface {
 	RemoveContainer(opts docker.RemoveContainerOptions) error
 	PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error
 	InspectImage(name string) (*docker.Image, error)
-	InspectContainer(id string) (*docker.Container, error)
 	Stats(opts docker.StatsOptions) error
 }
@@ -270,16 +269,6 @@ func (d *dockerWrap) InspectImage(name string) (i *docker.Image, err error) {
 	return i, err
 }
-func (d *dockerWrap) InspectContainer(id string) (c *docker.Container, err error) {
-	ctx, cancel := context.WithTimeout(context.Background(), retryTimeout)
-	defer cancel()
-	err = d.retry(ctx, func() error {
-		c, err = d.docker.InspectContainer(id)
-		return err
-	})
-	return c, err
-}
 func (d *dockerWrap) Stats(opts docker.StatsOptions) (err error) {
 	// we can't retry this one this way since the callee closes the
 	// stats chan, need a fancier retry mechanism where we can swap out