bubble up some docker errors to user

currently:

* container ran out of memory (code 137)
* container exited with other code != 0
* unable to pull image (auth/404)

there may be others but this is a good start (the most common). notably, for
both hot and cold these should bubble up (if deterministic, which hub isn't
always), and these are useful for users to use in debugging why things aren't
working.

added tests to make sure that these behaviors are working.

also changed the behavior such that when the container exits we return a 502
instead of a 503, just to be able to distinguish the fact that fn is working
as expected but the container is acting funky (400 is weird here, so idk).

removed references to old IsUserVisible crap and slightly changed the
interface for RunResult for plumbing reasons (to get the error type,
specifically).

fixed an issue where if ~/.docker/config.json exists sometimes pulling images
wouldn't work deterministically (should be more inline w/ expectations now)

closes #275
This commit is contained in:
Reed Allman
2017-09-07 11:36:30 -07:00
parent 779b53425c
commit 700078ccb9
10 changed files with 72 additions and 104 deletions

View File

@@ -2,14 +2,18 @@ package docker
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"path"
"strings"
"github.com/fnproject/fn/api/agent/drivers"
"github.com/fnproject/fn/api/common"
"github.com/fnproject/fn/api/models"
"github.com/fsouza/go-dockerclient"
"github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/log"
@@ -32,19 +36,12 @@ type Auther interface {
}
type runResult struct {
error
err error
status string
}
func (r *runResult) Error() string {
if r.error == nil {
return ""
}
return r.error.Error()
}
func (r *runResult) Status() string { return r.status }
func (r *runResult) UserVisible() bool { return common.IsUserVisibleError(r.error) }
func (r *runResult) Error() error { return r.err }
func (r *runResult) Status() string { return r.status }
type DockerDriver struct {
conf drivers.Config
@@ -146,9 +143,7 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
"image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": container.Name,
}).WithError(err).Error("Could not create container")
if ce := containerConfigError(err); ce != nil {
return nil, common.UserError(fmt.Errorf("Failed to create container from task configuration '%s'", ce))
}
// NOTE: if the container fails to create we don't really want to show to user since they aren't directly configuring the container
return nil, err
}
}
@@ -190,10 +185,12 @@ func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.Container
var config docker.AuthConfiguration // default, tries docker hub w/o user/pass
// if any configured host auths match task registry, try them (task docker auth can override)
// TODO this is still a little hairy using suffix, we should probably try to parse it as a
// url and extract the host (from both the config file & image)
for _, v := range drv.auths {
// TODO doubt this works. copied to attempt to keep parity. nobody using so... glhf
if strings.HasSuffix(v.ServerAddress, reg) {
if reg != "" && strings.HasSuffix(v.ServerAddress, reg) {
config = v
break
}
}
@@ -240,17 +237,37 @@ func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTa
if err != nil {
log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).WithError(err).Error("Failed to pull image")
// TODO need to inspect for hub or network errors and pick.
return common.UserError(fmt.Errorf("Failed to pull image '%s': %s", task.Image(), err))
// TODO need to inspect for hub or network errors and pick; for now, assume
// 500 if not a docker error
msg := err.Error()
code := http.StatusInternalServerError
if dErr, ok := err.(*docker.Error); ok {
msg = dockerMsg(dErr)
code = dErr.Status // 401/404
}
// TODO what about a case where credentials were good, then credentials
// were invalidated -- do we need to keep the credential cache docker
// driver side and after pull for this case alone?
return models.NewAPIError(code, fmt.Errorf("Failed to pull image '%s': %s", task.Image(), msg))
}
return nil
}
// removes docker err formatting: 'API Error (code) {"message":"..."}'
func dockerMsg(derr *docker.Error) string {
// derr.Message is a JSON response from docker, which has a "message" field we want to extract if possible.
// this is pretty lame, but it is what it is
var v struct {
Msg string `json:"message"`
}
err := json.Unmarshal([]byte(derr.Message), &v)
if err != nil {
// If message was not valid JSON, the raw body is still better than nothing.
return derr.Message
}
return v.Msg
}
// Run executes the docker container. If task runs, drivers.RunResult will be returned. If something fails outside the task (ie: Docker), it will return error.
// The docker driver will attempt to cast the task to a Auther. If that succeeds, private image support is available. See the Auther interface for how to implement this.
func (drv *DockerDriver) run(ctx context.Context, container string, task drivers.ContainerTask) (drivers.WaitResult, error) {
@@ -305,7 +322,7 @@ func (w *waitResult) Wait(ctx context.Context) (drivers.RunResult, error) {
status, err := w.drv.wait(ctx, w.container)
return &runResult{
status: status,
error: err,
err: err,
}, nil
}
@@ -428,9 +445,9 @@ func (drv *DockerDriver) wait(ctx context.Context, container string) (status str
case <-ctx.Done(): // check if task was canceled or timed out
switch ctx.Err() {
case context.DeadlineExceeded:
return drivers.StatusTimeout, nil
return drivers.StatusTimeout, context.DeadlineExceeded
case context.Canceled:
return drivers.StatusCancelled, nil
return drivers.StatusCancelled, context.Canceled
}
default:
}
@@ -438,11 +455,11 @@ func (drv *DockerDriver) wait(ctx context.Context, container string) (status str
switch exitCode {
default:
return drivers.StatusError, common.UserError(fmt.Errorf("container exit code %d", exitCode))
return drivers.StatusError, models.NewAPIError(http.StatusBadGateway, fmt.Errorf("container exit code %d", exitCode))
case 0:
return drivers.StatusSuccess, nil
case 137: // OOM
opentracing.SpanFromContext(ctx).LogFields(log.String("docker", "oom"))
return drivers.StatusKilled, drivers.ErrOutOfMemory
return drivers.StatusKilled, models.NewAPIError(http.StatusBadGateway, errors.New("container out of memory"))
}
}

View File

@@ -5,8 +5,6 @@ package docker
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"net"
"net/http"
"strings"
@@ -130,25 +128,7 @@ func isDocker50x(err error) bool {
return ok && derr.Status >= 500
}
func containerConfigError(err error) error {
derr, ok := err.(*docker.Error)
if ok && derr.Status == 400 {
// derr.Message is a JSON response from docker, which has a "message" field we want to extract if possible.
var v struct {
Msg string `json:"message"`
}
err := json.Unmarshal([]byte(derr.Message), &v)
if err != nil {
// If message was not valid JSON, the raw body is still better than nothing.
return fmt.Errorf("%s", derr.Message)
}
return fmt.Errorf("%s", v.Msg)
}
return nil
}
// implement common.Temporary()
type temporary struct {
error
}

View File

@@ -4,7 +4,6 @@ package drivers
import (
"context"
"errors"
"io"
"strings"
"time"
@@ -53,19 +52,17 @@ type Driver interface {
// RunResult indicates only the final state of the task.
type RunResult interface {
// Error is an actionable/checkable error from the container.
error
// Error is an actionable/checkable error from the container, nil if
// Status() returns "success", otherwise non-nil
Error() error
// Status should return the current status of the task.
// Only valid options are {"error", "success", "timeout", "killed", "cancelled"}.
Status() string
}
// The ContainerTask interface guides task execution across a wide variety of
// The ContainerTask interface guides container execution across a wide variety of
// container oriented runtimes.
// This interface is unstable.
//
// FIXME: This interface is large, and it is currently a little Docker specific.
type ContainerTask interface {
// Command returns the command to run within the container.
Command() string
@@ -116,21 +113,6 @@ type Stat struct {
Metrics map[string]uint64
}
// Set of acceptable errors coming from container engines to TaskRunner
var (
// ErrOutOfMemory for OOM in container engine
ErrOutOfMemory = userError(errors.New("out of memory error"))
)
// TODO agent.UserError should be elsewhere
func userError(err error) error { return &ue{err} }
type ue struct {
error
}
func (u *ue) UserVisible() bool { return true }
// TODO: ensure some type is applied to these statuses.
const (
// task statuses

View File

@@ -32,18 +32,18 @@ func (c *cookie) Run(ctx context.Context) (drivers.WaitResult, error) {
return nil, fmt.Errorf("Mocker error! Bad.")
}
return &runResult{
error: nil,
err: nil,
status: "success",
start: time.Now(),
}, nil
}
type runResult struct {
error
err error
status string
start time.Time
}
func (r *runResult) Wait(context.Context) (drivers.RunResult, error) { return r, nil }
func (r *runResult) Status() string { return r.status }
func (r *runResult) StartTime() time.Time { return r.start }
func (r *runResult) Error() error { return r.err }