bubble up some docker errors to user

currently:

* container ran out of memory (code 137)
* container exited with other code != 0
* unable to pull image (auth/404)

there may be others but this is a good start (the most common). notably, for
both hot and cold these should bubble up (if deterministic, which hub isn't
always), and these are useful for users to use in debugging why things aren't
working.

added tests to make sure that these behaviors are working.

also changed the behavior such that when the container exits we return a 502
instead of a 503, just to be able to distinguish the fact that fn is working
as expected but the container is acting funky (400 is weird here, so idk).

removed references to old IsUserVisible crap and slightly changed the
interface for RunResult for plumbing reasons (to get the error type,
specifically).

fixed an issue where if ~/.docker/config.json exists sometimes pulling images
wouldn't work deterministically (should be more inline w/ expectations now)

closes #275
This commit is contained in:
Reed Allman
2017-09-07 11:36:30 -07:00
parent 779b53425c
commit 700078ccb9
10 changed files with 72 additions and 104 deletions

View File

@@ -442,12 +442,12 @@ func (s *coldSlot) exec(ctx context.Context, call *call) error {
res, err := waiter.Wait(ctx) res, err := waiter.Wait(ctx)
if err != nil { if err != nil {
return err return err
} else if res.Error() != "" { } else if res.Error() != nil {
// check for call error (oom/exit) and beam it up // check for call error (oom/exit) and beam it up
return res return res.Error()
} }
// nil or timed out (Wait will silently return nil if it encounters a timeout, maybe TODO) // nil or timed out
return ctx.Err() return ctx.Err()
} }
@@ -666,10 +666,11 @@ func (a *agent) runHot(slots chan<- slot, call *call, tok Token) error {
} }
}() }()
// we can discard the result, mostly for timeouts / cancels. res, err := waiter.Wait(ctx)
_, err = waiter.Wait(ctx)
if err != nil { if err != nil {
errC <- err errC <- err
} else if res.Error() != nil {
errC <- res.Error()
} }
logger.WithError(err).Info("hot function terminated") logger.WithError(err).Info("hot function terminated")

View File

@@ -2,14 +2,18 @@ package docker
import ( import (
"context" "context"
"encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"net/http"
"os" "os"
"path" "path"
"strings" "strings"
"github.com/fnproject/fn/api/agent/drivers" "github.com/fnproject/fn/api/agent/drivers"
"github.com/fnproject/fn/api/common" "github.com/fnproject/fn/api/common"
"github.com/fnproject/fn/api/models"
"github.com/fsouza/go-dockerclient" "github.com/fsouza/go-dockerclient"
"github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/log" "github.com/opentracing/opentracing-go/log"
@@ -32,19 +36,12 @@ type Auther interface {
} }
type runResult struct { type runResult struct {
error err error
status string status string
} }
func (r *runResult) Error() string { func (r *runResult) Error() error { return r.err }
if r.error == nil { func (r *runResult) Status() string { return r.status }
return ""
}
return r.error.Error()
}
func (r *runResult) Status() string { return r.status }
func (r *runResult) UserVisible() bool { return common.IsUserVisibleError(r.error) }
type DockerDriver struct { type DockerDriver struct {
conf drivers.Config conf drivers.Config
@@ -146,9 +143,7 @@ func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask
"image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": container.Name, "image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": container.Name,
}).WithError(err).Error("Could not create container") }).WithError(err).Error("Could not create container")
if ce := containerConfigError(err); ce != nil { // NOTE: if the container fails to create we don't really want to show to user since they aren't directly configuring the container
return nil, common.UserError(fmt.Errorf("Failed to create container from task configuration '%s'", ce))
}
return nil, err return nil, err
} }
} }
@@ -190,10 +185,12 @@ func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.Container
var config docker.AuthConfiguration // default, tries docker hub w/o user/pass var config docker.AuthConfiguration // default, tries docker hub w/o user/pass
// if any configured host auths match task registry, try them (task docker auth can override) // if any configured host auths match task registry, try them (task docker auth can override)
// TODO this is still a little hairy using suffix, we should probably try to parse it as a
// url and extract the host (from both the config file & image)
for _, v := range drv.auths { for _, v := range drv.auths {
// TODO doubt this works. copied to attempt to keep parity. nobody using so... glhf if reg != "" && strings.HasSuffix(v.ServerAddress, reg) {
if strings.HasSuffix(v.ServerAddress, reg) {
config = v config = v
break
} }
} }
@@ -240,17 +237,37 @@ func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTa
if err != nil { if err != nil {
log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).WithError(err).Error("Failed to pull image") log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).WithError(err).Error("Failed to pull image")
// TODO need to inspect for hub or network errors and pick. // TODO need to inspect for hub or network errors and pick; for now, assume
return common.UserError(fmt.Errorf("Failed to pull image '%s': %s", task.Image(), err)) // 500 if not a docker error
msg := err.Error()
code := http.StatusInternalServerError
if dErr, ok := err.(*docker.Error); ok {
msg = dockerMsg(dErr)
code = dErr.Status // 401/404
}
// TODO what about a case where credentials were good, then credentials return models.NewAPIError(code, fmt.Errorf("Failed to pull image '%s': %s", task.Image(), msg))
// were invalidated -- do we need to keep the credential cache docker
// driver side and after pull for this case alone?
} }
return nil return nil
} }
// removes docker err formatting: 'API Error (code) {"message":"..."}'
func dockerMsg(derr *docker.Error) string {
// derr.Message is a JSON response from docker, which has a "message" field we want to extract if possible.
// this is pretty lame, but it is what it is
var v struct {
Msg string `json:"message"`
}
err := json.Unmarshal([]byte(derr.Message), &v)
if err != nil {
// If message was not valid JSON, the raw body is still better than nothing.
return derr.Message
}
return v.Msg
}
// Run executes the docker container. If task runs, drivers.RunResult will be returned. If something fails outside the task (ie: Docker), it will return error. // Run executes the docker container. If task runs, drivers.RunResult will be returned. If something fails outside the task (ie: Docker), it will return error.
// The docker driver will attempt to cast the task to a Auther. If that succeeds, private image support is available. See the Auther interface for how to implement this. // The docker driver will attempt to cast the task to a Auther. If that succeeds, private image support is available. See the Auther interface for how to implement this.
func (drv *DockerDriver) run(ctx context.Context, container string, task drivers.ContainerTask) (drivers.WaitResult, error) { func (drv *DockerDriver) run(ctx context.Context, container string, task drivers.ContainerTask) (drivers.WaitResult, error) {
@@ -305,7 +322,7 @@ func (w *waitResult) Wait(ctx context.Context) (drivers.RunResult, error) {
status, err := w.drv.wait(ctx, w.container) status, err := w.drv.wait(ctx, w.container)
return &runResult{ return &runResult{
status: status, status: status,
error: err, err: err,
}, nil }, nil
} }
@@ -428,9 +445,9 @@ func (drv *DockerDriver) wait(ctx context.Context, container string) (status str
case <-ctx.Done(): // check if task was canceled or timed out case <-ctx.Done(): // check if task was canceled or timed out
switch ctx.Err() { switch ctx.Err() {
case context.DeadlineExceeded: case context.DeadlineExceeded:
return drivers.StatusTimeout, nil return drivers.StatusTimeout, context.DeadlineExceeded
case context.Canceled: case context.Canceled:
return drivers.StatusCancelled, nil return drivers.StatusCancelled, context.Canceled
} }
default: default:
} }
@@ -438,11 +455,11 @@ func (drv *DockerDriver) wait(ctx context.Context, container string) (status str
switch exitCode { switch exitCode {
default: default:
return drivers.StatusError, common.UserError(fmt.Errorf("container exit code %d", exitCode)) return drivers.StatusError, models.NewAPIError(http.StatusBadGateway, fmt.Errorf("container exit code %d", exitCode))
case 0: case 0:
return drivers.StatusSuccess, nil return drivers.StatusSuccess, nil
case 137: // OOM case 137: // OOM
opentracing.SpanFromContext(ctx).LogFields(log.String("docker", "oom")) opentracing.SpanFromContext(ctx).LogFields(log.String("docker", "oom"))
return drivers.StatusKilled, drivers.ErrOutOfMemory return drivers.StatusKilled, models.NewAPIError(http.StatusBadGateway, errors.New("container out of memory"))
} }
} }

View File

@@ -5,8 +5,6 @@ package docker
import ( import (
"context" "context"
"crypto/tls" "crypto/tls"
"encoding/json"
"fmt"
"net" "net"
"net/http" "net/http"
"strings" "strings"
@@ -130,25 +128,7 @@ func isDocker50x(err error) bool {
return ok && derr.Status >= 500 return ok && derr.Status >= 500
} }
func containerConfigError(err error) error { // implement common.Temporary()
derr, ok := err.(*docker.Error)
if ok && derr.Status == 400 {
// derr.Message is a JSON response from docker, which has a "message" field we want to extract if possible.
var v struct {
Msg string `json:"message"`
}
err := json.Unmarshal([]byte(derr.Message), &v)
if err != nil {
// If message was not valid JSON, the raw body is still better than nothing.
return fmt.Errorf("%s", derr.Message)
}
return fmt.Errorf("%s", v.Msg)
}
return nil
}
type temporary struct { type temporary struct {
error error
} }

View File

@@ -4,7 +4,6 @@ package drivers
import ( import (
"context" "context"
"errors"
"io" "io"
"strings" "strings"
"time" "time"
@@ -53,19 +52,17 @@ type Driver interface {
// RunResult indicates only the final state of the task. // RunResult indicates only the final state of the task.
type RunResult interface { type RunResult interface {
// Error is an actionable/checkable error from the container. // Error is an actionable/checkable error from the container, nil if
error // Status() returns "success", otherwise non-nil
Error() error
// Status should return the current status of the task. // Status should return the current status of the task.
// Only valid options are {"error", "success", "timeout", "killed", "cancelled"}. // Only valid options are {"error", "success", "timeout", "killed", "cancelled"}.
Status() string Status() string
} }
// The ContainerTask interface guides task execution across a wide variety of // The ContainerTask interface guides container execution across a wide variety of
// container oriented runtimes. // container oriented runtimes.
// This interface is unstable.
//
// FIXME: This interface is large, and it is currently a little Docker specific.
type ContainerTask interface { type ContainerTask interface {
// Command returns the command to run within the container. // Command returns the command to run within the container.
Command() string Command() string
@@ -116,21 +113,6 @@ type Stat struct {
Metrics map[string]uint64 Metrics map[string]uint64
} }
// Set of acceptable errors coming from container engines to TaskRunner
var (
// ErrOutOfMemory for OOM in container engine
ErrOutOfMemory = userError(errors.New("out of memory error"))
)
// TODO agent.UserError should be elsewhere
func userError(err error) error { return &ue{err} }
type ue struct {
error
}
func (u *ue) UserVisible() bool { return true }
// TODO: ensure some type is applied to these statuses. // TODO: ensure some type is applied to these statuses.
const ( const (
// task statuses // task statuses

View File

@@ -32,18 +32,18 @@ func (c *cookie) Run(ctx context.Context) (drivers.WaitResult, error) {
return nil, fmt.Errorf("Mocker error! Bad.") return nil, fmt.Errorf("Mocker error! Bad.")
} }
return &runResult{ return &runResult{
error: nil, err: nil,
status: "success", status: "success",
start: time.Now(), start: time.Now(),
}, nil }, nil
} }
type runResult struct { type runResult struct {
error err error
status string status string
start time.Time start time.Time
} }
func (r *runResult) Wait(context.Context) (drivers.RunResult, error) { return r, nil } func (r *runResult) Wait(context.Context) (drivers.RunResult, error) { return r, nil }
func (r *runResult) Status() string { return r.status } func (r *runResult) Status() string { return r.status }
func (r *runResult) StartTime() time.Time { return r.start } func (r *runResult) Error() error { return r.err }

View File

@@ -6,26 +6,6 @@ import (
"syscall" "syscall"
) )
// Errors that can be directly exposed to call creators/users.
type UserVisibleError interface {
UserVisible() bool
}
func IsUserVisibleError(err error) bool {
ue, ok := err.(UserVisibleError)
return ok && ue.UserVisible()
}
type userVisibleError struct {
error
}
func (u *userVisibleError) UserVisible() bool { return true }
func UserError(err error) error {
return &userVisibleError{err}
}
type Temporary interface { type Temporary interface {
Temporary() bool Temporary() bool
} }

View File

@@ -168,6 +168,8 @@ type err struct {
func (e err) Code() int { return e.code } func (e err) Code() int { return e.code }
func NewAPIError(code int, e error) APIError { return err{code, e} }
// uniform error output // uniform error output
type Error struct { type Error struct {
Error *ErrorBody `json:"error,omitempty"` Error *ErrorBody `json:"error,omitempty"`

View File

@@ -3,7 +3,6 @@ package server
import ( import (
"bytes" "bytes"
"context" "context"
"fmt"
"net/http" "net/http"
"testing" "testing"
@@ -33,6 +32,8 @@ func testRouterAsync(ds models.Datastore, mq models.MessageQueue, rnr agent.Agen
} }
func TestRouteRunnerAsyncExecution(t *testing.T) { func TestRouteRunnerAsyncExecution(t *testing.T) {
buf := setLogBuffer()
ds := datastore.NewMockInit( ds := datastore.NewMockInit(
[]*models.App{ []*models.App{
{Name: "myapp", Config: map[string]string{"app": "true"}}, {Name: "myapp", Config: map[string]string{"app": "true"}},
@@ -71,20 +72,21 @@ func TestRouteRunnerAsyncExecution(t *testing.T) {
} { } {
body := bytes.NewBuffer([]byte(test.body)) body := bytes.NewBuffer([]byte(test.body))
fmt.Println("About to start router") t.Log("About to start router")
rnr, cancel := testRunner(t, ds) rnr, cancel := testRunner(t, ds)
router := testRouterAsync(ds, mq, rnr) router := testRouterAsync(ds, mq, rnr)
fmt.Println("makeing requests") t.Log("making requests")
req, rec := newRouterRequest(t, "POST", test.path, body) req, rec := newRouterRequest(t, "POST", test.path, body)
for name, value := range test.headers { for name, value := range test.headers {
req.Header.Set(name, value[0]) req.Header.Set(name, value[0])
} }
fmt.Println("About to start router2") t.Log("About to start router2")
router.ServeHTTP(rec, req) router.ServeHTTP(rec, req)
fmt.Println("after servehttp") t.Log("after servehttp")
if rec.Code != test.expectedCode { if rec.Code != test.expectedCode {
t.Log(buf.String())
t.Errorf("Test %d: Expected status code to be %d but was %d", t.Errorf("Test %d: Expected status code to be %d but was %d",
i, test.expectedCode, rec.Code) i, test.expectedCode, rec.Code)
} }

View File

@@ -131,6 +131,8 @@ func TestRouteRunnerExecution(t *testing.T) {
{Path: "/", AppName: "myapp", Image: "fnproject/hello", Headers: map[string][]string{"X-Function": {"Test"}}}, {Path: "/", AppName: "myapp", Image: "fnproject/hello", Headers: map[string][]string{"X-Function": {"Test"}}},
{Path: "/myroute", AppName: "myapp", Image: "fnproject/hello", Headers: map[string][]string{"X-Function": {"Test"}}}, {Path: "/myroute", AppName: "myapp", Image: "fnproject/hello", Headers: map[string][]string{"X-Function": {"Test"}}},
{Path: "/myerror", AppName: "myapp", Image: "fnproject/error", Headers: map[string][]string{"X-Function": {"Test"}}}, {Path: "/myerror", AppName: "myapp", Image: "fnproject/error", Headers: map[string][]string{"X-Function": {"Test"}}},
{Path: "/mydne", AppName: "myapp", Image: "fnproject/imagethatdoesnotexist"},
{Path: "/mydnehot", AppName: "myapp", Image: "fnproject/imagethatdoesnotexist", Format: "http"},
}, nil, }, nil,
) )
@@ -149,12 +151,14 @@ func TestRouteRunnerExecution(t *testing.T) {
}{ }{
{"/r/myapp/", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}}, {"/r/myapp/", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}},
{"/r/myapp/myroute", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}}, {"/r/myapp/myroute", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}},
{"/r/myapp/myerror", ``, "GET", http.StatusInternalServerError, map[string][]string{"X-Function": {"Test"}}}, {"/r/myapp/myerror", ``, "GET", http.StatusBadGateway, map[string][]string{"X-Function": {"Test"}}},
{"/r/myapp/mydne", ``, "GET", http.StatusNotFound, nil},
{"/r/myapp/mydnehot", ``, "GET", http.StatusNotFound, nil},
// Added same tests again to check if time is reduced by the auth cache // Added same tests again to check if time is reduced by the auth cache
{"/r/myapp/", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}}, {"/r/myapp/", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}},
{"/r/myapp/myroute", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}}, {"/r/myapp/myroute", ``, "GET", http.StatusOK, map[string][]string{"X-Function": {"Test"}}},
{"/r/myapp/myerror", ``, "GET", http.StatusInternalServerError, map[string][]string{"X-Function": {"Test"}}}, {"/r/myapp/myerror", ``, "GET", http.StatusBadGateway, map[string][]string{"X-Function": {"Test"}}},
} { } {
body := strings.NewReader(test.body) body := strings.NewReader(test.body)
_, rec := routerRequest(t, srv.Router, test.method, test.path, body) _, rec := routerRequest(t, srv.Router, test.method, test.path, body)

View File

@@ -118,7 +118,7 @@ func TestFullStack(t *testing.T) {
{"get myroute2", "GET", "/v1/apps/myapp/routes/myroute2", ``, http.StatusOK, 0}, {"get myroute2", "GET", "/v1/apps/myapp/routes/myroute2", ``, http.StatusOK, 0},
{"get all routes", "GET", "/v1/apps/myapp/routes", ``, http.StatusOK, 0}, {"get all routes", "GET", "/v1/apps/myapp/routes", ``, http.StatusOK, 0},
{"execute myroute", "POST", "/r/myapp/myroute", `{ "name": "Teste" }`, http.StatusOK, 1}, {"execute myroute", "POST", "/r/myapp/myroute", `{ "name": "Teste" }`, http.StatusOK, 1},
{"execute myroute2", "POST", "/r/myapp/myroute2", `{ "name": "Teste" }`, http.StatusInternalServerError, 2}, {"execute myroute2", "POST", "/r/myapp/myroute2", `{ "name": "Teste" }`, http.StatusBadGateway, 2},
{"get myroute2", "GET", "/v1/apps/myapp/routes/myroute2", ``, http.StatusOK, 2}, {"get myroute2", "GET", "/v1/apps/myapp/routes/myroute2", ``, http.StatusOK, 2},
{"delete myroute", "DELETE", "/v1/apps/myapp/routes/myroute", ``, http.StatusOK, 1}, {"delete myroute", "DELETE", "/v1/apps/myapp/routes/myroute", ``, http.StatusOK, 1},
{"delete myroute2", "DELETE", "/v1/apps/myapp/routes/myroute2", ``, http.StatusOK, 0}, {"delete myroute2", "DELETE", "/v1/apps/myapp/routes/myroute2", ``, http.StatusOK, 0},