// +build go1.7

package docker

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/fnproject/fn/api/common"
	"github.com/fsouza/go-dockerclient"
	"github.com/sirupsen/logrus"
	"go.opencensus.io/stats"
	"go.opencensus.io/stats/view"
	"go.opencensus.io/tag"
	"go.opencensus.io/trace"
)

const (
	eventRetryDelay = 1 * time.Second
)

// dockerClient wraps docker client calls so we can retry 500s. It kind of sucks, but fsouza
// doesn't bake in retries we can use internally; we could contribute that at some point, and it
// would be much more convenient if we didn't have to do this, but it's better than ad hoc retries.
// It also adds timeouts to many operations, varying by operation.
// TODO could generate this, maybe not worth it, may not change often
type dockerClient interface {
	// Each of these is a github.com/fsouza/go-dockerclient method

	AttachToContainerNonBlocking(ctx context.Context, opts docker.AttachToContainerOptions) (docker.CloseWaiter, error)
	WaitContainerWithContext(id string, ctx context.Context) (int, error)
	StartContainerWithContext(id string, hostConfig *docker.HostConfig, ctx context.Context) error
	KillContainer(opts docker.KillContainerOptions) error
	CreateContainer(opts docker.CreateContainerOptions) (*docker.Container, error)
	RemoveContainer(opts docker.RemoveContainerOptions) error
	PauseContainer(id string, ctx context.Context) error
	UnpauseContainer(id string, ctx context.Context) error
	PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) error
	InspectImage(ctx context.Context, name string) (*docker.Image, error)
	ListImages(opts docker.ListImagesOptions) ([]docker.APIImages, error)
	RemoveImage(id string, opts docker.RemoveImageOptions) error
	Stats(opts docker.StatsOptions) error
	Info(ctx context.Context) (*docker.DockerInfo, error)
	DiskUsage(opts docker.DiskUsageOptions) (*docker.DiskUsage, error)
	LoadImages(ctx context.Context, filePath string) error
}

// TODO: switch to github.com/docker/engine-api
func newClient(ctx context.Context, maxRetries uint64) dockerClient {
	// TODO this was much easier, don't need special settings at the moment
	// docker, err := docker.NewClient(conf.Docker)
	client, err := docker.NewClientFromEnv()
	if err != nil {
		logrus.WithError(err).Fatal("couldn't create docker client")
	}

	if err := client.Ping(); err != nil {
		logrus.WithError(err).Fatal("couldn't connect to docker daemon")
	}

	// punch in default if not set
	if maxRetries == 0 {
		maxRetries = 10
	}

	go listenEventLoop(ctx, client)
	return &dockerWrap{docker: client, maxRetries: maxRetries}
}
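
// exampleWrapperUsage is an illustrative sketch only and is not called anywhere in this
// package: it shows how the retrying wrapper above is meant to be consumed. It uses only
// identifiers defined in this file; passing 0 for maxRetries falls back to the default of 10.
func exampleWrapperUsage(ctx context.Context) {
	client := newClient(ctx, 0)
	if _, err := client.Info(ctx); err != nil {
		logrus.WithError(err).Error("docker info failed")
	}
}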

type dockerWrap struct {
	docker     *docker.Client
	maxRetries uint64
}

var (
	apiNameKey     = common.MakeKey("api_name")
	exitStatusKey  = common.MakeKey("exit_status")
	eventActionKey = common.MakeKey("event_action")
	eventTypeKey   = common.MakeKey("event_type")

	dockerRetriesMeasure = common.MakeMeasure("docker_api_retries", "docker api retries", "")
	dockerTimeoutMeasure = common.MakeMeasure("docker_api_timeout", "docker api timeouts", "")
	dockerErrorMeasure   = common.MakeMeasure("docker_api_error", "docker api errors", "")
	dockerExitMeasure    = common.MakeMeasure("docker_exits", "docker exit counts", "")

	// WARNING: this metric reports total latency per *wrapper* call, which adds up multiple retry latencies per wrapper call.
	dockerLatencyMeasure = common.MakeMeasure("docker_api_latency", "Docker wrapper latency", "msecs")

	dockerEventsMeasure = common.MakeMeasure("docker_events", "docker events", "")
)

// listenEventLoop listens for docker events and reconnects if necessary
func listenEventLoop(ctx context.Context, client *docker.Client) {
	for ctx.Err() == nil {
		err := listenEvents(ctx, client)
		if err != nil {
			logrus.WithError(err).Error("listenEvents failed, will retry...")
			// slow down reconnects. yes, we will miss events during this time.
			select {
			case <-time.After(eventRetryDelay):
			case <-ctx.Done():
				return
			}
		}
	}
}

// listenEvents registers an event listener with docker to stream docker events
// and records these in stats.
func listenEvents(ctx context.Context, client *docker.Client) error {
	listener := make(chan *docker.APIEvents)

	err := client.AddEventListener(listener)
	if err != nil {
		return err
	}

	defer client.RemoveEventListener(listener)

	for {
		select {
		case ev := <-listener:
			if ev == nil {
				return errors.New("event listener closed")
			}

			ctx, err := tag.New(context.Background(),
				tag.Upsert(eventActionKey, ev.Action),
				tag.Upsert(eventTypeKey, ev.Type),
			)
			if err != nil {
				logrus.WithError(err).Fatalf("cannot add event tags %v=%v %v=%v",
					eventActionKey, ev.Action,
					eventTypeKey, ev.Type,
				)
			}

			stats.Record(ctx, dockerEventsMeasure.M(0))
		case <-ctx.Done():
			return nil
		}
	}
}

// makeTracker creates a span/tracker with the required context tags
func makeTracker(ctx context.Context, name string) (context.Context, func()) {
	ctx, err := tag.New(ctx, tag.Upsert(apiNameKey, name))
	if err != nil {
		logrus.WithError(err).Fatalf("cannot add tag %v=%v", apiNameKey, name)
	}

	// It would have been nice to pull the latency (end-start) elapsed time
	// from Spans but this is hidden from us, so we have to call time.Now()
	// twice ourselves.
	ctx, span := trace.StartSpan(ctx, name)
	start := time.Now()

	return ctx, func() {
		stats.Record(ctx, dockerLatencyMeasure.M(int64(time.Since(start)/time.Millisecond)))
		span.End()
	}
}

// RecordWaitContainerResult records a container wait result in the docker_exits metric.
func RecordWaitContainerResult(ctx context.Context, exitCode int) {

	// Tag the metric with error-code or context-cancel/deadline info
	exitStr := fmt.Sprintf("exit_%d", exitCode)
	if exitCode == 0 && ctx.Err() != nil {
		switch ctx.Err() {
		case context.DeadlineExceeded:
			exitStr = "ctx_deadline"
		case context.Canceled:
			exitStr = "ctx_canceled"
		}
	}

	newCtx, err := tag.New(ctx,
		tag.Upsert(apiNameKey, "docker_wait_container"),
		tag.Upsert(exitStatusKey, exitStr),
	)
	if err != nil {
		logrus.WithError(err).Fatalf("cannot add tag %v=%v or tag %v=docker_wait_container", exitStatusKey, exitStr, apiNameKey)
	}
	stats.Record(newCtx, dockerExitMeasure.M(0))
}
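
// exampleRecordWait is an illustrative sketch only; the real call sites live outside this
// file. It shows the intended pairing: wait on a container, then record the exit status so
// the docker_exits view gets its exit_status tag. The id parameter here is hypothetical.
func exampleRecordWait(ctx context.Context, d *dockerWrap, id string) {
	code, err := d.WaitContainerWithContext(id, ctx)
	if err != nil {
		logrus.WithError(err).Error("waiting on container failed")
	}
	RecordWaitContainerResult(ctx, code)
}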

// RegisterViews creates and registers views with provided tag keys
func RegisterViews(tagKeys []string, latencyDist []float64) {

	defaultTags := []tag.Key{apiNameKey}
	exitTags := []tag.Key{apiNameKey, exitStatusKey}
	eventTags := []tag.Key{eventActionKey, eventTypeKey}

	// add extra tags if not already in default tags for req/resp
	for _, key := range tagKeys {
		if key != "api_name" {
			defaultTags = append(defaultTags, common.MakeKey(key))
		}
		if key != "api_name" && key != "exit_status" {
			exitTags = append(exitTags, common.MakeKey(key))
		}
	}

	err := view.Register(
		common.CreateViewWithTags(dockerRetriesMeasure, view.Sum(), defaultTags),
		common.CreateViewWithTags(dockerTimeoutMeasure, view.Count(), defaultTags),
		common.CreateViewWithTags(dockerErrorMeasure, view.Count(), defaultTags),
		common.CreateViewWithTags(dockerExitMeasure, view.Count(), exitTags),
		common.CreateViewWithTags(dockerLatencyMeasure, view.Distribution(latencyDist...), defaultTags),
		common.CreateViewWithTags(dockerEventsMeasure, view.Count(), eventTags),
	)
	if err != nil {
		logrus.WithError(err).Fatal("cannot register view")
	}
}
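
// exampleRegisterViews is an illustrative sketch only: a hypothetical startup call that wires
// the docker views into opencensus. The extra tag key and the latency buckets (msecs) are
// invented for the example, not taken from the real server configuration.
func exampleRegisterViews() {
	RegisterViews(
		[]string{"fn_appname"},
		[]float64{1, 10, 50, 100, 250, 500, 1000, 5000},
	)
}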

func (d *dockerWrap) retry(ctx context.Context, logger logrus.FieldLogger, f func() error) error {
	var i uint64
	var err error
	defer func() { stats.Record(ctx, dockerRetriesMeasure.M(int64(i))) }()

	var b common.Backoff
	// 10 retries w/o change to backoff is ~13s if ops take ~0 time
	for ; i < d.maxRetries; i++ {
		select {
		case <-ctx.Done():
			stats.Record(ctx, dockerTimeoutMeasure.M(0))
			logger.WithError(ctx.Err()).Warn("docker call timed out")
			return ctx.Err()
		default:
		}

		err = filter(ctx, f())
		if common.IsTemporary(err) || isDocker50x(err) {
			logger.WithError(err).Warn("docker temporary error, retrying")
			b.Sleep(ctx)
			continue
		}
		if err != nil {
			stats.Record(ctx, dockerErrorMeasure.M(0))
		}
		return err
	}
	return err // TODO could return context.DeadlineExceeded which ~makes sense
}

func isDocker50x(err error) bool {
	derr, ok := err.(*docker.Error)
	return ok && derr.Status >= 500
}

// implement common.Temporary()
type temporary struct {
	error
}

func (t *temporary) Temporary() bool { return true }

func temp(err error) error {
	return &temporary{err}
}
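
// isTemporaryLike is an illustrative sketch only and is not used by the wrapper:
// common.IsTemporary lives in another package and its implementation is not shown here,
// but the conventional Go check implied by the temporary type above looks roughly like this.
func isTemporaryLike(err error) bool {
	t, ok := err.(interface{ Temporary() bool })
	return ok && t.Temporary()
}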

// some 500s are totally cool
func filter(ctx context.Context, err error) error {
	log := common.Logger(ctx)
	// "API error (500): {\"message\":\"service endpoint with name task-57d722ecdecb9e7be16aff17 already exists\"}\n" -> ok since container exists
	switch {
	default:
		return err
	case err == nil:
		return err
	case strings.Contains(err.Error(), "service endpoint with name"):
	}
	log.WithError(err).Warn("filtering error")
	return nil
}

func filterNoSuchContainer(ctx context.Context, err error) error {
	log := common.Logger(ctx)
	if err == nil {
		return nil
	}
	_, containerNotFound := err.(*docker.NoSuchContainer)
	dockerErr, ok := err.(*docker.Error)
	if containerNotFound || (ok && dockerErr.Status == 404) {
		log.WithError(err).Info("filtering error")
		return nil
	}
	return err
}

func (d *dockerWrap) LoadImages(ctx context.Context, filePath string) error {
	ctx, closer := makeTracker(ctx, "docker_load_images")
	defer closer()

	file, err := os.Open(filepath.Clean(filePath))
	if err != nil {
		return err
	}
	defer file.Close()

	// No retries here. LoadImages is typically called at startup, and we want to
	// fail/time out on the first attempt.
	return d.docker.LoadImage(docker.LoadImageOptions{
		InputStream: file,
		Context:     ctx,
	})
}

func (d *dockerWrap) ListImages(opts docker.ListImagesOptions) (imgs []docker.APIImages, err error) {
	ctx, closer := makeTracker(opts.Context, "docker_list_images")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "ListImages")
	err = d.retry(ctx, logger, func() error {
		imgs, err = d.docker.ListImages(opts)
		return err
	})

	return imgs, err
}

func (d *dockerWrap) Info(ctx context.Context) (info *docker.DockerInfo, err error) {
	// NOTE: we're not very responsible and prometheus wasn't loved as a child. This
	// threads straight through to the docker call, skipping retries, so that we don't
	// have to add tags / tracing / logger to the bare context handed to the one place
	// this is called during initialization, which has no context to report consistent
	// stats like everything else in here. tl;dr: this works, just don't use it for anything else.
	return d.docker.Info()
}

func (d *dockerWrap) AttachToContainerNonBlocking(ctx context.Context, opts docker.AttachToContainerOptions) (docker.CloseWaiter, error) {
	ctx, closer := makeTracker(ctx, "docker_attach_container")
	defer closer()

	return d.docker.AttachToContainerNonBlocking(opts)
}

func (d *dockerWrap) WaitContainerWithContext(id string, ctx context.Context) (code int, err error) {
	ctx, closer := makeTracker(ctx, "docker_wait_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "WaitContainer")
	err = d.retry(ctx, logger, func() error {
		code, err = d.docker.WaitContainerWithContext(id, ctx)
		return err
	})
	return code, filterNoSuchContainer(ctx, err)
}

func (d *dockerWrap) StartContainerWithContext(id string, hostConfig *docker.HostConfig, ctx context.Context) (err error) {
	ctx, closer := makeTracker(ctx, "docker_start_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "StartContainer")
	err = d.retry(ctx, logger, func() error {
		err = d.docker.StartContainerWithContext(id, hostConfig, ctx)
		if _, ok := err.(*docker.NoSuchContainer); ok {
			// for some reason create will sometimes return successfully then say no such container here. wtf. so just retry like normal
			return temp(err)
		}
		return err
	})
	return err
}

func (d *dockerWrap) CreateContainer(opts docker.CreateContainerOptions) (c *docker.Container, err error) {
	ctx, closer := makeTracker(opts.Context, "docker_create_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "CreateContainer")
	err = d.retry(ctx, logger, func() error {
		c, err = d.docker.CreateContainer(opts)
		return err
	})
	return c, err
}

func (d *dockerWrap) KillContainer(opts docker.KillContainerOptions) (err error) {
	ctx, closer := makeTracker(opts.Context, "docker_kill_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "KillContainer")
	err = d.retry(ctx, logger, func() error {
		err = d.docker.KillContainer(opts)
		return err
	})
	return err
}

func (d *dockerWrap) PullImage(opts docker.PullImageOptions, auth docker.AuthConfiguration) (err error) {
	ctx, closer := makeTracker(opts.Context, "docker_pull_image")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "PullImage")
	err = d.retry(ctx, logger, func() error {
		err = d.docker.PullImage(opts, auth)
		return err
	})
	return err
}

func (d *dockerWrap) RemoveImage(image string, opts docker.RemoveImageOptions) (err error) {
	ctx, closer := makeTracker(opts.Context, "docker_remove_image")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "RemoveImage")
	err = d.retry(ctx, logger, func() error {
		// call the underlying client here, not the wrapper method itself,
		// otherwise we recurse back into retry forever
		err = d.docker.RemoveImageExtended(image, opts)
		return err
	})
	return err
}

func (d *dockerWrap) RemoveContainer(opts docker.RemoveContainerOptions) (err error) {
	ctx, closer := makeTracker(opts.Context, "docker_remove_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "RemoveContainer")
	err = d.retry(ctx, logger, func() error {
		err = d.docker.RemoveContainer(opts)
		return err
	})
	return filterNoSuchContainer(ctx, err)
}

func (d *dockerWrap) PauseContainer(id string, ctx context.Context) (err error) {
	ctx, closer := makeTracker(ctx, "docker_pause_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "PauseContainer")
	err = d.retry(ctx, logger, func() error {
		err = d.docker.PauseContainer(id)
		return err
	})
	return filterNoSuchContainer(ctx, err)
}

func (d *dockerWrap) UnpauseContainer(id string, ctx context.Context) (err error) {
	ctx, closer := makeTracker(ctx, "docker_unpause_container")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "UnpauseContainer")
	err = d.retry(ctx, logger, func() error {
		err = d.docker.UnpauseContainer(id)
		return err
	})
	return filterNoSuchContainer(ctx, err)
}

func (d *dockerWrap) InspectImage(ctx context.Context, name string) (i *docker.Image, err error) {
	ctx, closer := makeTracker(ctx, "docker_inspect_image")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "InspectImage")
	err = d.retry(ctx, logger, func() error {
		i, err = d.docker.InspectImage(name)
		return err
	})
	return i, err
}

func (d *dockerWrap) Stats(opts docker.StatsOptions) (err error) {
	// we can't retry this one this way since the callee closes the
	// stats chan, need a fancier retry mechanism where we can swap out
	// channels, but stats isn't crucial so... be lazy for now
	return d.docker.Stats(opts)

	//err = d.retry(func() error {
	//err = d.docker.Stats(opts)
	//return err
	//})
	//return err
}

func (d *dockerWrap) DiskUsage(opts docker.DiskUsageOptions) (du *docker.DiskUsage, err error) {
	ctx, closer := makeTracker(opts.Context, "docker_disk_usage")
	defer closer()

	logger := common.Logger(ctx).WithField("docker_cmd", "DiskUsage")
	err = d.retry(ctx, logger, func() error {
		du, err = d.docker.DiskUsage(opts)
		return err
	})
	return du, err
}