// fn-serverless/api/runner/drivers/docker/docker.go

// Copyright 2016 Iron.io
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package docker

import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"
	manifest "github.com/docker/distribution/manifest/schema1"
	"github.com/fsouza/go-dockerclient"
	"github.com/heroku/docker-registry-client/registry"

	"gitlab.oracledx.com/odx/functions/api/runner/common"
	"gitlab.oracledx.com/odx/functions/api/runner/common/stats"
	"gitlab.oracledx.com/odx/functions/api/runner/drivers"
)

const hubURL = "https://registry.hub.docker.com"

var registryClient = &http.Client{
	Transport: &http.Transport{
		Dial: (&net.Dialer{
			Timeout:   10 * time.Second,
			KeepAlive: 2 * time.Minute,
		}).Dial,
		TLSClientConfig: &tls.Config{
			ClientSessionCache: tls.NewLRUClientSessionCache(8192),
		},
		TLSHandshakeTimeout:   10 * time.Second,
		MaxIdleConnsPerHost:   32, // TODO tune; we will likely be making lots of requests to same place
		Proxy:                 http.ProxyFromEnvironment,
		IdleConnTimeout:       90 * time.Second,
		MaxIdleConns:          512,
		ExpectContinueTimeout: 1 * time.Second,
	},
}

// A drivers.ContainerTask should implement the Auther interface if it would
// like to use docker images that are not necessarily public for any or all
// task invocations. A sketch of one possible implementation follows the
// interface.
type Auther interface {
	// DockerAuth returns docker auth credentials that will authenticate
	// against a docker registry for a given drivers.ContainerTask.Image().
	// Returning an error prevents the task from running; this is useful for
	// implementers that want to validate auth configurations before returning
	// them, impose restrictions on images, or acquire credentials right before
	// runtime and surface any failure in doing so. If the returned credentials
	// don't work, the docker pull will fail and the task will be set to error
	// status.
	DockerAuth() (docker.AuthConfiguration, error)
}
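
// Illustrative sketch only (not used by the driver): a hypothetical wrapper
// that layers static registry credentials onto an existing
// drivers.ContainerTask so the driver can pull a private image. Real task
// implementations live with the callers; the type and field names here are
// assumptions for the example.
type staticAuthTask struct {
	drivers.ContainerTask
	auth docker.AuthConfiguration
}

// DockerAuth satisfies Auther by handing back the stored credentials. A real
// implementation might validate or refresh them here and return an error to
// keep the task from running at all.
func (t staticAuthTask) DockerAuth() (docker.AuthConfiguration, error) {
	return t.auth, nil
}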

type runResult struct {
	error
	StatusValue string
}

func (r *runResult) Error() string {
	if r.error == nil {
		return ""
	}
	return r.error.Error()
}

func (r *runResult) Status() string { return r.StatusValue }

func (r *runResult) UserVisible() bool { return common.IsUserVisibleError(r.error) }

type DockerDriver struct {
	conf     drivers.Config
	docker   dockerClient // retries on *docker.Client, restricts ad hoc *docker.Client usage / retries
	hostname string
	*common.Environment
}

// implements drivers.Driver
func NewDocker(env *common.Environment, conf drivers.Config) *DockerDriver {
	hostname, err := os.Hostname()
	if err != nil {
		logrus.WithError(err).Fatal("couldn't resolve hostname")
	}

	return &DockerDriver{
		conf:        conf,
		docker:      newClient(env),
		hostname:    hostname,
		Environment: env,
	}
}

// CheckRegistry returns a Sizer, which can be used to check the size of an
// image if the returned error is nil. A nil error also means that
// authentication with the given credentials succeeded. If the configuration
// does not specify a config.ServerAddress, https://hub.docker.com is tried.
// CheckRegistry is a package-level function because rkt can also use docker
// images and we may want to use rkt without a docker driver configured; it
// also means any tasker interested in registry information (2/2 cases thus
// far) does not have to carry a driver around.
func CheckRegistry(image string, config docker.AuthConfiguration) (Sizer, error) {
	registry, repo, tag := drivers.ParseImage(image)

	reg, err := registryForConfig(config, registry)
	if err != nil {
		return nil, err
	}

	mani, err := reg.Manifest(repo, tag)
	if err != nil {
		logrus.WithFields(logrus.Fields{"username": config.Username, "server": config.ServerAddress, "image": image}).WithError(err).Error("Credentials not authorized, trying next.")
		//if !isAuthError(err) {
		//	// TODO we might retry this, since if this was the registry that was supposed to
		//	// auth the task will erroneously be set to 'error'
		//}
		return nil, err
	}

	return &sizer{mani, reg, repo}, nil
}
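
// Hedged usage sketch: how a caller might check an image's compressed layer
// size (and, implicitly, that the credentials work) before scheduling a task.
// The image name and the empty AuthConfiguration are placeholders, and this
// helper is not part of the driver.
func exampleImageSize() (int64, error) {
	sizer, err := CheckRegistry("library/alpine:latest", docker.AuthConfiguration{})
	if err != nil {
		return 0, err // bad credentials, unknown image, or registry unreachable
	}
	return sizer.Size() // sums the sizes of the layers referenced by the manifest
}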

// Sizer returns size information. This interface is liable to contain more
// than a size at some point, change as needed.
type Sizer interface {
	Size() (int64, error)
}

type sizer struct {
	mani *manifest.SignedManifest
	reg  *registry.Registry
	repo string
}

func (s *sizer) Size() (int64, error) {
	var sum int64
	for _, r := range s.mani.References() {
		desc, err := s.reg.LayerMetadata(s.repo, r.Digest)
		if err != nil {
			return 0, err
		}
		sum += desc.Size
	}
	return sum, nil
}

func registryURL(addr string) (string, error) {
	if addr == "" || strings.Contains(addr, "hub.docker.com") || strings.Contains(addr, "index.docker.io") {
		return hubURL, nil
	}

	url, err := url.Parse(addr)
	if err != nil {
		// TODO we could error the task out from this with a user error but since
		// we have a list of auths to check, just return the error so as to be
		// skipped... horrible api as it is
		logrus.WithFields(logrus.Fields{"auth_addr": addr}).WithError(err).Error("error parsing server address url, skipping")
		return "", err
	}

	if url.Scheme == "" {
		url.Scheme = "https"
	}
	url.Path = strings.TrimSuffix(url.Path, "/")
	url.Path = strings.TrimPrefix(url.Path, "/v2")
	url.Path = strings.TrimPrefix(url.Path, "/v1") // just try this, if it fails it fails, not supporting v1

	return url.String(), nil
}
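
// For reference, a couple of worked normalizations of registryURL
// (illustrative; the example host is made up):
//
//	""                              -> "https://registry.hub.docker.com"
//	"index.docker.io/v1/"           -> "https://registry.hub.docker.com"
//	"https://myreg.example.com/v2/" -> "https://myreg.example.com"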

func isAuthError(err error) bool {
	// AARGH!
	if urlError, ok := err.(*url.Error); ok {
		if httpError, ok := urlError.Err.(*registry.HttpStatusError); ok {
			if httpError.Response.StatusCode == 401 {
				return true
			}
		}
	}

	return false
}

func registryForConfig(config docker.AuthConfiguration, reg string) (*registry.Registry, error) {
	if reg == "" {
		reg = config.ServerAddress
	}

	var err error
	config.ServerAddress, err = registryURL(reg)
	if err != nil {
		return nil, err
	}

	// Use this instead of registry.New to avoid the Ping().
	transport := registry.WrapTransport(registryClient.Transport, reg, config.Username, config.Password)
	r := &registry.Registry{
		URL: config.ServerAddress,
		Client: &http.Client{
			Transport: transport,
		},
		Logf: registry.Quiet,
	}
	return r, nil
}

func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask) (drivers.Cookie, error) {
	var cmd []string
	if task.Command() != "" {
		// NOTE: this split is sensitive and may not be correct for every command,
		// but it passes the old tests. task.Command() in swapi is always
		// "sh /mnt/task/.runtask", so splitting on whitespace fields is safe.
		cmd = strings.Fields(task.Command())
		logrus.WithFields(logrus.Fields{"task_id": task.Id(), "cmd": cmd, "len": len(cmd)}).Debug("docker command")
	}

	envvars := make([]string, 0, len(task.EnvVars()))
	for name, val := range task.EnvVars() {
		envvars = append(envvars, name+"="+val)
	}

	containerName := newContainerID(task)
	container := docker.CreateContainerOptions{
		Name: containerName,
		Config: &docker.Config{
			Env:         envvars,
			Cmd:         cmd,
			Memory:      int64(drv.conf.Memory),
			CPUShares:   drv.conf.CPUShares,
			Hostname:    drv.hostname,
			Image:       task.Image(),
			Volumes:     map[string]struct{}{},
			Labels:      task.Labels(),
			OpenStdin:   true,
			AttachStdin: true,
			StdinOnce:   true,
		},
		HostConfig: &docker.HostConfig{},
		Context:    ctx,
	}

	volumes := task.Volumes()
	for _, mapping := range volumes {
		hostDir := mapping[0]
		containerDir := mapping[1]
		container.Config.Volumes[containerDir] = struct{}{}
		mapn := fmt.Sprintf("%s:%s", hostDir, containerDir)
		container.HostConfig.Binds = append(container.HostConfig.Binds, mapn)
		logrus.WithFields(logrus.Fields{"volumes": mapn, "task_id": task.Id()}).Debug("setting volumes")
	}

	if wd := task.WorkDir(); wd != "" {
		logrus.WithFields(logrus.Fields{"wd": wd, "task_id": task.Id()}).Debug("setting work dir")
		container.Config.WorkingDir = wd
	}

	err := drv.ensureImage(ctx, task)
	if err != nil {
		return nil, err
	}

	createTimer := drv.NewTimer("docker", "create_container", 1.0)
	_, err = drv.docker.CreateContainer(container)
	createTimer.Measure()
	if err != nil {
		// since we retry under the hood, if the container got created but the retry
		// failed, we can just ignore the "already exists" error
		if err != docker.ErrContainerAlreadyExists {
			logrus.WithFields(logrus.Fields{"task_id": task.Id(), "command": container.Config.Cmd, "memory": container.Config.Memory,
				"cpu_shares": container.Config.CPUShares, "hostname": container.Config.Hostname, "name": container.Name,
				"image": container.Config.Image, "volumes": container.Config.Volumes, "binds": container.HostConfig.Binds, "container": containerName,
			}).WithError(err).Error("Could not create container")

			if ce := containerConfigError(err); ce != nil {
				return nil, common.UserError(fmt.Errorf("Failed to create container from task configuration '%s'", ce))
			}
			return nil, err
		}
	}

	return &cookie{id: containerName, task: task, drv: drv}, nil
}

type cookie struct {
	id   string
	task drivers.ContainerTask
	drv  *DockerDriver
}

func (c *cookie) Close() error { return c.drv.removeContainer(c.id) }

func (c *cookie) Run(ctx context.Context) (drivers.RunResult, error) {
	return c.drv.run(ctx, c.id, c.task)
}
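
// Sketch of the intended lifecycle, assuming `task` implements
// drivers.ContainerTask (and optionally Auther) and `env`/`conf` are already
// built by the caller:
//
//	drv := NewDocker(env, conf)
//	cookie, err := drv.Prepare(ctx, task) // pulls the image if needed, creates the container
//	if err != nil {
//		return err
//	}
//	defer cookie.Close()           // always remove the container, even on error
//	result, err := cookie.Run(ctx) // attaches, starts, waits; honors task.Timeout()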

// removeContainer forcibly removes the container and its volumes; removal
// errors are logged and discarded.
func (drv *DockerDriver) removeContainer(container string) error {
	removeTimer := drv.NewTimer("docker", "remove_container", 1.0)
	defer removeTimer.Measure()

	err := drv.docker.RemoveContainer(docker.RemoveContainerOptions{
		ID: container, Force: true, RemoveVolumes: true})
	if err != nil {
		logrus.WithError(err).WithFields(logrus.Fields{"container": container}).Error("error removing container")
	}
	return nil
}

func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.ContainerTask) error {
	reg, _, _ := drivers.ParseImage(task.Image())

	// ask for docker creds before looking for image, as the tasker may need to
	// validate creds even if the image is downloaded.
	var config docker.AuthConfiguration // default, tries docker hub w/o user/pass
	if task, ok := task.(Auther); ok {
		var err error
		config, err = task.DockerAuth()
		if err != nil {
			return err
		}
	}

	if reg != "" {
		config.ServerAddress = reg
	}

	// see if we already have it, if not, pull it
	_, err := drv.docker.InspectImage(task.Image())
	if err == docker.ErrNoSuchImage {
		err = drv.pullImage(ctx, task, config)
	}

	return err
}

func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTask, config docker.AuthConfiguration) error {
	log := common.Logger(ctx)
	reg, repo, tag := drivers.ParseImage(task.Image())
	globalRepo := path.Join(reg, repo)

	pullTimer := drv.NewTimer("docker", "pull_image", 1.0)
	defer pullTimer.Measure()
	drv.Inc("docker", "pull_image_count."+stats.AsStatField(task.Image()), 1, 1)

	if reg != "" {
		config.ServerAddress = reg
	}

	var err error
	config.ServerAddress, err = registryURL(config.ServerAddress)
	if err != nil {
		return err
	}

	log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).Info("Pulling image")

	err = drv.docker.PullImage(docker.PullImageOptions{Repository: globalRepo, Tag: tag, Context: ctx}, config)
	if err != nil {
		drv.Inc("task", "error.pull."+stats.AsStatField(task.Image()), 1, 1)
		log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).WithError(err).Error("Failed to pull image")

		// TODO need to inspect for hub or network errors and pick.
		return common.UserError(fmt.Errorf("Failed to pull image '%s': %s", task.Image(), err))

		// TODO what about a case where credentials were good, then credentials
		// were invalidated -- do we need to keep the credential cache docker
		// driver side and after pull for this case alone?
	}

	return nil
}

// Run executes the docker container. If the task runs, a drivers.RunResult
// will be returned. If something fails outside the task (i.e. in Docker
// itself), an error is returned instead.
// The docker driver will attempt to cast the task to an Auther. If that
// succeeds, private image support is available. See the Auther interface for
// how to implement this.
func (drv *DockerDriver) run(ctx context.Context, container string, task drivers.ContainerTask) (drivers.RunResult, error) {
	log := common.Logger(ctx)

	timeout := task.Timeout()
	var cancel context.CancelFunc
	if timeout <= 0 {
		ctx, cancel = context.WithCancel(ctx)
	} else {
		ctx, cancel = context.WithTimeout(ctx, timeout)
	}
	defer cancel() // do this so that after Run exits, nanny and collect stop

	var complete bool
	defer func() { complete = true }() // run before cancel is called
	ctx = context.WithValue(ctx, completeKey, &complete)

	go drv.nanny(ctx, container)
	go drv.collectStats(ctx, container, task)

	mwOut, mwErr := task.Logger()

	timer := drv.NewTimer("docker", "attach_container", 1)
	waiter, err := drv.docker.AttachToContainerNonBlocking(docker.AttachToContainerOptions{
		Container: container, OutputStream: mwOut, ErrorStream: mwErr,
		Stream: true, Logs: true, Stdout: true, Stderr: true,
		Stdin: true, InputStream: task.Input()})
	timer.Measure()
	if err != nil {
		return nil, err
	}

	err = drv.startTask(ctx, container)
	if err != nil {
		return nil, err
	}

	taskTimer := drv.NewTimer("docker", "container_runtime", 1)
	// can discard error, inspect will tell us about the task and wait will retry under the hood
	drv.docker.WaitContainerWithContext(container, ctx)
	taskTimer.Measure()

	waiter.Close()
	err = waiter.Wait()
	if err != nil {
		// TODO need to make sure this error isn't just a context error or something we can ignore
		log.WithError(err).Error("attach to container returned error, task may be missing logs")
	}

	status, err := drv.status(ctx, container)
	return &runResult{
		StatusValue: status,
		error:       err,
	}, nil
}

const completeKey = "complete"

// watch for cancel or timeout and kill process.
func (drv *DockerDriver) nanny(ctx context.Context, container string) {
	select {
	case <-ctx.Done():
		if *(ctx.Value(completeKey).(*bool)) {
			return
		}
		drv.cancel(container)
	}
}
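
// Sketch of the assumed interplay: run() stores a *bool under completeKey and
// flips it to true in a defer that fires before its deferred cancel(), so when
// nanny wakes on ctx.Done() it can distinguish "the task finished and the
// context was cancelled on the way out" (do nothing) from a genuine timeout or
// cancellation, in which case the container must be stopped via cancel() below.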

func (drv *DockerDriver) cancel(container string) {
	stopTimer := drv.NewTimer("docker", "stop_container", 1.0)
	err := drv.docker.StopContainer(container, 30)
	stopTimer.Measure()
	if err != nil {
		logrus.WithError(err).WithFields(logrus.Fields{"container": container, "errType": fmt.Sprintf("%T", err)}).Error("container escaped our retries, could not stop it")
	}
}

func (drv *DockerDriver) collectStats(ctx context.Context, container string, task drivers.ContainerTask) {
	done := make(chan bool)
	defer close(done)

	dstats := make(chan *docker.Stats, 1)
	go func() {
		// NOTE: docker automatically streams every 1s. we can skip or avg samples if we'd like but
		// the memory overhead is < 1MB for 3600 stat points so this seems fine, seems better to stream
		// (internal docker api streams) than open/close stream for 1 sample over and over.
		// must be called in goroutine, docker.Stats() blocks
		err := drv.docker.Stats(docker.StatsOptions{
			ID:     container,
			Stats:  dstats,
			Stream: true,
			Done:   done, // A flag that enables stopping the stats operation
		})
		if err != nil && err != io.ErrClosedPipe {
			logrus.WithError(err).WithFields(logrus.Fields{"container": container, "task_id": task.Id()}).Error("error streaming docker stats for task")
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return
		case ds, ok := <-dstats:
			if !ok {
				return
			}
			task.WriteStat(cherryPick(ds))
		}
	}
}

func cherryPick(ds *docker.Stats) drivers.Stat {
	// TODO cpu % is as a % of the whole system... cpu is weird since we're sharing it
	// across a bunch of containers and it scales based on how many we're sharing with,
	// do we want users to see as a % of system?
	systemDelta := float64(ds.CPUStats.SystemCPUUsage - ds.PreCPUStats.SystemCPUUsage)
	cores := float64(len(ds.CPUStats.CPUUsage.PercpuUsage))
	var cpuUser, cpuKernel, cpuTotal float64
	if systemDelta > 0 {
		// TODO we could leave these in docker format and let hud/viz tools do this instead of us... like net is, could do same for mem, too. thoughts?
		cpuUser = (float64(ds.CPUStats.CPUUsage.UsageInUsermode-ds.PreCPUStats.CPUUsage.UsageInUsermode) / systemDelta) * cores * 100.0
		cpuKernel = (float64(ds.CPUStats.CPUUsage.UsageInKernelmode-ds.PreCPUStats.CPUUsage.UsageInKernelmode) / systemDelta) * cores * 100.0
		cpuTotal = (float64(ds.CPUStats.CPUUsage.TotalUsage-ds.PreCPUStats.CPUUsage.TotalUsage) / systemDelta) * cores * 100.0
	}

	var rx, tx float64
	for _, v := range ds.Networks {
		rx += float64(v.RxBytes)
		tx += float64(v.TxBytes)
	}

	var blkRead, blkWrite uint64
	for _, bioEntry := range ds.BlkioStats.IOServiceBytesRecursive {
		switch strings.ToLower(bioEntry.Op) {
		case "read":
			blkRead = blkRead + bioEntry.Value
		case "write":
			blkWrite = blkWrite + bioEntry.Value
		}
	}

	return drivers.Stat{
		Timestamp: ds.Read,
		Metrics: map[string]uint64{
			// source: https://godoc.org/github.com/fsouza/go-dockerclient#Stats
			// ex (for future expansion): {"read":"2016-08-03T18:08:05Z","pids_stats":{},"network":{},"networks":{"eth0":{"rx_bytes":508,"tx_packets":6,"rx_packets":6,"tx_bytes":508}},"memory_stats":{"stats":{"cache":16384,"pgpgout":281,"rss":8826880,"pgpgin":2440,"total_rss":8826880,"hierarchical_memory_limit":536870912,"total_pgfault":3809,"active_anon":8843264,"total_active_anon":8843264,"total_pgpgout":281,"total_cache":16384,"pgfault":3809,"total_pgpgin":2440},"max_usage":8953856,"usage":8953856,"limit":536870912},"blkio_stats":{"io_service_bytes_recursive":[{"major":202,"op":"Read"},{"major":202,"op":"Write"},{"major":202,"op":"Sync"},{"major":202,"op":"Async"},{"major":202,"op":"Total"}],"io_serviced_recursive":[{"major":202,"op":"Read"},{"major":202,"op":"Write"},{"major":202,"op":"Sync"},{"major":202,"op":"Async"},{"major":202,"op":"Total"}]},"cpu_stats":{"cpu_usage":{"percpu_usage":[47641874],"usage_in_usermode":30000000,"total_usage":47641874},"system_cpu_usage":8880800500000000,"throttling_data":{}},"precpu_stats":{"cpu_usage":{"percpu_usage":[44946186],"usage_in_usermode":30000000,"total_usage":44946186},"system_cpu_usage":8880799510000000,"throttling_data":{}}}
			// mostly stolen values from docker stats cli api...

			// net
			"net_rx": uint64(rx),
			"net_tx": uint64(tx),
			// mem
			"mem_limit": ds.MemoryStats.Limit,
			"mem_usage": ds.MemoryStats.Usage,
			// i/o
			"disk_read":  blkRead,
			"disk_write": blkWrite,
			// cpu
			"cpu_user":   uint64(cpuUser),
			"cpu_total":  uint64(cpuTotal),
			"cpu_kernel": uint64(cpuKernel),
		},
	}
}
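
// Worked example of the cpu math above, using the sample payload quoted in
// cherryPick: total_usage advances by 47641874-44946186 = 2,695,688ns while
// system_cpu_usage advances by 990,000,000ns on one core, so
// cpu_total ≈ 2695688/990000000 * 1 * 100 ≈ 0.27%, which truncates to 0 when
// stored as a uint64 metric.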

// Introduces some randomness to prevent container name clashes where task ID remains the same.
func newContainerID(task drivers.ContainerTask) string {
	return fmt.Sprintf("task-%d-%s", time.Now().UnixNano(), task.Id())
}
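
// For illustration, a generated name looks roughly like
// "task-1501234567890123456-00112233deadbeef": the middle component is the
// nanosecond timestamp and the suffix is whatever task.Id() returns (the task
// id shown here is made up).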

func (drv *DockerDriver) startTask(ctx context.Context, container string) error {
	log := common.Logger(ctx)
	startTimer := drv.NewTimer("docker", "start_container", 1.0)
	log.WithFields(logrus.Fields{"container": container}).Debug("Starting container execution")
	err := drv.docker.StartContainerWithContext(container, nil, ctx)
	startTimer.Measure()
	if err != nil {
		dockerErr, ok := err.(*docker.Error)
		_, containerAlreadyRunning := err.(*docker.ContainerAlreadyRunning)
		if containerAlreadyRunning || (ok && dockerErr.Status == 304) {
			// 304=container already started -- so we can ignore error
		} else {
			return err
		}
	}
	return nil
}

func (drv *DockerDriver) status(ctx context.Context, container string) (status string, err error) {
	log := common.Logger(ctx)

	cinfo, err := drv.docker.InspectContainer(container)
	if err != nil {
		// this is pretty sad, but better to say we had an error than to not.
		// task has run to completion and logs will be uploaded, user can decide
		log.WithFields(logrus.Fields{"container": container}).WithError(err).Error("Inspecting container")
		return drivers.StatusError, err
	}

	exitCode := cinfo.State.ExitCode

	log.WithFields(logrus.Fields{
		"exit_code":          exitCode,
		"container_running":  cinfo.State.Running,
		"container_status":   cinfo.State.Status,
		"container_finished": cinfo.State.FinishedAt,
		"container_error":    cinfo.State.Error,
	}).Info("container status")

	select { // do this after inspect so we can see exit code
	case <-ctx.Done(): // check if task was canceled or timed out
		switch ctx.Err() {
		case context.DeadlineExceeded:
			return drivers.StatusTimeout, nil
		case context.Canceled:
			return drivers.StatusCancelled, nil
		}
	default:
	}

	if cinfo.State.Running {
		log.Warn("getting status of task that is still running, need to fix this")
		return drivers.StatusError, errors.New("task in running state but not timed out. weird")
	}

	switch exitCode {
	default:
		return drivers.StatusError, common.UserError(fmt.Errorf("exit code %d", exitCode))
	case 0:
		return drivers.StatusSuccess, nil
	case 137: // OOM
		drv.Inc("docker", "oom", 1, 1)
		if !cinfo.State.OOMKilled {
			// It is possible that the host itself is running out of memory and
			// the host kernel killed one of the container processes.
			// See: https://github.com/moby/moby/issues/15621
			// TODO reed: isn't an OOM an OOM? this is wasting space imo
			log.WithFields(logrus.Fields{"container": container}).Info("Setting task as OOM killed, but docker disagreed.")
			drv.Inc("docker", "possible_oom_false_alarm", 1, 1.0)
		}

		return drivers.StatusKilled, drivers.ErrOutOfMemory
	}
}