// Copyright 2016 Iron.io
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package docker

import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"
	manifest "github.com/docker/distribution/manifest/schema1"
	"github.com/fsouza/go-dockerclient"
	"github.com/heroku/docker-registry-client/registry"
	"github.com/iron-io/runner/common"
	"github.com/iron-io/runner/common/stats"
	"github.com/iron-io/runner/drivers"
)

const hubURL = "https://registry.hub.docker.com"

var registryClient = &http.Client{
	Transport: &http.Transport{
		Dial: (&net.Dialer{
			Timeout:   10 * time.Second,
			KeepAlive: 2 * time.Minute,
		}).Dial,
		TLSClientConfig: &tls.Config{
			ClientSessionCache: tls.NewLRUClientSessionCache(8192),
		},
		TLSHandshakeTimeout:   10 * time.Second,
		MaxIdleConnsPerHost:   32, // TODO tune; we will likely be making lots of requests to the same place
		Proxy:                 http.ProxyFromEnvironment,
		IdleConnTimeout:       90 * time.Second,
		MaxIdleConns:          512,
		ExpectContinueTimeout: 1 * time.Second,
	},
}

// A drivers.ContainerTask should implement the Auther interface if it would
// like to use not-necessarily-public docker images for any or all task
// invocations.
type Auther interface {
	// DockerAuth should return docker auth credentials that will authenticate
	// against a docker registry for a given drivers.ContainerTask.Image(). An
	// error may be returned, which will cause the task not to be run; this can
	// be useful for an implementer to do things like test auth configurations
	// before returning them, e.g. if the implementer would like to impose
	// certain restrictions on images or if credentials must be acquired right
	// before runtime and there's an error doing so. If these credentials don't
	// work, the docker pull will fail and the task will be set to error status.
	DockerAuth() (docker.AuthConfiguration, error)
}
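// A minimal sketch of an Auther implementation, assuming a hypothetical task
// type that carries credentials; the type and field names below are
// illustrative only and are not part of this package:
//
//	type privateImageTask struct {
//		drivers.ContainerTask
//		user, pass string
//	}
//
//	func (t *privateImageTask) DockerAuth() (docker.AuthConfiguration, error) {
//		if t.user == "" {
//			// returning an error here prevents the task from running at all
//			return docker.AuthConfiguration{}, errors.New("no registry credentials configured")
//		}
//		return docker.AuthConfiguration{Username: t.user, Password: t.pass}, nil
//	}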

type runResult struct {
	error
	StatusValue string
}

func (r *runResult) Error() string {
	if r.error == nil {
		return ""
	}
	return r.error.Error()
}

func (r *runResult) Status() string { return r.StatusValue }

func (r *runResult) UserVisible() bool { return common.IsUserVisibleError(r.error) }

type DockerDriver struct {
	conf     drivers.Config
	docker   dockerClient // retries on *docker.Client, restricts ad hoc *docker.Client usage / retries
	hostname string

	*common.Environment
}

// implements drivers.Driver
func NewDocker(env *common.Environment, conf drivers.Config) *DockerDriver {
	hostname, err := os.Hostname()
	if err != nil {
		logrus.WithError(err).Fatal("couldn't resolve hostname")
	}

	return &DockerDriver{
		conf:        conf,
		docker:      newClient(env),
		hostname:    hostname,
		Environment: env,
	}
}

// CheckRegistry returns a Sizer, which can be used to check the size of an
// image if the returned error is nil. A nil error also means that
// authentication with the given credentials was successful. If the
// configuration does not specify a config.ServerAddress,
// https://hub.docker.com will be tried.
//
// CheckRegistry is a package-level method since rkt can also use docker
// images and we may be interested in using rkt without a docker driver
// configured; also, we don't have to tote around a driver in any tasker that
// may be interested in registry information (2/2 cases thus far).
func CheckRegistry(image string, config docker.AuthConfiguration) (Sizer, error) {
	registry, repo, tag := drivers.ParseImage(image)

	reg, err := registryForConfig(config, registry)
	if err != nil {
		return nil, err
	}

	mani, err := reg.Manifest(repo, tag)
	if err != nil {
		logrus.WithFields(logrus.Fields{"username": config.Username, "server": config.ServerAddress, "image": image}).WithError(err).Error("Credentials not authorized, trying next.")
		//if !isAuthError(err) {
		//	// TODO we might retry this, since if this was the registry that was supposed to
		//	// auth the task will erroneously be set to 'error'
		//}

		return nil, err
	}

	return &sizer{mani, reg, repo}, nil
}

// Sizer returns size information. This interface is liable to contain more
// than a size at some point; change as needed.
type Sizer interface {
	Size() (int64, error)
}

type sizer struct {
	mani *manifest.SignedManifest
	reg  *registry.Registry
	repo string
}

func (s *sizer) Size() (int64, error) {
	var sum int64
	for _, r := range s.mani.References() {
		desc, err := s.reg.LayerMetadata(s.repo, r.Digest)
		if err != nil {
			return 0, err
		}
		sum += desc.Size
	}
	return sum, nil
}

func registryURL(addr string) (string, error) {
	if addr == "" || strings.Contains(addr, "hub.docker.com") || strings.Contains(addr, "index.docker.io") {
		return hubURL, nil
	}

	url, err := url.Parse(addr)
	if err != nil {
		// TODO we could error the task out from this with a user error but since
		// we have a list of auths to check, just return the error so as to be
		// skipped... horrible api as it is
		logrus.WithFields(logrus.Fields{"auth_addr": addr}).WithError(err).Error("error parsing server address url, skipping")
		return "", err
	}

	if url.Scheme == "" {
		url.Scheme = "https"
	}
	url.Path = strings.TrimSuffix(url.Path, "/")
	url.Path = strings.TrimPrefix(url.Path, "/v2")
	url.Path = strings.TrimPrefix(url.Path, "/v1") // just try this, if it fails it fails, not supporting v1

	return url.String(), nil
}

func isAuthError(err error) bool {
	// AARGH!
	if urlError, ok := err.(*url.Error); ok {
		if httpError, ok := urlError.Err.(*registry.HttpStatusError); ok {
			if httpError.Response.StatusCode == 401 {
				return true
			}
		}
	}

	return false
}
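// Illustrative use of CheckRegistry from a caller's perspective (a sketch,
// not part of this package): validate credentials against an image's
// registry and report its download size before scheduling a task. The image
// name and credentials below are hypothetical.
//
//	sizer, err := CheckRegistry("example.com/team/app:v1", docker.AuthConfiguration{
//		Username: "user",
//		Password: "secret",
//	})
//	if err != nil {
//		// bad credentials, unknown image, or unreachable registry
//	}
//	size, err := sizer.Size() // sum of manifest layer sizes, in bytes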
func registryForConfig(config docker.AuthConfiguration, reg string) (*registry.Registry, error) {
	if reg == "" {
		reg = config.ServerAddress
	}

	var err error
	config.ServerAddress, err = registryURL(reg)
	if err != nil {
		return nil, err
	}

	// Use this instead of registry.New to avoid the Ping().
	transport := registry.WrapTransport(registryClient.Transport, reg, config.Username, config.Password)
	r := &registry.Registry{
		URL: config.ServerAddress,
		Client: &http.Client{
			Transport: transport,
		},
		Logf: registry.Quiet,
	}
	return r, nil
}

func (drv *DockerDriver) Prepare(ctx context.Context, task drivers.ContainerTask) (drivers.Cookie, error) {
	var cmd []string
	if task.Command() != "" {
		// NOTE: this is hyper-sensitive and may not be correct like this even, but it passes old tests
		// task.Command() in swapi is always "sh /mnt/task/.runtask" so fields is safe
		cmd = strings.Fields(task.Command())
		logrus.WithFields(logrus.Fields{"task_id": task.Id(), "cmd": cmd, "len": len(cmd)}).Debug("docker command")
	}

	envvars := make([]string, 0, len(task.EnvVars()))
	for name, val := range task.EnvVars() {
		envvars = append(envvars, name+"="+val)
	}

	containerName := newContainerID(task)

	container := docker.CreateContainerOptions{
		Name: containerName,
		Config: &docker.Config{
			Env:         envvars,
			Cmd:         cmd,
			Memory:      int64(drv.conf.Memory),
			CPUShares:   drv.conf.CPUShares,
			Hostname:    drv.hostname,
			Image:       task.Image(),
			Volumes:     map[string]struct{}{},
			Labels:      task.Labels(),
			OpenStdin:   true,
			AttachStdin: true,
			StdinOnce:   true,
		},
		HostConfig: &docker.HostConfig{},
		Context:    ctx,
	}

	volumes := task.Volumes()
	for _, mapping := range volumes {
		hostDir := mapping[0]
		containerDir := mapping[1]
		container.Config.Volumes[containerDir] = struct{}{}
		mapn := fmt.Sprintf("%s:%s", hostDir, containerDir)
		container.HostConfig.Binds = append(container.HostConfig.Binds, mapn)
		logrus.WithFields(logrus.Fields{"volumes": mapn, "task_id": task.Id()}).Debug("setting volumes")
	}

	if wd := task.WorkDir(); wd != "" {
		logrus.WithFields(logrus.Fields{"wd": wd, "task_id": task.Id()}).Debug("setting work dir")
		container.Config.WorkingDir = wd
	}

	err := drv.ensureImage(ctx, task)
	if err != nil {
		return nil, err
	}

	createTimer := drv.NewTimer("docker", "create_container", 1.0)
	_, err = drv.docker.CreateContainer(container)
	createTimer.Measure()
	if err != nil {
		// since we retry under the hood, if the container gets created and retry fails, we can just ignore error
		if err != docker.ErrContainerAlreadyExists {
			logrus.WithFields(logrus.Fields{
				"task_id":    task.Id(),
				"command":    container.Config.Cmd,
				"memory":     container.Config.Memory,
				"cpu_shares": container.Config.CPUShares,
				"hostname":   container.Config.Hostname,
				"name":       container.Name,
				"image":      container.Config.Image,
				"volumes":    container.Config.Volumes,
				"binds":      container.HostConfig.Binds,
				"container":  containerName,
			}).WithError(err).Error("Could not create container")

			if ce := containerConfigError(err); ce != nil {
				return nil, common.UserError(fmt.Errorf("Failed to create container from task configuration '%s'", ce))
			}
			return nil, err
		}
	}

	// discard removal error
	return &cookie{id: containerName, task: task, drv: drv}, nil
}

func (drv *DockerDriver) IsAlive(ctx context.Context) bool {
	err := drv.docker.Ping()
	return err == nil
}

type cookie struct {
	id   string
	task drivers.ContainerTask
	drv  *DockerDriver
}

func (c *cookie) Close() error { return c.drv.removeContainer(c.id) }

func (c *cookie) Run(ctx context.Context) (drivers.RunResult, error) {
	return c.drv.run(ctx, c.id, c.task)
}
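// The intended lifecycle, as a sketch (error handling elided): Prepare pulls
// the image and creates the container, the returned Cookie runs it, and
// Close removes the container whether or not the run succeeded.
//
//	cookie, err := drv.Prepare(ctx, task)
//	if err != nil {
//		return err
//	}
//	defer cookie.Close()
//	result, err := cookie.Run(ctx)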
func (drv *DockerDriver) removeContainer(container string) error {
	removeTimer := drv.NewTimer("docker", "remove_container", 1.0)
	defer removeTimer.Measure()

	err := drv.docker.RemoveContainer(docker.RemoveContainerOptions{ID: container, Force: true, RemoveVolumes: true})
	if err != nil {
		logrus.WithError(err).WithFields(logrus.Fields{"container": container}).Error("error removing container")
	}
	return nil
}

func (drv *DockerDriver) ensureImage(ctx context.Context, task drivers.ContainerTask) error {
	reg, _, _ := drivers.ParseImage(task.Image())

	// ask for docker creds before looking for image, as the tasker may need to
	// validate creds even if the image is downloaded.
	var config docker.AuthConfiguration // default, tries docker hub w/o user/pass
	if task, ok := task.(Auther); ok {
		var err error
		config, err = task.DockerAuth()
		if err != nil {
			return err
		}
	}
	if reg != "" {
		config.ServerAddress = reg
	}

	// see if we already have it, if not, pull it
	_, err := drv.docker.InspectImage(task.Image())
	if err == docker.ErrNoSuchImage {
		err = drv.pullImage(ctx, task, config)
	}

	return err
}

func (drv *DockerDriver) pullImage(ctx context.Context, task drivers.ContainerTask, config docker.AuthConfiguration) error {
	log := common.Logger(ctx)
	reg, repo, tag := drivers.ParseImage(task.Image())
	globalRepo := path.Join(reg, repo)

	pullTimer := drv.NewTimer("docker", "pull_image", 1.0)
	defer pullTimer.Measure()
	drv.Inc("docker", "pull_image_count."+stats.AsStatField(task.Image()), 1, 1)

	if reg != "" {
		config.ServerAddress = reg
	}

	var err error
	config.ServerAddress, err = registryURL(config.ServerAddress)
	if err != nil {
		return err
	}

	log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).Info("Pulling image")

	err = drv.docker.PullImage(docker.PullImageOptions{Repository: globalRepo, Tag: tag, Context: ctx}, config)
	if err != nil {
		drv.Inc("task", "error.pull."+stats.AsStatField(task.Image()), 1, 1)
		log.WithFields(logrus.Fields{"registry": config.ServerAddress, "username": config.Username, "image": task.Image()}).WithError(err).Error("Failed to pull image")

		// TODO need to inspect for hub or network errors and pick.
		return common.UserError(fmt.Errorf("Failed to pull image '%s': %s", task.Image(), err))

		// TODO what about a case where credentials were good, then credentials
		// were invalidated -- do we need to keep the credential cache docker
		// driver side and after pull for this case alone?
	}

	return nil
}
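// For reference, the pull above composes its repository name roughly like
// this; the ParseImage results shown are an assumption for illustration, not
// a documented contract of the drivers package:
//
//	reg, repo, tag := drivers.ParseImage("example.com/team/app:v1")
//	// assuming reg == "example.com", repo == "team/app", tag == "v1"
//	globalRepo := path.Join(reg, repo) // "example.com/team/app"
//	// the registry host is then normalized via registryURL before pulling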
// Run executes the docker container. If the task runs, a drivers.RunResult
// will be returned. If something fails outside the task (i.e. Docker), an
// error will be returned.
// The docker driver will attempt to cast the task to an Auther. If that
// succeeds, private image support is available. See the Auther interface for
// how to implement this.
func (drv *DockerDriver) run(ctx context.Context, container string, task drivers.ContainerTask) (drivers.RunResult, error) {
	log := common.Logger(ctx)

	timeout := task.Timeout()
	var cancel context.CancelFunc
	if timeout <= 0 {
		ctx, cancel = context.WithCancel(ctx)
	} else {
		ctx, cancel = context.WithTimeout(ctx, timeout)
	}
	defer cancel() // do this so that after Run exits, nanny and collect stop

	var complete bool
	defer func() { complete = true }() // run before cancel is called
	ctx = context.WithValue(ctx, completeKey, &complete)

	go drv.nanny(ctx, container)
	go drv.collectStats(ctx, container, task)

	mwOut, mwErr := task.Logger()

	timer := drv.NewTimer("docker", "attach_container", 1)
	waiter, err := drv.docker.AttachToContainerNonBlocking(docker.AttachToContainerOptions{
		Container: container, OutputStream: mwOut, ErrorStream: mwErr,
		Stream: true, Logs: true, Stdout: true, Stderr: true,
		Stdin: true, InputStream: task.Input()})
	timer.Measure()
	if err != nil {
		return nil, err
	}

	err = drv.startTask(ctx, container)
	if err != nil {
		return nil, err
	}

	taskTimer := drv.NewTimer("docker", "container_runtime", 1)

	// can discard error, inspect will tell us about the task and wait will retry under the hood
	drv.docker.WaitContainerWithContext(container, ctx)
	taskTimer.Measure()

	waiter.Close()
	err = waiter.Wait()
	if err != nil {
		// TODO need to make sure this error isn't just a context error or something we can ignore
		log.WithError(err).Error("attach to container returned error, task may be missing logs")
	}

	status, err := drv.status(ctx, container)
	return &runResult{
		StatusValue: status,
		error:       err,
	}, nil
}

const completeKey = "complete"

// watch for cancel or timeout and kill process.
func (drv *DockerDriver) nanny(ctx context.Context, container string) {
	select {
	case <-ctx.Done():
		if *(ctx.Value(completeKey).(*bool)) {
			return
		}
		drv.cancel(container)
	}
}

func (drv *DockerDriver) cancel(container string) {
	stopTimer := drv.NewTimer("docker", "stop_container", 1.0)
	err := drv.docker.StopContainer(container, 30)
	stopTimer.Measure()
	if err != nil {
		logrus.WithError(err).WithFields(logrus.Fields{"container": container, "errType": fmt.Sprintf("%T", err)}).Error("something managed to escape our retries web, could not kill container")
	}
}

func (drv *DockerDriver) collectStats(ctx context.Context, container string, task drivers.ContainerTask) {
	done := make(chan bool)
	defer close(done)
	dstats := make(chan *docker.Stats, 1)
	go func() {
		// NOTE: docker automatically streams every 1s. we can skip or avg samples if we'd like but
		// the memory overhead is < 1MB for 3600 stat points so this seems fine, seems better to stream
		// (internal docker api streams) than open/close stream for 1 sample over and over.
		// must be called in goroutine, docker.Stats() blocks
		err := drv.docker.Stats(docker.StatsOptions{
			ID:     container,
			Stats:  dstats,
			Stream: true,
			Done:   done, // A flag that enables stopping the stats operation
		})
		if err != nil && err != io.ErrClosedPipe {
			logrus.WithError(err).WithFields(logrus.Fields{"container": container, "task_id": task.Id()}).Error("error streaming docker stats for task")
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return
		case ds, ok := <-dstats:
			if !ok {
				return
			}
			task.WriteStat(cherryPick(ds))
		}
	}
}
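// The run/nanny handshake above, in miniature: run stores a *bool under
// completeKey and flips it on exit (the deferred flip runs before the
// deferred cancel, since defers run LIFO), so nanny only stops the container
// when the context ended while the task was still incomplete, i.e. on a real
// timeout or cancellation:
//
//	var complete bool
//	ctx = context.WithValue(ctx, completeKey, &complete)
//	go drv.nanny(ctx, container)       // stops the container unless complete is true
//	defer cancel()                     // wakes nanny and collectStats when run returns
//	defer func() { complete = true }() // runs before cancel, marking a clean exit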
func cherryPick(ds *docker.Stats) drivers.Stat {
	// TODO cpu % is as a % of the whole system... cpu is weird since we're sharing it
	// across a bunch of containers and it scales based on how many we're sharing with,
	// do we want users to see it as a % of system?
	systemDelta := float64(ds.CPUStats.SystemCPUUsage - ds.PreCPUStats.SystemCPUUsage)
	cores := float64(len(ds.CPUStats.CPUUsage.PercpuUsage))
	var cpuUser, cpuKernel, cpuTotal float64
	if systemDelta > 0 {
		// TODO we could leave these in docker format and let hud/viz tools do this instead of us...
		// like net is, could do same for mem, too. thoughts?
		cpuUser = (float64(ds.CPUStats.CPUUsage.UsageInUsermode-ds.PreCPUStats.CPUUsage.UsageInUsermode) / systemDelta) * cores * 100.0
		cpuKernel = (float64(ds.CPUStats.CPUUsage.UsageInKernelmode-ds.PreCPUStats.CPUUsage.UsageInKernelmode) / systemDelta) * cores * 100.0
		cpuTotal = (float64(ds.CPUStats.CPUUsage.TotalUsage-ds.PreCPUStats.CPUUsage.TotalUsage) / systemDelta) * cores * 100.0
	}

	var rx, tx float64
	for _, v := range ds.Networks {
		rx += float64(v.RxBytes)
		tx += float64(v.TxBytes)
	}

	var blkRead, blkWrite uint64
	for _, bioEntry := range ds.BlkioStats.IOServiceBytesRecursive {
		switch strings.ToLower(bioEntry.Op) {
		case "read":
			blkRead = blkRead + bioEntry.Value
		case "write":
			blkWrite = blkWrite + bioEntry.Value
		}
	}

	return drivers.Stat{
		Timestamp: ds.Read,
		Metrics: map[string]uint64{
			// source: https://godoc.org/github.com/fsouza/go-dockerclient#Stats
			// ex (for future expansion): {"read":"2016-08-03T18:08:05Z","pids_stats":{},"network":{},"networks":{"eth0":{"rx_bytes":508,"tx_packets":6,"rx_packets":6,"tx_bytes":508}},"memory_stats":{"stats":{"cache":16384,"pgpgout":281,"rss":8826880,"pgpgin":2440,"total_rss":8826880,"hierarchical_memory_limit":536870912,"total_pgfault":3809,"active_anon":8843264,"total_active_anon":8843264,"total_pgpgout":281,"total_cache":16384,"pgfault":3809,"total_pgpgin":2440},"max_usage":8953856,"usage":8953856,"limit":536870912},"blkio_stats":{"io_service_bytes_recursive":[{"major":202,"op":"Read"},{"major":202,"op":"Write"},{"major":202,"op":"Sync"},{"major":202,"op":"Async"},{"major":202,"op":"Total"}],"io_serviced_recursive":[{"major":202,"op":"Read"},{"major":202,"op":"Write"},{"major":202,"op":"Sync"},{"major":202,"op":"Async"},{"major":202,"op":"Total"}]},"cpu_stats":{"cpu_usage":{"percpu_usage":[47641874],"usage_in_usermode":30000000,"total_usage":47641874},"system_cpu_usage":8880800500000000,"throttling_data":{}},"precpu_stats":{"cpu_usage":{"percpu_usage":[44946186],"usage_in_usermode":30000000,"total_usage":44946186},"system_cpu_usage":8880799510000000,"throttling_data":{}}}
			// mostly stolen values from docker stats cli api...

			// net
			"net_rx": uint64(rx),
			"net_tx": uint64(tx),
			// mem
			"mem_limit": ds.MemoryStats.Limit,
			"mem_usage": ds.MemoryStats.Usage,
			// i/o
			"disk_read":  blkRead,
			"disk_write": blkWrite,
			// cpu
			"cpu_user":   uint64(cpuUser),
			"cpu_total":  uint64(cpuTotal),
			"cpu_kernel": uint64(cpuKernel),
		},
	}
}
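// Worked example of the cpu calculation above, using the sample payload in
// the comment: total_usage moves from 44946186 to 47641874 (a delta of
// 2695688 ns) while system_cpu_usage moves by 990000000 ns across 1 core, so
//
//	cpu_total = 2695688 / 990000000 * 1 * 100 ≈ 0.27%
//
// which is then truncated to 0 by the uint64 conversion; percentages below
// one full point round down to zero in this scheme.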
// Introduces some randomness to prevent container name clashes where task ID remains the same.
func newContainerID(task drivers.ContainerTask) string {
	return fmt.Sprintf("task-%d-%s", time.Now().UnixNano(), task.Id())
}

func (drv *DockerDriver) startTask(ctx context.Context, container string) error {
	log := common.Logger(ctx)
	startTimer := drv.NewTimer("docker", "start_container", 1.0)
	log.WithFields(logrus.Fields{"container": container}).Debug("Starting container execution")
	err := drv.docker.StartContainerWithContext(container, nil, ctx)
	startTimer.Measure()
	if err != nil {
		dockerErr, ok := err.(*docker.Error)
		_, containerAlreadyRunning := err.(*docker.ContainerAlreadyRunning)
		if containerAlreadyRunning || (ok && dockerErr.Status == 304) {
			// 304=container already started -- so we can ignore error
		} else {
			return err
		}
	}
	return nil
}

func (drv *DockerDriver) status(ctx context.Context, container string) (status string, err error) {
	log := common.Logger(ctx)

	cinfo, err := drv.docker.InspectContainer(container)
	if err != nil {
		// this is pretty sad, but better to say we had an error than to not.
		// task has run to completion and logs will be uploaded, user can decide
		log.WithFields(logrus.Fields{"container": container}).WithError(err).Error("Inspecting container")
		return drivers.StatusError, err
	}
	exitCode := cinfo.State.ExitCode

	log.WithFields(logrus.Fields{
		"exit_code":          exitCode,
		"container_running":  cinfo.State.Running,
		"container_status":   cinfo.State.Status,
		"container_finished": cinfo.State.FinishedAt,
		"container_error":    cinfo.State.Error,
	}).Info("container status")

	select { // do this after inspect so we can see exit code
	case <-ctx.Done(): // check if task was canceled or timed out
		switch ctx.Err() {
		case context.DeadlineExceeded:
			return drivers.StatusTimeout, nil
		case context.Canceled:
			return drivers.StatusCancelled, nil
		}
	default:
	}

	if cinfo.State.Running {
		log.Warn("getting status of task that is still running, need to fix this")
		return drivers.StatusError, errors.New("task in running state but not timed out. weird")
	}

	switch exitCode {
	default:
		return drivers.StatusError, common.UserError(fmt.Errorf("exit code %d", exitCode))
	case 0:
		return drivers.StatusSuccess, nil
	case 137: // OOM
		drv.Inc("docker", "oom", 1, 1)
		if !cinfo.State.OOMKilled {
			// It is possible that the host itself is running out of memory and
			// the host kernel killed one of the container processes.
			// See: https://github.com/docker/docker/issues/15621
			// TODO reed: isn't an OOM an OOM? this is wasting space imo
			log.WithFields(logrus.Fields{"container": container}).Info("Setting task as OOM killed, but docker disagreed.")
			drv.Inc("docker", "possible_oom_false_alarm", 1, 1.0)
		}

		return drivers.StatusKilled, drivers.ErrOutOfMemory
	}
}
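// For reference, status above resolves container outcomes roughly as follows
// (a summary of the code, not an exhaustive contract):
//
//	context deadline exceeded -> drivers.StatusTimeout
//	context canceled          -> drivers.StatusCancelled
//	exit code 0               -> drivers.StatusSuccess
//	exit code 137             -> drivers.StatusKilled, drivers.ErrOutOfMemory
//	any other exit code       -> drivers.StatusError (user-visible error)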