From 6a7973e6b6cecc780f271d4e6f05b91e3babd5e2 Mon Sep 17 00:00:00 2001 From: Reed Allman Date: Thu, 3 Aug 2017 04:30:16 -0700 Subject: [PATCH] plumb all config fields into task the mqs are storing a models.Task, which was not incorporating all the fields that are in a task.Config. I would very much like to merge these two things, but expect to do this in a future restructuring as both are used widely and not cordoned off properly (Config has a channel, stdin, stdout, stderr -- and isn't just a 'config', so to speak, as Task is). Since a task.Config is what is used to actually run a container, the result of the aforementioned deficiency was #193 where tasks are improperly configured and ran (namely, memory wrong). async tasks can still not be hot, they will be reverted to default format. would also like to fix this (also part of restructuring). I actually started doing this, hence the changes to those files (the surface area of the change is small and discourages improper future use, so I've left what I've done). this will: closes #193 closes #195 closes #154 removes many unused fields in models.Task, since we have not implemented retries. priority & delay are left, even though they are not used either, the main goal of this is to resolve #193 and both these fields are strongly plumbed into all the mqs, so punting on those two. --- api/models/id_status.go | 3 + api/models/mq.go | 6 +- api/models/task.go | 182 +++++++------------------------- api/mqs/ironmq.go | 2 +- api/runner/async_runner.go | 33 +----- api/runner/async_runner_test.go | 2 +- api/runner/protocol/default.go | 2 +- api/runner/protocol/factory.go | 6 +- api/runner/protocol/http.go | 13 +-- api/runner/runner.go | 2 +- api/runner/task/task.go | 44 +++++++- api/runner/worker.go | 2 +- api/server/runner.go | 22 ++-- docs/operating/mqs/README.md | 2 +- docs/swagger.yml | 2 +- 15 files changed, 115 insertions(+), 208 deletions(-) diff --git a/api/models/id_status.go b/api/models/id_status.go index c31c7a385..5d7cedbbb 100644 --- a/api/models/id_status.go +++ b/api/models/id_status.go @@ -9,6 +9,9 @@ import ( "github.com/go-openapi/validate" ) +// TODO get rid of this. id and status are not more coupled than anything else? +// burn it at the stake + /*IDStatus Id status swagger:model IdStatus diff --git a/api/models/mq.go b/api/models/mq.go index b754c4666..bf3a1da8d 100644 --- a/api/models/mq.go +++ b/api/models/mq.go @@ -2,9 +2,9 @@ package models import "context" -// Titan uses a Message Queue to impose a total ordering on jobs that it will +// Message Queue is used to impose a total ordering on jobs that it will // execute in order. Tasks are added to the queue via the Push() interface. The -// MQ must support a reserve-delete 2 step dequeue to allow Titan to implement +// MQ must support a reserve-delete 2 step dequeue to allow implementing // timeouts and retries. // // The Reserve() operation must return a job based on this total ordering @@ -29,7 +29,7 @@ import "context" type MessageQueue interface { // Push a Task onto the queue. If any error is returned, the Task SHOULD not be // queued. Note that this does not completely avoid double queueing, that is - // OK, Titan will perform a check against the datastore after a dequeue. + // OK, a check against the datastore will be performed after a dequeue. // // If the job's Delay value is > 0, the job should NOT be enqueued. The job // should only be available in the queue after at least Delay seconds have diff --git a/api/models/task.go b/api/models/task.go index 7373441c7..d9f686bc5 100644 --- a/api/models/task.go +++ b/api/models/task.go @@ -1,10 +1,7 @@ package models import ( - "encoding/json" - strfmt "github.com/go-openapi/strfmt" - "github.com/go-openapi/validate" ) const ( @@ -23,6 +20,7 @@ const ( FormatHTTP = "http" ) +// TODO this should either be Task, or should be removed in favor of Task type FnCall struct { IDStatus CompletedAt strfmt.DateTime `json:"completed_at,omitempty"` @@ -51,163 +49,57 @@ func (fnCall *FnCall) FromTask(task *Task) *FnCall { } } -/*Task task - -swagger:model Task -*/ +// Task is a representation of a specific invocation of a route. type Task struct { IDStatus - /* Number of seconds to wait before queueing the task for consumption for the first time. Must be a positive integer. Tasks with a delay start in state "delayed" and transition to "running" after delay seconds. - */ - Delay int32 `json:"delay,omitempty"` - - /* Name of Docker image to use. This is optional and can be used to override the image defined at the route level. - - Required: true - */ - Image *string `json:"image"` - - /* "Number of automatic retries this task is allowed. A retry will be attempted if a task fails. Max 25. Automatic retries are performed by titan when a task reaches a failed state and has `max_retries` > 0. A retry is performed by queueing a new task with the same image id and payload. The new task's max_retries is one less than the original. The new task's `retry_of` field is set to the original Task ID. The old task's `retry_at` field is set to the new Task's ID. Titan will delay the new task for retries_delay seconds before queueing it. Cancelled or successful tasks are never automatically retried." - - */ - MaxRetries int32 `json:"max_retries,omitempty"` - - /* Payload for the task. This is what you pass into each task to make it do something. - */ - Payload string `json:"payload,omitempty"` - - /* Priority of the task. Higher has more priority. 3 levels from 0-2. Tasks at same priority are processed in FIFO order. - - Required: true - */ - Priority *int32 `json:"priority"` - - /* Time in seconds to wait before retrying the task. Must be a non-negative integer. - */ - RetriesDelay *int32 `json:"retries_delay,omitempty"` - - /* Maximum runtime in seconds. If a consumer retrieves the - task, but does not change it's status within timeout seconds, the task - is considered failed, with reason timeout (Titan may allow a small - grace period). The consumer should also kill the task after timeout - seconds. If a consumer tries to change status after Titan has already - timed out the task, the consumer will be ignored. - - */ - Timeout *int32 `json:"timeout,omitempty"` - - /* Hot function idle timeout in seconds before termination. - - */ - IdleTimeout *int32 `json:"idle_timeout,omitempty"` - - /* Time when task completed, whether it was successul or failed. Always in UTC. - */ - CompletedAt strfmt.DateTime `json:"completed_at,omitempty"` - - /* Time when task was submitted. Always in UTC. - - Read Only: true - */ - CreatedAt strfmt.DateTime `json:"created_at,omitempty"` - - /* Env vars for the task. Comes from the ones set on the Route. - */ - EnvVars map[string]string `json:"env_vars,omitempty"` - - /* The error message, if status is 'error'. This is errors due to things outside the task itself. Errors from user code will be found in the log. - */ - Error string `json:"error,omitempty"` - - /* App this task belongs to. - - Read Only: true - */ - AppName string `json:"app_name,omitempty"` + // App this task belongs to. + AppName string `json:"app_name"` + // Path of the route that is responsible for this task Path string `json:"path"` - /* Machine usable reason for task being in this state. - Valid values for error status are `timeout | killed | bad_exit`. - Valid values for cancelled status are `client_request`. - For everything else, this is undefined. + // Name of Docker image to use. + Image string `json:"image"` - */ - Reason string `json:"reason,omitempty"` + // Number of seconds to wait before queueing the task for consumption for the first time. Must be a positive integer. Tasks with a delay start in state "delayed" and transition to "running" after delay seconds. + Delay int32 `json:"delay,omitempty"` - /* If this field is set, then this task was retried by the task referenced in this field. + // Payload for the task. This is only used by async tasks, to store their input. + Payload string `json:"payload,omitempty"` - Read Only: true - */ - RetryAt string `json:"retry_at,omitempty"` + // Priority of the task. Higher has more priority. 3 levels from 0-2. Tasks at same priority are processed in FIFO order. + Priority *int32 `json:"priority"` - /* If this field is set, then this task is a retry of the ID in this field. + // Maximum runtime in seconds. + Timeout int32 `json:"timeout,omitempty"` - Read Only: true - */ - RetryOf string `json:"retry_of,omitempty"` + // Hot function idle timeout in seconds before termination. + IdleTimeout int32 `json:"idle_timeout,omitempty"` - /* Time when task started execution. Always in UTC. - */ + // Memory is the amount of RAM this task is allocated. + Memory uint64 `json:"memory,omitempty"` + + // BaseEnv are the env vars for hot containers, not request specific. + BaseEnv map[string]string `json:"base_env,omitempty"` + + // Env vars for the task. Comes from the ones set on the Route. + EnvVars map[string]string `json:"env_vars,omitempty"` + + // Format is the format to pass input into the function. + // TODO plumb this in async land + // Format string `json:"format,omitempty"` + + // Time when task completed, whether it was successul or failed. Always in UTC. + CompletedAt strfmt.DateTime `json:"completed_at,omitempty"` + + // Time when task was submitted. Always in UTC. + CreatedAt strfmt.DateTime `json:"created_at,omitempty"` + + // Time when task started execution. Always in UTC. StartedAt strfmt.DateTime `json:"started_at,omitempty"` } -// Validate validates this task -func (m *Task) Validate(formats strfmt.Registry) error { - if err := m.IDStatus.Validate(formats); err != nil { - return err - } - - if err := m.validateEnvVars(formats); err != nil { - return err - } - - if err := m.validateReason(formats); err != nil { - return err - } - - return nil -} - -func (m *Task) validateEnvVars(formats strfmt.Registry) error { - - if err := validate.Required("env_vars", "body", m.EnvVars); err != nil { - return err - } - - return nil -} - -var taskTypeReasonPropEnum []interface{} - -// property enum -func (m *Task) validateReasonEnum(path, location string, value string) error { - if taskTypeReasonPropEnum == nil { - var res []string - if err := json.Unmarshal([]byte(`["timeout","killed","bad_exit","client_request"]`), &res); err != nil { - return err - } - for _, v := range res { - taskTypeReasonPropEnum = append(taskTypeReasonPropEnum, v) - } - } - if err := validate.Enum(path, location, value, taskTypeReasonPropEnum); err != nil { - return err - } - return nil -} - -func (m *Task) validateReason(formats strfmt.Registry) error { - - // value enum - if err := m.validateReasonEnum("reason", "body", m.Reason); err != nil { - return err - } - - return nil -} - type CallFilter struct { Path string AppName string diff --git a/api/mqs/ironmq.go b/api/mqs/ironmq.go index 6a260be82..19d884854 100644 --- a/api/mqs/ironmq.go +++ b/api/mqs/ironmq.go @@ -71,7 +71,7 @@ func NewIronMQ(url *url.URL) *IronMQ { if url.Path != "" { queueName = url.Path } else { - queueName = "titan" + queueName = "fn" } mq := &IronMQ{ queues: make([]ironmq.Queue, 3), diff --git a/api/runner/async_runner.go b/api/runner/async_runner.go index 73a9d06a4..8c3a1591a 100644 --- a/api/runner/async_runner.go +++ b/api/runner/async_runner.go @@ -3,23 +3,22 @@ package runner import ( "bytes" "context" + "crypto/tls" "encoding/json" "errors" + "fmt" "io" "io/ioutil" "net" "net/http" "net/url" - "strings" "sync" "time" - "crypto/tls" - "fmt" "github.com/Sirupsen/logrus" "github.com/fnproject/fn/api/models" "github.com/fnproject/fn/api/runner/common" - "github.com/fnproject/fn/api/runner/task" + taskpkg "github.com/fnproject/fn/api/runner/task" "github.com/opentracing/opentracing-go" ) @@ -67,30 +66,6 @@ func getTask(ctx context.Context, url string) (*models.Task, error) { return &task, nil } -func getCfg(t *models.Task) *task.Config { - cfg := &task.Config{ - Image: *t.Image, - ID: t.ID, - AppName: t.AppName, - Memory: 128, - Env: t.EnvVars, - Ready: make(chan struct{}), - Stdin: strings.NewReader(t.Payload), - } - if t.Timeout == nil || *t.Timeout <= 0 { - cfg.Timeout = DefaultTimeout - } else { - cfg.Timeout = time.Duration(*t.Timeout) * time.Second - } - if t.IdleTimeout == nil || *t.IdleTimeout <= 0 { - cfg.IdleTimeout = DefaultIdleTimeout - } else { - cfg.IdleTimeout = time.Duration(*t.IdleTimeout) * time.Second - } - - return cfg -} - func deleteTask(ctx context.Context, url string, task *models.Task) error { span, _ := opentracing.StartSpanFromContext(ctx, "delete_task") defer span.Finish() @@ -186,7 +161,7 @@ func runAsyncTask(ctx context.Context, url string, rnr *Runner, ds models.Datast go func() { defer wg.Done() // Process Task - _, err := rnr.RunTrackedTask(task, ctx, getCfg(task)) + _, err := rnr.RunTrackedTask(task, ctx, taskpkg.ConfigFromTask(task)) if err != nil { log.WithError(err).Error("Cannot run task") } diff --git a/api/runner/async_runner_test.go b/api/runner/async_runner_test.go index c0ad456ae..ec39345fe 100644 --- a/api/runner/async_runner_test.go +++ b/api/runner/async_runner_test.go @@ -37,7 +37,7 @@ func getMockTask() models.Task { priority := int32(0) image := fmt.Sprintf("Image-%d", rand.Int31()%1000) task := &models.Task{} - task.Image = &image + task.Image = image task.ID = fmt.Sprintf("ID-%d", rand.Int31()%1000) task.AppName = fmt.Sprintf("RouteName-%d", rand.Int31()%1000) task.Priority = &priority diff --git a/api/runner/protocol/default.go b/api/runner/protocol/default.go index 7795d0dcc..dd33b3dee 100644 --- a/api/runner/protocol/default.go +++ b/api/runner/protocol/default.go @@ -14,6 +14,6 @@ func (p *DefaultProtocol) IsStreamable() bool { return false } -func (p *DefaultProtocol) Dispatch(ctx context.Context, t task.Request) error { +func (p *DefaultProtocol) Dispatch(context.Context, *task.Config) error { return nil } diff --git a/api/runner/protocol/factory.go b/api/runner/protocol/factory.go index 1f3b86325..84d896cfb 100644 --- a/api/runner/protocol/factory.go +++ b/api/runner/protocol/factory.go @@ -16,7 +16,9 @@ var errInvalidProtocol = errors.New("Invalid Protocol") // It returns any protocol error, if present. type ContainerIO interface { IsStreamable() bool - Dispatch(ctx context.Context, t task.Request) error + + // TODO this should take a drivers.ContainerTask? + Dispatch(ctx context.Context, t *task.Config) error } // Protocol defines all protocols that operates a ContainerIO. @@ -55,7 +57,7 @@ func (p Protocol) MarshalJSON() ([]byte, error) { type errorProto struct{} func (e *errorProto) IsStreamable() bool { return false } -func (e *errorProto) Dispatch(ctx context.Context, t task.Request) error { return errInvalidProtocol } +func (e *errorProto) Dispatch(ctx context.Context, t *task.Config) error { return errInvalidProtocol } // New creates a valid protocol handler from a I/O pipe representing containers // stdin/stdout. diff --git a/api/runner/protocol/http.go b/api/runner/protocol/http.go index 989d08616..ffc2cdc4b 100644 --- a/api/runner/protocol/http.go +++ b/api/runner/protocol/http.go @@ -27,22 +27,23 @@ func (p *HTTPProtocol) IsStreamable() bool { return true } -func (p *HTTPProtocol) Dispatch(ctx context.Context, t task.Request) error { +func (p *HTTPProtocol) Dispatch(ctx context.Context, cfg *task.Config) error { var retErr error done := make(chan struct{}) go func() { + // TODO not okay. plumb content-length from req into cfg.. var body bytes.Buffer - io.Copy(&body, t.Config.Stdin) + io.Copy(&body, cfg.Stdin) req, err := http.NewRequest("GET", "/", &body) if err != nil { retErr = err return } - for k, v := range t.Config.Env { + for k, v := range cfg.Env { req.Header.Set(k, v) } req.Header.Set("Content-Length", fmt.Sprint(body.Len())) - req.Header.Set("Task-ID", t.Config.ID) + req.Header.Set("Task-ID", cfg.ID) raw, err := httputil.DumpRequest(req, true) if err != nil { retErr = err @@ -56,11 +57,11 @@ func (p *HTTPProtocol) Dispatch(ctx context.Context, t task.Request) error { return } - io.Copy(t.Config.Stdout, res.Body) + io.Copy(cfg.Stdout, res.Body) done <- struct{}{} }() - timeout := time.After(t.Config.Timeout) + timeout := time.After(cfg.Timeout) select { case <-ctx.Done(): diff --git a/api/runner/runner.go b/api/runner/runner.go index 57fd4180e..f0d33764a 100644 --- a/api/runner/runner.go +++ b/api/runner/runner.go @@ -55,7 +55,7 @@ func New(ctx context.Context, flog FuncLogger, ds models.Datastore) (*Runner, er // TODO: Is this really required for the container drivers? Can we remove it? env := common.NewEnvironment(func(e *common.Environment) {}) - // TODO: Create a drivers.New(runnerConfig) in Titan + // TODO: Create drivers.New(runnerConfig) driver, err := selectDriver("docker", env, &drivers.Config{}) if err != nil { return nil, err diff --git a/api/runner/task/task.go b/api/runner/task/task.go index cc568ca38..f1cd002e8 100644 --- a/api/runner/task/task.go +++ b/api/runner/task/task.go @@ -3,18 +3,23 @@ package task import ( "context" "io" + "strings" "time" + "github.com/fnproject/fn/api/models" "github.com/fnproject/fn/api/runner/drivers" + "github.com/go-openapi/strfmt" ) +// TODO this whole package should be hanged, drawn & quartered + type Config struct { ID string + AppName string Path string Image string Timeout time.Duration IdleTimeout time.Duration - AppName string Memory uint64 BaseEnv map[string]string // only app & route config vals [for hot] Env map[string]string // includes BaseEnv @@ -28,6 +33,43 @@ type Config struct { Stderr io.WriteCloser // closer for flushy poo } +// TODO Task & Config should be merged +func TaskFromConfig(cfg *Config) *models.Task { + return &models.Task{ + IDStatus: models.IDStatus{ID: cfg.ID}, + AppName: cfg.AppName, + Path: cfg.Path, + Image: cfg.Image, + Timeout: int32(cfg.Timeout.Seconds()), + IdleTimeout: int32(cfg.IdleTimeout.Seconds()), + Memory: cfg.Memory, + BaseEnv: cfg.BaseEnv, + EnvVars: cfg.Env, + // Format: cfg.Format, TODO plumb this + CreatedAt: strfmt.DateTime(time.Now()), + + Delay: 0, // TODO not wired to users + // Payload: stdin + Priority: new(int32), // 0, TODO not wired atm to users. + } +} + +func ConfigFromTask(t *models.Task) *Config { + return &Config{ + ID: t.ID, + AppName: t.AppName, + Path: t.Path, + Image: t.Image, + Timeout: time.Duration(t.Timeout) * time.Second, + IdleTimeout: time.Duration(t.IdleTimeout) * time.Second, + Memory: t.Memory, + BaseEnv: t.BaseEnv, + Env: t.EnvVars, + Stdin: strings.NewReader(t.Payload), + Ready: make(chan struct{}), + } +} + // Request stores the task to be executed, It holds in itself the channel to // return its response to its caller. type Request struct { diff --git a/api/runner/worker.go b/api/runner/worker.go index ec2935125..6ccafeec6 100644 --- a/api/runner/worker.go +++ b/api/runner/worker.go @@ -353,7 +353,7 @@ func (hc *htfn) serve(ctx context.Context) { stderr.swap(tlog) start := time.Now() - err := hc.proto.Dispatch(ctx, t) + err := hc.proto.Dispatch(ctx, t.Config) status := "success" if err != nil { status = "error" diff --git a/api/server/runner.go b/api/server/runner.go index baeaf3987..b9ddab4e2 100644 --- a/api/server/runner.go +++ b/api/server/runner.go @@ -19,7 +19,6 @@ import ( "github.com/fnproject/fn/api/runner/common" "github.com/fnproject/fn/api/runner/task" "github.com/gin-gonic/gin" - "github.com/go-openapi/strfmt" cache "github.com/patrickmn/go-cache" ) @@ -148,8 +147,7 @@ func (s *Server) serve(ctx context.Context, c *gin.Context, appName string, rout baseVars["FN_FORMAT"] = route.Format baseVars["APP_NAME"] = appName baseVars["ROUTE"] = route.Path - // TODO add this back after #193 #195 (fix async RAM) - // baseVars["MEMORY_MB"] = fmt.Sprintf("%d", route.Memory) + baseVars["MEMORY_MB"] = fmt.Sprintf("%d", route.Memory) // app config for k, v := range app.Config { @@ -213,26 +211,20 @@ func (s *Server) serve(ctx context.Context, c *gin.Context, appName string, rout } s.Runner.Enqueue() - createdAt := strfmt.DateTime(time.Now()) - newTask := &models.Task{} - newTask.Image = &cfg.Image - newTask.ID = cfg.ID - newTask.CreatedAt = createdAt - newTask.Path = route.Path - newTask.EnvVars = cfg.Env - newTask.AppName = cfg.AppName + newTask := task.TaskFromConfig(cfg) switch route.Type { case "async": + // TODO we should be able to do hot input to async. plumb protocol stuff + // TODO enqueue should unravel the payload? + // Read payload pl, err := ioutil.ReadAll(cfg.Stdin) if err != nil { handleErrorResponse(c, models.ErrInvalidPayload) return true } - // Create Task - priority := int32(0) - newTask.Priority = &priority + // Add in payload newTask.Payload = string(pl) // Push to queue @@ -243,7 +235,7 @@ func (s *Server) serve(ctx context.Context, c *gin.Context, appName string, rout } log.Info("Added new task to queue") - c.JSON(http.StatusAccepted, map[string]string{"call_id": newTask.ID}) + c.JSON(http.StatusAccepted, map[string]string{"call_id": cfg.ID}) default: result, err := s.Runner.RunTrackedTask(newTask, ctx, cfg) diff --git a/docs/operating/mqs/README.md b/docs/operating/mqs/README.md index c08d2d7ff..b6f846dc1 100644 --- a/docs/operating/mqs/README.md +++ b/docs/operating/mqs/README.md @@ -10,7 +10,7 @@ docker run -e "MQ_URL=redis://localhost:6379/" ... ## [Bolt](https://github.com/boltdb/bolt) (default) -URL: `bolt:///titan/data/functions-mq.db` +URL: `bolt:///fn/data/functions-mq.db` See Bolt in databases above. The Bolt database is locked at the file level, so the file cannot be the same as the one used for the Bolt Datastore. diff --git a/docs/swagger.yml b/docs/swagger.yml index 02a8f6c9f..31b307474 100644 --- a/docs/swagger.yml +++ b/docs/swagger.yml @@ -682,7 +682,7 @@ definitions: payload: type: string # 256k - # maxLength breaks ruby generator too: https://github.com/treeder/worker_ruby/blob/0aa9236ce5060af3f15758937712973f80dd54fe/lib/iron_titan/models/task.rb#L272 + # maxLength breaks ruby generator too # maxLength: 268435456 description: Payload for the task. This is what you pass into each task to make it do something. group_name: