From 892c843d8773ce19a0c2c47b692fdbc3df6d5064 Mon Sep 17 00:00:00 2001
From: Reed Allman <rdallman10@gmail.com>
Date: Tue, 28 Nov 2017 11:21:39 -0600
Subject: [PATCH] add error to call model (#539)

* add error to call model

closes #331

previously, for async this error was being masked completely even if it was
something useful like the image not existing. for sync, the error was returned
in the http request but now it's also being stored. this error itself can
cover a lot of landscape, it could be an error in getting a slot, pulling an
image, running a container, among other things. anyway, no longer being
masked. we can likely improve it in certain cases we run into in the future,
but it's open ended at the moment and not being masked like some errors in
sync http request returns (503 non-models.APIError) for now.

* tucks in callTrigger stuff to keep api clean
* adds swagger
* adds migration
* adds tests for datastore and agent to ensure behavior

* pull images before tests are ran

* gofmt migrations file
---
 api/agent/agent.go                            |  5 +-
 api/agent/agent_test.go                       | 64 +++++++++++++++++++
 api/agent/call.go                             | 21 +++---
 api/datastore/internal/datastoretest/test.go  | 33 ++++++++--
 .../sql/migrations/3_add_call_error.down.sql  |  1 +
 .../sql/migrations/3_add_call_error.up.sql    |  1 +
 api/datastore/sql/migrations/migrations.go    | 54 ++++++++++++++--
 api/datastore/sql/sql.go                      |  9 ++-
 api/models/call.go                            |  4 ++
 docs/swagger.yml                              |  4 ++
 test.sh                                       |  6 +-
 11 files changed, 178 insertions(+), 24 deletions(-)
 create mode 100644 api/datastore/sql/migrations/3_add_call_error.down.sql
 create mode 100644 api/datastore/sql/migrations/3_add_call_error.up.sql

diff --git a/api/agent/agent.go b/api/agent/agent.go
index 0bba47d34..784c03532 100644
--- a/api/agent/agent.go
+++ b/api/agent/agent.go
@@ -181,7 +181,6 @@ func transformTimeout(e error, isRetriable bool) error {
 }
 
 func (a *agent) Submit(callI Call) error {
-
 	a.wg.Add(1)
 	defer a.wg.Done()
 
@@ -217,7 +216,7 @@ func (a *agent) Submit(callI Call) error {
 	defer slot.Close() // notify our slot is free once we're done
 
 	// TODO Start is checking the timer now, we could do it here, too.
-	err = call.Start(ctx, a)
+	err = call.Start(ctx)
 	if err != nil {
 		a.stats.Dequeue(callI.Model().AppName, callI.Model().Path)
 		return transformTimeout(err, true)
@@ -241,7 +240,7 @@ func (a *agent) Submit(callI Call) error {
 	// TODO: we need to allocate more time to store the call + logs in case the call timed out,
 	// but this could put us over the timeout if the call did not reply yet (need better policy).
 	ctx = opentracing.ContextWithSpan(context.Background(), span)
-	err = call.End(ctx, err, a)
+	err = call.End(ctx, err)
 	return transformTimeout(err, false)
 }
 
diff --git a/api/agent/agent_test.go b/api/agent/agent_test.go
index 3d468eee7..ed3f33dc7 100644
--- a/api/agent/agent_test.go
+++ b/api/agent/agent_test.go
@@ -310,3 +310,67 @@ func TestLoggerIsStringerAndWorks(t *testing.T) {
 
 	// TODO we could check for the toilet to flush here to logrus
 }
+
+func TestSubmitError(t *testing.T) {
+	appName := "myapp"
+	path := "/error"
+	image := "fnproject/error"
+	const timeout = 1
+	const idleTimeout = 20
+	const memory = 256
+	method := "GET"
+	url := "http://127.0.0.1:8080/r/" + appName + path
+	payload := "payload"
+	typ := "sync"
+	format := "default"
+	env := map[string]string{
+		"FN_FORMAT":   format,
+		"FN_APP_NAME": appName,
+		"FN_PATH":     path,
+		"FN_MEMORY":   strconv.Itoa(memory),
+		"FN_TYPE":     typ,
+		"APP_VAR":     "FOO",
+		"ROUTE_VAR":   "BAR",
+		"DOUBLE_VAR":  "BIZ, BAZ",
+	}
+
+	cm := &models.Call{
+		BaseEnv:     env,
+		EnvVars:     env,
+		AppName:     appName,
+		Path:        path,
+		Image:       image,
+		Type:        typ,
+		Format:      format,
+		Timeout:     timeout,
+		IdleTimeout: idleTimeout,
+		Memory:      memory,
+		Payload:     payload,
+		URL:         url,
+		Method:      method,
+	}
+
+	// FromModel doesn't need a datastore, for now...
+	ds := datastore.NewMockInit(nil, nil, nil)
+
+	a := New(ds, ds, new(mqs.Mock))
+	defer a.Close()
+
+	callI, err := a.GetCall(FromModel(cm))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	err = a.Submit(callI)
+	if err == nil {
+		t.Fatal("expected error but got none")
+	}
+
+	if cm.Status != "error" {
+		t.Fatal("expected status to be set to 'error' but was", cm.Status)
+	}
+
+	if cm.Error == "" {
+		t.Fatal("expected error string to be set on call")
+	}
+}
diff --git a/api/agent/call.go b/api/agent/call.go
index 34474bb08..a2c4c845f 100644
--- a/api/agent/call.go
+++ b/api/agent/call.go
@@ -30,13 +30,13 @@ type Call interface {
 	// etc.
 	// TODO Start and End can likely be unexported as they are only used in the agent,
 	// and on a type which is constructed in a specific agent. meh.
-	Start(ctx context.Context, t callTrigger) error
+	Start(ctx context.Context) error
 
 	// End will be called immediately after attempting a call execution,
 	// regardless of whether the execution failed or not. An error will be passed
 	// to End, which if nil indicates a successful execution. Any error returned
 	// from End will be returned as the error from Submit.
-	End(ctx context.Context, err error, t callTrigger) error
+	End(ctx context.Context, err error) error
 }
 
 // TODO build w/o closures... lazy
@@ -251,6 +251,7 @@ func (a *agent) GetCall(opts ...CallOpt) (Call, error) {
 	c.ds = a.ds
 	c.ls = a.ls
 	c.mq = a.mq
+	c.ct = a
 
 	ctx, _ := common.LoggerWithFields(c.req.Context(),
 		logrus.Fields{"id": c.ID, "app": c.AppName, "route": c.Path})
@@ -277,11 +278,12 @@ type call struct {
 	w      io.Writer
 	req    *http.Request
 	stderr io.ReadWriteCloser
+	ct     callTrigger
 }
 
 func (c *call) Model() *models.Call { return c.Call }
 
-func (c *call) Start(ctx context.Context, t callTrigger) error {
+func (c *call) Start(ctx context.Context) error {
 	span, ctx := opentracing.StartSpanFromContext(ctx, "agent_call_start")
 	defer span.Finish()
 
@@ -320,7 +322,7 @@ func (c *call) Start(ctx context.Context, t callTrigger) error {
 		}
 	}
 
-	err := t.fireBeforeCall(ctx, c.Model())
+	err := c.ct.fireBeforeCall(ctx, c.Model())
 	if err != nil {
 		return fmt.Errorf("BeforeCall: %v", err)
 	}
@@ -328,7 +330,7 @@ func (c *call) Start(ctx context.Context, t callTrigger) error {
 	return nil
 }
 
-func (c *call) End(ctx context.Context, errIn error, t callTrigger) error {
+func (c *call) End(ctx context.Context, errIn error) error {
 	span, ctx := opentracing.StartSpanFromContext(ctx, "agent_call_end")
 	defer span.Finish()
 
@@ -340,8 +342,8 @@ func (c *call) End(ctx context.Context, errIn error, t callTrigger) error {
 	case context.DeadlineExceeded:
 		c.Status = "timeout"
 	default:
-		// XXX (reed): should we append the error to logs? Error field? (TR) yes, think so, otherwise it's lost looks like?
 		c.Status = "error"
+		c.Error = errIn.Error()
 	}
 
 	if c.Type == models.TypeAsync {
@@ -366,11 +368,12 @@ func (c *call) End(ctx context.Context, errIn error, t callTrigger) error {
 
 	// NOTE call this after InsertLog or the buffer will get reset
 	c.stderr.Close()
-	err := t.fireAfterCall(ctx, c.Model())
-	if err != nil {
+
+	if err := c.ct.fireAfterCall(ctx, c.Model()); err != nil {
 		return fmt.Errorf("AfterCall: %v", err)
 	}
-	return errIn
+
+	return errIn // original error, important for use in sync call returns
 }
 
 func toEnvName(envtype, name string) string {
diff --git a/api/datastore/internal/datastoretest/test.go b/api/datastore/internal/datastoretest/test.go
index 07093f887..6d0b20c5c 100644
--- a/api/datastore/internal/datastoretest/test.go
+++ b/api/datastore/internal/datastoretest/test.go
@@ -37,7 +37,8 @@ func Test(t *testing.T, dsf func(t *testing.T) models.Datastore) {
 
 	call := new(models.Call)
 	call.CreatedAt = strfmt.DateTime(time.Now())
-	call.Status = "success"
+	call.Status = "error"
+	call.Error = "ya dun goofed"
 	call.StartedAt = strfmt.DateTime(time.Now())
 	call.CompletedAt = strfmt.DateTime(time.Now())
 	call.AppName = testApp.Name
@@ -55,13 +56,37 @@ func Test(t *testing.T, dsf func(t *testing.T) models.Datastore) {
 	t.Run("call-get", func(t *testing.T) {
 		ds := dsf(t)
 		call.ID = id.New().String()
-		ds.InsertCall(ctx, call)
+		err := ds.InsertCall(ctx, call)
+		if err != nil {
+			t.Fatalf("Test GetCall: unexpected error `%v`", err)
+		}
 		newCall, err := ds.GetCall(ctx, call.AppName, call.ID)
 		if err != nil {
-			t.Fatalf("Test GetCall(ctx, call.ID): unexpected error `%v`", err)
+			t.Fatalf("Test GetCall: unexpected error `%v`", err)
 		}
 		if call.ID != newCall.ID {
-			t.Fatalf("Test GetCall(ctx, call.ID): unexpected error `%v`", err)
+			t.Fatalf("Test GetCall: id mismatch `%v` `%v`", call.ID, newCall.ID)
+		}
+		if call.Status != newCall.Status {
+			t.Fatalf("Test GetCall: status mismatch `%v` `%v`", call.Status, newCall.Status)
+		}
+		if call.Error != newCall.Error {
+			t.Fatalf("Test GetCall: error mismatch `%v` `%v`", call.Error, newCall.Error)
+		}
+		if time.Time(call.CreatedAt).Unix() != time.Time(newCall.CreatedAt).Unix() {
+			t.Fatalf("Test GetCall: created_at mismatch `%v` `%v`", call.CreatedAt, newCall.CreatedAt)
+		}
+		if time.Time(call.StartedAt).Unix() != time.Time(newCall.StartedAt).Unix() {
+			t.Fatalf("Test GetCall: started_at mismatch `%v` `%v`", call.StartedAt, newCall.StartedAt)
+		}
+		if time.Time(call.CompletedAt).Unix() != time.Time(newCall.CompletedAt).Unix() {
+			t.Fatalf("Test GetCall: completed_at mismatch `%v` `%v`", call.CompletedAt, newCall.CompletedAt)
+		}
+		if call.AppName != newCall.AppName {
+			t.Fatalf("Test GetCall: app_name mismatch `%v` `%v`", call.AppName, newCall.AppName)
+		}
+		if call.Path != newCall.Path {
+			t.Fatalf("Test GetCall: path mismatch `%v` `%v`", call.Path, newCall.Path)
 		}
 	})
 
diff --git a/api/datastore/sql/migrations/3_add_call_error.down.sql b/api/datastore/sql/migrations/3_add_call_error.down.sql
new file mode 100644
index 000000000..5319e0a8b
--- /dev/null
+++ b/api/datastore/sql/migrations/3_add_call_error.down.sql
@@ -0,0 +1 @@
+ALTER TABLE calls DROP COLUMN error;
diff --git a/api/datastore/sql/migrations/3_add_call_error.up.sql b/api/datastore/sql/migrations/3_add_call_error.up.sql
new file mode 100644
index 000000000..f2c4b12a0
--- /dev/null
+++ b/api/datastore/sql/migrations/3_add_call_error.up.sql
@@ -0,0 +1 @@
+ALTER TABLE calls ADD error text;
diff --git a/api/datastore/sql/migrations/migrations.go b/api/datastore/sql/migrations/migrations.go
index 98503061f..54af44440 100644
--- a/api/datastore/sql/migrations/migrations.go
+++ b/api/datastore/sql/migrations/migrations.go
@@ -4,6 +4,8 @@
 // 1_add_route_created_at.up.sql
 // 2_add_call_stats.down.sql
 // 2_add_call_stats.up.sql
+// 3_add_call_error.down.sql
+// 3_add_call_error.up.sql
 // DO NOT EDIT!
 
 package migrations
@@ -86,7 +88,7 @@ func _1_add_route_created_atDownSql() (*asset, error) {
 		return nil, err
 	}
 
-	info := bindataFileInfo{name: "1_add_route_created_at.down.sql", size: 43, mode: os.FileMode(420), modTime: time.Unix(1510786558, 0)}
+	info := bindataFileInfo{name: "1_add_route_created_at.down.sql", size: 43, mode: os.FileMode(420), modTime: time.Unix(1511259011, 0)}
 	a := &asset{bytes: bytes, info: info}
 	return a, nil
 }
@@ -106,7 +108,7 @@ func _1_add_route_created_atUpSql() (*asset, error) {
 		return nil, err
 	}
 
-	info := bindataFileInfo{name: "1_add_route_created_at.up.sql", size: 40, mode: os.FileMode(420), modTime: time.Unix(1510786558, 0)}
+	info := bindataFileInfo{name: "1_add_route_created_at.up.sql", size: 40, mode: os.FileMode(420), modTime: time.Unix(1511259011, 0)}
 	a := &asset{bytes: bytes, info: info}
 	return a, nil
 }
@@ -126,7 +128,7 @@ func _2_add_call_statsDownSql() (*asset, error) {
 		return nil, err
 	}
 
-	info := bindataFileInfo{name: "2_add_call_stats.down.sql", size: 37, mode: os.FileMode(420), modTime: time.Unix(1511225799, 0)}
+	info := bindataFileInfo{name: "2_add_call_stats.down.sql", size: 37, mode: os.FileMode(420), modTime: time.Unix(1511259011, 0)}
 	a := &asset{bytes: bytes, info: info}
 	return a, nil
 }
@@ -146,7 +148,47 @@ func _2_add_call_statsUpSql() (*asset, error) {
 		return nil, err
 	}
 
-	info := bindataFileInfo{name: "2_add_call_stats.up.sql", size: 34, mode: os.FileMode(420), modTime: time.Unix(1511225651, 0)}
+	info := bindataFileInfo{name: "2_add_call_stats.up.sql", size: 34, mode: os.FileMode(420), modTime: time.Unix(1511259011, 0)}
+	a := &asset{bytes: bytes, info: info}
+	return a, nil
+}
+
+var __3_add_call_errorDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x48\x4e\xcc\xc9\x29\x56\x70\x09\xf2\x0f\x50\x70\xf6\xf7\x09\xf5\xf5\x53\x48\x2d\x2a\xca\x2f\xb2\xe6\x02\x04\x00\x00\xff\xff\xc1\x14\x26\x51\x25\x00\x00\x00")
+
+func _3_add_call_errorDownSqlBytes() ([]byte, error) {
+	return bindataRead(
+		__3_add_call_errorDownSql,
+		"3_add_call_error.down.sql",
+	)
+}
+
+func _3_add_call_errorDownSql() (*asset, error) {
+	bytes, err := _3_add_call_errorDownSqlBytes()
+	if err != nil {
+		return nil, err
+	}
+
+	info := bindataFileInfo{name: "3_add_call_error.down.sql", size: 37, mode: os.FileMode(420), modTime: time.Unix(1511265731, 0)}
+	a := &asset{bytes: bytes, info: info}
+	return a, nil
+}
+
+var __3_add_call_errorUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x48\x4e\xcc\xc9\x29\x56\x70\x74\x71\x51\x48\x2d\x2a\xca\x2f\x52\x28\x49\xad\x28\xb1\xe6\x02\x04\x00\x00\xff\xff\xaf\xba\x27\xcd\x22\x00\x00\x00")
+
+func _3_add_call_errorUpSqlBytes() ([]byte, error) {
+	return bindataRead(
+		__3_add_call_errorUpSql,
+		"3_add_call_error.up.sql",
+	)
+}
+
+func _3_add_call_errorUpSql() (*asset, error) {
+	bytes, err := _3_add_call_errorUpSqlBytes()
+	if err != nil {
+		return nil, err
+	}
+
+	info := bindataFileInfo{name: "3_add_call_error.up.sql", size: 34, mode: os.FileMode(420), modTime: time.Unix(1511265909, 0)}
 	a := &asset{bytes: bytes, info: info}
 	return a, nil
 }
@@ -207,6 +249,8 @@ var _bindata = map[string]func() (*asset, error){
 	"1_add_route_created_at.up.sql":   _1_add_route_created_atUpSql,
 	"2_add_call_stats.down.sql":       _2_add_call_statsDownSql,
 	"2_add_call_stats.up.sql":         _2_add_call_statsUpSql,
+	"3_add_call_error.down.sql":       _3_add_call_errorDownSql,
+	"3_add_call_error.up.sql":         _3_add_call_errorUpSql,
 }
 
 // AssetDir returns the file names below a certain
@@ -254,6 +298,8 @@ var _bintree = &bintree{nil, map[string]*bintree{
 	"1_add_route_created_at.up.sql":   &bintree{_1_add_route_created_atUpSql, map[string]*bintree{}},
 	"2_add_call_stats.down.sql":       &bintree{_2_add_call_statsDownSql, map[string]*bintree{}},
 	"2_add_call_stats.up.sql":         &bintree{_2_add_call_statsUpSql, map[string]*bintree{}},
+	"3_add_call_error.down.sql":       &bintree{_3_add_call_errorDownSql, map[string]*bintree{}},
+	"3_add_call_error.up.sql":         &bintree{_3_add_call_errorUpSql, map[string]*bintree{}},
 }}
 
 // RestoreAsset restores an asset under the given directory
diff --git a/api/datastore/sql/sql.go b/api/datastore/sql/sql.go
index 8a5b7ba65..57f880d5c 100644
--- a/api/datastore/sql/sql.go
+++ b/api/datastore/sql/sql.go
@@ -66,6 +66,7 @@ var tables = [...]string{`CREATE TABLE IF NOT EXISTS routes (
 	app_name varchar(256) NOT NULL,
 	path varchar(256) NOT NULL,
 	stats text,
+	error text,
 	PRIMARY KEY (id)
 );`,
 
@@ -78,7 +79,7 @@ var tables = [...]string{`CREATE TABLE IF NOT EXISTS routes (
 
 const (
 	routeSelector = `SELECT app_name, path, image, format, memory, type, timeout, idle_timeout, headers, config, created_at FROM routes`
-	callSelector  = `SELECT id, created_at, started_at, completed_at, status, app_name, path, stats FROM calls`
+	callSelector  = `SELECT id, created_at, started_at, completed_at, status, app_name, path, stats, error FROM calls`
 )
 
 type sqlStore struct {
@@ -587,7 +588,8 @@ func (ds *sqlStore) InsertCall(ctx context.Context, call *models.Call) error {
 		status,
 		app_name,
 		path,
-		stats
+		stats,
+		error
 	)
 	VALUES (
 		:id,
@@ -597,7 +599,8 @@ func (ds *sqlStore) InsertCall(ctx context.Context, call *models.Call) error {
 		:status,
 		:app_name,
 		:path,
-		:stats
+		:stats,
+		:error
 	);`)
 
 	_, err := ds.db.NamedExecContext(ctx, query, call)
diff --git a/api/models/call.go b/api/models/call.go
index c5de8a0dd..27f5f37b9 100644
--- a/api/models/call.go
+++ b/api/models/call.go
@@ -133,6 +133,10 @@ type Call struct {
 
 	// Stats is a list of metrics from this call's execution, possibly empty.
 	Stats drivers.Stats `json:"stats,omitempty" db:"stats"`
+
+	// Error is the reason why the call failed, it is only non-empty if
+	// status is equal to "error".
+	Error string `json:"error,omitempty" db:"error"`
 }
 
 type CallFilter struct {
diff --git a/docs/swagger.yml b/docs/swagger.yml
index 3b4167840..e741fe84c 100644
--- a/docs/swagger.yml
+++ b/docs/swagger.yml
@@ -646,6 +646,10 @@ definitions:
         type: string
         description: Call execution status.
         readOnly: true
+      error:
+        type: string
+        description: Call execution error, if status is 'error'.
+        readOnly: true
       app_name:
         type: string
         description: App name that is assigned to a route that is being executed.
diff --git a/test.sh b/test.sh
index 048323cab..449bb2644 100755
--- a/test.sh
+++ b/test.sh
@@ -28,7 +28,11 @@ docker rm -fv func-mysql-test || echo No prev mysql test db container
 docker run --name func-mysql-test -p 3306:3306 -e MYSQL_DATABASE=funcs -e MYSQL_ROOT_PASSWORD=root -d mysql
 docker rm -fv func-minio-test || echo No prev minio test container
 docker run -d -p 9000:9000 --name func-minio-test -e "MINIO_ACCESS_KEY=admin" -e "MINIO_SECRET_KEY=password" minio/minio server /data
-sleep 5
+
+# pull all images used in tests so that tests don't time out and fail spuriously
+docker pull fnproject/sleeper
+docker pull fnproject/error
+docker pull fnproject/hello
 
 MYSQL_HOST=`host func-mysql-test`
 MYSQL_PORT=3306