fn-serverless/test/fn-system-tests/exec_runner_status_test.go
Tolga Ceylan df53250958 fn: latency metrics for various call states (#1332)
* fn: latency metrics for various call states

This complements the API latency metrics available
on the LB agent. In this case, we measure the latency of
calls that have finished in one of the following states:
     "completed"
     "canceled"
     "timeouts"
     "errors"
     "server_busy"
While measuring this latency, we subtract the time the
actual function execution took. This is not precise, but
it is an approximation suitable for trending.

Going forward, we could also subtract UDS wait time and/or
Docker pull latency from this metric as an enhancement
to this PR.
2018-11-30 13:20:59 -08:00
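
As a rough, self-contained sketch of the measurement described above (the names recordCallLatency and userExecTime are hypothetical, not part of the fn agent API; the real wiring lives in the agent's metrics code):

package main

import (
	"fmt"
	"time"
)

// recordCallLatency is a hypothetical helper: it reports total call
// latency minus the time spent executing the user's function, which
// approximates platform overhead for a given final call state.
func recordCallLatency(state string, start time.Time, userExecTime *time.Duration) {
	overhead := time.Since(start)
	if userExecTime != nil {
		// Approximation: the remainder still includes UDS wait,
		// docker pull time, etc.
		overhead -= *userExecTime
	}
	fmt.Printf("state=%s overhead=%v\n", state, overhead)
}

func main() {
	start := time.Now()
	time.Sleep(300 * time.Millisecond)      // stand-in for the whole call lifecycle
	userExecTime := 250 * time.Millisecond  // pretend the function itself ran this long
	recordCallLatency("completed", start, &userExecTime)
}

Because only the function's own execution time is subtracted, the reported overhead still includes UDS wait and image pull time, which is exactly what the commit message proposes to refine later.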


package tests

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"path"
	"testing"
	"time"

	"github.com/fnproject/fn/api/id"
	"github.com/fnproject/fn/api/models"
	"github.com/fnproject/fn/api/runnerpool"
)

func callFN(ctx context.Context, u string, content io.Reader, output io.Writer, invokeType string) (*http.Response, error) {
	req, err := http.NewRequest("POST", u, content)
	if err != nil {
		return nil, fmt.Errorf("error running fn: %s", err)
	}
	req.Header.Set("Fn-Invoke-Type", invokeType)
	req.Header.Set("Content-Type", "application/json")
	req = req.WithContext(ctx)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error running fn: %s", err)
	}
	defer resp.Body.Close()

	io.Copy(output, resp.Body)
	return resp, nil
}

// We should not be able to invoke a StatusImage
func TestCannotExecuteStatusImage(t *testing.T) {
	buf := setLogBuffer()
	defer func() {
		if t.Failed() {
			t.Log(buf.String())
		}
	}()

	if StatusImage == "" {
		t.Skip("no status image defined")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	app := &models.App{Name: id.New().String()}
	app = ensureApp(t, app)

	fn := &models.Fn{
		AppID: app.ID,
		Name:  id.New().String(),
		Image: StatusImage,
		ResourceConfig: models.ResourceConfig{
			Memory: memory,
		},
	}
	fn = ensureFn(t, fn)

	lb, err := LB()
	if err != nil {
		t.Fatalf("Got unexpected error: %v", err)
	}
	u := url.URL{
		Scheme: "http",
		Host:   lb,
	}
	u.Path = path.Join(u.Path, "invoke", fn.ID)

	content := bytes.NewBuffer([]byte(`status`))
	output := &bytes.Buffer{}

	resp, err := callFN(ctx, u.String(), content, output, models.TypeSync)
	if err != nil {
		t.Fatalf("Got unexpected error: %v", err)
	}
	if resp.StatusCode != http.StatusBadRequest {
		t.Fatalf("StatusCode check failed on %v", resp.StatusCode)
	}
}

// Some dummy RunnerCall implementation
type myCall struct{}

// implements RunnerCall
func (c *myCall) SlotHashId() string                   { return "" }
func (c *myCall) Extensions() map[string]string        { return nil }
func (c *myCall) RequestBody() io.ReadCloser           { return nil }
func (c *myCall) ResponseWriter() http.ResponseWriter  { return nil }
func (c *myCall) StdErr() io.ReadWriteCloser           { return nil }
func (c *myCall) Model() *models.Call                  { return nil }
func (c *myCall) GetUserExecutionTime() *time.Duration { return nil }
func (c *myCall) AddUserExecutionTime(time.Duration)   {}

func TestExecuteRunnerStatus(t *testing.T) {
	buf := setLogBuffer()
	defer func() {
		if t.Failed() {
			t.Log(buf.String())
		}
	}()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	var zoo myCall

	pool, err := NewSystemTestNodePool()
	if err != nil {
		t.Fatalf("Creating Node Pool failed %v", err)
	}
	runners, err := pool.Runners(context.Background(), &zoo)
	if err != nil {
		t.Fatalf("Getting Runners from Pool failed %v", err)
	}
	if len(runners) == 0 {
		t.Fatalf("Getting Runners from Pool failed no-runners")
	}

	concurrency := 10
	res := make(chan *runnerpool.RunnerStatus, concurrency*len(runners))
	errs := make(chan error, concurrency*len(runners))

	// Fire concurrent status calls at every runner.
	for _, runner := range runners {
		for i := 0; i < concurrency; i++ {
			go func(dest runnerpool.Runner) {
				status, err := dest.Status(ctx)
				if err != nil {
					errs <- err
				} else {
					t.Logf("Runner %v got Status=%+v", dest.Address(), status)
					res <- status
				}
			}(runner)
		}
	}

	// Collect the responses, grouping them by status id.
	lookup := make(map[string][]*runnerpool.RunnerStatus)
	for i := 0; i < concurrency*len(runners); i++ {
		select {
		case status := <-res:
			if status == nil || status.StatusFailed {
				t.Fatalf("Runners Status not OK for %+v", status)
			}
			lookup[status.StatusId] = append(lookup[status.StatusId], status)
		case err := <-errs:
			if err != nil {
				t.Fatal(err)
			}
		}
	}

	// WARNING: possibly flaky check below; the numbers may need to be relaxed.
	// Why 3? Status calls are cached for idleTimeout + gracePeriod = 1.5 secs.
	// That window should easily cover all of the queries above: we have 3 runners,
	// and each should serve its 10 status calls from a single cached response,
	// so we expect at most 3 distinct status ids (one per runner).
	if len(lookup) > 3 {
		for key, arr := range lookup {
			t.Logf("key=%v count=%v", key, len(arr))
		}
		t.Fatalf("expected at most 3 distinct status ids, got %v", len(lookup))
	}

	// Wait for the cached status responses to expire.
	time.Sleep(2 * time.Second)

	// Now we should get fresh data.
	for _, dest := range runners {
		status, err := dest.Status(ctx)
		if err != nil {
			t.Fatalf("Runners Status failed for %v err=%v", dest.Address(), err)
		}
		if status == nil || status.StatusFailed {
			t.Fatalf("Runners Status not OK for %v %v", dest.Address(), status)
		}
		t.Logf("Runner %v got Status=%+v", dest.Address(), status)
		if _, ok := lookup[status.StatusId]; ok {
			t.Fatalf("Runners Status did not return fresh status id %v %v", dest.Address(), status)
		}
	}
}
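
The test above hinges on the runner-side status cache: within the idleTimeout + gracePeriod window (1.5 secs here), repeated status calls are served from one cached probe per runner, so at most one status id per runner appears; after the window lapses, fresh ids are issued. Below is a minimal standalone sketch of that behavior under assumed names (statusCache is illustrative, not the runner's actual implementation):

package main

import (
	"fmt"
	"sync"
	"time"
)

// statusCache is a hypothetical stand-in for the runner's status cache:
// a probe result is reused until its TTL (idleTimeout + gracePeriod,
// 1.5s in the test's setup) expires, then a fresh one is generated.
type statusCache struct {
	mu      sync.Mutex
	id      string
	fetched time.Time
	ttl     time.Duration
	seq     int
}

// get returns the cached status id, refreshing it once the TTL lapses.
func (c *statusCache) get() string {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.id != "" && time.Since(c.fetched) < c.ttl {
		return c.id // within the window: every caller sees the same id
	}
	c.seq++
	c.id = fmt.Sprintf("status-%d", c.seq)
	c.fetched = time.Now()
	return c.id
}

func main() {
	c := &statusCache{ttl: 1500 * time.Millisecond}
	fmt.Println(c.get(), c.get()) // same id: served from the cache
	time.Sleep(2 * time.Second)
	fmt.Println(c.get()) // fresh id after the cache expires
}

With 3 runners and a 1.5s TTL, the 30 concurrent calls in the test should map to at most 3 distinct status ids, and the post-sleep calls should all produce new ones, which is exactly what the two assertions check.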