Make dataplane system test behave deterministically (#849)

Make dataplane system test deterministic by injecting capacity constraints
This commit is contained in:
Dario Domizioli
2018-03-16 11:50:44 +00:00
committed by GitHub
parent 9ed3dd86ea
commit 362e910d9d
3 changed files with 74 additions and 17 deletions

View File

@@ -194,6 +194,17 @@ func (ch *callHandle) cancel(ctx context.Context, err error) {
} }
} }
// CapacityGate abstracts capacity accounting for a pure runner so that an
// alternative implementation (e.g. a deterministic fixed-budget gate in system
// tests) can be injected in place of the default host-memory-based manager.
type CapacityGate interface {
	// CheckAndReserveCapacity must perform an atomic check plus reservation. If an error is returned, then it is
	// guaranteed that no capacity has been committed. If nil is returned, then it is guaranteed that the provided units
	// of capacity have been committed.
	CheckAndReserveCapacity(units uint64) error
	// ReleaseCapacity must perform an atomic release of capacity. The units provided must not bring the capacity under
	// zero; implementations are free to panic in that case.
	ReleaseCapacity(units uint64)
}
type pureRunnerCapacityManager struct { type pureRunnerCapacityManager struct {
totalCapacityUnits uint64 totalCapacityUnits uint64
committedCapacityUnits uint64 committedCapacityUnits uint64
@@ -202,24 +213,24 @@ type pureRunnerCapacityManager struct {
// capacityDeallocator is a callback that releases the capacity previously
// reserved for a call; it is returned by handleTryCall so the caller can
// release capacity exactly once when the call completes or fails.
type capacityDeallocator func()
func newPureRunnerCapacityManager(units uint64) pureRunnerCapacityManager { func newPureRunnerCapacityManager(units uint64) *pureRunnerCapacityManager {
return pureRunnerCapacityManager{ return &pureRunnerCapacityManager{
totalCapacityUnits: units, totalCapacityUnits: units,
committedCapacityUnits: 0, committedCapacityUnits: 0,
} }
} }
func (prcm *pureRunnerCapacityManager) checkAndReserveCapacity(units uint64) error { func (prcm *pureRunnerCapacityManager) CheckAndReserveCapacity(units uint64) error {
prcm.mtx.Lock() prcm.mtx.Lock()
defer prcm.mtx.Unlock() defer prcm.mtx.Unlock()
if prcm.committedCapacityUnits+units < prcm.totalCapacityUnits { if prcm.totalCapacityUnits-prcm.committedCapacityUnits >= units {
prcm.committedCapacityUnits = prcm.committedCapacityUnits + units prcm.committedCapacityUnits = prcm.committedCapacityUnits + units
return nil return nil
} }
return models.ErrCallTimeoutServerBusy return models.ErrCallTimeoutServerBusy
} }
func (prcm *pureRunnerCapacityManager) releaseCapacity(units uint64) { func (prcm *pureRunnerCapacityManager) ReleaseCapacity(units uint64) {
prcm.mtx.Lock() prcm.mtx.Lock()
defer prcm.mtx.Unlock() defer prcm.mtx.Unlock()
if units <= prcm.committedCapacityUnits { if units <= prcm.committedCapacityUnits {
@@ -236,7 +247,7 @@ type pureRunner struct {
listen string listen string
a Agent a Agent
inflight int32 inflight int32
capacity pureRunnerCapacityManager capacity CapacityGate
} }
func (pr *pureRunner) GetCall(opts ...CallOpt) (Call, error) { func (pr *pureRunner) GetCall(opts ...CallOpt) (Call, error) {
@@ -368,7 +379,7 @@ func (pr *pureRunner) handleTryCall(ctx context.Context, tc *runner.TryCall, sta
} }
// Capacity check first // Capacity check first
err = pr.capacity.checkAndReserveCapacity(c.Memory) err = pr.capacity.CheckAndReserveCapacity(c.Memory)
if err != nil { if err != nil {
return func() {}, err return func() {}, err
} }
@@ -379,13 +390,13 @@ func (pr *pureRunner) handleTryCall(ctx context.Context, tc *runner.TryCall, sta
inR, inW := io.Pipe() inR, inW := io.Pipe()
agent_call, err := pr.a.GetCall(FromModelAndInput(&c, inR), WithWriter(w)) agent_call, err := pr.a.GetCall(FromModelAndInput(&c, inR), WithWriter(w))
if err != nil { if err != nil {
return func() { pr.capacity.releaseCapacity(c.Memory) }, err return func() { pr.capacity.ReleaseCapacity(c.Memory) }, err
} }
state.c = agent_call.(*call) state.c = agent_call.(*call)
state.input = inW state.input = inW
state.allocatedTime = strfmt.DateTime(time.Now()) state.allocatedTime = strfmt.DateTime(time.Now())
return func() { pr.capacity.releaseCapacity(c.Memory) }, nil return func() { pr.capacity.ReleaseCapacity(c.Memory) }, nil
} }
// Handles a client engagement // Handles a client engagement
@@ -535,7 +546,15 @@ func (pr *pureRunner) Start() error {
return err return err
} }
func NewPureRunner(cancel context.CancelFunc, addr string, a Agent, cert string, key string, ca string) (*pureRunner, error) { func UnsecuredPureRunner(cancel context.CancelFunc, addr string, a Agent) (*pureRunner, error) {
return NewPureRunner(cancel, addr, a, "", "", "", nil)
}
func DefaultPureRunner(cancel context.CancelFunc, addr string, a Agent, cert string, key string, ca string) (*pureRunner, error) {
return NewPureRunner(cancel, addr, a, cert, key, ca, nil)
}
func NewPureRunner(cancel context.CancelFunc, addr string, a Agent, cert string, key string, ca string, gate CapacityGate) (*pureRunner, error) {
var pr *pureRunner var pr *pureRunner
var err error var err error
if cert != "" && key != "" && ca != "" { if cert != "" && key != "" && ca != "" {
@@ -544,13 +563,13 @@ func NewPureRunner(cancel context.CancelFunc, addr string, a Agent, cert string,
logrus.WithField("runner_addr", addr).Warn("Failed to create credentials!") logrus.WithField("runner_addr", addr).Warn("Failed to create credentials!")
return nil, err return nil, err
} }
pr, err = createPureRunner(addr, a, c) pr, err = createPureRunner(addr, a, c, gate)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} else { } else {
logrus.Warn("Running pure runner in insecure mode!") logrus.Warn("Running pure runner in insecure mode!")
pr, err = createPureRunner(addr, a, nil) pr, err = createPureRunner(addr, a, nil, gate)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -592,19 +611,22 @@ func creds(cert string, key string, ca string) (credentials.TransportCredentials
}), nil }), nil
} }
func createPureRunner(addr string, a Agent, creds credentials.TransportCredentials) (*pureRunner, error) { func createPureRunner(addr string, a Agent, creds credentials.TransportCredentials, gate CapacityGate) (*pureRunner, error) {
var srv *grpc.Server var srv *grpc.Server
if creds != nil { if creds != nil {
srv = grpc.NewServer(grpc.Creds(creds)) srv = grpc.NewServer(grpc.Creds(creds))
} else { } else {
srv = grpc.NewServer() srv = grpc.NewServer()
} }
memUnits := getAvailableMemoryUnits() if gate == nil {
memUnits := getAvailableMemoryUnits()
gate = newPureRunnerCapacityManager(memUnits)
}
pr := &pureRunner{ pr := &pureRunner{
gRPCServer: srv, gRPCServer: srv,
listen: addr, listen: addr,
a: a, a: a,
capacity: newPureRunnerCapacityManager(memUnits), capacity: gate,
} }
runner.RegisterRunnerProtocolServer(srv, pr) runner.RegisterRunnerProtocolServer(srv, pr)

View File

@@ -409,7 +409,7 @@ func WithAgentFromEnv() ServerOption {
grpcAddr := fmt.Sprintf(":%d", s.grpcListenPort) grpcAddr := fmt.Sprintf(":%d", s.grpcListenPort)
delegatedAgent := agent.NewSyncOnly(agent.NewCachedDataAccess(ds)) delegatedAgent := agent.NewSyncOnly(agent.NewCachedDataAccess(ds))
cancelCtx, cancel := context.WithCancel(ctx) cancelCtx, cancel := context.WithCancel(ctx)
prAgent, err := agent.NewPureRunner(cancel, grpcAddr, delegatedAgent, s.cert, s.certKey, s.certAuthority) prAgent, err := agent.DefaultPureRunner(cancel, grpcAddr, delegatedAgent, s.cert, s.certKey, s.certAuthority)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -8,6 +8,7 @@ import (
"github.com/fnproject/fn/api/agent" "github.com/fnproject/fn/api/agent"
"github.com/fnproject/fn/api/agent/hybrid" "github.com/fnproject/fn/api/agent/hybrid"
agent_grpc "github.com/fnproject/fn/api/agent/nodepool/grpc" agent_grpc "github.com/fnproject/fn/api/agent/nodepool/grpc"
"github.com/fnproject/fn/api/models"
"github.com/fnproject/fn/api/server" "github.com/fnproject/fn/api/server"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@@ -17,6 +18,7 @@ import (
"os" "os"
"strconv" "strconv"
"strings" "strings"
"sync"
"testing" "testing"
"time" "time"
) )
@@ -154,6 +156,39 @@ func SetUpLBNode(ctx context.Context) (*server.Server, error) {
return server.New(ctx, opts...), nil return server.New(ctx, opts...), nil
} }
// testCapacityGate is a CapacityGate implementation with a fixed capacity
// budget per runner, so capacity-exhaustion behavior in the dataplane system
// test is deterministic instead of depending on the host's available memory.
type testCapacityGate struct {
	// runnerNumber identifies the runner node; used only for log correlation.
	runnerNumber int
	// committedCapacityUnits is the currently reserved amount; guarded by mtx.
	committedCapacityUnits uint64
	mtx sync.Mutex
}
const (
	// FixedTestCapacityUnitsPerRunner is the deterministic per-runner capacity
	// budget against which reservations (made in call-memory units) are checked.
	FixedTestCapacityUnitsPerRunner = 512
)
// CheckAndReserveCapacity atomically reserves `units` against the fixed test
// budget. It fails with models.ErrCallTimeoutServerBusy — committing nothing —
// whenever the reservation would push the total past
// FixedTestCapacityUnitsPerRunner; an exact fit is accepted.
func (tcg *testCapacityGate) CheckAndReserveCapacity(units uint64) error {
	tcg.mtx.Lock()
	defer tcg.mtx.Unlock()
	wanted := tcg.committedCapacityUnits + units
	if wanted > FixedTestCapacityUnitsPerRunner {
		logrus.WithField("nodeNumber", tcg.runnerNumber).WithField("currentlyCommitted", tcg.committedCapacityUnits).Info("Runner is out of capacity")
		return models.ErrCallTimeoutServerBusy
	}
	logrus.WithField("nodeNumber", tcg.runnerNumber).WithField("units", units).WithField("currentlyCommitted", tcg.committedCapacityUnits).Info("Runner is committing capacity")
	tcg.committedCapacityUnits = wanted
	return nil
}
// ReleaseCapacity atomically gives back `units` of previously reserved
// capacity. Releasing more than is currently committed indicates a bookkeeping
// bug in the test, so it panics rather than wrapping below zero.
func (tcg *testCapacityGate) ReleaseCapacity(units uint64) {
	tcg.mtx.Lock()
	defer tcg.mtx.Unlock()
	if units > tcg.committedCapacityUnits {
		panic("Fatal error in test capacity calculation, getting to sub-zero capacity")
	}
	logrus.WithField("nodeNumber", tcg.runnerNumber).WithField("units", units).WithField("currentlyCommitted", tcg.committedCapacityUnits).Info("Runner is releasing capacity")
	tcg.committedCapacityUnits -= units
}
func SetUpPureRunnerNode(ctx context.Context, nodeNum int) (*server.Server, error) { func SetUpPureRunnerNode(ctx context.Context, nodeNum int) (*server.Server, error) {
nodeType := server.ServerTypePureRunner nodeType := server.ServerTypePureRunner
opts := make([]server.ServerOption, 0) opts := make([]server.ServerOption, 0)
@@ -174,7 +209,7 @@ func SetUpPureRunnerNode(ctx context.Context, nodeNum int) (*server.Server, erro
grpcAddr := fmt.Sprintf(":%d", 9190+nodeNum) grpcAddr := fmt.Sprintf(":%d", 9190+nodeNum)
delegatedAgent := agent.NewSyncOnly(agent.NewCachedDataAccess(ds)) delegatedAgent := agent.NewSyncOnly(agent.NewCachedDataAccess(ds))
cancelCtx, cancel := context.WithCancel(ctx) cancelCtx, cancel := context.WithCancel(ctx)
prAgent, err := agent.NewPureRunner(cancel, grpcAddr, delegatedAgent, "", "", "") prAgent, err := agent.NewPureRunner(cancel, grpcAddr, delegatedAgent, "", "", "", &testCapacityGate{runnerNumber: nodeNum})
if err != nil { if err != nil {
return nil, err return nil, err
} }