fn: hot container timer improvements (#751)

* fn: hot container timer improvements. With this change, we now allocate the timers when the container starts and manage them via stop/clear as needed, which should be not only more efficient but also easier to follow. For example, previously, if the eject timeout was set to 10 secs, this could have delayed the idle timeout by up to 10 secs as well. It is also not necessary to do any math for elapsed time. Consumers now avoid any requeuing when startDequeuer() is cancelled. This was triggering additional dequeue/requeue, causing containers to wake up spuriously. Also, in startDequeuer(), we no longer remove the item from the actual queue and leave this to acquire/eject, which sidesteps issues related to an item landing in the channel without being consumed, etc.
2022-10-28 21:29:17 +03:00 · 2018-02-12 14:12:03 -08:00
parent ffcda9b823
commit c848fc6181
4 changed files with 191 additions and 241 deletions
--- a/api/agent/slots_test.go
+++ b/api/agent/slots_test.go
@@ -37,6 +37,33 @@ func NewTestSlot(id uint64) Slot {
 	return mySlot
 }

+func checkGetTokenId(t *testing.T, a *slotQueue, dur time.Duration, id uint64) error {
+
+	ctx, cancel := context.WithTimeout(context.Background(), dur)
+	defer cancel()
+
+	outChan := a.startDequeuer(ctx)
+
+	for {
+		select {
+		case z := <-outChan:
+			if !a.acquireSlot(z) {
+				continue
+			}
+
+			z.slot.Close(ctx)
+
+			if z.id != id {
+				return fmt.Errorf("Bad slotToken received: %#v expected: %d", z, id)
+			}
+			return nil
+
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+}
+
 func TestSlotQueueBasic1(t *testing.T) {

 	maxId := uint64(10)
@@ -47,14 +74,14 @@ func TestSlotQueueBasic1(t *testing.T) {

 	obj := NewSlotQueue(slotName)

-	ctx, cancel := context.WithCancel(context.Background())
-	outChan := obj.startDequeuer(ctx)
-	select {
-	case z := <-outChan:
-		t.Fatalf("Should not get anything from queue: %#v", z)
-	case <-time.After(time.Duration(500) * time.Millisecond):
+	timeout := time.Duration(500) * time.Millisecond
+	err := checkGetTokenId(t, obj, timeout, 6)
+	if err == nil {
+		t.Fatalf("Should not get anything from queue")
+	}
+	if err != context.DeadlineExceeded {
+		t.Fatalf(err.Error())
 	}
-	cancel()

 	// create slots
 	for id := uint64(0); id < maxId; id += 1 {
@@ -76,83 +103,36 @@ func TestSlotQueueBasic1(t *testing.T) {
 	}

 	// Now according to LIFO semantics, we should get 9,8,7,6,5,4,3,2,1,0 if we dequeued right now.
-	// but let's eject 9
-	if !obj.ejectSlot(ctx, tokens[9]) {
-		t.Fatalf("Cannot eject slotToken: %#v", tokens[9])
+	// but let's acquire 9
+	if !obj.acquireSlot(tokens[9]) {
+		t.Fatalf("Cannot acquire slotToken: %#v", tokens[9])
 	}
-	// let eject 0
-	if !obj.ejectSlot(ctx, tokens[0]) {
-		t.Fatalf("Cannot eject slotToken: %#v", tokens[0])
+	// let acquire 0
+	if !obj.acquireSlot(tokens[0]) {
+		t.Fatalf("Cannot acquire slotToken: %#v", tokens[0])
 	}
-	// let eject 5
-	if !obj.ejectSlot(ctx, tokens[5]) {
-		t.Fatalf("Cannot eject slotToken: %#v", tokens[5])
+	// let acquire 5
+	if !obj.acquireSlot(tokens[5]) {
+		t.Fatalf("Cannot acquire slotToken: %#v", tokens[5])
 	}
-	// try ejecting 5 again, it should fail
-	if obj.ejectSlot(ctx, tokens[5]) {
-		t.Fatalf("Shouldn't be able to eject slotToken: %#v", tokens[5])
+	// try acquire 5 again, it should fail
+	if obj.acquireSlot(tokens[5]) {
+		t.Fatalf("Shouldn't be able to acquire slotToken: %#v", tokens[5])
 	}

-	ctx, cancel = context.WithCancel(context.Background())
-	outChan = obj.startDequeuer(ctx)
-
-	// now we should get 8
-	select {
-	case z := <-outChan:
-		if z.id != 8 {
-			t.Fatalf("Bad slotToken received: %#v", z)
-		}
-
-		if !z.acquireSlot() {
-			t.Fatalf("Cannot acquire slotToken received: %#v", z)
-		}
-
-		// second acquire shoudl fail
-		if z.acquireSlot() {
-			t.Fatalf("Should not be able to acquire twice slotToken: %#v", z)
-		}
-
-		z.slot.Close(ctx)
-
-	case <-time.After(time.Duration(1) * time.Second):
-		t.Fatal("timeout in waiting slotToken")
+	err = checkGetTokenId(t, obj, timeout, 8)
+	if err != nil {
+		t.Fatalf(err.Error())
 	}

-	// now we should get 7
-	select {
-	case z := <-outChan:
-		if z.id != 7 {
-			t.Fatalf("Bad slotToken received: %#v", z)
-		}
-
-		// eject it before we can consume
-		if !obj.ejectSlot(ctx, tokens[7]) {
-			t.Fatalf("Cannot eject slotToken: %#v", tokens[2])
-		}
-
-		// we shouldn't be able to consume an ejected slotToken
-		if z.acquireSlot() {
-			t.Fatalf("We should not be able to acquire slotToken received: %#v", z)
-		}
-
-	case <-time.After(time.Duration(1) * time.Second):
-		t.Fatal("timeout in waiting slotToken")
+	// acquire 7 before we can consume
+	if !obj.acquireSlot(tokens[7]) {
+		t.Fatalf("Cannot acquire slotToken: %#v", tokens[2])
 	}

-	cancel()
-
-	// we should get nothing or 6
-	select {
-	case z, ok := <-outChan:
-		if ok {
-			if z.id != 6 {
-				t.Fatalf("Should not get anything except for 6 from queue: %#v", z)
-			}
-			if !z.acquireSlot() {
-				t.Fatalf("cannot acquire token: %#v", z)
-			}
-		}
-	case <-time.After(time.Duration(500) * time.Millisecond):
+	err = checkGetTokenId(t, obj, timeout, 6)
+	if err != nil {
+		t.Fatalf(err.Error())
 	}
 }

@@ -164,13 +144,13 @@ func TestSlotQueueBasic2(t *testing.T) {
 		t.Fatalf("Should be idle")
 	}

-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	select {
-	case z := <-obj.startDequeuer(ctx):
-		t.Fatalf("Should not get anything from queue: %#v", z)
-	case <-time.After(time.Duration(500) * time.Millisecond):
+	timeout := time.Duration(500) * time.Millisecond
+	err := checkGetTokenId(t, obj, timeout, 6)
+	if err == nil {
+		t.Fatalf("Should not get anything from queue")
+	}
+	if err != context.DeadlineExceeded {
+		t.Fatalf(err.Error())
 	}
 }

@@ -227,92 +207,56 @@ func TestSlotQueueBasic3(t *testing.T) {
 	slotName := "test3"

 	obj := NewSlotQueue(slotName)
-	ctx, cancel := context.WithCancel(context.Background())
-	obj.startDequeuer(ctx)

 	slot1 := NewTestSlot(1)
 	slot2 := NewTestSlot(2)
 	token1 := obj.queueSlot(slot1)
 	obj.queueSlot(slot2)

-	// now our slot must be ready in outChan, but let's cancel it
-	// to cause a requeue. This should cause [1, 2] ordering to [2, 1]
-	cancel()
-
-	ctx, cancel = context.WithCancel(context.Background())
-	outChan := obj.startDequeuer(ctx)
-
-	// we should get '2' since cancel1() reordered the queue
-	select {
-	case item, ok := <-outChan:
-		if !ok {
-			t.Fatalf("outChan should be open")
-		}
-
-		inner := item.slot.(*testSlot)
-		outer := slot2.(*testSlot)
-
-		if inner.id != outer.id {
-			t.Fatalf("item should be 2")
-		}
-		if inner.isClosed {
-			t.Fatalf("2 should not yet be closed")
-		}
-
-		if !item.acquireSlot() {
-			t.Fatalf("2 acquire should not fail")
-		}
-
-		item.slot.Close(ctx)
-
-	case <-time.After(time.Duration(1) * time.Second):
-		t.Fatal("timeout in waiting slotToken")
+	timeout := time.Duration(500) * time.Millisecond
+	err := checkGetTokenId(t, obj, timeout, 1)
+	if err != nil {
+		t.Fatalf(err.Error())
 	}

-	// let's eject 1
-	if !obj.ejectSlot(ctx, token1) {
-		t.Fatalf("failed to eject 1")
-	}
-	if !slot1.(*testSlot).isClosed {
-		t.Fatalf("1 should be closed")
+	// let's acquire 1
+	if !obj.acquireSlot(token1) {
+		t.Fatalf("should fail to acquire %#v", token1)
 	}

-	// spin up bunch of go routines, where each should get a non-acquirable
-	// token or timeout due the imminent obj.destroySlotQueue()
-	var wg sync.WaitGroup
 	goMax := 10
+	out := make(chan error, goMax)
+	var wg sync.WaitGroup
+
 	wg.Add(goMax)
 	for i := 0; i < goMax; i += 1 {
 		go func(id int) {
 			defer wg.Done()
-
-			ctx, cancel = context.WithCancel(context.Background())
-			defer cancel()
-
-			select {
-			case z := <-obj.startDequeuer(ctx):
-				t.Fatalf("%v we shouldn't get anything from queue %#v", id, z)
-			case <-time.After(time.Duration(500) * time.Millisecond):
-			}
+			err := checkGetTokenId(t, obj, timeout, 1)
+			out <- err
 		}(i)
 	}

-	// let's cancel after destroy this time
-	cancel()
-
 	wg.Wait()

-	select {
-	case z := <-outChan:
-		t.Fatalf("Should not get anything from queue: %#v", z)
-	case <-time.After(time.Duration(500) * time.Millisecond):
+	deadlineErrors := 0
+	for i := 0; i < goMax; i += 1 {
+		err := <-out
+		if err == context.DeadlineExceeded {
+			deadlineErrors++
+		} else if err == nil {
+			t.Fatalf("Unexpected success")
+		} else {
+			t.Fatalf("Unexpected error: %s", err.Error())
+		}
 	}

-	// both should be closed
-	if !slot1.(*testSlot).isClosed {
-		t.Fatalf("item1 should be closed")
+	if deadlineErrors != goMax {
+		t.Fatalf("Expected %d got %d deadline exceeded errors", goMax, deadlineErrors)
 	}
-	if !slot2.(*testSlot).isClosed {
-		t.Fatalf("item2 should be closed")
+
+	err = checkGetTokenId(t, obj, timeout, 2)
+	if err != context.DeadlineExceeded {
+		t.Fatalf(err.Error())
 	}
 }