fn: lb-agent: Initial TryCall result can be retriable. (#1035)

Before this change, we assumed data might end up in a container
once we placed a TryCall(), so if the gRPC send failed, we did not
retry. However, a send failure cannot result in data in a
container, since pure-runner can schedule a call into a container
only upon successful receipt of a TryCall. Here we trust
gRPC: if the gRPC layer says it could not send a message, then
the receiver did not receive it.
This commit is contained in:
Tolga Ceylan
2018-06-05 14:41:13 -07:00
committed by GitHub
parent 64431b4497
commit 4af53025d8

View File

@@ -123,17 +123,19 @@ func (r *gRPCRunner) TryExec(ctx context.Context, call pool.RunnerCall) (bool, e
return false, err
}
// After this point, we assume "COMMITTED" unless pure runner
// send explicit NACK
err = runnerConnection.Send(&pb.ClientMsg{Body: &pb.ClientMsg_Try{Try: &pb.TryCall{
ModelsCallJson: string(modelJSON),
SlotHashId: hex.EncodeToString([]byte(call.SlotHashId())),
}}})
if err != nil {
logrus.WithError(err).Error("Failed to send message to runner node")
return true, err
// Try on next runner
return false, err
}
// After this point TryCall was sent, we assume "COMMITTED" unless pure runner
// send explicit NACK
recvDone := make(chan error, 1)
go receiveFromRunner(runnerConnection, call, recvDone)