mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: lb-agent: Initial TryCall result can be retriable. (#1035)
Before this change, we assumed that data might end up in a container once we issued a TryCall(), so if the gRPC send failed we did not retry. However, a send failure cannot result in data reaching a container, because pure-runner can schedule a call into a container only after it has successfully received the TryCall. Here we trust gRPC: if the gRPC layer reports that it could not send a message, then the receiver did not receive it.
This commit is contained in:
@@ -123,17 +123,19 @@ func (r *gRPCRunner) TryExec(ctx context.Context, call pool.RunnerCall) (bool, e
|
||||
return false, err
|
||||
}
|
||||
|
||||
// After this point, we assume "COMMITTED" unless pure runner
|
||||
// send explicit NACK
|
||||
err = runnerConnection.Send(&pb.ClientMsg{Body: &pb.ClientMsg_Try{Try: &pb.TryCall{
|
||||
ModelsCallJson: string(modelJSON),
|
||||
SlotHashId: hex.EncodeToString([]byte(call.SlotHashId())),
|
||||
}}})
|
||||
if err != nil {
|
||||
logrus.WithError(err).Error("Failed to send message to runner node")
|
||||
return true, err
|
||||
// Try on next runner
|
||||
return false, err
|
||||
}
|
||||
|
||||
// After this point TryCall was sent, we assume "COMMITTED" unless pure runner
|
||||
// send explicit NACK
|
||||
|
||||
recvDone := make(chan error, 1)
|
||||
|
||||
go receiveFromRunner(runnerConnection, call, recvDone)
|
||||
|
||||
Reference in New Issue
Block a user