syntax = "proto3";

import "google/protobuf/empty.proto";

// Request to allocate a slot for a call.
message TryCall {
  // NOTE(review): presumably the call model serialized as JSON - confirm
  // against the sender.
  string models_call_json = 1;
  string slot_hash_id = 2;
  // NOTE(review): the key/value types were missing from this declaration
  // ("map extensions = 3;" does not compile). string -> string is assumed;
  // confirm against the generated code currently in use.
  map<string, string> extensions = 3;
}

// Data sent C2S and S2C - as soon as the runner sees the first of these it
// will start running. If the content is empty, there must still be one of
// these with eof set.
// The runner will send these for the body of the response, AFTER it has sent
// a CallResultStart message.
// NOTE(review): this comment previously named that message "CallEnding"; no
// such message is defined in this file, CallResultStart appears to be the
// one meant.
message DataFrame {
  // Chunk of body bytes; may be empty.
  bytes data = 1;
  // Marks the end of the data stream (see the message comment above).
  bool eof = 2;
}

// A single HTTP header as a key/value pair.
message HttpHeader {
  string key = 1;
  string value = 2;
}

// HTTP response metadata: status code plus headers.
message HttpRespMeta {
  int32 status_code = 1;
  repeated HttpHeader headers = 2;
}

// Call has started to finish - data might not be here yet and it will be sent
// as DataFrames.
message CallResultStart {
  // Protocol-specific response metadata; HTTP is the only variant defined.
  oneof meta {
    HttpRespMeta http = 100;
  }
}

// Call has really finished, it might have completed or crashed.
//
// Field names below are kept exactly as declared (camelCase); renaming them
// would change generated accessors and JSON keys for existing users.
message CallFinished {
  bool success = 1;
  string details = 2;
  int32 errorCode = 3;
  string errorStr = 4;
  // NOTE(review): the three timestamps are plain strings; their format is
  // not specified in this file - confirm with the producer.
  string createdAt = 5;
  string startedAt = 6;
  string completedAt = 7;
}

// Message sent from the client to the runner on the Engage stream.
message ClientMsg {
  oneof body {
    TryCall try = 1;
    DataFrame data = 2;
  }
}

// Message sent from the runner to the client on the Engage stream.
message RunnerMsg {
  oneof body {
    CallResultStart result_start = 1;
    DataFrame data = 2;
    CallFinished finished = 3;
  }
}

// Runner status as reported by the Status RPC.
message RunnerStatus {
  // Number of currently inflight responses.
  int32 active = 2;
}

service RunnerProtocol {
  // Bidirectional stream: the client sends ClientMsg frames and the runner
  // answers with RunnerMsg frames.
  rpc Engage(stream ClientMsg) returns (stream RunnerMsg);

  // Rather than rely on Prometheus for this, expose status that's specific to
  // the runner lifecycle through this.
  rpc Status(google.protobuf.Empty) returns (RunnerStatus);
}