// fn-serverless/api/agent/grpc/runner.proto

syntax = "proto3";

import "google/protobuf/empty.proto";

// Asks the runner to allocate a slot for a call. Sent by the client as one of
// the ClientMsg body variants.
message TryCall {
    // The call to run, carried as a JSON string.
    // NOTE(review): presumably the JSON encoding of the call model - confirm
    // the schema against the sender.
    string models_call_json = 1;

    // Slot hash identifier associated with the call.
    // NOTE(review): how this id is computed is not visible in this file.
    string slot_hash_id = 2;

    // Additional string key/value pairs attached to the request.
    map<string,string> extensions = 3;
}

// A chunk of body data, used in both directions (client-to-server and
// server-to-client).
//
// Client to runner: the runner starts running as soon as it sees the first
// DataFrame. When the content is empty, one DataFrame with eof set must still
// be sent.
//
// Runner to client: the body of the response is sent as DataFrames, AFTER the
// runner has sent a CallResultStart message.
message DataFrame {
    // Bytes carried by this frame; may be empty.
    bytes data = 1;

    // Set to signal the end of the data.
    bool eof = 2;
}

// One HTTP header expressed as a key/value pair. Used as the element type of
// HttpRespMeta.headers.
message HttpHeader {
    // Header name.
    string key = 1;

    // Header value.
    string value = 2;
}

// Metadata of an HTTP response: status code plus headers. Carried as the
// `http` variant of CallResultStart.meta.
message HttpRespMeta {
    // HTTP status code of the response.
    int32 status_code = 1;

    // Response headers, one entry per key/value pair.
    repeated HttpHeader headers = 2;
}

// Announces that the call has begun to finish. The response data might not be
// available yet; it is sent separately as DataFrames.
message CallResultStart {
    // Metadata describing the result. HTTP is the only variant defined here.
    oneof meta {
        // Status code and headers of an HTTP response.
        HttpRespMeta http = 100;
    }
}

// Final notification for a call: it has really finished, either by completing
// or by crashing.
message CallFinished {
    // Outcome flag for the call.
    bool success = 1;

    // Free-form detail text accompanying the outcome.
    string details = 2;

    // Error code for the call.
    // NOTE(review): presumably meaningful only when success is false - confirm.
    int32 errorCode = 3;

    // Error description matching errorCode.
    string errorStr = 4;

    // NOTE(review): createdAt, startedAt and completedAt look like the call's
    // creation, start and completion times encoded as strings; the string
    // format is not specified in this file - confirm with the runner code.
    string createdAt = 5;

    string startedAt = 6;

    string completedAt = 7;
}

// Envelope for everything the client streams to the runner over Engage.
// Exactly one body variant is set per message.
message ClientMsg {
    oneof body {
        // Request to allocate a slot for a call.
        TryCall try = 1;

        // A chunk of data sent from the client to the runner.
        DataFrame data = 2;
    }
}

// Envelope for everything the runner streams back to the client over Engage.
// Exactly one body variant is set per message.
message RunnerMsg {
    oneof body {
        // The call has begun to finish; carries the result metadata.
        CallResultStart result_start = 1;

        // A chunk of the response body.
        DataFrame data = 2;

        // The call has really finished (completed or crashed).
        CallFinished finished = 3;
    }
}

// Status report returned by the Status RPC of RunnerProtocol.
message RunnerStatus {
    // Field number 1 is not assigned in this message. It is reserved so that
    // it cannot be picked up by accident with a new meaning.
    // NOTE(review): confirm whether an earlier revision used field 1.
    reserved 1;

    // Number of currently inflight responses.
    int32 active = 2;

    // Whether the status check was unsuccessful.
    // NOTE(review): going by the field name, true presumably means failure.
    bool failed = 3;

    // Call id, if a status image was used.
    string id = 4;

    // Details for logging/debugging.
    string details = 5;

    // Error code, if not successful.
    int32 errorCode = 6;

    // Error description, if not successful.
    string errorStr = 7;

    // Call latency details: initialization time.
    string createdAt = 8;

    // Call latency details: start time in the container.
    string startedAt = 9;

    // Call latency details: end time.
    string completedAt = 10;

    // True if this status response was provided from cache.
    bool cached = 11;

    // Number of requests received by the runner.
    uint64 requestsReceived = 12;

    // Number of requests processed by the runner without NACK.
    uint64 requestsHandled = 13;
}

// gRPC service through which clients talk to a runner.
service RunnerProtocol {
    // Bidirectional stream: the client sends ClientMsg messages (TryCall or
    // DataFrame) and the runner replies with RunnerMsg messages
    // (CallResultStart, DataFrame or CallFinished).
    rpc Engage(stream ClientMsg) returns (stream RunnerMsg);

    // Exposes status that is specific to the runner lifecycle, rather than
    // relying on Prometheus for it.
    rpc Status(google.protobuf.Empty) returns (RunnerStatus);
}