mirror of
https://github.com/fnproject/fn.git
synced 2022-10-28 21:29:17 +03:00
fn: lb-agent: Add Runner Scheduler/Execution Stats (#1107)
LB agent reports lb placer latency. It should also report how long it took for the runner to initiate the call as well as execution time inside the container if the runner has accepted (committed) to the call.
This commit is contained in:
@@ -242,10 +242,13 @@ func _CallResultStart_OneofSizer(msg proto.Message) (n int) {
|
||||
|
||||
// Call has really finished, it might have completed or crashed
|
||||
type CallFinished struct {
|
||||
Success bool `protobuf:"varint,1,opt,name=success" json:"success,omitempty"`
|
||||
Details string `protobuf:"bytes,2,opt,name=details" json:"details,omitempty"`
|
||||
ErrorCode int32 `protobuf:"varint,3,opt,name=errorCode" json:"errorCode,omitempty"`
|
||||
ErrorStr string `protobuf:"bytes,4,opt,name=errorStr" json:"errorStr,omitempty"`
|
||||
Success bool `protobuf:"varint,1,opt,name=success" json:"success,omitempty"`
|
||||
Details string `protobuf:"bytes,2,opt,name=details" json:"details,omitempty"`
|
||||
ErrorCode int32 `protobuf:"varint,3,opt,name=errorCode" json:"errorCode,omitempty"`
|
||||
ErrorStr string `protobuf:"bytes,4,opt,name=errorStr" json:"errorStr,omitempty"`
|
||||
CreatedAt string `protobuf:"bytes,5,opt,name=createdAt" json:"createdAt,omitempty"`
|
||||
StartedAt string `protobuf:"bytes,6,opt,name=startedAt" json:"startedAt,omitempty"`
|
||||
CompletedAt string `protobuf:"bytes,7,opt,name=completedAt" json:"completedAt,omitempty"`
|
||||
}
|
||||
|
||||
func (m *CallFinished) Reset() { *m = CallFinished{} }
|
||||
@@ -281,6 +284,27 @@ func (m *CallFinished) GetErrorStr() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CallFinished) GetCreatedAt() string {
|
||||
if m != nil {
|
||||
return m.CreatedAt
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CallFinished) GetStartedAt() string {
|
||||
if m != nil {
|
||||
return m.StartedAt
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CallFinished) GetCompletedAt() string {
|
||||
if m != nil {
|
||||
return m.CompletedAt
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type ClientMsg struct {
|
||||
// Types that are valid to be assigned to Body:
|
||||
// *ClientMsg_Try
|
||||
@@ -721,41 +745,44 @@ var _RunnerProtocol_serviceDesc = grpc.ServiceDesc{
|
||||
func init() { proto.RegisterFile("runner.proto", fileDescriptor0) }
|
||||
|
||||
var fileDescriptor0 = []byte{
|
||||
// 576 bytes of a gzipped FileDescriptorProto
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x53, 0xdb, 0x8a, 0x13, 0x4d,
|
||||
0x10, 0xce, 0x24, 0xd9, 0x6c, 0x52, 0x99, 0x3d, 0xd0, 0xfc, 0xfc, 0x0c, 0x71, 0xc1, 0x30, 0x1e,
|
||||
0x08, 0x08, 0xbd, 0x1a, 0x15, 0x16, 0x41, 0x2f, 0x5c, 0xb3, 0x8c, 0xc2, 0x82, 0x74, 0xc4, 0xdb,
|
||||
0xd0, 0x3b, 0x53, 0x99, 0x19, 0xed, 0x4c, 0x87, 0xee, 0x9e, 0xc5, 0x80, 0x2f, 0xe2, 0x4b, 0xf9,
|
||||
0x4c, 0xd2, 0x3d, 0x87, 0x0d, 0x7b, 0xa3, 0x77, 0x5d, 0xf5, 0x55, 0xd5, 0x57, 0xf5, 0x55, 0x17,
|
||||
0xf8, 0xaa, 0x2c, 0x0a, 0x54, 0x74, 0xab, 0xa4, 0x91, 0x93, 0x07, 0xa9, 0x94, 0xa9, 0xc0, 0x73,
|
||||
0x67, 0xdd, 0x94, 0xeb, 0x73, 0xdc, 0x6c, 0xcd, 0xae, 0x02, 0xc3, 0xdf, 0x1e, 0x1c, 0x7e, 0x51,
|
||||
0xbb, 0x4b, 0x2e, 0x04, 0x99, 0xc1, 0xe9, 0x46, 0x26, 0x28, 0xf4, 0x2a, 0xe6, 0x42, 0xac, 0xbe,
|
||||
0x69, 0x59, 0x04, 0xde, 0xd4, 0x9b, 0x8d, 0xd8, 0x71, 0xe5, 0xb7, 0x51, 0x9f, 0xb4, 0x2c, 0xc8,
|
||||
0x14, 0x7c, 0x2d, 0xa4, 0x59, 0x65, 0x5c, 0x67, 0xab, 0x3c, 0x09, 0xba, 0x2e, 0x0a, 0xac, 0x2f,
|
||||
0xe2, 0x3a, 0xfb, 0x98, 0x90, 0x0b, 0x00, 0xfc, 0x61, 0xb0, 0xd0, 0xb9, 0x2c, 0x74, 0xd0, 0x9b,
|
||||
0xf6, 0x66, 0xe3, 0x79, 0x40, 0x6b, 0x26, 0xba, 0x68, 0xa1, 0x45, 0x61, 0xd4, 0x8e, 0xed, 0xc5,
|
||||
0x4e, 0xde, 0xc2, 0xc9, 0x3d, 0x98, 0x9c, 0x42, 0xef, 0x3b, 0xee, 0xea, 0x5e, 0xec, 0x93, 0xfc,
|
||||
0x07, 0x07, 0xb7, 0x5c, 0x94, 0x58, 0x33, 0x57, 0xc6, 0x9b, 0xee, 0x85, 0x17, 0xbe, 0x80, 0xd1,
|
||||
0x07, 0x6e, 0xf8, 0x95, 0xe2, 0x1b, 0x24, 0x04, 0xfa, 0x09, 0x37, 0xdc, 0x65, 0xfa, 0xcc, 0xbd,
|
||||
0x6d, 0x31, 0x94, 0x6b, 0x97, 0x38, 0x64, 0xf6, 0x19, 0xbe, 0x02, 0x88, 0x8c, 0xd9, 0x46, 0xc8,
|
||||
0x13, 0x54, 0xff, 0x4a, 0x16, 0x7e, 0x05, 0xdf, 0x66, 0x31, 0xd4, 0xdb, 0x6b, 0x34, 0x9c, 0x3c,
|
||||
0x84, 0xb1, 0x36, 0xdc, 0x94, 0x7a, 0x15, 0xcb, 0x04, 0x5d, 0xfe, 0x01, 0x83, 0xca, 0x75, 0x29,
|
||||
0x13, 0x24, 0x4f, 0xe0, 0x30, 0x73, 0x14, 0x3a, 0xe8, 0x3a, 0x3d, 0xc6, 0xf4, 0x8e, 0x96, 0x35,
|
||||
0x58, 0xf8, 0x0e, 0x4e, 0xac, 0x46, 0x0c, 0x75, 0x29, 0xcc, 0xd2, 0x70, 0x65, 0xc8, 0x23, 0xe8,
|
||||
0x67, 0xc6, 0x6c, 0x83, 0x64, 0xea, 0xcd, 0xc6, 0xf3, 0x23, 0xba, 0xcf, 0x1b, 0x75, 0x98, 0x03,
|
||||
0xdf, 0x0f, 0xa0, 0xbf, 0x41, 0xc3, 0xc3, 0x9f, 0xe0, 0xdb, 0xfc, 0xab, 0xbc, 0xc8, 0x75, 0x86,
|
||||
0x09, 0x09, 0xe0, 0x50, 0x97, 0x71, 0x8c, 0x5a, 0xbb, 0x9e, 0x86, 0xac, 0x31, 0x2d, 0x92, 0xa0,
|
||||
0xe1, 0xb9, 0xd0, 0xf5, 0x64, 0x8d, 0x49, 0xce, 0x60, 0x84, 0x4a, 0x49, 0x65, 0xfb, 0x0e, 0x7a,
|
||||
0x6e, 0x92, 0x3b, 0x07, 0x99, 0xc0, 0xd0, 0x19, 0x4b, 0xa3, 0x82, 0xbe, 0x4b, 0x6c, 0xed, 0x70,
|
||||
0x09, 0xa3, 0x4b, 0x91, 0x63, 0x61, 0xae, 0x75, 0x4a, 0xce, 0xa0, 0x67, 0x54, 0x25, 0xe5, 0x78,
|
||||
0x3e, 0x6c, 0xb6, 0x1f, 0x75, 0x98, 0x75, 0x93, 0x69, 0xbd, 0x9c, 0xae, 0x83, 0x81, 0xb6, 0x6b,
|
||||
0xb3, 0x23, 0x59, 0xc4, 0x8e, 0x74, 0x23, 0x93, 0x5d, 0xf8, 0xcb, 0x83, 0x11, 0x73, 0x5f, 0xda,
|
||||
0x56, 0x7d, 0x0d, 0xbe, 0x72, 0xe2, 0xac, 0xb4, 0x55, 0xa7, 0x2e, 0x7f, 0x4a, 0xef, 0xa9, 0x16,
|
||||
0x75, 0xd8, 0x58, 0xed, 0x89, 0xf8, 0x57, 0x3a, 0xf2, 0x0c, 0x86, 0xeb, 0x5a, 0x35, 0x37, 0xb4,
|
||||
0x95, 0x7a, 0x5f, 0xca, 0xa8, 0xc3, 0xda, 0x80, 0xb6, 0xb7, 0xa7, 0xe0, 0x57, 0xad, 0x2d, 0xdd,
|
||||
0xa6, 0xc9, 0xff, 0x30, 0xe0, 0xb1, 0xc9, 0x6f, 0xab, 0xdf, 0x72, 0xc0, 0x6a, 0x6b, 0x9e, 0xc2,
|
||||
0x71, 0x15, 0xf7, 0xd9, 0xde, 0x5d, 0x2c, 0x05, 0x79, 0x0c, 0x83, 0x45, 0x91, 0xf2, 0x14, 0x09,
|
||||
0xd0, 0x56, 0xb3, 0x09, 0xd0, 0x76, 0xd2, 0x99, 0xf7, 0xdc, 0x23, 0xe7, 0x30, 0x68, 0x2a, 0xd3,
|
||||
0xea, 0x90, 0x69, 0x73, 0xc8, 0x74, 0x61, 0x0f, 0x79, 0x72, 0x44, 0xf7, 0x1b, 0xb8, 0x19, 0x38,
|
||||
0xf8, 0xe5, 0x9f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x8c, 0x6e, 0xb0, 0x89, 0x05, 0x04, 0x00, 0x00,
|
||||
// 611 bytes of a gzipped FileDescriptorProto
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x94, 0xfd, 0x8a, 0xd3, 0x40,
|
||||
0x10, 0xc0, 0x9b, 0x7e, 0x77, 0x92, 0xfb, 0x60, 0x11, 0x09, 0xf5, 0xc0, 0x12, 0x3f, 0x28, 0x08,
|
||||
0x39, 0xad, 0x0a, 0x87, 0xa0, 0xa0, 0x67, 0x8f, 0x28, 0x1c, 0xc8, 0x56, 0xfc, 0xb7, 0xec, 0x25,
|
||||
0x73, 0x6d, 0x74, 0x9b, 0x2d, 0xbb, 0xd3, 0xc3, 0x3e, 0x8a, 0x2f, 0xe5, 0x03, 0xf8, 0x34, 0xb2,
|
||||
0x9b, 0x34, 0x57, 0xee, 0x1f, 0xfd, 0x6f, 0x67, 0x7e, 0xf3, 0xb5, 0x33, 0xb3, 0x0b, 0x81, 0xde,
|
||||
0x14, 0x05, 0xea, 0x78, 0xad, 0x15, 0xa9, 0xe1, 0x83, 0x85, 0x52, 0x0b, 0x89, 0xa7, 0x4e, 0xba,
|
||||
0xda, 0x5c, 0x9f, 0xe2, 0x6a, 0x4d, 0xdb, 0x12, 0x46, 0xbf, 0x3d, 0xe8, 0x7d, 0xd5, 0xdb, 0x73,
|
||||
0x21, 0x25, 0x1b, 0xc3, 0xf1, 0x4a, 0x65, 0x28, 0xcd, 0x3c, 0x15, 0x52, 0xce, 0xbf, 0x1b, 0x55,
|
||||
0x84, 0xde, 0xc8, 0x1b, 0x0f, 0xf8, 0x61, 0xa9, 0xb7, 0x56, 0x9f, 0x8d, 0x2a, 0xd8, 0x08, 0x02,
|
||||
0x23, 0x15, 0xcd, 0x97, 0xc2, 0x2c, 0xe7, 0x79, 0x16, 0x36, 0x9d, 0x15, 0x58, 0x5d, 0x22, 0xcc,
|
||||
0xf2, 0x53, 0xc6, 0xce, 0x00, 0xf0, 0x27, 0x61, 0x61, 0x72, 0x55, 0x98, 0xb0, 0x35, 0x6a, 0x8d,
|
||||
0xfd, 0x49, 0x18, 0x57, 0x99, 0xe2, 0x69, 0x8d, 0xa6, 0x05, 0xe9, 0x2d, 0xdf, 0xb3, 0x1d, 0xbe,
|
||||
0x85, 0xa3, 0x3b, 0x98, 0x1d, 0x43, 0xeb, 0x07, 0x6e, 0xab, 0x5a, 0xec, 0x91, 0xdd, 0x83, 0xce,
|
||||
0x8d, 0x90, 0x1b, 0xac, 0x32, 0x97, 0xc2, 0x9b, 0xe6, 0x99, 0x17, 0xbd, 0x80, 0xc1, 0x47, 0x41,
|
||||
0xe2, 0x42, 0x8b, 0x15, 0x32, 0x06, 0xed, 0x4c, 0x90, 0x70, 0x9e, 0x01, 0x77, 0x67, 0x1b, 0x0c,
|
||||
0xd5, 0xb5, 0x73, 0xec, 0x73, 0x7b, 0x8c, 0x5e, 0x01, 0x24, 0x44, 0xeb, 0x04, 0x45, 0x86, 0xfa,
|
||||
0x7f, 0x93, 0x45, 0xdf, 0x20, 0xb0, 0x5e, 0x1c, 0xcd, 0xfa, 0x12, 0x49, 0xb0, 0x87, 0xe0, 0x1b,
|
||||
0x12, 0xb4, 0x31, 0xf3, 0x54, 0x65, 0xe8, 0xfc, 0x3b, 0x1c, 0x4a, 0xd5, 0xb9, 0xca, 0x90, 0x3d,
|
||||
0x81, 0xde, 0xd2, 0xa5, 0x30, 0x61, 0xd3, 0xf5, 0xc3, 0x8f, 0x6f, 0xd3, 0xf2, 0x1d, 0x8b, 0xde,
|
||||
0xc1, 0x91, 0xed, 0x11, 0x47, 0xb3, 0x91, 0x34, 0x23, 0xa1, 0x89, 0x3d, 0x82, 0xf6, 0x92, 0x68,
|
||||
0x1d, 0x66, 0x23, 0x6f, 0xec, 0x4f, 0x0e, 0xe2, 0xfd, 0xbc, 0x49, 0x83, 0x3b, 0xf8, 0xa1, 0x0b,
|
||||
0xed, 0x15, 0x92, 0x88, 0xfe, 0x78, 0x10, 0xd8, 0x00, 0x17, 0x79, 0x91, 0x9b, 0x25, 0x66, 0x2c,
|
||||
0x84, 0x9e, 0xd9, 0xa4, 0x29, 0x1a, 0xe3, 0x8a, 0xea, 0xf3, 0x9d, 0x68, 0x49, 0x86, 0x24, 0x72,
|
||||
0x69, 0xaa, 0xab, 0xed, 0x44, 0x76, 0x02, 0x03, 0xd4, 0x5a, 0x69, 0x5b, 0x78, 0xd8, 0x72, 0x57,
|
||||
0xb9, 0x55, 0xb0, 0x21, 0xf4, 0x9d, 0x30, 0x23, 0x1d, 0xb6, 0x9d, 0x63, 0x2d, 0x5b, 0xcf, 0x54,
|
||||
0xa3, 0x20, 0xcc, 0xde, 0x53, 0xd8, 0x71, 0xf0, 0x56, 0x61, 0xa9, 0xb1, 0x57, 0x72, 0xb4, 0x5b,
|
||||
0xd2, 0x5a, 0xc1, 0x46, 0xe0, 0xa7, 0x6a, 0xb5, 0x96, 0x58, 0xf2, 0x9e, 0xe3, 0xfb, 0xaa, 0x68,
|
||||
0x06, 0x83, 0x73, 0x99, 0x63, 0x41, 0x97, 0x66, 0xc1, 0x4e, 0xa0, 0x45, 0xba, 0x9c, 0x94, 0x3f,
|
||||
0xe9, 0xef, 0x96, 0x2b, 0x69, 0x70, 0xab, 0x66, 0xa3, 0x6a, 0xf6, 0x4d, 0x87, 0x21, 0xae, 0xb7,
|
||||
0xc2, 0x76, 0xcc, 0x12, 0xdb, 0xb1, 0x2b, 0x95, 0x6d, 0xa3, 0x5f, 0x1e, 0x0c, 0xb8, 0x7b, 0x31,
|
||||
0x36, 0xea, 0x6b, 0x08, 0xb4, 0xeb, 0xfd, 0xdc, 0x15, 0x56, 0x85, 0x3f, 0x8e, 0xef, 0x0c, 0x25,
|
||||
0x69, 0x70, 0x5f, 0xef, 0xcd, 0xe8, 0x9f, 0xe9, 0xd8, 0x33, 0xe8, 0x5f, 0x57, 0x33, 0x71, 0x2d,
|
||||
0xb5, 0x93, 0xdc, 0x1f, 0x54, 0xd2, 0xe0, 0xb5, 0x41, 0x5d, 0xdb, 0x53, 0x08, 0xca, 0xd2, 0x66,
|
||||
0x6e, 0x91, 0xd8, 0x7d, 0xe8, 0x8a, 0x94, 0xf2, 0x9b, 0x72, 0x19, 0x3b, 0xbc, 0x92, 0x26, 0x0b,
|
||||
0x38, 0x2c, 0xed, 0xbe, 0xd8, 0x67, 0x9d, 0x2a, 0xc9, 0x1e, 0x43, 0x77, 0x5a, 0x2c, 0xc4, 0x02,
|
||||
0x19, 0xc4, 0x75, 0xcf, 0x86, 0x10, 0xd7, 0x37, 0x1d, 0x7b, 0xcf, 0x3d, 0x76, 0x0a, 0xdd, 0x5d,
|
||||
0xe4, 0xb8, 0xfc, 0x27, 0xe2, 0xdd, 0x3f, 0x11, 0x4f, 0xed, 0x3f, 0x31, 0x3c, 0x88, 0xf7, 0x0b,
|
||||
0xb8, 0xea, 0x3a, 0xfc, 0xf2, 0x6f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x6e, 0x67, 0x4d, 0xd3, 0x64,
|
||||
0x04, 0x00, 0x00,
|
||||
}
|
||||
|
||||
@@ -42,6 +42,9 @@ message CallFinished {
|
||||
string details = 2;
|
||||
int32 errorCode = 3;
|
||||
string errorStr = 4;
|
||||
string createdAt = 5;
|
||||
string startedAt = 6;
|
||||
string completedAt = 7;
|
||||
}
|
||||
|
||||
message ClientMsg {
|
||||
|
||||
@@ -67,9 +67,6 @@ type callHandle struct {
|
||||
ctx context.Context
|
||||
c *call // the agent's version of call
|
||||
|
||||
// Timings, for metrics:
|
||||
receivedTime common.DateTime // When was the call received?
|
||||
|
||||
// For implementing http.ResponseWriter:
|
||||
headers http.Header
|
||||
status int
|
||||
@@ -203,9 +200,14 @@ func (ch *callHandle) enqueueMsgStrict(msg *runner.RunnerMsg) error {
|
||||
// enqueueCallResponse enqueues a Submit() response to the LB
|
||||
// and initiates a graceful shutdown of the session.
|
||||
func (ch *callHandle) enqueueCallResponse(err error) {
|
||||
|
||||
var createdAt string
|
||||
var startedAt string
|
||||
var completedAt string
|
||||
var details string
|
||||
var errCode int
|
||||
var errStr string
|
||||
|
||||
log := common.Logger(ch.ctx)
|
||||
|
||||
if err != nil {
|
||||
@@ -214,16 +216,40 @@ func (ch *callHandle) enqueueCallResponse(err error) {
|
||||
}
|
||||
|
||||
if ch.c != nil {
|
||||
details = ch.c.Model().ID
|
||||
mcall := ch.c.Model()
|
||||
|
||||
// These timestamps are related. To avoid confusion
|
||||
// and for robustness, nested if stmts below.
|
||||
if !time.Time(mcall.CreatedAt).IsZero() {
|
||||
createdAt = mcall.CreatedAt.String()
|
||||
|
||||
if !time.Time(mcall.StartedAt).IsZero() {
|
||||
startedAt = mcall.StartedAt.String()
|
||||
|
||||
if !time.Time(mcall.CompletedAt).IsZero() {
|
||||
completedAt = mcall.CompletedAt.String()
|
||||
} else {
|
||||
// IMPORTANT: We punch this in ourselves.
|
||||
// This is because call.End() is executed asynchronously.
|
||||
completedAt = common.DateTime(time.Now()).String()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
details = mcall.ID
|
||||
|
||||
}
|
||||
log.Debugf("Sending Call Finish details=%v", details)
|
||||
|
||||
errTmp := ch.enqueueMsgStrict(&runner.RunnerMsg{
|
||||
Body: &runner.RunnerMsg_Finished{Finished: &runner.CallFinished{
|
||||
Success: err == nil,
|
||||
Details: details,
|
||||
ErrorCode: int32(errCode),
|
||||
ErrorStr: errStr,
|
||||
Success: err == nil,
|
||||
Details: details,
|
||||
ErrorCode: int32(errCode),
|
||||
ErrorStr: errStr,
|
||||
CreatedAt: createdAt,
|
||||
StartedAt: startedAt,
|
||||
CompletedAt: completedAt,
|
||||
}}})
|
||||
|
||||
if errTmp != nil {
|
||||
@@ -524,7 +550,9 @@ func (pr *pureRunner) spawnSubmit(state *callHandle) {
|
||||
|
||||
// handleTryCall based on the TryCall message, tries to place the call on NBIO Agent
|
||||
func (pr *pureRunner) handleTryCall(tc *runner.TryCall, state *callHandle) error {
|
||||
state.receivedTime = common.DateTime(time.Now())
|
||||
|
||||
start := time.Now()
|
||||
|
||||
var c models.Call
|
||||
err := json.Unmarshal([]byte(tc.ModelsCallJson), &c)
|
||||
if err != nil {
|
||||
@@ -532,6 +560,13 @@ func (pr *pureRunner) handleTryCall(tc *runner.TryCall, state *callHandle) error
|
||||
return err
|
||||
}
|
||||
|
||||
// IMPORTANT: We clear/initialize these dates as start/created/completed dates from
|
||||
// unmarshalled Model from LB-agent represent unrelated time-line events.
|
||||
// From this point, CreatedAt/StartedAt/CompletedAt are based on our local clock.
|
||||
c.CreatedAt = common.DateTime(start)
|
||||
c.StartedAt = common.DateTime(time.Time{})
|
||||
c.CompletedAt = common.DateTime(time.Time{})
|
||||
|
||||
agent_call, err := pr.a.GetCall(FromModelAndInput(&c, state.pipeToFnR),
|
||||
WithWriter(state),
|
||||
WithContext(state.ctx),
|
||||
|
||||
@@ -233,6 +233,29 @@ func tryQueueError(err error, done chan error) {
|
||||
}
|
||||
}
|
||||
|
||||
func translateDate(dt string) time.Time {
|
||||
if dt != "" {
|
||||
trx, err := common.ParseDateTime(dt)
|
||||
if err == nil {
|
||||
return time.Time(trx)
|
||||
}
|
||||
}
|
||||
return time.Time{}
|
||||
}
|
||||
|
||||
func recordFinishStats(ctx context.Context, msg *pb.CallFinished) {
|
||||
|
||||
creatTs := translateDate(msg.GetCreatedAt())
|
||||
startTs := translateDate(msg.GetStartedAt())
|
||||
complTs := translateDate(msg.GetCompletedAt())
|
||||
|
||||
// Validate this as info *is* coming from runner and its local clock.
|
||||
if !creatTs.IsZero() && !startTs.IsZero() && !complTs.IsZero() && !startTs.Before(creatTs) && !complTs.Before(startTs) {
|
||||
statsLBAgentRunnerSchedLatency(ctx, startTs.Sub(creatTs))
|
||||
statsLBAgentRunnerExecLatency(ctx, complTs.Sub(startTs))
|
||||
}
|
||||
}
|
||||
|
||||
func receiveFromRunner(ctx context.Context, protocolClient pb.RunnerProtocol_EngageClient, c pool.RunnerCall, done chan error) {
|
||||
w := c.ResponseWriter()
|
||||
defer close(done)
|
||||
@@ -286,6 +309,7 @@ DataLoop:
|
||||
// Finish messages required for finish/finalize the processing.
|
||||
case *pb.RunnerMsg_Finished:
|
||||
log.Infof("Call finished Success=%v %v", body.Finished.Success, body.Finished.Details)
|
||||
recordFinishStats(ctx, body.Finished)
|
||||
if !body.Finished.Success {
|
||||
err := parseError(body.Finished)
|
||||
tryQueueError(err, done)
|
||||
|
||||
@@ -3,6 +3,7 @@ package agent
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"go.opencensus.io/stats"
|
||||
@@ -56,6 +57,14 @@ func statsTooBusy(ctx context.Context) {
|
||||
stats.Record(ctx, serverBusyMeasure.M(1))
|
||||
}
|
||||
|
||||
func statsLBAgentRunnerSchedLatency(ctx context.Context, dur time.Duration) {
|
||||
stats.Record(ctx, runnerSchedLatencyMeasure.M(int64(dur/time.Millisecond)))
|
||||
}
|
||||
|
||||
func statsLBAgentRunnerExecLatency(ctx context.Context, dur time.Duration) {
|
||||
stats.Record(ctx, runnerExecLatencyMeasure.M(int64(dur/time.Millisecond)))
|
||||
}
|
||||
|
||||
const (
|
||||
// TODO we should probably prefix these with calls_ ?
|
||||
queuedMetricName = "queued"
|
||||
@@ -66,6 +75,10 @@ const (
|
||||
timedoutMetricName = "timeouts"
|
||||
errorsMetricName = "errors"
|
||||
serverBusyMetricName = "server_busy"
|
||||
|
||||
// Reported By LB
|
||||
runnerSchedLatencyMetricName = "lb_runner_sched_latency"
|
||||
runnerExecLatencyMetricName = "lb_runner_exec_latency"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -81,8 +94,23 @@ var (
|
||||
dockerMeasures = initDockerMeasures()
|
||||
containerGaugeMeasures = initContainerGaugeMeasures()
|
||||
containerTimeMeasures = initContainerTimeMeasures()
|
||||
|
||||
// Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers
|
||||
runnerSchedLatencyMeasure = makeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs")
|
||||
// Reported By LB: Function execution time inside a container.
|
||||
runnerExecLatencyMeasure = makeMeasure(runnerExecLatencyMetricName, "Runner Container Execution Latency Reported By LBAgent", "msecs")
|
||||
)
|
||||
|
||||
func RegisterLBAgentViews(tagKeys []string) {
|
||||
err := view.Register(
|
||||
createView(runnerSchedLatencyMeasure, view.Distribution(1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000), tagKeys),
|
||||
createView(runnerExecLatencyMeasure, view.Distribution(1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000), tagKeys),
|
||||
)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Fatal("cannot register view")
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterAgentViews creates and registers all agent views
|
||||
func RegisterAgentViews(tagKeys []string) {
|
||||
err := view.Register(
|
||||
|
||||
@@ -38,18 +38,18 @@ func newAttemptTracker(ctx context.Context) *attemptTracker {
|
||||
}
|
||||
}
|
||||
|
||||
func (data *attemptTracker) finalizeAttempts(isSuccess bool) {
|
||||
func (data *attemptTracker) finalizeAttempts(isCommited bool) {
|
||||
stats.Record(data.ctx, attemptCountMeasure.M(data.attemptCount))
|
||||
|
||||
// IMPORTANT: here we use (lastAttemptTime - startTime). We want to exclude TryExec
|
||||
// latency *if* TryExec() goes through with success. Placer latency metric only shows
|
||||
// latency *if* TryExec() goes through with commit. Placer latency metric only shows
|
||||
// how much time are spending in Placer loop/retries. The metric includes rtt/latency of
|
||||
// *all* unsuccessful NACK (retriable) responses from runners as well. For example, if
|
||||
// Placer loop here retries 4 runners (which takes 5 msecs each) and then 5th runner
|
||||
// succeeds (but takes 35 seconds to finish execution), we report 20 msecs as our LB
|
||||
// latency.
|
||||
endTime := data.lastAttemptTime
|
||||
if !isSuccess {
|
||||
if !isCommited {
|
||||
endTime = time.Now()
|
||||
}
|
||||
|
||||
|
||||
@@ -474,6 +474,7 @@ func WithAgentFromEnv() ServerOption {
|
||||
|
||||
keys := []string{"fn_appname", "fn_path"}
|
||||
pool.RegisterPlacerViews(keys)
|
||||
agent.RegisterLBAgentViews(keys)
|
||||
|
||||
s.agent, err = agent.NewLBAgent(agent.NewCachedDataAccess(cl), runnerPool, placer)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user