diff --git a/api/agent/grpc/runner.pb.go b/api/agent/grpc/runner.pb.go index 3059aac06..c72d0cc7b 100644 --- a/api/agent/grpc/runner.pb.go +++ b/api/agent/grpc/runner.pb.go @@ -242,10 +242,13 @@ func _CallResultStart_OneofSizer(msg proto.Message) (n int) { // Call has really finished, it might have completed or crashed type CallFinished struct { - Success bool `protobuf:"varint,1,opt,name=success" json:"success,omitempty"` - Details string `protobuf:"bytes,2,opt,name=details" json:"details,omitempty"` - ErrorCode int32 `protobuf:"varint,3,opt,name=errorCode" json:"errorCode,omitempty"` - ErrorStr string `protobuf:"bytes,4,opt,name=errorStr" json:"errorStr,omitempty"` + Success bool `protobuf:"varint,1,opt,name=success" json:"success,omitempty"` + Details string `protobuf:"bytes,2,opt,name=details" json:"details,omitempty"` + ErrorCode int32 `protobuf:"varint,3,opt,name=errorCode" json:"errorCode,omitempty"` + ErrorStr string `protobuf:"bytes,4,opt,name=errorStr" json:"errorStr,omitempty"` + CreatedAt string `protobuf:"bytes,5,opt,name=createdAt" json:"createdAt,omitempty"` + StartedAt string `protobuf:"bytes,6,opt,name=startedAt" json:"startedAt,omitempty"` + CompletedAt string `protobuf:"bytes,7,opt,name=completedAt" json:"completedAt,omitempty"` } func (m *CallFinished) Reset() { *m = CallFinished{} } @@ -281,6 +284,27 @@ func (m *CallFinished) GetErrorStr() string { return "" } +func (m *CallFinished) GetCreatedAt() string { + if m != nil { + return m.CreatedAt + } + return "" +} + +func (m *CallFinished) GetStartedAt() string { + if m != nil { + return m.StartedAt + } + return "" +} + +func (m *CallFinished) GetCompletedAt() string { + if m != nil { + return m.CompletedAt + } + return "" +} + type ClientMsg struct { // Types that are valid to be assigned to Body: // *ClientMsg_Try @@ -721,41 +745,44 @@ var _RunnerProtocol_serviceDesc = grpc.ServiceDesc{ func init() { proto.RegisterFile("runner.proto", fileDescriptor0) } var fileDescriptor0 = []byte{ - // 576 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x53, 0xdb, 0x8a, 0x13, 0x4d, - 0x10, 0xce, 0x24, 0xd9, 0x6c, 0x52, 0x99, 0x3d, 0xd0, 0xfc, 0xfc, 0x0c, 0x71, 0xc1, 0x30, 0x1e, - 0x08, 0x08, 0xbd, 0x1a, 0x15, 0x16, 0x41, 0x2f, 0x5c, 0xb3, 0x8c, 0xc2, 0x82, 0x74, 0xc4, 0xdb, - 0xd0, 0x3b, 0x53, 0x99, 0x19, 0xed, 0x4c, 0x87, 0xee, 0x9e, 0xc5, 0x80, 0x2f, 0xe2, 0x4b, 0xf9, - 0x4c, 0xd2, 0x3d, 0x87, 0x0d, 0x7b, 0xa3, 0x77, 0x5d, 0xf5, 0x55, 0xd5, 0x57, 0xf5, 0x55, 0x17, - 0xf8, 0xaa, 0x2c, 0x0a, 0x54, 0x74, 0xab, 0xa4, 0x91, 0x93, 0x07, 0xa9, 0x94, 0xa9, 0xc0, 0x73, - 0x67, 0xdd, 0x94, 0xeb, 0x73, 0xdc, 0x6c, 0xcd, 0xae, 0x02, 0xc3, 0xdf, 0x1e, 0x1c, 0x7e, 0x51, - 0xbb, 0x4b, 0x2e, 0x04, 0x99, 0xc1, 0xe9, 0x46, 0x26, 0x28, 0xf4, 0x2a, 0xe6, 0x42, 0xac, 0xbe, - 0x69, 0x59, 0x04, 0xde, 0xd4, 0x9b, 0x8d, 0xd8, 0x71, 0xe5, 0xb7, 0x51, 0x9f, 0xb4, 0x2c, 0xc8, - 0x14, 0x7c, 0x2d, 0xa4, 0x59, 0x65, 0x5c, 0x67, 0xab, 0x3c, 0x09, 0xba, 0x2e, 0x0a, 0xac, 0x2f, - 0xe2, 0x3a, 0xfb, 0x98, 0x90, 0x0b, 0x00, 0xfc, 0x61, 0xb0, 0xd0, 0xb9, 0x2c, 0x74, 0xd0, 0x9b, - 0xf6, 0x66, 0xe3, 0x79, 0x40, 0x6b, 0x26, 0xba, 0x68, 0xa1, 0x45, 0x61, 0xd4, 0x8e, 0xed, 0xc5, - 0x4e, 0xde, 0xc2, 0xc9, 0x3d, 0x98, 0x9c, 0x42, 0xef, 0x3b, 0xee, 0xea, 0x5e, 0xec, 0x93, 0xfc, - 0x07, 0x07, 0xb7, 0x5c, 0x94, 0x58, 0x33, 0x57, 0xc6, 0x9b, 0xee, 0x85, 0x17, 0xbe, 0x80, 0xd1, - 0x07, 0x6e, 0xf8, 0x95, 0xe2, 0x1b, 0x24, 0x04, 0xfa, 0x09, 0x37, 0xdc, 0x65, 0xfa, 0xcc, 0xbd, - 0x6d, 0x31, 0x94, 0x6b, 0x97, 0x38, 0x64, 0xf6, 0x19, 0xbe, 0x02, 0x88, 0x8c, 0xd9, 0x46, 0xc8, - 0x13, 0x54, 0xff, 0x4a, 0x16, 0x7e, 0x05, 0xdf, 0x66, 0x31, 0xd4, 0xdb, 0x6b, 0x34, 0x9c, 0x3c, - 0x84, 0xb1, 0x36, 0xdc, 0x94, 0x7a, 0x15, 0xcb, 0x04, 0x5d, 0xfe, 0x01, 0x83, 0xca, 0x75, 0x29, - 0x13, 0x24, 0x4f, 0xe0, 0x30, 0x73, 0x14, 0x3a, 0xe8, 0x3a, 0x3d, 0xc6, 0xf4, 0x8e, 0x96, 0x35, - 0x58, 0xf8, 0x0e, 0x4e, 0xac, 0x46, 0x0c, 0x75, 0x29, 0xcc, 0xd2, 0x70, 0x65, 0xc8, 0x23, 0xe8, - 0x67, 0xc6, 0x6c, 0x83, 0x64, 0xea, 0xcd, 0xc6, 0xf3, 0x23, 0xba, 0xcf, 0x1b, 0x75, 0x98, 0x03, - 0xdf, 0x0f, 0xa0, 0xbf, 0x41, 0xc3, 0xc3, 0x9f, 0xe0, 0xdb, 0xfc, 0xab, 0xbc, 0xc8, 0x75, 0x86, - 0x09, 0x09, 0xe0, 0x50, 0x97, 0x71, 0x8c, 0x5a, 0xbb, 0x9e, 0x86, 0xac, 0x31, 0x2d, 0x92, 0xa0, - 0xe1, 0xb9, 0xd0, 0xf5, 0x64, 0x8d, 0x49, 0xce, 0x60, 0x84, 0x4a, 0x49, 0x65, 0xfb, 0x0e, 0x7a, - 0x6e, 0x92, 0x3b, 0x07, 0x99, 0xc0, 0xd0, 0x19, 0x4b, 0xa3, 0x82, 0xbe, 0x4b, 0x6c, 0xed, 0x70, - 0x09, 0xa3, 0x4b, 0x91, 0x63, 0x61, 0xae, 0x75, 0x4a, 0xce, 0xa0, 0x67, 0x54, 0x25, 0xe5, 0x78, - 0x3e, 0x6c, 0xb6, 0x1f, 0x75, 0x98, 0x75, 0x93, 0x69, 0xbd, 0x9c, 0xae, 0x83, 0x81, 0xb6, 0x6b, - 0xb3, 0x23, 0x59, 0xc4, 0x8e, 0x74, 0x23, 0x93, 0x5d, 0xf8, 0xcb, 0x83, 0x11, 0x73, 0x5f, 0xda, - 0x56, 0x7d, 0x0d, 0xbe, 0x72, 0xe2, 0xac, 0xb4, 0x55, 0xa7, 0x2e, 0x7f, 0x4a, 0xef, 0xa9, 0x16, - 0x75, 0xd8, 0x58, 0xed, 0x89, 0xf8, 0x57, 0x3a, 0xf2, 0x0c, 0x86, 0xeb, 0x5a, 0x35, 0x37, 0xb4, - 0x95, 0x7a, 0x5f, 0xca, 0xa8, 0xc3, 0xda, 0x80, 0xb6, 0xb7, 0xa7, 0xe0, 0x57, 0xad, 0x2d, 0xdd, - 0xa6, 0xc9, 0xff, 0x30, 0xe0, 0xb1, 0xc9, 0x6f, 0xab, 0xdf, 0x72, 0xc0, 0x6a, 0x6b, 0x9e, 0xc2, - 0x71, 0x15, 0xf7, 0xd9, 0xde, 0x5d, 0x2c, 0x05, 0x79, 0x0c, 0x83, 0x45, 0x91, 0xf2, 0x14, 0x09, - 0xd0, 0x56, 0xb3, 0x09, 0xd0, 0x76, 0xd2, 0x99, 0xf7, 0xdc, 0x23, 0xe7, 0x30, 0x68, 0x2a, 0xd3, - 0xea, 0x90, 0x69, 0x73, 0xc8, 0x74, 0x61, 0x0f, 0x79, 0x72, 0x44, 0xf7, 0x1b, 0xb8, 0x19, 0x38, - 0xf8, 0xe5, 0x9f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x8c, 0x6e, 0xb0, 0x89, 0x05, 0x04, 0x00, 0x00, + // 611 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x94, 0xfd, 0x8a, 0xd3, 0x40, + 0x10, 0xc0, 0x9b, 0x7e, 0x77, 0x92, 0xfb, 0x60, 0x11, 0x09, 0xf5, 0xc0, 0x12, 0x3f, 0x28, 0x08, + 0x39, 0xad, 0x0a, 0x87, 0xa0, 0xa0, 0x67, 0x8f, 0x28, 0x1c, 0xc8, 0x56, 0xfc, 0xb7, 0xec, 0x25, + 0x73, 0x6d, 0x74, 0x9b, 0x2d, 0xbb, 0xd3, 0xc3, 0x3e, 0x8a, 0x2f, 0xe5, 0x03, 0xf8, 0x34, 0xb2, + 0x9b, 0x34, 0x57, 0xee, 0x1f, 0xfd, 0x6f, 0x67, 0x7e, 0xf3, 0xb5, 0x33, 0xb3, 0x0b, 0x81, 0xde, + 0x14, 0x05, 0xea, 0x78, 0xad, 0x15, 0xa9, 0xe1, 0x83, 0x85, 0x52, 0x0b, 0x89, 0xa7, 0x4e, 0xba, + 0xda, 0x5c, 0x9f, 0xe2, 0x6a, 0x4d, 0xdb, 0x12, 0x46, 0xbf, 0x3d, 0xe8, 0x7d, 0xd5, 0xdb, 0x73, + 0x21, 0x25, 0x1b, 0xc3, 0xf1, 0x4a, 0x65, 0x28, 0xcd, 0x3c, 0x15, 0x52, 0xce, 0xbf, 0x1b, 0x55, + 0x84, 0xde, 0xc8, 0x1b, 0x0f, 0xf8, 0x61, 0xa9, 0xb7, 0x56, 0x9f, 0x8d, 0x2a, 0xd8, 0x08, 0x02, + 0x23, 0x15, 0xcd, 0x97, 0xc2, 0x2c, 0xe7, 0x79, 0x16, 0x36, 0x9d, 0x15, 0x58, 0x5d, 0x22, 0xcc, + 0xf2, 0x53, 0xc6, 0xce, 0x00, 0xf0, 0x27, 0x61, 0x61, 0x72, 0x55, 0x98, 0xb0, 0x35, 0x6a, 0x8d, + 0xfd, 0x49, 0x18, 0x57, 0x99, 0xe2, 0x69, 0x8d, 0xa6, 0x05, 0xe9, 0x2d, 0xdf, 0xb3, 0x1d, 0xbe, + 0x85, 0xa3, 0x3b, 0x98, 0x1d, 0x43, 0xeb, 0x07, 0x6e, 0xab, 0x5a, 0xec, 0x91, 0xdd, 0x83, 0xce, + 0x8d, 0x90, 0x1b, 0xac, 0x32, 0x97, 0xc2, 0x9b, 0xe6, 0x99, 0x17, 0xbd, 0x80, 0xc1, 0x47, 0x41, + 0xe2, 0x42, 0x8b, 0x15, 0x32, 0x06, 0xed, 0x4c, 0x90, 0x70, 0x9e, 0x01, 0x77, 0x67, 0x1b, 0x0c, + 0xd5, 0xb5, 0x73, 0xec, 0x73, 0x7b, 0x8c, 0x5e, 0x01, 0x24, 0x44, 0xeb, 0x04, 0x45, 0x86, 0xfa, + 0x7f, 0x93, 0x45, 0xdf, 0x20, 0xb0, 0x5e, 0x1c, 0xcd, 0xfa, 0x12, 0x49, 0xb0, 0x87, 0xe0, 0x1b, + 0x12, 0xb4, 0x31, 0xf3, 0x54, 0x65, 0xe8, 0xfc, 0x3b, 0x1c, 0x4a, 0xd5, 0xb9, 0xca, 0x90, 0x3d, + 0x81, 0xde, 0xd2, 0xa5, 0x30, 0x61, 0xd3, 0xf5, 0xc3, 0x8f, 0x6f, 0xd3, 0xf2, 0x1d, 0x8b, 0xde, + 0xc1, 0x91, 0xed, 0x11, 0x47, 0xb3, 0x91, 0x34, 0x23, 0xa1, 0x89, 0x3d, 0x82, 0xf6, 0x92, 0x68, + 0x1d, 0x66, 0x23, 0x6f, 0xec, 0x4f, 0x0e, 0xe2, 0xfd, 0xbc, 0x49, 0x83, 0x3b, 0xf8, 0xa1, 0x0b, + 0xed, 0x15, 0x92, 0x88, 0xfe, 0x78, 0x10, 0xd8, 0x00, 0x17, 0x79, 0x91, 0x9b, 0x25, 0x66, 0x2c, + 0x84, 0x9e, 0xd9, 0xa4, 0x29, 0x1a, 0xe3, 0x8a, 0xea, 0xf3, 0x9d, 0x68, 0x49, 0x86, 0x24, 0x72, + 0x69, 0xaa, 0xab, 0xed, 0x44, 0x76, 0x02, 0x03, 0xd4, 0x5a, 0x69, 0x5b, 0x78, 0xd8, 0x72, 0x57, + 0xb9, 0x55, 0xb0, 0x21, 0xf4, 0x9d, 0x30, 0x23, 0x1d, 0xb6, 0x9d, 0x63, 0x2d, 0x5b, 0xcf, 0x54, + 0xa3, 0x20, 0xcc, 0xde, 0x53, 0xd8, 0x71, 0xf0, 0x56, 0x61, 0xa9, 0xb1, 0x57, 0x72, 0xb4, 0x5b, + 0xd2, 0x5a, 0xc1, 0x46, 0xe0, 0xa7, 0x6a, 0xb5, 0x96, 0x58, 0xf2, 0x9e, 0xe3, 0xfb, 0xaa, 0x68, + 0x06, 0x83, 0x73, 0x99, 0x63, 0x41, 0x97, 0x66, 0xc1, 0x4e, 0xa0, 0x45, 0xba, 0x9c, 0x94, 0x3f, + 0xe9, 0xef, 0x96, 0x2b, 0x69, 0x70, 0xab, 0x66, 0xa3, 0x6a, 0xf6, 0x4d, 0x87, 0x21, 0xae, 0xb7, + 0xc2, 0x76, 0xcc, 0x12, 0xdb, 0xb1, 0x2b, 0x95, 0x6d, 0xa3, 0x5f, 0x1e, 0x0c, 0xb8, 0x7b, 0x31, + 0x36, 0xea, 0x6b, 0x08, 0xb4, 0xeb, 0xfd, 0xdc, 0x15, 0x56, 0x85, 0x3f, 0x8e, 0xef, 0x0c, 0x25, + 0x69, 0x70, 0x5f, 0xef, 0xcd, 0xe8, 0x9f, 0xe9, 0xd8, 0x33, 0xe8, 0x5f, 0x57, 0x33, 0x71, 0x2d, + 0xb5, 0x93, 0xdc, 0x1f, 0x54, 0xd2, 0xe0, 0xb5, 0x41, 0x5d, 0xdb, 0x53, 0x08, 0xca, 0xd2, 0x66, + 0x6e, 0x91, 0xd8, 0x7d, 0xe8, 0x8a, 0x94, 0xf2, 0x9b, 0x72, 0x19, 0x3b, 0xbc, 0x92, 0x26, 0x0b, + 0x38, 0x2c, 0xed, 0xbe, 0xd8, 0x67, 0x9d, 0x2a, 0xc9, 0x1e, 0x43, 0x77, 0x5a, 0x2c, 0xc4, 0x02, + 0x19, 0xc4, 0x75, 0xcf, 0x86, 0x10, 0xd7, 0x37, 0x1d, 0x7b, 0xcf, 0x3d, 0x76, 0x0a, 0xdd, 0x5d, + 0xe4, 0xb8, 0xfc, 0x27, 0xe2, 0xdd, 0x3f, 0x11, 0x4f, 0xed, 0x3f, 0x31, 0x3c, 0x88, 0xf7, 0x0b, + 0xb8, 0xea, 0x3a, 0xfc, 0xf2, 0x6f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x6e, 0x67, 0x4d, 0xd3, 0x64, + 0x04, 0x00, 0x00, } diff --git a/api/agent/grpc/runner.proto b/api/agent/grpc/runner.proto index e174e631b..feb0c017b 100644 --- a/api/agent/grpc/runner.proto +++ b/api/agent/grpc/runner.proto @@ -42,6 +42,9 @@ message CallFinished { string details = 2; int32 errorCode = 3; string errorStr = 4; + string createdAt = 5; + string startedAt = 6; + string completedAt = 7; } message ClientMsg { diff --git a/api/agent/pure_runner.go b/api/agent/pure_runner.go index 5fa334479..6b3942063 100644 --- a/api/agent/pure_runner.go +++ b/api/agent/pure_runner.go @@ -67,9 +67,6 @@ type callHandle struct { ctx context.Context c *call // the agent's version of call - // Timings, for metrics: - receivedTime common.DateTime // When was the call received? - // For implementing http.ResponseWriter: headers http.Header status int @@ -203,9 +200,14 @@ func (ch *callHandle) enqueueMsgStrict(msg *runner.RunnerMsg) error { // enqueueCallResponse enqueues a Submit() response to the LB // and initiates a graceful shutdown of the session. func (ch *callHandle) enqueueCallResponse(err error) { + + var createdAt string + var startedAt string + var completedAt string var details string var errCode int var errStr string + log := common.Logger(ch.ctx) if err != nil { @@ -214,16 +216,40 @@ func (ch *callHandle) enqueueCallResponse(err error) { } if ch.c != nil { - details = ch.c.Model().ID + mcall := ch.c.Model() + + // These timestamps are related. To avoid confusion + // and for robustness, nested if stmts below. + if !time.Time(mcall.CreatedAt).IsZero() { + createdAt = mcall.CreatedAt.String() + + if !time.Time(mcall.StartedAt).IsZero() { + startedAt = mcall.StartedAt.String() + + if !time.Time(mcall.CompletedAt).IsZero() { + completedAt = mcall.CompletedAt.String() + } else { + // IMPORTANT: We punch this in ourselves. + // This is because call.End() is executed asynchronously. + completedAt = common.DateTime(time.Now()).String() + } + } + } + + details = mcall.ID + } log.Debugf("Sending Call Finish details=%v", details) errTmp := ch.enqueueMsgStrict(&runner.RunnerMsg{ Body: &runner.RunnerMsg_Finished{Finished: &runner.CallFinished{ - Success: err == nil, - Details: details, - ErrorCode: int32(errCode), - ErrorStr: errStr, + Success: err == nil, + Details: details, + ErrorCode: int32(errCode), + ErrorStr: errStr, + CreatedAt: createdAt, + StartedAt: startedAt, + CompletedAt: completedAt, }}}) if errTmp != nil { @@ -524,7 +550,9 @@ func (pr *pureRunner) spawnSubmit(state *callHandle) { // handleTryCall based on the TryCall message, tries to place the call on NBIO Agent func (pr *pureRunner) handleTryCall(tc *runner.TryCall, state *callHandle) error { - state.receivedTime = common.DateTime(time.Now()) + + start := time.Now() + var c models.Call err := json.Unmarshal([]byte(tc.ModelsCallJson), &c) if err != nil { @@ -532,6 +560,13 @@ func (pr *pureRunner) handleTryCall(tc *runner.TryCall, state *callHandle) error return err } + // IMPORTANT: We clear/initialize these dates as start/created/completed dates from + // unmarshalled Model from LB-agent represent unrelated time-line events. + // From this point, CreatedAt/StartedAt/CompletedAt are based on our local clock. + c.CreatedAt = common.DateTime(start) + c.StartedAt = common.DateTime(time.Time{}) + c.CompletedAt = common.DateTime(time.Time{}) + agent_call, err := pr.a.GetCall(FromModelAndInput(&c, state.pipeToFnR), WithWriter(state), WithContext(state.ctx), diff --git a/api/agent/runner_client.go b/api/agent/runner_client.go index 178fd60c1..1d8d9804e 100644 --- a/api/agent/runner_client.go +++ b/api/agent/runner_client.go @@ -233,6 +233,29 @@ func tryQueueError(err error, done chan error) { } } +func translateDate(dt string) time.Time { + if dt != "" { + trx, err := common.ParseDateTime(dt) + if err == nil { + return time.Time(trx) + } + } + return time.Time{} +} + +func recordFinishStats(ctx context.Context, msg *pb.CallFinished) { + + creatTs := translateDate(msg.GetCreatedAt()) + startTs := translateDate(msg.GetStartedAt()) + complTs := translateDate(msg.GetCompletedAt()) + + // Validate this as info *is* coming from runner and its local clock. + if !creatTs.IsZero() && !startTs.IsZero() && !complTs.IsZero() && !startTs.Before(creatTs) && !complTs.Before(startTs) { + statsLBAgentRunnerSchedLatency(ctx, startTs.Sub(creatTs)) + statsLBAgentRunnerExecLatency(ctx, complTs.Sub(startTs)) + } +} + func receiveFromRunner(ctx context.Context, protocolClient pb.RunnerProtocol_EngageClient, c pool.RunnerCall, done chan error) { w := c.ResponseWriter() defer close(done) @@ -286,6 +309,7 @@ DataLoop: // Finish messages required for finish/finalize the processing. case *pb.RunnerMsg_Finished: log.Infof("Call finished Success=%v %v", body.Finished.Success, body.Finished.Details) + recordFinishStats(ctx, body.Finished) if !body.Finished.Success { err := parseError(body.Finished) tryQueueError(err, done) diff --git a/api/agent/stats.go b/api/agent/stats.go index a4006fd1f..66fd31070 100644 --- a/api/agent/stats.go +++ b/api/agent/stats.go @@ -3,6 +3,7 @@ package agent import ( "context" "strings" + "time" "github.com/sirupsen/logrus" "go.opencensus.io/stats" @@ -56,6 +57,14 @@ func statsTooBusy(ctx context.Context) { stats.Record(ctx, serverBusyMeasure.M(1)) } +func statsLBAgentRunnerSchedLatency(ctx context.Context, dur time.Duration) { + stats.Record(ctx, runnerSchedLatencyMeasure.M(int64(dur/time.Millisecond))) +} + +func statsLBAgentRunnerExecLatency(ctx context.Context, dur time.Duration) { + stats.Record(ctx, runnerExecLatencyMeasure.M(int64(dur/time.Millisecond))) +} + const ( // TODO we should probably prefix these with calls_ ? queuedMetricName = "queued" @@ -66,6 +75,10 @@ const ( timedoutMetricName = "timeouts" errorsMetricName = "errors" serverBusyMetricName = "server_busy" + + // Reported By LB + runnerSchedLatencyMetricName = "lb_runner_sched_latency" + runnerExecLatencyMetricName = "lb_runner_exec_latency" ) var ( @@ -81,8 +94,23 @@ var ( dockerMeasures = initDockerMeasures() containerGaugeMeasures = initContainerGaugeMeasures() containerTimeMeasures = initContainerTimeMeasures() + + // Reported By LB: How long does a runner scheduler wait for a committed call? eg. wait/launch/pull containers + runnerSchedLatencyMeasure = makeMeasure(runnerSchedLatencyMetricName, "Runner Scheduler Latency Reported By LBAgent", "msecs") + // Reported By LB: Function execution time inside a container. + runnerExecLatencyMeasure = makeMeasure(runnerExecLatencyMetricName, "Runner Container Execution Latency Reported By LBAgent", "msecs") ) +func RegisterLBAgentViews(tagKeys []string) { + err := view.Register( + createView(runnerSchedLatencyMeasure, view.Distribution(1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000), tagKeys), + createView(runnerExecLatencyMeasure, view.Distribution(1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000), tagKeys), + ) + if err != nil { + logrus.WithError(err).Fatal("cannot register view") + } +} + // RegisterAgentViews creates and registers all agent views func RegisterAgentViews(tagKeys []string) { err := view.Register( diff --git a/api/runnerpool/placer_stats.go b/api/runnerpool/placer_stats.go index 3947016d7..e9705dc61 100644 --- a/api/runnerpool/placer_stats.go +++ b/api/runnerpool/placer_stats.go @@ -38,18 +38,18 @@ func newAttemptTracker(ctx context.Context) *attemptTracker { } } -func (data *attemptTracker) finalizeAttempts(isSuccess bool) { +func (data *attemptTracker) finalizeAttempts(isCommited bool) { stats.Record(data.ctx, attemptCountMeasure.M(data.attemptCount)) // IMPORTANT: here we use (lastAttemptTime - startTime). We want to exclude TryExec - // latency *if* TryExec() goes through with success. Placer latency metric only shows + // latency *if* TryExec() goes through with commit. Placer latency metric only shows // how much time are spending in Placer loop/retries. The metric includes rtt/latency of // *all* unsuccessful NACK (retriable) responses from runners as well. For example, if // Placer loop here retries 4 runners (which takes 5 msecs each) and then 5th runner // succeeds (but takes 35 seconds to finish execution), we report 20 msecs as our LB // latency. endTime := data.lastAttemptTime - if !isSuccess { + if !isCommited { endTime = time.Now() } diff --git a/api/server/server.go b/api/server/server.go index bc6fc2c46..5ec8e0a35 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -474,6 +474,7 @@ func WithAgentFromEnv() ServerOption { keys := []string{"fn_appname", "fn_path"} pool.RegisterPlacerViews(keys) + agent.RegisterLBAgentViews(keys) s.agent, err = agent.NewLBAgent(agent.NewCachedDataAccess(cl), runnerPool, placer) if err != nil { diff --git a/test/fn-system-tests/system_test.go b/test/fn-system-tests/system_test.go index f9e90868c..b31e63ca9 100644 --- a/test/fn-system-tests/system_test.go +++ b/test/fn-system-tests/system_test.go @@ -219,6 +219,7 @@ func SetUpLBNode(ctx context.Context) (*server.Server, error) { keys := []string{"fn_appname", "fn_path"} pool.RegisterPlacerViews(keys) + agent.RegisterLBAgentViews(keys) // Create an LB Agent with a Call Overrider to intercept calls in GetCall(). Overrider in this example // scrubs CPU/TmpFsSize and adds FN_CHEESE key/value into extensions.