Mirror of https://github.com/fnproject/fn.git (synced 2022-10-28 21:29:17 +03:00)
Hybrid plumby (#585)
* fix configuration of agent and server to be future proof and plumb in the hybrid client agent
* fixes up the tests, turns off /r/ on api nodes
* fix up defaults for runner nodes
* shove the runner async push code down into agent land to use client
* plumb up async-age
* return full call from async dequeue endpoint, since we're storing a whole call in the MQ we don't need to worry about caching of app/route [for now]
* fast safe shutdown of dequeue looper in runner / tidying of agent
* nice errors for path not found against /r/, /v1/ or other path not found
* removed some stale TODO in agent
* mq backends are only loud mouths in debug mode now
* update tests
* Add caching to hybrid client
* Fix HTTP error handling in hybrid client. The type switch was on the value rather than a pointer.
* Gofmt.
* Better caching with a nice caching wrapper
* Remove datastore cache which is now unused
* Don't need to manually wrap interface methods
* Go fmt
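
For orientation, the shape of the wiring this commit introduces, as a rough sketch (the constructors are from this diff; the surrounding function and its name are illustrative only, not part of the commit):

    // Hypothetical wiring for a runner node: it reaches the API node over
    // HTTP via the hybrid client instead of touching the datastore/MQ.
    func newRunnerAgent(apiURL string) (agent.Agent, error) {
        da, err := hybrid.NewClient(apiURL) // implements agent.DataAccess
        if err != nil {
            return nil, err
        }
        // wrap with the caching layer so GetApp/GetRoute lookups don't hit
        // the API node on every request
        return agent.New(agent.NewCachedDataAccess(da)), nil
    }

    // A full (single-node) deployment keeps the direct path:
    //   a := agent.New(agent.NewDirectDataAccess(ds, ls, mq))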
@@ -24,7 +24,6 @@ import (
	"github.com/sirupsen/logrus"
)

// TODO make sure some errors that user should see (like image doesn't exist) bubble up
// TODO we should prob store async calls in db immediately since we're returning id (will 404 until post-execution)
// TODO async calls need to add route.Headers as well
// TODO need to shut off reads/writes in dispatch to the pipes when call times out so that
@@ -32,15 +31,8 @@ import (
// TODO add spans back around container launching for hot (follows from?) + other more granular spans
// TODO handle timeouts / no response in sync & async (sync is json+503 atm, not 504, async is empty log+status)
// see also: server/runner.go wrapping the response writer there, but need to handle async too (push down?)
// TODO herd launch prevention part deux
// TODO storing logs / call can push call over the timeout
// TODO all Datastore methods need to take unit of tenancy (app or route) at least (e.g. not just call id)
// TODO discuss concrete policy for hot launch or timeout / timeout vs time left
// TODO it may be nice to have an interchange type for Dispatch that can have
// all the info we need to build e.g. http req, grpc req, json, etc. so that
// we can easily do e.g. http->grpc, grpc->http, http->json. ofc grpc<->http is
// weird for proto specifics like e.g. proto version, method, headers, et al.
// discuss.
// TODO if we don't cap the number of any one container we could get into a situation
// where the machine is full but all the containers are idle up to the idle timeout. meh.
// TODO async is still broken, but way less so. we need to modify mq semantics
@@ -49,9 +41,7 @@ import (
// dies). need coordination w/ db.
// TODO if a cold call times out but container is created but hasn't replied, could
// end up that the client doesn't get a reply until long after the timeout (b/c of container removal, async it?)
// TODO the call api should fill in all the fields
// TODO the log api should be plaintext (or at least offer it)
// TODO we should probably differentiate ran-but-timeout vs timeout-before-run
// TODO between calls, logs and stderr can contain output/ids from previous call. need elegant solution. grossness.
// TODO if async would store requests (or interchange format) it would be slick, but
// if we're going to store full calls in db maybe we should only queue pointers to ids?
@@ -113,20 +103,15 @@ type Agent interface {
	// Return the http.Handler used to handle Prometheus metric requests
	PromHandler() http.Handler
	AddCallListener(fnext.CallListener)

+	// Enqueue is to use the agent's sweet sweet client bindings to remotely
+	// queue async tasks and should be removed from Agent interface ASAP.
+	Enqueue(context.Context, *models.Call) error
}

-type AgentNodeType int32
-
-const (
-	AgentTypeFull AgentNodeType = iota
-	AgentTypeAPI
-	AgentTypeRunner
-)
-
type agent struct {
	da            DataAccess
	callListeners []fnext.CallListener
-	tp            AgentNodeType

	driver drivers.Driver

@@ -146,13 +131,12 @@ type agent struct {
	promHandler http.Handler
}

-func New(ds models.Datastore, ls models.LogStore, mq models.MessageQueue, tp AgentNodeType) Agent {
+func New(da DataAccess) Agent {
	// TODO: Create drivers.New(runnerConfig)
	driver := docker.NewDocker(drivers.Config{})

	a := &agent{
-		tp:          tp,
-		da:          NewDirectDataAccess(ds, ls, mq),
+		da:          da,
		driver:      driver,
		hot:         make(map[string]chan slot),
		resources:   NewResourceTracker(),
@@ -160,16 +144,17 @@ func New(ds models.Datastore, ls models.LogStore, mq models.MessageQueue, tp AgentNodeType) Agent {
		promHandler: promhttp.Handler(),
	}

-	switch tp {
-	case AgentTypeAPI:
-		// Don't start dequeuing
-	default:
-		go a.asyncDequeue() // safe shutdown can nanny this fine
-	}
+	// TODO assert that agent doesn't get started for API nodes up above ?
+	go a.asyncDequeue() // safe shutdown can nanny this fine

	return a
}

+// TODO shuffle this around somewhere else (maybe)
+func (a *agent) Enqueue(ctx context.Context, call *models.Call) error {
+	return a.da.Enqueue(ctx, call)
+}

func (a *agent) Close() error {
	select {
	case <-a.shutdown:
@@ -191,10 +176,6 @@ func transformTimeout(e error, isRetriable bool) error {
}

func (a *agent) Submit(callI Call) error {
-	if a.tp == AgentTypeAPI {
-		return errors.New("API agent cannot execute calls")
-	}
-
	a.wg.Add(1)
	defer a.wg.Done()
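
The `a.wg.Add(1)` / `defer a.wg.Done()` pair in Submit, plus the shutdown channel that Close and asyncDequeue select on, make up the "fast safe shutdown of dequeue looper" from the commit message. A minimal standalone sketch of that pattern (hypothetical names; assumes loops are started before close is called):

    import "sync"

    type looper struct {
        shutdown chan struct{} // closed exactly once to request shutdown
        wg       sync.WaitGroup
    }

    func (l *looper) run(work func()) {
        l.wg.Add(1)
        defer l.wg.Done() // treat the whole loop as one task
        for {
            select {
            case <-l.shutdown:
                return
            default:
                work()
            }
        }
    }

    func (l *looper) close() {
        close(l.shutdown)
        l.wg.Wait() // returns only after every loop has observed shutdown
    }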
@@ -49,7 +49,7 @@ func TestCallConfigurationRequest(t *testing.T) {
		}, nil,
	)

-	a := New(ds, ds, new(mqs.Mock), AgentTypeFull)
+	a := New(NewCachedDataAccess(NewDirectDataAccess(ds, ds, new(mqs.Mock))))
	defer a.Close()

	w := httptest.NewRecorder()
@@ -247,7 +247,7 @@ func TestCallConfigurationModel(t *testing.T) {
	// FromModel doesn't need a datastore, for now...
	ds := datastore.NewMockInit(nil, nil, nil)

-	a := New(ds, ds, new(mqs.Mock), AgentTypeFull)
+	a := New(NewCachedDataAccess(NewDirectDataAccess(ds, ds, new(mqs.Mock))))
	defer a.Close()

	callI, err := a.GetCall(FromModel(cm))
@@ -353,7 +353,7 @@ func TestSubmitError(t *testing.T) {
	// FromModel doesn't need a datastore, for now...
	ds := datastore.NewMockInit(nil, nil, nil)

-	a := New(ds, ds, new(mqs.Mock), AgentTypeFull)
+	a := New(NewCachedDataAccess(NewDirectDataAccess(ds, ds, new(mqs.Mock))))
	defer a.Close()

	callI, err := a.GetCall(FromModel(cm))
@@ -4,6 +4,7 @@ import (
	"context"
	"time"

+	"github.com/fnproject/fn/api/models"
	"github.com/sirupsen/logrus"
)
@@ -11,6 +12,10 @@ func (a *agent) asyncDequeue() {
	a.wg.Add(1)
	defer a.wg.Done() // we can treat this thread like one big task and get safe shutdown fo free

+	// this is just so we can hang up the dequeue request if we get shut down
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
	for {
		select {
		case <-a.shutdown:
@@ -22,42 +27,64 @@ func (a *agent) asyncDequeue() {
			// out of RAM so..
		}

-		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) // TODO ???
-		model, err := a.da.Dequeue(ctx)
-		cancel()
-		if err != nil || model == nil {
-			if err != nil {
-				logrus.WithError(err).Error("error fetching queued calls")
-			}
-			time.Sleep(1 * time.Second) // backoff a little
-			continue
-		}
-
-		// TODO output / logger should be here too...
-
-		a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
-		go func() {
-			defer a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
-
-			call, err := a.GetCall(FromModel(model))
-			if err != nil {
-				logrus.WithError(err).Error("error getting async call")
-				return
-			}
-
-			// TODO if the task is cold and doesn't require reading STDIN, it could
-			// run but we may not listen for output since the task timed out. these
-			// are at least once semantics, which is really preferable to at most
-			// once, so let's do it for now
-
-			err = a.Submit(call)
-			if err != nil {
-				// NOTE: these could be errors / timeouts from the call that we're
-				// logging here (i.e. not our fault), but it's likely better to log
-				// these than suppress them so...
-				id := call.Model().ID
-				logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
-			}
-		}()
+		// we think we can get a cookie now, so go get a cookie
+		select {
+		case <-a.shutdown:
+			return
+		case model, ok := <-a.asyncChew(ctx):
+			if ok {
+				a.wg.Add(1) // need to add 1 in this thread to ensure safe shutdown
+				go func(model *models.Call) {
+					a.asyncRun(model)
+					a.wg.Done() // can shed it after this is done, Submit will add 1 too but it's fine
+				}(model)
+			}
+		}
	}
}

+func (a *agent) asyncChew(ctx context.Context) <-chan *models.Call {
+	ch := make(chan *models.Call, 1)
+
+	go func() {
+		ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
+		defer cancel()
+
+		call, err := a.da.Dequeue(ctx)
+		if call != nil {
+			ch <- call
+		} else { // call is nil / error
+			if err != nil && err != context.DeadlineExceeded {
+				logrus.WithError(err).Error("error fetching queued calls")
+			}
+			// queue may be empty / unavailable
+			time.Sleep(1 * time.Second) // backoff a little before sending no cookie message
+			close(ch)
+		}
+	}()
+
+	return ch
+}
+
+func (a *agent) asyncRun(model *models.Call) {
+	// TODO output / logger should be here too...
+	call, err := a.GetCall(FromModel(model))
+	if err != nil {
+		logrus.WithError(err).Error("error getting async call")
+		return
+	}
+
+	// TODO if the task is cold and doesn't require reading STDIN, it could
+	// run but we may not listen for output since the task timed out. these
+	// are at least once semantics, which is really preferable to at most
+	// once, so let's do it for now
+
+	err = a.Submit(call)
+	if err != nil {
+		// NOTE: these could be errors / timeouts from the call that we're
+		// logging here (i.e. not our fault), but it's likely better to log
+		// these than suppress them so...
+		id := call.Model().ID
+		logrus.WithFields(logrus.Fields{"id": id}).WithError(err).Error("error running async call")
+	}
+}
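
asyncChew hands back a result channel instead of blocking the caller, which is what lets the dequeue looper also watch a.shutdown. The channel is buffered with capacity 1, so the inner goroutine can always complete its send and exit even if the looper took the shutdown branch. A stripped-down sketch of the same shape (standalone; names and the string payload are hypothetical):

    import "context"

    // fetch runs a blocking lookup in a goroutine and exposes the result as a
    // channel, so a caller can select on it alongside a shutdown signal. The
    // buffer of 1 means the send never blocks: even if the caller gave up,
    // the goroutine finishes and exits rather than leaking.
    func fetch(ctx context.Context, get func(context.Context) (string, error)) <-chan string {
        ch := make(chan string, 1)
        go func() {
            v, err := get(ctx)
            if err != nil {
                close(ch) // the "no cookie" message: failure signaled by close
                return
            }
            ch <- v
        }()
        return ch
    }

The looper side then reads `v, ok := <-fetch(...)` in a select next to the shutdown channel, exactly as asyncDequeue does above.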
@@ -350,6 +350,9 @@ func (c *call) End(ctx context.Context, errIn error) error {
		// note: Not returning err here since the job could have already finished successfully.
	}

+	// NOTE call this after InsertLog or the buffer will get reset
+	c.stderr.Close()
+
	if err := c.ct.fireAfterCall(ctx, c.Model()); err != nil {
		return fmt.Errorf("AfterCall: %v", err)
	}
@@ -2,9 +2,13 @@ package agent

import (
	"context"
-	"github.com/fnproject/fn/api/common"
-	"github.com/fnproject/fn/api/models"
	"io"
	"time"
+
+	"github.com/fnproject/fn/api/common"
+	"github.com/fnproject/fn/api/common/singleflight"
+	"github.com/fnproject/fn/api/models"
+	"github.com/patrickmn/go-cache"
)

// DataAccess abstracts the datastore and message queue operations done by the
@@ -19,7 +23,7 @@ type DataAccess interface {
	GetRoute(ctx context.Context, appName string, routePath string) (*models.Route, error)

	// Enqueue will add a Call to the queue (ultimately forwards to mq.Push).
-	Enqueue(ctx context.Context, mCall *models.Call) (*models.Call, error)
+	Enqueue(ctx context.Context, mCall *models.Call) error

	// Dequeue will query the queue for the next available Call that can be run
	// by this Agent, and reserve it (ultimately forwards to mq.Reserve).
@@ -31,7 +35,71 @@ type DataAccess interface {

	// Finish will notify the system that the Call has been processed, and
	// fulfill the reservation in the queue if the call came from a queue.
-	Finish(ctx context.Context, mCall *models.Call, stderr io.ReadWriteCloser, async bool) error
+	Finish(ctx context.Context, mCall *models.Call, stderr io.Reader, async bool) error
}

+// CachedDataAccess wraps a DataAccess and caches the results of GetApp and GetRoute.
+type CachedDataAccess struct {
+	DataAccess
+
+	cache        *cache.Cache
+	singleflight singleflight.SingleFlight
+}
+
+func NewCachedDataAccess(da DataAccess) DataAccess {
+	cda := &CachedDataAccess{
+		DataAccess: da,
+		cache:      cache.New(5*time.Second, 1*time.Minute),
+	}
+	return cda
+}
+
+func routeCacheKey(appname, path string) string {
+	return "r:" + appname + "\x00" + path
+}
+
+func appCacheKey(appname string) string {
+	return "a:" + appname
+}
+
+func (da *CachedDataAccess) GetApp(ctx context.Context, appName string) (*models.App, error) {
+	key := appCacheKey(appName)
+	app, ok := da.cache.Get(key)
+	if ok {
+		return app.(*models.App), nil
+	}
+
+	resp, err := da.singleflight.Do(key,
+		func() (interface{}, error) {
+			return da.DataAccess.GetApp(ctx, appName)
+		})
+
+	if err != nil {
+		return nil, err
+	}
+	app = resp.(*models.App)
+	da.cache.Set(key, app, cache.DefaultExpiration)
+	return app.(*models.App), nil
+}
+
+func (da *CachedDataAccess) GetRoute(ctx context.Context, appName string, routePath string) (*models.Route, error) {
+	key := routeCacheKey(appName, routePath)
+	r, ok := da.cache.Get(key)
+	if ok {
+		return r.(*models.Route), nil
+	}
+
+	resp, err := da.singleflight.Do(key,
+		func() (interface{}, error) {
+			return da.DataAccess.GetRoute(ctx, appName, routePath)
+		})
+
+	if err != nil {
+		return nil, err
+	}
+	r = resp.(*models.Route)
+	da.cache.Set(key, r, cache.DefaultExpiration)
+	return r.(*models.Route), nil
+}

type directDataAccess struct {
@@ -57,8 +125,9 @@ func (da *directDataAccess) GetRoute(ctx context.Context, appName string, routePath string) (*models.Route, error) {
	return da.ds.GetRoute(ctx, appName, routePath)
}

-func (da *directDataAccess) Enqueue(ctx context.Context, mCall *models.Call) (*models.Call, error) {
-	return da.mq.Push(ctx, mCall)
+func (da *directDataAccess) Enqueue(ctx context.Context, mCall *models.Call) error {
+	_, err := da.mq.Push(ctx, mCall)
+	return err
+	// TODO: Insert a call in the datastore with the 'queued' state
}
@@ -77,7 +146,7 @@ func (da *directDataAccess) Start(ctx context.Context, mCall *models.Call) error {
	return da.mq.Delete(ctx, mCall)
}

-func (da *directDataAccess) Finish(ctx context.Context, mCall *models.Call, stderr io.ReadWriteCloser, async bool) error {
+func (da *directDataAccess) Finish(ctx context.Context, mCall *models.Call, stderr io.Reader, async bool) error {
	// this means that we could potentially store an error / timeout status for a
	// call that ran successfully [by a user's perspective]
	// TODO: this should be update, really
@@ -90,8 +159,6 @@ func (da *directDataAccess) Finish(ctx context.Context, mCall *models.Call, stderr io.Reader, async bool) error {
		common.Logger(ctx).WithError(err).Error("error uploading log")
		// note: Not returning err here since the job could have already finished successfully.
	}
-	// NOTE call this after InsertLog or the buffer will get reset
-	stderr.Close()

	if async {
		// XXX (reed): delete MQ message, eventually
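
CachedDataAccess embeds the wrapped DataAccess, so only the two read paths are intercepted and every other method falls through automatically (the "Don't need to manually wrap interface methods" bullet above). A hedged usage sketch; the constructor arguments are stand-ins, and the TTLs come from the cache.New call in the diff:

    // Reads are cached for ~5s (go-cache sweeps expired entries every 1m),
    // and concurrent misses on the same key are collapsed by singleflight so
    // only one lookup reaches the backing store.
    da := NewCachedDataAccess(NewDirectDataAccess(ds, ls, mq))

    app, err := da.GetApp(ctx, "myapp")       // miss: fetches, then caches
    app, err = da.GetApp(ctx, "myapp")        // hit: served from cache
    err = da.Finish(ctx, call, stderr, false) // not cached: falls through

The singleflight guard is what keeps a cold cache from turning a burst of requests into a thundering herd against the API node in the hybrid setup (see the "herd launch prevention" TODO earlier in the diff).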
api/agent/hybrid/client.go (new file, 241 lines)
@@ -0,0 +1,241 @@
package hybrid

import (
	"bytes"
	"context"
	"crypto/tls"
	"encoding/json"
	"errors"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/fnproject/fn/api/agent"
	"github.com/fnproject/fn/api/common"
	"github.com/fnproject/fn/api/models"
	opentracing "github.com/opentracing/opentracing-go"
)

// client implements agent.DataAccess
type client struct {
	base string
	http *http.Client
}

func NewClient(u string) (agent.DataAccess, error) {
	uri, err := url.Parse(u)
	if err != nil {
		return nil, err
	}

	if uri.Host == "" {
		return nil, errors.New("no host specified for client")
	}
	if uri.Scheme == "" {
		uri.Scheme = "http"
	}
	host := uri.Scheme + "://" + uri.Host + "/v1/"

	httpClient := &http.Client{
		Timeout: 60 * time.Second,
		Transport: &http.Transport{
			Proxy: http.ProxyFromEnvironment,
			Dial: (&net.Dialer{
				Timeout:   30 * time.Second,
				KeepAlive: 30 * time.Second,
			}).Dial,
			MaxIdleConns:        512,
			MaxIdleConnsPerHost: 128,
			IdleConnTimeout:     90 * time.Second,
			TLSHandshakeTimeout: 10 * time.Second,
			TLSClientConfig: &tls.Config{
				ClientSessionCache: tls.NewLRUClientSessionCache(8096),
			},
			ExpectContinueTimeout: 1 * time.Second,
		},
	}

	return &client{
		base: host,
		http: httpClient,
	}, nil
}

func (cl *client) Enqueue(ctx context.Context, c *models.Call) error {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_enqueue")
	defer span.Finish()

	err := cl.do(ctx, c, nil, "PUT", "runner", "async")
	return err
}

func (cl *client) Dequeue(ctx context.Context) (*models.Call, error) {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_dequeue")
	defer span.Finish()

	var c struct {
		C []*models.Call `json:"calls"`
	}
	err := cl.do(ctx, nil, &c, "GET", "runner", "async")
	if len(c.C) > 0 {
		return c.C[0], nil
	}
	return nil, err
}

func (cl *client) Start(ctx context.Context, c *models.Call) error {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_start")
	defer span.Finish()

	err := cl.do(ctx, c, nil, "POST", "runner", "start")
	return err
}

func (cl *client) Finish(ctx context.Context, c *models.Call, r io.Reader, async bool) error {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_end")
	defer span.Finish()

	var b bytes.Buffer // TODO pool / we should multipart this?
	_, err := io.Copy(&b, r)
	if err != nil {
		return err
	}
	bod := struct {
		C *models.Call `json:"call"`
		L string       `json:"log"`
	}{
		C: c,
		L: b.String(),
	}

	// TODO add async bit to query params or body
	err = cl.do(ctx, bod, nil, "POST", "runner", "finish")
	return err
}

func (cl *client) GetApp(ctx context.Context, appName string) (*models.App, error) {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_get_app")
	defer span.Finish()

	var a struct {
		A models.App `json:"app"`
	}
	err := cl.do(ctx, nil, &a, "GET", "apps", appName)
	return &a.A, err
}

func (cl *client) GetRoute(ctx context.Context, appName, route string) (*models.Route, error) {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_get_route")
	defer span.Finish()

	// TODO trim prefix is pretty odd here eh?
	var r struct {
		R models.Route `json:"route"`
	}
	err := cl.do(ctx, nil, &r, "GET", "apps", appName, "routes", strings.TrimPrefix(route, "/"))
	return &r.R, err
}

type httpErr struct {
	code int
	error
}

func (cl *client) do(ctx context.Context, request, result interface{}, method string, url ...string) error {
	// TODO determine policy (should we count to infinity?)

	var b common.Backoff
	var err error
	for i := 0; i < 5; i++ {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// TODO this isn't re-using buffers very efficiently, but retries should be rare...
		err = cl.once(ctx, request, result, method, url...)
		switch err := err.(type) {
		case nil:
			return err
		case *httpErr:
			if err.code < 500 {
				return err
			}
			// retry 500s...
		default:
			// this error wasn't from us [most likely], probably a conn refused/timeout, just retry it out
		}

		common.Logger(ctx).WithError(err).Error("error from API server, retrying")

		b.Sleep(ctx)
	}

	// return last error
	return err
}

func (cl *client) once(ctx context.Context, request, result interface{}, method string, url ...string) error {
	span, ctx := opentracing.StartSpanFromContext(ctx, "hybrid_client_http_do")
	defer span.Finish()

	var b bytes.Buffer // TODO pool
	if request != nil {
		err := json.NewEncoder(&b).Encode(request)
		if err != nil {
			return err
		}
	}

	req, err := http.NewRequest(method, cl.url(url...), &b)
	if err != nil {
		return err
	}
	req = req.WithContext(ctx)
	// shove the span headers in so that the server will continue this span
	opentracing.GlobalTracer().Inject(
		span.Context(),
		opentracing.HTTPHeaders,
		opentracing.HTTPHeadersCarrier(req.Header))

	resp, err := cl.http.Do(req)
	if err != nil {
		return err
	}
	defer func() { io.Copy(ioutil.Discard, resp.Body); resp.Body.Close() }()

	if resp.StatusCode >= 300 {
		// one of our errors
		var msg struct {
			Err *struct {
				Msg string `json:"message"`
			} `json:"error"`
		}
		// copy into a buffer in case it wasn't from us
		var b bytes.Buffer
		io.Copy(&b, resp.Body)
		json.Unmarshal(b.Bytes(), &msg)
		if msg.Err != nil {
			return &httpErr{code: resp.StatusCode, error: errors.New(msg.Err.Msg)}
		}
		return &httpErr{code: resp.StatusCode, error: errors.New(b.String())}
	}

	if result != nil {
		err := json.NewDecoder(resp.Body).Decode(&result)
		if err != nil {
			return err
		}
	}

	return nil
}

func (cl *client) url(args ...string) string {
	return cl.base + strings.Join(args, "/")
}
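
The commit bullet "Fix HTTP error handling in hybrid client. The type switch was on the value rather than a pointer" refers to the retry loop in do() above: once() returns *httpErr, so a `case httpErr:` arm would never match and 4xx responses would be retried like 5xxs. A standalone illustration, reusing httpErr from the file above (not part of the diff):

    // A type switch matches the dynamic type exactly, so a value case never
    // catches a pointer to that type.
    var err error = &httpErr{code: 404, error: errors.New("not found")}

    switch e := err.(type) {
    case httpErr: // never fires: the dynamic type is *httpErr, not httpErr
        _ = e
    case *httpErr: // fires: lets do() return 4xx immediately instead of retrying
        _ = e.code
    }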