Mirror of https://github.com/fnproject/fn.git, synced 2022-10-28 21:29:17 +03:00
lb round 2
This rewrites the load balancer that was flagged as problematic in https://github.com/iron-io/functions/issues/570, with a test suite located at test/fnlb-test-harness. That test now 'passes' in the sense that, when multiple functions nodes run behind one load balancer, every call to a given function path lands on the same functions node. Yay. It uses a different consistent hashing algorithm and throws out all of the fallback code (all of the old code, really).

This is basically an MVP: work remains for running multiple load balancers, for letting functions spread onto more nodes as load increases, and for basics like HTTP/2, and it needs some love to be really robust; most of this is noted in TODOs in the file.

It does basic health checking, configurable in the same way as an AWS ELB. Gossip could probably replace this later, but this works as a first cut. After N health checks fail, requests start going to a different node; until then, every request to the failing node will fail (needs fixing). It still uses consistent hashing, which is great in that we don't need to store anything, and with some care we may be able to stick with that strategy in the future.
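For readers skimming the diff, here is a minimal sketch (not part of this commit) of the routing scheme the new lb/lb.go below adopts: siphash the request path, then jump consistent hashing into a sorted node list. It illustrates the claim that a given function path always lands on the same node, and that any balancer holding the same node list computes the same answer. The node addresses and path are made up; jump is a copy of the jumpConsistentHash function from the diff.

package main

import (
	"fmt"

	"github.com/dchest/siphash"
)

// jump mirrors jumpConsistentHash from lb/lb.go below.
func jump(key uint64, numBuckets int32) int32 {
	var b, j int64 = -1, 0
	for j < int64(numBuckets) {
		b = j
		key = key*2862933555777941757 + 1
		j = (b + 1) * int64((1<<31)/(key>>33)+1)
	}
	return int32(b)
}

func main() {
	// every balancer must hold the same, identically sorted node list
	nodes := []string{"10.0.0.1:8080", "10.0.0.2:8080", "10.0.0.3:8080"}
	path := "/myapp/hello"

	// same path -> same 64-bit hash -> same bucket -> same node, on every LB
	sum := siphash.Hash(0, 0x4c617279426f6174, []byte(path))
	fmt.Println(path, "routes to", nodes[jump(sum, int32(len(nodes)))])
}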
Gopkg.lock (generated, 24 changed lines)
@@ -1,4 +1,4 @@
memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
memo = "01114332a6ce5b71583264fdc63cf899f4ef12030a6febe795a2584189675c23"

[[projects]]
  branch = "master"
@@ -90,6 +90,12 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
  revision = "8ab6407b697782a06568d4b7f1db25550ec2e4c6"
  version = "v0.2.0"

[[projects]]
  name = "github.com/dchest/siphash"
  packages = ["."]
  revision = "42ba037e748c9062a75e0924705c43b893edefcd"
  version = "v1.0.0"

[[projects]]
  branch = "master"
  name = "github.com/dghubble/go-twitter"
@@ -121,10 +127,10 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
  version = "v2.6.1"

[[projects]]
  branch = "master"
  name = "github.com/moby/moby"
  packages = ["api/types","api/types/blkiodev","api/types/container","api/types/filters","api/types/mount","api/types/network","api/types/registry","api/types/strslice","api/types/swarm","api/types/versions","cli/config/configfile","opts","pkg/archive","pkg/fileutils","pkg/homedir","pkg/idtools","pkg/ioutils","pkg/jsonlog","pkg/jsonmessage","pkg/longpath","pkg/pools","pkg/promise","pkg/stdcopy","pkg/system","pkg/term","pkg/term/windows"]
  revision = "a9ff628a3c7c55cf016af88f37c4fb6a6029e17a"
  name = "github.com/docker/docker"
  packages = ["api/types","api/types/blkiodev","api/types/container","api/types/filters","api/types/mount","api/types/network","api/types/registry","api/types/strslice","api/types/swarm","api/types/versions","opts","pkg/archive","pkg/fileutils","pkg/homedir","pkg/idtools","pkg/ioutils","pkg/jsonlog","pkg/jsonmessage","pkg/longpath","pkg/pools","pkg/promise","pkg/stdcopy","pkg/system","pkg/term","pkg/term/windows"]
  revision = "092cba3727bb9b4a2f0e922cd6c0f93ea270e363"
  version = "v1.13.1"

[[projects]]
  name = "github.com/docker/go-connections"
@@ -255,7 +261,7 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
[[projects]]
  branch = "master"
  name = "github.com/golang/groupcache"
  packages = ["consistenthash","singleflight"]
  packages = ["singleflight"]
  revision = "72d04f9fcdec7d3821820cc4a6f150eae553639a"

[[projects]]
@@ -372,6 +378,12 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
  packages = ["."]
  revision = "53818660ed4955e899c0bcafa97299a388bd7c8e"

[[projects]]
  branch = "master"
  name = "github.com/moby/moby"
  packages = ["cli/config/configfile","pkg/jsonmessage"]
  revision = "a9ff628a3c7c55cf016af88f37c4fb6a6029e17a"

[[projects]]
  name = "github.com/opencontainers/runc"
  packages = ["libcontainer/system","libcontainer/user"]
@@ -1,3 +0,0 @@
all:
	go build -o fnlb
	./fnlb --help
@@ -1,34 +0,0 @@
package main

import (
	"context"
	"flag"
	"fmt"
	"net/http"
	"os"
	"strings"

	"gitlab.oracledx.com/odx/functions/lb"
)

var (
	fnodes  string
	flisten string
)

func init() {
	flag.StringVar(&fnodes, "nodes", "127.0.0.1:8080", "comma separated list of Oracle Functions nodes")
	flag.StringVar(&flisten, "listen", "0.0.0.0:8081", "listening port for incoming connections")
	flag.Parse()
}

func main() {
	nodes := strings.Split(fnodes, ",")
	p := lb.ConsistentHashReverseProxy(context.Background(), nodes)
	fmt.Println("forwarding calls to", nodes)
	fmt.Println("listening to", flisten)
	if err := http.ListenAndServe(flisten, p); err != nil {
		fmt.Fprintln(os.Stderr, "could not start server. error:", err)
		os.Exit(1)
	}
}
lb/lb.go (369 changed lines)
@@ -1,52 +1,357 @@
package lb
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/http/httputil"
	"strconv"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/golang/groupcache/consistenthash"
	"github.com/Sirupsen/logrus"
	"github.com/dchest/siphash"
)

// ConsistentHashReverseProxy returns a new ReverseProxy that routes
// URLs to the scheme, host, and base path provided in by a consistent hash
// algorithm. If the target's path is "/base" and the incoming request was for
// "/dir", the target request will be for /base/dir.
// ConsistentHashReverseProxy does not rewrite the Host header.
func ConsistentHashReverseProxy(ctx context.Context, nodes []string) *httputil.ReverseProxy {
	ch := consistenthash.New(len(nodes), nil)
	ch.Add(nodes...)
// TODO: consistent hashing is nice to get a cheap way to place nodes but it
// doesn't account well for certain functions that may be 'hotter' than others.
// we should very likely keep a load ordered list and distribute based on that.
// if we can get some kind of feedback from the f(x) nodes, we can use that.
// maybe it's good enough to just ch(x) + 1 if ch(x) is marked as "hot"?

	bufPool := sync.Pool{
		New: func() interface{} {
			return new(bytes.Buffer)
		},
// TODO the load balancers all need to have the same list of nodes. gossip?
// also gossip would handle failure detection instead of elb style

// TODO when adding nodes we should health check them once before adding them
// TODO when node goes offline should try to redirect request instead of 5xxing

// TODO config
// TODO TLS

func main() {
	// XXX (reed): normalize
	fnodes := flag.String("nodes", "", "comma separated list of IronFunction nodes")

	var conf config
	flag.IntVar(&conf.Port, "port", 8081, "port to run on")
	flag.IntVar(&conf.HealthcheckInterval, "hc-interval", 3, "how often to check f(x) nodes, in seconds")
	flag.StringVar(&conf.HealthcheckEndpoint, "hc-path", "/version", "endpoint to determine node health")
	flag.IntVar(&conf.HealthcheckUnhealthy, "hc-unhealthy", 2, "threshold of failed checks to declare node unhealthy")
	flag.IntVar(&conf.HealthcheckTimeout, "hc-timeout", 5, "timeout of healthcheck endpoint, in seconds")
	flag.Parse()

	conf.Nodes = strings.Split(*fnodes, ",")

	ch := newProxy(conf)

	// XXX (reed): safe shutdown
	fmt.Println(http.ListenAndServe(":8081", ch))
}

type config struct {
	Port                 int      `json:"port"`
	Nodes                []string `json:"nodes"`
	HealthcheckInterval  int      `json:"healthcheck_interval"`
	HealthcheckEndpoint  string   `json:"healthcheck_endpoint"`
	HealthcheckUnhealthy int      `json:"healthcheck_unhealthy"`
	HealthcheckTimeout   int      `json:"healthcheck_timeout"`
}

type chProxy struct {
	ch consistentHash

	sync.RWMutex
	// TODO map[string][]time.Time
	ded map[string]int64

	hcInterval  time.Duration
	hcEndpoint  string
	hcUnhealthy int64
	hcTimeout   time.Duration
	proxy       *httputil.ReverseProxy
}

func newProxy(conf config) *chProxy {
	ch := &chProxy{
		ded: make(map[string]int64),

		// XXX (reed): need to be reconfigurable at some point
		hcInterval:  time.Duration(conf.HealthcheckInterval) * time.Second,
		hcEndpoint:  conf.HealthcheckEndpoint,
		hcUnhealthy: int64(conf.HealthcheckUnhealthy),
		hcTimeout:   time.Duration(conf.HealthcheckTimeout) * time.Second,
	}

	var i int64
	director := func(req *http.Request) {
		buf := bufPool.Get().(*bytes.Buffer)
		defer bufPool.Put(buf)
		buf.Reset()
		buf.WriteString(req.URL.Path)
		buf.WriteString("??")
		b := strconv.AppendInt(buf.Bytes(), atomic.AddInt64(&i, 1), 10)
		target := ch.ch.get(req.URL.Path)

		target := ch.Get(string(b))
		req.URL.Scheme = "http"
		req.URL.Scheme = "http" // XXX (reed): h2 support
		req.URL.Host = target
		if _, ok := req.Header["User-Agent"]; !ok {
			// explicitly disable User-Agent so it's not set to default value
			req.Header.Set("User-Agent", "")
		}

	ch.proxy = &httputil.ReverseProxy{
		// XXX (reed): optimized http client
		// XXX (reed): buffer pool
		Director: director,
	}

	for _, n := range conf.Nodes {
		// XXX (reed): need to health check these
		ch.ch.add(n)
	}
	go ch.healthcheck()
	return ch
}

func (ch *chProxy) healthcheck() {
	for range time.Tick(ch.hcInterval) {
		nodes := ch.ch.list()
		nodes = append(nodes, ch.dead()...)
		// XXX (reed): need to figure out elegant adding / removing better
		for _, n := range nodes {
			go ch.ping(n)
		}
	}
}

	return &httputil.ReverseProxy{
		Director:  director,
		Transport: NewRoundTripper(ctx, nodes),
func (ch *chProxy) ping(node string) {
	req, _ := http.NewRequest("GET", "http://"+node+ch.hcEndpoint, nil)
	ctx, cancel := context.WithTimeout(context.Background(), ch.hcTimeout)
	defer cancel()
	req = req.WithContext(ctx)

	// XXX (reed): use same transport as proxy is using
	resp, err := http.DefaultClient.Do(req)
	if resp != nil && resp.Body != nil {
		io.Copy(ioutil.Discard, resp.Body)
		resp.Body.Close()
	}

	if err != nil || resp.StatusCode < 200 || resp.StatusCode > 299 {
		logrus.WithFields(logrus.Fields{"node": node}).Error("health check failed")
		ch.fail(node)
	} else {
		ch.alive(node)
	}
}

func (ch *chProxy) fail(node string) {
	// shouldn't be a hot path so shouldn't be too contended on since health
	// checks are infrequent
	ch.Lock()
	ch.ded[node]++
	failed := ch.ded[node]
	ch.Unlock()

	if failed >= ch.hcUnhealthy {
		ch.ch.remove(node) // TODO under lock?
	}
}

func (ch *chProxy) alive(node string) {
	ch.RLock()
	_, ok := ch.ded[node]
	ch.RUnlock()
	if ok {
		ch.Lock()
		delete(ch.ded, node)
		ch.Unlock()
		ch.ch.add(node) // TODO under lock?
	}
}
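Aside: the fail/alive pair above gives the ELB-style semantics described in the commit message: hc-unhealthy consecutive failures take a node out of rotation, and a single passing check puts it back. A hypothetical test (not part of this commit) that pins that behavior down; it would sit next to the tests in lb/lb_test.go, uses a made-up node address, and sets a large HealthcheckInterval so the background checker stays idle during the test.

func TestFailThenAlive(t *testing.T) {
	ch := newProxy(config{
		Nodes:                []string{"10.0.0.1:8080"}, // made-up address
		HealthcheckUnhealthy: 2,
		HealthcheckInterval:  3600, // keep the healthcheck goroutine quiet during the test
	})

	ch.fail("10.0.0.1:8080")
	if ch.isDead("10.0.0.1:8080") {
		t.Fatal("one failed check should not mark the node dead")
	}

	ch.fail("10.0.0.1:8080")
	if !ch.isDead("10.0.0.1:8080") {
		t.Fatal("two failed checks should mark the node dead")
	}

	ch.alive("10.0.0.1:8080")
	if ch.isDead("10.0.0.1:8080") {
		t.Fatal("a passing check should put the node back in rotation")
	}
}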
func (ch *chProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path == "/1/lb/nodes" {
		switch r.Method {
		case "PUT":
			// XXX (reed): addNode
			ch.addNode(w, r)
			return
		case "DELETE":
			// XXX (reed): removeNode?
			ch.removeNode(w, r)
			return
		case "GET":
			ch.listNodes(w, r)
			return
		}

		// XXX (reed): stats?

		// XXX (reed): probably do these on a separate port to avoid conflicts
	}

	ch.proxy.ServeHTTP(w, r)
}

func (ch *chProxy) addNode(w http.ResponseWriter, r *http.Request) {
	var bod struct {
		Node string `json:"node"`
	}
	err := json.NewDecoder(r.Body).Decode(&bod)
	if err != nil {
		sendError(w, http.StatusBadRequest, err.Error())
		return
	}

	ch.ch.add(bod.Node)
	sendSuccess(w, "node added")
}

func (ch *chProxy) removeNode(w http.ResponseWriter, r *http.Request) {
	var bod struct {
		Node string `json:"node"`
	}
	err := json.NewDecoder(r.Body).Decode(&bod)
	if err != nil {
		sendError(w, http.StatusBadRequest, err.Error())
		return
	}

	ch.ch.remove(bod.Node)
	sendSuccess(w, "node deleted")
}

func (ch *chProxy) listNodes(w http.ResponseWriter, r *http.Request) {
	nodes := ch.ch.list()
	dead := ch.dead()

	out := make(map[string]string, len(nodes)+len(dead))
	for _, n := range nodes {
		if ch.isDead(n) {
			out[n] = "offline"
		} else {
			out[n] = "online"
		}
	}

	for _, n := range dead {
		out[n] = "offline"
	}

	sendValue(w, struct {
		Nodes map[string]string `json:"nodes"`
	}{
		Nodes: out,
	})
}
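The three handlers above form a small node-management API served on the same listener as the proxy (note that main currently hardcodes ":8081" and ignores the -port flag). A hedged sketch of driving it from a client, assuming the balancer is reachable at localhost:8081; the node address is made up. A GET on the same path lists nodes with their online/offline status, and a DELETE with the same JSON body removes one.

package main

import (
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// PUT /1/lb/nodes with {"node": "..."} adds a node to the hash ring
	body := strings.NewReader(`{"node": "10.0.0.4:8080"}`)
	req, err := http.NewRequest("PUT", "http://localhost:8081/1/lb/nodes", body)
	if err != nil {
		panic(err)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// the handler replies with {"msg": "node added"}
	fmt.Println("status:", resp.Status)
}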
func (ch *chProxy) isDead(node string) bool {
	ch.RLock()
	val, ok := ch.ded[node]
	ch.RUnlock()
	return ok && val >= ch.hcUnhealthy
}

func (ch *chProxy) dead() []string {
	ch.RLock()
	defer ch.RUnlock()
	nodes := make([]string, 0, len(ch.ded))
	for n, val := range ch.ded {
		if val >= ch.hcUnhealthy {
			nodes = append(nodes, n)
		}
	}
	return nodes
}

func sendValue(w http.ResponseWriter, v interface{}) {
	err := json.NewEncoder(w).Encode(v)

	if err != nil {
		logrus.WithError(err).Error("error writing response response")
	}
}

func sendSuccess(w http.ResponseWriter, msg string) {
	err := json.NewEncoder(w).Encode(struct {
		Msg string `json:"msg"`
	}{
		Msg: msg,
	})

	if err != nil {
		logrus.WithError(err).Error("error writing response response")
	}
}

func sendError(w http.ResponseWriter, code int, msg string) {
	w.WriteHeader(code)

	err := json.NewEncoder(w).Encode(struct {
		Msg string `json:"msg"`
	}{
		Msg: msg,
	})

	if err != nil {
		logrus.WithError(err).Error("error writing response response")
	}
}

// consistentHash will maintain a list of strings which can be accessed by
// keying them with a separate group of strings
type consistentHash struct {
	// protects nodes
	sync.RWMutex
	nodes []string
}

func (ch *consistentHash) add(newb string) {
	ch.Lock()
	defer ch.Unlock()

	// filter dupes, under lock. sorted, so binary search
	i := sort.SearchStrings(ch.nodes, newb)
	if i < len(ch.nodes) && ch.nodes[i] == newb {
		return
	}
	ch.nodes = append(ch.nodes, newb)
	// need to keep in sorted order so that hash index works across nodes
	sort.Sort(sort.StringSlice(ch.nodes))
}

func (ch *consistentHash) remove(ded string) {
	ch.Lock()
	i := sort.SearchStrings(ch.nodes, ded)
	if i < len(ch.nodes) && ch.nodes[i] == ded {
		ch.nodes = append(ch.nodes[:i], ch.nodes[i+1:]...)
	}
	ch.Unlock()
}

// return a copy
func (ch *consistentHash) list() []string {
	ch.RLock()
	ret := make([]string, len(ch.nodes))
	copy(ret, ch.nodes)
	ch.RUnlock()
	return ret
}

func (ch *consistentHash) get(key string) string {
	// crc not unique enough & sha is too slow, it's 1 import
	sum64 := siphash.Hash(0, 0x4c617279426f6174, []byte(key))

	ch.RLock()
	defer ch.RUnlock()
	i := int(jumpConsistentHash(sum64, int32(len(ch.nodes))))
	return ch.nodes[i]
}

// A Fast, Minimal Memory, Consistent Hash Algorithm:
// https://arxiv.org/ftp/arxiv/papers/1406/1406.2294.pdf
func jumpConsistentHash(key uint64, num_buckets int32) int32 {
	var b, j int64 = -1, 0
	for j < int64(num_buckets) {
		b = j
		key = key*2862933555777941757 + 1
		j = (b + 1) * int64((1<<31)/(key>>33)+1)
	}
	return int32(b)
}
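A sketch (not in the commit) of why jump consistent hashing is attractive here: when the bucket count grows from n to n+1, only about 1/(n+1) of keys change buckets, and there is no ring or lookup table to store or synchronize between balancers. The key names below are made up; jump is a copy of jumpConsistentHash above.

package main

import (
	"fmt"

	"github.com/dchest/siphash"
)

// jump mirrors jumpConsistentHash above.
func jump(key uint64, numBuckets int32) int32 {
	var b, j int64 = -1, 0
	for j < int64(numBuckets) {
		b = j
		key = key*2862933555777941757 + 1
		j = (b + 1) * int64((1<<31)/(key>>33)+1)
	}
	return int32(b)
}

func main() {
	const keys = 100000
	moved := 0
	for i := 0; i < keys; i++ {
		sum := siphash.Hash(0, 0, []byte(fmt.Sprintf("/r/app/route%d", i)))
		if jump(sum, 10) != jump(sum, 11) {
			moved++
		}
	}
	// expect roughly keys/11 (~9%) of keys to move, all onto the new bucket
	fmt.Printf("%d of %d keys moved (%.1f%%)\n", moved, keys, 100*float64(moved)/float64(keys))
}

One caveat, which the commit message's "with some care" hints at: lb/lb.go maps the bucket index into a name-sorted node slice, so a new node that sorts into the middle of the list shifts the indices of the nodes after it and moves more keys than the pure bucket math above suggests.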
lb/lb_test.go (new file, 72 lines)
@@ -0,0 +1,72 @@
package main

import "testing"

func TestCHAdd(t *testing.T) {
	var ch consistentHash
	nodes := []string{"1", "2", "3"}
	for _, n := range nodes {
		ch.add(n)
	}

	if len(ch.nodes) != 3 {
		t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
	}

	// test dupes don't get added
	for _, n := range nodes {
		ch.add(n)
	}

	if len(ch.nodes) != 3 {
		t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
	}
}

func TestCHRemove(t *testing.T) {
	var ch consistentHash
	nodes := []string{"1", "2", "3"}
	for _, n := range nodes {
		ch.add(n)
	}

	if len(ch.nodes) != 3 {
		t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
	}

	ch.remove("4")

	if len(ch.nodes) != 3 {
		t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
	}

	ch.remove("3")

	if len(ch.nodes) != 2 {
		t.Fatal("nodes list should be len of 2, got:", len(ch.nodes))
	}

	ch.remove("3")

	if len(ch.nodes) != 2 {
		t.Fatal("nodes list should be len of 2, got:", len(ch.nodes))
	}
}

func TestCHGet(t *testing.T) {
	var ch consistentHash
	nodes := []string{"1", "2", "3"}
	for _, n := range nodes {
		ch.add(n)
	}

	if len(ch.nodes) != 3 {
		t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
	}

	keys := []string{"a", "b", "c"}
	for _, k := range keys {
		_ = ch.get(k)
		// testing this doesn't panic basically? could test distro but meh
	}
}
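The comment in TestCHGet leaves the distribution untested; a hypothetical follow-up test (not in this commit) could give a rough sanity check that get() spreads keys across all nodes. It would live alongside the tests above, needs "fmt" added to lb_test.go's imports, and the bounds are deliberately loose.

func TestCHDistribution(t *testing.T) {
	var ch consistentHash
	for _, n := range []string{"1", "2", "3"} {
		ch.add(n)
	}

	counts := make(map[string]int)
	for i := 0; i < 3000; i++ {
		counts[ch.get(fmt.Sprintf("/r/app/route%d", i))]++
	}

	if len(counts) != 3 {
		t.Fatal("expected keys on all 3 nodes, got:", len(counts))
	}
	for node, c := range counts {
		// ~1000 keys per node expected; allow generous slack
		if c < 500 || c > 1500 {
			t.Errorf("node %s got %d of 3000 keys; distribution looks skewed", node, c)
		}
	}
}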
@@ -1,157 +0,0 @@
package lb

import (
	"context"
	"errors"
	"net"
	"net/http"
	"sync"
	"time"

	"github.com/golang/groupcache/singleflight"
)

// ErrNoFallbackNodeFound happens when the fallback routine does not manage to
// find a TCP reachable node in alternative to the chosen one.
var ErrNoFallbackNodeFound = errors.New("no fallback node found - whole cluster seems offline")

// FallbackRoundTripper implements http.RoundTripper in a way that when an
// outgoing request does not manage to succeed with its original target host,
// it fallsback to a list of alternative hosts. Internally it keeps a list of
// dead hosts, and pings them until they are back online, diverting traffic
// back to them. This is meant to be used by ConsistentHashReverseProxy().
type FallbackRoundTripper struct {
	nodes []string
	sf    singleflight.Group

	mu sync.Mutex
	// a list of offline servers that must be rechecked to see when they
	// get back online. If a server is in this list, it must have a fallback
	// available to which requests are sent.
	fallback map[string]string
}

// NewRoundTripper creates a new FallbackRoundTripper and triggers the internal
// host TCP health checks.
func NewRoundTripper(ctx context.Context, nodes []string) *FallbackRoundTripper {
	frt := &FallbackRoundTripper{
		nodes:    nodes,
		fallback: make(map[string]string),
	}
	go frt.checkHealth(ctx)
	return frt
}

func (f *FallbackRoundTripper) checkHealth(ctx context.Context) {
	tick := time.NewTicker(1 * time.Second)
	defer tick.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-tick.C:
			f.mu.Lock()
			if len(f.fallback) == 0 {
				f.mu.Unlock()
				continue
			}
			fallback := make(map[string]string)
			for k, v := range f.fallback {
				fallback[k] = v
			}
			f.mu.Unlock()

			updatedlist := make(map[string]string)
			for host, target := range fallback {
				if !f.ping(host) {
					updatedlist[host] = target
				}
			}

			f.mu.Lock()
			f.fallback = make(map[string]string)
			for k, v := range updatedlist {
				f.fallback[k] = v
			}
			f.mu.Unlock()
		}
	}
}

func (f *FallbackRoundTripper) ping(host string) bool {
	conn, err := net.Dial("tcp", host)
	if err != nil {
		return false
	}
	conn.Close()
	return true
}

func (f *FallbackRoundTripper) fallbackHost(targetHost, failedFallback string) string {
	detected, err := f.sf.Do(targetHost, func() (interface{}, error) {
		for _, node := range f.nodes {
			if node != targetHost && node != failedFallback && f.ping(node) {
				f.mu.Lock()
				f.fallback[targetHost] = node
				f.mu.Unlock()
				return node, nil
			}
		}
		return "", ErrNoFallbackNodeFound
	})

	if err != nil {
		return ""
	}
	return detected.(string)

}

// RoundTrip implements http.RoundTrip. It tried to fullfil an *http.Request to
// its original target host, falling back to a list of nodes in case of failure.
// After the first failure, it consistently delivers traffic to the fallback
// host, until original host is back online. If no fallback node is available,
// it fails with ErrNoFallbackNodeFound. In case of cascaded failure, that is,
// the fallback node is also offline, it will look for another online host.
func (f *FallbackRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	targetHost := req.URL.Host

	f.mu.Lock()
	fallback, ok := f.fallback[targetHost]
	f.mu.Unlock()
	if ok {
		req.URL.Host = fallback
		resp, err := f.callNode(req)
		if err == nil {
			return resp, err
		}
		fallback := f.fallbackHost(targetHost, fallback)
		if fallback == "" {
			return nil, ErrNoFallbackNodeFound
		}
		req.URL.Host = fallback
		return f.callNode(req)
	}

	resp, err := f.callNode(req)
	if err == nil {
		return resp, err
	}

	fallback = f.fallbackHost(targetHost, "")
	if fallback == "" {
		return nil, ErrNoFallbackNodeFound
	}
	req.URL.Host = fallback
	return f.callNode(req)
}

func (f *FallbackRoundTripper) callNode(req *http.Request) (*http.Response, error) {
	requestURI := req.RequestURI
	req.RequestURI = ""
	resp, err := http.DefaultClient.Do(req)
	if err == nil {
		resp.Request.RequestURI = requestURI
	}
	return resp, err
}