From f7c6ce66960bed5e29b94de66bff6f554e2a77eb Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 28 Oct 2022 16:40:40 -0400 Subject: [PATCH] Bootstrapping doesn't need bootstrap-expect It's the Store object that needs it. --- cluster/bootstrap.go | 30 +++++++++++++++--------------- cluster/bootstrap_test.go | 12 ++++++------ cmd/rqlited/main.go | 5 ++--- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/cluster/bootstrap.go b/cluster/bootstrap.go index 2b6c9bd3..ceededaa 100644 --- a/cluster/bootstrap.go +++ b/cluster/bootstrap.go @@ -35,7 +35,6 @@ type AddressProvider interface { // Bootstrapper performs a bootstrap of this node. type Bootstrapper struct { provider AddressProvider - expect int tlsConfig *tls.Config joiner *Joiner @@ -48,10 +47,9 @@ type Bootstrapper struct { } // NewBootstrapper returns an instance of a Bootstrapper. -func NewBootstrapper(p AddressProvider, expect int, tlsConfig *tls.Config) *Bootstrapper { +func NewBootstrapper(p AddressProvider, tlsConfig *tls.Config) *Bootstrapper { bs := &Bootstrapper{ provider: p, - expect: expect, tlsConfig: &tls.Config{InsecureSkipVerify: true}, joiner: NewJoiner("", 1, 0, tlsConfig), logger: log.New(os.Stderr, "[cluster-bootstrap] ", log.LstdFlags), @@ -87,7 +85,6 @@ func (b *Bootstrapper) Boot(id, raftAddr string, done func() bool, timeout time. tickerT := time.NewTimer(jitter(time.Millisecond)) defer tickerT.Stop() - notifySuccess := false for { select { case <-timeoutT.C: @@ -104,7 +101,8 @@ func (b *Bootstrapper) Boot(id, raftAddr string, done func() bool, timeout time. if err != nil { b.logger.Printf("provider lookup failed %s", err.Error()) } - if len(targets) < b.expect { + + if len(targets) == 0 { continue } @@ -115,16 +113,18 @@ func (b *Bootstrapper) Boot(id, raftAddr string, done func() bool, timeout time. return nil } - // Join didn't work, so perhaps perform a notify if we haven't done - // one yet. 
- if !notifySuccess { - if err := b.notify(targets, id, raftAddr); err != nil { - b.logger.Printf("failed to notify all targets: %s (%s, will retry)", targets, - err.Error()) - } else { - b.logger.Printf("succeeded notifying all targets: %s", targets) - notifySuccess = true - } + // This is where we have to be careful. This node failed to join with any node + // in the targets list. This could be because none of the nodes are contactable, + // or none of the nodes are in a functioning cluster with a leader. That means that + // this node could be part of a set of nodes that are bootstrapping to form a cluster + // de novo. For that to happen it needs to now let the other nodes know it is here. + // If this is a new cluster, some node will then reach the bootstrap-expect value, + // form the cluster, beating all other nodes to it. + if err := b.notify(targets, id, raftAddr); err != nil { + b.logger.Printf("failed to notify all targets: %s (%s, will retry)", targets, + err.Error()) + } else { + b.logger.Printf("succeeded notifying all targets: %s", targets) } } } diff --git a/cluster/bootstrap_test.go b/cluster/bootstrap_test.go index 36fb31f9..632ea4b4 100644 --- a/cluster/bootstrap_test.go +++ b/cluster/bootstrap_test.go @@ -24,7 +24,7 @@ func Test_AddressProviderString(t *testing.T) { } func Test_NewBootstrapper(t *testing.T) { - bs := NewBootstrapper(nil, 1, nil) + bs := NewBootstrapper(nil, nil) if bs == nil { t.Fatalf("failed to create a simple Bootstrapper") } @@ -39,7 +39,7 @@ func Test_BootstrapperBootDoneImmediately(t *testing.T) { return true } p := NewAddressProviderString([]string{ts.URL}) - bs := NewBootstrapper(p, 1, nil) + bs := NewBootstrapper(p, nil) if err := bs.Boot("node1", "192.168.1.1:1234", done, 10*time.Second); err != nil { t.Fatalf("failed to boot: %s", err) } @@ -54,7 +54,7 @@ func Test_BootstrapperBootTimeout(t *testing.T) { return false } p := NewAddressProviderString([]string{ts.URL}) - bs := NewBootstrapper(p, 1, nil) + bs := 
NewBootstrapper(p, nil) bs.Interval = time.Second err := bs.Boot("node1", "192.168.1.1:1234", done, 5*time.Second) if err == nil { @@ -97,7 +97,7 @@ func Test_BootstrapperBootSingleNotify(t *testing.T) { } p := NewAddressProviderString([]string{ts.URL}) - bs := NewBootstrapper(p, 1, nil) + bs := NewBootstrapper(p, nil) bs.Interval = time.Second err := bs.Boot("node1", "192.168.1.1:1234", done, 60*time.Second) @@ -145,7 +145,7 @@ func Test_BootstrapperBootSingleNotifyAuth(t *testing.T) { } p := NewAddressProviderString([]string{ts.URL}) - bs := NewBootstrapper(p, 1, nil) + bs := NewBootstrapper(p, nil) bs.SetBasicAuth("username1", "password1") bs.Interval = time.Second @@ -192,7 +192,7 @@ func Test_BootstrapperBootMultiNotify(t *testing.T) { } p := NewAddressProviderString([]string{ts1.URL, ts2.URL}) - bs := NewBootstrapper(p, 2, nil) + bs := NewBootstrapper(p, nil) bs.Interval = time.Second err := bs.Boot("node1", "192.168.1.1:1234", done, 60*time.Second) diff --git a/cmd/rqlited/main.go b/cmd/rqlited/main.go index fbe89b6f..eb99fa18 100644 --- a/cmd/rqlited/main.go +++ b/cmd/rqlited/main.go @@ -373,8 +373,7 @@ func createCluster(cfg *Config, tlsConfig *tls.Config, hasPeers bool, str *store return nil } - bs := cluster.NewBootstrapper(cluster.NewAddressProviderString(joins), - cfg.BootstrapExpect, tlsConfig) + bs := cluster.NewBootstrapper(cluster.NewAddressProviderString(joins), tlsConfig) if cfg.JoinAs != "" { pw, ok := credStr.Password(cfg.JoinAs) if !ok { @@ -424,7 +423,7 @@ func createCluster(cfg *Config, tlsConfig *tls.Config, hasPeers bool, str *store provider = dnssrv.New(dnssrvCfg) } - bs := cluster.NewBootstrapper(provider, cfg.BootstrapExpect, tlsConfig) + bs := cluster.NewBootstrapper(provider, tlsConfig) if cfg.JoinAs != "" { pw, ok := credStr.Password(cfg.JoinAs) if !ok {