1
0
mirror of https://github.com/ubuntu/microk8s.git synced 2021-05-23 02:23:41 +03:00

First pass on switch-to-dqlite (#1131)

This commit is contained in:
Konstantinos Tsakalozos
2020-05-01 12:56:53 +03:00
committed by GitHub
parent 50f20cffa6
commit 2bdc1b843e
18 changed files with 209 additions and 28 deletions

View File

@@ -1,3 +1,4 @@
--bind 0.0.0.0:25000
--keyfile "${SNAP_DATA}/certs/server.key"
--certfile "${SNAP_DATA}/certs/server.crt"
--timeout 240

View File

@@ -7,6 +7,8 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
exit_if_service_not_expected_to_start etcd
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "etcd will not run on a cluster node"

View File

@@ -102,12 +102,11 @@ def is_node_running_dqlite():
:return: True if dqlite is to be used
"""
# We want to use the join with
snapdata_path = os.environ.get('SNAP_DATA')
apiserver_conf_file = "{}/args/kube-apiserver".format(snapdata_path)
with open(apiserver_conf_file) as f:
line = f.readline()
if line.startswith("--storage-backend") and line.endswith("dqlite"):
return True
for line in f:
if line.startswith("--storage-backend") and line.rstrip().endswith("dqlite"):
return True
return False

View File

@@ -27,6 +27,7 @@ callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path
server_cert_file_via_env = "${SNAP_DATA}/certs/server.remote.crt"
server_cert_file = "{}/certs/server.remote.crt".format(snapdata_path)
CLUSTER_API_V2 = "cluster/api/v2.0"
cluster_dir = "{}/var/kubernetes/backend".format(snapdata_path)
cluster_backup_dir = "{}/var/kubernetes/backend.backup".format(snapdata_path)
cluster_cert_file = "{}/cluster.crt".format(cluster_dir)

View File

@@ -24,7 +24,8 @@ def upgrade_master(upgrade, phase):
upgrade_script='{}/upgrade-scripts/{}/{}-master.sh'.format(snap_path, upgrade, phase)
if os.path.isfile(upgrade_script):
print("Running {}-upgrade script".format(phase))
subprocess.check_output(upgrade_script)
out = subprocess.check_output(upgrade_script)
print(out)
except subprocess.CalledProcessError as e:
print("{}-upgrade step failed".format(phase))
raise e
@@ -60,7 +61,8 @@ def rollback(upgrade):
The rollback method that oversees the rollback of the cluster
:param upgrade: which upgrade to call
"""
node_info = get_nodes_info()
# We should get the nodes without checking their existence from the API server
node_info = get_nodes_info(safe=False)
upgrade_log_file = "{}/var/log/upgrades/{}.log".format(snapdata_path, upgrade)
with open(upgrade_log_file, "r") as log:
@@ -95,52 +97,60 @@ def run_upgrade(upgrade):
try:
os.makedirs(log_dir, exist_ok=True)
with open(upgrade_log_file, "w") as log:
upgrade_master(upgrade, "prepare")
log.writelines(["master prepare"])
upgrade_master(upgrade, "prepare")
log.flush()
for node_ep, token in node_info:
node_upgrade(upgrade, "prepare", node_ep, token)
log.writelines(["\nnode prepare {}".format(node_ep)])
node_upgrade(upgrade, "prepare", node_ep, token)
log.flush()
for node_ep, token in node_info:
node_upgrade(upgrade, "commit", node_ep, token)
log.writelines(["\nnode commit {}".format(node_ep)])
node_upgrade(upgrade, "commit", node_ep, token)
log.flush()
upgrade_master(upgrade, "commit")
log.writelines(["\nmaster commit"])
upgrade_master(upgrade, "commit")
log.flush()
except Exception as e:
print("Error in upgrading. Error: {}".format(e))
log.close()
rollback(upgrade_log_file)
rollback(upgrade)
exit(2)
def get_nodes_info(safe=True):
    """
    Get the list of node endpoints and tokens in the cluster

    Entries are read from the callback tokens file, one "<endpoint> <token>"
    pair per line. Lines with fewer than two fields are skipped.

    :param safe: when True, ask the API server for the current nodes
                 (``kubectl get no``) and drop every entry whose host does not
                 appear in that output. When False, return every entry without
                 contacting the API server (used during rollback, when the API
                 server may not be usable).
    :return: list of (endpoint, token) tuples; empty if the tokens file is missing
    """
    callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path)

    # None means "do not filter"; a string is the decoded `kubectl get no` output.
    known_nodes = None
    if safe:
        try:
            get_nodes_cmd = "{}/microk8s-kubectl.wrapper get no".format(snap_path).split()
            known_nodes = subprocess.check_output(get_nodes_cmd).decode()
        except subprocess.CalledProcessError:
            # The message has no placeholders; formatting it with `host` only
            # risked an UnboundLocalError because no host has been parsed yet.
            print("Error in gathering cluster node information. Upgrade aborted.")
            exit(1)

    node_info = []
    if not os.path.isfile(callback_tokens_file):
        return node_info

    # Read-only access is enough; the file is never modified here.
    with open(callback_tokens_file, "r") as fp:
        for line in fp:
            parts = line.split()
            if len(parts) < 2:
                # Blank or malformed line: nothing usable to report.
                continue
            node_ep, token = parts[0], parts[1]
            if known_nodes is not None:
                host = node_ep.split(":")[0]
                if host not in known_nodes:
                    print("Node {} not present".format(host))
                    continue
            # Accumulate every node instead of overwriting the list each time.
            node_info.append((node_ep, token))
    return node_info

View File

@@ -375,6 +375,12 @@ then
snapctl restart ${SNAP_NAME}.daemon-containerd
fi
if ! grep -e "\-\-timeout" ${SNAP_DATA}/args/cluster-agent
then
refresh_opt_in_config timeout 240 cluster-agent
snapctl restart ${SNAP_NAME}.daemon-containerd
fi
mkdir -p "$SNAP_DATA/juju/share/juju" "$SNAP_DATA/juju-home"
chmod -R ug+rwX "$SNAP_DATA/juju" "$SNAP_DATA/juju-home"
chmod -R o-rwX "$SNAP_DATA/juju" "$SNAP_DATA/juju-home"

View File

@@ -185,6 +185,16 @@ parts:
- "--disable-shared"
- "--enable-static"
prime: [ -bin/iptables-xml ]
migrator:
build-snaps: [go]
source: https://github.com/ktsakalozos/go-migrator
source-type: git
plugin: go
go-importpath: github.com/ktsakalozos/go-migrator
build-packages:
- gcc
prime:
- bin/migrator
containerd:
build-snaps: [go]
after: [iptables]

View File

@@ -1,5 +1,5 @@
#!/bin/bash
set -e
set -ex
echo "Switching master to calico"
@@ -45,6 +45,8 @@ if grep -qE "bin_dir.*SNAP}\/" $SNAP_DATA/args/containerd-template.toml; then
run_with_sudo systemctl restart snap.${SNAP_NAME}.daemon-containerd
fi
# Allow for services to restart
sleep 15
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
KUBECTL="$SNAP/kubectl --kubeconfig=${SNAP_DATA}/credentials/client.config"

View File

@@ -1,6 +1,6 @@
#!/bin/bash
set -e
set -ex
echo "Switching master to calico"

View File

@@ -1,5 +1,5 @@
#!/bin/bash
set -e
set -ex
echo "Rolling back calico upgrade on master"

View File

@@ -1,5 +1,5 @@
#!/bin/bash
set -e
set -ex
echo "Rolling back calico upgrade on a node"
@@ -28,10 +28,8 @@ fi
echo "Restarting kube-apiserver"
if [ -e "$BACKUP_DIR/args/kube-apiserver" ]; then
cp "$BACKUP_DIR"/args/kube-apiserver "$SNAP_DATA/args/"
systemctl restart snap.${SNAP_NAME}.daemon-apiserver
fi
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
echo "Restarting flannel"
set_service_expected_to_start flanneld

View File

@@ -0,0 +1,57 @@
#!/bin/bash
# Commit phase of the 001-switch-to-dqlite upgrade, run on the master.
# Reconfigures kube-apiserver to use the dqlite storage backend, copies the
# existing data out of etcd and into dqlite with the migrator tool, then
# disables etcd and restarts MicroK8s.
# Abort on the first failing command so a half-done migration is not reported as success.
set -e
echo "Switching master to dqlite"
# Helper functions used below (refresh_opt_in_config, skip_opt_in_config,
# init_cluster, set_service_not_expected_to_start) are presumably defined in
# utils.sh - confirm against that file.
source $SNAP/actions/common/utils.sh
# NOTE(review): CA_CERT is assigned but not referenced anywhere in this script.
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
# Original service arguments are saved here so the rollback script can restore them.
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
# Directory the migrator writes the etcd dump to and later reads it from.
DB_DIR="$BACKUP_DIR/db"
mkdir -p "$BACKUP_DIR/args/"
echo "Configuring services"
# Stop all services before touching their argument files.
${SNAP}/microk8s-stop.wrapper
# Keep a copy of the current kube-apiserver arguments before editing them.
cp "$SNAP_DATA"/args/kube-apiserver "$BACKUP_DIR/args"
# Point the API server at dqlite and drop the etcd connection options.
refresh_opt_in_config "storage-backend" "dqlite" kube-apiserver
# The escaped \${SNAP_DATA} is written literally into the args file.
refresh_opt_in_config "storage-dir" "\${SNAP_DATA}/var/kubernetes/backend/" kube-apiserver
skip_opt_in_config "etcd-servers" kube-apiserver
skip_opt_in_config "etcd-cafile" kube-apiserver
skip_opt_in_config "etcd-certfile" kube-apiserver
skip_opt_in_config "etcd-keyfile" kube-apiserver
# Keep a copy of the current etcd arguments, then replace them with a plain-HTTP
# configuration on port 12379 that the migrator can read from.
cp "$SNAP_DATA"/args/etcd "$BACKUP_DIR/args"
cat <<EOT > "$SNAP_DATA"/args/etcd
--data-dir=\${SNAP_COMMON}/var/run/etcd
--advertise-client-urls=http://127.0.0.1:12379
--listen-client-urls=http://0.0.0.0:12379
--enable-v2=true
EOT
# Only initialise the cluster backend if no cluster key exists yet.
if ! [ -e "${SNAP_DATA}/var/kubernetes/backend/cluster.key" ]
then
init_cluster
fi
# Both datastores must be up during the copy: etcd as the source, and the API
# server (now configured for dqlite) presumably providing the kine socket used
# as the restore target - confirm.
# NOTE(review): unit names hard-code "microk8s" here while other scripts in
# this change use snap.${SNAP_NAME}.daemon-*.
systemctl start snap.microk8s.daemon-etcd
systemctl start snap.microk8s.daemon-apiserver
# TODO do some proper wait here
sleep 15
# Start from a clean dump directory.
rm -rf "$DB_DIR"
# Dump the contents of etcd (HTTP endpoint configured above) into DB_DIR.
$SNAP/bin/migrator --mode backup --endpoint "http://127.0.0.1:12379" --db-dir "$DB_DIR" --debug
# Restrict access to the dump.
# NOTE(review): mode 600 on a directory removes the search bit; harmless only
# if the script runs as root - confirm.
chmod 600 "$DB_DIR"
# Load the dump into dqlite through the kine unix socket.
# NOTE(review): the socket path hard-codes /var/snap/microk8s/current instead
# of using ${SNAP_DATA}.
$SNAP/bin/migrator --mode restore --endpoint "unix:///var/snap/microk8s/current/var/kubernetes/backend/kine.sock" --db-dir "$DB_DIR" --debug
# Fixed pause after the restore; no readiness check is performed here.
sleep 10
# etcd is no longer needed: mark it as not expected to start and stop it.
set_service_not_expected_to_start etcd
systemctl stop snap.microk8s.daemon-etcd
# Bring everything back up and wait (up to 30s) for the cluster to report ready.
${SNAP}/microk8s-start.wrapper
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
echo "Dqlite is enabled"

View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Commit phase of the 001-switch-to-dqlite upgrade, run on a cluster node.
# Rewrites the kube-apiserver arguments to use dqlite, marks etcd as not
# expected to start and restarts MicroK8s. No data migration happens here.
# Abort on the first failing command.
set -e
echo "Switching node to dqlite"
# Helper functions used below are presumably defined in utils.sh - confirm.
source $SNAP/actions/common/utils.sh
# NOTE(review): CA_CERT is assigned but not referenced anywhere in this script.
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
# Original service arguments are saved here so the rollback script can restore them.
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
mkdir -p "$BACKUP_DIR/args/"
echo "Configuring services"
# Keep a copy of the current kube-apiserver arguments before editing them.
cp "$SNAP_DATA"/args/kube-apiserver "$BACKUP_DIR/args"
# Point the API server at dqlite and drop the etcd connection options.
refresh_opt_in_config "storage-backend" "dqlite" kube-apiserver
# The escaped \${SNAP_DATA} is written literally into the args file.
refresh_opt_in_config "storage-dir" "\${SNAP_DATA}/var/kubernetes/backend/" kube-apiserver
skip_opt_in_config "etcd-servers" kube-apiserver
skip_opt_in_config "etcd-cafile" kube-apiserver
skip_opt_in_config "etcd-certfile" kube-apiserver
skip_opt_in_config "etcd-keyfile" kube-apiserver
# Only initialise the cluster backend if no cluster key exists yet.
if ! [ -e "${SNAP_DATA}/var/kubernetes/backend/cluster.key" ]
then
init_cluster
fi
# Mark etcd as not expected to start before the services are cycled.
set_service_not_expected_to_start etcd
# Full stop/start so the new arguments take effect; the sleep is a fixed pause
# between the two, not a readiness check.
${SNAP}/microk8s-stop.wrapper
sleep 5
${SNAP}/microk8s-start.wrapper
echo "Dqlite is enabled on the node"

View File

@@ -0,0 +1 @@
Migrates from etcd to dqlite

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Prepare phase of the switch-to-dqlite upgrade on the master.
# Nothing is changed at this stage; the script only reports readiness.
printf '%s\n' "Master ready for dqlite"

View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Prepare phase of the switch-to-dqlite upgrade on a cluster node.
# Intentionally a no-op: the script only prints a status message.
set -e
echo "Nothing to do to prepare node for dqlite"

View File

@@ -0,0 +1,26 @@
#!/bin/bash
# Rollback of the 001-switch-to-dqlite upgrade on the master.
# Re-enables etcd and restores the etcd and kube-apiserver argument files that
# the commit script saved under BACKUP_DIR, restarting each service whose
# arguments were restored.
# -e: abort on the first failing command; -x: trace every command.
set -ex
echo "Rolling back dqlite upgrade on master"
# Helper functions used below are presumably defined in utils.sh - confirm.
source $SNAP/actions/common/utils.sh
# NOTE(review): CA_CERT is assigned but not referenced anywhere in this script.
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
# Location where the commit script stored the pre-upgrade argument files.
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
echo "Restarting etcd"
# Mark etcd as expected to start again (the commit script marked it as not expected).
set_service_expected_to_start etcd
# Restore the etcd arguments only if a backup was taken, i.e. the commit
# script got far enough to save them.
if [ -e "$BACKUP_DIR/args/etcd" ]; then
cp "$BACKUP_DIR"/args/etcd "$SNAP_DATA/args/"
systemctl restart snap.${SNAP_NAME}.daemon-etcd
fi
echo "Restarting kube-apiserver"
# Same guard for the API server arguments.
if [ -e "$BACKUP_DIR/args/kube-apiserver" ]; then
cp "$BACKUP_DIR"/args/kube-apiserver "$SNAP_DATA/args/"
systemctl restart snap.${SNAP_NAME}.daemon-apiserver
fi
# Start all services again (the commit script may have stopped them) and wait
# up to 30s for the cluster to report ready.
${SNAP}/microk8s-start.wrapper
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
echo "Dqlite rolled back"

View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Rollback of the 001-switch-to-dqlite upgrade on a cluster node.
# Re-enables etcd and restores any argument files saved under BACKUP_DIR by
# the commit script, restarting each service whose arguments were restored.
# -e: abort on the first failing command; -x: trace every command.
set -ex
# This script runs on a node, so say so (the message used to read "on master").
echo "Rolling back dqlite upgrade on a node"
# Helper functions used below are presumably defined in utils.sh - confirm.
source $SNAP/actions/common/utils.sh
# NOTE(review): CA_CERT is assigned but not referenced anywhere in this script.
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
# Location where the commit script stored the pre-upgrade argument files.
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
echo "Restarting etcd"
# Mark etcd as expected to start again (the node commit script marked it as not expected).
set_service_expected_to_start etcd
# Restore the etcd arguments only if a backup exists.
if [ -e "$BACKUP_DIR/args/etcd" ]; then
cp "$BACKUP_DIR"/args/etcd "$SNAP_DATA/args/"
systemctl restart snap.${SNAP_NAME}.daemon-etcd
fi
echo "Restarting kube-apiserver"
# Restore the API server arguments only if a backup exists.
if [ -e "$BACKUP_DIR/args/kube-apiserver" ]; then
cp "$BACKUP_DIR"/args/kube-apiserver "$SNAP_DATA/args/"
systemctl restart snap.${SNAP_NAME}.daemon-apiserver
fi
# Start all services again with the restored configuration.
${SNAP}/microk8s-start.wrapper
echo "Dqlite rolled back"