mirror of
https://github.com/ubuntu/microk8s.git
synced 2021-05-23 02:23:41 +03:00
First pass on switch-to-dqlite (#1131)
This commit is contained in:
committed by
GitHub
parent
50f20cffa6
commit
2bdc1b843e
@@ -1,3 +1,4 @@
|
||||
--bind 0.0.0.0:25000
|
||||
--keyfile "${SNAP_DATA}/certs/server.key"
|
||||
--certfile "${SNAP_DATA}/certs/server.crt"
|
||||
--timeout 240
|
||||
|
||||
@@ -7,6 +7,8 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
|
||||
|
||||
source $SNAP/actions/common/utils.sh
|
||||
|
||||
exit_if_service_not_expected_to_start etcd
|
||||
|
||||
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
|
||||
then
|
||||
echo "etcd will not run on a cluster node"
|
||||
|
||||
@@ -102,12 +102,11 @@ def is_node_running_dqlite():
|
||||
|
||||
:return: True if dqlite is to be used
|
||||
"""
|
||||
# We want to use the join with
|
||||
snapdata_path = os.environ.get('SNAP_DATA')
|
||||
apiserver_conf_file = "{}/args/kube-apiserver".format(snapdata_path)
|
||||
with open(apiserver_conf_file) as f:
|
||||
line = f.readline()
|
||||
if line.startswith("--storage-backend") and line.endswith("dqlite"):
|
||||
return True
|
||||
for line in f:
|
||||
if line.startswith("--storage-backend") and line.rstrip().endswith("dqlite"):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@@ -27,6 +27,7 @@ callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path
|
||||
server_cert_file_via_env = "${SNAP_DATA}/certs/server.remote.crt"
|
||||
server_cert_file = "{}/certs/server.remote.crt".format(snapdata_path)
|
||||
|
||||
CLUSTER_API_V2 = "cluster/api/v2.0"
|
||||
cluster_dir = "{}/var/kubernetes/backend".format(snapdata_path)
|
||||
cluster_backup_dir = "{}/var/kubernetes/backend.backup".format(snapdata_path)
|
||||
cluster_cert_file = "{}/cluster.crt".format(cluster_dir)
|
||||
|
||||
@@ -24,7 +24,8 @@ def upgrade_master(upgrade, phase):
|
||||
upgrade_script='{}/upgrade-scripts/{}/{}-master.sh'.format(snap_path, upgrade, phase)
|
||||
if os.path.isfile(upgrade_script):
|
||||
print("Running {}-upgrade script".format(phase))
|
||||
subprocess.check_output(upgrade_script)
|
||||
out = subprocess.check_output(upgrade_script)
|
||||
print(out)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print("{}-upgrade step failed".format(phase))
|
||||
raise e
|
||||
@@ -60,7 +61,8 @@ def rollback(upgrade):
|
||||
The rollback method that oversees the rollback of the cluster
|
||||
:param upgrade: which upgrade to call
|
||||
"""
|
||||
node_info = get_nodes_info()
|
||||
# We should get the nodes without checking their existence from the API server
|
||||
node_info = get_nodes_info(safe=False)
|
||||
|
||||
upgrade_log_file = "{}/var/log/upgrades/{}.log".format(snapdata_path, upgrade)
|
||||
with open(upgrade_log_file, "r") as log:
|
||||
@@ -95,52 +97,60 @@ def run_upgrade(upgrade):
|
||||
try:
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
with open(upgrade_log_file, "w") as log:
|
||||
upgrade_master(upgrade, "prepare")
|
||||
log.writelines(["master prepare"])
|
||||
upgrade_master(upgrade, "prepare")
|
||||
log.flush()
|
||||
for node_ep, token in node_info:
|
||||
node_upgrade(upgrade, "prepare", node_ep, token)
|
||||
log.writelines(["\nnode prepare {}".format(node_ep)])
|
||||
node_upgrade(upgrade, "prepare", node_ep, token)
|
||||
log.flush()
|
||||
|
||||
for node_ep, token in node_info:
|
||||
node_upgrade(upgrade, "commit", node_ep, token)
|
||||
log.writelines(["\nnode commit {}".format(node_ep)])
|
||||
node_upgrade(upgrade, "commit", node_ep, token)
|
||||
log.flush()
|
||||
|
||||
upgrade_master(upgrade, "commit")
|
||||
log.writelines(["\nmaster commit"])
|
||||
upgrade_master(upgrade, "commit")
|
||||
log.flush()
|
||||
|
||||
except Exception as e:
|
||||
print("Error in upgrading. Error: {}".format(e))
|
||||
log.close()
|
||||
rollback(upgrade_log_file)
|
||||
rollback(upgrade)
|
||||
exit(2)
|
||||
|
||||
|
||||
def get_nodes_info():
|
||||
def get_nodes_info(safe=True):
|
||||
"""
|
||||
Get the list of node endpoints and tokens in the cluster
|
||||
:return:
|
||||
"""
|
||||
callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path)
|
||||
node_info = []
|
||||
try:
|
||||
nodes = subprocess.check_output("{}/microk8s-kubectl.wrapper get no".format(snap_path).split())
|
||||
if safe:
|
||||
try:
|
||||
nodes = subprocess.check_output("{}/microk8s-kubectl.wrapper get no".format(snap_path).split())
|
||||
if os.path.isfile(callback_tokens_file):
|
||||
with open(callback_tokens_file, "r+") as fp:
|
||||
for _, line in enumerate(fp):
|
||||
parts = line.split()
|
||||
node_ep = parts[0]
|
||||
host = node_ep.split(":")[0]
|
||||
if host not in nodes.decode():
|
||||
print("Node {} not present".format(host))
|
||||
continue
|
||||
node_info = [(parts[0], parts[1])]
|
||||
except subprocess.CalledProcessError:
|
||||
print("Error in gathering cluster node information. Upgrade aborted.".format(host))
|
||||
exit(1)
|
||||
else:
|
||||
if os.path.isfile(callback_tokens_file):
|
||||
with open(callback_tokens_file, "r+") as fp:
|
||||
for _, line in enumerate(fp):
|
||||
parts = line.split()
|
||||
node_ep = parts[0]
|
||||
host = node_ep.split(":")[0]
|
||||
if host not in nodes.decode():
|
||||
print("Node {} not present".format(host))
|
||||
continue
|
||||
node_info = [(parts[0], parts[1])]
|
||||
except subprocess.CalledProcessError:
|
||||
print("Error in gathering cluster node information. Upgrade aborted.".format(host))
|
||||
exit(1)
|
||||
|
||||
return node_info
|
||||
|
||||
|
||||
|
||||
6
snap/hooks/configure
vendored
6
snap/hooks/configure
vendored
@@ -375,6 +375,12 @@ then
|
||||
snapctl restart ${SNAP_NAME}.daemon-containerd
|
||||
fi
|
||||
|
||||
if ! grep -e "\-\-timeout" ${SNAP_DATA}/args/cluster-agent
|
||||
then
|
||||
refresh_opt_in_config timeout 240 cluster-agent
|
||||
snapctl restart ${SNAP_NAME}.daemon-containerd
|
||||
fi
|
||||
|
||||
mkdir -p "$SNAP_DATA/juju/share/juju" "$SNAP_DATA/juju-home"
|
||||
chmod -R ug+rwX "$SNAP_DATA/juju" "$SNAP_DATA/juju-home"
|
||||
chmod -R o-rwX "$SNAP_DATA/juju" "$SNAP_DATA/juju-home"
|
||||
|
||||
@@ -185,6 +185,16 @@ parts:
|
||||
- "--disable-shared"
|
||||
- "--enable-static"
|
||||
prime: [ -bin/iptables-xml ]
|
||||
migrator:
|
||||
build-snaps: [go]
|
||||
source: https://github.com/ktsakalozos/go-migrator
|
||||
source-type: git
|
||||
plugin: go
|
||||
go-importpath: github.com/ktsakalozos/go-migrator
|
||||
build-packages:
|
||||
- gcc
|
||||
prime:
|
||||
- bin/migrator
|
||||
containerd:
|
||||
build-snaps: [go]
|
||||
after: [iptables]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
echo "Switching master to calico"
|
||||
|
||||
@@ -45,6 +45,8 @@ if grep -qE "bin_dir.*SNAP}\/" $SNAP_DATA/args/containerd-template.toml; then
|
||||
run_with_sudo systemctl restart snap.${SNAP_NAME}.daemon-containerd
|
||||
fi
|
||||
|
||||
# Allow for services to restart
|
||||
sleep 15
|
||||
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
|
||||
|
||||
KUBECTL="$SNAP/kubectl --kubeconfig=${SNAP_DATA}/credentials/client.config"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
echo "Switching master to calico"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
echo "Rolling back calico upgrade on master"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
echo "Rolling back calico upgrade on a node"
|
||||
|
||||
@@ -28,10 +28,8 @@ fi
|
||||
echo "Restarting kube-apiserver"
|
||||
if [ -e "$BACKUP_DIR/args/kube-apiserver" ]; then
|
||||
cp "$BACKUP_DIR"/args/kube-apiserver "$SNAP_DATA/args/"
|
||||
systemctl restart snap.${SNAP_NAME}.daemon-apiserver
|
||||
fi
|
||||
|
||||
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
|
||||
|
||||
echo "Restarting flannel"
|
||||
set_service_expected_to_start flanneld
|
||||
|
||||
57
upgrade-scripts/001-switch-to-dqlite/commit-master.sh
Executable file
57
upgrade-scripts/001-switch-to-dqlite/commit-master.sh
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
echo "Switching master to dqlite"
|
||||
|
||||
source $SNAP/actions/common/utils.sh
|
||||
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
|
||||
DB_DIR="$BACKUP_DIR/db"
|
||||
|
||||
mkdir -p "$BACKUP_DIR/args/"
|
||||
|
||||
echo "Configuring services"
|
||||
${SNAP}/microk8s-stop.wrapper
|
||||
|
||||
cp "$SNAP_DATA"/args/kube-apiserver "$BACKUP_DIR/args"
|
||||
refresh_opt_in_config "storage-backend" "dqlite" kube-apiserver
|
||||
refresh_opt_in_config "storage-dir" "\${SNAP_DATA}/var/kubernetes/backend/" kube-apiserver
|
||||
skip_opt_in_config "etcd-servers" kube-apiserver
|
||||
skip_opt_in_config "etcd-cafile" kube-apiserver
|
||||
skip_opt_in_config "etcd-certfile" kube-apiserver
|
||||
skip_opt_in_config "etcd-keyfile" kube-apiserver
|
||||
|
||||
cp "$SNAP_DATA"/args/etcd "$BACKUP_DIR/args"
|
||||
cat <<EOT > "$SNAP_DATA"/args/etcd
|
||||
--data-dir=\${SNAP_COMMON}/var/run/etcd
|
||||
--advertise-client-urls=http://127.0.0.1:12379
|
||||
--listen-client-urls=http://0.0.0.0:12379
|
||||
--enable-v2=true
|
||||
EOT
|
||||
|
||||
if ! [ -e "${SNAP_DATA}/var/kubernetes/backend/cluster.key" ]
|
||||
then
|
||||
init_cluster
|
||||
fi
|
||||
|
||||
systemctl start snap.microk8s.daemon-etcd
|
||||
systemctl start snap.microk8s.daemon-apiserver
|
||||
|
||||
# TODO do some proper wait here
|
||||
sleep 15
|
||||
|
||||
rm -rf "$DB_DIR"
|
||||
$SNAP/bin/migrator --mode backup --endpoint "http://127.0.0.1:12379" --db-dir "$DB_DIR" --debug
|
||||
chmod 600 "$DB_DIR"
|
||||
$SNAP/bin/migrator --mode restore --endpoint "unix:///var/snap/microk8s/current/var/kubernetes/backend/kine.sock" --db-dir "$DB_DIR" --debug
|
||||
|
||||
sleep 10
|
||||
|
||||
set_service_not_expected_to_start etcd
|
||||
systemctl stop snap.microk8s.daemon-etcd
|
||||
|
||||
${SNAP}/microk8s-start.wrapper
|
||||
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
|
||||
|
||||
echo "Dqlite is enabled"
|
||||
34
upgrade-scripts/001-switch-to-dqlite/commit-node.sh
Executable file
34
upgrade-scripts/001-switch-to-dqlite/commit-node.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
echo "Switching node to dqlite"
|
||||
|
||||
source $SNAP/actions/common/utils.sh
|
||||
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
|
||||
|
||||
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
|
||||
|
||||
mkdir -p "$BACKUP_DIR/args/"
|
||||
|
||||
echo "Configuring services"
|
||||
cp "$SNAP_DATA"/args/kube-apiserver "$BACKUP_DIR/args"
|
||||
refresh_opt_in_config "storage-backend" "dqlite" kube-apiserver
|
||||
refresh_opt_in_config "storage-dir" "\${SNAP_DATA}/var/kubernetes/backend/" kube-apiserver
|
||||
skip_opt_in_config "etcd-servers" kube-apiserver
|
||||
skip_opt_in_config "etcd-cafile" kube-apiserver
|
||||
skip_opt_in_config "etcd-certfile" kube-apiserver
|
||||
skip_opt_in_config "etcd-keyfile" kube-apiserver
|
||||
|
||||
if ! [ -e "${SNAP_DATA}/var/kubernetes/backend/cluster.key" ]
|
||||
then
|
||||
init_cluster
|
||||
fi
|
||||
|
||||
set_service_not_expected_to_start etcd
|
||||
|
||||
${SNAP}/microk8s-stop.wrapper
|
||||
sleep 5
|
||||
${SNAP}/microk8s-start.wrapper
|
||||
|
||||
echo "Dqlite is enabled on the node"
|
||||
1
upgrade-scripts/001-switch-to-dqlite/description.txt
Normal file
1
upgrade-scripts/001-switch-to-dqlite/description.txt
Normal file
@@ -0,0 +1 @@
|
||||
Migrates from etcd to dqlite
|
||||
4
upgrade-scripts/001-switch-to-dqlite/prepare-master.sh
Executable file
4
upgrade-scripts/001-switch-to-dqlite/prepare-master.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
echo "Master ready for dqlite"
|
||||
|
||||
5
upgrade-scripts/001-switch-to-dqlite/prepare-node.sh
Executable file
5
upgrade-scripts/001-switch-to-dqlite/prepare-node.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
echo "Nothing to do to praparing node for dqlite"
|
||||
26
upgrade-scripts/001-switch-to-dqlite/rollback-master.sh
Executable file
26
upgrade-scripts/001-switch-to-dqlite/rollback-master.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
echo "Rolling back dqlite upgrade on master"
|
||||
|
||||
source $SNAP/actions/common/utils.sh
|
||||
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
|
||||
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
|
||||
|
||||
echo "Restarting etcd"
|
||||
set_service_expected_to_start etcd
|
||||
if [ -e "$BACKUP_DIR/args/etcd" ]; then
|
||||
cp "$BACKUP_DIR"/args/etcd "$SNAP_DATA/args/"
|
||||
systemctl restart snap.${SNAP_NAME}.daemon-etcd
|
||||
fi
|
||||
|
||||
echo "Restarting kube-apiserver"
|
||||
if [ -e "$BACKUP_DIR/args/kube-apiserver" ]; then
|
||||
cp "$BACKUP_DIR"/args/kube-apiserver "$SNAP_DATA/args/"
|
||||
systemctl restart snap.${SNAP_NAME}.daemon-apiserver
|
||||
fi
|
||||
|
||||
${SNAP}/microk8s-start.wrapper
|
||||
${SNAP}/microk8s-status.wrapper --wait-ready --timeout 30
|
||||
|
||||
echo "Dqlite rolled back"
|
||||
25
upgrade-scripts/001-switch-to-dqlite/rollback-node.sh
Executable file
25
upgrade-scripts/001-switch-to-dqlite/rollback-node.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
echo "Rolling back dqlite upgrade on master"
|
||||
|
||||
source $SNAP/actions/common/utils.sh
|
||||
CA_CERT=/snap/core/current/etc/ssl/certs/ca-certificates.crt
|
||||
BACKUP_DIR="$SNAP_DATA/var/tmp/upgrades/001-switch-to-dqlite"
|
||||
|
||||
echo "Restarting etcd"
|
||||
set_service_expected_to_start etcd
|
||||
if [ -e "$BACKUP_DIR/args/etcd" ]; then
|
||||
cp "$BACKUP_DIR"/args/etcd "$SNAP_DATA/args/"
|
||||
systemctl restart snap.${SNAP_NAME}.daemon-etcd
|
||||
fi
|
||||
|
||||
echo "Restarting kube-apiserver"
|
||||
if [ -e "$BACKUP_DIR/args/kube-apiserver" ]; then
|
||||
cp "$BACKUP_DIR"/args/kube-apiserver "$SNAP_DATA/args/"
|
||||
systemctl restart snap.${SNAP_NAME}.daemon-apiserver
|
||||
fi
|
||||
|
||||
${SNAP}/microk8s-start.wrapper
|
||||
|
||||
echo "Dqlite rolled back"
|
||||
Reference in New Issue
Block a user