1
0
mirror of https://github.com/ubuntu/microk8s.git synced 2021-05-23 02:23:41 +03:00

Clustering (#529)

This commit is contained in:
Konstantinos Tsakalozos
2019-09-12 13:39:42 +03:00
committed by GitHub
parent 833f025bb7
commit 36f79886a0
51 changed files with 1616 additions and 48 deletions

View File

@@ -12,6 +12,10 @@ mkdir -p $KUBE_SNAP_BINS
curl -LO https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-$KUBE_ARCH-${CNI_VERSION}.tgz
tar -zxvf cni-plugins-$KUBE_ARCH-${CNI_VERSION}.tgz -C cni
mkdir -p flanneld
curl -LO https://github.com/coreos/flannel/releases/download/${FLANNELD_VERSION}/flannel-${FLANNELD_VERSION}-linux-${KUBE_ARCH}.tar.gz
tar -zxvf flannel-${FLANNELD_VERSION}-linux-${KUBE_ARCH}.tar.gz -C flanneld
# Knative is released only on amd64
if [ "$KUBE_ARCH" = "amd64" ]
then

View File

@@ -11,6 +11,7 @@ elif [ "$ARCH" = "armhf" ]; then
fi
export KUBE_ARCH
export ETCD_VERSION="${ETCD_VERSION:-v3.3.4}"
export FLANNELD_VERSION="${FLANNELD_VERSION:-v0.11.0}"
export CNI_VERSION="${CNI_VERSION:-v0.7.1}"
export KNATIVE_SERVING_VERSION="${KNATIVE_SERVING_VERSION:-v0.7.1}"
export KNATIVE_BUILD_VERSION="${KNATIVE_BUILD_VERSION:-v0.7.0}"
@@ -49,6 +50,7 @@ export KUBE_SNAP_ROOT="$(readlink -f .)"
echo "Building with:"
echo "KUBE_VERSION=${KUBE_VERSION}"
echo "ETCD_VERSION=${ETCD_VERSION}"
echo "FLANNELD_VERSION=${FLANNELD_VERSION}"
echo "CNI_VERSION=${CNI_VERSION}"
echo "KUBE_ARCH=${KUBE_ARCH}"
echo "KUBE_SNAP_BINS=${KUBE_SNAP_BINS}"

View File

@@ -22,6 +22,48 @@ exit_if_stopped() {
fi
}
exit_if_service_not_expected_to_start() {
# exit if a lock is available for the service
local service="$1"
if [ -f ${SNAP_DATA}/var/lock/no-${service} ]
then
exit 0
fi
}
is_service_expected_to_start() {
# return 1 if service is expected to start
local service="$1"
if [ -f ${SNAP_DATA}/var/lock/no-${service} ]
then
echo "0"
else
echo "1"
fi
}
set_service_not_expected_to_start() {
# mark service as not starting
local service="$1"
touch ${SNAP_DATA}/var/lock/no-${service}
}
set_service_expected_to_start() {
# mark service as not starting
local service="$1"
rm -rf ${SNAP_DATA}/var/lock/no-${service}
}
remove_vxlan_interfaces() {
links="$(${SNAP}/sbin/ip link show type vxlan | $SNAP/bin/grep -E 'flannel|cilium_vxlan' | $SNAP/usr/bin/gawk '{print $2}' | $SNAP/usr/bin/tr -d :)"
for link in "$links"
do
if ! [ -z "$link" ] && $SNAP/sbin/ip link show ${link} &> /dev/null
then
$SNAP/sbin/ip link delete ${link}
fi
done
}
refresh_opt_in_config() {
# add or replace an option inside the config file.
@@ -35,6 +77,32 @@ refresh_opt_in_config() {
else
sudo "$SNAP/bin/sed" -i "$ a $replace_line" "$config_file"
fi
if [ -e "${SNAP_DATA}/credentials/callback-tokens.txt" ]
then
tokens=$(sudo "$SNAP/bin/cat" "${SNAP_DATA}/credentials/callback-tokens.txt" | "$SNAP/usr/bin/wc" -l)
if [[ "$tokens" -ge "0" ]]
then
sudo -E "$SNAP/usr/bin/python3" "$SNAP/scripts/cluster/distributed_op.py" update_argument "$3" "$opt" "$value"
fi
fi
}
nodes_addon() {
# Enable or disable a, addon across all nodes
# state should be either 'enable' or 'disable'
local addon="$1"
local state="$2"
if [ -e "${SNAP_DATA}/credentials/callback-tokens.txt" ]
then
tokens=$(sudo "$SNAP/bin/cat" "${SNAP_DATA}/credentials/callback-tokens.txt" | "$SNAP/usr/bin/wc" -l)
if [[ "$tokens" -ge "0" ]]
then
sudo -E "$SNAP/usr/bin/python3" "$SNAP/scripts/cluster/distributed_op.py" set_addon "$addon" "$state"
fi
fi
}
@@ -45,6 +113,31 @@ skip_opt_in_config() {
local opt="--$1"
local config_file="$SNAP_DATA/args/$2"
sudo "${SNAP}/bin/sed" -i '/'"$opt"'/d' "${config_file}"
if [ -e "${SNAP_DATA}/credentials/callback-tokens.txt" ]
then
tokens=$(sudo "$SNAP/bin/cat" "${SNAP_DATA}/credentials/callback-tokens.txt" | "$SNAP/usr/bin/wc" -l)
if [[ "$tokens" -ge "0" ]]
then
sudo -E "$SNAP/usr/bin/python3" "$SNAP/scripts/cluster/distributed_op.py" remove_argument "$2" "$opt"
fi
fi
}
restart_service() {
# restart a systemd service
# argument $1 is the service name
sudo systemctl restart "snap.microk8s.daemon-$1.service"
if [ -e "${SNAP_DATA}/credentials/callback-tokens.txt" ]
then
tokens=$(sudo "$SNAP/bin/cat" "${SNAP_DATA}/credentials/callback-tokens.txt" | "$SNAP/usr/bin/wc" -l)
if [[ "$tokens" -ge "0" ]]
then
sudo -E "$SNAP/usr/bin/python3" "$SNAP/scripts/cluster/distributed_op.py" restart "$1"
fi
fi
}
@@ -194,7 +287,7 @@ produce_certs() {
# Generate root CA
if ! [ -f ${SNAP_DATA}/certs/ca.crt ]; then
openssl req -x509 -new -nodes -key ${SNAP_DATA}/certs/ca.key -subj "/CN=127.0.0.1" -days 10000 -out ${SNAP_DATA}/certs/ca.crt
openssl req -x509 -new -nodes -key ${SNAP_DATA}/certs/ca.key -subj "/CN=10.152.183.1" -days 10000 -out ${SNAP_DATA}/certs/ca.crt
fi
# Produce certificates based on the rendered csr.conf.rendered.

View File

@@ -31,8 +31,14 @@ then
sudo rm -rf "$SNAP_DATA/var/run/cilium"
sudo rm -rf "$SNAP_DATA/sys/fs/bpf"
if $SNAP/sbin/ip link show "cilium_vxlan"
then
$SNAP/sbin/ip link delete "cilium_vxlan"
fi
set_service_expected_to_start flanneld
echo "Restarting kubelet"
refresh_opt_in_config "network-plugin" "kubenet" kubelet
refresh_opt_in_config "cni-bin-dir" "\${SNAP}/opt/cni/bin/" kubelet
sudo systemctl restart snap.${SNAP_NAME}.daemon-kubelet
echo "Restarting containerd"
@@ -41,5 +47,8 @@ then
fi
sudo systemctl restart snap.${SNAP_NAME}.daemon-containerd
echo "Restarting flanneld"
sudo systemctl stop snap.${SNAP_NAME}.daemon-flanneld
echo "Cilium is terminating"
fi

View File

@@ -29,7 +29,7 @@ fi
skip_opt_in_config "cluster-domain" kubelet
skip_opt_in_config "cluster-dns" kubelet
sudo systemctl restart snap.${SNAP_NAME}.daemon-kubelet
restart_service kubelet
kubelet=$(wait_for_service kubelet)
if [[ $kubelet == fail ]]
then

View File

@@ -4,8 +4,11 @@ set -e
source $SNAP/actions/common/utils.sh
echo "Disabling NVIDIA GPU support"
use_manifest gpu delete
if ! [ -e "$SNAP_DATA/var/lock/clustered.lock" ]
then
echo "Disabling NVIDIA GPU support"
use_manifest gpu delete
fi
sudo rm -rf ${SNAP_DATA}/var/lock/gpu

View File

@@ -18,10 +18,13 @@ sudo systemctl restart snap.${SNAP_NAME}.daemon-apiserver
# Reconfigure kubelet/containerd to pick up the new CNI config and binary.
echo "Restarting kubelet"
refresh_opt_in_config "network-plugin" "cni" kubelet
refresh_opt_in_config "cni-bin-dir" "\${SNAP_DATA}/opt/cni/bin/" kubelet
sudo systemctl restart snap.${SNAP_NAME}.daemon-kubelet
set_service_not_expected_to_start flanneld
sudo systemctl stop snap.${SNAP_NAME}.daemon-flanneld
remove_vxlan_interfaces
if grep -qE "bin_dir.*SNAP}\/" $SNAP_DATA/args/containerd-template.toml; then
echo "Restarting containerd"
sudo "${SNAP}/bin/sed" -i 's;bin_dir = "${SNAP}/opt;bin_dir = "${SNAP_DATA}/opt;g' "$SNAP_DATA/args/containerd-template.toml"
@@ -57,6 +60,7 @@ else
sudo tar -xf "$SNAP_DATA/tmp/cilium/cilium.tar" "$CILIUM_DIR/install" "$CILIUM_DIR/$CILIUM_CNI_CONF")
sudo mv "$SNAP_DATA/args/cni-network/cni.conf" "$SNAP_DATA/args/cni-network/10-kubenet.conf" 2>/dev/null || true
sudo mv "$SNAP_DATA/args/cni-network/flannel.conflist" "$SNAP_DATA/args/cni-network/20-flanneld.conflist" 2>/dev/null || true
sudo cp "$SNAP_DATA/tmp/cilium/$CILIUM_DIR/$CILIUM_CNI_CONF" "$SNAP_DATA/args/cni-network/05-cilium-cni.conf"
# Generate the YAMLs for Cilium and apply them

View File

@@ -23,6 +23,6 @@ echo "Restarting kubelet"
refresh_opt_in_config "cluster-domain" "cluster.local" kubelet
refresh_opt_in_config "cluster-dns" "10.152.183.10" kubelet
sudo systemctl restart snap.${SNAP_NAME}.daemon-kubelet
restart_service kubelet
echo "DNS is enabled"

View File

@@ -11,8 +11,10 @@ echo "Enabling Fluentd-Elasticsearch"
KUBECTL="$SNAP/kubectl --kubeconfig=${SNAP_DATA}/credentials/client.config"
echo "Labeling nodes"
NODENAME="$($KUBECTL get no -o yaml | grep " name:"| awk '{print $2}')"
$KUBECTL label nodes "$NODENAME" beta.kubernetes.io/fluentd-ds-ready=true || true
for NODE in $NODENAME
do
$KUBECTL label nodes "$NODE" beta.kubernetes.io/fluentd-ds-ready=true || true
done
"$SNAP/microk8s-enable.wrapper" dns
sleep 5

View File

@@ -22,11 +22,13 @@ if [[ $containerd_up == fail ]]
then
echo "Containerd did not start on time. Proceeding."
fi
# Allow for some seconds for containerd processes to start
sleep 10
"$SNAP/microk8s-enable.wrapper" dns
echo "Applying manifest"
use_manifest gpu apply
echo "NVIDIA is enabled"
if ! [ -e "$SNAP_DATA/var/lock/clustered.lock" ]
then
# Allow for some seconds for containerd processes to start
sleep 10
"$SNAP/microk8s-enable.wrapper" dns
echo "Applying manifest"
use_manifest gpu apply
echo "NVIDIA is enabled"
fi

View File

@@ -11,3 +11,10 @@ declare -A map
map[\$SNAP_COMMON]="$SNAP_COMMON"
use_manifest storage apply "$(declare -p map)"
echo "Storage will be available soon"
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo ""
echo "WARNING: The storage class enabled does not persist volumes across nodes"
echo ""
fi

View File

@@ -0,0 +1,3 @@
--bind 0.0.0.0:25000
--keyfile "${SNAP_DATA}/certs/server.key"
--certfile "${SNAP_DATA}/certs/server.crt"

View File

@@ -1,19 +0,0 @@
{
"name": "k8s-pod-network",
"cniVersion": "0.3.0",
"type": "bridge",
"bridge": "cbr0",
"isGateway": true,
"ipMasq": true,
"ipam": {
"type": "host-local",
"subnet": "10.1.1.0/24",
"routes": [
{ "dst": "0.0.0.0/0" }
],
"dataDir": "/run/ipam-state"
},
"dns": {
"nameservers": [ "8.8.8.8", "8.8.4.4" ]
}
}

View File

@@ -0,0 +1,17 @@
{
"name": "microk8s-flannel-network",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {"portMappings": true},
"snat": true
}
]
}

View File

@@ -0,0 +1 @@
{"Network": "10.1.0.0/16", "Backend": {"Type": "vxlan"}}

View File

@@ -0,0 +1,20 @@
{
"name": "microk8s-flannel-network",
"plugins": [
{
"type": "flannel",
"name": "flannel-plugin",
"subnetFile": "${SNAP_COMMON}/run/flannel/subnet.env",
"dataDir": "${SNAP_COMMON}/var/lib/cni/flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {"portMappings": true},
"snat": true
}
]
}

View File

@@ -0,0 +1,6 @@
--iface=""
--etcd-endpoints=https://127.0.0.1:12379
--etcd-cafile=${SNAP_DATA}/certs/ca.crt
--etcd-certfile=${SNAP_DATA}/certs/server.crt
--etcd-keyfile=${SNAP_DATA}/certs/server.key
--subnet-file=${SNAP_COMMON}/run/flannel/subnet.env

View File

@@ -2,11 +2,10 @@
--cert-dir=${SNAP_DATA}/certs
--client-ca-file=${SNAP_DATA}/certs/ca.crt
--anonymous-auth=false
--network-plugin=kubenet
--network-plugin=cni
--root-dir=${SNAP_COMMON}/var/lib/kubelet
--fail-swap-on=false
--pod-cidr=10.1.1.0/24
--non-masquerade-cidr=10.152.183.0/24
--cni-conf-dir=${SNAP_DATA}/args/cni-network/
--cni-bin-dir=${SNAP}/opt/cni/bin/
--feature-gates=DevicePlugins=true
--eviction-hard="memory.available<100Mi,nodefs.available<1Gi,imagefs.available<1Gi"

View File

@@ -0,0 +1,18 @@
apiVersion: v1
clusters:
- cluster:
certificate-authority-data: CADATA
server: https://127.0.0.1:16443
name: microk8s-cluster
contexts:
- context:
cluster: microk8s-cluster
user: NAME
name: microk8s
current-context: microk8s
kind: Config
preferences: {}
users:
- name: NAME
user:
token: TOKEN

View File

@@ -8,6 +8,12 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "api service kicker will not run on a cluster node"
exit 0
fi
restart_attempt=0
while true
do

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.add-node on the master."
exit 1
fi
if echo "$*" | grep -q -- 'help'; then
echo "Usage: microk8s.add-node"
echo ""
echo "Produce a connection string for a node to join the cluster"
exit 0
fi
exit_if_no_permissions
subject=$(openssl x509 -noout -subject -in "$SNAP_DATA/certs/ca.crt")
if [[ $subject == *"127.0.0.1"* ]]; then
echo "Clustering requires a fresh MicroK8s installation. Reinstall with:"
echo "sudo snap remove microk8s"
echo "sudo snap install microk8s --classic"
exit 1
fi
exit_if_stopped
token=$(< /dev/urandom tr -dc A-Za-z | head -c32;echo;)
echo ${token} | tee -a $SNAP_DATA/credentials/cluster-tokens.txt > /dev/null
port="25000"
if grep -e port "${SNAP_DATA}"/args/cluster-agent &> /dev/null
then
port=$(cat "${SNAP_DATA}"/args/cluster-agent | "$SNAP"/usr/bin/gawk '{print $2}')
fi
default_ip="$(get_default_ip)"
all_ips="$(get_ips)"
echo "Join node with: microk8s.join ${default_ip}:$port/${token}"
echo ""
echo "If the node you are adding is not reachable through the default interface you can use one of the following:"
for addr in $(echo "${all_ips}"); do
if ! [[ $addr == *":"* ]]; then
echo " microk8s.join ${addr}:$port/${token}"
fi
done

View File

@@ -6,6 +6,12 @@ export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.config on the master."
exit 0
fi
USE_LOOPBACK=false
PARSED=$(getopt --options=lho: --longoptions=use-loopback,help,output: --name "$@" -- "$@")
eval set -- "$PARSED"

View File

@@ -7,6 +7,12 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.disable on the master."
exit 0
fi
if echo "$*" | grep -q -- '--help'; then
prog=$(basename -s.wrapper "$0" | tr - .)
echo "Usage: $prog ADDON..."

View File

@@ -7,6 +7,12 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.enable on the master."
exit 0
fi
if echo "$*" | grep -q -- '--help'; then
prog=$(basename -s.wrapper "$0" | tr - .)
echo "Usage: $prog ADDON..."

View File

@@ -12,6 +12,12 @@ fi
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.istioctl on the master."
exit 0
fi
if echo "$*" | grep -v -q -- '--kubeconfig'; then
exit_if_no_permissions
fi

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
source $SNAP/actions/common/utils.sh
exit_if_no_permissions
sudo -E ${SNAP}/usr/bin/python3 ${SNAP}/scripts/cluster/join.py $@

View File

@@ -5,6 +5,12 @@ set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.kubectl on the master."
exit 0
fi
exit_if_stopped
if echo "$*" | grep -v -q -- '--kubeconfig'; then

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
export SNAP_NAME
source $SNAP/actions/common/utils.sh
if echo "$*" | grep -q -- 'help'; then
echo "Usage: microk8s.leave"
echo ""
echo "With microk8s.leave the node will depart from the cluster it is in."
exit 0
fi
exit_if_stopped
exit_if_no_permissions
if ! [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is not acting as a node in a cluster."
exit 1
fi
if [ "$#" -ne "0" ]
then
echo "This command will remove the node from the cluster. No arguments are needed."
exit 1
fi
${SNAP}/usr/bin/python3 ${SNAP}/scripts/cluster/join.py reset

View File

@@ -5,6 +5,12 @@ set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.linkerd on the master."
exit 0
fi
if [ ! -f "${SNAP_DATA}/bin/linkerd" ]; then
echo "Linkerd not available, try enabling Linkerd. 'microk8s.enable linkerd' or 'microk8s.enable linkerd:--proxy-auto-inject' "
exit 0

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
export SNAP_NAME
source $SNAP/actions/common/utils.sh
if echo "$*" | grep -q -- 'help'; then
echo "Usage: microk8s.remove <node>"
echo ""
echo "microk8s.remove will remove the node provided from the cluster."
exit 0
fi
exit_if_no_permissions
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use microk8s.leave."
exit 1
fi
if [ "$#" -ne 1 ]; then
echo "Please provide the node you want to remove."
exit 1
fi
${SNAP}/usr/bin/python3 ${SNAP}/scripts/cluster/join.py reset "$@"

View File

@@ -51,6 +51,12 @@ clean_cluster() {
fi
}
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.reset on the master."
exit 0
fi
exit_if_stopped
exit_if_no_permissions

View File

@@ -5,6 +5,11 @@ set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.reset on the master."
exit 0
fi
PARSED=$(getopt --options=lho: --longoptions=help,output: --name "$@" -- "$@")
eval set -- "$PARSED"

View File

@@ -7,6 +7,12 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.status on the master."
exit 0
fi
KUBECTL="$SNAP/kubectl --kubeconfig=$SNAP_DATA/credentials/client.config"
# Arrray of what we query per addon

View File

@@ -6,6 +6,12 @@ export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "This MicroK8s deployment is acting as a node in a cluster. Please use the microk8s.stop on the master."
exit 0
fi
FORCE=false
PARSED=$(getopt --options=lho: --longoptions=force,help,output: --name "$@" -- "$@")
eval set -- "$PARSED"

View File

@@ -0,0 +1,13 @@
#!/bin/bash
set -eu
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
# This is really the only way I could find to get the args passed in correctly. WTF
declare -a args="($(cat $SNAP_DATA/args/cluster-agent))"
cd ${SNAP}/scripts/
${SNAP}/usr/bin/gunicorn3 cluster.agent:app "${args[@]}"

View File

@@ -1,6 +1,6 @@
#!/bin/bash
set -e
set -ex
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
@@ -8,6 +8,8 @@ export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
export XDG_RUNTIME_DIR="${SNAP_COMMON}/run"
mkdir -p "${XDG_RUNTIME_DIR}"
source $SNAP/actions/common/utils.sh
if [ -d "/etc/apparmor.d" ]; then
echo "Using a default profile template"
cp ${SNAP}/containerd-profile /etc/apparmor.d/cri-containerd.apparmor.d
@@ -27,6 +29,12 @@ else
fi
sed 's@${SNAP}@'"${SNAP}"'@g;s@${SNAP_DATA}@'"${SNAP_DATA}"'@g;s@${RUNTIME}@'"${RUNTIME}"'@g' $SNAP_DATA/args/containerd-template.toml > $SNAP_DATA/args/containerd.toml
run_flanneld="$(is_service_expected_to_start flanneld)"
if [ "${run_flanneld}" == "1" ]
then
sed 's@${SNAP}@'"${SNAP}"'@g;s@${SNAP_DATA}@'"${SNAP_DATA}"'@g;s@${SNAP_COMMON}@'"${SNAP_COMMON}"'@g' $SNAP_DATA/args/flannel-template.conflist > $SNAP_DATA/args/cni-network/flannel.conflist
fi
# This is really the only way I could find to get the args passed in correctly. WTF
declare -a args="($(cat $SNAP_DATA/args/$app))"
set -a

View File

@@ -1,11 +1,17 @@
#!/bin/bash
set -ex
set -e
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
echo "etcd will not run on a cluster node"
exit 0
fi
# etcd will not start if the socket already exists.
if [ -S "/var/snap/microk8s/current/etcd.socket:2379" ]; then
rm "/var/snap/microk8s/current/etcd.socket:2379"

View File

@@ -0,0 +1,33 @@
#!/bin/bash
set -e
export PATH="$SNAP/usr/sbin:$SNAP/usr/bin:$SNAP/sbin:$SNAP/bin:$PATH"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$SNAP/lib:$SNAP/usr/lib:$SNAP/lib/x86_64-linux-gnu:$SNAP/usr/lib/x86_64-linux-gnu"
export LD_LIBRARY_PATH=$SNAP_LIBRARY_PATH:$LD_LIBRARY_PATH
source $SNAP/actions/common/utils.sh
exit_if_service_not_expected_to_start flanneld
# TODO rewrite for snaps
etcd_endpoints="$(cat $SNAP_DATA/args/flanneld | grep "etcd-endpoints" | tr "=" " "| awk '{print $2}')"
cert_file="$(cat $SNAP_DATA/args/flanneld | grep "etcd-certfile" | tr "=" " "| awk '{print $2}')"
cert_file="$(eval echo $cert_file)"
key_file="$(cat $SNAP_DATA/args/flanneld | grep "etcd-keyfile" | tr "=" " "| awk '{print $2}')"
key_file="$(eval echo $key_file)"
ca_file="$(cat $SNAP_DATA/args/flanneld | grep "etcd-cafile" | tr "=" " "| awk '{print $2}')"
ca_file="$(eval echo $ca_file)"
# TODO get this from a file
data="$(cat $SNAP_DATA/args/flannel-network-mgr-config)"
if ! "${SNAP}/etcdctl" --endpoint "${etcd_endpoints}" --cert-file "${cert_file}" --key-file "${key_file}" --ca-file "${ca_file}" rm "/coreos.com/network/config"; then
echo "/coreos.com/network/config is not in etcd. Probably a first time run."
fi
"${SNAP}/etcdctl" --endpoint "${etcd_endpoints}" --cert-file "${cert_file}" --key-file "${key_file}" --ca-file "${ca_file}" set "/coreos.com/network/config" "$data"
# This is really the only way I could find to get the args passed in correctly. WTF
declare -a args="($(cat $SNAP_DATA/args/flanneld))"
exec "$SNAP/opt/cni/bin/flanneld" "${args[@]}"

View File

@@ -8,6 +8,16 @@ source $SNAP/actions/common/utils.sh
app=$1
if [ -e ${SNAP_DATA}/var/lock/clustered.lock ]
then
if [ "${app}" = "kube-apiserver" ] || [ "${app}" = "kube-controller-manager" ] || [ "${app}" = "kube-scheduler" ]
then
echo "${app} will not run on a cluster node"
exit 0
fi
fi
if [ "${app}" = "kubelet" ]
then
pod_cidr="$(cat $SNAP_DATA/args/kubelet | grep "pod-cidr" | tr "=" " "| gawk '{print $2}')"

436
scripts/cluster/agent.py Normal file
View File

@@ -0,0 +1,436 @@
#!flask/bin/python
import getopt
import json
import os
import shutil
import socket
import string
import random
import subprocess
import sys
from flask import Flask, jsonify, request, abort, Response
app = Flask(__name__)
CLUSTER_API="cluster/api/v1.0"
snapdata_path = os.environ.get('SNAP_DATA')
snap_path = os.environ.get('SNAP_DATA')
cluster_tokens_file = "{}/credentials/cluster-tokens.txt".format(snapdata_path)
callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path)
callback_token_file = "{}/credentials/callback-token.txt".format(snapdata_path)
certs_request_tokens_file = "{}/credentials/certs-request-tokens.txt".format(snapdata_path)
default_port = 25000
default_listen_interface = "0.0.0.0"
def get_service_name(service):
"""
Returns the service name from its configuration file name.
:param service: the name of the service configuration file
:return: the service name
"""
if service in ["kube-proxy", "kube-apiserver", "kube-scheduler", "kube-controller-manager"]:
return service[len("kube-"), :]
else:
return service
def update_service_argument(service, key, val):
"""
Adds an argument to the arguments file of the service.
:param service: the service
:param key: the arguments to add
:param val: the value for the argument
"""
args_file = "{}/args/{}".format(snapdata_path, service)
args_file_tmp = "{}/args/{}.tmp".format(snapdata_path, service)
found = False
with open(args_file_tmp, "w+") as bfp:
with open(args_file, "r+") as fp:
for _, line in enumerate(fp):
if line.startswith(key):
if val is not None:
bfp.write("{}={}\n".format(key, val))
found = True
else:
bfp.write("{}\n".format(line.rstrip()))
if not found and val is not None:
bfp.write("{}={}\n".format(key, val))
shutil.move(args_file_tmp, args_file)
def store_callback_token(node, callback_token):
"""
Store a callback token
:param node: the node
:param callback_token: the token
"""
tmp_file = "{}.tmp".format(callback_tokens_file)
if not os.path.isfile(callback_tokens_file):
open(callback_tokens_file, 'a+')
os.chmod(callback_tokens_file, 0o600)
with open(tmp_file, "w") as backup_fp:
os.chmod(tmp_file, 0o600)
found = False
with open(callback_tokens_file, 'r+') as callback_fp:
for _, line in enumerate(callback_fp):
if line.startswith(node):
backup_fp.write("{} {}\n".format(node, callback_token))
found = True
else:
backup_fp.write(line)
if not found:
backup_fp.write("{} {}\n".format(node, callback_token))
shutil.move(tmp_file, callback_tokens_file)
def sign_client_cert(cert_request, token):
"""
Sign a certificate request
:param cert_request: the request
:param token: a token acttin as a request uuid
:return: the certificate
"""
req_file = "{}/certs/request.{}.csr".format(snapdata_path, token)
sign_cmd = "openssl x509 -req -in {csr} -CA {SNAP_DATA}/certs/ca.crt -CAkey" \
" {SNAP_DATA}/certs/ca.key -CAcreateserial -out {SNAP_DATA}/certs/server.{token}.crt" \
" -days 100000".format(csr=req_file, SNAP_DATA=snapdata_path, token=token)
with open(req_file, 'w') as fp:
fp.write(cert_request)
subprocess.check_call(sign_cmd.split())
with open("{SNAP_DATA}/certs/server.{token}.crt".format(SNAP_DATA=snapdata_path, token=token)) as fp:
cert = fp.read()
return cert
def add_token_to_certs_request(token):
"""
Add a token to the file holding the nodes we expect a certificate request from.
:param token: the token
"""
with open(certs_request_tokens_file, "a+") as fp:
fp.write("{}\n".format(token))
def remove_token_from_file(token, file):
"""
Remove a token from the valid tokens set
:param token: the token to be removed
:param file: the file to be removed from
"""
backup_file = "{}.backup".format(file)
# That is a critical section. We need to protect it.
# We are safe sor now because flask serves one request at a time.
with open(backup_file, 'w') as back_fp:
with open(file, 'r') as fp:
for _, line in enumerate(fp):
if line.startswith(token):
continue
back_fp.write("{}".format(line))
shutil.copyfile(backup_file, file)
def get_token(name):
"""
Get token from known_tokens file
:param name: the name of the node
:return: the token or None
"""
file = "{}/credentials/known_tokens.csv".format(snapdata_path)
with open(file) as fp:
line = fp.readline()
if name in line:
parts = line.split(',')
return parts[0].rstrip()
return None
def add_kubelet_token(hostname):
"""
Add a token for a node in the known tokens
:param hostname: the name of the node
:return: the token added
"""
file = "{}/credentials/known_tokens.csv".format(snapdata_path)
old_token = get_token("system:node:{}".format(hostname))
if old_token:
return old_token.rstrip()
alpha = string.ascii_letters + string.digits
token = ''.join(random.SystemRandom().choice(alpha) for _ in range(32))
uid = ''.join(random.SystemRandom().choice(string.digits) for _ in range(8))
with open(file, 'a') as fp:
# TODO double check this format. Why is userid unique?
line = "{},system:node:{},kubelet,kubelet-{},\"system:nodes\"".format(token, hostname, uid)
fp.write(line + os.linesep)
return token.rstrip()
def getCA():
"""
Return the CA
:return: the CA file contents
"""
ca_file = "{}/certs/ca.crt".format(snapdata_path)
with open(ca_file) as fp:
ca = fp.read()
return ca
def get_arg(key, file):
"""
Get an argument froman arguments file
:param key: the argument we look for
:param file: the arguments file to search in
:return: the value of the argument
"""
filename = "{}/args/{}".format(snapdata_path, file)
with open(filename) as fp:
for _, line in enumerate(fp):
if line.startswith(key):
args = line.split(' ')
args = args[-1].split('=')
return args[-1].rstrip()
return None
def is_valid(token, token_type=cluster_tokens_file):
"""
Check token
:param token: token to be checked
:param token_type: the type of token (bootstrap or signature)
:return: True for a valid token, false otherwise
"""
with open(token_type) as fp:
for _, line in enumerate(fp):
if line.startswith(token):
return True
return False
def read_kubelet_args_file(node=None):
"""
Return the contents of the kubelet arguments file
:param node: should we add a host override?
:return: the kubelet args file
"""
filename = "{}/args/kubelet".format(snapdata_path)
with open(filename) as fp:
args = fp.read()
if node:
args = "{}--hostname-override {}".format(args, node)
return args
def get_node_ep(hostname, remote_addr):
"""
Return the endpoint to be used for the node based by trying to resolve the hostname provided
:param hostname: the provided hostname
:param remote_addr: the address the request came from
:return: the node's location
"""
try:
socket.gethostbyname(hostname)
return hostname
except socket.gaierror:
return remote_addr
return remote_addr
@app.route('/{}/join'.format(CLUSTER_API), methods=['POST'])
def join_node():
"""
Web call to join an node to the cluster
"""
if request.headers['Content-Type'] == 'application/json':
token = request.json['token']
hostname = request.json['hostname']
port = request.json['port']
callback_token = request.json['callback']
else:
token = request.form['token']
hostname = request.form['hostname']
port = request.form['port']
callback_token = request.form['callback']
if not is_valid(token):
error_msg={"error": "Invalid token"}
return Response(json.dumps(error_msg), mimetype='application/json', status=500)
add_token_to_certs_request(token)
remove_token_from_file(token, cluster_tokens_file)
node_addr = get_node_ep(hostname, request.remote_addr)
node_ep = "{}:{}".format(node_addr, port)
store_callback_token(node_ep, callback_token)
ca = getCA()
etcd_ep = get_arg('--listen-client-urls', 'etcd')
api_port = get_arg('--secure-port', 'kube-apiserver')
proxy_token = get_token('kube-proxy')
kubelet_token = add_kubelet_token(hostname)
subprocess.check_call("systemctl restart snap.microk8s.daemon-apiserver.service".split())
if node_addr != hostname:
kubelet_args = read_kubelet_args_file(node_addr)
else:
kubelet_args = read_kubelet_args_file()
return jsonify(ca=ca,
etcd=etcd_ep,
kubeproxy=proxy_token,
apiport=api_port,
kubelet=kubelet_token,
kubelet_args=kubelet_args)
@app.route('/{}/sign-cert'.format(CLUSTER_API), methods=['POST'])
def sign_cert():
"""
Web call to sign a certificate
"""
if request.headers['Content-Type'] == 'application/json':
token = request.json['token']
cert_request = request.json['request']
else:
token = request.form['token']
cert_request = request.form['request']
if not is_valid(token, certs_request_tokens_file):
error_msg={"error": "Invalid token"}
return Response(json.dumps(error_msg), mimetype='application/json', status=500)
remove_token_from_file(token, certs_request_tokens_file)
signed_cert = sign_client_cert(cert_request, token)
return jsonify(certificate=signed_cert)
@app.route('/{}/configure'.format(CLUSTER_API), methods=['POST'])
def configure():
"""
Web call to configure the node
"""
if request.headers['Content-Type'] == 'application/json':
callback_token = request.json['callback']
configuration = request.json
else:
callback_token = request.form['callback']
configuration = json.loads(request.form['configuration'])
if not is_valid(callback_token, callback_token_file):
error_msg={"error": "Invalid token"}
return Response(json.dumps(error_msg), mimetype='application/json', status=500)
# We expect something like this:
'''
{
"callback": "xyztoken"
"service":
[
{
"name": "kubelet",
"arguments_remove":
[
"myoldarg"
],
"arguments_update":
[
{"myarg": "myvalue"},
{"myarg2": "myvalue2"},
{"myarg3": "myvalue3"}
],
"restart": false
},
{
"name": "kube-proxy",
"restart": true
}
],
"addon":
[
{
"name": "gpu",
"enable": true
},
{
"name": "gpu",
"disable": true
}
]
}
'''
if "service" in configuration:
for service in configuration["service"]:
print("{}".format(service["name"]))
if "arguments_update" in service:
print("Updating arguments")
for argument in service["arguments_update"]:
for key, val in argument.items():
print("{} is {}".format(key, val))
update_service_argument(service["name"], key, val)
if "arguments_remove" in service:
print("Removing arguments")
for argument in service["arguments_remove"]:
print("{}".format(argument))
update_service_argument(service["name"], argument, None)
if "restart" in service and service["restart"]:
service_name = get_service_name(service["name"])
print("restarting {}".format(service["name"]))
subprocess.check_call("systemctl restart snap.microk8s.daemon-{}.service".format(service_name).split())
if "addon" in configuration:
for addon in configuration["addon"]:
print("{}".format(addon["name"]))
if "enable" in addon and addon["enable"]:
print("Enabling {}".format(addon["name"]))
subprocess.check_call("{}/microk8s-enable.wrapper {}".format(snap_path, addon["name"]).split())
if "disable" in addon and addon["disable"]:
print("Disabling {}".format(addon["name"]))
subprocess.check_call("{}/microk8s-disable.wrapper {}".format(snap_path, addon["name"]).split())
resp_date = {"result": "ok"}
resp = Response(json.dumps(resp_date), status=200, mimetype='application/json')
return resp
def usage():
print("Agent responsible for setting up a cluster. Arguments:")
print("-l, --listen: interfaces to listen to (defaults to {})".format(default_listen_interface))
print("-p, --port: port to listen to (default {})".format(default_port))
if __name__ == '__main__':
server_cert = "{SNAP_DATA}/certs/server.crt".format(SNAP_DATA=snapdata_path)
server_key = "{SNAP_DATA}/certs/server.key".format(SNAP_DATA=snapdata_path)
try:
opts, args = getopt.gnu_getopt(sys.argv[1:], "hl:p:", ["help", "listen=", "port="])
except getopt.GetoptError as err:
print(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
port = default_port
listen = default_listen_interface
for o, a in opts:
if o in ("-l", "--listen"):
listen = a
if o in ("-p", "--port"):
port = a
elif o in ("-h", "--help"):
usage()
sys.exit(1)
else:
assert False, "unhandled option"
app.run(host=listen, port=port, ssl_context=(server_cert, server_key))

View File

@@ -0,0 +1,159 @@
#!/usr/bin/python3
import getopt
import subprocess
import requests
import urllib3
import os
import sys
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
CLUSTER_API = "cluster/api/v1.0"
snapdata_path = os.environ.get('SNAP_DATA')
snap_path = os.environ.get('SNAP')
callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path)
def do_op(remote_op):
"""
Perform an operation on a remote node
:param op_str: the operation json string
"""
with open(callback_tokens_file, "r+") as fp:
for _, line in enumerate(fp):
parts = line.split()
node_ep = parts[0]
host = node_ep.split(":")[0]
print("Applying to node {}.".format(host))
try:
# Make sure this node exists
subprocess.check_call("{}/microk8s-kubectl.wrapper get no {}".format(snap_path, host).split(),
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
token = parts[1]
remote_op["callback"] = token
# TODO: handle ssl verification
res = requests.post("https://{}/{}/configure".format(node_ep, CLUSTER_API),
json=remote_op,
verify=False)
if res.status_code != 200:
print("Failed to perform a {} on node {}".format(remote_op["action_str"], node_ep))
except subprocess.CalledProcessError:
print("Node {} not present".format(host))
def restart(service):
"""
Restart service on all nodes
:param service: the service name
"""
print("Restarting nodes.")
remote_op = {
"action_str": "restart {}".format(service),
"service": [
{
"name": service,
"restart": "yes"
}
]
}
do_op(remote_op)
def update_argument(service, key, value):
"""
Configure an argument on all nodes
:param service: the service we configure
:param key: the argument we configure
:param value: the value we set
"""
print("Adding argument {} to nodes.".format(key))
remote_op = {
"action_str": "change of argument {} to {}".format(key, value),
"service": [
{
"name": service,
"arguments_update": [
{key: value}
]
}
]
}
do_op(remote_op)
def remove_argument(service, key):
"""
Drop an argument from all nodes
:param service: the service we configure
:param key: the argument we configure
"""
print("Removing argument {} from nodes.".format(key))
remote_op = {
"action_str": "removal of argument {}".format(key),
"service": [
{
"name": service,
"arguments_remove": [key]
}
]
}
do_op(remote_op)
def set_addon(addon, state):
"""
Enable or disable an add-on across all nodes
:param addon: the add-on name
:param state: 'enable' or 'disable'
"""
print("Set add-on {} to {} on nodes.".format(addon, state))
remote_op = {
"action_str": "set of {} to {}".format(addon, state),
"addon": [
{
"name": addon,
state: "true"
}
]
}
do_op(remote_op)
def usage():
print("usage: dist_refresh_opt [OPERATION] [SERVICE] (ARGUMENT) (value)")
print("OPERATION is one of restart, update_argument, remove_argument, set_addon")
if __name__ == "__main__":
if not os.path.isfile(callback_tokens_file):
print("No callback tokens file.")
exit(1)
try:
opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
except getopt.GetoptError as err:
# print help information and exit:
print(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
for o, a in opts:
if o in ("-h", "--help"):
usage()
sys.exit()
else:
assert False, "unhandled option"
operation = args[0]
service = args[1]
if operation == "restart":
restart(service)
if operation == "update_argument":
update_argument(service, args[2], args[3])
if operation == "remove_argument":
remove_argument(service, args[2])
if operation == "set_addon":
set_addon(service, args[2])
exit(0)

376
scripts/cluster/join.py Normal file
View File

@@ -0,0 +1,376 @@
#!/usr/bin/python3
import base64
import random
import string
import subprocess
import os
import getopt
import sys
import time
import requests
import socket
import json
import shutil
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
CLUSTER_API = "cluster/api/v1.0"
snapdata_path = os.environ.get('SNAP_DATA')
snap_path = os.environ.get('SNAP')
ca_cert_file = "{}/certs/ca.remote.crt".format(snapdata_path)
callback_token_file = "{}/credentials/callback-token.txt".format(snapdata_path)
callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path)
callback_tokens_file = "{}/credentials/callback-tokens.txt".format(snapdata_path)
server_cert_file = "{}/certs/server.remote.crt".format(snapdata_path)
def get_connection_info(master_ip, master_port, token, callback_token):
"""
Contact the master and get all connection information
:param master_ip: the master IP
:param master_port: the master port
:param token: the token to contact the master with
:param callback_token: the token to provide to the master for callbacks
:return: the json response of the master
"""
cluster_agent_port = 25000
filename = "{}/args/cluster-agent".format(snapdata_path)
with open(filename) as fp:
for _, line in enumerate(fp):
if line.startswith("--port"):
cluster_agent_port = line.split(' ')
cluster_agent_port = cluster_agent_port[-1].split('=')
cluster_agent_port = cluster_agent_port[0].rstrip()
req_data = {"token": token,
"hostname": socket.gethostname(),
"port": cluster_agent_port,
"callback": callback_token}
# TODO: enable ssl verification
connection_info = requests.post("https://{}:{}/{}/join".format(master_ip, master_port, CLUSTER_API),
json=req_data,
verify=False)
if connection_info.status_code != 200:
print("Failed to join cluster. {}".format(connection_info.json()["error"]))
exit(1)
return connection_info.json()
def usage():
print("Join a cluster: microk8s.join <master>:<port>/<token>")
def set_arg(key, value, file):
"""
Set an arguement to a file
:param key: argument name
:param value: value
:param file: the arguments file
"""
filename = "{}/args/{}".format(snapdata_path, file)
filename_remote = "{}/args/{}.remote".format(snapdata_path, file)
with open(filename_remote, 'w+') as back_fp:
with open(filename, 'r+') as fp:
for _, line in enumerate(fp):
if line.startswith(key):
if value is not None:
back_fp.write("{} {}\n".format(key, value))
else:
back_fp.write("{}".format(line))
shutil.copyfile(filename, "{}.backup".format(filename))
shutil.copyfile(filename_remote, filename)
os.remove(filename_remote)
def get_etcd_client_cert(master_ip, master_port, token):
"""
Get a signed cert to access etcd
:param master_ip: master ip
:param master_port: master port
:param token: token to contact the master with
"""
cer_req_file = "{}/certs/server.remote.csr".format(snapdata_path)
cmd_cert = "openssl req -new -key {SNAP_DATA}/certs/server.key -out {csr} " \
"-config {SNAP_DATA}/certs/csr.conf".format(SNAP_DATA=snapdata_path, csr=cer_req_file)
subprocess.check_call(cmd_cert.split())
with open(cer_req_file) as fp:
csr = fp.read()
req_data = {'token': token, 'request': csr}
# TODO: enable ssl verification
signed = requests.post("https://{}:{}/{}/sign-cert".format(master_ip, master_port, CLUSTER_API),
json=req_data,
verify=False)
if signed.status_code != 200:
print("Failed to sign certificate. {}".format(signed.json()["error"]))
exit(1)
info = signed.json()
with open(server_cert_file, "w") as cert_fp:
cert_fp.write(info["certificate"])
def update_flannel(etcd, master_ip, master_port, token):
"""
Configure flannel
:param etcd: etcd endpoint
:param master_ip: master ip
:param master_port: master port
:param token: token to contact the master with
"""
get_etcd_client_cert(master_ip, master_port, token)
etcd = etcd.replace("0.0.0.0", master_ip)
set_arg("--etcd-endpoints", etcd, "flanneld")
set_arg("--etcd-cafile", ca_cert_file, "flanneld")
set_arg("--etcd-certfile", server_cert_file, "flanneld")
set_arg("--etcd-keyfile", "${SNAP_DATA}/certs/server.key", "flanneld")
subprocess.check_call("systemctl restart snap.microk8s.daemon-flanneld.service".split())
def ca_one_line(ca):
"""
The CA in one line
:param ca: the ca
:return: one line
"""
return base64.b64encode(ca.encode('utf-8')).decode('utf-8')
def create_kubeconfig(token, ca, master_ip, api_port, filename, user):
"""
Create a kubeconfig file. The file in stored under credentials named after the user
:param token: the token to be in the kubeconfig
:param ca: the ca
:param master_ip: the master node IP
:param api_port: the API server port
:param filename: the name of the config file
:param user: the user to use al login
"""
snap_path = os.environ.get('SNAP')
config_template = "{}/microk8s-resources/{}".format(snap_path, "kubelet.config.template")
config = "{}/credentials/{}".format(snapdata_path, filename)
shutil.copyfile(config, "{}.backup".format(config))
ca_line = ca_one_line(ca)
with open(config_template, 'r') as tfp:
with open(config, 'w+') as fp:
config_txt = tfp.read()
config_txt = config_txt.replace("CADATA", ca_line)
config_txt = config_txt.replace("NAME", user)
config_txt = config_txt.replace("TOKEN", token)
config_txt = config_txt.replace("127.0.0.1", master_ip)
config_txt = config_txt.replace("16443", api_port)
fp.write(config_txt)
def update_kubeproxy(token, ca, master_ip, api_port):
"""
Configure the kube-proxy
:param token: the token to be in the kubeconfig
:param ca: the ca
:param master_ip: the master node IP
:param api_port: the API server port
"""
create_kubeconfig(token, ca, master_ip, api_port, "proxy.config", "kubeproxy")
set_arg("--master", None, "kube-proxy")
subprocess.check_call("systemctl restart snap.microk8s.daemon-proxy.service".split())
def update_kubelet(token, ca, master_ip, api_port):
"""
Configure the kubelet
:param token: the token to be in the kubeconfig
:param ca: the ca
:param master_ip: the master node IP
:param api_port: the API server port
"""
create_kubeconfig(token, ca, master_ip, api_port, "kubelet.config", "kubelet")
set_arg("--client-ca-file", "${SNAP_DATA}/certs/ca.remote.crt", "kubelet")
subprocess.check_call("systemctl restart snap.microk8s.daemon-kubelet.service".split())
def store_remote_ca(ca):
"""
Store the remote ca
:param ca: the CA
"""
with open(ca_cert_file, 'w+') as fp:
fp.write(ca)
def mark_cluster_node():
"""
Mark a node as being part of a cluster by creating a var/lock/clustered.lock
"""
lock_file = "{}/var/lock/clustered.lock".format(snapdata_path)
open(lock_file, 'a').close()
os.chmod(lock_file, 0o700)
services = ['etcd', 'apiserver', 'apiserver-kicker', 'controller-manager', 'scheduler']
for service in services:
subprocess.check_call("systemctl restart snap.microk8s.daemon-{}.service".format(service).split())
def generate_callback_token():
"""
Generate a token and store it in the callback token file
:return: the token
"""
token = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(64))
with open(callback_token_file, "w") as fp:
fp.write("{}\n".format(token))
os.chmod(callback_token_file, 0o600)
return token
def store_base_kubelet_args(args_string):
"""
Create a kubelet args file from the set of args provided
:param args_string: the arguments provided
"""
args_file = "{}/args/kubelet".format(snapdata_path)
with open(args_file, "w") as fp:
fp.write(args_string)
def reset_current_installation():
"""
Take a node out of a cluster
"""
lock_file = "{}/var/lock/clustered.lock".format(snapdata_path)
if not os.path.isfile(lock_file):
print("Not in clustering mode.")
exit(2)
os.remove(lock_file)
os.remove(ca_cert_file)
os.remove(callback_token_file)
os.remove(server_cert_file)
for config_file in ["kubelet", "flanneld", "kube-proxy"]:
shutil.copyfile("{}/default-args/{}".format(snap_path, config_file),
"{}/args/{}".format(snapdata_path, config_file))
for user in ["proxy", "kubelet"]:
config = "{}/credentials/{}.config".format(snapdata_path, user)
shutil.copyfile("{}.backup".format(config), config)
subprocess.check_call("{}/microk8s-stop.wrapper".format(snap_path).split())
waits = 10
while waits > 0:
try:
subprocess.check_call("{}/microk8s-start.wrapper".format(snap_path).split())
break
except subprocess.CalledProcessError:
print("Services not ready to start. Waiting...")
time.sleep(5)
waits -= 1
def remove_kubelet_token(node):
"""
Remove a token for a node in the known tokens
:param node: the name of the node
"""
file = "{}/credentials/known_tokens.csv".format(snapdata_path)
backup_file = "{}.backup".format(file)
token = "system:node:{}".format(node)
# That is a critical section. We need to protect it.
with open(backup_file, 'w') as back_fp:
with open(file, 'r') as fp:
for _, line in enumerate(fp):
if token in line:
continue
back_fp.write("{}".format(line))
shutil.copyfile(backup_file, file)
def remove_callback_token(node):
"""
Remove a callback token
:param node: the node
"""
tmp_file = "{}.tmp".format(callback_tokens_file)
if not os.path.isfile(callback_tokens_file):
open(callback_tokens_file, 'a+')
os.chmod(callback_tokens_file, 0o600)
with open(tmp_file, "w") as backup_fp:
os.chmod(tmp_file, 0o600)
with open(callback_tokens_file, 'r+') as callback_fp:
for _, line in enumerate(callback_fp):
if line.startswith(node):
continue
else:
backup_fp.write(line)
shutil.move(tmp_file, callback_tokens_file)
def remove_node(node):
try:
# Make sure this node exists
subprocess.check_call("{}/microk8s-kubectl.wrapper get no {}".format(snap_path, node).split(),
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except subprocess.CalledProcessError:
print("Node {} does not exist.".format(node))
exit(1)
remove_kubelet_token(node)
remove_callback_token(node)
subprocess.check_call("{}/microk8s-kubectl.wrapper delete no {}".format(snap_path, node).split(),
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
if __name__ == "__main__":
try:
opts, args = getopt.gnu_getopt(sys.argv[1:], "h", ["help"])
except getopt.GetoptError as err:
print(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
for o, a in opts:
if o in ("-h", "--help"):
usage()
sys.exit(1)
else:
print("Unhandled option")
sys.exit(1)
if args[0] == "reset":
if len(args) > 1:
remove_node(args[1])
else:
reset_current_installation()
else:
if len(args) <= 0:
print("Please provide a connection string.")
usage()
sys.exit(4)
connection_parts = args[0].split("/")
token = connection_parts[1]
master_ep = connection_parts[0].split(":")
master_ip = master_ep[0]
master_port = master_ep[1]
callback_token = generate_callback_token()
info = get_connection_info(master_ip, master_port, token, callback_token)
store_base_kubelet_args(info["kubelet_args"])
store_remote_ca(info["ca"])
update_flannel(info["etcd"], master_ip, master_port, token)
update_kubeproxy(info["kubeproxy"], info["ca"], master_ip, info["apiport"])
update_kubelet(info["kubelet"], info["ca"], master_ip, info["apiport"])
mark_cluster_node()
sys.exit(0)

View File

@@ -179,8 +179,11 @@ rm -rf ${SNAP_DATA}/inspection-report
mkdir -p ${SNAP_DATA}/inspection-report
printf -- 'Inspecting services\n'
check_service "snap.microk8s.daemon-cluster-agent"
check_service "snap.microk8s.daemon-flanneld"
check_service "snap.microk8s.daemon-containerd"
check_service "snap.microk8s.daemon-apiserver"
check_service "snap.microk8s.daemon-apiserver-kicker"
check_service "snap.microk8s.daemon-proxy"
check_service "snap.microk8s.daemon-kubelet"
check_service "snap.microk8s.daemon-scheduler"

11
snap/hooks/configure vendored
View File

@@ -275,7 +275,16 @@ fi
if getent group microk8s >/dev/null 2>&1
then
chgrp microk8s -R ${SNAP_DATA}/credentials/ ${SNAP_DATA}/certs/ ${SNAP_DATA}/args/ || true
chgrp microk8s -R ${SNAP_DATA}/credentials/ ${SNAP_DATA}/certs/ ${SNAP_DATA}/args/ ${SNAP_DATA}/var/lock/ || true
fi
if ! [ -f "${SNAP_DATA}/args/flanneld" ]
then
cp -r ${SNAP}/default-args/cni-network/flannel.conflist ${SNAP_DATA}/args/cni-network/
cp ${SNAP}/default-args/flanneld ${SNAP_DATA}/args/
cp ${SNAP}/default-args/flannel-template.conflist ${SNAP_DATA}/args/
cp ${SNAP}/default-args/flannel-network-mgr-config ${SNAP_DATA}/args/
systemctl restart snap.${SNAP_NAME}.daemon-flanneld
fi
if grep -e "etcd.socket:2379" ${SNAP_DATA}/args/etcd

View File

@@ -78,3 +78,12 @@ do
chmod -R ug+rwX ${dir}
chmod -R o-rwX ${dir}
done
for link in "cni0 cilium_vxlan"
do
if $SNAP/sbin/ip link show ${link}
then
$SNAP/sbin/ip link delete ${link}
fi
done

View File

@@ -29,3 +29,8 @@ rm -rf ${SNAP_COMMON}/run/containerd/* || true
(cat /proc/mounts | grep ${SNAP_COMMON}/run/containerd | cut -d ' ' -f 2 | xargs umount) || true
(cat /proc/mounts | grep ${SNAP_COMMON}/var/lib/docker | cut -d ' ' -f 2 | xargs umount -l) || true
(cat /proc/mounts | grep ${SNAP_COMMON}/var/run/docker | cut -d ' ' -f 2 | xargs umount) || true
if $SNAP/sbin/ip link show cni0
then
$SNAP/sbin/ip link delete cni0
fi

View File

@@ -18,6 +18,9 @@ apps:
daemon-etcd:
command: run-etcd-with-args
daemon: simple
daemon-flanneld:
command: run-flanneld-with-args
daemon: simple
daemon-containerd:
command: run-containerd-with-args
daemon: simple
@@ -27,6 +30,9 @@ apps:
daemon-apiserver-kicker:
command: apiservice-kicker
daemon: simple
daemon-cluster-agent:
command: run-cluster-agent-with-args
daemon: simple
daemon-controller-manager:
command: run-with-config-args kube-controller-manager
daemon: simple
@@ -42,6 +48,14 @@ apps:
kubectl:
command: microk8s-kubectl.wrapper
completer: kubectl.bash
add-node:
command: microk8s-add-node.wrapper
join:
command: microk8s-join.wrapper
remove-node:
command: microk8s-remove-node.wrapper
leave:
command: microk8s-leave.wrapper
ctr:
command: microk8s-ctr.wrapper
inspect:
@@ -154,6 +168,15 @@ parts:
- -sbin/xtables-multi
- -sbin/iptables*
- -lib/xtables
cluster-agent:
plugin: python
python-version: python3
source: .
python-packages:
- flask
stage-packages:
- python3-requests
- gunicorn3
microk8s:
after: [containerd]
plugin: dump
@@ -212,6 +235,10 @@ parts:
mkdir -p opt/cni/bin/
cp $KUBE_SNAP_BINS/cni/* opt/cni/bin/
echo "Preparing flanneld"
mkdir -p opt/cni/bin/
cp $KUBE_SNAP_BINS/flanneld/flanneld opt/cni/bin/
echo "Preparing containerd"
cp $KUBE_SNAP_ROOT/microk8s-resources/containerd-profile .

49
tests/test-cluster.py Normal file
View File

@@ -0,0 +1,49 @@
import pytest
from validators import (
validate_dns_dashboard,
validate_cluster,
)
from utils import (
microk8s_enable,
wait_for_pod_state,
microk8s_disable,
microk8s_reset,
microk8s_clustering_capable
)
class TestCluster(object):
@pytest.fixture(autouse=True)
def clean_up(self):
"""
Clean up after a test
"""
yield
microk8s_reset(2)
def test_basic(self):
"""
Sets up and tests dashboard, dns in a two node cluster.
"""
if not microk8s_clustering_capable():
return
validate_cluster()
print("Enabling DNS")
microk8s_enable("dns")
wait_for_pod_state("", "kube-system", "running", label="k8s-app=kube-dns")
print("Enabling dashboard")
microk8s_enable("dashboard")
print("Validating dashboard")
validate_dns_dashboard()
print("Disabling dashboard")
microk8s_disable("dashboard")
'''
We would disable DNS here but this freezes any terminating pods.
We let microk8s.reset to do the cleanup.
print("Disabling DNS")
microk8s_disable("dns")
'''

View File

@@ -51,16 +51,59 @@ then
PROXY=$4
fi
# Test addons
create_machine $NAME $PROXY
lxc exec $NAME -- snap install microk8s --channel=${TO_CHANNEL} --classic
if [ ${TO_CHANNEL} == "local" ]
then
lxc file push ./microk8s_latest_amd64.snap $VM2_NAME/tmp/
lxc exec $VM1_NAME -- snap install /tmp/microk8s_latest_amd64.snap --dangerous --classic
else
lxc exec $NAME -- snap install microk8s --channel=${TO_CHANNEL} --classic
fi
lxc exec $NAME -- /var/tmp/tests/patch-kube-proxy.sh
# use 'script' for required tty: https://github.com/lxc/lxd/issues/1724#issuecomment-194416774
lxc exec $NAME -- script -e -c "pytest -s /var/tmp/tests/test-addons.py"
lxc exec $NAME -- microk8s.reset
lxc delete $NAME --force
# Test addons upgrade
# TODO Handle local in the upgrade
NAME=machine-$RANDOM
create_machine $NAME $PROXY
# use 'script' for required tty: https://github.com/lxc/lxd/issues/1724#issuecomment-194416774
lxc exec $NAME -- script -e -c "UPGRADE_MICROK8S_FROM=${FROM_CHANNEL} UPGRADE_MICROK8S_TO=${TO_CHANNEL} pytest -s /var/tmp/tests/test-upgrade.py"
lxc delete $NAME --force
# Test cluster
VM1_NAME=machine-$RANDOM
VM2_NAME=machine-$RANDOM
create_machine $VM1_NAME $PROXY
create_machine $VM2_NAME $PROXY
if [ ${TO_CHANNEL} == "local" ]
then
lxc file push ./microk8s_latest_amd64.snap $VM1_NAME/tmp/
lxc file push ./microk8s_latest_amd64.snap $VM2_NAME/tmp/
lxc exec $VM1_NAME -- snap install /tmp/microk8s_latest_amd64.snap --dangerous --classic
lxc exec $VM2_NAME -- snap install /tmp/microk8s_latest_amd64.snap --dangerous --classic
else
lxc exec $VM1_NAME -- snap install microk8s --channel=${TO_CHANNEL} --classic
lxc exec $VM2_NAME -- snap install microk8s --channel=${TO_CHANNEL} --classic
fi
lxc exec $VM1_NAME -- /var/tmp/tests/patch-kube-proxy.sh
lxc exec $VM2_NAME -- /var/tmp/tests/patch-kube-proxy.sh
if lxc exec $VM1_NAME -- ls /snap/bin/microk8s.token
then
GENERATE_TOKEN=$(lxc exec $VM1_NAME -- sudo /snap/bin/microk8s.token generate)
TOKEN=$(echo $GENERATE_TOKEN | awk '{print $7}')
MASTER_IP=$(lxc info $VM1_NAME | grep eth0 | head -n 1 | awk '{print $3}')
lxc exec $VM2_NAME -- sudo /snap/bin/microk8s.join $MASTER_IP:25000 --token $TOKEN
# use 'script' for required tty: https://github.com/lxc/lxd/issues/1724#issuecomment-194416774
lxc exec $VM1_NAME -- script -e -c "pytest -s /var/tmp/tests/test-cluster.py"
lxc exec $VM1_NAME -- microk8s.reset
fi
lxc delete $VM1_NAME --force
lxc delete $VM2_NAME --force

View File

@@ -118,7 +118,7 @@ def wait_for_pod_state(pod, namespace, desired_state, desired_reason=None, label
time.sleep(3)
def wait_for_installation():
def wait_for_installation(cluster_nodes=1):
"""
Wait for kubernetes service to appear.
"""
@@ -134,7 +134,7 @@ def wait_for_installation():
while True:
cmd = 'get no'
nodes = kubectl(cmd, 300)
if ' Ready' in nodes:
if nodes.count(' Ready') == cluster_nodes:
break
else:
time.sleep(3)
@@ -175,13 +175,21 @@ def microk8s_disable(addon):
return run_until_success(cmd, timeout_insec=300)
def microk8s_reset():
def microk8s_clustering_capable():
"""
Are we in a clustering capable microk8s?
"""
return os.path.isfile('/snap/bin/microk8s.join')
def microk8s_reset(cluster_nodes=1):
"""
Call microk8s reset
"""
cmd = '/snap/bin/microk8s.reset'
run_until_success(cmd, timeout_insec=300)
wait_for_installation()
wait_for_installation(cluster_nodes)
def update_yaml_with_arch(manifest_file):

View File

@@ -183,6 +183,7 @@ def validate_istio():
wait_for_pod_state("", "default", "running", label="app=details")
kubectl("delete -f {}".format(manifest))
def validate_knative():
"""
Validate Knative by deploying the helloworld-go app.
@@ -206,6 +207,7 @@ def validate_knative():
wait_for_pod_state("", "default", "running", label="serving.knative.dev/service=helloworld-go")
kubectl("delete -f {}".format(manifest))
def validate_registry():
"""
Validate the private registry.
@@ -233,11 +235,11 @@ def validate_forward():
kubectl("apply -f {}".format(manifest))
wait_for_pod_state("", "default", "running", label="app=nginx")
os.system('killall kubectl')
os.system('/snap/bin/microk8s.kubectl port-forward pod/nginx 5000:80 &')
os.system('/snap/bin/microk8s.kubectl port-forward pod/nginx 5123:80 &')
attempt = 10
while attempt >= 0:
try:
resp = requests.get("http://localhost:5000")
resp = requests.get("http://localhost:5123")
if resp.status_code == 200:
break
except:
@@ -266,6 +268,7 @@ def validate_metrics_server():
assert attempt > 0
def validate_prometheus():
"""
Validate the prometheus operator
@@ -290,6 +293,7 @@ def validate_fluentd():
wait_for_pod_state("", "kube-system", "running", label="k8s-app=fluentd-es")
wait_for_pod_state("", "kube-system", "running", label="k8s-app=kibana-logging")
def validate_jaeger():
"""
Validate the jaeger operator
@@ -312,6 +316,7 @@ def validate_jaeger():
assert attempt > 0
def validate_linkerd():
"""
Validate Linkerd by deploying emojivoto.
@@ -330,6 +335,7 @@ def validate_linkerd():
wait_for_pod_state("", "emojivoto", "running", label="app=emoji-svc")
kubectl("delete -f {}".format(manifest))
def validate_rbac():
"""
Validate RBAC is actually on
@@ -353,6 +359,7 @@ def cilium(cmd, timeout_insec=300, err_out=None):
cmd = '/snap/bin/microk8s.cilium ' + cmd
return run_until_success(cmd, timeout_insec, err_out)
def validate_cilium():
"""
Validate cilium by deploying the bookinfo app.