From 68438985a61eda6cf8a9e479f055a1af45a84f46 Mon Sep 17 00:00:00 2001 From: iwilltry42 Date: Sun, 10 Oct 2021 09:42:52 +0200 Subject: [PATCH 1/3] clusterCreate: concurrently run postStart configs (DNS stuff mostly) and add retries --- cmd/cluster/clusterCreate.go | 2 +- pkg/client/cluster.go | 81 ++++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/cmd/cluster/clusterCreate.go b/cmd/cluster/clusterCreate.go index ef77b5016..a28d2f7bb 100644 --- a/cmd/cluster/clusterCreate.go +++ b/cmd/cluster/clusterCreate.go @@ -191,7 +191,7 @@ func NewCmdClusterCreate() *cobra.Command { * Kubeconfig * **************/ - if clusterConfig.KubeconfigOpts.UpdateDefaultKubeconfig && clusterConfig.KubeconfigOpts.SwitchCurrentContext { + if !clusterConfig.KubeconfigOpts.UpdateDefaultKubeconfig && clusterConfig.KubeconfigOpts.SwitchCurrentContext { l.Log().Infoln("--kubeconfig-update-default=false --> sets --kubeconfig-switch-context=false") clusterConfig.KubeconfigOpts.SwitchCurrentContext = false } diff --git a/pkg/client/cluster.go b/pkg/client/cluster.go index bf68f90ac..5da70d50f 100644 --- a/pkg/client/cluster.go +++ b/pkg/client/cluster.go @@ -27,6 +27,7 @@ import ( "errors" "fmt" "io/ioutil" + "os" "sort" "strconv" "time" @@ -934,19 +935,30 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust } /* - * Additional Cluster Preparation + * Additional Cluster Preparation (post start) */ + postStartErrgrp, postStartErrgrpCtx := errgroup.WithContext(ctx) + /*** DNS ***/ - // add /etc/hosts and CoreDNS entry for host.k3d.internal, referring to the host system - if err := prepInjectHostIP(ctx, runtime, cluster, &clusterStartOpts); err != nil { - return fmt.Errorf("failed to inject host IP: %w", err) - } + // add host.k3d.internal record to /etc/hosts in all nodes + postStartErrgrp.Go(func() error { + return prepInjectHostIP(postStartErrgrpCtx, runtime, cluster, &clusterStartOpts) + }) + + // add host.k3d.internal record to the CoreDNS Configmap + postStartErrgrp.Go(func() error { + return corednsAddHost(postStartErrgrpCtx, runtime, cluster, clusterStartOpts.EnvironmentInfo.HostGateway.String(), k3d.DefaultK3dInternalHostRecord) + }) - // create host records in CoreDNS for external registries - if err := prepCoreDNSInjectNetworkMembers(ctx, runtime, cluster); err != nil { - return fmt.Errorf("failed to patch CoreDNS with network members: %w", err) + // add records for other containers in the cluster network to the CoreDNS configmap (e.g. useful for using registries from within Pods inside the cluster) + postStartErrgrp.Go(func() error { + return prepCoreDNSInjectNetworkMembers(postStartErrgrpCtx, runtime, cluster) + }) + + if err := postStartErrgrp.Wait(); err != nil { + return fmt.Errorf("error during post-start cluster preparation: %w", err) } return nil @@ -983,33 +995,43 @@ func SortClusters(clusters []*k3d.Cluster) []*k3d.Cluster { // corednsAddHost adds a host entry to the CoreDNS configmap if it doesn't exist (a host entry is a single line of the form "IP HOST") func corednsAddHost(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster, ip string, name string) error { + retries := 3 + if v, ok := os.LookupEnv("K3D_DEBUG_COREDNS_RETRIES"); ok && v != "" { + if r, err := strconv.Atoi(v); err != nil { + retries = r + } + } hostsEntry := fmt.Sprintf("%s %s", ip, name) patchCmd := `patch=$(kubectl get cm coredns -n kube-system --template='{{.data.NodeHosts}}' | sed -n -E -e '/[0-9\.]{4,12}\s` + name + `$/!p' -e '$a` + hostsEntry + `' | tr '\n' '^' | busybox xargs -0 printf '{"data": {"NodeHosts":"%s"}}'| sed -E 's%\^%\\n%g') && kubectl patch cm coredns -n kube-system -p="$patch"` successInjectCoreDNSEntry := false +nodeLoop: for _, node := range cluster.Nodes { if node.Role == k3d.AgentRole || node.Role == k3d.ServerRole { - logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd}) - if err == nil { - successInjectCoreDNSEntry = true - break - } else { - msg := fmt.Sprintf("error patching the CoreDNS ConfigMap to include entry '%s': %+v", hostsEntry, err) - if logreader != nil { - readlogs, err := ioutil.ReadAll(logreader) - if err != nil { - l.Log().Debugf("error reading the logs from failed CoreDNS patch exec process in node %s: %v", node.Name, err) + for i := 0; i < retries; i++ { + logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd}) + if err == nil { + successInjectCoreDNSEntry = true + break nodeLoop + } else { + msg := fmt.Sprintf("(try %d/%d) error patching the CoreDNS ConfigMap to include entry '%s': %+v", i, retries, hostsEntry, err) + if logreader != nil { + readlogs, err := ioutil.ReadAll(logreader) + if err != nil { + l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: %v", i, retries, node.Name, err) + } else { + msg += fmt.Sprintf("\nLogs: %s", string(readlogs)) + } } else { - msg += fmt.Sprintf("\nLogs: %s", string(readlogs)) + l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: no logreader returned for exec process", i, retries, node.Name) } - } else { - l.Log().Debugf("no logreader returned for exec process") + l.Log().Debugln(msg) + time.Sleep(1 * time.Second) } - l.Log().Debugln(msg) } } } if !successInjectCoreDNSEntry { - return fmt.Errorf("Failed to patch CoreDNS ConfigMap to include entry '%s' (see debug logs)", hostsEntry) + return fmt.Errorf("failed to patch CoreDNS ConfigMap to include entry '%s' (%d tries, see debug logs)", hostsEntry, retries) } return nil } @@ -1023,7 +1045,7 @@ func prepInjectHostIP(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.C hostIP := clusterStartOpts.EnvironmentInfo.HostGateway hostsEntry := fmt.Sprintf("%s %s", hostIP.String(), k3d.DefaultK3dInternalHostRecord) - l.Log().Infof("Injecting record '%s'...", hostsEntry) + l.Log().Infof("Injecting '%s' into /etc/hosts of all nodes...", hostsEntry) // entry in /etc/hosts errgrp, errgrpctx := errgroup.WithContext(ctx) @@ -1038,24 +1060,19 @@ func prepInjectHostIP(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.C } l.Log().Debugf("Successfully added host record \"%s\" to /etc/hosts in all nodes", hostsEntry) - err := corednsAddHost(ctx, runtime, cluster, hostIP.String(), k3d.DefaultK3dInternalHostRecord) - if err != nil { - return fmt.Errorf("failed to inject host record \"%s\" into CoreDNS ConfigMap: %w", hostsEntry, err) - } - l.Log().Debugf("Successfully added host record \"%s\" to the CoreDNS ConfigMap ", hostsEntry) return nil } func prepCoreDNSInjectNetworkMembers(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster) error { net, err := runtime.GetNetwork(ctx, &cluster.Network) if err != nil { - return fmt.Errorf("failed to get cluster network %s to inject host records into CoreDNS: %v", cluster.Network.Name, err) + return fmt.Errorf("failed to get cluster network %s to inject host records into CoreDNS: %w", cluster.Network.Name, err) } - l.Log().Debugf("Adding %d network members to coredns", len(net.Members)) + l.Log().Debugf("Adding %d network members to coredns...", len(net.Members)) for _, member := range net.Members { hostsEntry := fmt.Sprintf("%s %s", member.IP.String(), member.Name) if err := corednsAddHost(ctx, runtime, cluster, member.IP.String(), member.Name); err != nil { - return fmt.Errorf("failed to add host entry \"%s\" into CoreDNS: %v", hostsEntry, err) + return fmt.Errorf("failed to add host entry \"%s\" into CoreDNS: %w", hostsEntry, err) } } return nil From 7805f328d9e810921a07b8fcd0b822f65ea000b9 Mon Sep 17 00:00:00 2001 From: iwilltry42 Date: Mon, 11 Oct 2021 11:52:10 +0200 Subject: [PATCH 2/3] dns: only run coredns patch in a server node --- pkg/client/cluster.go | 50 +++++++++++++++++++++--------------- tests/common.sh | 2 +- tests/test_full_lifecycle.sh | 3 ++- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/pkg/client/cluster.go b/pkg/client/cluster.go index 5da70d50f..6be8dfe6b 100644 --- a/pkg/client/cluster.go +++ b/pkg/client/cluster.go @@ -997,6 +997,7 @@ func SortClusters(clusters []*k3d.Cluster) []*k3d.Cluster { func corednsAddHost(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster, ip string, name string) error { retries := 3 if v, ok := os.LookupEnv("K3D_DEBUG_COREDNS_RETRIES"); ok && v != "" { + l.Log().Debugf("Running with K3D_DEBUG_COREDNS_RETRIES=%s", v) if r, err := strconv.Atoi(v); err != nil { retries = r } @@ -1004,35 +1005,42 @@ func corednsAddHost(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clu hostsEntry := fmt.Sprintf("%s %s", ip, name) patchCmd := `patch=$(kubectl get cm coredns -n kube-system --template='{{.data.NodeHosts}}' | sed -n -E -e '/[0-9\.]{4,12}\s` + name + `$/!p' -e '$a` + hostsEntry + `' | tr '\n' '^' | busybox xargs -0 printf '{"data": {"NodeHosts":"%s"}}'| sed -E 's%\^%\\n%g') && kubectl patch cm coredns -n kube-system -p="$patch"` successInjectCoreDNSEntry := false -nodeLoop: - for _, node := range cluster.Nodes { - if node.Role == k3d.AgentRole || node.Role == k3d.ServerRole { - for i := 0; i < retries; i++ { - logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd}) - if err == nil { - successInjectCoreDNSEntry = true - break nodeLoop + + // select any server node + var node *k3d.Node + for _, n := range cluster.Nodes { + if n.Role == k3d.ServerRole { + node = n + } + } + + // try 3 (or K3D_DEBUG_COREDNS_RETRIES value) times, as e.g. on cluster startup it may take some time for the Configmap to be available and the server to be responsive + for i := 0; i < retries; i++ { + l.Log().Infof("Running CoreDNS patch in node %s (try %d/%d)...", node.Name, i, retries) + logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd}) + if err == nil { + successInjectCoreDNSEntry = true + break + } else { + msg := fmt.Sprintf("(try %d/%d) error patching the CoreDNS ConfigMap to include entry '%s': %+v", i, retries, hostsEntry, err) + if logreader != nil { + readlogs, err := ioutil.ReadAll(logreader) + if err != nil { + l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: %v", i, retries, node.Name, err) } else { - msg := fmt.Sprintf("(try %d/%d) error patching the CoreDNS ConfigMap to include entry '%s': %+v", i, retries, hostsEntry, err) - if logreader != nil { - readlogs, err := ioutil.ReadAll(logreader) - if err != nil { - l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: %v", i, retries, node.Name, err) - } else { - msg += fmt.Sprintf("\nLogs: %s", string(readlogs)) - } - } else { - l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: no logreader returned for exec process", i, retries, node.Name) - } - l.Log().Debugln(msg) - time.Sleep(1 * time.Second) + msg += fmt.Sprintf("\nLogs: %s", string(readlogs)) } + } else { + l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: no logreader returned for exec process", i, retries, node.Name) } + l.Log().Debugln(msg) + time.Sleep(1 * time.Second) } } if !successInjectCoreDNSEntry { return fmt.Errorf("failed to patch CoreDNS ConfigMap to include entry '%s' (%d tries, see debug logs)", hostsEntry, retries) } + l.Log().Debugf("Successfully patched CoreDNS Configmap with record '%s'", hostsEntry) return nil } diff --git a/tests/common.sh b/tests/common.sh index 577782134..e5deac1c9 100755 --- a/tests/common.sh +++ b/tests/common.sh @@ -181,4 +181,4 @@ k3s_assert_node_label() { # $1 = node name # $2 = label to assert kubectl get node "$1" --output go-template='{{ range $k, $v := .metadata.labels }}{{ printf "%s=%s\n" $k $v }}{{ end }}' | grep -qE "^$2$" -} \ No newline at end of file +} diff --git a/tests/test_full_lifecycle.sh b/tests/test_full_lifecycle.sh index f1002b1d2..e51b2ee47 100755 --- a/tests/test_full_lifecycle.sh +++ b/tests/test_full_lifecycle.sh @@ -77,7 +77,8 @@ wait_for_pod_running_by_label "k8s-app=kube-dns" "kube-system" sleep 5 # 6. test host.k3d.internal -info "Checking DNS Lookup for host.k3d.internal" +info "Checking DNS Lookup for host.k3d.internal..." +kubectl describe cm coredns -n kube-system | grep "host.k3d.internal" > /dev/null 2>&1 || failed "Couldn't find host.k3d.internal in CoreDNS configmap" wait_for_pod_exec "testimage" "nslookup host.k3d.internal" 15 || failed "DNS Lookup for host.k3d.internal failed" # Cleanup From 8f38fc7595e93b92abea61fb2cd7bdb5ac8831d1 Mon Sep 17 00:00:00 2001 From: iwilltry42 Date: Mon, 11 Oct 2021 15:18:43 +0200 Subject: [PATCH 3/3] dns: apply coredns patches sequentially to avoid race conditions --- pkg/client/cluster.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/client/cluster.go b/pkg/client/cluster.go index 6be8dfe6b..c1e7527cc 100644 --- a/pkg/client/cluster.go +++ b/pkg/client/cluster.go @@ -947,13 +947,14 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust return prepInjectHostIP(postStartErrgrpCtx, runtime, cluster, &clusterStartOpts) }) - // add host.k3d.internal record to the CoreDNS Configmap postStartErrgrp.Go(func() error { - return corednsAddHost(postStartErrgrpCtx, runtime, cluster, clusterStartOpts.EnvironmentInfo.HostGateway.String(), k3d.DefaultK3dInternalHostRecord) - }) + // add host.k3d.internal record to the CoreDNS Configmap + l.Log().Infoln("Injecting record for host.k3d.internal into CoreDNS configmap...") + if err := corednsAddHost(postStartErrgrpCtx, runtime, cluster, clusterStartOpts.EnvironmentInfo.HostGateway.String(), k3d.DefaultK3dInternalHostRecord); err != nil { + return err + } - // add records for other containers in the cluster network to the CoreDNS configmap (e.g. useful for using registries from within Pods inside the cluster) - postStartErrgrp.Go(func() error { + // add records for other containers in the cluster network to the CoreDNS configmap (e.g. useful for using registries from within Pods inside the cluster) return prepCoreDNSInjectNetworkMembers(postStartErrgrpCtx, runtime, cluster) }) @@ -998,13 +999,12 @@ func corednsAddHost(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clu retries := 3 if v, ok := os.LookupEnv("K3D_DEBUG_COREDNS_RETRIES"); ok && v != "" { l.Log().Debugf("Running with K3D_DEBUG_COREDNS_RETRIES=%s", v) - if r, err := strconv.Atoi(v); err != nil { + if r, err := strconv.Atoi(v); err == nil { retries = r + } else { + return fmt.Errorf("Invalid value set for env var K3D_DEBUG_COREDNS_RETRIES (%s): %w", v, err) } } - hostsEntry := fmt.Sprintf("%s %s", ip, name) - patchCmd := `patch=$(kubectl get cm coredns -n kube-system --template='{{.data.NodeHosts}}' | sed -n -E -e '/[0-9\.]{4,12}\s` + name + `$/!p' -e '$a` + hostsEntry + `' | tr '\n' '^' | busybox xargs -0 printf '{"data": {"NodeHosts":"%s"}}'| sed -E 's%\^%\\n%g') && kubectl patch cm coredns -n kube-system -p="$patch"` - successInjectCoreDNSEntry := false // select any server node var node *k3d.Node @@ -1014,9 +1014,13 @@ func corednsAddHost(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clu } } + hostsEntry := fmt.Sprintf("%s %s", ip, name) + patchCmd := `patch=$(kubectl get cm coredns -n kube-system --template='{{.data.NodeHosts}}' | sed -n -E -e '/[0-9\.]{4,12}\s` + name + `$/!p' -e '$a` + hostsEntry + `' | tr '\n' '^' | busybox xargs -0 printf '{"data": {"NodeHosts":"%s"}}'| sed -E 's%\^%\\n%g') && kubectl patch cm coredns -n kube-system -p="$patch"` + successInjectCoreDNSEntry := false + // try 3 (or K3D_DEBUG_COREDNS_RETRIES value) times, as e.g. on cluster startup it may take some time for the Configmap to be available and the server to be responsive for i := 0; i < retries; i++ { - l.Log().Infof("Running CoreDNS patch in node %s (try %d/%d)...", node.Name, i, retries) + l.Log().Debugf("Running CoreDNS patch in node %s to add %s (try %d/%d)...", node.Name, hostsEntry, i, retries) logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd}) if err == nil { successInjectCoreDNSEntry = true @@ -1076,7 +1080,7 @@ func prepCoreDNSInjectNetworkMembers(ctx context.Context, runtime k3drt.Runtime, if err != nil { return fmt.Errorf("failed to get cluster network %s to inject host records into CoreDNS: %w", cluster.Network.Name, err) } - l.Log().Debugf("Adding %d network members to coredns...", len(net.Members)) + l.Log().Debugf("Adding %d network members to CoreDNS...", len(net.Members)) for _, member := range net.Members { hostsEntry := fmt.Sprintf("%s %s", member.IP.String(), member.Name) if err := corednsAddHost(ctx, runtime, cluster, member.IP.String(), member.Name); err != nil {