Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

[Fix] more resilient post-create cluster preparation (DNS) #780

Merged
merged 3 commits into from
Oct 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/cluster/clusterCreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func NewCmdClusterCreate() *cobra.Command {
* Kubeconfig *
**************/

if clusterConfig.KubeconfigOpts.UpdateDefaultKubeconfig && clusterConfig.KubeconfigOpts.SwitchCurrentContext {
if !clusterConfig.KubeconfigOpts.UpdateDefaultKubeconfig && clusterConfig.KubeconfigOpts.SwitchCurrentContext {
l.Log().Infoln("--kubeconfig-update-default=false --> sets --kubeconfig-switch-context=false")
clusterConfig.KubeconfigOpts.SwitchCurrentContext = false
}
Expand Down
99 changes: 64 additions & 35 deletions pkg/client/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"errors"
"fmt"
"io/ioutil"
"os"
"sort"
"strconv"
"time"
Expand Down Expand Up @@ -934,19 +935,31 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
}

/*
* Additional Cluster Preparation
* Additional Cluster Preparation (post start)
*/

postStartErrgrp, postStartErrgrpCtx := errgroup.WithContext(ctx)

/*** DNS ***/

// add /etc/hosts and CoreDNS entry for host.k3d.internal, referring to the host system
if err := prepInjectHostIP(ctx, runtime, cluster, &clusterStartOpts); err != nil {
return fmt.Errorf("failed to inject host IP: %w", err)
}
// add host.k3d.internal record to /etc/hosts in all nodes
postStartErrgrp.Go(func() error {
return prepInjectHostIP(postStartErrgrpCtx, runtime, cluster, &clusterStartOpts)
})

postStartErrgrp.Go(func() error {
// add host.k3d.internal record to the CoreDNS Configmap
l.Log().Infoln("Injecting record for host.k3d.internal into CoreDNS configmap...")
if err := corednsAddHost(postStartErrgrpCtx, runtime, cluster, clusterStartOpts.EnvironmentInfo.HostGateway.String(), k3d.DefaultK3dInternalHostRecord); err != nil {
return err
}

// add records for other containers in the cluster network to the CoreDNS configmap (e.g. useful for using registries from within Pods inside the cluster)
return prepCoreDNSInjectNetworkMembers(postStartErrgrpCtx, runtime, cluster)
})

// create host records in CoreDNS for external registries
if err := prepCoreDNSInjectNetworkMembers(ctx, runtime, cluster); err != nil {
return fmt.Errorf("failed to patch CoreDNS with network members: %w", err)
if err := postStartErrgrp.Wait(); err != nil {
return fmt.Errorf("error during post-start cluster preparation: %w", err)
}

return nil
Expand Down Expand Up @@ -983,34 +996,55 @@ func SortClusters(clusters []*k3d.Cluster) []*k3d.Cluster {

// corednsAddHost adds a host entry to the CoreDNS configmap if it doesn't exist (a host entry is a single line of the form "IP HOST")
func corednsAddHost(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster, ip string, name string) error {
retries := 3
if v, ok := os.LookupEnv("K3D_DEBUG_COREDNS_RETRIES"); ok && v != "" {
l.Log().Debugf("Running with K3D_DEBUG_COREDNS_RETRIES=%s", v)
if r, err := strconv.Atoi(v); err == nil {
retries = r
} else {
return fmt.Errorf("Invalid value set for env var K3D_DEBUG_COREDNS_RETRIES (%s): %w", v, err)
}
}

// select any server node
var node *k3d.Node
for _, n := range cluster.Nodes {
if n.Role == k3d.ServerRole {
node = n
}
}

hostsEntry := fmt.Sprintf("%s %s", ip, name)
patchCmd := `patch=$(kubectl get cm coredns -n kube-system --template='{{.data.NodeHosts}}' | sed -n -E -e '/[0-9\.]{4,12}\s` + name + `$/!p' -e '$a` + hostsEntry + `' | tr '\n' '^' | busybox xargs -0 printf '{"data": {"NodeHosts":"%s"}}'| sed -E 's%\^%\\n%g') && kubectl patch cm coredns -n kube-system -p="$patch"`
successInjectCoreDNSEntry := false
for _, node := range cluster.Nodes {
if node.Role == k3d.AgentRole || node.Role == k3d.ServerRole {
logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd})
if err == nil {
successInjectCoreDNSEntry = true
break
} else {
msg := fmt.Sprintf("error patching the CoreDNS ConfigMap to include entry '%s': %+v", hostsEntry, err)
if logreader != nil {
readlogs, err := ioutil.ReadAll(logreader)
if err != nil {
l.Log().Debugf("error reading the logs from failed CoreDNS patch exec process in node %s: %v", node.Name, err)
} else {
msg += fmt.Sprintf("\nLogs: %s", string(readlogs))
}

// try 3 (or K3D_DEBUG_COREDNS_RETRIES value) times, as e.g. on cluster startup it may take some time for the Configmap to be available and the server to be responsive
for i := 0; i < retries; i++ {
l.Log().Debugf("Running CoreDNS patch in node %s to add %s (try %d/%d)...", node.Name, hostsEntry, i, retries)
logreader, err := runtime.ExecInNodeGetLogs(ctx, node, []string{"sh", "-c", patchCmd})
if err == nil {
successInjectCoreDNSEntry = true
break
} else {
msg := fmt.Sprintf("(try %d/%d) error patching the CoreDNS ConfigMap to include entry '%s': %+v", i, retries, hostsEntry, err)
if logreader != nil {
readlogs, err := ioutil.ReadAll(logreader)
if err != nil {
l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: %v", i, retries, node.Name, err)
} else {
l.Log().Debugf("no logreader returned for exec process")
msg += fmt.Sprintf("\nLogs: %s", string(readlogs))
}
l.Log().Debugln(msg)
} else {
l.Log().Debugf("(try %d/%d) error reading the logs from failed CoreDNS patch exec process in node %s: no logreader returned for exec process", i, retries, node.Name)
}
l.Log().Debugln(msg)
time.Sleep(1 * time.Second)
}
}
if !successInjectCoreDNSEntry {
return fmt.Errorf("Failed to patch CoreDNS ConfigMap to include entry '%s' (see debug logs)", hostsEntry)
return fmt.Errorf("failed to patch CoreDNS ConfigMap to include entry '%s' (%d tries, see debug logs)", hostsEntry, retries)
}
l.Log().Debugf("Successfully patched CoreDNS Configmap with record '%s'", hostsEntry)
return nil
}

Expand All @@ -1023,7 +1057,7 @@ func prepInjectHostIP(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.C

hostIP := clusterStartOpts.EnvironmentInfo.HostGateway
hostsEntry := fmt.Sprintf("%s %s", hostIP.String(), k3d.DefaultK3dInternalHostRecord)
l.Log().Infof("Injecting record '%s'...", hostsEntry)
l.Log().Infof("Injecting '%s' into /etc/hosts of all nodes...", hostsEntry)

// entry in /etc/hosts
errgrp, errgrpctx := errgroup.WithContext(ctx)
Expand All @@ -1038,24 +1072,19 @@ func prepInjectHostIP(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.C
}
l.Log().Debugf("Successfully added host record \"%s\" to /etc/hosts in all nodes", hostsEntry)

err := corednsAddHost(ctx, runtime, cluster, hostIP.String(), k3d.DefaultK3dInternalHostRecord)
if err != nil {
return fmt.Errorf("failed to inject host record \"%s\" into CoreDNS ConfigMap: %w", hostsEntry, err)
}
l.Log().Debugf("Successfully added host record \"%s\" to the CoreDNS ConfigMap ", hostsEntry)
return nil
}

func prepCoreDNSInjectNetworkMembers(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster) error {
net, err := runtime.GetNetwork(ctx, &cluster.Network)
if err != nil {
return fmt.Errorf("failed to get cluster network %s to inject host records into CoreDNS: %v", cluster.Network.Name, err)
return fmt.Errorf("failed to get cluster network %s to inject host records into CoreDNS: %w", cluster.Network.Name, err)
}
l.Log().Debugf("Adding %d network members to coredns", len(net.Members))
l.Log().Debugf("Adding %d network members to CoreDNS...", len(net.Members))
for _, member := range net.Members {
hostsEntry := fmt.Sprintf("%s %s", member.IP.String(), member.Name)
if err := corednsAddHost(ctx, runtime, cluster, member.IP.String(), member.Name); err != nil {
return fmt.Errorf("failed to add host entry \"%s\" into CoreDNS: %v", hostsEntry, err)
return fmt.Errorf("failed to add host entry \"%s\" into CoreDNS: %w", hostsEntry, err)
}
}
return nil
Expand Down
2 changes: 1 addition & 1 deletion tests/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,4 @@ k3s_assert_node_label() {
# $1 = node name
# $2 = label to assert
kubectl get node "$1" --output go-template='{{ range $k, $v := .metadata.labels }}{{ printf "%s=%s\n" $k $v }}{{ end }}' | grep -qE "^$2$"
}
}
3 changes: 2 additions & 1 deletion tests/test_full_lifecycle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ wait_for_pod_running_by_label "k8s-app=kube-dns" "kube-system"
sleep 5

# 6. test host.k3d.internal
info "Checking DNS Lookup for host.k3d.internal"
info "Checking DNS Lookup for host.k3d.internal..."
kubectl describe cm coredns -n kube-system | grep "host.k3d.internal" > /dev/null 2>&1 || failed "Couldn't find host.k3d.internal in CoreDNS configmap"
wait_for_pod_exec "testimage" "nslookup host.k3d.internal" 15 || failed "DNS Lookup for host.k3d.internal failed"

# Cleanup
Expand Down