diff --git a/kube-controllers/cmd/kube-controllers/fv_test.go b/kube-controllers/cmd/kube-controllers/fv_test.go
index 8395a9c04ca..378bb2833e4 100644
--- a/kube-controllers/cmd/kube-controllers/fv_test.go
+++ b/kube-controllers/cmd/kube-controllers/fv_test.go
@@ -17,6 +17,7 @@ package main_test
 import (
 	"context"
 	"fmt"
+	"net/http"
 	"os"
 	"os/exec"
 	"strings"
@@ -168,6 +169,117 @@ var _ = Describe("[etcd] kube-controllers health check FV tests", func() {
 	})
 })
 
+// These tests verify that kube-controllers serves Prometheus metrics and pprof
+// profiles on separate ports, and that pprof handlers are not reachable via the
+// metrics port.
+var _ = Describe("kube-controllers metrics and pprof FV tests", func() {
+	var (
+		etcd      *containers.Container
+		kubectrls *containers.Container
+		apiserver *containers.Container
+	)
+
+	BeforeEach(func() {
+		// Run etcd.
+		etcd = testutils.RunEtcd()
+
+		// Run apiserver.
+		apiserver = testutils.RunK8sApiserver(etcd.IP)
+
+		// Write out a kubeconfig file.
+		kconfigfile, err := os.CreateTemp("", "ginkgo-policycontroller")
+		Expect(err).NotTo(HaveOccurred())
+		defer os.Remove(kconfigfile.Name())
+		data := testutils.BuildKubeconfig(apiserver.IP)
+		_, err = kconfigfile.Write([]byte(data))
+		Expect(err).NotTo(HaveOccurred())
+
+		// Make the kubeconfig readable by the container.
+		Expect(kconfigfile.Chmod(os.ModePerm)).NotTo(HaveOccurred())
+
+		// Create some clients.
+		client := testutils.GetCalicoClient(apiconfig.Kubernetes, "", kconfigfile.Name())
+		k8sClient, err := testutils.GetK8sClient(kconfigfile.Name())
+		Expect(err).NotTo(HaveOccurred())
+
+		// Wait for the apiserver to be available.
+		Eventually(func() error {
+			_, err := k8sClient.CoreV1().Namespaces().List(context.Background(), metav1.ListOptions{})
+			return err
+		}, 30*time.Second, 1*time.Second).Should(BeNil())
+
+		// Apply the necessary CRDs. There can sometimes be a delay between starting
+		// the API server and when CRDs are apply-able, so retry here.
+		apply := func() error {
+			out, err := apiserver.ExecOutput("kubectl", "apply", "-f", "/crds/")
+			if err != nil {
+				return fmt.Errorf("%s: %s", err, out)
+			}
+			return nil
+		}
+		By("Applying CRDs")
+		Eventually(apply, 10*time.Second).ShouldNot(HaveOccurred())
+
+		// Enable metrics and pprof ports for these tests.
+		Eventually(func() error {
+			kcfg := api.NewKubeControllersConfiguration()
+			kcfg.Name = "default"
+			metricsPort := 9094
+			kcfg.Spec.PrometheusMetricsPort = &metricsPort
+			profilePort := int32(9095)
+			kcfg.Spec.DebugProfilePort = &profilePort
+			_, err := client.KubeControllersConfiguration().Create(context.Background(), kcfg, options.SetOptions{})
+			return err
+		}, 10*time.Second).Should(Succeed())
+
+		// Run the controller. We don't need to run any controllers for these tests, but
+		// we do need to run something, so just run the node controller.
+		kubectrls = testutils.RunPolicyController(apiconfig.Kubernetes, etcd.IP, kconfigfile.Name(), "node")
+	})
+
+	AfterEach(func() {
+		kubectrls.Stop()
+		apiserver.Stop()
+		etcd.Stop()
+	})
+
+	// get issues an HTTP GET for server+path and returns an error if the request
+	// fails or the response status is anything other than 200 OK.
+	get := func(server, path string) error {
+		httpClient := http.Client{Timeout: 2 * time.Second}
+		url := server + path
+		resp, err := httpClient.Get(url)
+		if err != nil {
+			return err
+		}
+		defer func() { _ = resp.Body.Close() }()
+		if resp.StatusCode != http.StatusOK {
+			return fmt.Errorf("bad status code for %q: %d", url, resp.StatusCode)
+		}
+		return nil
+	}
+
+	It("should not expose pprof endpoints on the prometheus port", func() {
+		// The metrics server is started from a goroutine in main, so it may not be
+		// listening yet when the container handle is returned; poll for readiness.
+
+		// By checking that prometheus metrics are available on the default port.
+		metricsEndpoint := fmt.Sprintf("http://%s:9094", kubectrls.IP)
+		Eventually(func() error {
+			return get(metricsEndpoint, "/metrics")
+		}, 30*time.Second, 1*time.Second).Should(Succeed())
+
+		// By checking that pprof endpoints are not available on the prometheus port.
+		Expect(get(metricsEndpoint, "/debug/pprof/profile?seconds=1")).NotTo(Succeed())
+
+		// By checking that pprof endpoints are available on the pprof port.
+		pprofEndpoint := fmt.Sprintf("http://%s:9095", kubectrls.IP)
+		Eventually(func() error {
+			return get(pprofEndpoint, "/debug/pprof/profile?seconds=1")
+		}, 30*time.Second, 1*time.Second).Should(Succeed())
+	})
+})
+
 var _ = Describe("[kdd] kube-controllers health check FV tests", func() {
 	var (
 		etcd *containers.Container
diff --git a/kube-controllers/cmd/kube-controllers/main.go b/kube-controllers/cmd/kube-controllers/main.go
index d313bb339a7..68ee5fa0673 100644
--- a/kube-controllers/cmd/kube-controllers/main.go
+++ b/kube-controllers/cmd/kube-controllers/main.go
@@ -19,14 +19,13 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
-	_ "net/http/pprof"
 	"os"
 	"strings"
 	"time"
 
-	"github.com/pkg/profile"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 
+	"github.com/projectcalico/calico/libcalico-go/lib/debugserver"
 	"github.com/projectcalico/calico/libcalico-go/lib/winutils"
 
 	log "github.com/sirupsen/logrus"
@@ -210,8 +209,9 @@ func main() {
 		// Serve prometheus metrics.
 		log.Infof("Starting Prometheus metrics server on port %d", runCfg.PrometheusPort)
 		go func() {
-			http.Handle("/metrics", promhttp.Handler())
-			err := http.ListenAndServe(fmt.Sprintf(":%d", runCfg.PrometheusPort), nil)
+			mux := http.NewServeMux()
+			mux.Handle("/metrics", promhttp.Handler())
+			err := http.ListenAndServe(fmt.Sprintf(":%d", runCfg.PrometheusPort), mux)
 			if err != nil {
 				log.WithError(err).Fatal("Failed to serve prometheus metrics")
 			}
@@ -219,15 +219,7 @@ func main() {
 	}
 
 	if runCfg.DebugProfilePort != 0 {
-		// Run a webserver to expose memory profiling.
-		setPathOption := profile.ProfilePath("/profiles")
-		defer profile.Start(profile.CPUProfile, profile.MemProfile, setPathOption).Stop()
-		go func() {
-			err := http.ListenAndServe(fmt.Sprintf(":%d", runCfg.DebugProfilePort), nil)
-			if err != nil {
-				log.WithError(err).Fatal("Failed to start debug profiling")
-			}
-		}()
+		debugserver.StartDebugPprofServer("0.0.0.0", int(runCfg.DebugProfilePort))
 	}
 
 	// Run the controllers. This runs until a config change triggers a restart
diff --git a/kube-controllers/tests/testutils/policy_controller_utils.go b/kube-controllers/tests/testutils/policy_controller_utils.go
index 878add6aa9a..333cc09cada 100644
--- a/kube-controllers/tests/testutils/policy_controller_utils.go
+++ b/kube-controllers/tests/testutils/policy_controller_utils.go
@@ -35,8 +35,8 @@ func RunPolicyController(datastoreType apiconfig.DatastoreType, etcdIP, kconfigf
 		"-e", fmt.Sprintf("ETCD_ENDPOINTS=http://%s:2379", etcdIP),
 		"-e", fmt.Sprintf("DATASTORE_TYPE=%s", datastoreType),
 		"-e", fmt.Sprintf("ENABLED_CONTROLLERS=%s", ctrls),
-		"-e", "LOG_LEVEL=debug",
 		"-e", fmt.Sprintf("KUBECONFIG=%s", kconfigfile),
+		"-e", "LOG_LEVEL=debug",
 		"-e", "RECONCILER_PERIOD=10s",
 		"-v", fmt.Sprintf("%s:%s", kconfigfile, kconfigfile),
 		os.Getenv("CONTAINER_NAME"))