-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support kubernetes controller manager SLI metrics (#15914)
* Add support for SLI metrics in controller * fixup! Add support for SLI metrics in controller * Use get requests to probe for /slis endpoint * Edit typo in documentation for kube controller manager SLI metrics * Add changelog for kube controller manager * Format kube controller manager code * Update tests to mock get request instead of head * Update sli unit tests * Change SLI metrics name tag and filter by type for kcm * Revert changes to kube scheduler * Remove extra empty line in changelog * Remove type tag from samples * Use new changelog format * Set SLI scraper config per instance * Remove redundant value in .get
- Loading branch information
Showing
9 changed files
with
367 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add support for kube_controller_manager SLI metrics |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
kube_controller_manager/datadog_checks/kube_controller_manager/sli_metrics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# (C) Datadog, Inc. 2023-present | ||
# All rights reserved | ||
# Licensed under a 3-clause BSD style license (see LICENSE) | ||
from __future__ import division | ||
|
||
from copy import deepcopy | ||
|
||
from datadog_checks.base.checks.openmetrics import OpenMetricsBaseCheck | ||
|
||
# Suffix appended to an instance's `prometheus_url` to build the SLI endpoint URL.
SLI_METRICS_PATH = '/slis'

# Scraped SLI metric name -> name used when the metric is submitted.
# Both SLI metrics keep their original name, so every entry maps to itself.
SLI_METRICS_MAP = {
    metric_name: metric_name
    for metric_name in (
        'kubernetes_healthcheck',
        'kubernetes_healthchecks_total',
    )
}
|
||
|
||
class SliMetricsScraperMixin(OpenMetricsBaseCheck):
    """
    Scrape the kube controller manager SLI prometheus endpoint and submit its
    metrics on behalf of a check.

    The SLI endpoint URL is the instance's `prometheus_url` with
    `SLI_METRICS_PATH` appended (the "/metrics/slis" endpoint).
    """

    def __init__(self, *args, **kwargs):
        super(SliMetricsScraperMixin, self).__init__(*args, **kwargs)
        # Every metric listed in SLI_METRICS_MAP goes through the same
        # transformer; deriving the keys from the map keeps both in sync.
        self.sli_transformers = {metric_name: self.sli_metrics_transformer for metric_name in SLI_METRICS_MAP}

    def create_sli_prometheus_instance(self, instance):
        """
        Create a copy of the instance and set default values.
        This is so the base class can create a scraper_config with the proper values.

        :param instance: check instance dict; it must contain `prometheus_url`,
            otherwise building the SLI URL raises TypeError (None + str).
        :return: a deep copy of `instance` with `namespace` and `prometheus_url`
            overridden for the SLI endpoint. `instance` itself is not modified.
        """
        KUBE_CONTROLLER_MANAGER_SLI_NAMESPACE = "kube_controller_manager.slis"

        sli_instance = deepcopy(instance)
        sli_instance.update(
            {
                'namespace': KUBE_CONTROLLER_MANAGER_SLI_NAMESPACE,
                'prometheus_url': instance.get('prometheus_url') + SLI_METRICS_PATH,
            }
        )
        return sli_instance

    def detect_sli_endpoint(self, http_handler, url):
        """
        Whether the SLI metrics endpoint is available (k8s 1.26+).

        :param http_handler: object exposing `get(url, stream=True)` and
            returning a response with a `status_code` attribute.
        :param url: URL of the SLI endpoint to probe.
        :return: True if the endpoint returns 200, False otherwise (including
            when the request itself raises).
        """
        try:
            r = http_handler.get(url, stream=True)
        except Exception as e:
            self.log.debug("Error querying SLIs endpoint: %s", e)
            return False

        status_code = r.status_code
        # Only the status code is needed. A streamed response keeps its
        # connection checked out until it is closed, so release it right away.
        # The getattr guard keeps handlers whose responses lack close() working.
        close = getattr(r, 'close', None)
        if callable(close):
            close()

        if status_code == 403:
            # Adjacent literals instead of a backslash continuation, so source
            # indentation can never leak into the logged message.
            self.log.debug(
                "The /metrics/slis endpoint was introduced in Kubernetes v1.26. If you expect to see SLI metrics, "
                "please check that your permissions are configured properly."
            )
        return status_code == 200

    def sli_metrics_transformer(self, metric, scraper_config):
        """
        Submit only the `healthz` samples of an SLI metric.

        For each kept sample the `name` label is renamed to `sli_name` and the
        `type` label is dropped. Samples of any other type (or without a `type`
        label) are skipped with a debug log.

        Note: the label dicts of the incoming samples are modified in place.

        :param metric: scraped metric exposing `name` and `samples`.
        :param scraper_config: scraper configuration forwarded to `submit_openmetric`.
        """
        modified_metric = deepcopy(metric)
        modified_metric.samples = []

        for sample in metric.samples:
            # .get() so that a sample without a `type` label is skipped instead
            # of aborting the submission of the whole metric with a KeyError.
            metric_type = sample[self.SAMPLE_LABELS].get("type")
            if metric_type == "healthz":
                self._rename_sli_tag(sample, "sli_name", "name")
                self._remove_tag(sample, "type")
                modified_metric.samples.append(sample)
            else:
                self.log.debug("Skipping metric with type `%s`", metric_type)
        self.submit_openmetric(SLI_METRICS_MAP[modified_metric.name], modified_metric, scraper_config)

    def _rename_sli_tag(self, sample, new_tag_name, old_tag_name):
        """Rename label `old_tag_name` to `new_tag_name` in place; no-op if the label is absent."""
        labels = sample[self.SAMPLE_LABELS]
        if old_tag_name in labels:
            labels[new_tag_name] = labels.pop(old_tag_name)

    def _remove_tag(self, sample, tag_name):
        """Remove label `tag_name` from the sample in place; no-op if the label is absent."""
        sample[self.SAMPLE_LABELS].pop(tag_name, None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# (C) Datadog, Inc. 2023-present | ||
# All rights reserved | ||
# Licensed under a 3-clause BSD style license (see LICENSE) | ||
|
||
from datadog_checks.dev import get_here | ||
|
||
HERE = get_here() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
kube_controller_manager/tests/fixtures/metrics_slis_1.27.3.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# HELP kubernetes_healthcheck [ALPHA] This metric records the result of a single healthcheck. | ||
# TYPE kubernetes_healthcheck gauge | ||
kubernetes_healthcheck{name="attachdetach",type="healthz"} 1 | ||
kubernetes_healthcheck{name="bootstrapsigner",type="healthz"} 1 | ||
kubernetes_healthcheck{name="clusterrole-aggregation",type="healthz"} 1 | ||
kubernetes_healthcheck{name="cronjob",type="healthz"} 1 | ||
kubernetes_healthcheck{name="csrapproving",type="healthz"} 1 | ||
kubernetes_healthcheck{name="csrcleaner",type="healthz"} 1 | ||
kubernetes_healthcheck{name="csrsigning",type="healthz"} 1 | ||
kubernetes_healthcheck{name="daemonset",type="healthz"} 1 | ||
kubernetes_healthcheck{name="deployment",type="healthz"} 1 | ||
kubernetes_healthcheck{name="disruption",type="healthz"} 1 | ||
kubernetes_healthcheck{name="etcd",type="readyz"} 1 | ||
# HELP kubernetes_healthchecks_total [ALPHA] This metric records the results of all healthcheck. | ||
# TYPE kubernetes_healthchecks_total counter | ||
kubernetes_healthchecks_total{name="attachdetach",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="bootstrapsigner",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="clusterrole-aggregation",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="cronjob",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="csrapproving",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="csrcleaner",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="csrsigning",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="daemonset",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="deployment",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="disruption",status="success",type="healthz"} 423 | ||
kubernetes_healthchecks_total{name="etcd",status="success",type="readyz"} 15 |
Oops, something went wrong.