-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support Openmetrics metrics collection (#10752)
* Add logic for Envoy Openmetricsv2 * Add label remapper * Add new metrics * Finish adding other metrics * reorganize metrics that should be transformed * Introduce openmetrics_endpoint config option * Add watchdog metrics transformers * Add some more label extraction metrics * refactor tests to move to legacy * Add legacy and non legacy fixtures to test files * Update readme * Bump base req * Fix style * Mark legacy metrics * Fix watchdog counter name * document prometheus metrics in metadata csv * Fix metadata csv * Add e2e test * Fix style * Fix metadata format for validation * Flaky metrics * Only support openmetrics in latest api v3 * Fix test imports * Enable Openmetrics option by default * Fix import * Fix style * Update readme * Update config stats_url wording * Fix envoy import * Remove py27 for openmetrics version * Openmetrics endpoint should be optional * Account for flaky metrics * Document service checks * Use unique name * Update envoy/tests/legacy/test_bench.py Co-authored-by: Ofek Lev <ofekmeister@gmail.com> * Move metrics map to metrics.py * Update with feedback * Use lambda * simplify match * Refactor metadata utils * Support metadata collection in V2 * Use urlunparse * Reintroduce legacy config options as hidden Co-authored-by: Ofek Lev <ofekmeister@gmail.com>
- Loading branch information
1 parent
57c4cc6
commit 3114930
Showing
26 changed files
with
2,636 additions
and
1,175 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# (C) Datadog, Inc. 2021-present | ||
# All rights reserved | ||
# Licensed under a 3-clause BSD style license (see LICENSE) | ||
import re | ||
from collections import defaultdict | ||
|
||
from six.moves.urllib.parse import urljoin, urlparse, urlunparse | ||
|
||
from datadog_checks.base import AgentCheck, OpenMetricsBaseCheckV2 | ||
|
||
from .metrics import PROMETHEUS_METRICS_MAP | ||
from .utils import _get_server_info | ||
|
||
# Metric configuration entry keyed by the `istio_build` metric: type `metadata`,
# reading the `tag` label and naming the result `version`.
# NOTE(review): not referenced elsewhere in this file -- confirm whether it is still needed.
ENVOY_VERSION = {
    'istio_build': {
        'type': 'metadata',
        'label': 'tag',
        'name': 'version',
    },
}
|
||
# Label renames applied through the `rename_labels` option of the default
# configuration: each key is a label name as exposed by the endpoint, each
# value is the tag name it is submitted under.
LABEL_MAP = {
    # Both spellings of the cluster label collapse to the same tag.
    'cluster_name': 'envoy_cluster',
    'envoy_cluster_name': 'envoy_cluster',
    # tracing
    'envoy_http_conn_manager_prefix': 'stat_prefix',
    # listener
    'envoy_listener_address': 'address',
    # vhost
    'envoy_virtual_cluster': 'virtual_envoy_cluster',
    'envoy_virtual_host': 'virtual_host_name',
}
|
||
|
||
# Metrics whose exposed name embeds a value that is submitted as a tag instead.
#
# Each key is a regular expression with a single capture group; each value
# holds the keyword arguments passed to
# `EnvoyCheckV2.configure_transformer_label_in_name`:
#   - `label_name`:  tag key that receives the captured text
#   - `metric_type`: name of the submission method looked up on the check
#   - `new_name`:    metric name the sample is submitted under
#
# Entries are grouped by (label_name, metric_type); insertion order is the
# order in which the groups and their patterns are listed below.
METRIC_WITH_LABEL_NAME = {
    pattern: {'label_name': label_name, 'metric_type': metric_type, 'new_name': new_name}
    for label_name, metric_type, renames in (
        (
            'thread_name',
            'monotonic_count',
            (
                (r'^envoy_server_(.+\_.+)_watchdog_miss$', 'server.watchdog_miss.count'),
                (r'^envoy_server_(.+\_.+)_watchdog_mega_miss$', 'server.watchdog_mega_miss.count'),
                (r'^envoy_(.+\_.+)_watchdog_miss$', 'watchdog_miss.count'),
                (r'^envoy_(.+\_.+)_watchdog_mega_miss$', 'watchdog_mega_miss.count'),
            ),
        ),
        (
            'priority',
            'gauge',
            (
                (r'^envoy_cluster_circuit_breakers_(\w+)_cx_open$', 'cluster.circuit_breakers.cx_open'),
                (r'^envoy_cluster_circuit_breakers_(\w+)_cx_pool_open$', 'cluster.circuit_breakers.cx_pool_open'),
                (r'^envoy_cluster_circuit_breakers_(\w+)_rq_open$', 'cluster.circuit_breakers.rq_open'),
                (r'^envoy_cluster_circuit_breakers_(\w+)_rq_pending_open$', 'cluster.circuit_breakers.rq_pending_open'),
                (r'^envoy_cluster_circuit_breakers_(\w+)_rq_retry_open$', 'cluster.circuit_breakers.rq_retry_open'),
            ),
        ),
        (
            'handler',
            'gauge',
            (
                (r'^envoy_listener_admin_(.+\_.+)_downstream_cx_active$', 'listener.admin.downstream_cx_active'),
                (r'^envoy_listener_(.+\_.+)_downstream_cx_active$', 'listener.downstream_cx_active'),
            ),
        ),
        (
            'handler',
            'monotonic_count',
            (
                (r'^envoy_listener_admin_(.+\_.+)_downstream_cx$', 'listener.admin.downstream_cx.count'),
                (r'^envoy_listener_(.+)_downstream_cx$', 'listener.downstream_cx.count'),
            ),
        ),
    )
    for pattern, new_name in renames
}
|
||
|
||
class EnvoyCheckV2(OpenMetricsBaseCheckV2):
    """OpenMetrics (V2) implementation of the Envoy check.

    On top of what the base class provides, this check:

    * supplies the default metric map and label renames (``get_default_config``);
    * registers custom transformers for the metrics listed in
      ``METRIC_WITH_LABEL_NAME``, whose exposed name embeds a value that is
      submitted as a tag instead;
    * submits the Envoy version as ``version`` metadata, read from the
      ``server_info`` path on the same scheme/host as ``openmetrics_endpoint``.
    """

    __NAMESPACE__ = 'envoy'

    # NOTE(review): presumably 0 disables the base check's metric limit -- confirm against the base class.
    DEFAULT_METRIC_LIMIT = 0

    def __init__(self, name, init_config, instances):
        """Initialise the check and derive the base URL used for version collection.

        ``self.base_url`` is ``scheme://netloc`` of the configured
        ``openmetrics_endpoint``. It is left falsy when it cannot be derived, in
        which case metadata collection is skipped; metric collection is unaffected.
        """
        super().__init__(name, init_config, instances)
        # The transformers are registered through `check_initializations` rather
        # than called directly here.
        self.check_initializations.append(self.configure_additional_transformers)

        self.base_url = None
        openmetrics_endpoint = self.instance.get('openmetrics_endpoint')
        try:
            parts = urlparse(openmetrics_endpoint)
            # Keep only scheme and netloc; path, params, query and fragment are blanked.
            self.base_url = urlunparse(parts[:2] + ('', '', '', ''))
        except Exception as e:
            # Best effort: a bad endpoint only disables version collection.
            self.log.debug("Unable to determine the base url for version collection: %s", e)

    def check(self, _):
        """Collect version metadata, then run the base class check."""
        self._collect_metadata()
        super().check(None)

    def get_default_config(self):
        """Return the default scraper options: the Envoy metric map and label renames."""
        return {
            'metrics': [PROMETHEUS_METRICS_MAP],
            'rename_labels': LABEL_MAP,
        }

    def configure_transformer_label_in_name(self, metric_pattern, new_name, label_name, metric_type):
        """Build a transformer that turns part of a metric name into a tag.

        :param metric_pattern: regular expression with one capture group, matched
            against each sample name (after removing a trailing ``_total``).
        :param new_name: metric name every matching sample is submitted under.
        :param label_name: tag key that receives the captured text.
        :param metric_type: name of the submission method to look up on this
            check (for example ``gauge`` or ``monotonic_count``).
        :return: a ``transform(metric, sample_data, runtime_data)`` callable.
        """
        submit = getattr(self, metric_type)
        # Compile once per transformer; the closure below runs for every scrape.
        compiled_pattern = re.compile(metric_pattern)
        total_suffix = '_total'

        def transform(metric, sample_data, runtime_data):
            for sample, tags, hostname in sample_data:
                sample_name = sample.name
                # The patterns are written without the `_total` suffix, so drop it before matching.
                if sample_name.endswith(total_suffix):
                    sample_name = sample_name[: -len(total_suffix)]

                match = compiled_pattern.match(sample_name)
                if match is None:
                    # Skip the unexpected sample instead of failing the whole scrape.
                    self.log.debug(
                        "Sample `%s` does not match pattern `%s`, skipping", sample.name, metric_pattern
                    )
                    continue

                tags.append('{}:{}'.format(label_name, match.group(1)))
                submit(new_name, sample.value, tags=tags, hostname=hostname)

        return transform

    def configure_additional_transformers(self):
        """Register one custom transformer per ``METRIC_WITH_LABEL_NAME`` entry.

        The transformers are added to the scraper associated with the configured
        ``openmetrics_endpoint``.
        """
        metric_transformer = self.scrapers[self.instance['openmetrics_endpoint']].metric_transformer
        for metric_pattern, data in METRIC_WITH_LABEL_NAME.items():
            metric_transformer.add_custom_transformer(
                metric_pattern, self.configure_transformer_label_in_name(metric_pattern, **data), pattern=True
            )

    @AgentCheck.metadata_entrypoint
    def _collect_metadata(self):
        """Fetch the server info and submit it as the ``version`` metadata.

        Does nothing when no base URL could be derived from the endpoint, or
        when the helper returns nothing.
        """
        # Replace in favor of built-in Openmetrics metadata when PR is available
        # https://github.com/envoyproxy/envoy/pull/18991
        if not self.base_url:
            # Log the configured endpoint: `base_url` itself is empty at this point.
            self.log.debug(
                "Skipping server info collection due to malformed url: %s",
                self.instance.get('openmetrics_endpoint'),
            )
            return

        # `base_url` carries no path, so this resolves to <scheme>://<netloc>/server_info
        server_info_url = urljoin(self.base_url, 'server_info')
        raw_version = _get_server_info(server_info_url, self.log, self.http)

        if raw_version:
            self.set_metadata('version', raw_version)
Oops, something went wrong.