
opentelemetry-instrumentation-system-metrics: add process metrics #3250

Merged
merged 13 commits on Feb 21, 2025
CHANGELOG.md (2 additions, 0 deletions)
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `opentelemetry-instrumentation-system-metrics` Add `process` metrics and deprecate the `process.runtime` prefixed ones
([#3250](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3250))
- `opentelemetry-instrumentation-botocore` Add support for GenAI user events and lazy initialize tracer
([#3258](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3258))
- `opentelemetry-instrumentation-botocore` Add support for GenAI system events
@@ -34,13 +34,19 @@
"system.network.io": ["transmit", "receive"],
"system.network.connections": ["family", "type"],
"system.thread_count": None
"process.context_switches": ["involuntary", "voluntary"],
"process.cpu.time": ["user", "system"],
"process.cpu.utilization": None,
"process.memory.usage": None,
"process.memory.virtual": None,
Comment on lines +40 to +41 (Contributor):
Am I correct that these two combine to replace "process.runtime.memory": ["rss", "vms"],?

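The callbacks added later in this diff read the same `psutil.Process().memory_info()` result, with `rss` feeding `process.memory.usage` and `vms` feeding `process.memory.virtual`. A minimal illustrative sketch (not part of the PR):

    import psutil

    mem = psutil.Process().memory_info()  # named tuple exposing rss, vms, ...
    resident_bytes = mem.rss  # what the process.memory.usage callback reports
    virtual_bytes = mem.vms   # what the process.memory.virtual callback reports
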
"process.open_file_descriptor.count": None,
"process.thread.count": None,
"process.runtime.memory": ["rss", "vms"],
"process.runtime.cpu.time": ["user", "system"],
"process.runtime.gc_count": None,
Comment (Contributor):
This won't be deprecated, right?

"process.runtime.thread_count": None,
"process.runtime.cpu.utilization": None,
"process.runtime.context_switches": ["involuntary", "voluntary"],
"process.open_file_descriptor.count": None,
}

Usage
@@ -66,12 +72,17 @@
"system.memory.usage": ["used", "free", "cached"],
"system.cpu.time": ["idle", "user", "system", "irq"],
"system.network.io": ["transmit", "receive"],
"process.runtime.memory": ["rss", "vms"],
"process.runtime.cpu.time": ["user", "system"],
"process.runtime.context_switches": ["involuntary", "voluntary"],
"process.memory.usage": None,
"process.memory.virtual": None,
"process.cpu.time": ["user", "system"],
"process.context_switches": ["involuntary", "voluntary"],
}
SystemMetricsInstrumentor(config=configuration).instrument()


Out-of-spec `process.runtime` prefixed metrics are deprecated and will be removed in future versions; users are encouraged to move to the `process` metrics.
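
For reference when migrating, the deprecated keys correspond to the new ones roughly as follows (a sketch inferred from the default configuration above; the dictionary name is illustrative and not part of this PR):

    # deprecated key                      -> replacement key(s)
    DEPRECATED_TO_NEW = {
        "process.runtime.memory": ["process.memory.usage", "process.memory.virtual"],
        "process.runtime.cpu.time": ["process.cpu.time"],
        "process.runtime.cpu.utilization": ["process.cpu.utilization"],
        "process.runtime.context_switches": ["process.context_switches"],
        "process.runtime.thread_count": ["process.thread.count"],
        # "process.runtime.gc_count" keeps its runtime-specific name for now
    }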

API
---
"""
@@ -92,6 +103,9 @@
from opentelemetry.instrumentation.system_metrics.package import _instruments
from opentelemetry.instrumentation.system_metrics.version import __version__
from opentelemetry.metrics import CallbackOptions, Observation, get_meter
from opentelemetry.semconv._incubating.metrics.process_metrics import (
create_process_cpu_utilization,
)

_logger = logging.getLogger(__name__)

@@ -112,13 +126,19 @@
"system.network.io": ["transmit", "receive"],
"system.network.connections": ["family", "type"],
"system.thread_count": None,
"process.context_switches": ["involuntary", "voluntary"],
"process.cpu.time": ["user", "system"],
"process.cpu.utilization": ["user", "system"],
Comment (Contributor):
Why are there config values here?

"process.memory.usage": None,
"process.memory.virtual": None,
"process.open_file_descriptor.count": None,
"process.thread.count": None,
"process.runtime.memory": ["rss", "vms"],
"process.runtime.cpu.time": ["user", "system"],
"process.runtime.gc_count": None,
"process.runtime.thread_count": None,
"process.runtime.cpu.utilization": None,
"process.runtime.context_switches": ["involuntary", "voluntary"],
"process.open_file_descriptor.count": None,
}

if sys.platform == "darwin":
@@ -165,19 +185,26 @@ def __init__(

self._system_thread_count_labels = self._labels.copy()

self._context_switches_labels = self._labels.copy()
self._cpu_time_labels = self._labels.copy()
self._cpu_utilization_labels = self._labels.copy()
self._memory_usage_labels = self._labels.copy()
self._memory_virtual_labels = self._labels.copy()
self._open_file_descriptor_count_labels = self._labels.copy()
self._thread_count_labels = self._labels.copy()

self._runtime_memory_labels = self._labels.copy()
self._runtime_cpu_time_labels = self._labels.copy()
self._runtime_gc_count_labels = self._labels.copy()
self._runtime_thread_count_labels = self._labels.copy()
self._runtime_cpu_utilization_labels = self._labels.copy()
self._runtime_context_switches_labels = self._labels.copy()
self._open_file_descriptor_count_labels = self._labels.copy()

def instrumentation_dependencies(self) -> Collection[str]:
return _instruments

def _instrument(self, **kwargs: Any):
# pylint: disable=too-many-branches
# pylint: disable=too-many-branches,too-many-statements
meter_provider = kwargs.get("meter_provider")
self._meter = get_meter(
__name__,
@@ -186,6 +213,8 @@ def _instrument(self, **kwargs: Any):
schema_url="https://opentelemetry.io/schemas/1.11.0",
)

# system metrics

if "system.cpu.time" in self._config:
self._meter.create_observable_counter(
name="system.cpu.time",
@@ -194,6 +223,7 @@
unit="s",
)

# FIXME: double check this is divided by cpu core
if "system.cpu.utilization" in self._config:
self._meter.create_observable_gauge(
name="system.cpu.utilization",
@@ -218,6 +248,7 @@
unit="1",
)

# FIXME: system.swap is gone in favour of system.paging
if "system.swap.usage" in self._config:
self._meter.create_observable_gauge(
name="system.swap.usage",
@@ -269,6 +300,7 @@ def _instrument(self, **kwargs: Any):
unit="operations",
)

# FIXME: this has been replaced by system.disk.operation.time
if "system.disk.time" in self._config:
self._meter.create_observable_counter(
name="system.disk.time",
@@ -299,6 +331,7 @@
# TODO Filesystem information can be obtained with os.statvfs in Unix-like
# OSs, how to do the same in Windows?

# FIXME: this is now just system.network.dropped
if "system.network.dropped.packets" in self._config:
self._meter.create_observable_counter(
name="system.network.dropped_packets",
@@ -339,13 +372,72 @@ def _instrument(self, **kwargs: Any):
unit="connections",
)

# FIXME: this is gone
if "system.thread_count" in self._config:
self._meter.create_observable_gauge(
name="system.thread_count",
callbacks=[self._get_system_thread_count],
description="System active threads count",
)

# process metrics

if "process.cpu.time" in self._config:
self._meter.create_observable_counter(
name="process.cpu.time",
callbacks=[self._get_cpu_time],
description="Total CPU seconds broken down by different states.",
unit="s",
)

if "process.cpu.utilization" in self._config:
create_process_cpu_utilization(
self._meter, callbacks=[self._get_cpu_utilization]
)
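
Unlike its siblings in this block, `process.cpu.utilization` is created through the semconv incubating helper rather than a direct `create_observable_gauge` call. Assuming the helper only wires up the meter with the spec-defined instrument name and unit, it is roughly equivalent to the sketch below (the description string is paraphrased, not the exact semconv wording):

    # rough equivalent of create_process_cpu_utilization(...) above -- an assumption,
    # not the actual opentelemetry-semconv implementation
    self._meter.create_observable_gauge(
        name="process.cpu.utilization",
        callbacks=[self._get_cpu_utilization],
        description="CPU usage of the process as a fraction of available CPU time",
        unit="1",
    )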

if "process.context_switches" in self._config:
self._meter.create_observable_counter(
name="process.context_switches",
callbacks=[self._get_context_switches],
description="Number of times the process has been context switched.",
)

if "process.memory.usage" in self._config:
self._meter.create_observable_up_down_counter(
name="process.memory.usage",
callbacks=[self._get_memory_usage],
description="The amount of physical memory in use.",
unit="By",
)

if "process.memory.virtual" in self._config:
self._meter.create_observable_up_down_counter(
name="process.memory.virtual",
callbacks=[self._get_memory_virtual],
description="The amount of committed virtual memory.",
unit="By",
)

if (
sys.platform != "win32"
and "process.open_file_descriptor.count" in self._config
):
self._meter.create_observable_up_down_counter(
name="process.open_file_descriptor.count",
callbacks=[self._get_open_file_descriptors],
description="Number of file descriptors in use by the process.",
)

if "process.thread.count" in self._config:
self._meter.create_observable_up_down_counter(
name="process.thread.count",
callbacks=[self._get_thread_count],
description="Process threads count.",
)

# FIXME: process.runtime keys are deprecated and will be removed in subsequent releases.
# When removing them, remember to clean also the callbacks and labels

if "process.runtime.memory" in self._config:
self._meter.create_observable_up_down_counter(
name=f"process.runtime.{self._python_implementation}.memory",
@@ -398,16 +490,6 @@ def _instrument(self, **kwargs: Any):
unit="switches",
)

if (
sys.platform != "win32"
and "process.open_file_descriptor.count" in self._config
):
self._meter.create_observable_up_down_counter(
name="process.open_file_descriptor.count",
callbacks=[self._get_open_file_descriptors],
description="Number of file descriptors in use by the process.",
)

def _uninstrument(self, **kwargs: Any):
pass

@@ -685,6 +767,76 @@ def _get_system_thread_count(
threading.active_count(), self._system_thread_count_labels
)

# process callbacks

def _get_context_switches(
self, options: CallbackOptions
) -> Iterable[Observation]:
"""Observer callback for context switches"""
ctx_switches = self._proc.num_ctx_switches()
for metric in self._config["process.context_switches"]:
if hasattr(ctx_switches, metric):
self._context_switches_labels["type"] = metric
yield Observation(
getattr(ctx_switches, metric),
self._context_switches_labels.copy(),
)
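
For context, `psutil.Process().num_ctx_switches()` returns a named tuple with `voluntary` and `involuntary` fields, which is why the loop above can match the configured names with `hasattr`. A quick illustrative check (not part of the diff):

    import psutil

    ctx = psutil.Process().num_ctx_switches()
    print(ctx.voluntary, ctx.involuntary)  # e.g. pctxsw(voluntary=1234, involuntary=56)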

def _get_cpu_time(self, options: CallbackOptions) -> Iterable[Observation]:
"""Observer callback for CPU time"""
proc_cpu = self._proc.cpu_times()
for metric in self._config["process.cpu.time"]:
if hasattr(proc_cpu, metric):
self._cpu_time_labels["type"] = metric
yield Observation(
getattr(proc_cpu, metric),
self._cpu_time_labels.copy(),
)

def _get_cpu_utilization(
self, options: CallbackOptions
) -> Iterable[Observation]:
"""Observer callback for CPU utilization"""
proc_cpu_percent = self._proc.cpu_percent()
# may return None so add a default of 1 in case
num_cpus = psutil.cpu_count() or 1
yield Observation(
proc_cpu_percent / 100 / num_cpus,
self._cpu_utilization_labels.copy(),
)
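
The division by 100 and by the CPU count normalizes psutil's process-wide percentage into the 0-1 per-CPU utilization range; psutil can report more than 100 for multi-threaded processes, so a `cpu_percent()` reading of 200.0 on an 8-CPU host is emitted as 0.25. A small worked check (illustrative only):

    proc_cpu_percent = 200.0  # psutil value; can exceed 100 for multi-threaded processes
    num_cpus = 8
    utilization = proc_cpu_percent / 100 / num_cpus  # -> 0.25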

def _get_memory_usage(
self, options: CallbackOptions
) -> Iterable[Observation]:
"""Observer callback for memory usage"""
proc_memory = self._proc.memory_info()
if hasattr(proc_memory, "rss"):
yield Observation(
getattr(proc_memory, "rss"),
self._memory_usage_labels.copy(),
)

def _get_memory_virtual(
self, options: CallbackOptions
) -> Iterable[Observation]:
"""Observer callback for memory virtual"""
proc_memory = self._proc.memory_info()
if hasattr(proc_memory, "vms"):
yield Observation(
getattr(proc_memory, "vms"),
self._memory_virtual_labels.copy(),
)

def _get_thread_count(
self, options: CallbackOptions
) -> Iterable[Observation]:
"""Observer callback for active thread count"""
yield Observation(
self._proc.num_threads(), self._thread_count_labels.copy()
)

# runtime callbacks

def _get_runtime_memory(
self, options: CallbackOptions
) -> Iterable[Observation]: