From 99b1ad0151b140408fd65dfd7f1743cf1d13b2eb Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 08:48:56 +0100 Subject: [PATCH 1/7] Discard open spans after 10 minutes --- .../opentelemetry/span_processor.py | 32 +++++++ .../opentelemetry/test_span_processor.py | 92 +++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 0ed4e7f709..6d5c5c2252 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -1,3 +1,5 @@ +from time import time + from opentelemetry.context import get_value # type: ignore from opentelemetry.sdk.trace import SpanProcessor # type: ignore from opentelemetry.semconv.trace import SpanAttributes # type: ignore @@ -33,6 +35,7 @@ from sentry_sdk._types import Event, Hint OPEN_TELEMETRY_CONTEXT = "otel" +SPAN_MAX_TIME_OPEN_MINUTES = 10 def link_trace_context_to_error_event(event, otel_span_map): @@ -76,6 +79,9 @@ class SentrySpanProcessor(SpanProcessor): # type: ignore # The mapping from otel span ids to sentry spans otel_span_map = {} # type: Dict[str, Union[Transaction, SentrySpan]] + # The currently open spans. Elements will be discarded after SPAN_MAX_TIME_OPEN_MINUTES + open_spans = {} # type: Dict[int, set[str]] + def __new__(cls): # type: () -> SentrySpanProcessor if not hasattr(cls, "instance"): @@ -90,6 +96,22 @@ def global_event_processor(event, hint): # type: (Event, Hint) -> Event return link_trace_context_to_error_event(event, self.otel_span_map) + def prune_old_spans(self): + # type: (SentrySpanProcessor) -> None + """ + Prune spans that have been open for too long. + """ + current_time_minutes = int(time() / 60) + for span_start_minutes in list(self.open_spans.keys()): + # prune empty open spans buckets + if self.open_spans[span_start_minutes] == set(): + self.open_spans.pop(span_start_minutes) + + # prune old buckets + if current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES: + for span_id in self.open_spans.pop(span_start_minutes, set()): + self.otel_span_map.pop(span_id, None) + def on_start(self, otel_span, parent_context=None): # type: (OTelSpan, Optional[SpanContext]) -> None hub = Hub.current @@ -141,6 +163,12 @@ def on_start(self, otel_span, parent_context=None): self.otel_span_map[trace_data["span_id"]] = sentry_span + span_start_in_minutes = int(otel_span.start_time / 1e9 / 60) + self.open_spans.setdefault(span_start_in_minutes, set()).add( + trace_data["span_id"] + ) + self.prune_old_spans() + def on_end(self, otel_span): # type: (OTelSpan) -> None hub = Hub.current @@ -175,6 +203,10 @@ def on_end(self, otel_span): sentry_span.finish(end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9)) + span_start_in_minutes = int(otel_span.start_time / 1e9 / 60) + self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id) + self.prune_old_spans() + def _is_sentry_span(self, hub, otel_span): # type: (Hub, OTelSpan) -> bool """ diff --git a/tests/integrations/opentelemetry/test_span_processor.py b/tests/integrations/opentelemetry/test_span_processor.py index b7e5a7928d..02e3059ca8 100644 --- a/tests/integrations/opentelemetry/test_span_processor.py +++ b/tests/integrations/opentelemetry/test_span_processor.py @@ -531,3 +531,95 @@ def test_link_trace_context_to_error_event(): assert "contexts" in event assert "trace" in event["contexts"] assert event["contexts"]["trace"] == fake_trace_context + + +def test_pruning_old_spans_on_start(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + parent_context = {} + fake_client = MagicMock() + fake_client.options = {"instrumenter": "otel"} + fake_client.dsn = "https://1234567890abcdef@o123456.ingest.sentry.io/123456" + + current_hub = MagicMock() + current_hub.client = fake_client + + fake_hub = MagicMock() + fake_hub.current = current_hub + + with mock.patch( + "sentry_sdk.integrations.opentelemetry.span_processor.Hub", fake_hub + ): + span_processor = SentrySpanProcessor() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_start(otel_span, parent_context) + assert sorted(list(span_processor.otel_span_map.keys())) == [ + "111111111abcdef", + "1234567890abcdef", + ] + assert sorted(list(span_processor.open_spans.values())) == [ + {"111111111abcdef"}, + {"1234567890abcdef"}, + ] + + +def test_pruning_old_spans_on_end(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + fake_sentry_span = MagicMock(spec=Span) + fake_sentry_span.set_context = MagicMock() + fake_sentry_span.finish = MagicMock() + + span_processor = SentrySpanProcessor() + span_processor._get_otel_context = MagicMock() + span_processor._update_span_with_otel_data = MagicMock() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + "1234567890abcdef": fake_sentry_span, # should go (because it is closed) + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes: {"1234567890abcdef"}, # should go (because it is closed) + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_end(otel_span) + assert sorted(list(span_processor.otel_span_map.keys())) == ["111111111abcdef"] + assert sorted(list(span_processor.open_spans.values())) == [{"111111111abcdef"}] From f058dfb4173cf94accd8e9c3122bc0976caca85d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 14:36:13 +0100 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Daniel Szoke --- sentry_sdk/integrations/opentelemetry/span_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 6d5c5c2252..a0cdbe0f99 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -80,7 +80,7 @@ class SentrySpanProcessor(SpanProcessor): # type: ignore otel_span_map = {} # type: Dict[str, Union[Transaction, SentrySpan]] # The currently open spans. Elements will be discarded after SPAN_MAX_TIME_OPEN_MINUTES - open_spans = {} # type: Dict[int, set[str]] + open_spans = {} # type: dict[int, set[str]] def __new__(cls): # type: () -> SentrySpanProcessor @@ -102,7 +102,7 @@ def prune_old_spans(self): Prune spans that have been open for too long. """ current_time_minutes = int(time() / 60) - for span_start_minutes in list(self.open_spans.keys()): + for span_start_minutes in self.open_spans: # prune empty open spans buckets if self.open_spans[span_start_minutes] == set(): self.open_spans.pop(span_start_minutes) From da48aa19562f3e57155ac952f8e80788410f8600 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 14:37:06 +0100 Subject: [PATCH 3/7] Made function private --- sentry_sdk/integrations/opentelemetry/span_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index a0cdbe0f99..abb3cc530f 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -96,7 +96,7 @@ def global_event_processor(event, hint): # type: (Event, Hint) -> Event return link_trace_context_to_error_event(event, self.otel_span_map) - def prune_old_spans(self): + def _prune_old_spans(self): # type: (SentrySpanProcessor) -> None """ Prune spans that have been open for too long. @@ -167,7 +167,7 @@ def on_start(self, otel_span, parent_context=None): self.open_spans.setdefault(span_start_in_minutes, set()).add( trace_data["span_id"] ) - self.prune_old_spans() + self._prune_old_spans() def on_end(self, otel_span): # type: (OTelSpan) -> None @@ -205,7 +205,7 @@ def on_end(self, otel_span): span_start_in_minutes = int(otel_span.start_time / 1e9 / 60) self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id) - self.prune_old_spans() + self._prune_old_spans() def _is_sentry_span(self, hub, otel_span): # type: (Hub, OTelSpan) -> bool From 4ee95356baac4ef41f11fc5b237f61b3a87820d6 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 14:40:00 +0100 Subject: [PATCH 4/7] Clarifying comments --- .../opentelemetry/span_processor.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index abb3cc530f..2662e9999c 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -147,7 +147,9 @@ def on_start(self, otel_span, parent_context=None): sentry_span = sentry_parent_span.start_child( span_id=trace_data["span_id"], description=otel_span.name, - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) else: @@ -157,13 +159,17 @@ def on_start(self, otel_span, parent_context=None): parent_span_id=parent_span_id, trace_id=trace_data["trace_id"], baggage=trace_data["baggage"], - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) self.otel_span_map[trace_data["span_id"]] = sentry_span - span_start_in_minutes = int(otel_span.start_time / 1e9 / 60) + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision self.open_spans.setdefault(span_start_in_minutes, set()).add( trace_data["span_id"] ) @@ -201,9 +207,13 @@ def on_end(self, otel_span): else: self._update_span_with_otel_data(sentry_span, otel_span) - sentry_span.finish(end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9)) + sentry_span.finish( + end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9) + ) # OTel spans have nanosecond precision - span_start_in_minutes = int(otel_span.start_time / 1e9 / 60) + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id) self._prune_old_spans() From d426e1acc8f779cf0fb68b1d438c8b9c05d01c9e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 14:44:47 +0100 Subject: [PATCH 5/7] Update sentry_sdk/integrations/opentelemetry/span_processor.py Co-authored-by: Daniel Szoke --- sentry_sdk/integrations/opentelemetry/span_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 2662e9999c..aff261c614 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -108,8 +108,8 @@ def _prune_old_spans(self): self.open_spans.pop(span_start_minutes) # prune old buckets - if current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES: - for span_id in self.open_spans.pop(span_start_minutes, set()): + elif current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES: + for span_id in self.open_spans.pop(span_start_minutes): self.otel_span_map.pop(span_id, None) def on_start(self, otel_span, parent_context=None): From b5cc262ccf7c429d5b808f5540bf4220ac959422 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 14:55:11 +0100 Subject: [PATCH 6/7] Fixed prune --- sentry_sdk/integrations/opentelemetry/span_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index aff261c614..029cd8bc45 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -102,7 +102,7 @@ def _prune_old_spans(self): Prune spans that have been open for too long. """ current_time_minutes = int(time() / 60) - for span_start_minutes in self.open_spans: + for span_start_minutes in list(self.open_spans.keys()): # making a list because we change the dict # prune empty open spans buckets if self.open_spans[span_start_minutes] == set(): self.open_spans.pop(span_start_minutes) From 2aa4247876baffebdf6614dfb44476ef8bbd4182 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 15:00:53 +0100 Subject: [PATCH 7/7] Linting --- sentry_sdk/integrations/opentelemetry/span_processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 029cd8bc45..0db698e239 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -102,7 +102,9 @@ def _prune_old_spans(self): Prune spans that have been open for too long. """ current_time_minutes = int(time() / 60) - for span_start_minutes in list(self.open_spans.keys()): # making a list because we change the dict + for span_start_minutes in list( + self.open_spans.keys() + ): # making a list because we change the dict # prune empty open spans buckets if self.open_spans[span_start_minutes] == set(): self.open_spans.pop(span_start_minutes)