feat: store more benchmark metadata in results
Use JSON representation as benchmark identifier in valgrind instrumentation
adriencaccia committed Jan 9, 2025
1 parent 41cdf39 commit fbae262
Showing 12 changed files with 317 additions and 74 deletions.
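In short: instruments no longer receive a bare name/uri pair but a full Benchmark dataclass (file, module, groups, name, args, args_names), and the valgrind instrument now uses that dataclass's compact JSON form as its identifier. Roughly, reusing the example path from the get_git_relative_uri_and_name docstring removed in utils.py below (the exact payload depends on the test, so treat this as an illustration):

old_identifier = "folder/testing/test_excinfo.py::TestFormattedExcinfo::test_repr_source"
new_identifier = (
    '{"args":[],"args_names":[],'
    '"file":"folder/testing/test_excinfo.py",'
    '"groups":[],"module":"TestFormattedExcinfo",'
    '"name":"test_repr_source"}'
)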
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -43,7 +43,7 @@ compat = [
"pytest-xdist ~= 3.6.1",
# "pytest-speed>=0.3.5",
]
test = ["pytest ~= 7.0", "pytest-cov ~= 4.0.0"]
test = ["inline-snapshot>=0.18.2", "pytest ~= 7.0", "pytest-cov ~= 4.0.0"]

[project.entry-points]
pytest11 = { codspeed = "pytest_codspeed.plugin" }
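The new inline-snapshot test dependency is presumably there to snapshot the richer result payloads in the test suite; for reference, a minimal, purely illustrative usage looks like this:

from inline_snapshot import snapshot

def test_metadata_shape():
    data = {"name": "test_add", "groups": []}
    assert data == snapshot({"name": "test_add", "groups": []})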
55 changes: 55 additions & 0 deletions src/pytest_codspeed/benchmark.py
@@ -0,0 +1,55 @@
from __future__ import annotations

import json
from dataclasses import dataclass

import pytest

from pytest_codspeed.utils import get_git_relative_path


def has_args(item: pytest.Item) -> bool:
return isinstance(item, pytest.Function) and "callspec" in item.__dict__


@dataclass
class Benchmark:
file: str
module: str
groups: list[str]
name: str
args: list
args_names: list[str]

@classmethod
def from_item(cls, item: pytest.Item) -> Benchmark:
file = str(get_git_relative_path(item.path))
module = "::".join(
[node.name for node in item.listchain() if isinstance(node, pytest.Class)]
)
name = item.originalname if isinstance(item, pytest.Function) else item.name
args = list(item.callspec.params.values()) if has_args(item) else []
args_names = list(item.callspec.params.keys()) if has_args(item) else []
groups = []
benchmark_marker = item.get_closest_marker("benchmark")
if benchmark_marker is not None:
benchmark_marker_kwargs = benchmark_marker.kwargs.get("group")
if benchmark_marker_kwargs is not None:
groups.append(benchmark_marker_kwargs)

return cls(
file=file,
module=module,
groups=groups,
name=name,
args=args,
args_names=args_names,
)

@property
def display_name(self) -> str:
args_str = f"[{'-'.join(map(str, self.args))}]" if len(self.args) > 0 else ""
return f"{self.name}{args_str}"

def to_json_string(self) -> str:
return json.dumps(self.__dict__, separators=(",", ":"), sort_keys=True)
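
A minimal usage sketch of the new dataclass, constructed by hand here rather than via from_item, with made-up field values:

from pytest_codspeed.benchmark import Benchmark

bench = Benchmark(
    file="tests/test_math.py",
    module="TestAddition",
    groups=["math"],
    name="test_add",
    args=[1, 2],
    args_names=["a", "b"],
)
print(bench.display_name)      # test_add[1-2]
print(bench.to_json_string())
# {"args":[1,2],"args_names":["a","b"],"file":"tests/test_math.py","groups":["math"],"module":"TestAddition","name":"test_add"}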
4 changes: 2 additions & 2 deletions src/pytest_codspeed/instruments/__init__.py
@@ -9,6 +9,7 @@

import pytest

from pytest_codspeed.benchmark import Benchmark
from pytest_codspeed.plugin import CodSpeedConfig

T = TypeVar("T")
@@ -27,8 +28,7 @@ def get_instrument_config_str_and_warns(self) -> tuple[str, list[str]]: ...
@abstractmethod
def measure(
self,
name: str,
uri: str,
benchmark: Benchmark,
fn: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
11 changes: 7 additions & 4 deletions src/pytest_codspeed/instruments/valgrind/__init__.py
@@ -2,6 +2,7 @@

import os
import sys
from dataclasses import asdict
from typing import TYPE_CHECKING

from pytest_codspeed import __semver_version__
@@ -13,6 +14,7 @@

from pytest import Session

from pytest_codspeed.benchmark import Benchmark
from pytest_codspeed.instruments import P, T
from pytest_codspeed.instruments.valgrind._wrapper import LibType
from pytest_codspeed.plugin import CodSpeedConfig
@@ -26,6 +28,7 @@ class ValgrindInstrument(Instrument):

def __init__(self, config: CodSpeedConfig) -> None:
self.benchmark_count = 0
self.benchmarks: list[Benchmark] = []
self.should_measure = os.environ.get("CODSPEED_ENV") is not None
if self.should_measure:
self.lib = get_lib()
@@ -54,13 +57,13 @@ def get_instrument_config_str_and_warns(self) -> tuple[str, list[str]]:

def measure(
self,
name: str,
uri: str,
benchmark: Benchmark,
fn: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
) -> T:
self.benchmark_count += 1
self.benchmarks.append(benchmark)
if self.lib is None: # Thus should_measure is False
return fn(*args, **kwargs)

@@ -78,7 +81,7 @@ def __codspeed_root_frame__() -> T:
finally:
# Ensure instrumentation is stopped even if the test failed
self.lib.stop_instrumentation()
self.lib.dump_stats_at(uri.encode("ascii"))
self.lib.dump_stats_at(benchmark.to_json_string().encode("ascii"))

def report(self, session: Session) -> None:
reporter = session.config.pluginmanager.get_plugin("terminalreporter")
@@ -91,5 +94,5 @@ def report(self, session: Session) -> None:
def get_result_dict(self) -> dict[str, Any]:
return {
"instrument": {"type": self.instrument},
# bench results will be dumped by valgrind
"benchmarks": [asdict(bench) for bench in self.benchmarks],
}
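
With this change the valgrind instrument records which benchmarks ran, while the measurements themselves are still dumped by valgrind. A sketch of the shape get_result_dict now returns (field values made up):

{
    "instrument": {"type": "valgrind"},
    "benchmarks": [
        {
            "file": "tests/test_math.py",
            "module": "",
            "groups": [],
            "name": "test_add",
            "args": [1, 2],
            "args_names": ["a", "b"],
        }
    ],
}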
46 changes: 25 additions & 21 deletions src/pytest_codspeed/instruments/walltime.py
@@ -11,6 +11,7 @@
from rich.table import Table
from rich.text import Text

from pytest_codspeed.benchmark import Benchmark
from pytest_codspeed.instruments import Instrument

if TYPE_CHECKING:
@@ -28,14 +29,14 @@


@dataclass
class BenchmarkConfig:
class WalltimeBenchmarkConfig:
warmup_time_ns: int
min_round_time_ns: float
max_time_ns: int
max_rounds: int | None

@classmethod
def from_codspeed_config(cls, config: CodSpeedConfig) -> BenchmarkConfig:
def from_codspeed_config(cls, config: CodSpeedConfig) -> WalltimeBenchmarkConfig:
return cls(
warmup_time_ns=config.warmup_time_ns
if config.warmup_time_ns is not None
@@ -49,7 +50,7 @@ def from_codspeed_config(cls, config: CodSpeedConfig) -> BenchmarkConfig:


@dataclass
class BenchmarkStats:
class WalltimeBenchmarkStats:
min_ns: float
max_ns: float
mean_ns: float
@@ -75,7 +76,7 @@ def from_list(
iter_per_round: int,
warmup_iters: int,
total_time: float,
) -> BenchmarkStats:
) -> WalltimeBenchmarkStats:
stdev_ns = stdev(times_ns) if len(times_ns) > 1 else 0
mean_ns = mean(times_ns)
if len(times_ns) > 1:
@@ -114,17 +115,18 @@ def from_list(


@dataclass
class Benchmark:
name: str
uri: str

config: BenchmarkConfig
stats: BenchmarkStats
class WalltimeBenchmark(Benchmark):
config: WalltimeBenchmarkConfig
stats: WalltimeBenchmarkStats


def run_benchmark(
name: str, uri: str, fn: Callable[P, T], args, kwargs, config: BenchmarkConfig
) -> tuple[Benchmark, T]:
benchmark: Benchmark,
fn: Callable[P, T],
args,
kwargs,
config: WalltimeBenchmarkConfig,
) -> tuple[WalltimeBenchmark, T]:
# Compute the actual result of the function
out = fn(*args, **kwargs)

@@ -171,42 +173,44 @@ def run_benchmark(
benchmark_end = perf_counter_ns()
total_time = (benchmark_end - run_start) / 1e9

stats = BenchmarkStats.from_list(
stats = WalltimeBenchmarkStats.from_list(
times_ns,
rounds=rounds,
total_time=total_time,
iter_per_round=iter_per_round,
warmup_iters=warmup_iters,
)

return Benchmark(name=name, uri=uri, config=config, stats=stats), out
return WalltimeBenchmark(
**asdict(benchmark),
config=config,
stats=stats,
), out


class WallTimeInstrument(Instrument):
instrument = "walltime"

def __init__(self, config: CodSpeedConfig) -> None:
self.config = config
self.benchmarks: list[Benchmark] = []
self.benchmarks: list[WalltimeBenchmark] = []

def get_instrument_config_str_and_warns(self) -> tuple[str, list[str]]:
return f"mode: walltime, timer_resolution: {TIMER_RESOLUTION_NS:.1f}ns", []

def measure(
self,
name: str,
uri: str,
benchmark: Benchmark,
fn: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
) -> T:
bench, out = run_benchmark(
name=name,
uri=uri,
benchmark=benchmark,
fn=fn,
args=args,
kwargs=kwargs,
config=BenchmarkConfig.from_codspeed_config(self.config),
config=WalltimeBenchmarkConfig.from_codspeed_config(self.config),
)
self.benchmarks.append(bench)
return out
@@ -244,7 +248,7 @@ def _print_benchmark_table(self) -> None:
if rsd > 0.1:
rsd_text.stylize("red bold")
table.add_row(
escape(bench.name),
escape(bench.display_name),
f"{bench.stats.min_ns/bench.stats.iter_per_round:,.0f}ns",
rsd_text,
f"{bench.stats.total_time:,.2f}s",
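The WalltimeBenchmark(**asdict(benchmark), ...) construction above is a general dataclass pattern: extend a dataclass with extra fields and build the subclass by splatting the parent instance's fields. A self-contained sketch with illustrative names (Base/Extended are not from the codebase):

from dataclasses import asdict, dataclass

@dataclass
class Base:
    name: str
    args: list

@dataclass
class Extended(Base):
    stats: dict

base = Base(name="test_add", args=[1, 2])
extended = Extended(**asdict(base), stats={"mean_ns": 123.0})
print(extended)
# Extended(name='test_add', args=[1, 2], stats={'mean_ns': 123.0})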
26 changes: 12 additions & 14 deletions src/pytest_codspeed/plugin.py
@@ -13,13 +13,13 @@
import pytest
from _pytest.fixtures import FixtureManager

from pytest_codspeed.benchmark import Benchmark
from pytest_codspeed.instruments import (
MeasurementMode,
get_instrument_from_mode,
)
from pytest_codspeed.utils import (
get_environment_metadata,
get_git_relative_uri_and_name,
)

from . import __version__
@@ -253,8 +253,7 @@ def pytest_collection_modifyitems(

def _measure(
plugin: CodSpeedPlugin,
nodeid: str,
config: pytest.Config,
item: pytest.Item,
fn: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
@@ -264,8 +263,8 @@ def _measure(
gc.collect()
gc.disable()
try:
uri, name = get_git_relative_uri_and_name(nodeid, config.rootpath)
return plugin.instrument.measure(name, uri, fn, *args, **kwargs)
benchmark = Benchmark.from_item(item)
return plugin.instrument.measure(benchmark, fn, *args, **kwargs)
finally:
# Ensure GC is re-enabled even if the test failed
if is_gc_enabled:
@@ -274,13 +273,13 @@

def wrap_runtest(
plugin: CodSpeedPlugin,
nodeid: str,
config: pytest.Config,
fn: Callable[P, T],
item: pytest.Item,
) -> Callable[P, T]:
fn = item.runtest

@functools.wraps(fn)
def wrapped(*args: P.args, **kwargs: P.kwargs) -> T:
return _measure(plugin, nodeid, config, fn, *args, **kwargs)
return _measure(plugin, item, fn, *args, **kwargs)

return wrapped

@@ -297,7 +296,7 @@ def pytest_runtest_protocol(item: pytest.Item, nextitem: pytest.Item | None):
return None

# Wrap runtest and defer to default protocol
item.runtest = wrap_runtest(plugin, item.nodeid, item.config, item.runtest)
item.runtest = wrap_runtest(plugin, item)
return None


@@ -340,10 +339,9 @@ def __init__(self, request: pytest.FixtureRequest):
def __call__(self, func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
config = self._request.config
plugin = get_plugin(config)
if plugin.is_codspeed_enabled:
return _measure(
plugin, self._request.node.nodeid, config, func, *args, **kwargs
)
item = self._request.node
if plugin.is_codspeed_enabled and isinstance(item, pytest.Item):
return _measure(plugin, item, func, *args, **kwargs)
else:
return func(*args, **kwargs)

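For reference, a test along these lines (illustrative, assuming the benchmark fixture exposed by the plugin) would now be measured with args=[1, 2], args_names=["a", "b"], and groups=["math"] attached to its result:

import pytest

@pytest.mark.benchmark(group="math")
@pytest.mark.parametrize("a,b", [(1, 2), (3, 4)])
def test_add(benchmark, a, b):
    benchmark(lambda: a + b)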
20 changes: 0 additions & 20 deletions src/pytest_codspeed/utils.py
@@ -27,26 +27,6 @@ def get_git_relative_path(abs_path: Path) -> Path:
return abs_path


def get_git_relative_uri_and_name(nodeid: str, pytest_rootdir: Path) -> tuple[str, str]:
"""Get the benchmark uri relative to the git root dir and the benchmark name.
Args:
nodeid (str): the pytest nodeid, for example:
testing/test_excinfo.py::TestFormattedExcinfo::test_repr_source
pytest_rootdir (str): the pytest root dir, for example:
/home/user/gitrepo/folder
Returns:
str: the benchmark uri relative to the git root dir, for example:
folder/testing/test_excinfo.py::TestFormattedExcinfo::test_repr_source
"""
file_path, bench_name = nodeid.split("::", 1)
absolute_file_path = pytest_rootdir / Path(file_path)
relative_git_path = get_git_relative_path(absolute_file_path)
return (f"{str(relative_git_path)}::{bench_name}", bench_name)


def get_environment_metadata() -> dict[str, dict]:
return {
"creator": {
5 changes: 4 additions & 1 deletion tests/conftest.py
@@ -81,7 +81,10 @@ def run_pytest_codspeed_with_mode(
if mode == MeasurementMode.WallTime:
# Run only 1 round to speed up the test times
csargs.extend(["--codspeed-warmup-time=0", "--codspeed-max-rounds=2"])
return pytester.runpytest(
# create empty `.git` folder in the rootdir to simulate a git repository
if not pytester.path.joinpath(".git").exists():
pytester.mkdir(".git")
return pytester.runpytest_subprocess(
*csargs,
*args,
**kwargs,
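The empty .git directory matters because Benchmark.file comes from get_git_relative_path, so a fake repo root keeps the recorded file paths deterministic in the test suite. A hedged sketch of the expected behaviour (the exact semantics of get_git_relative_path are assumed from its name and the fallback visible in the utils.py hunk above):

from pathlib import Path
from pytest_codspeed.utils import get_git_relative_path

# Assuming /tmp/fake_repo contains a ".git" directory, the path should come
# back relative to that root; when no ".git" is found, the absolute path is
# returned unchanged.
print(get_git_relative_path(Path("/tmp/fake_repo/tests/test_math.py")))
# expected: tests/test_math.py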