From 0c80a3d233036bffcc778d3d585d1f928cfa8edb Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 13:52:05 -0500 Subject: [PATCH 1/9] Implement first pass of prefix stripping. --- filprofiler/_script.py | 9 ++++++++ memapi/src/memorytracking.rs | 9 +++++++- memapi/src/python.rs | 42 ++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/filprofiler/_script.py b/filprofiler/_script.py index 10ecde63..b8d061a8 100644 --- a/filprofiler/_script.py +++ b/filprofiler/_script.py @@ -241,8 +241,17 @@ def stage_2(): PARSER.print_help() sys.exit(2) script = rest[0] + + # Current directory might be added to sys.path as side-effect of how + # this code runs. We do NOT want that, it doesn't match normal Python + # behavior. + try: + sys.path.remove(os.getcwd()) + except ValueError: + pass # Make directory where script is importable: sys.path.insert(0, dirname(abspath(script))) + function = runpy.run_path func_args = (script,) func_kwargs = {"run_name": "__main__"} diff --git a/memapi/src/memorytracking.rs b/memapi/src/memorytracking.rs index 58a234ab..d2757e48 100644 --- a/memapi/src/memorytracking.rs +++ b/memapi/src/memorytracking.rs @@ -2,6 +2,7 @@ use crate::flamegraph::filter_to_useful_callstacks; use crate::flamegraph::write_flamegraphs; use crate::flamegraph::write_lines; use crate::python::get_runpy_path; +use crate::python::PrefixStripper; use super::rangemap::RangeMap; use super::util::new_hashmap; @@ -160,6 +161,7 @@ impl Callstack { pub fn as_string( &self, to_be_post_processed: bool, + prefix_stripper: Option<&PrefixStripper>, functions: &FunctionLocations, separator: &'static str, ) -> String { @@ -186,6 +188,9 @@ impl Callstack { // Get Python code. let code = crate::python::get_source_line(filename, id.line_number) .unwrap_or_else(|_| "".to_string()); + let filename = prefix_stripper + .map(|ps| ps.strip_prefix(filename)) + .unwrap_or(filename); // Leading whitespace is dropped by SVG, so we'd like to // replace it with non-breaking space. However, inferno // trims whitespace @@ -372,7 +377,7 @@ impl<'a> AllocationTracker { eprintln!("=fil-profile= {}", message); eprintln!( "=| {}", - callstack.as_string(false, &self.functions, "\n=| ") + callstack.as_string(false, None, &self.functions, "\n=| ") ); } @@ -550,11 +555,13 @@ impl<'a> AllocationTracker { let by_call = self.combine_callstacks(peak).into_iter(); let id_to_callstack = self.interner.get_reverse_map(); let functions = &self.functions; + let prefix_stripper = PrefixStripper::new(); let lines = by_call.map(move |(callstack_id, size)| { format!( "{} {}", id_to_callstack.get(&callstack_id).unwrap().as_string( to_be_post_processed, + Some(&prefix_stripper), functions, ";" ), diff --git a/memapi/src/python.rs b/memapi/src/python.rs index bdab80b9..10065626 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -3,7 +3,7 @@ use once_cell::sync::Lazy; use pyo3::prelude::*; use pyo3::types::PyModule; -// Get the source code line from a given filename. +/// Get the source code line from a given filename. pub fn get_source_line(filename: &str, line_number: u16) -> PyResult { Python::with_gil(|py| { let linecache = PyModule::import(py, "linecache")?; @@ -15,7 +15,7 @@ pub fn get_source_line(filename: &str, line_number: u16) -> PyResult { }) } -// Return the filesystem path of the stdlib's runpy module. +/// Return the filesystem path of the stdlib's runpy module. pub fn get_runpy_path() -> &'static str { static PATH: Lazy = Lazy::new(|| { Python::with_gil(|py| { @@ -25,3 +25,41 @@ pub fn get_runpy_path() -> &'static str { }); PATH.as_str() } + +/// Strip sys.path prefixes from Python modules' pathes. +pub struct PrefixStripper { + prefixes: Vec, +} + +impl PrefixStripper { + pub fn new() -> Self { + let prefixes = Python::with_gil(|py| { + let paths = py.eval( + // 1. Drop non-string values, they're not something we can understand. + // 2. Drop empty string, it's misleading. + // 3. Add '/' to end of all paths. + "[__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)]", + None, + None, + ); + paths + .map(|p| p.extract::>().unwrap_or_else(|_| vec![])) + .unwrap_or_else(|_| vec![]) + }); + PrefixStripper { prefixes } + } + + /// Remove the sys.path prefix from a path to an imported module. + /// + /// E.g. if the input is "/usr/lib/python3.9/threading.py", the result will + /// probably be "threading.py". + pub fn strip_prefix<'a>(&self, path: &'a str) -> &'a str { + for prefix in &self.prefixes { + if path.starts_with(prefix) { + return &path[prefix.len()..path.len()]; + } + } + // No prefix found. + path + } +} From f20bcb2574ef7a3942db47fa46109e9e47163061 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 14:32:38 -0500 Subject: [PATCH 2/9] Do full install. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4ab05b9e..ab262eec 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ MAKEFLAGS += --no-builtin-rules .PHONY: build build: - pip install -e . + pip install . python setup.py install_data target/release/libfilpreload.so: Cargo.lock memapi/Cargo.toml memapi/src/*.rs filpreload/src/*.rs filpreload/src/*.c From 0eb240aa3bda04b265b2a7d62af0777ecf32e81c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 19:13:06 -0500 Subject: [PATCH 3/9] Fix some lints. --- filpreload/src/lib.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/filpreload/src/lib.rs b/filpreload/src/lib.rs index 5d7b5175..d98a77ab 100644 --- a/filpreload/src/lib.rs +++ b/filpreload/src/lib.rs @@ -37,7 +37,7 @@ lazy_static! { /// Register a new function/filename location. fn add_function(filename: String, function_name: String) -> FunctionId { - let mut tracker_state = TRACKER_STATE.try_lock(); + let tracker_state = TRACKER_STATE.try_lock(); if let Some(mut tracker_state) = tracker_state { tracker_state .allocations @@ -305,10 +305,6 @@ extern "C" { // Return whether C code has initialized. fn is_initialized() -> c_int; - - // Increment/decrement reentrancy counter. - fn fil_increment_reentrancy(); - fn fil_decrement_reentrancy(); } struct FilMmapAPI; From 0c42ea596df5d5813673578bf6d650d6a947f7ca Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 19:58:11 -0500 Subject: [PATCH 4/9] News file. --- .changelog/264.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 .changelog/264.feature diff --git a/.changelog/264.feature b/.changelog/264.feature new file mode 100644 index 00000000..648e6098 --- /dev/null +++ b/.changelog/264.feature @@ -0,0 +1 @@ +Paths to modules no longer include repetitive boilerplate prefixes like "/usr/lib/python3.9/". \ No newline at end of file From 0aa83af5ba79d5dd81d31df5eda90a48f6d28174 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 20:00:48 -0500 Subject: [PATCH 5/9] Unit tests. --- memapi/src/python.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/memapi/src/python.rs b/memapi/src/python.rs index 10065626..4f78c401 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -63,3 +63,43 @@ impl PrefixStripper { path } } + +#[cfg(test)] +mod tests { + use pyo3::Python; + + use crate::python::PrefixStripper; + + /// Get the filesystem path of a Python module. + fn get_module_path(module: &str) -> String { + Python::with_gil(|py| { + py.eval( + &("__import__('".to_owned() + module + "').__file__"), + None, + None, + ) + .unwrap() + .extract() + .unwrap() + }) + } + + #[test] + fn prefix_stripping() { + pyo3::prepare_freethreaded_python(); + let ps = PrefixStripper::new(); + // stdlib + assert_eq!( + ps.strip_prefix(&get_module_path("threading")), + "threading.py" + ); + // site-packages + assert_eq!( + ps.strip_prefix(&get_module_path("filprofiler")), + "filprofiler/__init__.py" + ); + // random paths + assert_eq!(ps.strip_prefix("/x/blah.py"), "/x/blah.py"); + assert_eq!(ps.strip_prefix("foo.py"), "foo.py"); + } +} From b69e2d590b3a2fd377e6c6b2fe73120ecb2f06ea Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 20:11:48 -0500 Subject: [PATCH 6/9] This should always be available. --- memapi/src/python.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/memapi/src/python.rs b/memapi/src/python.rs index 4f78c401..7f45b42d 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -94,10 +94,7 @@ mod tests { "threading.py" ); // site-packages - assert_eq!( - ps.strip_prefix(&get_module_path("filprofiler")), - "filprofiler/__init__.py" - ); + assert_eq!(ps.strip_prefix(&get_module_path("pip")), "pip/__init__.py"); // random paths assert_eq!(ps.strip_prefix("/x/blah.py"), "/x/blah.py"); assert_eq!(ps.strip_prefix("foo.py"), "foo.py"); From bf6b29fd7af7fdfbac0c1522c85cbfc87b1142ac Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 27 Nov 2021 21:13:58 -0500 Subject: [PATCH 7/9] Sort paths. --- memapi/src/python.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/memapi/src/python.rs b/memapi/src/python.rs index 7f45b42d..ce821339 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -38,7 +38,8 @@ impl PrefixStripper { // 1. Drop non-string values, they're not something we can understand. // 2. Drop empty string, it's misleading. // 3. Add '/' to end of all paths. - "[__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)]", + // 4. Sorted, so most specific ones are first. + "list(sorted([__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)]))", None, None, ); From 28854d88b9a7762778ee6a4decde54993c9cf63a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sun, 28 Nov 2021 14:02:22 -0500 Subject: [PATCH 8/9] Reverse. --- memapi/src/python.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/memapi/src/python.rs b/memapi/src/python.rs index ce821339..e3251fab 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -39,7 +39,7 @@ impl PrefixStripper { // 2. Drop empty string, it's misleading. // 3. Add '/' to end of all paths. // 4. Sorted, so most specific ones are first. - "list(sorted([__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)]))", + "list(reversed(sorted([__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)])))", None, None, ); From 2cbae6e247454edb4a1c7a37c193ce8e750c35a3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 29 Nov 2021 17:24:53 -0500 Subject: [PATCH 9/9] Sort so longest path is first. --- memapi/src/python.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/memapi/src/python.rs b/memapi/src/python.rs index e3251fab..d474231a 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -38,8 +38,8 @@ impl PrefixStripper { // 1. Drop non-string values, they're not something we can understand. // 2. Drop empty string, it's misleading. // 3. Add '/' to end of all paths. - // 4. Sorted, so most specific ones are first. - "list(reversed(sorted([__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)])))", + // 4. Sorted, so most specific (i.e. longest) ones are first. + "list(sorted([__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)], key=lambda i: -len(i)))", None, None, );