From 3018303c8759b3e96d075c62eeb8b8ef24b4d0c3 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 21 Sep 2024 09:52:16 -0400 Subject: [PATCH] Avoid parsing with Salsa (#13437) ## Summary For reasons I haven't investigated, this speeds up the resolver about 2x (from 6.404s to 3.612s on an extremely large codebase). ## Test Plan \cc @BurntSushi ``` [andrew@duff rippling]$ time ruff analyze graph --preview > /dev/null real 3.274 user 16.039 sys 7.609 maxmem 11631 MB faults 0 [andrew@duff rippling]$ time ruff-patch analyze graph --preview > /dev/null real 1.841 user 14.625 sys 3.639 maxmem 7173 MB faults 0 [andrew@duff rippling]$ time ruff-patch2 analyze graph --preview > /dev/null real 2.087 user 15.333 sys 4.869 maxmem 8642 MB faults 0 ``` Where that's `main`, then (`ruff-patch`) using the version with no `File`, no `SemanticModel`, then (`ruff-patch2`) using `File`. --- Cargo.lock | 1 + crates/ruff_graph/Cargo.toml | 1 + crates/ruff_graph/src/collector.rs | 10 ++++++---- crates/ruff_graph/src/lib.rs | 12 +++++------- crates/ruff_graph/src/resolver.rs | 29 ++++++++++++++--------------- 5 files changed, 27 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 64652add4bfdd..727b38d862f03 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2456,6 +2456,7 @@ dependencies = [ "ruff_linter", "ruff_macros", "ruff_python_ast", + "ruff_python_parser", "salsa", "schemars", "serde", diff --git a/crates/ruff_graph/Cargo.toml b/crates/ruff_graph/Cargo.toml index 601b637873dcd..c9808eace8bc5 100644 --- a/crates/ruff_graph/Cargo.toml +++ b/crates/ruff_graph/Cargo.toml @@ -16,6 +16,7 @@ ruff_db = { workspace = true, features = ["os", "serde"] } ruff_linter = { workspace = true } ruff_macros = { workspace = true } ruff_python_ast = { workspace = true } +ruff_python_parser = { workspace = true } anyhow = { workspace = true } clap = { workspace = true, optional = true } diff --git a/crates/ruff_graph/src/collector.rs b/crates/ruff_graph/src/collector.rs index 2ce801c4d4d19..c9b3a6f9b34cf 100644 --- a/crates/ruff_graph/src/collector.rs +++ b/crates/ruff_graph/src/collector.rs @@ -1,6 +1,8 @@ use red_knot_python_semantic::ModuleName; -use ruff_python_ast::visitor::source_order::{walk_body, walk_expr, walk_stmt, SourceOrderVisitor}; -use ruff_python_ast::{self as ast, Expr, ModModule, Stmt}; +use ruff_python_ast::visitor::source_order::{ + walk_expr, walk_module, walk_stmt, SourceOrderVisitor, +}; +use ruff_python_ast::{self as ast, Expr, Mod, Stmt}; /// Collect all imports for a given Python file. #[derive(Default, Debug)] @@ -23,8 +25,8 @@ impl<'a> Collector<'a> { } #[must_use] - pub(crate) fn collect(mut self, module: &ModModule) -> Vec { - walk_body(&mut self, &module.body); + pub(crate) fn collect(mut self, module: &Mod) -> Vec { + walk_module(&mut self, module); self.imports } } diff --git a/crates/ruff_graph/src/lib.rs b/crates/ruff_graph/src/lib.rs index 2b2761b11739f..f11f03ffe5894 100644 --- a/crates/ruff_graph/src/lib.rs +++ b/crates/ruff_graph/src/lib.rs @@ -3,11 +3,9 @@ pub use crate::db::ModuleDb; use crate::resolver::Resolver; pub use crate::settings::{AnalyzeSettings, Direction}; use anyhow::Result; -use red_knot_python_semantic::SemanticModel; -use ruff_db::files::system_path_to_file; -use ruff_db::parsed::parsed_module; use ruff_db::system::{SystemPath, SystemPathBuf}; use ruff_python_ast::helpers::to_module_path; +use ruff_python_parser::{parse, Mode}; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; @@ -29,11 +27,11 @@ impl ModuleImports { string_imports: bool, ) -> Result { // Read and parse the source code. - let file = system_path_to_file(db, path)?; - let parsed = parsed_module(db, file); + let source = std::fs::read_to_string(path)?; + let parsed = parse(&source, Mode::Module)?; + let module_path = package.and_then(|package| to_module_path(package.as_std_path(), path.as_std_path())); - let model = SemanticModel::new(db, file); // Collect the imports. let imports = @@ -42,7 +40,7 @@ impl ModuleImports { // Resolve the imports. let mut resolved_imports = ModuleImports::default(); for import in imports { - let Some(resolved) = Resolver::new(&model).resolve(import) else { + let Some(resolved) = Resolver::new(db).resolve(import) else { continue; }; let Some(path) = resolved.as_system_path() else { diff --git a/crates/ruff_graph/src/resolver.rs b/crates/ruff_graph/src/resolver.rs index 1de2968eb7278..646834801d9f4 100644 --- a/crates/ruff_graph/src/resolver.rs +++ b/crates/ruff_graph/src/resolver.rs @@ -1,37 +1,36 @@ -use red_knot_python_semantic::SemanticModel; +use red_knot_python_semantic::resolve_module; use ruff_db::files::FilePath; use crate::collector::CollectedImport; +use crate::ModuleDb; /// Collect all imports for a given Python file. pub(crate) struct Resolver<'a> { - semantic: &'a SemanticModel<'a>, + db: &'a ModuleDb, } impl<'a> Resolver<'a> { - /// Initialize a [`Resolver`] with a given [`SemanticModel`]. - pub(crate) fn new(semantic: &'a SemanticModel<'a>) -> Self { - Self { semantic } + /// Initialize a [`Resolver`] with a given [`ModuleDb`]. + pub(crate) fn new(db: &'a ModuleDb) -> Self { + Self { db } } /// Resolve the [`CollectedImport`] into a [`FilePath`]. pub(crate) fn resolve(&self, import: CollectedImport) -> Option<&'a FilePath> { match import { - CollectedImport::Import(import) => self - .semantic - .resolve_module(import) - .map(|module| module.file().path(self.semantic.db())), + CollectedImport::Import(import) => { + resolve_module(self.db, import).map(|module| module.file().path(self.db)) + } CollectedImport::ImportFrom(import) => { // Attempt to resolve the member (e.g., given `from foo import bar`, look for `foo.bar`). let parent = import.parent(); - self.semantic - .resolve_module(import) - .map(|module| module.file().path(self.semantic.db())) + + resolve_module(self.db, import) + .map(|module| module.file().path(self.db)) .or_else(|| { // Attempt to resolve the module (e.g., given `from foo import bar`, look for `foo`). - self.semantic - .resolve_module(parent?) - .map(|module| module.file().path(self.semantic.db())) + + resolve_module(self.db, parent?).map(|module| module.file().path(self.db)) }) } }