Skip to content

Commit 4827f3a

Browse files
authored
Speed up processing of new files in daemon by caching ASTs (#10128)
Processing newly installed stub files, in particular, could be quite slow incrementally in mypy daemon. This is because adding N files results in N steps internally, each of which adds one file. However, each step parses all remaining files, resulting in an O(n**2) algorithm. For example, processing `six` stubs could take about 40s (when not using a compiled mypy). Partially address the issue by caching parsed ASTs during a single increment. This speeds up the `import six` use case by about 3x when not using a compiled mypy. It's still about 3x slower when using daemon, however.
1 parent 9cbf4c0 commit 4827f3a

File tree

5 files changed

+58
-13
lines changed

5 files changed

+58
-13
lines changed

mypy/build.py

+40-10
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ class BuildManager:
562562
not only for debugging, but also required for correctness,
563563
in particular to check consistency of the fine-grained dependency cache.
564564
fscache: A file system cacher
565+
ast_cache: AST cache to speed up mypy daemon
565566
"""
566567

567568
def __init__(self, data_dir: str,
@@ -645,6 +646,14 @@ def __init__(self, data_dir: str,
645646
self.processed_targets = [] # type: List[str]
646647
# Missing stub packages encountered.
647648
self.missing_stub_packages = set() # type: Set[str]
649+
# Cache for mypy ASTs that have completed semantic analysis
650+
# pass 1. When multiple files are added to the build in a
651+
# single daemon increment, only one of the files gets added
652+
# per step and the others are discarded. This gets repeated
653+
# until all the files have been added. This means that a
654+
# new file can be processed O(n**2) times. This cache
655+
# avoids most of this redundant work.
656+
self.ast_cache = {} # type: Dict[str, Tuple[MypyFile, List[ErrorInfo]]]
648657

649658
def dump_stats(self) -> None:
650659
if self.options.dump_build_stats:
@@ -1994,8 +2003,14 @@ def parse_file(self) -> None:
19942003
return
19952004

19962005
manager = self.manager
2006+
2007+
# Can we reuse a previously parsed AST? This avoids redundant work in daemon.
2008+
cached = self.id in manager.ast_cache
19972009
modules = manager.modules
1998-
manager.log("Parsing %s (%s)" % (self.xpath, self.id))
2010+
if not cached:
2011+
manager.log("Parsing %s (%s)" % (self.xpath, self.id))
2012+
else:
2013+
manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))
19992014

20002015
with self.wrap_context():
20012016
source = self.source
@@ -2026,21 +2041,36 @@ def parse_file(self) -> None:
20262041
self.source_hash = compute_hash(source)
20272042

20282043
self.parse_inline_configuration(source)
2029-
self.tree = manager.parse_file(self.id, self.xpath, source,
2030-
self.ignore_all or self.options.ignore_errors,
2031-
self.options)
2044+
if not cached:
2045+
self.tree = manager.parse_file(self.id, self.xpath, source,
2046+
self.ignore_all or self.options.ignore_errors,
2047+
self.options)
20322048

2033-
modules[self.id] = self.tree
2049+
else:
2050+
# Reuse a cached AST
2051+
self.tree = manager.ast_cache[self.id][0]
2052+
manager.errors.set_file_ignored_lines(
2053+
self.xpath,
2054+
self.tree.ignored_lines,
2055+
self.ignore_all or self.options.ignore_errors)
2056+
2057+
if not cached:
2058+
# Make a copy of any errors produced during parse time so that
2059+
# fine-grained mode can repeat them when the module is
2060+
# reprocessed.
2061+
self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
2062+
else:
2063+
self.early_errors = manager.ast_cache[self.id][1]
20342064

2035-
# Make a copy of any errors produced during parse time so that
2036-
# fine-grained mode can repeat them when the module is
2037-
# reprocessed.
2038-
self.early_errors = list(manager.errors.error_info_map.get(self.xpath, []))
2065+
modules[self.id] = self.tree
20392066

2040-
self.semantic_analysis_pass1()
2067+
if not cached:
2068+
self.semantic_analysis_pass1()
20412069

20422070
self.check_blockers()
20432071

2072+
manager.ast_cache[self.id] = (self.tree, self.early_errors)
2073+
20442074
def parse_inline_configuration(self, source: str) -> None:
20452075
"""Check for inline mypy: options directive and parse them."""
20462076
flags = get_mypy_comments(source)

mypy/dmypy_server.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ def cmd_recheck(self,
373373
assert remove is None and update is None
374374
messages = self.fine_grained_increment_follow_imports(sources)
375375
res = self.increment_output(messages, sources, is_tty, terminal_width)
376-
self.fscache.flush()
376+
self.flush_caches()
377377
self.update_stats(res)
378378
return res
379379

@@ -392,10 +392,15 @@ def check(self, sources: List[BuildSource],
392392
else:
393393
messages = self.fine_grained_increment_follow_imports(sources)
394394
res = self.increment_output(messages, sources, is_tty, terminal_width)
395-
self.fscache.flush()
395+
self.flush_caches()
396396
self.update_stats(res)
397397
return res
398398

399+
def flush_caches(self) -> None:
400+
self.fscache.flush()
401+
if self.fine_grained_manager:
402+
self.fine_grained_manager.flush_cache()
403+
399404
def update_stats(self, res: Dict[str, Any]) -> None:
400405
if self.fine_grained_manager:
401406
manager = self.fine_grained_manager.manager
@@ -852,7 +857,7 @@ def cmd_suggest(self,
852857
out += "\n"
853858
return {'out': out, 'err': "", 'status': 0}
854859
finally:
855-
self.fscache.flush()
860+
self.flush_caches()
856861

857862
def cmd_hang(self) -> Dict[str, object]:
858863
"""Hang for 100 seconds, as a debug hack."""

mypy/server/update.py

+8
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,14 @@ def trigger(self, target: str) -> List[str]:
288288
self.previous_messages = self.manager.errors.new_messages()[:]
289289
return self.update(changed_modules, [])
290290

291+
def flush_cache(self) -> None:
292+
"""Flush AST cache.
293+
294+
This needs to be called after each increment, or file changes won't
295+
be detected reliably.
296+
"""
297+
self.manager.ast_cache.clear()
298+
291299
def update_one(self,
292300
changed_modules: List[Tuple[str, str]],
293301
initial_set: Set[str],

mypy/suggestions.py

+1
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ def reload(self, state: State, check_errors: bool = False) -> List[str]:
640640
If check_errors is true, raise an exception if there are errors.
641641
"""
642642
assert state.path is not None
643+
self.fgmanager.flush_cache()
643644
return self.fgmanager.update([(state.id, state.path)], [])
644645

645646
def ensure_loaded(self, state: State, force: bool = False) -> MypyFile:

mypy/test/testmerge.py

+1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def build(self, source: str, testcase: DataDrivenTestCase) -> Optional[BuildResu
124124
def build_increment(self, manager: FineGrainedBuildManager,
125125
module_id: str, path: str) -> Tuple[MypyFile,
126126
Dict[Expression, Type]]:
127+
manager.flush_cache()
127128
manager.update([(module_id, path)], [])
128129
module = manager.manager.modules[module_id]
129130
type_map = manager.graph[module_id].type_map()

0 commit comments

Comments
 (0)