Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add caller func name into type_info.json to make it clear which function is doing wrong #116

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions pyannotate_runtime/collect_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def _my_hash(arg_list):
FunctionData = TypedDict('FunctionData', {'path': str,
'line': int,
'func_name': str,
'caller_names': List[str],
'type_comments': List[str],
'samples': int})

Expand Down Expand Up @@ -402,10 +403,10 @@ def __repr__(self):
('varargs', Optional[List[InternalType]])])

# Task queue entry for calling a function with specific argument types
KeyAndTypes = NamedTuple('KeyAndTypes', [('key', FunctionKey), ('types', ResolvedTypes)])
KeyAndTypes = NamedTuple('KeyAndTypes', [('key', FunctionKey), ('types', ResolvedTypes), ('caller_name', str)])

# Task queue entry for returning from a function with a value
KeyAndReturn = NamedTuple('KeyAndReturn', [('key', FunctionKey), ('return_type', InternalType)])
KeyAndReturn = NamedTuple('KeyAndReturn', [('key', FunctionKey), ('return_type', InternalType), ('caller_name', str)])

# Combined argument and return types for a single function call
Signature = NamedTuple('Signature', [('args', 'ArgTypes'), ('return_type', InternalType)])
Expand Down Expand Up @@ -445,7 +446,7 @@ def name_from_type(type_):

# TODO: Make this faster
def get_function_name_from_frame(frame):
# type: (Any) -> str
# type: (Any) -> Optional[str]
"""
Heuristic to find the class-specified name by @guido

Expand All @@ -472,6 +473,8 @@ def bases_to_mro(cls, bases):
code = frame.f_code
# This ought to be aggressively cached with the code object as key.
funcname = code.co_name
if funcname == "<module>":
return None
if code.co_varnames:
varname = code.co_varnames[0]
if varname == 'self':
Expand All @@ -494,6 +497,16 @@ def bases_to_mro(cls, bases):
return '%s.%s' % (cls.__name__, funcname)
return funcname

def get_caller_info_from_frame(frame):
    # type: (Any) -> str
    """
    Describe the caller of ``frame`` as ``'<module>.<function> : <lineno>'``.

    The module part comes from the caller frame's ``__name__`` global and the
    function part from ``get_function_name_from_frame``; whichever of the two
    is missing (e.g. the function name is None for module-level code) is
    left out of the dotted name.

    Returns ``'<unknown>'`` when ``frame`` has no calling frame.
    """
    caller_frame = frame.f_back
    if caller_frame is None:
        # Outermost frame of a stack (for instance a thread's entry point):
        # there is no caller to describe, and touching attributes of None
        # would raise inside the profiling hook.
        return '<unknown>'

    # .get() rather than [] so code executed with bare globals (no __name__)
    # does not raise KeyError.
    module_name = caller_frame.f_globals.get('__name__')
    function_name = get_function_name_from_frame(caller_frame)
    caller_name = '.'.join(part for part in (module_name, function_name) if part)

    # %-formatting instead of an f-string: keeps the module importable on
    # interpreters without f-string support, matching the rest of the file.
    return '%s : %s' % (caller_name, caller_frame.f_lineno)

def resolve_type(arg):
# type: (object) -> InternalType
Expand Down Expand Up @@ -629,7 +642,7 @@ def __ne__(self, other):

# Collected unique type comments for each function, of form '(arg, ...) -> ret'.
# There at most MAX_ITEMS_PER_FUNCTION items.
collected_signatures = {} # type: Dict[FunctionKey, Set[Tuple[ArgTypes, InternalType]]]
collected_signatures = {} # type: Dict[FunctionKey, Dict[Tuple[ArgTypes, InternalType], List[str]]]

# Number of samples collected per function (we also count ones ignored after reaching
# the maximum comment count per function).
Expand All @@ -653,8 +666,8 @@ def _make_type_comment(args_info, return_type):
return '(%s) -> %s' % (args_string, return_name)


def _flush_signature(key, return_type):
# type: (FunctionKey, InternalType) -> None
def _flush_signature(key, return_type, caller_name):
# type: (FunctionKey, InternalType, str) -> None
"""Store signature for a function.

Assume that argument types have been stored previously to
Expand All @@ -664,10 +677,11 @@ def _flush_signature(key, return_type):
As a side effect, removes the argument types for the function from
'collected_args'.
"""
signatures = collected_signatures.setdefault(key, set())
signatures = collected_signatures.setdefault(key, {})
args_info = collected_args.pop(key)
if len(signatures) < MAX_ITEMS_PER_FUNCTION:
signatures.add((args_info, return_type))
caller_names = signatures.setdefault((args_info, return_type), [])
caller_names.append(caller_name)
num_samples[key] = num_samples.get(key, 0) + 1


Expand All @@ -687,16 +701,16 @@ def type_consumer():
# Previous call didn't get a corresponding return, perhaps because we
# stopped collecting types in the middle of a call or because of
# a recursive function.
_flush_signature(item.key, UnknownType)
_flush_signature(item.key, UnknownType, item.caller_name)
collected_args[item.key] = ArgTypes(item.types)
else:
assert isinstance(item, KeyAndReturn)
if item.key in collected_args:
_flush_signature(item.key, item.return_type)
_flush_signature(item.key, item.return_type, item.caller_name)
_task_queue.task_done()


_task_queue = Queue() # type: Queue[Union[KeyAndTypes, KeyAndReturn]]
_task_queue = Queue() # type: Queue[Union[KeyAndTypes, KeyAndReturn, str]]
_consumer_thread = Thread(target=type_consumer)
_consumer_thread.daemon = True
_consumer_thread.start()
Expand Down Expand Up @@ -863,6 +877,7 @@ def _trace_dispatch(frame, event, arg):

# Track calls under current directory only.
filename = _filter_filename(code.co_filename)
caller_name = get_caller_info_from_frame(frame)
if filename:
func_name = get_function_name_from_frame(frame)
if not func_name or func_name[0] == '<':
Expand All @@ -874,7 +889,7 @@ def _trace_dispatch(frame, event, arg):
# TODO(guido): Make this faster
arg_info = inspect.getargvalues(frame) # type: ArgInfo
resolved_types = prep_args(arg_info)
_task_queue.put(KeyAndTypes(function_key, resolved_types))
_task_queue.put(KeyAndTypes(function_key, resolved_types, caller_name))
elif event == 'return':
# This event is also triggered if a function yields or raises an exception.
# We can tell the difference by looking at the bytecode.
Expand All @@ -898,7 +913,7 @@ def _trace_dispatch(frame, event, arg):
# TODO: returning non-trivial values from generators, per PEP 380;
# and async def / await stuff.
t = NoReturnType
_task_queue.put(KeyAndReturn(function_key, t))
_task_queue.put(KeyAndReturn(function_key, t, caller_name))
else:
sampling_counters[key] = None # We're not interested in this function.

Expand Down Expand Up @@ -926,12 +941,17 @@ def _dump_impl():
key=(lambda p: (p[0].path, p[0].line, p[0].func_name)))
res = [] # type: List[FunctionData]
for function_key, signatures in sorted_by_file:
comments = [_make_type_comment(args, ret_type) for args, ret_type in signatures]
comments = []
caller_names = []
for (args, ret_type), caller_name in signatures.items():
comments.append(_make_type_comment(args, ret_type))
caller_names.append(caller_name)
res.append(
{
'path': function_key.path,
'line': function_key.line,
'func_name': function_key.func_name,
'caller_names': caller_names,
'type_comments': comments,
'samples': num_samples.get(function_key, 0),
}
Expand Down
17 changes: 17 additions & 0 deletions pyannotate_runtime/tests/test_collect_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,20 @@ def assert_type_comments(self, func_name, comments):
assert len(item['type_comments']) == len(comments)
assert os.path.join(collect_types.TOP_DIR, item['path']) == __file__

def assert_caller_name(self, func_name, caller_names):
    # type: (str, str) -> None
    """Assert that the first caller recorded for func_name in self.stats matches.

    ``caller_names`` is the expected qualified caller name as a single string.
    Only the first caller of the first collected signature is checked, and only
    the text before the first whitespace of that entry is compared (entries are
    written as '<qualified name> : <lineno>').
    """
    stat_items = [item for item in self.stats if item.get('func_name') == func_name]
    if not caller_names and not stat_items:
        # If we expect no caller_name, it's okay if nothing was collected.
        return
    assert len(stat_items) == 1

    recorded = stat_items[0]['caller_names']
    # Fail with a readable assertion instead of an IndexError when nothing
    # was recorded for this function.
    assert recorded and recorded[0], 'No caller recorded for ' + func_name

    # Drop the ' : <lineno>' suffix; an empty entry compares as ''.
    tokens = recorded[0][0].split()
    actual_caller = tokens[0] if tokens else ''
    if actual_caller != caller_names:
        print('Actual: ' + actual_caller)
        print('Expected: ' + caller_names)
    assert actual_caller == caller_names

class TestCollectTypes(TestBaseClass):

Expand Down Expand Up @@ -264,6 +278,9 @@ def test_type_collection_on_main_thread(self):
self.assert_type_comments('TestCollectTypes.foo', ['(int, List[str]) -> None'])
self.assert_type_comments('TestCollectTypes.bar', ['(int, List[str]) -> int'])
self.assert_type_comments('TestCollectTypes.baz', ['(List[str]) -> Set[int]'])
self.assert_caller_name('TestCollectTypes.foo', 'pyannotate_runtime.tests.test_collect_types.TestCollectTypes.test_type_collection_on_main_thread')
self.assert_caller_name('TestCollectTypes.bar', 'pyannotate_runtime.tests.test_collect_types.TestCollectTypes.foo')
self.assert_caller_name('TestCollectTypes.baz', 'pyannotate_runtime.tests.test_collect_types.TestCollectTypes.bar')

def bar_another_thread(self, int_arg, list_arg):
# type: (Any, Any) -> Any
Expand Down