forked from SublimeText/CTags
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ctags.py
527 lines (387 loc) · 16.2 KB
/
ctags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
#!/usr/bin/env python
"""A ctags wrapper, parser and sorter"""
import codecs
import re
import os
import subprocess
import bisect
import mmap
"""
Constants
"""
# Pattern for one line of a tag file. Named groups, in order:
#   symbol      - first tab-delimited column
#   filename    - second tab-delimited column
#   ex_command  - everything up to the first ``;"`` that is followed by a tab
#   type        - next column (no tab, CR or LF)
#   fields      - optional; the rest of the line after one more tab
TAGS_RE = re.compile(
'(?P<symbol>[^\t]+)\t'
'(?P<filename>[^\t]+)\t'
'(?P<ex_command>.*?);"\t'
'(?P<type>[^\t\r\n]+)'
'(?:\t(?P<fields>.*))?'
)
# column indexes of a tab-split tag line
SYMBOL = 0
FILENAME = 1
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by callers - confirm before changing
MATCHES_STARTWITH = 'starts_with'
# field names that lead a tag path, in this order (see create_tag_path)
PATH_ORDER = [
'function', 'class', 'struct',
]
# field names that are never made part of a tag path
PATH_IGNORE_FIELDS = ('file', 'access', 'signature',
'language', 'line', 'inherits')
# separators a tag path string is split on, applied in this order
TAG_PATH_SPLITTERS = ('/', '.', '::', ':')
"""
Functions
"""
"""Helper functions"""
def splits(string, *splitters):
    """Recursively split a string on several separators.

    The string is split on the first separator, each resulting piece on the
    second one, and so on. Empty pieces are dropped.

    :param string: string to split
    :param splitters: separators to split ``string`` on, applied in order

    :returns: generator over the non-empty pieces of ``string``
    """
    if not splitters:
        # no separator left: emit the piece itself unless it is empty
        if string:
            yield string
        return

    first, remaining = splitters[0], splitters[1:]
    for piece in string.split(first):
        for token in splits(piece, *remaining):
            yield token
"""Tag processing functions"""
def parse_tag_lines(lines, order_by='symbol', tag_class=None, filters=None):
    """Parse tag lines into a dictionary grouped by one tag element.

    Every line is matched against ``TAGS_RE``; lines that do not match are
    skipped. Matching lines are turned into a dict, post-processed with
    ``post_process_tag``, optionally wrapped in ``tag_class`` and then checked
    against ``filters``.

    :param lines: iterable of tag lines; items may be strings or ``Tag``
        objects (whose ``line`` attribute is used)
    :param order_by: tag element used as key of the resulting dictionary
    :param tag_class: a class to wrap around each parsed tag dictionary
    :param filters: iterable of dicts mapping a tag element name to a regex.
        A tag is dropped as soon as one regex matches (``re.match``, i.e.
        anchored at the start) the corresponding element. Defaults to no
        filtering.

    :returns: dict mapping each value of the ``order_by`` element to the list
        of tags carrying that value, in input order
    """
    # ``None`` instead of a shared mutable ``[]`` default
    filters = [] if filters is None else list(filters)

    tags_lookup = {}

    for line in lines:
        if isinstance(line, Tag):  # handle both text and tag objects
            line = line.line

        search_obj = TAGS_RE.search(line)
        if not search_obj:
            continue

        # convert regex search result to dict and derive the extra elements
        tag = post_process_tag(search_obj.groupdict())

        if tag_class is not None:  # if 'casting' to a class
            tag = tag_class(tag)

        # filter out the tag on the first matching filter entry
        filtered_out = any(
            re.match(pattern, tag[key])
            for tag_filter in filters
            for key, pattern in tag_filter.items())
        if filtered_out:
            continue

        tags_lookup.setdefault(tag[order_by], []).append(tag)

    return tags_lookup
def post_process_tag(tag):
    r"""Derive the additional elements of a freshly parsed tag.

    The tag dict produced by the line regex holds the raw 'fields', 'type'
    and 'ex_command' elements. This function expands them in place::

        =========== = ============= =========================================
        original    > new           meaning/example
        =========== = ============= =========================================
        symbol      > symbol        symbol name (i.e. class, variable)
        filename    > filename      file containing symbol
        .           > tag_path      tuple of (filename, [class], symbol)
        ex_command  > ex_command    line number or regex used to find symbol
        type        > type          type of symbol (i.e. class, method)
        fields      > fields        string of fields
        .           > [field_keys]  list of parsed field keys
        .           > [field_one]   parsed field element one
        .           > [...]         additional parsed field element
        =========== = ============= =========================================

    Example::

        =========== = ============= =========================================
        original    > new           example
        =========== = ============= =========================================
        symbol      > symbol        'getSum'
        filename    > filename      'DemoClass.java'
        .           > tag_path      ('DemoClass.java', 'DemoClass', 'getSum')
        ex_command  > ex_command    '\tprivate int getSum(int a, int b) {'
        type        > type          'm'
        fields      > fields        'class:DemoClass\tfile:'
        .           > field_keys    ['class', 'file']
        .           > class         'DemoClass'
        .           > file          ''
        =========== = ============= =========================================

    :param tag: dict containing the unprocessed tag; it is modified in place

    :returns: the same dict, now containing the processed tag
    """
    # 1. one entry per parsed field, plus the list of field keys
    parsed_fields = process_fields(tag)
    tag.update(parsed_fields)

    # 2. line number kept as is, search pattern unescaped
    unescaped = process_ex_cmd(tag)
    tag['ex_command'] = unescaped

    # 3. the path depends on the field entries added in step 1
    path_entry = create_tag_path(tag)
    tag.update(path_entry)

    return tag
def process_ex_cmd(tag):
    """Normalise the 'ex_command' element of a tag dictionary.

    The element is either a line number or a search pattern locating the
    symbol declaration. A line number is passed through untouched. For a
    pattern, the first two and last two characters are cut off and the
    backslash escapes of ``$``, ``/``, ``^`` and ``\\\\`` are removed.

    :param tag: dict containing a tag

    :returns: updated value for the 'ex_command' dictionary entry
    """
    command = tag.get('ex_command')

    if not command.isdigit():
        # a pattern: drop the two-character wrappers, then unescape
        pattern_body = command[2:-2]
        command = re.sub(r"\\(\$|/|\^|\\)", r'\1', pattern_body)

    return command
def process_fields(tag):
    """Process the 'fields' element of a tag dictionary.

    The fields string is a tab-separated sequence of ``key:value`` pairs.
    Each pair becomes one entry of the returned dict; only the first colon
    separates key from value, so values may contain colons themselves. A
    sorted list of all keys is added under 'field_keys'.

    Segments without a colon (for example the empty segment produced by a
    trailing tab) carry no key-value pair and are skipped instead of
    aborting the parse with a ``ValueError``.

    :param tag: dict containing a tag

    :returns: dict containing the key-value pairs from the fields element,
        plus a list of keys for said pairs; an empty dict if the tag has no
        fields
    """
    fields = tag.get('fields')

    if not fields:  # do nothing
        return {}

    result = {}
    for field in fields.split('\t'):
        key, separator, value = field.partition(':')
        if not separator:
            continue  # not a key:value pair
        result[key] = value

    # computed before the entry itself is added, so it lists field names only
    result['field_keys'] = sorted(result.keys())

    return result
def create_tag_path(tag):
    """Build the 'tag_path' entry for a tag dictionary.

    The path is assembled from the tag's field values in the order::

        [function] [class] [struct] [additional entries] symbol

    ``additional entries`` are the remaining field keys in their existing
    order. Fields named in ``PATH_IGNORE_FIELDS`` contribute nothing. The
    dot-joined result is split on ``TAG_PATH_SPLITTERS`` and prefixed with
    the tag's filename.

    :param tag: dict containing a tag

    :returns: dict containing the 'tag_path' entry (a tuple)
    """
    # work on a copy so the tag's own key list stays untouched
    leftover = tag.get('field_keys', [])[:]

    # keys named in PATH_ORDER lead, in PATH_ORDER's order ...
    ordered = [key for key in PATH_ORDER if key in leftover]
    for key in ordered:
        leftover.remove(key)
    # ... followed by every other key
    ordered += leftover

    # one "value." chunk per usable field, closed by the symbol itself
    parts = [tag.get(key) + '.'
             for key in ordered if key not in PATH_IGNORE_FIELDS]
    parts.append(tag.get('symbol'))
    dotted = ''.join(parts)

    path = [tag.get('filename')]
    path.extend(splits(dotted, *TAG_PATH_SPLITTERS))

    return {'tag_path': tuple(path)}
"""Tag building/sorting functions"""
def build_ctags(path, tag_file=None, recursive=False, opts=None, cmd=None,
                env=None):
    """Execute the ``ctags`` command using ``Popen``.

    The command runs without a shell, with the directory of ``path`` (or
    ``path`` itself, if it is a directory) as working directory. Afterwards
    the generated tag file is re-sorted with ``resort_ctags``.

    :param path: path to file or directory (with all files) to generate
        ctags for.
    :param tag_file: filename to use for the tag file. Defaults to ``tags``
    :param recursive: specify if search should be recursive in directory
        given by path. This overrides filename specified by ``path``
    :param opts: additional options to pass to the ctags executable; either
        a single argument string or a list/tuple of argument strings
    :param cmd: ctags executable to run. Defaults to ``ctags``
    :param env: environment variables to be used when executing ``ctags``

    :returns: path of the generated tag file

    :raises IOError: if ``path`` does not exist
    :raises EnvironmentError: with the exit status and the combined
        stdout/stderr output if ctags exits with a non-zero status
    """
    if not os.path.exists(path):
        raise IOError('\'path\' is not at valid directory or file path, or '
                      'is not accessible')

    # build the CTags command
    command = [cmd] if cmd else ['ctags']

    is_file = os.path.isfile(path)
    if is_file:
        # a bare filename has an empty dirname, which Popen cannot chdir to
        cwd = os.path.dirname(path) or os.curdir
    else:
        cwd = path

    if tag_file:
        # no shell splits the command line, so the option and its value have
        # to be separate arguments
        command.extend(['-f', tag_file])

    if opts:
        if isinstance(opts, (list, tuple)):
            command.extend(opts)
        else:
            command.append(opts)

    if recursive:  # ignore any file specified in path if recursive set
        command.append('-R')
    elif is_file:
        command.append(os.path.basename(path))
    else:  # search all files in current directory
        # NOTE(review): without a shell nothing expands this wildcard before
        # ctags receives it - confirm ctags handles it on the target platform
        command.append(os.path.join(path, '*'))

    # execute the command
    process = subprocess.Popen(command, cwd=cwd, shell=False, env=env,
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)

    # communicate() drains the output pipe while waiting; a plain wait() can
    # block forever once the child has filled the pipe buffer
    output = process.communicate()[0]

    if process.returncode:
        raise EnvironmentError(process.returncode, output)

    if not tag_file:  # Exuberant ctags defaults to ``tags`` filename.
        tag_file = os.path.join(cwd, 'tags')
    else:
        if os.path.dirname(tag_file) != cwd:
            tag_file = os.path.join(cwd, tag_file)

    # re-sort ctag file in filename order to improve search performance
    resort_ctags(tag_file)

    return tag_file
def resort_ctags(tag_file):
    r"""Rearrange ctags file for speed.

    Resorts (re-sort) a CTag file in order of file. This improves searching
    performance when searching tags by file as a binary search can be used.

    The algorithm works as so:

        For each line in the tag file
            Split the line on tab character
            Remove a leading ``.\`` from the ``file_name`` part of the tag
            Store the line in a dictionary of lists, keyed by ``file_name``
        Create a new ``[tagfile]_sorted_by_file`` file
        For each key in the sorted dictionary
            Write the lines stored under the key

    Only the exact prefix ``.\`` is removed, so names such as ``.hidden`` or
    ``..\other`` stay intact. Lines are grouped by the name that is actually
    written, which keeps the output sorted on that column. Lines without a
    ``file_name`` column are skipped.

    :param tag_file: The location of the tagfile to be sorted

    :returns: None
    """
    prefix = '.\\'
    lines_by_file = {}

    with codecs.open(tag_file, encoding='utf-8', errors='ignore') as fh:
        for line in fh:
            columns = line.split('\t')
            if len(columns) <= FILENAME:
                continue  # e.g. a blank line: nothing to sort on

            file_name = columns[FILENAME]
            if file_name.startswith(prefix):
                file_name = file_name[len(prefix):]
                columns[FILENAME] = file_name
                line = '\t'.join(columns)

            lines_by_file.setdefault(file_name, []).append(line)

    with codecs.open(tag_file + '_sorted_by_file', 'w', encoding='utf-8',
                     errors='ignore') as fw:
        for file_name in sorted(lines_by_file):
            fw.writelines(lines_by_file[file_name])
"""
Models
"""
class TagElements(dict):
    """Dictionary of tag entries whose keys are also readable as attributes."""

    def __init__(self, *args, **kw):
        """Build the dictionary from the usual ``dict`` arguments."""
        super(TagElements, self).__init__(*args, **kw)
        # the instance serves as its own attribute namespace, so
        # ``obj.key`` and ``obj['key']`` address the same entry
        self.__dict__ = self
class Tag(object):
    """Model a tag.

    This exists mainly to enable different types of sorting: a tag compares
    against a plain string through the value of one of its tab-separated
    columns.

    A tag with an empty line stands for "past the last line of the tag
    file" (that is what ``TagFile.__getitem__`` produces there). It orders
    after every key, so a binary search over a tag file keeps seeing a
    sorted sequence.
    """

    def __init__(self, line, column=0):
        """Initialise object.

        :param line: one line of a tag file, as text or as bytes (bytes are
            decoded as UTF-8, replacing undecodable sequences)
        :param column: index of the tab-separated column used to compare

        :returns: None
        """
        if isinstance(line, bytes):  # python 3 compatibility
            line = line.decode('utf-8', 'replace')
        self.line = line
        self.column = column

    def _key(self):
        """Return the value of the column this tag compares by."""
        return self.line.split('\t')[self.column]

    def __lt__(self, other):
        if not self.line:  # end-of-file marker: never smaller than a key
            return False
        return self._key() < other

    def __gt__(self, other):
        if not self.line:  # end-of-file marker: greater than every key
            return True
        return self._key() > other

    def __getitem__(self, index):
        """Return the tab-separated column at ``index``."""
        return self.line.split('\t')[index]
class TagFile(object):
    """Model a tag file.

    This doesn't actually hold a entire tag file, due in part to the sheer
    size of some tag files (> 100 MB files are possible). Instead, it acts
    as a 'wrapper' of sorts around a file, providing functionality like
    searching for a retrieving tags, finding tags based on given criteria
    (prefix, suffix, exact), getting the directory of a tag and so forth.

    The file has to be opened, either with ``open()`` or by using the object
    in a ``with`` statement, before it is indexed or searched.
    """

    def __init__(self, path, column):
        """Initialise object.

        The file indicated by ``path`` must be sorted by values in the column
        indicated by ``column``.

        :param path: path to a tag file
        :param column: column to search on

        :returns: None
        """
        self.path = path
        self.column = column

    def __getitem__(self, index):
        """Provide sequence-type interface to tag file.

        ``index`` is a byte offset, not a line number. For offset 0 the
        first line is returned; for any other offset the line containing the
        offset is skipped and the following line is returned. Past the last
        line the returned tag has an empty line.
        """
        self.mapped.seek(index)
        result = self.mapped.readline()

        if index != 0:
            # the first read started at an arbitrary offset; the next read
            # is a complete line
            result = self.mapped.readline()

        return Tag(result.strip(), self.column)

    def __len__(self):
        """Get size of tag file in bytes"""
        return len(self.mapped)

    def __enter__(self):
        """Open file on enter when using ``with`` keyword"""
        self.open()
        return self

    def __exit__(self, type, value, traceback):
        """Close file on exit when using ``with`` keyword"""
        self.close()

    @property
    def dir(self):
        """Get directory of tag file"""
        return os.path.dirname(self.path)

    def open(self):
        """Open file and map it into memory.

        The file is only ever read, so it is opened read-only; text read
        through the file object is decoded the same way ``Tag`` decodes the
        bytes read through the memory map.
        """
        self.file_o = codecs.open(self.path, 'rb', encoding='utf-8',
                                  errors='replace')
        try:
            self.mapped = mmap.mmap(self.file_o.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        except Exception:
            # do not leave the file handle open if mapping fails
            self.file_o.close()
            raise

    def close(self):
        """Close file"""
        self.mapped.close()
        self.file_o.close()

    def search(self, exact_match=True, *tags):
        """Search for one or more tags in the tag file.

        Search a tag file for given tags using a binary search. Without any
        tags, every line of the file is returned, starting from the top.

        :param exact_match: if search should be an exact or partial (prefix)
            match
        :param tags: values to look for in the search column

        :returns: generator over the matching tags
        """
        if not tags:
            self.mapped.seek(0)  # earlier reads may have moved the position
            while self.mapped.tell() < self.mapped.size():
                yield Tag(self.mapped.readline().strip(), self.column)
            return

        for key in tags:
            # first line that is not smaller than the key; matches, if any,
            # follow it directly because the file is sorted
            result = self[bisect.bisect_left(self, key)]

            while result.line:
                value = result[result.column]
                if exact_match:
                    matched = value == key
                else:
                    matched = value.startswith(key)
                if not matched:
                    break

                yield result
                result = Tag(self.mapped.readline().strip(), self.column)

    def search_by_suffix(self, suffix):
        """Search for one or more tags with the given suffix in the tag file.

        Search a tag file for given tags with the given suffix, using a linear
        search. Note that this linear search requires the entire file be
        searched making it slow. Hence, it should be avoided if possible.

        Lines that have no search column are skipped.

        :param suffix: suffix to search for

        :returns: generator over the matching tags
        """
        self.file_o.seek(0)  # always scan the whole file

        for line in self.file_o:
            columns = line.split('\t')
            if len(columns) <= self.column:
                continue
            if columns[self.column].endswith(suffix):
                # line ending removed so a trailing CR cannot end up in the
                # parsed fields
                yield Tag(line.rstrip('\r\n'), self.column)

    def tag_class(self):
        """Default class to wrap tag in.

        Allows wrapping of a parsed tag dict in a class, so elements can be
        accessed as class variables (i.e. ``class.variable``, rather than
        ``dict['variable']``). The class carries the directory of the tag
        file as ``root_dir``.
        """
        return type('TagElements', (TagElements,), dict(root_dir=self.dir))

    def get_tags_dict(self, *tags, **kw):
        """Return the tags from a tag file as a dict.

        :param tags: values to search for (exact match); all tags if omitted
        :param filters: keyword argument, passed on to ``parse_tag_lines``
        """
        filters = kw.get('filters', [])
        return parse_tag_lines(self.search(True, *tags),
                               tag_class=self.tag_class(), filters=filters)

    def get_tags_dict_by_suffix(self, suffix, **kw):
        """Return the tags with the given suffix of a tag file as a dict.

        :param suffix: suffix to search for
        :param filters: keyword argument, passed on to ``parse_tag_lines``
        """
        filters = kw.get('filters', [])
        return parse_tag_lines(self.search_by_suffix(suffix),
                               tag_class=self.tag_class(), filters=filters)