Skip to content

Commit

Permalink
Support patterns on extraction, fixes #361
Browse files Browse the repository at this point in the history
This change implements the functionality requested in issue #361:
extracting files with a given extension. It does so by permitting
patterns to be used instead of plain prefix paths. The pattern styles
supported are the same as for exclusions.
  • Loading branch information
hansmi committed Jan 18, 2016
1 parent 848375e commit ceae4a9
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
22 changes: 17 additions & 5 deletions borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,25 @@ def do_extract(self, args):
manifest, key = Manifest.load(repository)
archive = Archive(repository, key, manifest, args.location.archive,
numeric_owner=args.numeric_owner)
patterns = adjust_patterns(args.paths, args.excludes)

matcher = PatternMatcher()
if args.excludes:
matcher.add(args.excludes, False)

include_patterns = []

if args.paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
matcher.add(include_patterns, True)

matcher.fallback = not include_patterns

dry_run = args.dry_run
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
dirs = []
for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True):
for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
orig_path = item[b'path']
if strip_components:
item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
Expand All @@ -317,8 +329,8 @@ def do_extract(self, args):
if not args.dry_run:
while dirs:
archive.extract_item(dirs.pop(-1))
for pattern in (patterns or []):
if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0:
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
return self.exit_code

Expand Down Expand Up @@ -965,7 +977,7 @@ def build_parser(self, args=None, prog=None):
type=location_validator(archive=True),
help='archive to extract')
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to extract')
help='paths to extract; patterns are supported')

rename_epilog = textwrap.dedent("""
This command renames an archive in the repository.
Expand Down
33 changes: 33 additions & 0 deletions borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,39 @@ def test_extract_include_exclude_regex_from_file(self):
self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
self.assert_equal(sorted(os.listdir('output/input')), ['file3'])

def test_extract_with_pattern(self):
    """Check that ``extract`` accepts patterns where plain paths were expected.

    Five regular files are archived from ``input``. The archive is then
    extracted four times with different include patterns (and, in two
    cases, an additional ``--exclude``); after each run the set of files
    that landed in ``output/input`` is compared against the expectation.
    """
    self.cmd("init", self.repository_location)
    for name in ("file1", "file2", "file3", "file4", "file333"):
        self.create_regular_file(name, size=1024 * 80)

    archive = self.repository_location + "::test"
    self.cmd("create", archive, "input")

    # The directory is listed from the original working directory, so every
    # check below runs after the ``changedir`` block has been left.
    target = "output/input"

    # A catch-all regular expression selects every archived file.
    with changedir("output"):
        self.cmd("extract", archive, "re:.*")
    extracted = sorted(os.listdir(target))
    self.assert_equal(extracted, ["file1", "file2", "file3", "file333", "file4"])
    shutil.rmtree(target)

    # Include pattern and exclude pattern given as regular expressions:
    # the exclusion removes file3/file4 from the single-digit matches.
    with changedir("output"):
        self.cmd("extract", "--exclude=re:file[34]$", archive, r"re:file\d$")
    extracted = sorted(os.listdir(target))
    self.assert_equal(extracted, ["file1", "file2"])
    shutil.rmtree(target)

    # A plain-path --exclude combined with a regular-expression include.
    with changedir("output"):
        self.cmd("extract", "--exclude=input/file1", archive, "re:file[12]$")
    extracted = sorted(os.listdir(target))
    self.assert_equal(extracted, ["file2"])
    shutil.rmtree(target)

    # Several include arguments at once: two ``fm:`` patterns and one
    # plain path.
    with changedir("output"):
        self.cmd("extract", archive, "fm:input/file1", "fm:*file33*", "input/file2")
    extracted = sorted(os.listdir(target))
    self.assert_equal(extracted, ["file1", "file2", "file333"])

def test_exclude_caches(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
Expand Down

0 comments on commit ceae4a9

Please sign in to comment.