From ceae4a9fa8fa030984c750fc8fcc795167ece639 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 18 Jan 2016 16:45:42 +0100 Subject: [PATCH] Support patterns on extraction, fixes #361 This change implements the functionality requested in issue #361: extracting files with a given extension. It does so by permitting patterns to be used instead of plain prefix paths. The pattern styles supported are the same as for exclusions. --- borg/archiver.py | 22 +++++++++++++++++----- borg/testsuite/archiver.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 66298fe07a..d5c1d19630 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -286,13 +286,25 @@ def do_extract(self, args): manifest, key = Manifest.load(repository) archive = Archive(repository, key, manifest, args.location.archive, numeric_owner=args.numeric_owner) - patterns = adjust_patterns(args.paths, args.excludes) + + matcher = PatternMatcher() + if args.excludes: + matcher.add(args.excludes, False) + + include_patterns = [] + + if args.paths: + include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths) + matcher.add(include_patterns, True) + + matcher.fallback = not include_patterns + dry_run = args.dry_run stdout = args.stdout sparse = args.sparse strip_components = args.strip_components dirs = [] - for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True): + for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True): orig_path = item[b'path'] if strip_components: item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:]) @@ -317,8 +329,8 @@ def do_extract(self, args): if not args.dry_run: while dirs: archive.extract_item(dirs.pop(-1)) - for pattern in (patterns or []): - if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0: + for pattern in include_patterns: + if pattern.match_count == 
0: self.print_warning("Include pattern '%s' never matched.", pattern) return self.exit_code @@ -965,7 +977,7 @@ def build_parser(self, args=None, prog=None): type=location_validator(archive=True), help='archive to extract') subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, - help='paths to extract') + help='paths to extract; patterns are supported') rename_epilog = textwrap.dedent(""" This command renames an archive in the repository. diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 0727932938..f75cc120c7 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -562,6 +562,39 @@ def test_extract_include_exclude_regex_from_file(self): self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file3']) + def test_extract_with_pattern(self): + self.cmd("init", self.repository_location) + self.create_regular_file("file1", size=1024 * 80) + self.create_regular_file("file2", size=1024 * 80) + self.create_regular_file("file3", size=1024 * 80) + self.create_regular_file("file4", size=1024 * 80) + self.create_regular_file("file333", size=1024 * 80) + + self.cmd("create", self.repository_location + "::test", "input") + + # Extract everything with regular expression + with changedir("output"): + self.cmd("extract", self.repository_location + "::test", "re:.*") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file3", "file333", "file4"]) + shutil.rmtree("output/input") + + # Extract with pattern while also excluding files + with changedir("output"): + self.cmd("extract", "--exclude=re:file[34]$", self.repository_location + "::test", r"re:file\d$") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2"]) + shutil.rmtree("output/input") + + # Combine --exclude with pattern for extraction + with changedir("output"): + self.cmd("extract", "--exclude=input/file1", 
self.repository_location + "::test", "re:file[12]$") + self.assert_equal(sorted(os.listdir("output/input")), ["file2"]) + shutil.rmtree("output/input") + + # Multiple pattern + with changedir("output"): + self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"]) + def test_exclude_caches(self): self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80)