From 5fda810ef51900b7d46a728d247a7f70cb6543cd Mon Sep 17 00:00:00 2001
From: cpburnz <2126043+cpburnz@users.noreply.github.com>
Date: Sat, 9 Dec 2023 17:13:23 -0500
Subject: [PATCH] Fix issue 81

---
 CHANGES.rst                       |   2 +
 pathspec/gitignore.py             |   3 -
 pathspec/patterns/gitwildmatch.py | 208 +++++++++++++++---------------
 tests/test_02_gitwildmatch.py     |  47 ++++++-
 4 files changed, 147 insertions(+), 113 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 0610f84..9916547 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -22,6 +22,7 @@ New features:
 
 Bug fixes:
 
+- `Issue #81`_: GitIgnoreSpec behaviors differ from git.
 - `Pull #83`_: Fix ReadTheDocs builds.
 
 Improvements:
@@ -32,6 +33,7 @@ Improvements:
 - Improve type hint on *on_error* parameter on `pathspec.util.iter_tree_entries()`.
 
 
+.. _`Issue #81`: https://github.com/cpburnz/python-pathspec/issues/81
 .. _`Pull #82`: https://github.com/cpburnz/python-pathspec/pull/82
 .. _`Pull #83`: https://github.com/cpburnz/python-pathspec/pull/83
 
diff --git a/pathspec/gitignore.py b/pathspec/gitignore.py
index a8d3c43..994a2c7 100644
--- a/pathspec/gitignore.py
+++ b/pathspec/gitignore.py
@@ -138,9 +138,6 @@ def _match_file(
 					# Check for directory marker.
 					dir_mark = match.match.groupdict().get(_DIR_MARK)
 
-					# TODO: A exclude (whitelist) dir pattern here needs to deprioritize
-					# for 81-c.
-
 					if dir_mark:
 						# Pattern matched by a directory pattern.
 						priority = 1
diff --git a/pathspec/patterns/gitwildmatch.py b/pathspec/patterns/gitwildmatch.py
index 5c00086..6a3d6d5 100644
--- a/pathspec/patterns/gitwildmatch.py
+++ b/pathspec/patterns/gitwildmatch.py
@@ -1,15 +1,14 @@
 """
-This module implements Git's wildmatch pattern matching which itself is
-derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
-files.
+This module implements Git's wildmatch pattern matching which itself is derived
+from Rsync's wildmatch. Git uses wildmatch for its ".gitignore" files.
 """
 
 import re
 import warnings
 from typing import (
 	AnyStr,
-	Optional,
-	Tuple)
+	Optional,  # Replaced by `X | None` in 3.10.
+	Tuple)  # Replaced by `tuple` in 3.9.
 
 from .. import util
 from ..pattern import RegexPattern
@@ -36,8 +35,8 @@ class GitWildMatchPatternError(ValueError):
 
 class GitWildMatchPattern(RegexPattern):
 	"""
-	The :class:`GitWildMatchPattern` class represents a compiled Git
-	wildmatch pattern.
+	The :class:`GitWildMatchPattern` class represents a compiled Git wildmatch
+	pattern.
 	"""
 
 	# Keep the dict-less class hierarchy.
@@ -51,13 +50,12 @@ def pattern_to_regex(
 		"""
 		Convert the pattern into a regular expression.
 
-		*pattern* (:class:`str` or :class:`bytes`) is the pattern to convert
-		into a regular expression.
+		*pattern* (:class:`str` or :class:`bytes`) is the pattern to convert into a
+		regular expression.
 
-		Returns the uncompiled regular expression (:class:`str`, :class:`bytes`,
-		or :data:`None`); and whether matched files should be included
-		(:data:`True`), excluded (:data:`False`), or if it is a
-		null-operation (:data:`None`).
+		Returns the uncompiled regular expression (:class:`str`, :class:`bytes`, or
+		:data:`None`); and whether matched files should be included (:data:`True`),
+		excluded (:data:`False`), or if it is a null-operation (:data:`None`).
 		"""
 		if isinstance(pattern, str):
 			return_type = str
@@ -70,51 +68,52 @@ def pattern_to_regex(
 		original_pattern = pattern
 
 		if pattern.endswith('\\ '):
-			# EDGE CASE: Spaces can be escaped with backslash.
-			# If a pattern that ends with backslash followed by a space,
-			# only strip from left.
+			# EDGE CASE: Spaces can be escaped with backslash. If a pattern that ends
+			# with backslash followed by a space, only strip from left.
 			pattern = pattern.lstrip()
 		else:
 			pattern = pattern.strip()
 
 		if pattern.startswith('#'):
-			# A pattern starting with a hash ('#') serves as a comment
-			# (neither includes nor excludes files). Escape the hash with a
-			# back-slash to match a literal hash (i.e., '\#').
+			# A pattern starting with a hash ('#') serves as a comment (neither
+			# includes nor excludes files). Escape the hash with a back-slash to match
+			# a literal hash (i.e., '\#').
 			regex = None
 			include = None
 
 		elif pattern == '/':
-			# EDGE CASE: According to `git check-ignore` (v2.4.1), a single
-			# '/' does not match any file.
+			# EDGE CASE: According to `git check-ignore` (v2.4.1), a single '/' does
+			# not match any file.
 			regex = None
 			include = None
 
 		elif pattern:
 			if pattern.startswith('!'):
-				# A pattern starting with an exclamation mark ('!') negates the
-				# pattern (exclude instead of include). Escape the exclamation
-				# mark with a back-slash to match a literal exclamation mark
-				# (i.e., '\!').
+				# A pattern starting with an exclamation mark ('!') negates the pattern
+				# (exclude instead of include). Escape the exclamation mark with a
+				# back-slash to match a literal exclamation mark (i.e., '\!').
 				include = False
 				# Remove leading exclamation mark.
 				pattern = pattern[1:]
 			else:
 				include = True
 
-			# Allow a regex override for edge cases that cannot be handled
-			# through normalization.
+			# Allow a regex override for edge cases that cannot be handled through
+			# normalization.
 			override_regex = None
 
 			# Split pattern into segments.
 			pattern_segs = pattern.split('/')
 
+			# Check whether the pattern is specifically a directory pattern before
+			# normalization.
+			is_dir_pattern = not pattern_segs[-1]
+
 			# Normalize pattern to make processing easier.
 
-			# EDGE CASE: Deal with duplicate double-asterisk sequences.
-			# Collapse each sequence down to one double-asterisk. Iterate over
-			# the segments in reverse and remove the duplicate double
-			# asterisks as we go.
+			# EDGE CASE: Deal with duplicate double-asterisk sequences. Collapse each
+			# sequence down to one double-asterisk. Iterate over the segments in
+			# reverse and remove the duplicate double asterisks as we go.
 			for i in range(len(pattern_segs) - 1, 0, -1):
 				prev = pattern_segs[i-1]
 				seg = pattern_segs[i]
@@ -122,45 +121,42 @@ def pattern_to_regex(
 					del pattern_segs[i]
 
 			if len(pattern_segs) == 2 and pattern_segs[0] == '**' and not pattern_segs[1]:
-				# EDGE CASE: The '**/' pattern should match everything except
-				# individual files in the root directory. This case cannot be
-				# adequately handled through normalization. Use the override.
+				# EDGE CASE: The '**/' pattern should match everything except individual
+				# files in the root directory. This case cannot be adequately handled
+				# through normalization. Use the override.
 				override_regex = f'^.+(?P<{_DIR_MARK}>/).*$'
 
 			if not pattern_segs[0]:
-				# A pattern beginning with a slash ('/') will only match paths
-				# directly on the root directory instead of any descendant
-				# paths. So, remove empty first segment to make pattern relative
-				# to root.
+				# A pattern beginning with a slash ('/') will only match paths directly
+				# on the root directory instead of any descendant paths. So, remove
+				# empty first segment to make pattern relative to root.
 				del pattern_segs[0]
 
 			elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
-				# A single pattern without a beginning slash ('/') will match
-				# any descendant path. This is equivalent to "**/{pattern}". So,
-				# prepend with double-asterisks to make pattern relative to
-				# root.
-				# EDGE CASE: This also holds for a single pattern with a
-				# trailing slash (e.g. dir/).
+				# A single pattern without a beginning slash ('/') will match any
+				# descendant path. This is equivalent to "**/{pattern}". So, prepend
+				# with double-asterisks to make pattern relative to root.
+				# - EDGE CASE: This also holds for a single pattern with a trailing
+				#   slash (e.g. dir/).
 				if pattern_segs[0] != '**':
 					pattern_segs.insert(0, '**')
 
 			else:
-				# EDGE CASE: A pattern without a beginning slash ('/') but
-				# contains at least one prepended directory (e.g.
-				# "dir/{pattern}") should not match "**/dir/{pattern}",
-				# according to `git check-ignore` (v2.4.1).
+				# EDGE CASE: A pattern without a beginning slash ('/') but contains at
+				# least one prepended directory (e.g. "dir/{pattern}") should not match
+				# "**/dir/{pattern}", according to `git check-ignore` (v2.4.1).
 				pass
 
 			if not pattern_segs:
-				# After resolving the edge cases, we end up with no pattern at
-				# all. This must be because the pattern is invalid.
+				# After resolving the edge cases, we end up with no pattern at all. This
+				# must be because the pattern is invalid.
 				raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}")
 
 			if not pattern_segs[-1] and len(pattern_segs) > 1:
-				# A pattern ending with a slash ('/') will match all descendant
-				# paths if it is a directory but not if it is a regular file.
-				# This is equivalent to "{pattern}/**". So, set last segment to
-				# a double-asterisk to include all descendants.
+				# A pattern ending with a slash ('/') will match all descendant paths if
+				# it is a directory but not if it is a regular file. This is equivalent
+				# to "{pattern}/**". So, set last segment to a double-asterisk to
+				# include all descendants.
 				pattern_segs[-1] = '**'
 
 			if override_regex is None:
@@ -171,21 +167,27 @@ def pattern_to_regex(
 				for i, seg in enumerate(pattern_segs):
 					if seg == '**':
 						if i == 0 and i == end:
-							# A pattern consisting solely of double-asterisks ('**')
-							# will match every path.
-							output.append(f'[^/]+(?:(?P<{_DIR_MARK}>/).*)?')
+							# A pattern consisting solely of double-asterisks ('**') will
+							# match every path.
+							output.append(f'[^/]+(?:/.*)?')
+
 						elif i == 0:
 							# A normalized pattern beginning with double-asterisks
 							# ('**') will match any leading path segments.
 							output.append('(?:.+/)?')
 							need_slash = False
+
 						elif i == end:
-							# A normalized pattern ending with double-asterisks ('**')
-							# will match any trailing path segments.
-							output.append(f'(?P<{_DIR_MARK}>/).*')
+							# A normalized pattern ending with double-asterisks ('**') will
+							# match any trailing path segments.
+							if is_dir_pattern:
+								output.append(f'(?P<{_DIR_MARK}>/).*')
+							else:
+								output.append(f'/.*')
+
 						else:
-							# A pattern with inner double-asterisks ('**') will match
-							# multiple (or zero) inner path segments.
+							# A pattern with inner double-asterisks ('**') will match multiple
+							# (or zero) inner path segments.
 							output.append('(?:/.+)?')
 							need_slash = True
 
@@ -197,9 +199,9 @@ def pattern_to_regex(
 						output.append('[^/]+')
 
 						if i == end:
-							# A pattern ending without a slash ('/') will match a file
-							# or a directory (with paths underneath it). E.g., "foo"
-							# matches "foo", "foo/bar", "foo/bar/baz", etc.
+							# A pattern ending without a slash ('/') will match a file or a
+							# directory (with paths underneath it). E.g., "foo" matches "foo",
+							# "foo/bar", "foo/bar/baz", etc.
 							output.append(f'(?:(?P<{_DIR_MARK}>/).*)?')
 
 						need_slash = True
@@ -215,9 +217,9 @@ def pattern_to_regex(
 							raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}") from e
 
 						if i == end:
-							# A pattern ending without a slash ('/') will match a file
-							# or a directory (with paths underneath it). E.g., "foo"
-							# matches "foo", "foo/bar", "foo/bar/baz", etc.
+							# A pattern ending without a slash ('/') will match a file or a
+							# directory (with paths underneath it). E.g., "foo" matches "foo",
+							# "foo/bar", "foo/bar/baz", etc.
 							output.append(f'(?:(?P<{_DIR_MARK}>/).*)?')
 
 						need_slash = True
@@ -230,8 +232,8 @@ def pattern_to_regex(
 				regex = override_regex
 
 		else:
-			# A blank pattern is a null-operation (neither includes nor
-			# excludes files).
+			# A blank pattern is a null-operation (neither includes nor excludes
+			# files).
 			regex = None
 			include = None
 
@@ -243,16 +245,16 @@ def pattern_to_regex(
 	@staticmethod
 	def _translate_segment_glob(pattern: str) -> str:
 		"""
-		Translates the glob pattern to a regular expression. This is used in
-		the constructor to translate a path segment glob pattern to its
-		corresponding regular expression.
+		Translates the glob pattern to a regular expression. This is used in the
+		constructor to translate a path segment glob pattern to its corresponding
+		regular expression.
 
 		*pattern* (:class:`str`) is the glob pattern.
 
 		Returns the regular expression (:class:`str`).
 		"""
-		# NOTE: This is derived from `fnmatch.translate()` and is similar to
-		# the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
+		# NOTE: This is derived from `fnmatch.translate()` and is similar to the
+		# POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
 
 		escape = False
 		regex = ''
@@ -272,41 +274,40 @@ def _translate_segment_glob(pattern: str) -> str:
 				escape = True
 
 			elif char == '*':
-				# Multi-character wildcard. Match any string (except slashes),
-				# including an empty string.
+				# Multi-character wildcard. Match any string (except slashes), including
+				# an empty string.
 				regex += '[^/]*'
 
 			elif char == '?':
-				# Single-character wildcard. Match any single character (except
-				# a slash).
+				# Single-character wildcard. Match any single character (except a
+				# slash).
 				regex += '[^/]'
 
 			elif char == '[':
-				# Bracket expression wildcard. Except for the beginning
-				# exclamation mark, the whole bracket expression can be used
-				# directly as regex but we have to find where the expression
-				# ends.
+				# Bracket expression wildcard. Except for the beginning exclamation
+				# mark, the whole bracket expression can be used directly as regex, but
+				# we have to find where the expression ends.
 				# - "[][!]" matches ']', '[' and '!'.
 				# - "[]-]" matches ']' and '-'.
 				# - "[!]a-]" matches any character except ']', 'a' and '-'.
 				j = i
-        
+
 				# Pass bracket expression negation.
 				if j < end and (pattern[j] == '!' or pattern[j] == '^'):
 					j += 1
-          
+
 				# Pass first closing bracket if it is at the beginning of the
 				# expression.
 				if j < end and pattern[j] == ']':
 					j += 1
-          
+
 				# Find closing bracket. Stop once we reach the end or find it.
 				while j < end and pattern[j] != ']':
 					j += 1
 
 				if j < end:
-					# Found end of bracket expression. Increment j to be one past
-					# the closing bracket:
+					# Found end of bracket expression. Increment j to be one past the
+					# closing bracket:
 					#
 					#  [...]
 					#   ^   ^
@@ -320,17 +321,16 @@ def _translate_segment_glob(pattern: str) -> str:
 						expr += '^'
 						i += 1
 					elif pattern[i] == '^':
-						# POSIX declares that the regex bracket expression negation
-						# "[^...]" is undefined in a glob pattern. Python's
-						# `fnmatch.translate()` escapes the caret ('^') as a
-						# literal. Git supports the using a caret for negation.
-						# Maintain consistency with Git because that is the expected
-						# behavior.
+						# POSIX declares that the regex bracket expression negation "[^...]"
+						# is undefined in a glob pattern. Python's `fnmatch.translate()`
+						# escapes the caret ('^') as a literal. Git supports the using a
+						# caret for negation. Maintain consistency with Git because that is
+						# the expected behavior.
 						expr += '^'
 						i += 1
 
-					# Build regex bracket expression. Escape slashes so they are
-					# treated as literal slashes by regex as defined by POSIX.
+					# Build regex bracket expression. Escape slashes so they are treated
+					# as literal slashes by regex as defined by POSIX.
 					expr += pattern[i:j].replace('\\', '\\\\')
 
 					# Add regex bracket expression to regex result.
@@ -340,8 +340,8 @@ def _translate_segment_glob(pattern: str) -> str:
 					i = j
 
 				else:
-					# Failed to find closing bracket, treat opening bracket as a
-					# bracket literal instead of as an expression.
+					# Failed to find closing bracket, treat opening bracket as a bracket
+					# literal instead of as an expression.
 					regex += '\\['
 
 			else:
@@ -358,8 +358,8 @@ def escape(s: AnyStr) -> AnyStr:
 		"""
 		Escape special characters in the given string.
 
-		*s* (:class:`str` or :class:`bytes`) a filename or a string that you
-		want to escape, usually before adding it to a ".gitignore".
+		*s* (:class:`str` or :class:`bytes`) a filename or a string that you want to
+		escape, usually before adding it to a ".gitignore".
 
 		Returns the escaped string (:class:`str` or :class:`bytes`).
 		"""
@@ -404,8 +404,8 @@ def _deprecated() -> None:
 		Warn about deprecation.
 		"""
 		warnings.warn((
-			"GitIgnorePattern ('gitignore') is deprecated. Use "
-			"GitWildMatchPattern ('gitwildmatch') instead."
+			"GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern "
+			"('gitwildmatch') instead."
 		), DeprecationWarning, stacklevel=3)
 
 	@classmethod
@@ -416,6 +416,6 @@ def pattern_to_regex(cls, *args, **kw):
 		cls._deprecated()
 		return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw)
 
-# Register `GitIgnorePattern` as "gitignore" for backward compatibility
-# with v0.4.
+# Register `GitIgnorePattern` as "gitignore" for backward compatibility with
+# v0.4.
 util.register_pattern('gitignore', GitIgnorePattern)
diff --git a/tests/test_02_gitwildmatch.py b/tests/test_02_gitwildmatch.py
index be915cf..3d272a4 100644
--- a/tests/test_02_gitwildmatch.py
+++ b/tests/test_02_gitwildmatch.py
@@ -208,7 +208,7 @@ def test_03_child_double_asterisk(self):
 		"""
 		regex, include = GitWildMatchPattern.pattern_to_regex('spam/**')
 		self.assertTrue(include)
-		self.assertEqual(regex, f'^spam{RE_DIR}.*$')
+		self.assertEqual(regex, "^spam/.*$")
 
 		pattern = GitWildMatchPattern(re.compile(regex), include)
 		results = set(filter(pattern.match_file, [
@@ -257,7 +257,7 @@ def test_03_only_double_asterisk(self):
 		"""
 		regex, include = GitWildMatchPattern.pattern_to_regex('**')
 		self.assertTrue(include)
-		self.assertEqual(regex, f'^[^/]+{RE_SUB}$')
+		self.assertEqual(regex, f'^[^/]+(?:/.*)?$')
 
 		pattern = GitWildMatchPattern(re.compile(regex), include)
 		results = set(filter(pattern.match_file, [
@@ -314,7 +314,7 @@ def test_03_duplicate_leading_double_asterisk_edge_case(self):
 		"""
 		regex, include = GitWildMatchPattern.pattern_to_regex('**')
 		self.assertTrue(include)
-		self.assertEqual(regex, f'^[^/]+{RE_SUB}$')
+		self.assertEqual(regex, "^[^/]+(?:/.*)?$")
 
 		equivalent_regex, include = GitWildMatchPattern.pattern_to_regex('**/**')
 		self.assertTrue(include)
@@ -336,10 +336,14 @@ def test_03_duplicate_leading_double_asterisk_edge_case(self):
 		self.assertTrue(include)
 		self.assertEqual(regex, f'^(?:.+/)?api{RE_DIR}.*$')
 
-		equivalent_regex, include = GitWildMatchPattern.pattern_to_regex('**/api/**')
+		equivalent_regex, include = GitWildMatchPattern.pattern_to_regex(f'**/**/api/')
 		self.assertTrue(include)
 		self.assertEqual(equivalent_regex, regex)
 
+		regex, include = GitWildMatchPattern.pattern_to_regex('**/api/**')
+		self.assertTrue(include)
+		self.assertEqual(regex, "^(?:.+/)?api/.*$")
+
 		equivalent_regex, include = GitWildMatchPattern.pattern_to_regex('**/**/api/**/**')
 		self.assertTrue(include)
 		self.assertEqual(equivalent_regex, regex)
@@ -817,10 +821,41 @@ def test_13_issue_77_2_regex(self):
 		"""
 		Test the resulting regex for regex bracket expression negation.
 		"""
-		regex, include = GitWildMatchPattern.pattern_to_regex('a[^b]c')
+		regex, include = GitWildMatchPattern.pattern_to_regex("a[^b]c")
 		self.assertTrue(include)
 
-		equiv_regex, include = GitWildMatchPattern.pattern_to_regex('a[!b]c')
+		equiv_regex, include = GitWildMatchPattern.pattern_to_regex("a[!b]c")
 		self.assertTrue(include)
 
 		self.assertEqual(regex, equiv_regex)
+
+	def test_14_issue_81_a(self):
+		"""
+		Test ignoring files in a directory, scenario A.
+		"""
+		pattern = GitWildMatchPattern("!libfoo/**")
+
+		self.assertEqual(pattern.regex.pattern, "^libfoo/.*$")
+		self.assertIs(pattern.include, False)
+		self.assertTrue(pattern.match_file("libfoo/__init__.py"))
+
+	def test_14_issue_81_b(self):
+		"""
+		Test ignoring files in a directory, scenario B.
+		"""
+		pattern = GitWildMatchPattern("!libfoo/*")
+
+		self.assertEqual(pattern.regex.pattern, f"^libfoo/[^/]+{RE_SUB}$")
+		self.assertIs(pattern.include, False)
+		self.assertTrue(pattern.match_file("libfoo/__init__.py"))
+
+	def test_14_issue_81_c(self):
+		"""
+		Test ignoring files in a directory, scenario C.
+		"""
+		# GitWildMatchPattern will match the file, but GitIgnoreSpec should not.
+		pattern = GitWildMatchPattern("!libfoo/")
+
+		self.assertEqual(pattern.regex.pattern, f"^(?:.+/)?libfoo{RE_DIR}.*$")
+		self.assertIs(pattern.include, False)
+		self.assertTrue(pattern.match_file("libfoo/__init__.py"))