Skip to content

Commit

Permalink
Export git.handleBinary and getSafeRemoteURL (#3921)
Browse files Browse the repository at this point in the history
This PR exports some functionality from the git source so that it's usable in a (new, under development) git-flavored source that does not need to wrap an entire git source in order to operate.

One is getSafeRemoteURL. This is a straightforward change.

The other is handleBinary. Right now, all git binary file handling is invoked from Git.ScanRepo, which is itself invoked by our various git-flavored sources.

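Since the actual binary file handling function handleBinary only used two pieces of information from the "git" source, I just removed its receiver and exported it as HandleBinary. One of the pieces of information was the skip-binaries flag (s.skipBinaries, checked together with feature.ForceSkipBinaries), which caused the function to be skipped entirely; I moved this check to the (two) call sites, in ScanCommits and ScanStaged. The other, skipArchives, I just forwarded as an argument.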
  • Loading branch information
rosecodym authored Feb 19, 2025
1 parent b332fea commit 160f1a4
Showing 1 changed file with 27 additions and 15 deletions.
42 changes: 27 additions & 15 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ func getGitDir(path string, options *ScanOptions) string {

func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, reporter sources.ChunkReporter) error {
// Get the remote URL for reporting (may be empty)
remoteURL := getSafeRemoteURL(repo, "origin")
remoteURL := GetSafeRemoteURL(repo, "origin")
var repoCtx context.Context

if ctx.Value("repo") == nil {
Expand Down Expand Up @@ -642,6 +642,15 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string

// Handle binary files by reading the entire file rather than using the diff.
if diff.IsBinary {
commitHash := plumbing.NewHash(fullHash)

if s.skipBinaries || feature.ForceSkipBinaries.Load() {
logger.V(5).Info("skipping binary file",
"commit", commitHash.String()[:7],
"path", path)
continue
}

metadata := s.sourceMetadataFunc(fileName, email, fullHash, when, remoteURL, 0)
chunkSkel := &sources.Chunk{
SourceName: s.sourceName,
Expand All @@ -652,8 +661,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
Verify: s.verify,
}

commitHash := plumbing.NewHash(fullHash)
if err := s.handleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName); err != nil {
if err := HandleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName, s.skipArchives); err != nil {
logger.Error(
err,
"error handling binary file",
Expand Down Expand Up @@ -793,7 +801,7 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email
// ScanStaged chunks staged changes.
func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, reporter sources.ChunkReporter) error {
// Get the URL metadata for reporting (may be empty).
urlMetadata := getSafeRemoteURL(repo, "origin")
urlMetadata := GetSafeRemoteURL(repo, "origin")

diffChan, err := s.parser.Staged(ctx, path)
if err != nil {
Expand Down Expand Up @@ -864,6 +872,14 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
// Handle binary files by reading the entire file rather than using the diff.
if diff.IsBinary {
commitHash := plumbing.NewHash(fullHash)

if s.skipBinaries || feature.ForceSkipBinaries.Load() {
logger.V(5).Info("skipping binary file",
"commit", commitHash.String()[:7],
"path", path)
continue
}

metadata := s.sourceMetadataFunc(fileName, email, "Staged", when, urlMetadata, 0)
chunkSkel := &sources.Chunk{
SourceName: s.sourceName,
Expand All @@ -873,7 +889,7 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
SourceMetadata: metadata,
Verify: s.verify,
}
if err := s.handleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName); err != nil {
if err := HandleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName, s.skipArchives); err != nil {
logger.Error(err, "error handling binary file")
}
continue
Expand Down Expand Up @@ -938,7 +954,7 @@ func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath strin
remotes, _ := repo.Remotes()
repoURL := "Could not get remote for repo"
if len(remotes) != 0 {
repoURL = getSafeRemoteURL(repo, remotes[0].Config().Name)
repoURL = GetSafeRemoteURL(repo, remotes[0].Config().Name)
}
logger = logger.WithValues("repo", repoURL)
}
Expand Down Expand Up @@ -1190,10 +1206,10 @@ func PrepareRepo(ctx context.Context, uriString string) (string, bool, error) {
return path, remote, nil
}

// getSafeRemoteURL is a helper function that will attempt to get a safe URL first
// GetSafeRemoteURL is a helper function that will attempt to get a safe URL first
// from the preferred remote name, falling back to the first remote name
// available, or an empty string if there are no remotes.
func getSafeRemoteURL(repo *git.Repository, preferred string) string {
func GetSafeRemoteURL(repo *git.Repository, preferred string) string {
remote, err := repo.Remote(preferred)
if err != nil {
var remotes []*git.Remote
Expand All @@ -1213,13 +1229,14 @@ func getSafeRemoteURL(repo *git.Repository, preferred string) string {
return safeURL
}

func (s *Git) handleBinary(
func HandleBinary(
ctx context.Context,
gitDir string,
reporter sources.ChunkReporter,
chunkSkel *sources.Chunk,
commitHash plumbing.Hash,
path string,
skipArchives bool,
) (err error) {
fileCtx := context.WithValues(ctx, "commit", commitHash.String()[:7], "path", path)
fileCtx.Logger().V(5).Info("handling binary file")
Expand All @@ -1229,11 +1246,6 @@ func (s *Git) handleBinary(
return nil
}

if s.skipBinaries || feature.ForceSkipBinaries.Load() {
fileCtx.Logger().V(5).Info("skipping binary file", "path", path)
return nil
}

const (
cmdTimeout = 60 * time.Second
waitDelay = 5 * time.Second
Expand Down Expand Up @@ -1293,7 +1305,7 @@ func (s *Git) handleBinary(
err = errors.Join(err, copyErr, waitErr)
}()

return handlers.HandleFile(catFileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives))
return handlers.HandleFile(catFileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(skipArchives))
}

func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {
Expand Down

0 comments on commit 160f1a4

Please sign in to comment.