diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go index 14792efb08c1..4a373124580d 100644 --- a/pkg/gitparse/gitparse.go +++ b/pkg/gitparse/gitparse.go @@ -16,6 +16,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/context" + bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer" ) const ( @@ -100,10 +101,11 @@ func (b *buffer) String() (string, error) { return b.Buffer.String(), nil } // The use of contentWriter enables the management of diff data either in memory or on disk, // based on its size, optimizing resource usage and performance. type Diff struct { - PathB string - LineStart int + PathB string + LineStart int + IsBinary bool + contentWriter contentWriter - IsBinary bool } type diffOption func(*Diff) @@ -111,10 +113,14 @@ type diffOption func(*Diff) // withPathB sets the PathB option. func withPathB(pathB string) diffOption { return func(d *Diff) { d.PathB = pathB } } +// withCustomContentWriter sets the contentWriter option. +func withCustomContentWriter(cr contentWriter) diffOption { + return func(d *Diff) { d.contentWriter = cr } +} + // NewDiff creates a new Diff with a threshold. func NewDiff(opts ...diffOption) *Diff { diff := new(Diff) - diff.contentWriter = newBuffer() for _, opt := range opts { opt(diff) } @@ -203,7 +209,8 @@ type Parser struct { maxDiffSize int maxCommitSize int dateFormat string - contentWriter contentWriter + + useCustomContentWriter bool } type ParseState int @@ -250,11 +257,9 @@ func (state ParseState) String() string { }[state] } -// WithContentWriter sets the ContentWriter for the Parser. -func WithContentWriter(writer contentWriter) Option { - return func(parser *Parser) { - parser.contentWriter = writer - } +// UseCustomContentWriter sets useCustomContentWriter option. 
+func UseCustomContentWriter() Option { + return func(parser *Parser) { parser.useCustomContentWriter = true } } // WithMaxDiffSize sets maxDiffSize option. Diffs larger than maxDiffSize will @@ -283,7 +288,6 @@ func NewParser(options ...Option) *Parser { dateFormat: defaultDateFormat, maxDiffSize: defaultMaxDiffSize, maxCommitSize: defaultMaxCommitSize, - contentWriter: newBuffer(), } for _, option := range options { option(parser) @@ -387,7 +391,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch totalLogSize int ) var latestState = Initial - currentDiff := NewDiff() + + diff := func(opts ...diffOption) *Diff { + opts = append(opts, withCustomContentWriter(newBuffer())) + return NewDiff(opts...) + } + if c.useCustomContentWriter { + diff = func(opts ...diffOption) *Diff { + opts = append(opts, withCustomContentWriter(bufferedfilewriter.New())) + return NewDiff(opts...) + } + } + currentDiff := diff() defer common.RecoverWithExit(ctx) defer close(commitChan) @@ -430,7 +445,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch totalLogSize += currentCommit.Size } // Create a new currentDiff and currentCommit - currentDiff = NewDiff() + currentDiff = diff() + // currentDiff = NewDiff(withCustomContentWriter(c.contentWriter())) currentCommit = &Commit{Message: strings.Builder{}} // Check that the commit line contains a hash and set it. 
if len(line) >= 47 { @@ -498,7 +514,8 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch currentCommit.Message.WriteString(oldCommit.Message.String()) } } - currentDiff = NewDiff() + currentDiff = diff() + // currentDiff = NewDiff(withCustomContentWriter(c.contentWriter())) case isModeLine(isStaged, latestState, line): latestState = ModeLine // NoOp @@ -538,7 +555,7 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, commitChan ch } currentCommit.Diffs = append(currentCommit.Diffs, *currentDiff) } - currentDiff = NewDiff(withPathB(currentDiff.PathB)) + currentDiff = diff(withPathB(currentDiff.PathB)) words := bytes.Split(line, []byte(" ")) if len(words) >= 3 { diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index 9843b860cf6c..7a7de1ddb168 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -34,7 +34,6 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb" "github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer" "github.com/trufflesecurity/trufflehog/v3/pkg/sources" - bufferedfilewriter "github.com/trufflesecurity/trufflehog/v3/pkg/writers/buffered_file_writer" ) const SourceType = sourcespb.SourceType_SOURCE_TYPE_GIT @@ -99,7 +98,7 @@ type Config struct { func NewGit(config *Config) *Git { var parser *gitparse.Parser if config.UseCustomContentWriter { - parser = gitparse.NewParser(gitparse.WithContentWriter(bufferedfilewriter.New())) + parser = gitparse.NewParser(gitparse.UseCustomContentWriter()) } else { parser = gitparse.NewParser() } @@ -522,7 +521,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string repoCtx = context.WithValue(ctx, "repo", path) } - commitChan, err := gitparse.NewParser().RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, scanOptions.Bare) + commitChan, err := s.parser.RepoPath(repoCtx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs, 
scanOptions.Bare) if err != nil { return err }