From fb0857ff9344f352c162f1ebc921ca7acc2304bb Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 29 Jun 2021 18:06:47 -0400 Subject: [PATCH] Add support for indexing root filesystem (#442) * change directory resolver to ignore system runtime paths + drive by index Signed-off-by: Alex Goodman * add event/etui support for filesystem indexing (for dir resolver) Signed-off-by: Alex Goodman * add warnings for path indexing problems Signed-off-by: Alex Goodman * add directory resolver index tests Signed-off-by: Alex Goodman * improve testing around directory resolver Signed-off-by: Alex Goodman * renamed p var to path when not conflicting with import Signed-off-by: Alex Goodman * pull docker image in CLI dir scan timeout test Signed-off-by: Alex Goodman * ensure file not exist errors do not stop directory resolver indexing Signed-off-by: Alex Goodman --- internal/string_helpers.go | 7 + internal/string_helpers_test.go | 41 +++ syft/event/event.go | 3 + syft/event/parsers/parsers.go | 18 + syft/source/directory_resolver.go | 289 +++++++++++++--- syft/source/directory_resolver_test.go | 327 +++++++++++++++--- syft/source/file_type.go | 17 +- syft/source/scheme_test.go | 4 +- syft/source/source.go | 2 +- syft/source/source_test.go | 12 +- .../outside_root/link_target/place | 1 + .../system_paths/target/dev/place | 1 + .../system_paths/target/home/place | 1 + .../system_paths/target/link/a-symlink | 1 + .../system_paths/target/proc/place | 1 + .../system_paths/target/sys/place | 1 + test/cli/dir_root_scan_regression_test.go | 46 +++ test/cli/json_schema_test.go | 2 +- test/cli/packages_cmd_test.go | 4 +- test/cli/power_user_cmd_test.go | 2 +- test/cli/root_cmd_test.go | 6 +- test/cli/spdx_json_schema_test.go | 2 +- test/cli/utils_test.go | 72 +++- ui/event_handlers.go | 52 ++- ui/handler.go | 5 +- 25 files changed, 783 insertions(+), 134 deletions(-) create mode 100644 syft/source/test-fixtures/system_paths/outside_root/link_target/place create mode 100644 syft/source/test-fixtures/system_paths/target/dev/place create mode 100644 syft/source/test-fixtures/system_paths/target/home/place create mode 120000 syft/source/test-fixtures/system_paths/target/link/a-symlink create mode 100644 syft/source/test-fixtures/system_paths/target/proc/place create mode 100644 syft/source/test-fixtures/system_paths/target/sys/place create mode 100644 test/cli/dir_root_scan_regression_test.go diff --git a/internal/string_helpers.go b/internal/string_helpers.go index a0539e4ed39..87bebea915a 100644 --- a/internal/string_helpers.go +++ b/internal/string_helpers.go @@ -12,3 +12,10 @@ func HasAnyOfPrefixes(input string, prefixes ...string) bool { return false } + +func TruncateMiddleEllipsis(input string, maxLen int) string { + if len(input) <= maxLen { + return input + } + return input[:maxLen/2] + "..." + input[len(input)-(maxLen/2):] +} diff --git a/internal/string_helpers_test.go b/internal/string_helpers_test.go index d15a2f654ca..f89a0820fca 100644 --- a/internal/string_helpers_test.go +++ b/internal/string_helpers_test.go @@ -1,6 +1,7 @@ package internal import ( + "strconv" "testing" "github.com/stretchr/testify/assert" @@ -63,3 +64,43 @@ func TestHasAnyOfPrefixes(t *testing.T) { }) } } + +func TestTruncateMiddleEllipsis(t *testing.T) { + tests := []struct { + input string + len int + expected string + }{ + { + input: "nobody expects the spanish inquisition", + len: 39, + expected: "nobody expects the spanish inquisition", + }, + { + input: "nobody expects the spanish inquisition", + len: 30, + expected: "nobody expects ...ish inquisition", + }, + { + input: "nobody expects the spanish inquisition", + len: 38, + expected: "nobody expects the spanish inquisition", + }, + { + input: "", + len: 30, + expected: "", + }, + { + input: "", + len: 0, + expected: "", + }, + } + + for _, test := range tests { + t.Run(test.input+":"+strconv.Itoa(test.len), func(t *testing.T) { + assert.Equal(t, test.expected, TruncateMiddleEllipsis(test.input, test.len)) + }) + } +} diff --git a/syft/event/event.go b/syft/event/event.go index 2f64d7983f6..4b0e6fd5cab 100644 --- a/syft/event/event.go +++ b/syft/event/event.go @@ -23,6 +23,9 @@ const ( // FileDigestsCatalogerStarted is a partybus event that occurs when the file digests cataloging has begun FileDigestsCatalogerStarted partybus.EventType = "syft-file-digests-cataloger-started-event" + // FileIndexingStarted is a partybus event that occurs when the directory resolver begins indexing a filesystem + FileIndexingStarted partybus.EventType = "syft-file-indexing-started-event" + // PresenterReady is a partybus event that occurs when an analysis result is ready for final presentation PresenterReady partybus.EventType = "syft-presenter-ready-event" diff --git a/syft/event/parsers/parsers.go b/syft/event/parsers/parsers.go index a9fdc9f1ba5..16229b51f6e 100644 --- a/syft/event/parsers/parsers.go +++ b/syft/event/parsers/parsers.go @@ -94,6 +94,24 @@ func ParseFileDigestsCatalogingStarted(e partybus.Event) (progress.StagedProgres return prog, nil } +func ParseFileIndexingStarted(e partybus.Event) (string, progress.StagedProgressable, error) { + if err := checkEventType(e.Type, event.FileIndexingStarted); err != nil { + return "", nil, err + } + + path, ok := e.Source.(string) + if !ok { + return "", nil, newPayloadErr(e.Type, "Source", e.Source) + } + + prog, ok := e.Value.(progress.StagedProgressable) + if !ok { + return "", nil, newPayloadErr(e.Type, "Value", e.Value) + } + + return path, prog, nil +} + func ParsePresenterReady(e partybus.Event) (presenter.Presenter, error) { if err := checkEventType(e.Type, event.PresenterReady); err != nil { return nil, err diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 9645cfee7cb..73cee6d9f4f 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -1,41 +1,196 @@ package source import ( + "errors" "fmt" "io" "os" "path" "path/filepath" + "strings" "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" - "github.com/bmatcuk/doublestar/v2" + "github.com/anchore/syft/syft/event" + "github.com/wagoodman/go-partybus" + "github.com/wagoodman/go-progress" ) +var unixSystemRuntimePrefixes = []string{ + "/proc", + "/sys", + "/dev", +} + var _ FileResolver = (*directoryResolver)(nil) +type pathFilterFn func(string) bool + // directoryResolver implements path and content access for the directory data source. type directoryResolver struct { - path string + path string + cwd string + fileTree *filetree.FileTree + infos map[file.ID]os.FileInfo + // TODO: wire up to report these paths in the json report + pathFilterFns []pathFilterFn + errPaths map[string]error +} + +func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) { + cwd, err := os.Getwd() + if err != nil { + return nil, fmt.Errorf("could not create directory resolver: %w", err) + } + + if pathFilters == nil { + pathFilters = []pathFilterFn{isUnixSystemRuntimePath} + } + + resolver := directoryResolver{ + path: root, + cwd: cwd, + fileTree: filetree.NewFileTree(), + infos: make(map[file.ID]os.FileInfo), + pathFilterFns: pathFilters, + errPaths: make(map[string]error), + } + + return &resolver, indexAllRoots(root, resolver.indexTree) } -func newDirectoryResolver(path string) *directoryResolver { - return &directoryResolver{path: path} +func (r *directoryResolver) indexTree(root string) ([]string, error) { + log.Infof("indexing filesystem path=%q", root) + var err error + root, err = filepath.Abs(root) + if err != nil { + return nil, err + } + var roots []string + stager, prog := indexingProgress(root) + defer prog.SetCompleted() + + return roots, filepath.Walk(root, + func(path string, info os.FileInfo, err error) error { + stager.Current = path + + // ignore any path which a filter function returns true + for _, filterFn := range r.pathFilterFns { + if filterFn(path) { + return nil + } + } + + if err = r.handleFileAccessErr(path, err); err != nil { + return err + } + + // link cycles could cause a revisit --we should not allow this + if r.fileTree.HasPath(file.Path(path)) { + return nil + } + + if info == nil { + // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. + r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path) + return nil + } + + newRoot, err := r.addPathToIndex(path, info) + if err = r.handleFileAccessErr(path, err); err != nil { + return fmt.Errorf("unable to index path: %w", err) + } + + if newRoot != "" { + roots = append(roots, newRoot) + } + + return nil + }) } -func (r directoryResolver) requestPath(userPath string) string { - fullPath := userPath - if filepath.IsAbs(fullPath) { - // a path relative to root should be prefixed with the resolvers directory path, otherwise it should be left as is - fullPath = path.Join(r.path, fullPath) +func (r *directoryResolver) handleFileAccessErr(path string, err error) error { + if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) { + // don't allow for permission errors to stop indexing, keep track of the paths and continue. + log.Warnf("unable to access path=%q: %+v", path, err) + r.errPaths[path] = err + return nil + } else if err != nil { + return fmt.Errorf("unable to access path=%q: %w", path, err) } - return fullPath + return nil +} + +func (r directoryResolver) addPathToIndex(p string, info os.FileInfo) (string, error) { + var ref *file.Reference + var err error + var newRoot string + + switch newFileTypeFromMode(info.Mode()) { + case SymbolicLink: + linkTarget, err := os.Readlink(p) + if err != nil { + return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) + } + ref, err = r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget)) + if err != nil { + return "", err + } + + targetAbsPath := linkTarget + if !filepath.IsAbs(targetAbsPath) { + targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget)) + } + + newRoot = targetAbsPath + + case Directory: + ref, err = r.fileTree.AddDir(file.Path(p)) + if err != nil { + return "", err + } + default: + ref, err = r.fileTree.AddFile(file.Path(p)) + if err != nil { + return "", err + } + } + + r.infos[ref.ID()] = info + return newRoot, nil +} + +func (r directoryResolver) requestPath(userPath string) (string, error) { + if filepath.IsAbs(userPath) { + // don't allow input to potentially hop above root path + userPath = path.Join(r.path, userPath) + } + var err error + userPath, err = filepath.Abs(userPath) + if err != nil { + return "", err + } + return userPath, nil +} + +func (r directoryResolver) responsePath(path string) string { + // always return references relative to the request path (not absolute path) + if filepath.IsAbs(path) { + return strings.TrimPrefix(path, r.cwd+string(filepath.Separator)) + } + return path } // HasPath indicates if the given path exists in the underlying source. func (r *directoryResolver) HasPath(userPath string) bool { - _, err := os.Stat(r.requestPath(userPath)) - return !os.IsNotExist(err) + requestPath, err := r.requestPath(userPath) + if err != nil { + return false + } + return r.fileTree.HasPath(file.Path(requestPath)) } // Stringer to represent a directory path data source @@ -48,12 +203,16 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) var references = make([]Location, 0) for _, userPath := range userPaths { - userStrPath := r.requestPath(userPath) + userStrPath, err := r.requestPath(userPath) + if err != nil { + log.Warnf("unable to get file by path=%q : %+v", userPath, err) + continue + } fileMeta, err := os.Stat(userStrPath) if os.IsNotExist(err) { continue } else if err != nil { - log.Errorf("path (%r) is not valid: %v", userStrPath, err) + log.Warnf("path (%r) is not valid: %+v", userStrPath, err) } // don't consider directories @@ -61,7 +220,7 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) continue } - references = append(references, NewLocation(userStrPath)) + references = append(references, NewLocation(r.responsePath(userStrPath))) } return references, nil @@ -72,23 +231,12 @@ func (r directoryResolver) FilesByGlob(patterns ...string) ([]Location, error) { result := make([]Location, 0) for _, pattern := range patterns { - pathPattern := path.Join(r.path, pattern) - pathMatches, err := doublestar.Glob(pathPattern) + globResults, err := r.fileTree.FilesByGlob(pattern) if err != nil { return nil, err } - for _, matchedPath := range pathMatches { - fileMeta, err := os.Stat(matchedPath) - if err != nil { - continue - } - - // don't consider directories - if fileMeta.IsDir() { - continue - } - - result = append(result, NewLocation(matchedPath)) + for _, globResult := range globResults { + result = append(result, NewLocation(r.responsePath(string(globResult.MatchPath)))) } } @@ -120,41 +268,76 @@ func (r *directoryResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { defer close(results) - err := filepath.Walk(r.path, - func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - results <- NewLocation(path) - return nil - }) - if err != nil { - log.Errorf("unable to walk path=%q : %+v", r.path, err) + for _, ref := range r.fileTree.AllFiles() { + results <- NewLocation(r.responsePath(string(ref.RealPath))) } }() return results } func (r *directoryResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { - fi, err := os.Stat(location.RealPath) - if err != nil { - return FileMetadata{}, err - } - - // best effort - ty := UnknownFileType - switch { - case fi.Mode().IsDir(): - ty = Directory - case fi.Mode().IsRegular(): - ty = RegularFile + info, exists := r.infos[location.ref.ID()] + if !exists { + return FileMetadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrExist) } return FileMetadata{ - Mode: fi.Mode(), - Type: ty, + Mode: info.Mode(), + Type: newFileTypeFromMode(info.Mode()), // unsupported across platforms UserID: -1, GroupID: -1, }, nil } + +func isUnixSystemRuntimePath(path string) bool { + return internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...) +} + +func indexingProgress(path string) (*progress.Stage, *progress.Manual) { + stage := &progress.Stage{} + prog := &progress.Manual{ + Total: -1, + } + + bus.Publish(partybus.Event{ + Type: event.FileIndexingStarted, + Source: path, + Value: struct { + progress.Stager + progress.Progressable + }{ + Stager: progress.Stager(stage), + Progressable: prog, + }, + }) + + return stage, prog +} + +func indexAllRoots(root string, indexer func(string) ([]string, error)) error { + // why account for multiple roots? To cover cases when there is a symlink that references above the root path, + // in which case we need to additionally index where the link resolves to. it's for this reason why the filetree + // must be relative to the root of the filesystem (and not just relative to the given path). + pathsToIndex := []string{root} +loop: + for { + var currentPath string + switch len(pathsToIndex) { + case 0: + break loop + case 1: + currentPath, pathsToIndex = pathsToIndex[0], nil + default: + currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:] + } + + additionalRoots, err := indexer(currentPath) + if err != nil { + return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err) + } + pathsToIndex = append(pathsToIndex, additionalRoots...) + } + + return nil +} diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index db0ccd0c1dd..61741f9279f 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -1,7 +1,14 @@ package source import ( + "os" + "path" + "reflect" + "strings" "testing" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/stretchr/testify/assert" ) func TestDirectoryResolver_FilesByPath(t *testing.T) { @@ -57,7 +64,8 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver := directoryResolver{c.root} + resolver, err := newDirectoryResolver(c.root) + assert.NoError(t, err) hasPath := resolver.HasPath(c.input) if !c.forcePositiveHasPath { @@ -112,12 +120,10 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver := directoryResolver{"test-fixtures"} - + resolver, err := newDirectoryResolver("./test-fixtures") + assert.NoError(t, err) refs, err := resolver.FilesByPath(c.input...) - if err != nil { - t.Fatalf("could not use resolver: %+v, %+v", err, refs) - } + assert.NoError(t, err) if len(refs) != c.refCount { t.Errorf("unexpected number of refs: %d != %d", len(refs), c.refCount) @@ -127,51 +133,290 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { } func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { - t.Run("finds multiple matching files", func(t *testing.T) { - resolver := directoryResolver{"test-fixtures"} - refs, err := resolver.FilesByGlob("image-symlinks/file*") + resolver, err := newDirectoryResolver("./test-fixtures") + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("**/image-symlinks/file*") + assert.NoError(t, err) + + assert.Len(t, refs, 2) +} + +func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks") + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + assert.Len(t, refs, 6) +} - if err != nil { - t.Fatalf("could not use resolver: %+v, %+v", err, refs) - } +func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures") + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt") + assert.NoError(t, err) - expected := 2 - if len(refs) != expected { - t.Errorf("unexpected number of refs: %d != %d", len(refs), expected) - } + assert.Len(t, refs, 1) + assert.Equal(t, "test-fixtures/image-symlinks/file-1.txt", refs[0].RealPath) +} +func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { + // let's make certain that "dev/place" is not ignored, since it is not "/dev/place" + resolver, err := newDirectoryResolver("test-fixtures/system_paths/target") + assert.NoError(t, err) + // ensure the correct filter function is wired up by default + expectedFn := reflect.ValueOf(isUnixSystemRuntimePath) + actualFn := reflect.ValueOf(resolver.pathFilterFns[0]) + assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer()) + + // all paths should be found (non filtering matches a path) + refs, err := resolver.FilesByGlob("**/place") + assert.NoError(t, err) + // 4: within target/ + // 1: target/link --> relative path to "place" + // 1: outside_root/link_target/place + assert.Len(t, refs, 6) + + // ensure that symlink indexing outside of root worked + assert.Contains(t, refs, Location{ + RealPath: "test-fixtures/system_paths/outside_root/link_target/place", }) } -func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { - t.Run("finds multiple matching files", func(t *testing.T) { - resolver := directoryResolver{"test-fixtures/image-symlinks"} - refs, err := resolver.FilesByGlob("**/*.txt") +func TestDirectoryResolverUsesPathFilterFunction(t *testing.T) { + // let's make certain that the index honors the filter function + filter := func(s string) bool { + // a dummy function that works for testing purposes + return strings.Contains(s, "dev/place") || strings.Contains(s, "proc/place") || strings.Contains(s, "sys/place") + } - if err != nil { - t.Fatalf("could not use resolver: %+v, %+v", err, refs) - } + resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", filter) + assert.NoError(t, err) - expected := 6 - if len(refs) != expected { - t.Errorf("unexpected number of refs: %d != %d", len(refs), expected) - } + // ensure the correct filter function is wired up by default + expectedFn := reflect.ValueOf(filter) + actualFn := reflect.ValueOf(resolver.pathFilterFns[0]) + assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer()) + assert.Len(t, resolver.pathFilterFns, 1) - }) + refs, err := resolver.FilesByGlob("**/place") + assert.NoError(t, err) + // target/home/place + target/link/.../place + outside_root/.../place + assert.Len(t, refs, 3) } -func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { - t.Run("finds multiple matching files", func(t *testing.T) { - resolver := directoryResolver{"test-fixtures"} - refs, err := resolver.FilesByGlob("image-symlinks/*1.txt") - if err != nil { - t.Fatalf("could not use resolver: %+v, %+v", err, refs) - } - - expected := 1 - if len(refs) != expected { - t.Errorf("unexpected number of refs: %d != %d", len(refs), expected) - } +func Test_isUnixSystemRuntimePath(t *testing.T) { + tests := []struct { + path string + expected bool + }{ + { + path: "proc/place", + expected: false, + }, + { + path: "/proc/place", + expected: true, + }, + { + path: "/proc", + expected: true, + }, + { + path: "/pro/c", + expected: false, + }, + { + path: "/pro", + expected: false, + }, + { + path: "/dev", + expected: true, + }, + { + path: "/sys", + expected: true, + }, + { + path: "/something/sys", + expected: false, + }, + } + for _, test := range tests { + t.Run(test.path, func(t *testing.T) { + assert.Equal(t, test.expected, isUnixSystemRuntimePath(test.path)) + }) + } +} - }) +func Test_directoryResolver_index(t *testing.T) { + // note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex + r, err := newDirectoryResolver("test-fixtures/system_paths/target") + if err != nil { + t.Fatalf("unable to get indexed dir resolver: %+v", err) + } + tests := []struct { + name string + path string + }{ + { + name: "has dir", + path: "test-fixtures/system_paths/target/home", + }, + { + name: "has path", + path: "test-fixtures/system_paths/target/home/place", + }, + { + name: "has symlink", + path: "test-fixtures/system_paths/target/link/a-symlink", + }, + { + name: "has symlink target", + path: "test-fixtures/system_paths/outside_root/link_target/place", + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + info, err := os.Stat(test.path) + assert.NoError(t, err) + + // note: the index uses absolute paths, so assertions MUST keep this in mind + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("could not get working dir: %+v", err) + } + + p := file.Path(path.Join(cwd, test.path)) + assert.Equal(t, true, r.fileTree.HasPath(p)) + exists, ref, err := r.fileTree.File(p) + assert.Equal(t, true, exists) + if assert.NoError(t, err) { + return + } + assert.Equal(t, info, r.infos[ref.ID()]) + }) + } +} + +func Test_handleFileAccessErr(t *testing.T) { + tests := []struct { + name string + input error + expectedErr error + expectedPathTracked bool + }{ + { + name: "permission error does not propagate", + input: os.ErrPermission, + expectedPathTracked: true, + expectedErr: nil, + }, + { + name: "file does not exist error does not propagate", + input: os.ErrNotExist, + expectedPathTracked: true, + expectedErr: nil, + }, + { + name: "non-permission errors propagate", + input: os.ErrInvalid, + expectedPathTracked: false, + expectedErr: os.ErrInvalid, + }, + { + name: "non-errors ignored", + input: nil, + expectedPathTracked: false, + expectedErr: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + r := directoryResolver{ + errPaths: make(map[string]error), + } + p := "a/place" + assert.ErrorIs(t, r.handleFileAccessErr(p, test.input), test.expectedErr) + _, exists := r.errPaths[p] + assert.Equal(t, test.expectedPathTracked, exists) + }) + } +} + +type indexerMock struct { + observedRoots []string + additionalRoots map[string][]string +} + +func (m *indexerMock) indexer(s string) ([]string, error) { + m.observedRoots = append(m.observedRoots, s) + return m.additionalRoots[s], nil +} + +func Test_indexAllRoots(t *testing.T) { + tests := []struct { + name string + root string + mock indexerMock + expectedRoots []string + }{ + { + name: "no additional roots", + root: "a/place", + mock: indexerMock{ + additionalRoots: make(map[string][]string), + }, + expectedRoots: []string{ + "a/place", + }, + }, + { + name: "additional roots from a single call", + root: "a/place", + mock: indexerMock{ + additionalRoots: map[string][]string{ + "a/place": { + "another/place", + "yet-another/place", + }, + }, + }, + expectedRoots: []string{ + "a/place", + "another/place", + "yet-another/place", + }, + }, + { + name: "additional roots from a multiple calls", + root: "a/place", + mock: indexerMock{ + additionalRoots: map[string][]string{ + "a/place": { + "another/place", + "yet-another/place", + }, + "yet-another/place": { + "a-quiet-place-2", + "a-final/place", + }, + }, + }, + expectedRoots: []string{ + "a/place", + "another/place", + "yet-another/place", + "a-quiet-place-2", + "a-final/place", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert.NoError(t, indexAllRoots(test.root, test.mock.indexer)) + }) + } } diff --git a/syft/source/file_type.go b/syft/source/file_type.go index f0dd33bc168..d4cd5abdce7 100644 --- a/syft/source/file_type.go +++ b/syft/source/file_type.go @@ -1,6 +1,9 @@ package source -import "archive/tar" +import ( + "archive/tar" + "os" +) const ( UnknownFileType FileType = "UnknownFileType" @@ -34,3 +37,15 @@ func newFileTypeFromTarHeaderTypeFlag(flag byte) FileType { } return UnknownFileType } + +// TODO: fill in more types from mod... +func newFileTypeFromMode(mode os.FileMode) FileType { + switch { + case mode&os.ModeSymlink == os.ModeSymlink: + return SymbolicLink + case mode.IsDir(): + return Directory + default: + return RegularFile + } +} diff --git a/syft/source/scheme_test.go b/syft/source/scheme_test.go index 465f1107d03..e96b01d1ef5 100644 --- a/syft/source/scheme_test.go +++ b/syft/source/scheme_test.go @@ -83,7 +83,7 @@ func TestDetectScheme(t *testing.T) { ref: "latest", }, expectedScheme: ImageScheme, - // we want to be able to handle this case better, however, I don't see a way to do this + // we expected to be able to handle this case better, however, I don't see a way to do this // the user will need to provide more explicit input (docker:docker:latest) expectedLocation: "latest", }, @@ -95,7 +95,7 @@ func TestDetectScheme(t *testing.T) { ref: "docker:latest", }, expectedScheme: ImageScheme, - // we want to be able to handle this case better, however, I don't see a way to do this + // we expected to be able to handle this case better, however, I don't see a way to do this // the user will need to provide more explicit input (docker:docker:latest) expectedLocation: "docker:latest", }, diff --git a/syft/source/source.go b/syft/source/source.go index 71cb77fd3f1..6ce6f228d61 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -94,7 +94,7 @@ func NewFromImage(img *image.Image, userImageStr string) (Source, error) { func (s Source) FileResolver(scope Scope) (FileResolver, error) { switch s.Metadata.Scheme { case DirectoryScheme: - return newDirectoryResolver(s.Metadata.Path), nil + return newDirectoryResolver(s.Metadata.Path) case ImageScheme: switch scope { case SquashedScope: diff --git a/syft/source/source_test.go b/syft/source/source_test.go index 11d2131450e..39be62fc385 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -3,6 +3,8 @@ package source import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/anchore/stereoscope/pkg/image" ) @@ -41,7 +43,6 @@ func TestNewFromDirectory(t *testing.T) { desc: "no paths exist", input: "foobar/", inputPaths: []string{"/opt/", "/other"}, - expRefs: 0, }, { desc: "path detected", @@ -73,9 +74,8 @@ func TestNewFromDirectory(t *testing.T) { t.Errorf("mismatched stringer: '%s' != '%s'", src.Metadata.Path, test.input) } resolver, err := src.FileResolver(SquashedScope) - if err != nil { - t.Errorf("could not get resolver error: %+v", err) - } + assert.NoError(t, err) + refs, err := resolver.FilesByPath(test.inputPaths...) if err != nil { t.Errorf("FilesByPath call produced an error: %+v", err) @@ -141,13 +141,13 @@ func TestFilesByGlob(t *testing.T) { { input: "test-fixtures/path-detected", desc: "a single match", - glob: "*vimrc", + glob: "**/*vimrc", expected: 1, }, { input: "test-fixtures/path-detected", desc: "multiple matches", - glob: "*", + glob: "**", expected: 2, }, } diff --git a/syft/source/test-fixtures/system_paths/outside_root/link_target/place b/syft/source/test-fixtures/system_paths/outside_root/link_target/place new file mode 100644 index 00000000000..476e93d5714 --- /dev/null +++ b/syft/source/test-fixtures/system_paths/outside_root/link_target/place @@ -0,0 +1 @@ +good \ No newline at end of file diff --git a/syft/source/test-fixtures/system_paths/target/dev/place b/syft/source/test-fixtures/system_paths/target/dev/place new file mode 100644 index 00000000000..44d6628cdc6 --- /dev/null +++ b/syft/source/test-fixtures/system_paths/target/dev/place @@ -0,0 +1 @@ +bad \ No newline at end of file diff --git a/syft/source/test-fixtures/system_paths/target/home/place b/syft/source/test-fixtures/system_paths/target/home/place new file mode 100644 index 00000000000..476e93d5714 --- /dev/null +++ b/syft/source/test-fixtures/system_paths/target/home/place @@ -0,0 +1 @@ +good \ No newline at end of file diff --git a/syft/source/test-fixtures/system_paths/target/link/a-symlink b/syft/source/test-fixtures/system_paths/target/link/a-symlink new file mode 120000 index 00000000000..f5bd998342b --- /dev/null +++ b/syft/source/test-fixtures/system_paths/target/link/a-symlink @@ -0,0 +1 @@ +../../outside_root/link_target \ No newline at end of file diff --git a/syft/source/test-fixtures/system_paths/target/proc/place b/syft/source/test-fixtures/system_paths/target/proc/place new file mode 100644 index 00000000000..44d6628cdc6 --- /dev/null +++ b/syft/source/test-fixtures/system_paths/target/proc/place @@ -0,0 +1 @@ +bad \ No newline at end of file diff --git a/syft/source/test-fixtures/system_paths/target/sys/place b/syft/source/test-fixtures/system_paths/target/sys/place new file mode 100644 index 00000000000..44d6628cdc6 --- /dev/null +++ b/syft/source/test-fixtures/system_paths/target/sys/place @@ -0,0 +1 @@ +bad \ No newline at end of file diff --git a/test/cli/dir_root_scan_regression_test.go b/test/cli/dir_root_scan_regression_test.go new file mode 100644 index 00000000000..284447629c4 --- /dev/null +++ b/test/cli/dir_root_scan_regression_test.go @@ -0,0 +1,46 @@ +package cli + +import ( + "os/exec" + "strings" + "testing" + "time" +) + +func TestDirectoryScanCompletesWithinTimeout(t *testing.T) { + image := "alpine:latest" + + // we want to pull the image ahead of the test as to not affect the timeout value + pullDockerImage(t, image) + + var cmd *exec.Cmd + var stdout, stderr string + done := make(chan struct{}) + go func() { + defer close(done) + cmd, stdout, stderr = runSyftInDocker(t, nil, image, "dir:/", "-vv") + }() + + select { + case <-done: + break + case <-time.After(5 * time.Second): + t.Fatalf("directory scan is taking too long") + } + + assertions := []traitAssertion{ + assertTableReport, + assertSuccessfulReturnCode, + } + + for _, traitFn := range assertions { + traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode()) + } + + if t.Failed() { + t.Log("STDOUT:\n", stdout) + t.Log("STDERR:\n", stderr) + t.Log("COMMAND:", strings.Join(cmd.Args, " ")) + } + +} diff --git a/test/cli/json_schema_test.go b/test/cli/json_schema_test.go index 19c9fa93e1d..d4ea995df7f 100644 --- a/test/cli/json_schema_test.go +++ b/test/cli/json_schema_test.go @@ -60,7 +60,7 @@ func TestJSONSchema(t *testing.T) { args = append(args, a) } - _, stdout, stderr := runSyftCommand(t, nil, args...) + _, stdout, stderr := runSyft(t, nil, args...) if len(strings.Trim(stdout, "\n ")) < 100 { t.Fatalf("bad syft run:\noutput: %q\n:error: %q", stdout, stderr) diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go index 0be6dbed2fe..5ca02b05ce9 100644 --- a/test/cli/packages_cmd_test.go +++ b/test/cli/packages_cmd_test.go @@ -125,7 +125,7 @@ func TestPackagesCmdFlags(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - cmd, stdout, stderr := runSyftCommand(t, test.env, test.args...) + cmd, stdout, stderr := runSyft(t, test.env, test.args...) for _, traitFn := range test.assertions { traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode()) } @@ -197,7 +197,7 @@ func TestRegistryAuth(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - cmd, stdout, stderr := runSyftCommand(t, test.env, test.args...) + cmd, stdout, stderr := runSyft(t, test.env, test.args...) for _, traitAssertionFn := range test.assertions { traitAssertionFn(t, stdout, stderr, cmd.ProcessState.ExitCode()) } diff --git a/test/cli/power_user_cmd_test.go b/test/cli/power_user_cmd_test.go index 2f7dcbbe91c..e5bb95a4665 100644 --- a/test/cli/power_user_cmd_test.go +++ b/test/cli/power_user_cmd_test.go @@ -66,7 +66,7 @@ func TestPowerUserCmdFlags(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - cmd, stdout, stderr := runSyftCommand(t, test.env, test.args...) + cmd, stdout, stderr := runSyft(t, test.env, test.args...) for _, traitFn := range test.assertions { traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode()) } diff --git a/test/cli/root_cmd_test.go b/test/cli/root_cmd_test.go index ec46ed1320c..5cdc33e64c5 100644 --- a/test/cli/root_cmd_test.go +++ b/test/cli/root_cmd_test.go @@ -36,12 +36,12 @@ func TestRootCmdAliasesToPackagesSubcommand(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - aliasCmd, aliasStdout, aliasStderr := runSyftCommand(t, test.env, request) + aliasCmd, aliasStdout, aliasStderr := runSyft(t, test.env, request) for _, traitFn := range test.assertions { traitFn(t, aliasStdout, aliasStderr, aliasCmd.ProcessState.ExitCode()) } - pkgCmd, pkgsStdout, pkgsStderr := runSyftCommand(t, test.env, "packages", request) + pkgCmd, pkgsStdout, pkgsStderr := runSyft(t, test.env, "packages", request) for _, traitFn := range test.assertions { traitFn(t, pkgsStdout, pkgsStderr, pkgCmd.ProcessState.ExitCode()) } @@ -102,7 +102,7 @@ func TestPersistentFlags(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - cmd, stdout, stderr := runSyftCommand(t, test.env, test.args...) + cmd, stdout, stderr := runSyft(t, test.env, test.args...) for _, traitFn := range test.assertions { traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode()) } diff --git a/test/cli/spdx_json_schema_test.go b/test/cli/spdx_json_schema_test.go index 468508def7b..f9d036c0518 100644 --- a/test/cli/spdx_json_schema_test.go +++ b/test/cli/spdx_json_schema_test.go @@ -59,7 +59,7 @@ func TestSPDXJSONSchema(t *testing.T) { args = append(args, a) } - _, stdout, _ := runSyftCommand(t, nil, args...) + _, stdout, _ := runSyft(t, nil, args...) if len(strings.Trim(stdout, "\n ")) < 100 { t.Fatalf("bad syft output: %q", stdout) diff --git a/test/cli/utils_test.go b/test/cli/utils_test.go index a651dd7382c..b52302833c9 100644 --- a/test/cli/utils_test.go +++ b/test/cli/utils_test.go @@ -19,8 +19,45 @@ func getFixtureImage(t testing.TB, fixtureImageName string) string { return imagetest.GetFixtureImageTarPath(t, fixtureImageName) } -func runSyftCommand(t testing.TB, env map[string]string, args ...string) (*exec.Cmd, string, string) { +func pullDockerImage(t testing.TB, image string) { + cmd := exec.Command("docker", "pull", image) + stdout, stderr := runCommand(cmd, nil) + if cmd.ProcessState.ExitCode() != 0 { + t.Log("STDOUT", stdout) + t.Log("STDERR", stderr) + t.Fatalf("could not pull docker image") + } +} + +func runSyftInDocker(t testing.TB, env map[string]string, image string, args ...string) (*exec.Cmd, string, string) { + allArgs := append( + []string{ + "run", + "-t", + "-e", + "SYFT_CHECK_FOR_APP_UPDATE=false", + "-v", + fmt.Sprintf("%s:/syft", getSyftBinaryLocationByOS(t, "linux")), + image, + "/syft", + }, + args..., + ) + cmd := exec.Command("docker", allArgs...) + stdout, stderr := runCommand(cmd, env) + return cmd, stdout, stderr +} + +func runSyft(t testing.TB, env map[string]string, args ...string) (*exec.Cmd, string, string) { cmd := getSyftCommand(t, args...) + if env != nil { + env["SYFT_CHECK_FOR_APP_UPDATE"] = "false" + } + stdout, stderr := runCommand(cmd, env) + return cmd, stdout, stderr +} + +func runCommand(cmd *exec.Cmd, env map[string]string) (string, string) { if env != nil { var envList []string for key, val := range env { @@ -38,29 +75,32 @@ func runSyftCommand(t testing.TB, env map[string]string, args ...string) (*exec. // ignore errors since this may be what the test expects cmd.Run() - return cmd, stdout.String(), stderr.String() + return stdout.String(), stderr.String() } func getSyftCommand(t testing.TB, args ...string) *exec.Cmd { + return exec.Command(getSyftBinaryLocation(t), args...) +} - var binaryLocation string +func getSyftBinaryLocation(t testing.TB) string { if os.Getenv("SYFT_BINARY_LOCATION") != "" { // SYFT_BINARY_LOCATION is the absolute path to the snapshot binary - binaryLocation = os.Getenv("SYFT_BINARY_LOCATION") - } else { - // note: there is a subtle - vs _ difference between these versions - switch runtime.GOOS { - case "darwin": - binaryLocation = path.Join(repoRoot(t), fmt.Sprintf("snapshot/syft-macos_darwin_%s/syft", runtime.GOARCH)) - case "linux": - binaryLocation = path.Join(repoRoot(t), fmt.Sprintf("snapshot/syft_linux_%s/syft", runtime.GOARCH)) - default: - t.Fatalf("unsupported OS: %s", runtime.GOOS) - } - + return os.Getenv("SYFT_BINARY_LOCATION") } + return getSyftBinaryLocationByOS(t, runtime.GOOS) +} - return exec.Command(binaryLocation, args...) +func getSyftBinaryLocationByOS(t testing.TB, goOS string) string { + // note: there is a subtle - vs _ difference between these versions + switch goOS { + case "darwin": + return path.Join(repoRoot(t), fmt.Sprintf("snapshot/syft-macos_darwin_%s/syft", runtime.GOARCH)) + case "linux": + return path.Join(repoRoot(t), fmt.Sprintf("snapshot/syft_linux_%s/syft", runtime.GOARCH)) + default: + t.Fatalf("unsupported OS: %s", runtime.GOOS) + } + return "" } func repoRoot(t testing.TB) string { diff --git a/ui/event_handlers.go b/ui/event_handlers.go index 1d8f1aa2b07..c37582bd68f 100644 --- a/ui/event_handlers.go +++ b/ui/event_handlers.go @@ -8,13 +8,12 @@ import ( "sync" "time" - "github.com/anchore/syft/internal/ui/components" - - "github.com/anchore/stereoscope/pkg/image/docker" - "github.com/dustin/go-humanize" - stereoEventParsers "github.com/anchore/stereoscope/pkg/event/parsers" + "github.com/anchore/stereoscope/pkg/image/docker" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/ui/components" syftEventParsers "github.com/anchore/syft/syft/event/parsers" + "github.com/dustin/go-humanize" "github.com/gookit/color" "github.com/wagoodman/go-partybus" "github.com/wagoodman/go-progress" @@ -401,6 +400,49 @@ func FileMetadataCatalogerStartedHandler(ctx context.Context, fr *frame.Frame, e return err } +// FileIndexingStartedHandler shows the intermittent indexing progress from a directory resolver. +// nolint:dupl +func FileIndexingStartedHandler(ctx context.Context, fr *frame.Frame, event partybus.Event, wg *sync.WaitGroup) error { + path, prog, err := syftEventParsers.ParseFileIndexingStarted(event) + if err != nil { + return fmt.Errorf("bad %s event: %w", event.Type, err) + } + + line, err := fr.Append() + if err != nil { + return err + } + wg.Add(1) + + _, spinner := startProcess() + stream := progress.Stream(ctx, prog, interval) + title := tileFormat.Sprintf("Indexing %s", path) + + formatFn := func(_ progress.Progress) { + spin := color.Magenta.Sprint(spinner.Next()) + if err != nil { + _, _ = io.WriteString(line, fmt.Sprintf("Error: %+v", err)) + } else { + auxInfo := auxInfoFormat.Sprintf("[file: %s]", internal.TruncateMiddleEllipsis(prog.Stage(), 100)) + _, _ = io.WriteString(line, fmt.Sprintf(statusTitleTemplate+"%s", spin, title, auxInfo)) + } + } + + go func() { + defer wg.Done() + + formatFn(progress.Progress{}) + for p := range stream { + formatFn(p) + } + + spin := color.Green.Sprint(completedStatus) + title = tileFormat.Sprintf("Indexed %s", path) + _, _ = io.WriteString(line, fmt.Sprintf(statusTitleTemplate, spin, title)) + }() + return err +} + // FileMetadataCatalogerStartedHandler shows the intermittent secrets searching progress. // nolint:dupl func FileDigestsCatalogerStartedHandler(ctx context.Context, fr *frame.Frame, event partybus.Event, wg *sync.WaitGroup) error { diff --git a/ui/handler.go b/ui/handler.go index a1f920727c6..bd11733eb37 100644 --- a/ui/handler.go +++ b/ui/handler.go @@ -27,7 +27,7 @@ func NewHandler() *Handler { // RespondsTo indicates if the handler is capable of handling the given event. func (r *Handler) RespondsTo(event partybus.Event) bool { switch event.Type { - case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.PackageCatalogerStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.ImportStarted: + case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.PackageCatalogerStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted: return true default: return false @@ -58,6 +58,9 @@ func (r *Handler) Handle(ctx context.Context, fr *frame.Frame, event partybus.Ev case syftEvent.FileMetadataCatalogerStarted: return FileMetadataCatalogerStartedHandler(ctx, fr, event, wg) + case syftEvent.FileIndexingStarted: + return FileIndexingStartedHandler(ctx, fr, event, wg) + case syftEvent.ImportStarted: return ImportStartedHandler(ctx, fr, event, wg) }