From 987578e782925d14b706460dbfd02db171a54b3a Mon Sep 17 00:00:00 2001 From: adammcclenaghan Date: Tue, 15 Oct 2024 11:57:41 +0100 Subject: [PATCH 1/2] Use file indexer when scanning with file source Prevents filesystem walks when scanning a single file, to optimise memory & scan times in case the scanned file lives in a directory containing many files. Signed-off-by: adammcclenaghan --- syft/internal/fileresolver/directory.go | 2 + syft/internal/fileresolver/file.go | 281 ++++++++++++++++++ syft/internal/fileresolver/file_indexer.go | 223 ++++++++++++++ .../fileresolver/file_indexer_test.go | 103 +++++++ syft/internal/fileresolver/file_test.go | 269 +++++++++++++++++ syft/source/filesource/file_source.go | 45 +-- 6 files changed, 887 insertions(+), 36 deletions(-) create mode 100644 syft/internal/fileresolver/file.go create mode 100644 syft/internal/fileresolver/file_indexer.go create mode 100644 syft/internal/fileresolver/file_indexer_test.go create mode 100644 syft/internal/fileresolver/file_test.go diff --git a/syft/internal/fileresolver/directory.go b/syft/internal/fileresolver/directory.go index a86092f67b5..20f4b892521 100644 --- a/syft/internal/fileresolver/directory.go +++ b/syft/internal/fileresolver/directory.go @@ -150,6 +150,8 @@ func (r Directory) requestGlob(pattern string) (string, error) { } // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. 
+// +//nolint:dupl func (r Directory) FilesByGlob(patterns ...string) ([]file.Location, error) { uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() uniqueLocations := make([]file.Location, 0) diff --git a/syft/internal/fileresolver/file.go b/syft/internal/fileresolver/file.go new file mode 100644 index 00000000000..722a1d6f264 --- /dev/null +++ b/syft/internal/fileresolver/file.go @@ -0,0 +1,281 @@ +package fileresolver + +import ( + "context" + "errors" + "fmt" + "io" + "os" + + stereoscopeFile "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/windows" +) + +// Compile time assurance that we meet the Resolver interface. +var _ file.Resolver = (*File)(nil) + +// File implements path and content access for the file data source. +type File struct { + path string + chroot ChrootContext + tree filetree.Reader + index filetree.IndexReader + searchContext filetree.Searcher + indexer *fileIndexer +} + +// parent should be the symlink free absolute path to the parent directory +// path is the filepath of the file we're creating content access for +func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, error) { + chroot, err := NewChrootContextFromCWD(parent, parent) + if err != nil { + return nil, fmt.Errorf("unable to interpret chroot context: %w", err) + } + + cleanBase := chroot.Base() + + file := &File{ + path: path, + chroot: *chroot, + tree: filetree.New(), + index: filetree.NewIndex(), + indexer: newFileIndexer(path, cleanBase, pathFilters...), + } + + return file, file.buildIndex() +} + +func (r *File) buildIndex() error { + if r.indexer == nil { + return fmt.Errorf("no file indexer configured") + } + tree, index, err := r.indexer.build() + if err != nil { + return err + } + + r.tree = tree + r.index = index + r.searchContext = filetree.NewSearchContext(tree, index) + + return nil 
+} + +// TODO: These are Copy-pasted from Directory.go - should we consider splitting them out into a shared place? + +func (r File) requestPath(userPath string) (string, error) { + return r.chroot.ToNativePath(userPath) +} + +// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the file resolver. +func (r File) responsePath(path string) string { + return r.chroot.ToChrootPath(path) +} + +// HasPath indicates if the given path exists in the underlying source. +func (r *File) HasPath(userPath string) bool { + requestPath, err := r.requestPath(userPath) + if err != nil { + return false + } + return r.tree.HasPath(stereoscopeFile.Path(requestPath)) +} + +// Stringer to represent a file path data source +func (r File) String() string { + return fmt.Sprintf("file:%s", r.path) +} + +// FilesByPath returns all file.References that match the given paths from the file index. +func (r File) FilesByPath(userPaths ...string) ([]file.Location, error) { + var references = make([]file.Location, 0) + + for _, userPath := range userPaths { + userStrPath, err := r.requestPath(userPath) + if err != nil { + log.Warnf("unable to get file by path=%q : %+v", userPath, err) + continue + } + + // we should be resolving symlinks and preserving this information as a AccessPath to the real file + ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks) + if err != nil { + log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err) + continue + } + + if !ref.HasReference() { + continue + } + + entry, err := r.index.Get(*ref.Reference) + if err != nil { + log.Warnf("unable to get file by path=%q : %+v", userPath, err) + continue + } + + // don't consider directories + if entry.Metadata.IsDir() { + continue + } + + if windows.HostRunningOnWindows() { + userStrPath = windows.ToPosix(userStrPath) + } + + if ref.HasReference() { + references = append(references, + 
file.NewVirtualLocationFromDirectory( + r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root + r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root + *ref.Reference, + ), + ) + } + } + + return references, nil +} + +func (r File) requestGlob(pattern string) (string, error) { + return r.chroot.ToNativeGlob(pattern) +} + +// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. +// +//nolint:dupl +func (r File) FilesByGlob(patterns ...string) ([]file.Location, error) { + uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() + uniqueLocations := make([]file.Location, 0) + + for _, pattern := range patterns { + requestGlob, err := r.requestGlob(pattern) + if err != nil { + return nil, err + } + refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) { + continue + } + entry, err := r.index.Get(*refVia.Reference) + if err != nil { + return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err) + } + + // don't consider directories + if entry.Metadata.IsDir() { + continue + } + + loc := file.NewVirtualLocationFromDirectory( + r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root + r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root + *refVia.Reference, + ) + uniqueFileIDs.Add(*refVia.Reference) + uniqueLocations = append(uniqueLocations, loc) + } + } + + return uniqueLocations, nil +} + +// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. +// This is helpful when attempting to find a file that is in the same layer or lower as another file. 
+func (r *File) RelativeFileByPath(_ file.Location, path string) *file.Location { + paths, err := r.FilesByPath(path) + if err != nil { + return nil + } + if len(paths) == 0 { + return nil + } + + return &paths[0] +} + +// FileContentsByLocation fetches file contents for a single file reference relative to a directory. +// If the path does not exist an error is returned. +func (r File) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { + if location.RealPath == "" { + return nil, errors.New("empty path given") + } + + entry, err := r.index.Get(location.Reference()) + if err != nil { + return nil, err + } + + // don't consider directories + if entry.Type == stereoscopeFile.TypeDirectory { + return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath) + } + + // RealPath is posix so for windows file resolver we need to translate + // to its true on disk path. + filePath := string(location.Reference().RealPath) + if windows.HostRunningOnWindows() { + filePath = windows.FromPosix(filePath) + } + + return stereoscopeFile.NewLazyReadCloser(filePath), nil +} + +func (r *File) AllLocations(ctx context.Context) <-chan file.Location { + results := make(chan file.Location) + go func() { + defer close(results) + for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) 
{ + select { + case <-ctx.Done(): + return + case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref): + continue + } + } + }() + return results +} + +func (r *File) FileMetadataByLocation(location file.Location) (file.Metadata, error) { + entry, err := r.index.Get(location.Reference()) + if err != nil { + return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist) + } + + return entry.Metadata, nil +} + +func (r *File) FilesByMIMEType(types ...string) ([]file.Location, error) { + uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() + uniqueLocations := make([]file.Location, 0) + + refVias, err := r.searchContext.SearchByMIMEType(types...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if !refVia.HasReference() { + continue + } + if uniqueFileIDs.Contains(*refVia.Reference) { + continue + } + location := file.NewVirtualLocationFromDirectory( + r.responsePath(string(refVia.Reference.RealPath)), + r.responsePath(string(refVia.RequestPath)), + *refVia.Reference, + ) + uniqueFileIDs.Add(*refVia.Reference) + uniqueLocations = append(uniqueLocations, location) + } + + return uniqueLocations, nil +} diff --git a/syft/internal/fileresolver/file_indexer.go b/syft/internal/fileresolver/file_indexer.go new file mode 100644 index 00000000000..fe110c4ced9 --- /dev/null +++ b/syft/internal/fileresolver/file_indexer.go @@ -0,0 +1,223 @@ +package fileresolver + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/internal/windows" + "github.com/wagoodman/go-progress" +) + +type fileIndexer struct { + path string + base string + pathIndexVisitors []PathIndexVisitor + errPaths map[string]error + tree filetree.ReadWriter + index filetree.Index +} + +func newFileIndexer(path, base string, visitors ...PathIndexVisitor) *fileIndexer { + i := 
&fileIndexer{ + path: path, + base: base, + tree: filetree.New(), + index: filetree.NewIndex(), + pathIndexVisitors: append( + []PathIndexVisitor{ + requireFileInfo, + disallowByFileType, + skipPathsByMountTypeAndName(path), + }, + visitors..., + ), + errPaths: make(map[string]error), + } + + return i +} + +// Build the indexer +func (r *fileIndexer) build() (filetree.Reader, filetree.IndexReader, error) { + return r.tree, r.index, index(r.path, r.indexPath) +} + +// Index file at the given path +// A file indexer simply indexes the file and its directory. +func index(path string, indexer func(string, *progress.Stage) error) error { + // We want to index the file at the provided path and its parent directory. + // We need to probably check that we have file access + // We also need to determine what to do when the file itself is a symlink. + stager, prog := indexingProgress(path) + defer prog.SetCompleted() + + err := indexer(path, stager) + if err != nil { + return fmt.Errorf("unable to index filesystem path=%q: %w", path, err) + } + + return nil +} + +// indexPath will index the file at the provided path as well as its parent directory. +// It expects path to be a file, not a directory. +// If a directory is provided then an error will be returned. Additionally, any IO or +// permissions errors on the file at path or its parent directory will return an error. +// Filter functions provided to the indexer are honoured, so if the path provided (or its parent +// directory) is filtered by a filter function, an error is returned. 
+func (r *fileIndexer) indexPath(path string, stager *progress.Stage) error { + log.WithFields("path", path).Trace("indexing file path") + + absPath, err := filepath.Abs(path) + if err != nil { + return err + } + + // Protect against callers trying to call file_indexer with directories + fi, err := os.Stat(absPath) + // The directory indexer ignores stat errors, however this file indexer won't ignore them + if err != nil { + return fmt.Errorf("unable to stat path=%q: %w", path, err) + } + if fi.IsDir() { + return fmt.Errorf("unable to index file, given path was a directory=%q", path) + } + + absSymlinkFreeFilePath, err := absoluteSymlinkFreePathToFile(path) + if err != nil { + return err + } + + // Now index the file and its parent directory + // We try to index the parent directory first, because if the parent directory + // is ignored by any filter function, then we must ensure we also ignore the file. + absSymlinkFreeParent, err := absoluteSymlinkFreePathToParent(absSymlinkFreeFilePath) + if err != nil { + return err + } + parentFi, err := os.Stat(absSymlinkFreeParent) + if err != nil { + return fmt.Errorf("unable to stat parent of file=%q: %w", absSymlinkFreeParent, err) + } + + stager.Current = absSymlinkFreeParent + indexParentErr := r.filterAndIndex(absSymlinkFreeParent, parentFi) + if indexParentErr != nil { + return indexParentErr + } + + // We have indexed the parent successfully, now attempt to index the file. 
+ stager.Current = absSymlinkFreeFilePath + indexFileErr := r.filterAndIndex(absSymlinkFreeFilePath, fi) + if indexFileErr != nil { + return indexFileErr + } + + return nil +} + +func (r *fileIndexer) filterAndIndex(path string, info os.FileInfo) error { + // check if any of the filters want us to ignore this path + for _, filterFn := range r.pathIndexVisitors { + if filterFn == nil { + continue + } + + if filterErr := filterFn(r.base, path, info, nil); filterErr != nil { + // A filter function wants us to ignore this path, honour it + return filterErr + } + } + + // here we check to see if we need to normalize paths to posix on the way in coming from windows + if windows.HostRunningOnWindows() { + path = windows.ToPosix(path) + } + + err := r.addPathToIndex(path, info) + // If we hit file access errors, isFileAccessErr will handle logging & adding + // the path to the errPaths map. + // While the directory_indexer does not let these cause the indexer to throw + // we will here, as not having access to the file we index for a file source + // probably makes the file source creation useless? I need to check with Syft maintainers. + // This also poses the question, is errPaths worthwhile for file_indexer? + if r.isFileAccessErr(path, err) { + return err + } + + return nil +} + +// Add path to index. File indexer doesn't need to support symlink, as we should have abs symlink free path. +// If we somehow get a symlink here, report as an error. 
+func (r *fileIndexer) addPathToIndex(path string, info os.FileInfo) error { + switch t := file.TypeFromMode(info.Mode()); t { + case file.TypeDirectory: + return r.addDirectoryToIndex(path, info) + case file.TypeRegular: + return r.addFileToIndex(path, info) + default: + return fmt.Errorf("unsupported file type: %s", t) + } +} + +func (r *fileIndexer) addDirectoryToIndex(path string, info os.FileInfo) error { + ref, err := r.tree.AddDir(file.Path(path)) + if err != nil { + return err + } + + metadata := file.NewMetadataFromPath(path, info) + r.index.Add(*ref, metadata) + + return nil +} + +func (r *fileIndexer) addFileToIndex(path string, info os.FileInfo) error { + ref, err := r.tree.AddFile(file.Path(path)) + if err != nil { + return err + } + + metadata := file.NewMetadataFromPath(path, info) + r.index.Add(*ref, metadata) + + return nil +} + +// Get absolute symlink free path to parent of the file +func absoluteSymlinkFreePathToParent(path string) (string, error) { + absFilePath, err := absoluteSymlinkFreePathToFile(path) + if err != nil { + return "", err + } + + return filepath.Dir(absFilePath), nil +} + +// Get absolute symlink free path to the file +func absoluteSymlinkFreePathToFile(path string) (string, error) { + absAnalysisPath, err := filepath.Abs(path) + if err != nil { + return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err) + } + dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath) + if err != nil { + return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err) + } + return dereferencedAbsAnalysisPath, nil +} + +func (r *fileIndexer) isFileAccessErr(path string, err error) bool { + // don't allow for errors to stop indexing, keep track of the paths and continue. 
+ if err != nil { + log.Warnf("unable to access path=%q: %+v", path, err) + r.errPaths[path] = err + return true + } + return false +} diff --git a/syft/internal/fileresolver/file_indexer_test.go b/syft/internal/fileresolver/file_indexer_test.go new file mode 100644 index 00000000000..cce3981382d --- /dev/null +++ b/syft/internal/fileresolver/file_indexer_test.go @@ -0,0 +1,103 @@ +package fileresolver + +import ( + "github.com/anchore/stereoscope/pkg/file" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "io/fs" + "os" + "path" + "testing" +) + +// - Verify that both the parent and the path are indexed +func Test_index(t *testing.T) { + testPath := "test-fixtures/system_paths/target/home/place" + indexer := newFileIndexer(testPath, "", make([]PathIndexVisitor, 0)...) + tree, index, err := indexer.build() + require.NoError(t, err) + + tests := []struct { + name string + path string + }{ + { + name: "has path", + path: "test-fixtures/system_paths/target/home/place", + }, + { + name: "has parent dir", + path: "test-fixtures/system_paths/target/home", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + info, err := os.Stat(test.path) + assert.NoError(t, err) + + // note: the index uses absolute paths, so assertions MUST keep this in mind + cwd, err := os.Getwd() + require.NoError(t, err) + + p := file.Path(path.Join(cwd, test.path)) + assert.Equal(t, true, tree.HasPath(p)) + exists, ref, err := tree.File(p) + assert.Equal(t, true, exists) + if assert.NoError(t, err) { + return + } + + entry, err := index.Get(*ref.Reference) + require.NoError(t, err) + assert.Equal(t, info.Mode(), entry.Mode) + }) + } + +} + +// - Verify that directories are rejected +func Test_indexRejectsDirectory(t *testing.T) { + dirPath := "test-fixtures/system_paths/target/home" + indexer := newFileIndexer(dirPath, "", make([]PathIndexVisitor, 0)...) 
+ _, _, err := indexer.build() + require.Error(t, err) +} + +// - Verify ignores if filterAndIndex sets up a filter for the filepath +func Test_ignoresPathIfFiltered(t *testing.T) { + testPath := "test-fixtures/system_paths/target/home/place" + cwd, cwdErr := os.Getwd() + require.NoError(t, cwdErr) + ignorePath := path.Join(cwd, testPath) + filterFn := func(_, path string, _ os.FileInfo, _ error) error { + if path == ignorePath { + return ErrSkipPath + } + + return nil + } + indexer := newFileIndexer(testPath, "", filterFn) + _, _, err := indexer.build() + require.Error(t, err) +} + +// - Verify ignores if filterAndIndex sets up a filter for the directory +func Test_ignoresPathIfParentFiltered(t *testing.T) { + testPath := "test-fixtures/system_paths/target/home/place" + parentPath := "test-fixtures/system_paths/target/home" + + cwd, cwdErr := os.Getwd() + require.NoError(t, cwdErr) + ignorePath := path.Join(cwd, parentPath) + filterFn := func(_, path string, _ os.FileInfo, _ error) error { + if path == ignorePath { + return fs.SkipDir + } + + return nil + } + indexer := newFileIndexer(testPath, "", filterFn) + _, _, err := indexer.build() + require.Error(t, err) +} diff --git a/syft/internal/fileresolver/file_test.go b/syft/internal/fileresolver/file_test.go new file mode 100644 index 00000000000..f3d24a2cab0 --- /dev/null +++ b/syft/internal/fileresolver/file_test.go @@ -0,0 +1,269 @@ +package fileresolver + +import ( + "context" + stereoscopeFile "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/syft/syft/file" + "github.com/google/go-cmp/cmp" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + "io" + "os" + "path/filepath" + "sort" + "testing" +) + +func TestFileResolver_FilesByPath(t *testing.T) { + tests := []struct { + description string + filePath string // relative to cwd + fileByPathInput string + expectedRealPath string + expectedAccessPath string + 
cwd string + }{ + { + description: "Finds file if searched by filepath", + filePath: "./test-fixtures/req-resp/path/to/the/file.txt", + fileByPathInput: "file.txt", + expectedRealPath: "/file.txt", + expectedAccessPath: "/file.txt", + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + parentPath, err := absoluteSymlinkFreePathToParent(tt.filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, tt.filePath) + require.NoError(t, err) + require.NotNil(t, resolver) + + refs, err := resolver.FilesByPath(tt.fileByPathInput) + require.NoError(t, err) + if tt.expectedRealPath == "" { + require.Empty(t, refs) + return + } + require.Len(t, refs, 1) + assert.Equal(t, tt.expectedRealPath, refs[0].RealPath, "real path different") + assert.Equal(t, tt.expectedAccessPath, refs[0].AccessPath, "virtual path different") + }) + } +} + +// Test mutliple files by path -> Maybe not necessary for us here? +func TestFileResolver_MultipleFilesByPath(t *testing.T) { + tests := []struct { + description string + input []string + refCount int + }{ + { + description: "finds file ", + input: []string{"file.txt"}, + refCount: 1, + }, + { + description: "skip non-existing files", + input: []string{"file.txt", "bogus.txt"}, + refCount: 1, + }, + { + description: "does not return anything for non-existing files", + input: []string{"non-existing/bogus.txt", "another-bogus.txt"}, + refCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + filePath := "./test-fixtures/req-resp/path/to/the/file.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, filePath) + assert.NoError(t, err) + refs, err := resolver.FilesByPath(tt.input...) 
+ assert.NoError(t, err) + + if len(refs) != tt.refCount { + t.Errorf("unexpected number of refs: %d != %d", len(refs), tt.refCount) + } + }) + } +} + +func TestFileResolver_FilesByGlob(t *testing.T) { + filePath := "./test-fixtures/req-resp/path/to/the/file.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, filePath) + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("*.txt") + assert.NoError(t, err) + + assert.Len(t, refs, 1) +} + +func Test_fileResolver_FilesByMIMEType(t *testing.T) { + tests := []struct { + fixturePath string + mimeType string + expectedPaths *strset.Set + }{ + { + fixturePath: "./test-fixtures/image-simple/file-1.txt", + mimeType: "text/plain", + expectedPaths: strset.New("/file-1.txt"), + }, + } + for _, test := range tests { + t.Run(test.fixturePath, func(t *testing.T) { + filePath := "./test-fixtures/image-simple/file-1.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, filePath) + assert.NoError(t, err) + locations, err := resolver.FilesByMIMEType(test.mimeType) + assert.NoError(t, err) + assert.Equal(t, test.expectedPaths.Size(), len(locations)) + for _, l := range locations { + assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) + } + }) + } +} + +func Test_fileResolver_FileContentsByLocation(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + + filePath := "./test-fixtures/image-simple/file-1.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + r, err := NewFromFile(parentPath, filePath) + require.NoError(t, err) + + exists, existingPath, err := r.tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) + require.True(t, exists) 
+ require.NoError(t, err) + require.True(t, existingPath.HasReference()) + + tests := []struct { + name string + location file.Location + expects string + err bool + }{ + { + name: "use file reference for content requests", + location: file.NewLocationFromDirectory("some/place", *existingPath.Reference), + expects: "this file has contents", + }, + { + name: "error on empty file reference", + location: file.NewLocationFromDirectory("doesn't matter", stereoscopeFile.Reference{}), + err: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + actual, err := r.FileContentsByLocation(test.location) + if test.err { + require.Error(t, err) + return + } + + require.NoError(t, err) + if test.expects != "" { + b, err := io.ReadAll(actual) + require.NoError(t, err) + assert.Equal(t, test.expects, string(b)) + } + }) + } +} + +func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) { + defer goleak.VerifyNone(t) + + filePath := "./test-fixtures/system_paths/target/home/place" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + resolver, err := NewFromFile(parentPath, filePath) + require.NoError(t, err) + + var dirLoc *file.Location + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + for loc := range resolver.AllLocations(ctx) { + entry, err := resolver.index.Get(loc.Reference()) + require.NoError(t, err) + if entry.Metadata.IsDir() { + dirLoc = &loc + break + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + +func TestFileResolver_AllLocations(t *testing.T) { + // Verify both the parent and the file itself are indexed + filePath := "./test-fixtures/system_paths/target/home/place" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + resolver, err := NewFromFile(parentPath, 
filePath) + require.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations(context.Background()) { + paths.Add(loc.RealPath) + } + expected := []string{ + "/place", + "", // This is how we see the parent dir, since we're resolving wrt the parent directory. + } + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) +} + +func Test_AllLocationsDoesNotLeakGoRoutine(t *testing.T) { + defer goleak.VerifyNone(t) + filePath := "./test-fixtures/system_paths/target/home/place" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + resolver, err := NewFromFile(parentPath, filePath) + require.NoError(t, err) + + require.NoError(t, err) + ctx, cancel := context.WithCancel(context.Background()) + for range resolver.AllLocations(ctx) { + break + } + cancel() +} diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go index d810a95c2f9..501d31bcdfe 100644 --- a/syft/source/filesource/file_source.go +++ b/syft/source/filesource/file_source.go @@ -3,7 +3,6 @@ package filesource import ( "crypto" "fmt" - "io/fs" "os" "path" "path/filepath" @@ -36,7 +35,7 @@ type fileSource struct { id artifact.ID digestForVersion string config Config - resolver *fileresolver.Directory + resolver file.Resolver mutex *sync.Mutex closer func() error digests []file.Digest @@ -165,48 +164,22 @@ func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) { return nil, err } - var res *fileresolver.Directory if isArchiveAnalysis { // this is an analysis of an archive file... we should scan the directory where the archive contents - res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...) 
- if err != nil { - return nil, fmt.Errorf("unable to create directory resolver: %w", err) - } - } else { - // this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we - // don't want to include any other files except this the given file. - exclusionFunctions = append([]fileresolver.PathIndexVisitor{ - - // note: we should exclude these kinds of paths first before considering any other user-provided exclusions - func(_, p string, _ os.FileInfo, _ error) error { - if p == absParentDir { - // this is the root directory... always include it - return nil - } - - if filepath.Dir(p) != absParentDir { - // we are no longer in the root directory containing the single file we want to scan... - // we should skip the directory this path resides in entirely! - return fs.SkipDir - } - - if filepath.Base(p) != filepath.Base(s.config.Path) { - // we're in the root directory, but this is not the file we want to scan... - // we should selectively skip this file (not the directory we're in). - return fileresolver.ErrSkipPath - } - return nil - }, - }, exclusionFunctions...) - - res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...) + res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...) if err != nil { return nil, fmt.Errorf("unable to create directory resolver: %w", err) } + s.resolver = res + return s.resolver, nil } + // This is analysis of a single file. Use file indexer. + res, err := fileresolver.NewFromFile(absParentDir, s.analysisPath, exclusionFunctions...) + if err != nil { + return nil, fmt.Errorf("unable to create file resolver: %w", err) + } s.resolver = res - return s.resolver, nil } From 038ecaea457b733d284a60a0e9bf23ad005901ca Mon Sep 17 00:00:00 2001 From: adammcclenaghan Date: Fri, 18 Oct 2024 15:29:30 +0100 Subject: [PATCH 2/2] Create filetree resolver Shared behaviour for resolving indexed filetrees. 
Signed-off-by: adammcclenaghan --- syft/internal/fileresolver/directory.go | 237 +-- syft/internal/fileresolver/directory_test.go | 1348 -------------- syft/internal/fileresolver/file.go | 239 +-- syft/internal/fileresolver/file_test.go | 269 --- .../fileresolver/filetree_resolver.go | 229 +++ .../fileresolver/filetree_resolver_test.go | 1611 +++++++++++++++++ 6 files changed, 1860 insertions(+), 2073 deletions(-) delete mode 100644 syft/internal/fileresolver/file_test.go create mode 100644 syft/internal/fileresolver/filetree_resolver.go create mode 100644 syft/internal/fileresolver/filetree_resolver_test.go diff --git a/syft/internal/fileresolver/directory.go b/syft/internal/fileresolver/directory.go index 20f4b892521..1c60730b84e 100644 --- a/syft/internal/fileresolver/directory.go +++ b/syft/internal/fileresolver/directory.go @@ -1,17 +1,11 @@ package fileresolver import ( - "context" "errors" "fmt" - "io" - "os" - stereoscopeFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree" - "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/internal/windows" ) var ErrSkipPath = errors.New("skip path") @@ -20,12 +14,9 @@ var _ file.Resolver = (*Directory)(nil) // Directory implements path and content access for the directory data source. 
type Directory struct { - path string - chroot ChrootContext - tree filetree.Reader - index filetree.IndexReader - searchContext filetree.Searcher - indexer *directoryIndexer + filetreeResolver + path string + indexer *directoryIndexer } func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) { @@ -47,10 +38,12 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI cleanBase := chroot.Base() return &Directory{ - path: cleanRoot, - chroot: *chroot, - tree: filetree.New(), - index: filetree.NewIndex(), + path: cleanRoot, + filetreeResolver: filetreeResolver{ + chroot: *chroot, + tree: filetree.New(), + index: filetree.NewIndex(), + }, indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...), }, nil } @@ -66,222 +59,12 @@ func (r *Directory) buildIndex() error { r.tree = tree r.index = index - r.searchContext = filetree.NewSearchContext(tree, index) + r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index) return nil } -func (r Directory) requestPath(userPath string) (string, error) { - return r.chroot.ToNativePath(userPath) -} - -// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver. -func (r Directory) responsePath(path string) string { - return r.chroot.ToChrootPath(path) -} - -// HasPath indicates if the given path exists in the underlying source. -func (r *Directory) HasPath(userPath string) bool { - requestPath, err := r.requestPath(userPath) - if err != nil { - return false - } - return r.tree.HasPath(stereoscopeFile.Path(requestPath)) -} - // Stringer to represent a directory path data source func (r Directory) String() string { return fmt.Sprintf("dir:%s", r.path) } - -// FilesByPath returns all file.References that match the given paths from the directory. 
-func (r Directory) FilesByPath(userPaths ...string) ([]file.Location, error) { - var references = make([]file.Location, 0) - - for _, userPath := range userPaths { - userStrPath, err := r.requestPath(userPath) - if err != nil { - log.Warnf("unable to get file by path=%q : %+v", userPath, err) - continue - } - - // we should be resolving symlinks and preserving this information as a AccessPath to the real file - ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks) - if err != nil { - log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err) - continue - } - - if !ref.HasReference() { - continue - } - - entry, err := r.index.Get(*ref.Reference) - if err != nil { - log.Warnf("unable to get file by path=%q : %+v", userPath, err) - continue - } - - // don't consider directories - if entry.Metadata.IsDir() { - continue - } - - if windows.HostRunningOnWindows() { - userStrPath = windows.ToPosix(userStrPath) - } - - if ref.HasReference() { - references = append(references, - file.NewVirtualLocationFromDirectory( - r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root - r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root - *ref.Reference, - ), - ) - } - } - - return references, nil -} - -func (r Directory) requestGlob(pattern string) (string, error) { - return r.chroot.ToNativeGlob(pattern) -} - -// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. 
-// -//nolint:dupl -func (r Directory) FilesByGlob(patterns ...string) ([]file.Location, error) { - uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() - uniqueLocations := make([]file.Location, 0) - - for _, pattern := range patterns { - requestGlob, err := r.requestGlob(pattern) - if err != nil { - return nil, err - } - refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks) - if err != nil { - return nil, err - } - for _, refVia := range refVias { - if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) { - continue - } - entry, err := r.index.Get(*refVia.Reference) - if err != nil { - return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err) - } - - // don't consider directories - if entry.Metadata.IsDir() { - continue - } - - loc := file.NewVirtualLocationFromDirectory( - r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root - r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root - *refVia.Reference, - ) - uniqueFileIDs.Add(*refVia.Reference) - uniqueLocations = append(uniqueLocations, loc) - } - } - - return uniqueLocations, nil -} - -// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. -// This is helpful when attempting to find a file that is in the same layer or lower as another file. For the -// Directory, this is a simple path lookup. -func (r *Directory) RelativeFileByPath(_ file.Location, path string) *file.Location { - paths, err := r.FilesByPath(path) - if err != nil { - return nil - } - if len(paths) == 0 { - return nil - } - - return &paths[0] -} - -// FileContentsByLocation fetches file contents for a single file reference relative to a directory. -// If the path does not exist an error is returned. 
-func (r Directory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { - if location.RealPath == "" { - return nil, errors.New("empty path given") - } - - entry, err := r.index.Get(location.Reference()) - if err != nil { - return nil, err - } - - // don't consider directories - if entry.Type == stereoscopeFile.TypeDirectory { - return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath) - } - - // RealPath is posix so for windows directory resolver we need to translate - // to its true on disk path. - filePath := string(location.Reference().RealPath) - if windows.HostRunningOnWindows() { - filePath = windows.FromPosix(filePath) - } - - return stereoscopeFile.NewLazyReadCloser(filePath), nil -} - -func (r *Directory) AllLocations(ctx context.Context) <-chan file.Location { - results := make(chan file.Location) - go func() { - defer close(results) - for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) { - select { - case <-ctx.Done(): - return - case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref): - continue - } - } - }() - return results -} - -func (r *Directory) FileMetadataByLocation(location file.Location) (file.Metadata, error) { - entry, err := r.index.Get(location.Reference()) - if err != nil { - return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist) - } - - return entry.Metadata, nil -} - -func (r *Directory) FilesByMIMEType(types ...string) ([]file.Location, error) { - uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() - uniqueLocations := make([]file.Location, 0) - - refVias, err := r.searchContext.SearchByMIMEType(types...) 
- if err != nil { - return nil, err - } - for _, refVia := range refVias { - if !refVia.HasReference() { - continue - } - if uniqueFileIDs.Contains(*refVia.Reference) { - continue - } - location := file.NewVirtualLocationFromDirectory( - r.responsePath(string(refVia.Reference.RealPath)), - r.responsePath(string(refVia.RequestPath)), - *refVia.Reference, - ) - uniqueFileIDs.Add(*refVia.Reference) - uniqueLocations = append(uniqueLocations, location) - } - - return uniqueLocations, nil -} diff --git a/syft/internal/fileresolver/directory_test.go b/syft/internal/fileresolver/directory_test.go index 8c271c073f9..7aaff77ff88 100644 --- a/syft/internal/fileresolver/directory_test.go +++ b/syft/internal/fileresolver/directory_test.go @@ -4,858 +4,12 @@ package fileresolver import ( - "context" - "io" - "io/fs" "os" - "path/filepath" - "sort" - "strings" "testing" - "time" - "github.com/google/go-cmp/cmp" - "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/goleak" - - stereoscopeFile "github.com/anchore/stereoscope/pkg/file" - "github.com/anchore/syft/syft/file" ) -func TestDirectoryResolver_FilesByPath_request_response(t *testing.T) { - // / - // somewhere/ - // outside.txt - // root-link -> ./ - // path/ - // to/ - // abs-inside.txt -> /path/to/the/file.txt # absolute link to somewhere inside of the root - // rel-inside.txt -> ./the/file.txt # relative link to somewhere inside of the root - // the/ - // file.txt - // abs-outside.txt -> /somewhere/outside.txt # absolute link to outside of the root - // rel-outside -> ../../../somewhere/outside.txt # relative link to outside of the root - // - - testDir, err := os.Getwd() - require.NoError(t, err) - relative := filepath.Join("test-fixtures", "req-resp") - absolute := filepath.Join(testDir, relative) - - absInsidePath := filepath.Join(absolute, "path", "to", "abs-inside.txt") - absOutsidePath := filepath.Join(absolute, "path", "to", "the", 
"abs-outside.txt") - - relativeViaLink := filepath.Join(relative, "root-link") - absoluteViaLink := filepath.Join(absolute, "root-link") - - relativeViaDoubleLink := filepath.Join(relative, "root-link", "root-link") - absoluteViaDoubleLink := filepath.Join(absolute, "root-link", "root-link") - - cleanup := func() { - _ = os.Remove(absInsidePath) - _ = os.Remove(absOutsidePath) - } - - // ensure the absolute symlinks are cleaned up from any previous runs - cleanup() - - require.NoError(t, os.Symlink(filepath.Join(absolute, "path", "to", "the", "file.txt"), absInsidePath)) - require.NoError(t, os.Symlink(filepath.Join(absolute, "somewhere", "outside.txt"), absOutsidePath)) - - t.Cleanup(cleanup) - - cases := []struct { - name string - cwd string - root string - base string - input string - expectedRealPath string - expectedAccessPath string // note: if empty it will be assumed to match the expectedRealPath - }{ - { - name: "relative root, relative request, direct", - root: relative, - input: "path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "abs root, relative request, direct", - root: absolute, - input: "path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "relative root, abs request, direct", - root: relative, - input: "/path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "abs root, abs request, direct", - root: absolute, - input: "/path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - // cwd within root... 
- { - name: "relative root, relative request, direct, cwd within root", - cwd: filepath.Join(relative, "path/to"), - root: "../../", - input: "path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "abs root, relative request, direct, cwd within root", - cwd: filepath.Join(relative, "path/to"), - root: absolute, - input: "path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "relative root, abs request, direct, cwd within root", - cwd: filepath.Join(relative, "path/to"), - root: "../../", - input: "/path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "abs root, abs request, direct, cwd within root", - cwd: filepath.Join(relative, "path/to"), - - root: absolute, - input: "/path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - // cwd within symlink root... - { - name: "relative root, relative request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: "./", - input: "path/to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. - expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "path/to/the/file.txt", - expectedAccessPath: "path/to/the/file.txt", - }, - { - name: "abs root, relative request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: absoluteViaLink, - input: "path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "relative root, abs request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: "./", - input: "/path/to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? 
- // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. - expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "path/to/the/file.txt", - expectedAccessPath: "path/to/the/file.txt", - }, - { - name: "abs root, abs request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: absoluteViaLink, - input: "/path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - // cwd within symlink root, request nested within... - { - name: "relative root, relative nested request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: "./path", - input: "to/the/file.txt", - // note: why not expect "to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. - expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "to/the/file.txt", - expectedAccessPath: "to/the/file.txt", - }, - { - name: "abs root, relative nested request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: filepath.Join(absoluteViaLink, "path"), - input: "to/the/file.txt", - expectedRealPath: "to/the/file.txt", - }, - { - name: "relative root, abs nested request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: "./path", - input: "/to/the/file.txt", - // note: why not expect "to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. 
- expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "to/the/file.txt", - expectedAccessPath: "to/the/file.txt", - }, - { - name: "abs root, abs nested request, direct, cwd within symlink root", - cwd: relativeViaLink, - root: filepath.Join(absoluteViaLink, "path"), - input: "/to/the/file.txt", - expectedRealPath: "to/the/file.txt", - }, - // cwd within DOUBLE symlink root... - { - name: "relative root, relative request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: "./", - input: "path/to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. - expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "path/to/the/file.txt", - expectedAccessPath: "path/to/the/file.txt", - }, - { - name: "abs root, relative request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: absoluteViaDoubleLink, - input: "path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - { - name: "relative root, abs request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: "./", - input: "/path/to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. 
- expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "path/to/the/file.txt", - expectedAccessPath: "path/to/the/file.txt", - }, - { - name: "abs root, abs request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: absoluteViaDoubleLink, - input: "/path/to/the/file.txt", - expectedRealPath: "path/to/the/file.txt", - }, - // cwd within DOUBLE symlink root, request nested within... - { - name: "relative root, relative nested request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: "./path", - input: "to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. - expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "to/the/file.txt", - expectedAccessPath: "to/the/file.txt", - }, - { - name: "abs root, relative nested request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: filepath.Join(absoluteViaDoubleLink, "path"), - input: "to/the/file.txt", - expectedRealPath: "to/the/file.txt", - }, - { - name: "relative root, abs nested request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: "./path", - input: "/to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. 
- expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "to/the/file.txt", - expectedAccessPath: "to/the/file.txt", - }, - { - name: "abs root, abs nested request, direct, cwd within (double) symlink root", - cwd: relativeViaDoubleLink, - root: filepath.Join(absoluteViaDoubleLink, "path"), - input: "/to/the/file.txt", - expectedRealPath: "to/the/file.txt", - }, - // cwd within DOUBLE symlink root, request nested DEEP within... - { - name: "relative root, relative nested request, direct, cwd deep within (double) symlink root", - cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), - root: "../", - input: "to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. - expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "to/the/file.txt", - expectedAccessPath: "to/the/file.txt", - }, - { - name: "abs root, relative nested request, direct, cwd deep within (double) symlink root", - cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), - root: filepath.Join(absoluteViaDoubleLink, "path"), - input: "to/the/file.txt", - expectedRealPath: "to/the/file.txt", - }, - { - name: "relative root, abs nested request, direct, cwd deep within (double) symlink root", - cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), - root: "../", - input: "/to/the/file.txt", - // note: why not expect "path/to/the/file.txt" here? - // this is because we don't know that the path used to access this path (which is a link within - // the root) resides within the root. Without this information it appears as if this file resides - // outside the root. 
- expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), - //expectedRealPath: "to/the/file.txt", - expectedAccessPath: "to/the/file.txt", - }, - { - name: "abs root, abs nested request, direct, cwd deep within (double) symlink root", - cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), - root: filepath.Join(absoluteViaDoubleLink, "path"), - input: "/to/the/file.txt", - expectedRealPath: "to/the/file.txt", - }, - // link to outside of root cases... - { - name: "relative root, relative request, abs indirect (outside of root)", - root: filepath.Join(relative, "path"), - input: "to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "abs root, relative request, abs indirect (outside of root)", - root: filepath.Join(absolute, "path"), - input: "to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "relative root, abs request, abs indirect (outside of root)", - root: filepath.Join(relative, "path"), - input: "/to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "abs root, abs request, abs indirect (outside of root)", - root: filepath.Join(absolute, "path"), - input: "/to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "relative root, relative request, relative indirect (outside of root)", - root: filepath.Join(relative, "path"), - input: "to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "abs root, relative request, relative indirect (outside of root)", - root: filepath.Join(absolute, "path"), - input: 
"to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "relative root, abs request, relative indirect (outside of root)", - root: filepath.Join(relative, "path"), - input: "/to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "abs root, abs request, relative indirect (outside of root)", - root: filepath.Join(absolute, "path"), - input: "/to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - // link to outside of root cases... cwd within symlink root - { - name: "relative root, relative request, abs indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: "path", - input: "to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "abs root, relative request, abs indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: filepath.Join(absolute, "path"), - input: "to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "relative root, abs request, abs indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: "path", - input: "/to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/abs-outside.txt", - }, - { - name: "abs root, abs request, abs indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: filepath.Join(absolute, "path"), - input: "/to/the/abs-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: 
"to/the/abs-outside.txt", - }, - { - name: "relative root, relative request, relative indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: "path", - input: "to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "abs root, relative request, relative indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: filepath.Join(absolute, "path"), - input: "to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "relative root, abs request, relative indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: "path", - input: "/to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "abs root, abs request, relative indirect (outside of root), cwd within symlink root", - cwd: relativeViaLink, - root: filepath.Join(absolute, "path"), - input: "/to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "relative root, relative request, relative indirect (outside of root), cwd within DOUBLE symlink root", - cwd: relativeViaDoubleLink, - root: "path", - input: "to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "abs root, relative request, relative indirect (outside of root), cwd within DOUBLE symlink root", - cwd: relativeViaDoubleLink, - root: filepath.Join(absolute, "path"), - input: "to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "relative root, abs request, 
relative indirect (outside of root), cwd within DOUBLE symlink root", - cwd: relativeViaDoubleLink, - root: "path", - input: "/to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - { - name: "abs root, abs request, relative indirect (outside of root), cwd within DOUBLE symlink root", - cwd: relativeViaDoubleLink, - root: filepath.Join(absolute, "path"), - input: "/to/the/rel-outside.txt", - expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), - expectedAccessPath: "to/the/rel-outside.txt", - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - if c.expectedAccessPath == "" { - c.expectedAccessPath = c.expectedRealPath - } - - // we need to mimic a shell, otherwise we won't get a path within a symlink - targetPath := filepath.Join(testDir, c.cwd) - t.Setenv("PWD", filepath.Clean(targetPath)) - - require.NoError(t, err) - require.NoError(t, os.Chdir(targetPath)) - t.Cleanup(func() { - require.NoError(t, os.Chdir(testDir)) - }) - - resolver, err := NewFromDirectory(c.root, c.base) - require.NoError(t, err) - require.NotNil(t, resolver) - - refs, err := resolver.FilesByPath(c.input) - require.NoError(t, err) - if c.expectedRealPath == "" { - require.Empty(t, refs) - return - } - require.Len(t, refs, 1) - assert.Equal(t, c.expectedRealPath, refs[0].RealPath, "real path different") - assert.Equal(t, c.expectedAccessPath, refs[0].AccessPath, "virtual path different") - }) - } -} - -func TestDirectoryResolver_FilesByPath_relativeRoot(t *testing.T) { - cases := []struct { - name string - relativeRoot string - input string - expected []string - }{ - { - name: "should find a file from an absolute input", - relativeRoot: "./test-fixtures/", - input: "/image-symlinks/file-1.txt", - expected: []string{ - "image-symlinks/file-1.txt", - }, - }, - { - name: "should find a file from a relative path", - relativeRoot: "./test-fixtures/", - input: 
"image-symlinks/file-1.txt", - expected: []string{ - "image-symlinks/file-1.txt", - }, - }, - { - name: "should find a file from a relative path (root above cwd)", - // TODO: refactor me! this test depends on the structure of the source dir not changing, which isn't great - relativeRoot: "../", - input: "fileresolver/directory.go", - expected: []string{ - "fileresolver/directory.go", - }, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - resolver, err := NewFromDirectory(c.relativeRoot, "") - assert.NoError(t, err) - - refs, err := resolver.FilesByPath(c.input) - require.NoError(t, err) - assert.Len(t, refs, len(c.expected)) - s := strset.New() - for _, actual := range refs { - s.Add(actual.RealPath) - } - assert.ElementsMatch(t, c.expected, s.List()) - }) - } -} - -func TestDirectoryResolver_FilesByPath_absoluteRoot(t *testing.T) { - cases := []struct { - name string - relativeRoot string - input string - expected []string - }{ - { - name: "should find a file from an absolute input", - relativeRoot: "./test-fixtures/", - input: "/image-symlinks/file-1.txt", - expected: []string{ - "image-symlinks/file-1.txt", - }, - }, - { - name: "should find a file from a relative path", - relativeRoot: "./test-fixtures/", - input: "image-symlinks/file-1.txt", - expected: []string{ - "image-symlinks/file-1.txt", - }, - }, - { - name: "should find a file from a relative path (root above cwd)", - // TODO: refactor me! 
this test depends on the structure of the source dir not changing, which isn't great - relativeRoot: "../", - input: "fileresolver/directory.go", - expected: []string{ - "fileresolver/directory.go", - }, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - // note: this test is all about asserting correct functionality when the given analysis path - // is an absolute path - absRoot, err := filepath.Abs(c.relativeRoot) - require.NoError(t, err) - - resolver, err := NewFromDirectory(absRoot, "") - assert.NoError(t, err) - - refs, err := resolver.FilesByPath(c.input) - require.NoError(t, err) - assert.Len(t, refs, len(c.expected)) - s := strset.New() - for _, actual := range refs { - s.Add(actual.RealPath) - } - assert.ElementsMatch(t, c.expected, s.List()) - }) - } -} - -func TestDirectoryResolver_FilesByPath(t *testing.T) { - cases := []struct { - name string - root string - input string - expected string - refCount int - forcePositiveHasPath bool - }{ - { - name: "finds a file (relative)", - root: "./test-fixtures/", - input: "image-symlinks/file-1.txt", - expected: "image-symlinks/file-1.txt", - refCount: 1, - }, - { - name: "finds a file with relative indirection", - root: "./test-fixtures/../test-fixtures", - input: "image-symlinks/file-1.txt", - expected: "image-symlinks/file-1.txt", - refCount: 1, - }, - { - name: "managed non-existing files (relative)", - root: "./test-fixtures/", - input: "test-fixtures/image-symlinks/bogus.txt", - refCount: 0, - }, - { - name: "finds a file (absolute)", - root: "./test-fixtures/", - input: "/image-symlinks/file-1.txt", - expected: "image-symlinks/file-1.txt", - refCount: 1, - }, - { - name: "directories ignored", - root: "./test-fixtures/", - input: "/image-symlinks", - refCount: 0, - forcePositiveHasPath: true, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - resolver, err := NewFromDirectory(c.root, "") - assert.NoError(t, err) - - hasPath := resolver.HasPath(c.input) - if 
!c.forcePositiveHasPath { - if c.refCount != 0 && !hasPath { - t.Errorf("expected HasPath() to indicate existence, but did not") - } else if c.refCount == 0 && hasPath { - t.Errorf("expected HasPath() to NOT indicate existence, but does") - } - } else if !hasPath { - t.Errorf("expected HasPath() to indicate existence, but did not (force path)") - } - - refs, err := resolver.FilesByPath(c.input) - require.NoError(t, err) - assert.Len(t, refs, c.refCount) - for _, actual := range refs { - assert.Equal(t, c.expected, actual.RealPath) - } - }) - } -} - -func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { - cases := []struct { - name string - input []string - refCount int - }{ - { - name: "finds multiple files", - input: []string{"image-symlinks/file-1.txt", "image-symlinks/file-2.txt"}, - refCount: 2, - }, - { - name: "skips non-existing files", - input: []string{"image-symlinks/bogus.txt", "image-symlinks/file-1.txt"}, - refCount: 1, - }, - { - name: "does not return anything for non-existing directories", - input: []string{"non-existing/bogus.txt", "non-existing/file-1.txt"}, - refCount: 0, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures", "") - assert.NoError(t, err) - refs, err := resolver.FilesByPath(c.input...) 
- assert.NoError(t, err) - - if len(refs) != c.refCount { - t.Errorf("unexpected number of refs: %d != %d", len(refs), c.refCount) - } - }) - } -} - -func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures", "") - assert.NoError(t, err) - refs, err := resolver.FilesByGlob("**/image-symlinks/file*") - assert.NoError(t, err) - - assert.Len(t, refs, 2) -} - -func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/image-symlinks", "") - assert.NoError(t, err) - refs, err := resolver.FilesByGlob("**/*.txt") - assert.NoError(t, err) - assert.Len(t, refs, 6) -} - -func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures", "") - assert.NoError(t, err) - refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt") - assert.NoError(t, err) - - assert.Len(t, refs, 1) - assert.Equal(t, "image-symlinks/file-1.txt", refs[0].RealPath) -} - -func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) { - - tests := []struct { - name string - fixture string - }{ - { - name: "one degree", - fixture: "link_to_new_readme", - }, - { - name: "two degrees", - fixture: "link_to_link_to_new_readme", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinks-simple", "") - assert.NoError(t, err) - - refs, err := resolver.FilesByPath(test.fixture) - require.NoError(t, err) - assert.Len(t, refs, 1) - - reader, err := resolver.FileContentsByLocation(refs[0]) - require.NoError(t, err) - - actual, err := io.ReadAll(reader) - require.NoError(t, err) - - expected, err := os.ReadFile("test-fixtures/symlinks-simple/readme") - require.NoError(t, err) - - assert.Equal(t, string(expected), string(actual)) - }) - } -} - -func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { - // let's make certain that "dev/place" 
is not ignored, since it is not "/dev/place" - resolver, err := NewFromDirectory("test-fixtures/system_paths/target", "") - assert.NoError(t, err) - - // all paths should be found (non filtering matches a path) - locations, err := resolver.FilesByGlob("**/place") - assert.NoError(t, err) - // 4: within target/ - // 1: target/link --> relative path to "place" // NOTE: this is filtered out since it not unique relative to outside_root/link_target/place - // 1: outside_root/link_target/place - assert.Len(t, locations, 5) - - // ensure that symlink indexing outside of root worked - testLocation := "test-fixtures/system_paths/outside_root/link_target/place" - ok := false - for _, location := range locations { - if strings.HasSuffix(location.RealPath, testLocation) { - ok = true - } - } - - if !ok { - t.Fatalf("could not find test location=%q", testLocation) - } -} - -var _ fs.FileInfo = (*testFileInfo)(nil) - -type testFileInfo struct { - mode os.FileMode -} - -func (t testFileInfo) Name() string { - panic("implement me") -} - -func (t testFileInfo) Size() int64 { - panic("implement me") -} - -func (t testFileInfo) Mode() fs.FileMode { - return t.mode -} - -func (t testFileInfo) ModTime() time.Time { - panic("implement me") -} - -func (t testFileInfo) IsDir() bool { - panic("implement me") -} - -func (t testFileInfo) Sys() interface{} { - panic("implement me") -} - func Test_isUnallowableFileType(t *testing.T) { tests := []struct { name string @@ -922,505 +76,3 @@ func Test_isUnallowableFileType(t *testing.T) { }) } } - -func Test_directoryResolver_FilesByMIMEType(t *testing.T) { - tests := []struct { - fixturePath string - mimeType string - expectedPaths *strset.Set - }{ - { - fixturePath: "./test-fixtures/image-simple", - mimeType: "text/plain", - expectedPaths: strset.New("file-1.txt", "file-2.txt", "target/really/nested/file-3.txt", "Dockerfile"), - }, - } - for _, test := range tests { - t.Run(test.fixturePath, func(t *testing.T) { - resolver, err := 
NewFromDirectory(test.fixturePath, "") - assert.NoError(t, err) - locations, err := resolver.FilesByMIMEType(test.mimeType) - assert.NoError(t, err) - assert.Equal(t, test.expectedPaths.Size(), len(locations)) - for _, l := range locations { - assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) - } - }) - } -} - -func Test_IndexingNestedSymLinks(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinks-simple", "") - require.NoError(t, err) - - // check that we can get the real path - locations, err := resolver.FilesByPath("./readme") - require.NoError(t, err) - assert.Len(t, locations, 1) - - // check that we can access the same file via 1 symlink - locations, err = resolver.FilesByPath("./link_to_new_readme") - require.NoError(t, err) - require.Len(t, locations, 1) - assert.Equal(t, "readme", locations[0].RealPath) - assert.Equal(t, "link_to_new_readme", locations[0].AccessPath) - - // check that we can access the same file via 2 symlinks - locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") - require.NoError(t, err) - require.Len(t, locations, 1) - assert.Equal(t, "readme", locations[0].RealPath) - assert.Equal(t, "link_to_link_to_new_readme", locations[0].AccessPath) - - // check that we can access the same file via 2 symlinks - locations, err = resolver.FilesByGlob("**/link_*") - require.NoError(t, err) - require.Len(t, locations, 1) // you would think this is 2, however, they point to the same file, and glob only returns unique files - - // returned locations can be in any order - expectedAccessPaths := []string{ - "link_to_link_to_new_readme", - //"link_to_new_readme", // we filter out this one because the first symlink resolves to the same file - } - - expectedRealPaths := []string{ - "readme", - } - - actualRealPaths := strset.New() - actualAccessPaths := strset.New() - for _, a := range locations { - actualAccessPaths.Add(a.AccessPath) - actualRealPaths.Add(a.RealPath) - } - - 
assert.ElementsMatch(t, expectedAccessPaths, actualAccessPaths.List()) - assert.ElementsMatch(t, expectedRealPaths, actualRealPaths.List()) -} - -func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { - filterFn := func(_, path string, _ os.FileInfo, _ error) error { - if strings.HasSuffix(path, string(filepath.Separator)+"readme") { - return ErrSkipPath - } - return nil - } - - resolver, err := NewFromDirectory("./test-fixtures/symlinks-simple", "", filterFn) - require.NoError(t, err) - - // the path to the real file is PRUNED from the index, so we should NOT expect a location returned - locations, err := resolver.FilesByPath("./readme") - require.NoError(t, err) - assert.Empty(t, locations) - - // check that we cannot access the file even via symlink - locations, err = resolver.FilesByPath("./link_to_new_readme") - require.NoError(t, err) - assert.Empty(t, locations) - - // check that we still cannot access the same file via 2 symlinks - locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") - require.NoError(t, err) - assert.Empty(t, locations) -} - -func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinks-multiple-roots/root", "") - require.NoError(t, err) - - // check that we can get the real path - locations, err := resolver.FilesByPath("./readme") - require.NoError(t, err) - assert.Len(t, locations, 1) - - // check that we can access the same file via 2 symlinks (link_to_link_to_readme -> link_to_readme -> readme) - locations, err = resolver.FilesByPath("./link_to_link_to_readme") - require.NoError(t, err) - assert.Len(t, locations, 1) - - // something looks wrong here - t.Failed() -} - -func Test_RootViaSymlink(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinked-root/nested/link-root", "") - require.NoError(t, err) - - locations, err := resolver.FilesByPath("./file1.txt") - require.NoError(t, err) - assert.Len(t, locations, 1) - - locations, 
err = resolver.FilesByPath("./nested/file2.txt") - require.NoError(t, err) - assert.Len(t, locations, 1) - - locations, err = resolver.FilesByPath("./nested/linked-file1.txt") - require.NoError(t, err) - assert.Len(t, locations, 1) -} - -func Test_directoryResolver_FileContentsByLocation(t *testing.T) { - cwd, err := os.Getwd() - require.NoError(t, err) - - r, err := NewFromDirectory(".", "") - require.NoError(t, err) - - exists, existingPath, err := r.tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) - require.True(t, exists) - require.NoError(t, err) - require.True(t, existingPath.HasReference()) - - tests := []struct { - name string - location file.Location - expects string - err bool - }{ - { - name: "use file reference for content requests", - location: file.NewLocationFromDirectory("some/place", *existingPath.Reference), - expects: "this file has contents", - }, - { - name: "error on empty file reference", - location: file.NewLocationFromDirectory("doesn't matter", stereoscopeFile.Reference{}), - err: true, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - - actual, err := r.FileContentsByLocation(test.location) - if test.err { - require.Error(t, err) - return - } - - require.NoError(t, err) - if test.expects != "" { - b, err := io.ReadAll(actual) - require.NoError(t, err) - assert.Equal(t, test.expects, string(b)) - } - }) - } -} - -func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) { - test := func(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinks-loop", "") - require.NoError(t, err) - - locations, err := resolver.FilesByGlob("**/file.target") - require.NoError(t, err) - - require.Len(t, locations, 1) - assert.Equal(t, "devices/loop0/file.target", locations[0].RealPath) - } - - testWithTimeout(t, 5*time.Second, test) -} - -func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) { - cases := []struct { - name string - root string - input string 
- expected []string - }{ - { - name: "should find the base file", - root: "./test-fixtures/symlinks-base/", - input: "./base", - expected: []string{ - "/base", - }, - }, - { - name: "should follow a link with a pivoted root", - root: "./test-fixtures/symlinks-base/", - input: "./foo", - expected: []string{ - "/base", - }, - }, - { - name: "should follow a relative link with extra parents", - root: "./test-fixtures/symlinks-base/", - input: "./bar", - expected: []string{ - "/base", - }, - }, - { - name: "should follow an absolute link with extra parents", - root: "./test-fixtures/symlinks-base/", - input: "./baz", - expected: []string{ - "/base", - }, - }, - { - name: "should follow an absolute link with extra parents", - root: "./test-fixtures/symlinks-base/", - input: "./sub/link", - expected: []string{ - "/sub/item", - }, - }, - { - name: "should follow chained pivoted link", - root: "./test-fixtures/symlinks-base/", - input: "./chain", - expected: []string{ - "/base", - }, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - resolver, err := NewFromDirectory(c.root, c.root) - assert.NoError(t, err) - - refs, err := resolver.FilesByPath(c.input) - require.NoError(t, err) - assert.Len(t, refs, len(c.expected)) - s := strset.New() - for _, actual := range refs { - s.Add(actual.RealPath) - } - assert.ElementsMatch(t, c.expected, s.List()) - }) - } - -} - -func Test_directoryResolver_resolvesLinks(t *testing.T) { - tests := []struct { - name string - runner func(file.Resolver) []file.Location - expected []file.Location - }{ - { - name: "by mimetype", - runner: func(resolver file.Resolver) []file.Location { - // links should not show up when searching mimetype - actualLocations, err := resolver.FilesByMIMEType("text/plain") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - file.NewLocation("file-1.txt"), // note: missing virtual path "file-1.txt" - file.NewLocation("file-3.txt"), // note: missing virtual path 
"file-3.txt" - file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt" - file.NewLocation("parent/file-4.txt"), // note: missing virtual path "file-4.txt" - }, - }, - { - name: "by glob to links", - runner: func(resolver file.Resolver) []file.Location { - // links are searched, but resolve to the real files - // for that reason we need to place **/ in front (which is not the same for other resolvers) - actualLocations, err := resolver.FilesByGlob("**/*ink-*") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - file.NewVirtualLocation("file-1.txt", "link-1"), - file.NewVirtualLocation("file-2.txt", "link-2"), - // we already have this real file path via another link, so only one is returned - //file.NewVirtualLocation("file-2.txt", "link-indirect"), - file.NewVirtualLocation("file-3.txt", "link-within"), - }, - }, - { - name: "by basename", - runner: func(resolver file.Resolver) []file.Location { - // links are searched, but resolve to the real files - actualLocations, err := resolver.FilesByGlob("**/file-2.txt") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - // this has two copies in the base image, which overwrites the same location - file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt", - }, - }, - { - name: "by basename glob", - runner: func(resolver file.Resolver) []file.Location { - // links are searched, but resolve to the real files - actualLocations, err := resolver.FilesByGlob("**/file-?.txt") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - file.NewLocation("file-1.txt"), // note: missing virtual path "file-1.txt" - file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt" - file.NewLocation("file-3.txt"), // note: missing virtual path "file-3.txt" - file.NewLocation("parent/file-4.txt"), // note: missing virtual path "parent/file-4.txt" - }, - }, - { - name: "by basename glob to links", - 
runner: func(resolver file.Resolver) []file.Location { - actualLocations, err := resolver.FilesByGlob("**/link-*") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - file.NewVirtualLocation("file-1.txt", "link-1"), - file.NewVirtualLocation("file-2.txt", "link-2"), - - // we already have this real file path via another link, so only one is returned - //file.NewVirtualLocation("file-2.txt", "link-indirect"), - - file.NewVirtualLocation("file-3.txt", "link-within"), - }, - }, - { - name: "by extension", - runner: func(resolver file.Resolver) []file.Location { - // links are searched, but resolve to the real files - actualLocations, err := resolver.FilesByGlob("**/*.txt") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - file.NewLocation("file-1.txt"), // note: missing virtual path "file-1.txt" - file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt" - file.NewLocation("file-3.txt"), // note: missing virtual path "file-3.txt" - file.NewLocation("parent/file-4.txt"), // note: missing virtual path "parent/file-4.txt" - }, - }, - { - name: "by path to degree 1 link", - runner: func(resolver file.Resolver) []file.Location { - // links resolve to the final file - actualLocations, err := resolver.FilesByPath("/link-2") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - // we have multiple copies across layers - file.NewVirtualLocation("file-2.txt", "link-2"), - }, - }, - { - name: "by path to degree 2 link", - runner: func(resolver file.Resolver) []file.Location { - // multiple links resolves to the final file - actualLocations, err := resolver.FilesByPath("/link-indirect") - assert.NoError(t, err) - return actualLocations - }, - expected: []file.Location{ - // we have multiple copies across layers - file.NewVirtualLocation("file-2.txt", "link-indirect"), - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - 
resolver, err := NewFromDirectory("./test-fixtures/symlinks-from-image-symlinks-fixture", "") - require.NoError(t, err) - assert.NoError(t, err) - - actual := test.runner(resolver) - - compareLocations(t, test.expected, actual) - }) - } -} - -func TestDirectoryResolver_DoNotAddVirtualPathsToTree(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinks-prune-indexing", "") - require.NoError(t, err) - - var allRealPaths []stereoscopeFile.Path - for l := range resolver.AllLocations(context.Background()) { - allRealPaths = append(allRealPaths, stereoscopeFile.Path(l.RealPath)) - } - pathSet := stereoscopeFile.NewPathSet(allRealPaths...) - - assert.False(t, - pathSet.Contains("before-path/file.txt"), - "symlink destinations should only be indexed at their real path, not through their virtual (symlinked) path", - ) - - assert.False(t, - pathSet.Contains("a-path/file.txt"), - "symlink destinations should only be indexed at their real path, not through their virtual (symlinked) path", - ) - -} - -func TestDirectoryResolver_FilesContents_errorOnDirRequest(t *testing.T) { - defer goleak.VerifyNone(t) - resolver, err := NewFromDirectory("./test-fixtures/system_paths", "") - assert.NoError(t, err) - - var dirLoc *file.Location - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - for loc := range resolver.AllLocations(ctx) { - entry, err := resolver.index.Get(loc.Reference()) - require.NoError(t, err) - if entry.Metadata.IsDir() { - dirLoc = &loc - break - } - } - - require.NotNil(t, dirLoc) - - reader, err := resolver.FileContentsByLocation(*dirLoc) - require.Error(t, err) - require.Nil(t, reader) -} - -func TestDirectoryResolver_AllLocations(t *testing.T) { - resolver, err := NewFromDirectory("./test-fixtures/symlinks-from-image-symlinks-fixture", "") - assert.NoError(t, err) - - paths := strset.New() - for loc := range resolver.AllLocations(context.Background()) { - if strings.HasPrefix(loc.RealPath, "/") { - // ignore outside the 
fixture root for now - continue - } - paths.Add(loc.RealPath) - } - expected := []string{ - "file-1.txt", - "file-2.txt", - "file-3.txt", - "link-1", - "link-2", - "link-dead", - "link-indirect", - "link-within", - "parent", - "parent-link", - "parent/file-4.txt", - } - - pathsList := paths.List() - sort.Strings(pathsList) - - assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) -} - -func TestAllLocationsDoesNotLeakGoRoutine(t *testing.T) { - defer goleak.VerifyNone(t) - resolver, err := NewFromDirectory("./test-fixtures/symlinks-from-image-symlinks-fixture", "") - require.NoError(t, err) - ctx, cancel := context.WithCancel(context.Background()) - for range resolver.AllLocations(ctx) { - break - } - cancel() -} diff --git a/syft/internal/fileresolver/file.go b/syft/internal/fileresolver/file.go index 722a1d6f264..2c71ceab171 100644 --- a/syft/internal/fileresolver/file.go +++ b/syft/internal/fileresolver/file.go @@ -1,17 +1,10 @@ package fileresolver import ( - "context" - "errors" "fmt" - "io" - "os" - stereoscopeFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree" - "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/internal/windows" ) // Compile time assurance that we meet the Resolver interface. @@ -19,12 +12,9 @@ var _ file.Resolver = (*File)(nil) // File implements path and content access for the file data source. 
type File struct { - path string - chroot ChrootContext - tree filetree.Reader - index filetree.IndexReader - searchContext filetree.Searcher - indexer *fileIndexer + filetreeResolver + path string + indexer *fileIndexer } // parent should be the symlink free absolute path to the parent directory @@ -38,10 +28,12 @@ func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, e cleanBase := chroot.Base() file := &File{ - path: path, - chroot: *chroot, - tree: filetree.New(), - index: filetree.NewIndex(), + path: path, + filetreeResolver: filetreeResolver{ + chroot: *chroot, + tree: filetree.New(), + index: filetree.NewIndex(), + }, indexer: newFileIndexer(path, cleanBase, pathFilters...), } @@ -59,223 +51,12 @@ func (r *File) buildIndex() error { r.tree = tree r.index = index - r.searchContext = filetree.NewSearchContext(tree, index) + r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index) return nil } -// TODO: These are Copy-pasted from Directory.go - should we consider splitting them out into a shared place? - -func (r File) requestPath(userPath string) (string, error) { - return r.chroot.ToNativePath(userPath) -} - -// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the file resolver. -func (r File) responsePath(path string) string { - return r.chroot.ToChrootPath(path) -} - -// HasPath indicates if the given path exists in the underlying source. -func (r *File) HasPath(userPath string) bool { - requestPath, err := r.requestPath(userPath) - if err != nil { - return false - } - return r.tree.HasPath(stereoscopeFile.Path(requestPath)) -} - // Stringer to represent a file path data source func (r File) String() string { return fmt.Sprintf("file:%s", r.path) } - -// FilesByPath returns all file.References that match the given paths from the file index. 
-func (r File) FilesByPath(userPaths ...string) ([]file.Location, error) { - var references = make([]file.Location, 0) - - for _, userPath := range userPaths { - userStrPath, err := r.requestPath(userPath) - if err != nil { - log.Warnf("unable to get file by path=%q : %+v", userPath, err) - continue - } - - // we should be resolving symlinks and preserving this information as a AccessPath to the real file - ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks) - if err != nil { - log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err) - continue - } - - if !ref.HasReference() { - continue - } - - entry, err := r.index.Get(*ref.Reference) - if err != nil { - log.Warnf("unable to get file by path=%q : %+v", userPath, err) - continue - } - - // don't consider directories - if entry.Metadata.IsDir() { - continue - } - - if windows.HostRunningOnWindows() { - userStrPath = windows.ToPosix(userStrPath) - } - - if ref.HasReference() { - references = append(references, - file.NewVirtualLocationFromDirectory( - r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root - r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root - *ref.Reference, - ), - ) - } - } - - return references, nil -} - -func (r File) requestGlob(pattern string) (string, error) { - return r.chroot.ToNativeGlob(pattern) -} - -// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. 
-// -//nolint:dupl -func (r File) FilesByGlob(patterns ...string) ([]file.Location, error) { - uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() - uniqueLocations := make([]file.Location, 0) - - for _, pattern := range patterns { - requestGlob, err := r.requestGlob(pattern) - if err != nil { - return nil, err - } - refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks) - if err != nil { - return nil, err - } - for _, refVia := range refVias { - if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) { - continue - } - entry, err := r.index.Get(*refVia.Reference) - if err != nil { - return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err) - } - - // don't consider directories - if entry.Metadata.IsDir() { - continue - } - - loc := file.NewVirtualLocationFromDirectory( - r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root - r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root - *refVia.Reference, - ) - uniqueFileIDs.Add(*refVia.Reference) - uniqueLocations = append(uniqueLocations, loc) - } - } - - return uniqueLocations, nil -} - -// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. -// This is helpful when attempting to find a file that is in the same layer or lower as another file. -func (r *File) RelativeFileByPath(_ file.Location, path string) *file.Location { - paths, err := r.FilesByPath(path) - if err != nil { - return nil - } - if len(paths) == 0 { - return nil - } - - return &paths[0] -} - -// FileContentsByLocation fetches file contents for a single file reference relative to a directory. -// If the path does not exist an error is returned. 
-func (r File) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { - if location.RealPath == "" { - return nil, errors.New("empty path given") - } - - entry, err := r.index.Get(location.Reference()) - if err != nil { - return nil, err - } - - // don't consider directories - if entry.Type == stereoscopeFile.TypeDirectory { - return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath) - } - - // RealPath is posix so for windows file resolver we need to translate - // to its true on disk path. - filePath := string(location.Reference().RealPath) - if windows.HostRunningOnWindows() { - filePath = windows.FromPosix(filePath) - } - - return stereoscopeFile.NewLazyReadCloser(filePath), nil -} - -func (r *File) AllLocations(ctx context.Context) <-chan file.Location { - results := make(chan file.Location) - go func() { - defer close(results) - for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) { - select { - case <-ctx.Done(): - return - case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref): - continue - } - } - }() - return results -} - -func (r *File) FileMetadataByLocation(location file.Location) (file.Metadata, error) { - entry, err := r.index.Get(location.Reference()) - if err != nil { - return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist) - } - - return entry.Metadata, nil -} - -func (r *File) FilesByMIMEType(types ...string) ([]file.Location, error) { - uniqueFileIDs := stereoscopeFile.NewFileReferenceSet() - uniqueLocations := make([]file.Location, 0) - - refVias, err := r.searchContext.SearchByMIMEType(types...) 
- if err != nil { - return nil, err - } - for _, refVia := range refVias { - if !refVia.HasReference() { - continue - } - if uniqueFileIDs.Contains(*refVia.Reference) { - continue - } - location := file.NewVirtualLocationFromDirectory( - r.responsePath(string(refVia.Reference.RealPath)), - r.responsePath(string(refVia.RequestPath)), - *refVia.Reference, - ) - uniqueFileIDs.Add(*refVia.Reference) - uniqueLocations = append(uniqueLocations, location) - } - - return uniqueLocations, nil -} diff --git a/syft/internal/fileresolver/file_test.go b/syft/internal/fileresolver/file_test.go deleted file mode 100644 index f3d24a2cab0..00000000000 --- a/syft/internal/fileresolver/file_test.go +++ /dev/null @@ -1,269 +0,0 @@ -package fileresolver - -import ( - "context" - stereoscopeFile "github.com/anchore/stereoscope/pkg/file" - "github.com/anchore/syft/syft/file" - "github.com/google/go-cmp/cmp" - "github.com/scylladb/go-set/strset" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/goleak" - "io" - "os" - "path/filepath" - "sort" - "testing" -) - -func TestFileResolver_FilesByPath(t *testing.T) { - tests := []struct { - description string - filePath string // relative to cwd - fileByPathInput string - expectedRealPath string - expectedAccessPath string - cwd string - }{ - { - description: "Finds file if searched by filepath", - filePath: "./test-fixtures/req-resp/path/to/the/file.txt", - fileByPathInput: "file.txt", - expectedRealPath: "/file.txt", - expectedAccessPath: "/file.txt", - }, - } - - for _, tt := range tests { - t.Run(tt.description, func(t *testing.T) { - parentPath, err := absoluteSymlinkFreePathToParent(tt.filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - - resolver, err := NewFromFile(parentPath, tt.filePath) - require.NoError(t, err) - require.NotNil(t, resolver) - - refs, err := resolver.FilesByPath(tt.fileByPathInput) - require.NoError(t, err) - if tt.expectedRealPath == "" { - require.Empty(t, 
refs) - return - } - require.Len(t, refs, 1) - assert.Equal(t, tt.expectedRealPath, refs[0].RealPath, "real path different") - assert.Equal(t, tt.expectedAccessPath, refs[0].AccessPath, "virtual path different") - }) - } -} - -// Test mutliple files by path -> Maybe not necessary for us here? -func TestFileResolver_MultipleFilesByPath(t *testing.T) { - tests := []struct { - description string - input []string - refCount int - }{ - { - description: "finds file ", - input: []string{"file.txt"}, - refCount: 1, - }, - { - description: "skip non-existing files", - input: []string{"file.txt", "bogus.txt"}, - refCount: 1, - }, - { - description: "does not return anything for non-existing files", - input: []string{"non-existing/bogus.txt", "another-bogus.txt"}, - refCount: 0, - }, - } - - for _, tt := range tests { - t.Run(tt.description, func(t *testing.T) { - filePath := "./test-fixtures/req-resp/path/to/the/file.txt" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - - resolver, err := NewFromFile(parentPath, filePath) - assert.NoError(t, err) - refs, err := resolver.FilesByPath(tt.input...) 
- assert.NoError(t, err) - - if len(refs) != tt.refCount { - t.Errorf("unexpected number of refs: %d != %d", len(refs), tt.refCount) - } - }) - } -} - -func TestFileResolver_FilesByGlob(t *testing.T) { - filePath := "./test-fixtures/req-resp/path/to/the/file.txt" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - - resolver, err := NewFromFile(parentPath, filePath) - assert.NoError(t, err) - refs, err := resolver.FilesByGlob("*.txt") - assert.NoError(t, err) - - assert.Len(t, refs, 1) -} - -func Test_fileResolver_FilesByMIMEType(t *testing.T) { - tests := []struct { - fixturePath string - mimeType string - expectedPaths *strset.Set - }{ - { - fixturePath: "./test-fixtures/image-simple/file-1.txt", - mimeType: "text/plain", - expectedPaths: strset.New("/file-1.txt"), - }, - } - for _, test := range tests { - t.Run(test.fixturePath, func(t *testing.T) { - filePath := "./test-fixtures/image-simple/file-1.txt" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - - resolver, err := NewFromFile(parentPath, filePath) - assert.NoError(t, err) - locations, err := resolver.FilesByMIMEType(test.mimeType) - assert.NoError(t, err) - assert.Equal(t, test.expectedPaths.Size(), len(locations)) - for _, l := range locations { - assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) - } - }) - } -} - -func Test_fileResolver_FileContentsByLocation(t *testing.T) { - cwd, err := os.Getwd() - require.NoError(t, err) - - filePath := "./test-fixtures/image-simple/file-1.txt" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - - r, err := NewFromFile(parentPath, filePath) - require.NoError(t, err) - - exists, existingPath, err := r.tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) - require.True(t, exists) 
- require.NoError(t, err) - require.True(t, existingPath.HasReference()) - - tests := []struct { - name string - location file.Location - expects string - err bool - }{ - { - name: "use file reference for content requests", - location: file.NewLocationFromDirectory("some/place", *existingPath.Reference), - expects: "this file has contents", - }, - { - name: "error on empty file reference", - location: file.NewLocationFromDirectory("doesn't matter", stereoscopeFile.Reference{}), - err: true, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - - actual, err := r.FileContentsByLocation(test.location) - if test.err { - require.Error(t, err) - return - } - - require.NoError(t, err) - if test.expects != "" { - b, err := io.ReadAll(actual) - require.NoError(t, err) - assert.Equal(t, test.expects, string(b)) - } - }) - } -} - -func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) { - defer goleak.VerifyNone(t) - - filePath := "./test-fixtures/system_paths/target/home/place" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - resolver, err := NewFromFile(parentPath, filePath) - require.NoError(t, err) - - var dirLoc *file.Location - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - for loc := range resolver.AllLocations(ctx) { - entry, err := resolver.index.Get(loc.Reference()) - require.NoError(t, err) - if entry.Metadata.IsDir() { - dirLoc = &loc - break - } - } - - require.NotNil(t, dirLoc) - - reader, err := resolver.FileContentsByLocation(*dirLoc) - require.Error(t, err) - require.Nil(t, reader) -} - -func TestFileResolver_AllLocations(t *testing.T) { - // Verify both the parent and the file itself are indexed - filePath := "./test-fixtures/system_paths/target/home/place" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - resolver, err := NewFromFile(parentPath, 
filePath) - require.NoError(t, err) - - paths := strset.New() - for loc := range resolver.AllLocations(context.Background()) { - paths.Add(loc.RealPath) - } - expected := []string{ - "/place", - "", // This is how we see the parent dir, since we're resolving wrt the parent directory. - } - - pathsList := paths.List() - sort.Strings(pathsList) - - assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) -} - -func Test_AllLocationsDoesNotLeakGoRoutine(t *testing.T) { - defer goleak.VerifyNone(t) - filePath := "./test-fixtures/system_paths/target/home/place" - parentPath, err := absoluteSymlinkFreePathToParent(filePath) - require.NoError(t, err) - require.NotNil(t, parentPath) - resolver, err := NewFromFile(parentPath, filePath) - require.NoError(t, err) - - require.NoError(t, err) - ctx, cancel := context.WithCancel(context.Background()) - for range resolver.AllLocations(ctx) { - break - } - cancel() -} diff --git a/syft/internal/fileresolver/filetree_resolver.go b/syft/internal/fileresolver/filetree_resolver.go new file mode 100644 index 00000000000..15c0dacfe12 --- /dev/null +++ b/syft/internal/fileresolver/filetree_resolver.go @@ -0,0 +1,229 @@ +package fileresolver + +import ( + "context" + "errors" + "fmt" + "io" + "os" + + stereoscopeFile "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/windows" +) + +type filetreeResolver struct { + chroot ChrootContext + tree filetree.Reader + index filetree.IndexReader + searchContext filetree.Searcher +} + +func (r *filetreeResolver) requestPath(userPath string) (string, error) { + return r.chroot.ToNativePath(userPath) +} + +// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the file resolver. 
+func (r filetreeResolver) responsePath(path string) string {
+	return r.chroot.ToChrootPath(path)
+}
+
+// HasPath indicates if the given path exists in the underlying source. If the path cannot be converted by
+// requestPath, false is returned.
+func (r *filetreeResolver) HasPath(userPath string) bool {
+	requestPath, err := r.requestPath(userPath)
+	if err != nil {
+		return false
+	}
+	return r.tree.HasPath(stereoscopeFile.Path(requestPath))
+}
+
+// FilesByPath returns a file.Location for each given path that resolves to a non-directory entry in the file
+// index. Paths that cannot be converted, resolved, or found in the index are skipped rather than reported as
+// errors, so the returned error is always nil.
+func (r filetreeResolver) FilesByPath(userPaths ...string) ([]file.Location, error) {
+	references := make([]file.Location, 0)
+
+	for _, userPath := range userPaths {
+		userStrPath, err := r.requestPath(userPath)
+		if err != nil {
+			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
+			continue
+		}
+
+		// we should be resolving symlinks and preserving this information as an AccessPath to the real file
+		ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
+		if err != nil {
+			log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
+			continue
+		}
+
+		// skip paths that did not resolve to a reference (the reference is dereferenced below)
+		if !ref.HasReference() {
+			continue
+		}
+
+		entry, err := r.index.Get(*ref.Reference)
+		if err != nil {
+			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
+			continue
+		}
+
+		// don't consider directories
+		if entry.Metadata.IsDir() {
+			continue
+		}
+
+		if windows.HostRunningOnWindows() {
+			userStrPath = windows.ToPosix(userStrPath)
+		}
+
+		references = append(references,
+			file.NewVirtualLocationFromDirectory(
+				r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
+				r.responsePath(userStrPath),          // the path used to access this file, relative to the resolver root
+				*ref.Reference,
+			),
+		)
+	}
+
+	return references, nil
+}
+
+// requestGlob passes a user-provided glob pattern through the chroot context's ToNativeGlob before searching.
+func (r filetreeResolver) requestGlob(pattern string) (string, error) {
+	return r.chroot.ToNativeGlob(pattern)
+}
+
+// FilesByGlob returns all unique, non-directory file.Locations that match any of the given path glob patterns.
+func (r filetreeResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
+	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
+	uniqueLocations := make([]file.Location, 0)
+
+	for _, pattern := range patterns {
+		requestGlob, err := r.requestGlob(pattern)
+		if err != nil {
+			return nil, err
+		}
+		refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks)
+		if err != nil {
+			return nil, err
+		}
+		for _, refVia := range refVias {
+			if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
+				continue
+			}
+			entry, err := r.index.Get(*refVia.Reference)
+			if err != nil {
+				return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
+			}
+
+			// don't consider directories
+			if entry.Metadata.IsDir() {
+				continue
+			}
+
+			loc := file.NewVirtualLocationFromDirectory(
+				r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
+				r.responsePath(string(refVia.RequestPath)),        // the path used to access this file, relative to the resolver root
+				*refVia.Reference,
+			)
+			uniqueFileIDs.Add(*refVia.Reference)
+			uniqueLocations = append(uniqueLocations, loc)
+		}
+	}
+
+	return uniqueLocations, nil
+}
+
+// RelativeFileByPath fetches a single file at the given path, or nil when nothing is found. The location argument
+// is ignored; the lookup is simply the first result of FilesByPath.
+func (r *filetreeResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
+	paths, err := r.FilesByPath(path)
+	if err != nil || len(paths) == 0 {
+		return nil
+	}
+
+	return &paths[0]
+}
+
+// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
+// If the path does not exist an error is returned.
+func (r filetreeResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
+	if location.RealPath == "" {
+		return nil, errors.New("empty path given")
+	}
+
+	ref := location.Reference()
+	entry, err := r.index.Get(ref)
+	switch {
+	case err != nil:
+		return nil, err
+	case entry.Type == stereoscopeFile.TypeDirectory:
+		// don't consider directories
+		return nil, fmt.Errorf("cannot read contents of non-file %q", ref.RealPath)
+	}
+
+	// RealPath is posix, so for the windows file resolver translate it to its true on-disk path before opening
+	nativePath := string(ref.RealPath)
+	if windows.HostRunningOnWindows() {
+		nativePath = windows.FromPosix(nativePath)
+	}
+
+	return stereoscopeFile.NewLazyReadCloser(nativePath), nil
+}
+
+// AllLocations streams a location for every tree entry on the returned channel until done or ctx is cancelled.
+func (r *filetreeResolver) AllLocations(ctx context.Context) <-chan file.Location {
+	locations := make(chan file.Location)
+	go func() {
+		defer close(locations)
+		for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
+			loc := file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref)
+			select {
+			case <-ctx.Done():
+				return
+			case locations <- loc:
+			}
+		}
+	}()
+	return locations
+}
+
+// FileMetadataByLocation returns the indexed metadata; any index lookup failure is reported as os.ErrNotExist.
+func (r *filetreeResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
+	entry, err := r.index.Get(location.Reference())
+	if err != nil {
+		return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
+	}
+	return entry.Metadata, nil
+}
+
+func (r *filetreeResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
+	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
+	uniqueLocations := make([]file.Location, 0)
+
+	refVias, err := r.searchContext.SearchByMIMEType(types...)
+ if err != nil { + return nil, err + } + for _, refVia := range refVias { + if !refVia.HasReference() { + continue + } + if uniqueFileIDs.Contains(*refVia.Reference) { + continue + } + location := file.NewVirtualLocationFromDirectory( + r.responsePath(string(refVia.Reference.RealPath)), + r.responsePath(string(refVia.RequestPath)), + *refVia.Reference, + ) + uniqueFileIDs.Add(*refVia.Reference) + uniqueLocations = append(uniqueLocations, location) + } + + return uniqueLocations, nil +} diff --git a/syft/internal/fileresolver/filetree_resolver_test.go b/syft/internal/fileresolver/filetree_resolver_test.go new file mode 100644 index 00000000000..e385ec8501f --- /dev/null +++ b/syft/internal/fileresolver/filetree_resolver_test.go @@ -0,0 +1,1611 @@ +//go:build !windows +// +build !windows + +package fileresolver + +import ( + "context" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "testing" + "time" + + stereoscopeFile "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/syft/syft/file" + "github.com/google/go-cmp/cmp" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" +) + +// Tests for filetree resolver when directory is used for index +func TestDirectoryResolver_FilesByPath_request_response(t *testing.T) { + // / + // somewhere/ + // outside.txt + // root-link -> ./ + // path/ + // to/ + // abs-inside.txt -> /path/to/the/file.txt # absolute link to somewhere inside of the root + // rel-inside.txt -> ./the/file.txt # relative link to somewhere inside of the root + // the/ + // file.txt + // abs-outside.txt -> /somewhere/outside.txt # absolute link to outside of the root + // rel-outside -> ../../../somewhere/outside.txt # relative link to outside of the root + // + + testDir, err := os.Getwd() + require.NoError(t, err) + relative := filepath.Join("test-fixtures", "req-resp") + absolute := filepath.Join(testDir, relative) + + absInsidePath := 
filepath.Join(absolute, "path", "to", "abs-inside.txt") + absOutsidePath := filepath.Join(absolute, "path", "to", "the", "abs-outside.txt") + + relativeViaLink := filepath.Join(relative, "root-link") + absoluteViaLink := filepath.Join(absolute, "root-link") + + relativeViaDoubleLink := filepath.Join(relative, "root-link", "root-link") + absoluteViaDoubleLink := filepath.Join(absolute, "root-link", "root-link") + + cleanup := func() { + _ = os.Remove(absInsidePath) + _ = os.Remove(absOutsidePath) + } + + // ensure the absolute symlinks are cleaned up from any previous runs + cleanup() + + require.NoError(t, os.Symlink(filepath.Join(absolute, "path", "to", "the", "file.txt"), absInsidePath)) + require.NoError(t, os.Symlink(filepath.Join(absolute, "somewhere", "outside.txt"), absOutsidePath)) + + t.Cleanup(cleanup) + + cases := []struct { + name string + cwd string + root string + base string + input string + expectedRealPath string + expectedAccessPath string // note: if empty it will be assumed to match the expectedRealPath + }{ + { + name: "relative root, relative request, direct", + root: relative, + input: "path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct", + root: absolute, + input: "path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct", + root: relative, + input: "/path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct", + root: absolute, + input: "/path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + // cwd within root... 
+ { + name: "relative root, relative request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + root: "../../", + input: "path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + root: absolute, + input: "path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + root: "../../", + input: "/path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + + root: absolute, + input: "/path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + // cwd within symlink root... + { + name: "relative root, relative request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: "./", + input: "path/to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. + expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "path/to/the/file.txt", + expectedAccessPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: absoluteViaLink, + input: "path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: "./", + input: "/path/to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? 
+ // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. + expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "path/to/the/file.txt", + expectedAccessPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: absoluteViaLink, + input: "/path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + // cwd within symlink root, request nested within... + { + name: "relative root, relative nested request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: "./path", + input: "to/the/file.txt", + // note: why not expect "to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. + expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "to/the/file.txt", + expectedAccessPath: "to/the/file.txt", + }, + { + name: "abs root, relative nested request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: filepath.Join(absoluteViaLink, "path"), + input: "to/the/file.txt", + expectedRealPath: "to/the/file.txt", + }, + { + name: "relative root, abs nested request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: "./path", + input: "/to/the/file.txt", + // note: why not expect "to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. 
+ expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "to/the/file.txt", + expectedAccessPath: "to/the/file.txt", + }, + { + name: "abs root, abs nested request, direct, cwd within symlink root", + cwd: relativeViaLink, + root: filepath.Join(absoluteViaLink, "path"), + input: "/to/the/file.txt", + expectedRealPath: "to/the/file.txt", + }, + // cwd within DOUBLE symlink root... + { + name: "relative root, relative request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: "./", + input: "path/to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. + expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "path/to/the/file.txt", + expectedAccessPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: absoluteViaDoubleLink, + input: "path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: "./", + input: "/path/to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. 
+ expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "path/to/the/file.txt", + expectedAccessPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: absoluteViaDoubleLink, + input: "/path/to/the/file.txt", + expectedRealPath: "path/to/the/file.txt", + }, + // cwd within DOUBLE symlink root, request nested within... + { + name: "relative root, relative nested request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: "./path", + input: "to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. + expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "to/the/file.txt", + expectedAccessPath: "to/the/file.txt", + }, + { + name: "abs root, relative nested request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: filepath.Join(absoluteViaDoubleLink, "path"), + input: "to/the/file.txt", + expectedRealPath: "to/the/file.txt", + }, + { + name: "relative root, abs nested request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: "./path", + input: "/to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. 
+ expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "to/the/file.txt", + expectedAccessPath: "to/the/file.txt", + }, + { + name: "abs root, abs nested request, direct, cwd within (double) symlink root", + cwd: relativeViaDoubleLink, + root: filepath.Join(absoluteViaDoubleLink, "path"), + input: "/to/the/file.txt", + expectedRealPath: "to/the/file.txt", + }, + // cwd within DOUBLE symlink root, request nested DEEP within... + { + name: "relative root, relative nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), + root: "../", + input: "to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. + expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "to/the/file.txt", + expectedAccessPath: "to/the/file.txt", + }, + { + name: "abs root, relative nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), + root: filepath.Join(absoluteViaDoubleLink, "path"), + input: "to/the/file.txt", + expectedRealPath: "to/the/file.txt", + }, + { + name: "relative root, abs nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), + root: "../", + input: "/to/the/file.txt", + // note: why not expect "path/to/the/file.txt" here? + // this is because we don't know that the path used to access this path (which is a link within + // the root) resides within the root. Without this information it appears as if this file resides + // outside the root. 
+ expectedRealPath: filepath.Join(absolute, "path/to/the/file.txt"), + //expectedRealPath: "to/the/file.txt", + expectedAccessPath: "to/the/file.txt", + }, + { + name: "abs root, abs nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relativeViaDoubleLink, "path", "to"), + root: filepath.Join(absoluteViaDoubleLink, "path"), + input: "/to/the/file.txt", + expectedRealPath: "to/the/file.txt", + }, + // link to outside of root cases... + { + name: "relative root, relative request, abs indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, relative request, abs indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, abs request, abs indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "/to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, abs request, abs indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "/to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, relative request, relative indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, relative request, relative indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: 
"to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, abs request, relative indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "/to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, abs request, relative indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "/to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + // link to outside of root cases... cwd within symlink root + { + name: "relative root, relative request, abs indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: "path", + input: "to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, relative request, abs indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: filepath.Join(absolute, "path"), + input: "to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, abs request, abs indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: "path", + input: "/to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, abs request, abs indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: filepath.Join(absolute, "path"), + input: "/to/the/abs-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: 
"to/the/abs-outside.txt", + }, + { + name: "relative root, relative request, relative indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: "path", + input: "to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, relative request, relative indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: filepath.Join(absolute, "path"), + input: "to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, abs request, relative indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: "path", + input: "/to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, abs request, relative indirect (outside of root), cwd within symlink root", + cwd: relativeViaLink, + root: filepath.Join(absolute, "path"), + input: "/to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, relative request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relativeViaDoubleLink, + root: "path", + input: "to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, relative request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relativeViaDoubleLink, + root: filepath.Join(absolute, "path"), + input: "to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, abs request, 
relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relativeViaDoubleLink, + root: "path", + input: "/to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, abs request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relativeViaDoubleLink, + root: filepath.Join(absolute, "path"), + input: "/to/the/rel-outside.txt", + expectedRealPath: filepath.Join(absolute, "/somewhere/outside.txt"), + expectedAccessPath: "to/the/rel-outside.txt", + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if c.expectedAccessPath == "" { + c.expectedAccessPath = c.expectedRealPath + } + + // we need to mimic a shell, otherwise we won't get a path within a symlink + targetPath := filepath.Join(testDir, c.cwd) + t.Setenv("PWD", filepath.Clean(targetPath)) + + require.NoError(t, err) + require.NoError(t, os.Chdir(targetPath)) + t.Cleanup(func() { + require.NoError(t, os.Chdir(testDir)) + }) + + resolver, err := NewFromDirectory(c.root, c.base) + require.NoError(t, err) + require.NotNil(t, resolver) + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + if c.expectedRealPath == "" { + require.Empty(t, refs) + return + } + require.Len(t, refs, 1) + assert.Equal(t, c.expectedRealPath, refs[0].RealPath, "real path different") + assert.Equal(t, c.expectedAccessPath, refs[0].AccessPath, "virtual path different") + }) + } +} + +func TestDirectoryResolver_FilesByPath_relativeRoot(t *testing.T) { + cases := []struct { + name string + relativeRoot string + input string + expected []string + }{ + { + name: "should find a file from an absolute input", + relativeRoot: "./test-fixtures/", + input: "/image-symlinks/file-1.txt", + expected: []string{ + "image-symlinks/file-1.txt", + }, + }, + { + name: "should find a file from a relative path", + relativeRoot: "./test-fixtures/", + input: 
"image-symlinks/file-1.txt", + expected: []string{ + "image-symlinks/file-1.txt", + }, + }, + { + name: "should find a file from a relative path (root above cwd)", + // TODO: refactor me! this test depends on the structure of the source dir not changing, which isn't great + relativeRoot: "../", + input: "fileresolver/directory.go", + expected: []string{ + "fileresolver/directory.go", + }, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + resolver, err := NewFromDirectory(c.relativeRoot, "") + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + assert.Len(t, refs, len(c.expected)) + s := strset.New() + for _, actual := range refs { + s.Add(actual.RealPath) + } + assert.ElementsMatch(t, c.expected, s.List()) + }) + } +} + +func TestDirectoryResolver_FilesByPath_absoluteRoot(t *testing.T) { + cases := []struct { + name string + relativeRoot string + input string + expected []string + }{ + { + name: "should find a file from an absolute input", + relativeRoot: "./test-fixtures/", + input: "/image-symlinks/file-1.txt", + expected: []string{ + "image-symlinks/file-1.txt", + }, + }, + { + name: "should find a file from a relative path", + relativeRoot: "./test-fixtures/", + input: "image-symlinks/file-1.txt", + expected: []string{ + "image-symlinks/file-1.txt", + }, + }, + { + name: "should find a file from a relative path (root above cwd)", + // TODO: refactor me! 
this test depends on the structure of the source dir not changing, which isn't great + relativeRoot: "../", + input: "fileresolver/directory.go", + expected: []string{ + "fileresolver/directory.go", + }, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + // note: this test is all about asserting correct functionality when the given analysis path + // is an absolute path + absRoot, err := filepath.Abs(c.relativeRoot) + require.NoError(t, err) + + resolver, err := NewFromDirectory(absRoot, "") + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + assert.Len(t, refs, len(c.expected)) + s := strset.New() + for _, actual := range refs { + s.Add(actual.RealPath) + } + assert.ElementsMatch(t, c.expected, s.List()) + }) + } +} + +func TestDirectoryResolver_FilesByPath(t *testing.T) { + cases := []struct { + name string + root string + input string + expected string + refCount int + forcePositiveHasPath bool + }{ + { + name: "finds a file (relative)", + root: "./test-fixtures/", + input: "image-symlinks/file-1.txt", + expected: "image-symlinks/file-1.txt", + refCount: 1, + }, + { + name: "finds a file with relative indirection", + root: "./test-fixtures/../test-fixtures", + input: "image-symlinks/file-1.txt", + expected: "image-symlinks/file-1.txt", + refCount: 1, + }, + { + name: "managed non-existing files (relative)", + root: "./test-fixtures/", + input: "test-fixtures/image-symlinks/bogus.txt", + refCount: 0, + }, + { + name: "finds a file (absolute)", + root: "./test-fixtures/", + input: "/image-symlinks/file-1.txt", + expected: "image-symlinks/file-1.txt", + refCount: 1, + }, + { + name: "directories ignored", + root: "./test-fixtures/", + input: "/image-symlinks", + refCount: 0, + forcePositiveHasPath: true, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + resolver, err := NewFromDirectory(c.root, "") + assert.NoError(t, err) + + hasPath := resolver.HasPath(c.input) + if 
!c.forcePositiveHasPath { + if c.refCount != 0 && !hasPath { + t.Errorf("expected HasPath() to indicate existence, but did not") + } else if c.refCount == 0 && hasPath { + t.Errorf("expected HasPath() to NOT indicate existence, but does") + } + } else if !hasPath { + t.Errorf("expected HasPath() to indicate existence, but did not (force path)") + } + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + assert.Len(t, refs, c.refCount) + for _, actual := range refs { + assert.Equal(t, c.expected, actual.RealPath) + } + }) + } +} + +func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { + cases := []struct { + name string + input []string + refCount int + }{ + { + name: "finds multiple files", + input: []string{"image-symlinks/file-1.txt", "image-symlinks/file-2.txt"}, + refCount: 2, + }, + { + name: "skips non-existing files", + input: []string{"image-symlinks/bogus.txt", "image-symlinks/file-1.txt"}, + refCount: 1, + }, + { + name: "does not return anything for non-existing directories", + input: []string{"non-existing/bogus.txt", "non-existing/file-1.txt"}, + refCount: 0, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures", "") + assert.NoError(t, err) + refs, err := resolver.FilesByPath(c.input...) 
+ assert.NoError(t, err) + + if len(refs) != c.refCount { + t.Errorf("unexpected number of refs: %d != %d", len(refs), c.refCount) + } + }) + } +} + +func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures", "") + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("**/image-symlinks/file*") + assert.NoError(t, err) + + assert.Len(t, refs, 2) +} + +func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/image-symlinks", "") + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + assert.Len(t, refs, 6) +} + +func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures", "") + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt") + assert.NoError(t, err) + + assert.Len(t, refs, 1) + assert.Equal(t, "image-symlinks/file-1.txt", refs[0].RealPath) +} + +func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) { + + tests := []struct { + name string + fixture string + }{ + { + name: "one degree", + fixture: "link_to_new_readme", + }, + { + name: "two degrees", + fixture: "link_to_link_to_new_readme", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-simple", "") + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.fixture) + require.NoError(t, err) + assert.Len(t, refs, 1) + + reader, err := resolver.FileContentsByLocation(refs[0]) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + expected, err := os.ReadFile("test-fixtures/symlinks-simple/readme") + require.NoError(t, err) + + assert.Equal(t, string(expected), string(actual)) + }) + } +} + +func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { + // let's make certain that "dev/place" 
is not ignored, since it is not "/dev/place" + resolver, err := NewFromDirectory("test-fixtures/system_paths/target", "") + assert.NoError(t, err) + + // all paths should be found (non filtering matches a path) + locations, err := resolver.FilesByGlob("**/place") + assert.NoError(t, err) + // 4: within target/ + // 1: target/link --> relative path to "place" // NOTE: this is filtered out since it not unique relative to outside_root/link_target/place + // 1: outside_root/link_target/place + assert.Len(t, locations, 5) + + // ensure that symlink indexing outside of root worked + testLocation := "test-fixtures/system_paths/outside_root/link_target/place" + ok := false + for _, location := range locations { + if strings.HasSuffix(location.RealPath, testLocation) { + ok = true + } + } + + if !ok { + t.Fatalf("could not find test location=%q", testLocation) + } +} + +func Test_directoryResolver_FilesByMIMEType(t *testing.T) { + tests := []struct { + fixturePath string + mimeType string + expectedPaths *strset.Set + }{ + { + fixturePath: "./test-fixtures/image-simple", + mimeType: "text/plain", + expectedPaths: strset.New("file-1.txt", "file-2.txt", "target/really/nested/file-3.txt", "Dockerfile"), + }, + } + for _, test := range tests { + t.Run(test.fixturePath, func(t *testing.T) { + resolver, err := NewFromDirectory(test.fixturePath, "") + assert.NoError(t, err) + locations, err := resolver.FilesByMIMEType(test.mimeType) + assert.NoError(t, err) + assert.Equal(t, test.expectedPaths.Size(), len(locations)) + for _, l := range locations { + assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) + } + }) + } +} + +func Test_IndexingNestedSymLinks(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-simple", "") + require.NoError(t, err) + + // check that we can get the real path + locations, err := resolver.FilesByPath("./readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + + // check that we can 
access the same file via 1 symlink + locations, err = resolver.FilesByPath("./link_to_new_readme") + require.NoError(t, err) + require.Len(t, locations, 1) + assert.Equal(t, "readme", locations[0].RealPath) + assert.Equal(t, "link_to_new_readme", locations[0].AccessPath) + + // check that we can access the same file via 2 symlinks + locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") + require.NoError(t, err) + require.Len(t, locations, 1) + assert.Equal(t, "readme", locations[0].RealPath) + assert.Equal(t, "link_to_link_to_new_readme", locations[0].AccessPath) + + // check that we can access the same file via 2 symlinks + locations, err = resolver.FilesByGlob("**/link_*") + require.NoError(t, err) + require.Len(t, locations, 1) // you would think this is 2, however, they point to the same file, and glob only returns unique files + + // returned locations can be in any order + expectedAccessPaths := []string{ + "link_to_link_to_new_readme", + //"link_to_new_readme", // we filter out this one because the first symlink resolves to the same file + } + + expectedRealPaths := []string{ + "readme", + } + + actualRealPaths := strset.New() + actualAccessPaths := strset.New() + for _, a := range locations { + actualAccessPaths.Add(a.AccessPath) + actualRealPaths.Add(a.RealPath) + } + + assert.ElementsMatch(t, expectedAccessPaths, actualAccessPaths.List()) + assert.ElementsMatch(t, expectedRealPaths, actualRealPaths.List()) +} + +func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { + filterFn := func(_, path string, _ os.FileInfo, _ error) error { + if strings.HasSuffix(path, string(filepath.Separator)+"readme") { + return ErrSkipPath + } + return nil + } + + resolver, err := NewFromDirectory("./test-fixtures/symlinks-simple", "", filterFn) + require.NoError(t, err) + + // the path to the real file is PRUNED from the index, so we should NOT expect a location returned + locations, err := resolver.FilesByPath("./readme") + require.NoError(t, err) + 
assert.Empty(t, locations) + + // check that we cannot access the file even via symlink + locations, err = resolver.FilesByPath("./link_to_new_readme") + require.NoError(t, err) + assert.Empty(t, locations) + + // check that we still cannot access the same file via 2 symlinks + locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") + require.NoError(t, err) + assert.Empty(t, locations) +} + +func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-multiple-roots/root", "") + require.NoError(t, err) + + // check that we can get the real path + locations, err := resolver.FilesByPath("./readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + + // check that we can access the same file via 2 symlinks (link_to_link_to_readme -> link_to_readme -> readme) + locations, err = resolver.FilesByPath("./link_to_link_to_readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + + // something looks wrong here + t.Failed() +} + +func Test_RootViaSymlink(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinked-root/nested/link-root", "") + require.NoError(t, err) + + locations, err := resolver.FilesByPath("./file1.txt") + require.NoError(t, err) + assert.Len(t, locations, 1) + + locations, err = resolver.FilesByPath("./nested/file2.txt") + require.NoError(t, err) + assert.Len(t, locations, 1) + + locations, err = resolver.FilesByPath("./nested/linked-file1.txt") + require.NoError(t, err) + assert.Len(t, locations, 1) +} + +func Test_directoryResolver_FileContentsByLocation(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + + r, err := NewFromDirectory(".", "") + require.NoError(t, err) + + exists, existingPath, err := r.tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) + require.True(t, exists) + require.NoError(t, err) + require.True(t, existingPath.HasReference()) + + tests := []struct { + name 
string + location file.Location + expects string + err bool + }{ + { + name: "use file reference for content requests", + location: file.NewLocationFromDirectory("some/place", *existingPath.Reference), + expects: "this file has contents", + }, + { + name: "error on empty file reference", + location: file.NewLocationFromDirectory("doesn't matter", stereoscopeFile.Reference{}), + err: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + actual, err := r.FileContentsByLocation(test.location) + if test.err { + require.Error(t, err) + return + } + + require.NoError(t, err) + if test.expects != "" { + b, err := io.ReadAll(actual) + require.NoError(t, err) + assert.Equal(t, test.expects, string(b)) + } + }) + } +} + +func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) { + test := func(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-loop", "") + require.NoError(t, err) + + locations, err := resolver.FilesByGlob("**/file.target") + require.NoError(t, err) + + require.Len(t, locations, 1) + assert.Equal(t, "devices/loop0/file.target", locations[0].RealPath) + } + + testWithTimeout(t, 5*time.Second, test) +} + +func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) { + cases := []struct { + name string + root string + input string + expected []string + }{ + { + name: "should find the base file", + root: "./test-fixtures/symlinks-base/", + input: "./base", + expected: []string{ + "/base", + }, + }, + { + name: "should follow a link with a pivoted root", + root: "./test-fixtures/symlinks-base/", + input: "./foo", + expected: []string{ + "/base", + }, + }, + { + name: "should follow a relative link with extra parents", + root: "./test-fixtures/symlinks-base/", + input: "./bar", + expected: []string{ + "/base", + }, + }, + { + name: "should follow an absolute link with extra parents", + root: "./test-fixtures/symlinks-base/", + input: "./baz", + expected: []string{ + "/base", + }, + }, + { + name: 
"should follow an absolute link with extra parents", + root: "./test-fixtures/symlinks-base/", + input: "./sub/link", + expected: []string{ + "/sub/item", + }, + }, + { + name: "should follow chained pivoted link", + root: "./test-fixtures/symlinks-base/", + input: "./chain", + expected: []string{ + "/base", + }, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + resolver, err := NewFromDirectory(c.root, c.root) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + assert.Len(t, refs, len(c.expected)) + s := strset.New() + for _, actual := range refs { + s.Add(actual.RealPath) + } + assert.ElementsMatch(t, c.expected, s.List()) + }) + } + +} + +func Test_directoryResolver_resolvesLinks(t *testing.T) { + tests := []struct { + name string + runner func(file.Resolver) []file.Location + expected []file.Location + }{ + { + name: "by mimetype", + runner: func(resolver file.Resolver) []file.Location { + // links should not show up when searching mimetype + actualLocations, err := resolver.FilesByMIMEType("text/plain") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewLocation("file-1.txt"), // note: missing virtual path "file-1.txt" + file.NewLocation("file-3.txt"), // note: missing virtual path "file-3.txt" + file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt" + file.NewLocation("parent/file-4.txt"), // note: missing virtual path "file-4.txt" + }, + }, + { + name: "by glob to links", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + // for that reason we need to place **/ in front (which is not the same for other resolvers) + actualLocations, err := resolver.FilesByGlob("**/*ink-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("file-1.txt", "link-1"), + file.NewVirtualLocation("file-2.txt", "link-2"), + // we 
already have this real file path via another link, so only one is returned + //file.NewVirtualLocation("file-2.txt", "link-indirect"), + file.NewVirtualLocation("file-3.txt", "link-within"), + }, + }, + { + name: "by basename", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-2.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + // this has two copies in the base image, which overwrites the same location + file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt", + }, + }, + { + name: "by basename glob", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-?.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewLocation("file-1.txt"), // note: missing virtual path "file-1.txt" + file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt" + file.NewLocation("file-3.txt"), // note: missing virtual path "file-3.txt" + file.NewLocation("parent/file-4.txt"), // note: missing virtual path "parent/file-4.txt" + }, + }, + { + name: "by basename glob to links", + runner: func(resolver file.Resolver) []file.Location { + actualLocations, err := resolver.FilesByGlob("**/link-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("file-1.txt", "link-1"), + file.NewVirtualLocation("file-2.txt", "link-2"), + + // we already have this real file path via another link, so only one is returned + //file.NewVirtualLocation("file-2.txt", "link-indirect"), + + file.NewVirtualLocation("file-3.txt", "link-within"), + }, + }, + { + name: "by extension", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err 
:= resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewLocation("file-1.txt"), // note: missing virtual path "file-1.txt" + file.NewLocation("file-2.txt"), // note: missing virtual path "file-2.txt" + file.NewLocation("file-3.txt"), // note: missing virtual path "file-3.txt" + file.NewLocation("parent/file-4.txt"), // note: missing virtual path "parent/file-4.txt" + }, + }, + { + name: "by path to degree 1 link", + runner: func(resolver file.Resolver) []file.Location { + // links resolve to the final file + actualLocations, err := resolver.FilesByPath("/link-2") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + // we have multiple copies across layers + file.NewVirtualLocation("file-2.txt", "link-2"), + }, + }, + { + name: "by path to degree 2 link", + runner: func(resolver file.Resolver) []file.Location { + // multiple links resolves to the final file + actualLocations, err := resolver.FilesByPath("/link-indirect") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + // we have multiple copies across layers + file.NewVirtualLocation("file-2.txt", "link-indirect"), + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-from-image-symlinks-fixture", "") + require.NoError(t, err) + assert.NoError(t, err) + + actual := test.runner(resolver) + + compareLocations(t, test.expected, actual) + }) + } +} + +func TestDirectoryResolver_DoNotAddVirtualPathsToTree(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-prune-indexing", "") + require.NoError(t, err) + + var allRealPaths []stereoscopeFile.Path + for l := range resolver.AllLocations(context.Background()) { + allRealPaths = append(allRealPaths, stereoscopeFile.Path(l.RealPath)) + } + pathSet := stereoscopeFile.NewPathSet(allRealPaths...) 
+ + assert.False(t, + pathSet.Contains("before-path/file.txt"), + "symlink destinations should only be indexed at their real path, not through their virtual (symlinked) path", + ) + + assert.False(t, + pathSet.Contains("a-path/file.txt"), + "symlink destinations should only be indexed at their real path, not through their virtual (symlinked) path", + ) + +} + +func TestDirectoryResolver_FilesContents_errorOnDirRequest(t *testing.T) { + defer goleak.VerifyNone(t) + resolver, err := NewFromDirectory("./test-fixtures/system_paths", "") + assert.NoError(t, err) + + var dirLoc *file.Location + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + for loc := range resolver.AllLocations(ctx) { + entry, err := resolver.index.Get(loc.Reference()) + require.NoError(t, err) + if entry.Metadata.IsDir() { + dirLoc = &loc + break + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + +func TestDirectoryResolver_AllLocations(t *testing.T) { + resolver, err := NewFromDirectory("./test-fixtures/symlinks-from-image-symlinks-fixture", "") + assert.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations(context.Background()) { + if strings.HasPrefix(loc.RealPath, "/") { + // ignore outside the fixture root for now + continue + } + paths.Add(loc.RealPath) + } + expected := []string{ + "file-1.txt", + "file-2.txt", + "file-3.txt", + "link-1", + "link-2", + "link-dead", + "link-indirect", + "link-within", + "parent", + "parent-link", + "parent/file-4.txt", + } + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) +} + +func TestAllLocationsDoesNotLeakGoRoutine(t *testing.T) { + defer goleak.VerifyNone(t) + resolver, err := 
NewFromDirectory("./test-fixtures/symlinks-from-image-symlinks-fixture", "") + require.NoError(t, err) + ctx, cancel := context.WithCancel(context.Background()) + for range resolver.AllLocations(ctx) { + break + } + cancel() +} + +var _ fs.FileInfo = (*testFileInfo)(nil) + +type testFileInfo struct { + mode os.FileMode +} + +func (t testFileInfo) Name() string { + panic("implement me") +} + +func (t testFileInfo) Size() int64 { + panic("implement me") +} + +func (t testFileInfo) Mode() fs.FileMode { + return t.mode +} + +func (t testFileInfo) ModTime() time.Time { + panic("implement me") +} + +func (t testFileInfo) IsDir() bool { + panic("implement me") +} + +func (t testFileInfo) Sys() interface{} { + panic("implement me") +} + +// Tests for filetree resolver when single file is used for index +func TestFileResolver_FilesByPath(t *testing.T) { + tests := []struct { + description string + filePath string // relative to cwd + fileByPathInput string + expectedRealPath string + expectedAccessPath string + cwd string + }{ + { + description: "Finds file if searched by filepath", + filePath: "./test-fixtures/req-resp/path/to/the/file.txt", + fileByPathInput: "file.txt", + expectedRealPath: "/file.txt", + expectedAccessPath: "/file.txt", + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + parentPath, err := absoluteSymlinkFreePathToParent(tt.filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, tt.filePath) + require.NoError(t, err) + require.NotNil(t, resolver) + + refs, err := resolver.FilesByPath(tt.fileByPathInput) + require.NoError(t, err) + if tt.expectedRealPath == "" { + require.Empty(t, refs) + return + } + require.Len(t, refs, 1) + assert.Equal(t, tt.expectedRealPath, refs[0].RealPath, "real path different") + assert.Equal(t, tt.expectedAccessPath, refs[0].AccessPath, "virtual path different") + }) + } +} + +func TestFileResolver_MultipleFilesByPath(t *testing.T) { + 
tests := []struct { + description string + input []string + refCount int + }{ + { + description: "finds file ", + input: []string{"file.txt"}, + refCount: 1, + }, + { + description: "skip non-existing files", + input: []string{"file.txt", "bogus.txt"}, + refCount: 1, + }, + { + description: "does not return anything for non-existing files", + input: []string{"non-existing/bogus.txt", "another-bogus.txt"}, + refCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + filePath := "./test-fixtures/req-resp/path/to/the/file.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, filePath) + assert.NoError(t, err) + refs, err := resolver.FilesByPath(tt.input...) + assert.NoError(t, err) + + if len(refs) != tt.refCount { + t.Errorf("unexpected number of refs: %d != %d", len(refs), tt.refCount) + } + }) + } +} + +func TestFileResolver_FilesByGlob(t *testing.T) { + filePath := "./test-fixtures/req-resp/path/to/the/file.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, filePath) + assert.NoError(t, err) + refs, err := resolver.FilesByGlob("*.txt") + assert.NoError(t, err) + + assert.Len(t, refs, 1) +} + +func Test_fileResolver_FilesByMIMEType(t *testing.T) { + tests := []struct { + fixturePath string + mimeType string + expectedPaths *strset.Set + }{ + { + fixturePath: "./test-fixtures/image-simple/file-1.txt", + mimeType: "text/plain", + expectedPaths: strset.New("/file-1.txt"), + }, + } + for _, test := range tests { + t.Run(test.fixturePath, func(t *testing.T) { + filePath := "./test-fixtures/image-simple/file-1.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + resolver, err := NewFromFile(parentPath, filePath) + 
assert.NoError(t, err) + locations, err := resolver.FilesByMIMEType(test.mimeType) + assert.NoError(t, err) + assert.Equal(t, test.expectedPaths.Size(), len(locations)) + for _, l := range locations { + assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) + } + }) + } +} + +func Test_fileResolver_FileContentsByLocation(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + + filePath := "./test-fixtures/image-simple/file-1.txt" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + + r, err := NewFromFile(parentPath, filePath) + require.NoError(t, err) + + exists, existingPath, err := r.tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) + require.True(t, exists) + require.NoError(t, err) + require.True(t, existingPath.HasReference()) + + tests := []struct { + name string + location file.Location + expects string + err bool + }{ + { + name: "use file reference for content requests", + location: file.NewLocationFromDirectory("some/place", *existingPath.Reference), + expects: "this file has contents", + }, + { + name: "error on empty file reference", + location: file.NewLocationFromDirectory("doesn't matter", stereoscopeFile.Reference{}), + err: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + actual, err := r.FileContentsByLocation(test.location) + if test.err { + require.Error(t, err) + return + } + + require.NoError(t, err) + if test.expects != "" { + b, err := io.ReadAll(actual) + require.NoError(t, err) + assert.Equal(t, test.expects, string(b)) + } + }) + } +} + +func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) { + defer goleak.VerifyNone(t) + + filePath := "./test-fixtures/system_paths/target/home/place" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + resolver, err := 
NewFromFile(parentPath, filePath) + require.NoError(t, err) + + var dirLoc *file.Location + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + for loc := range resolver.AllLocations(ctx) { + entry, err := resolver.index.Get(loc.Reference()) + require.NoError(t, err) + if entry.Metadata.IsDir() { + dirLoc = &loc + break + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + +func TestFileResolver_AllLocations(t *testing.T) { + // Verify both the parent and the file itself are indexed + filePath := "./test-fixtures/system_paths/target/home/place" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + resolver, err := NewFromFile(parentPath, filePath) + require.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations(context.Background()) { + paths.Add(loc.RealPath) + } + expected := []string{ + "/place", + "", // This is how we see the parent dir, since we're resolving wrt the parent directory. + } + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) +} + +func Test_FileResolver_AllLocationsDoesNotLeakGoRoutine(t *testing.T) { + defer goleak.VerifyNone(t) + filePath := "./test-fixtures/system_paths/target/home/place" + parentPath, err := absoluteSymlinkFreePathToParent(filePath) + require.NoError(t, err) + require.NotNil(t, parentPath) + resolver, err := NewFromFile(parentPath, filePath) + require.NoError(t, err) + + require.NoError(t, err) + ctx, cancel := context.WithCancel(context.Background()) + for range resolver.AllLocations(ctx) { + break + } + cancel() +}