diff --git a/.github/scripts/json-schema-drift-check.sh b/.github/scripts/json-schema-drift-check.sh
index 7b7f7dd2f62..3002236d68b 100755
--- a/.github/scripts/json-schema-drift-check.sh
+++ b/.github/scripts/json-schema-drift-check.sh
@@ -1,27 +1,20 @@
 #!/usr/bin/env bash
 set -u
 
-if ! git diff-index --quiet HEAD --; then
-  git diff-index HEAD --
-  git --no-pager diff
-  echo "there are uncommitted changes, please commit them before running this check"
+# the check compares the worktree before and after regeneration, so it has to start from a clean state
+if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then
+  echo " 🔴 there are uncommitted changes, please commit them before running this check"
   exit 1
 fi
 
-success=true
-
 if ! make generate-json-schema; then
   echo "Generating json schema failed"
-  success=false
-fi
-
-if ! git diff-index --quiet HEAD --; then
-  git diff-index HEAD --
-  git --no-pager diff
-  echo "JSON schema drift detected!"
-  success=false
+  exit 1
 fi
 
-if ! $success; then
+# the worktree was clean before generation, so anything listed now was produced by the generator
+if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then
+  git status --porcelain
+  echo " 🔴 JSON schema drift detected!"
   exit 1
 fi
diff --git a/Makefile b/Makefile
index 0b944b83bd9..ae917777f72 100644
--- a/Makefile
+++ b/Makefile
@@ -302,7 +302,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
 
 .PHONY: generate-json-schema
 generate-json-schema: ## Generate a new json schema
-	cd schema/json && go run generate.go
+	cd schema/json && go generate . && go run .
 
 .PHONY: generate-license-list
 generate-license-list: ## Generate an updated spdx license list
diff --git a/go.mod b/go.mod
index ea46c730b23..9e7456af4ff 100644
--- a/go.mod
+++ b/go.mod
@@ -54,6 +54,7 @@ require (
 	github.com/Masterminds/sprig/v3 v3.2.3
 	github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8
 	github.com/anchore/stereoscope v0.0.0-20230522170632-e14bc4437b2e
+	github.com/dave/jennifer v1.6.1
 	github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da
 	github.com/docker/docker v24.0.1+incompatible
 	github.com/github/go-spdx/v2 v2.1.2
diff --git a/go.sum b/go.sum
index 9bfc68a1526..bed5f726321 100644
--- a/go.sum
+++ b/go.sum
@@ -155,6 +155,8 @@ github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
 github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/dave/jennifer v1.6.1 h1:T4T/67t6RAA5AIV6+NP8Uk/BIsXgDoqEowgycdQQLuk=
+github.com/dave/jennifer v1.6.1/go.mod h1:nXbxhEmQfOZhWml3D1cDK5M1FLnMSozpbFN/m3RmGZc=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
diff --git a/schema/json/generate/main.go b/schema/json/generate/main.go
new file mode 100644
index 00000000000..fc8dc120a21
--- /dev/null
+++ b/schema/json/generate/main.go
@@ -0,0 +1,46 @@
+// This program generates internal/generated.go, which declares the ArtifactMetadataContainer struct with one
+// field per metadata type name reported by internal.AllSyftMetadataTypeNames.
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/dave/jennifer/jen"
+
+	"github.com/anchore/syft/schema/json/internal"
+)
+
+// pkgImport is the import path used to qualify the generated field types; path is the output file, which is
+// resolved relative to the working directory the generator is started from.
+const (
+	pkgImport = "github.com/anchore/syft/syft/pkg"
+	path      = "internal/generated.go"
+)
+
+// main renders the container struct and overwrites the output file; any failure aborts the program with a panic.
+func main() {
+	typeNames, err := internal.AllSyftMetadataTypeNames()
+	if err != nil {
+		panic(fmt.Errorf("unable to get all metadata type names: %w", err))
+	}
+
+	fmt.Printf("updating metadata container object with %+v types\n", len(typeNames))
+
+	f := jen.NewFile("internal")
+	f.HeaderComment("DO NOT EDIT: generated by schema/json/generate/main.go")
+	f.ImportName(pkgImport, "pkg")
+	f.Comment("ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.")
+	f.Type().Id("ArtifactMetadataContainer").StructFunc(func(g *jen.Group) {
+		for _, typeName := range typeNames {
+			g.Id(typeName).Qual(pkgImport, typeName)
+		}
+	})
+
+	rendered := fmt.Sprintf("%#v", f)
+
+	// os.WriteFile creates or truncates the file and closes it again, including when the write itself fails
+	if err := os.WriteFile(path, []byte(rendered), 0644); err != nil {
+		panic(fmt.Errorf("unable to write file: %w", err))
+	}
+}
diff --git a/schema/json/internal/generated.go b/schema/json/internal/generated.go
new file mode 100644
index 00000000000..3341818deb7
--- /dev/null
+++ b/schema/json/internal/generated.go
@@ -0,0 +1,39 @@
+// DO NOT EDIT: generated by schema/json/generate/main.go
+
+package internal
+
+import "github.com/anchore/syft/syft/pkg"
+
+// ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.
+type ArtifactMetadataContainer struct {
+	AlpmMetadata               pkg.AlpmMetadata
+	ApkMetadata                pkg.ApkMetadata
+	BinaryMetadata             pkg.BinaryMetadata
+	CargoPackageMetadata       pkg.CargoPackageMetadata
+	CocoapodsMetadata          pkg.CocoapodsMetadata
+	ConanLockMetadata          pkg.ConanLockMetadata
+	ConanMetadata              pkg.ConanMetadata
+	DartPubMetadata            pkg.DartPubMetadata
+	DotnetDepsMetadata         pkg.DotnetDepsMetadata
+	DpkgMetadata               pkg.DpkgMetadata
+	GemMetadata                pkg.GemMetadata
+	GolangBinMetadata          pkg.GolangBinMetadata
+	GolangModMetadata          pkg.GolangModMetadata
+	HackageMetadata            pkg.HackageMetadata
+	JavaMetadata               pkg.JavaMetadata
+	KbPackageMetadata          pkg.KbPackageMetadata
+	LinuxKernelMetadata        pkg.LinuxKernelMetadata
+	LinuxKernelModuleMetadata  pkg.LinuxKernelModuleMetadata
+	MixLockMetadata            pkg.MixLockMetadata
+	NixStoreMetadata           pkg.NixStoreMetadata
+	NpmPackageJSONMetadata     pkg.NpmPackageJSONMetadata
+	NpmPackageLockJSONMetadata pkg.NpmPackageLockJSONMetadata
+	PhpComposerJSONMetadata    pkg.PhpComposerJSONMetadata
+	PortageMetadata            pkg.PortageMetadata
+	PythonPackageMetadata      pkg.PythonPackageMetadata
+	PythonPipfileLockMetadata  pkg.PythonPipfileLockMetadata
+	PythonRequirementsMetadata pkg.PythonRequirementsMetadata
+	RDescriptionFileMetadata   pkg.RDescriptionFileMetadata
+	RebarLockMetadata          pkg.RebarLockMetadata
+	RpmMetadata                pkg.RpmMetadata
+}
diff --git a/schema/json/internal/metadata_types.go b/schema/json/internal/metadata_types.go
new file mode 100644
index 00000000000..4d515a18890
--- /dev/null
+++ b/schema/json/internal/metadata_types.go
@@ -0,0 +1,161 @@
+package internal
+
+import (
+	"fmt"
+	"go/ast"
+	"go/parser"
+	"go/token"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+	"unicode"
+
+	"github.com/scylladb/go-set/strset"
+)
+
+// metadataExceptions lists struct names that match the metadata naming pattern but must not be reported.
+var metadataExceptions = strset.New(
+	"FileMetadata",
+)
+
+// AllSyftMetadataTypeNames returns the sorted names of the top-level metadata struct types declared in the
+// go files directly under syft/pkg of the enclosing git repository. The files are parsed statically (nothing
+// is compiled or imported), and the git executable is invoked to locate the repository root.
+func AllSyftMetadataTypeNames() ([]string, error) {
+	root, err := repoRoot()
+	if err != nil {
+		return nil, err
+	}
+	files, err := filepath.Glob(filepath.Join(root, "syft/pkg/*.go"))
+	if err != nil {
+		return nil, err
+	}
+	return findMetadataDefinitionNames(files...)
+}
+
+// repoRoot returns the absolute path that "git rev-parse --show-toplevel" reports for the working directory.
+func repoRoot() (string, error) {
+	root, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
+	if err != nil {
+		return "", fmt.Errorf("unable to find repo root dir: %w", err)
+	}
+	absRepoRoot, err := filepath.Abs(strings.TrimSpace(string(root)))
+	if err != nil {
+		return "", fmt.Errorf("unable to get abs path to repo root: %w", err)
+	}
+	return absRepoRoot, nil
+}
+
+// findMetadataDefinitionNames parses the given go source files and returns the sorted names of all metadata
+// struct candidates, minus any name that appears in the field types of a candidate struct. An error is
+// returned when a file cannot be parsed or when fewer definitions than expected are discovered.
+func findMetadataDefinitionNames(paths ...string) ([]string, error) {
+	names := strset.New()
+	usedNames := strset.New()
+	for _, path := range paths {
+		metadataDefinitions, usedTypeNames, err := findMetadataDefinitionNamesInFile(path)
+		if err != nil {
+			return nil, err
+		}
+
+		// useful for debugging...
+		// fmt.Println(path)
+		// fmt.Println("Defs:", metadataDefinitions)
+		// fmt.Println("Used Types:", usedTypeNames)
+		// fmt.Println()
+
+		names.Add(metadataDefinitions...)
+		usedNames.Add(usedTypeNames...)
+	}
+
+	// any definition that is used within another struct should not be considered a top-level metadata definition
+	names.Remove(usedNames.List()...)
+
+	strNames := names.List()
+	sort.Strings(strNames)
+
+	// note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required.
+	// it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions.
+	if len(strNames) < 30 {
+		return nil, fmt.Errorf("not enough metadata definitions found (discovered: %d)", len(strNames))
+	}
+
+	return strNames, nil
+}
+
+// findMetadataDefinitionNamesInFile parses a single go source file and returns the names of the struct types
+// that qualify as metadata candidates, followed by every identifier found in the field types of those structs.
+func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error) {
+	// set up the parser
+	fs := token.NewFileSet()
+	f, err := parser.ParseFile(fs, path, nil, parser.ParseComments)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	var metadataDefinitions []string
+	var usedTypeNames []string
+	for _, decl := range f.Decls {
+		// check if the declaration is a type declaration
+		genDecl, ok := decl.(*ast.GenDecl)
+		if !ok || genDecl.Tok != token.TYPE {
+			continue
+		}
+
+		// loop over all types declared in the type declaration
+		for _, spec := range genDecl.Specs {
+			typeSpec, ok := spec.(*ast.TypeSpec)
+			if !ok || typeSpec.Type == nil {
+				continue
+			}
+
+			// only struct types can be metadata definitions
+			structType, ok := typeSpec.Type.(*ast.StructType)
+			if !ok {
+				continue
+			}
+
+			name := typeSpec.Name.String()
+
+			// only record exported types that end with "Metadata" and are not explicitly excluded
+			if isMetadataTypeCandidate(name) {
+				metadataDefinitions = append(metadataDefinitions, name)
+				usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...)
+			}
+		}
+	}
+	return metadataDefinitions, usedTypeNames, nil
+}
+
+// typeNamesUsedInStruct returns every identifier that appears within the type expressions of the struct's
+// fields, including identifiers nested inside composite type expressions; the struct's own field names are skipped.
+func typeNamesUsedInStruct(structType *ast.StructType) []string {
+	var names []string
+	for i := range structType.Fields.List {
+		// capture names of all of the types (not field names)
+		ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool {
+			ident, ok := n.(*ast.Ident)
+			if !ok {
+				return true
+			}
+
+			// add the type name to the list
+			names = append(names, ident.Name)
+
+			// continue inspecting
+			return true
+		})
+	}
+
+	return names
+}
+
+// isMetadataTypeCandidate reports whether name is non-empty, ends with "Metadata", starts with an upper-case
+// letter (only the first byte of the name is examined), and is not listed in metadataExceptions.
+func isMetadataTypeCandidate(name string) bool {
+	return len(name) > 0 &&
+		strings.HasSuffix(name, "Metadata") &&
+		unicode.IsUpper(rune(name[0])) && // must be exported
+		!metadataExceptions.Has(name)
+}
diff --git a/schema/json/generate.go b/schema/json/main.go
similarity index 60%
rename from schema/json/generate.go
rename to schema/json/main.go
index 169e3c22ff8..246abc532a5 100644
--- a/schema/json/generate.go
+++ b/schema/json/main.go
@@ -13,8 +13,8 @@ import (
 	"github.com/invopop/jsonschema"
 
 	"github.com/anchore/syft/internal"
+	genInt "github.com/anchore/syft/schema/json/internal"
 	syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model"
-	"github.com/anchore/syft/syft/pkg"
 )
 
 /*
@@ -24,46 +24,9 @@ are not captured (empty interfaces). This means that pkg.Package.Metadata is not
 can be extended to include specific package metadata struct shapes in the future.
 */
 
-// This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}).
-// When a new package metadata definition is created it will need to be manually added here. The variable name does
-// not matter as long as it is exported.
-
-// TODO: this should be generated from reflection of whats in the pkg package
-// Should be created during generation below; use reflection's ability to
-// create types at runtime.
-// should be same name as struct minus metadata
-type artifactMetadataContainer struct {
-	Alpm               pkg.AlpmMetadata
-	Apk                pkg.ApkMetadata
-	Binary             pkg.BinaryMetadata
-	Cocopods           pkg.CocoapodsMetadata
-	Conan              pkg.ConanMetadata
-	ConanLock          pkg.ConanLockMetadata
-	Dart               pkg.DartPubMetadata
-	Dotnet             pkg.DotnetDepsMetadata
-	Dpkg               pkg.DpkgMetadata
-	Gem                pkg.GemMetadata
-	GoBin              pkg.GolangBinMetadata
-	GoMod              pkg.GolangModMetadata
-	Hackage            pkg.HackageMetadata
-	Java               pkg.JavaMetadata
-	KbPackage          pkg.KbPackageMetadata
-	LinuxKernel        pkg.LinuxKernelMetadata
-	LinuxKernelModule  pkg.LinuxKernelModuleMetadata
-	Nix                pkg.NixStoreMetadata
-	NpmPackage         pkg.NpmPackageJSONMetadata
-	NpmPackageLock     pkg.NpmPackageLockJSONMetadata
-	MixLock            pkg.MixLockMetadata
-	Php                pkg.PhpComposerJSONMetadata
-	Portage            pkg.PortageMetadata
-	PythonPackage      pkg.PythonPackageMetadata
-	PythonPipfilelock  pkg.PythonPipfileLockMetadata
-	PythonRequirements pkg.PythonRequirementsMetadata
-	RDescriptionFile   pkg.RDescriptionFileMetadata
-	Rebar              pkg.RebarLockMetadata
-	Rpm                pkg.RpmMetadata
-	RustCargo          pkg.CargoPackageMetadata
-}
+//go:generate go run ./generate/main.go
+
+const schemaVersion = internal.JSONSchemaVersion
 
 func main() {
 	write(encode(build()))
@@ -77,14 +40,14 @@ func build() *jsonschema.Schema {
 		},
 	}
 	documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{}))
-	metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{}))
+	metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&genInt.ArtifactMetadataContainer{}))
 	// TODO: inject source definitions
 
 	// inject the definitions of all metadatas into the schema definitions
 
 	var metadataNames []string
 	for name, definition := range metadataSchema.Definitions {
-		if name == "artifactMetadataContainer" {
+		if name == reflect.TypeOf(genInt.ArtifactMetadataContainer{}).Name() {
 			// ignore the definition for the fake container
 			continue
 		}
@@ -130,7 +93,7 @@ func encode(schema *jsonschema.Schema) []byte {
 }
 
 func write(schema []byte) {
-	filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
+	filename := fmt.Sprintf("schema-%s.json", schemaVersion)
 
 	if _, err := os.Stat(filename); !os.IsNotExist(err) {
 		// check if the schema is the same...
@@ -167,5 +130,5 @@ func write(schema []byte) {
 
 	defer fh.Close()
 
-	fmt.Printf("wrote new schema to %q\n", filename)
+	fmt.Printf("Wrote new schema to %q\n", filename)
 }
diff --git a/schema/json/main_test.go b/schema/json/main_test.go
new file mode 100644
index 00000000000..0903b4dde39
--- /dev/null
+++ b/schema/json/main_test.go
@@ -0,0 +1,39 @@
+package main
+
+import (
+	"reflect"
+	"sort"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/anchore/syft/schema/json/internal"
+)
+
+func TestAllMetadataRepresented(t *testing.T) {
+	// this test compares the metadata type names discovered in the source tree (expected) against the field types
+	// of the currently generated ArtifactMetadataContainer struct (actual), so that a PR fails whenever the generated
+	// struct, which controls the JSON schema content, has drifted from the implemented set of metadata types.
+	expected, err := internal.AllSyftMetadataTypeNames()
+	require.NoError(t, err)
+	actual := allTypeNamesFromStruct(internal.ArtifactMetadataContainer{})
+	if !assert.ElementsMatch(t, expected, actual) {
+		t.Errorf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual))
+		t.Log("did you add a new pkg.*Metadata type without updating the JSON schema?")
+		t.Log("if so, you need to update the schema version and regenerate the JSON schema (make generate-json-schema)")
+	}
+}
+
+func allTypeNamesFromStruct(instance any) []string {
+	// collect the type name of each direct field, sorted (nested fields are not visited); instance must be a struct value, since NumField panics for other kinds
+	var typeNames []string
+	tt := reflect.TypeOf(instance)
+	for i := 0; i < tt.NumField(); i++ {
+		field := tt.Field(i)
+		typeNames = append(typeNames, field.Type.Name())
+	}
+	sort.Strings(typeNames)
+	return typeNames
+}