From 3048a3da8cf7f6a636302b484b1251a8f96ddcb0 Mon Sep 17 00:00:00 2001
From: Evan Anderson
Date: Sat, 18 Jan 2025 14:43:22 -0800
Subject: [PATCH] Add dependency extraction functions

---
 internal/engine/eval/rego/lib.go      | 124 ++++++++++++++++++++++++++
 internal/engine/eval/rego/lib_test.go | 105 +++++++++++++++++++++++++
 internal/flags/constants.go           |   2 +
 3 files changed, 231 insertions(+)

diff --git a/internal/engine/eval/rego/lib.go b/internal/engine/eval/rego/lib.go
index 18454e5d7a..9f567be112 100644
--- a/internal/engine/eval/rego/lib.go
+++ b/internal/engine/eval/rego/lib.go
@@ -18,16 +18,20 @@ import (
 	"time"
 
 	"github.com/go-git/go-billy/v5"
+	"github.com/go-git/go-billy/v5/helper/iofs"
+	"github.com/go-git/go-billy/v5/memfs"
 	billyutil "github.com/go-git/go-billy/v5/util"
 	"github.com/open-feature/go-sdk/openfeature"
 	"github.com/open-policy-agent/opa/v1/ast"
 	"github.com/open-policy-agent/opa/v1/rego"
 	"github.com/open-policy-agent/opa/v1/types"
 	"github.com/pelletier/go-toml/v2"
+	"github.com/protobom/protobom/pkg/sbom"
 	"github.com/stacklok/frizbee/pkg/replacer"
 	"github.com/stacklok/frizbee/pkg/utils/config"
 	"gopkg.in/yaml.v3"
 
+	"github.com/mindersec/minder/internal/deps/scalibr"
 	"github.com/mindersec/minder/internal/flags"
 	"github.com/mindersec/minder/internal/util"
 	"github.com/mindersec/minder/pkg/engine/v1/interfaces"
@@ -60,6 +64,10 @@ var MinderRegoLibExperiments = map[flags.Experiment][]func(res *interfaces.Resul
 		BaseFileWalk,
 		BaseListGithubActions,
 	},
+	flags.DependencyExtract: {
+		DependencyExtract,
+		BaseDependencyExtract,
+	},
 }
 
 func instantiateRegoLib(ctx context.Context, featureFlags openfeature.IClient, res *interfaces.Result) []func(*rego.Rego) {
@@ -763,3 +771,119 @@ func ParseToml(_ *interfaces.Result) func(*rego.Rego) {
 		},
 	)
 }
+
+// DependencyExtract is a rego function that extracts dependencies from a file
+// or subtree of the filesystem being evaluated (which comes from the ingester).
+// It takes one argument: the path to the file or subtree to be scanned.
+// It returns the extracted dependencies as an AST term in the form of a
+// protobom SBOM with the "nodes" fields but not "edges".
+// It's exposed as `file.deps`.
+func DependencyExtract(res *interfaces.Result) func(*rego.Rego) {
+	return rego.Function1(
+		&rego.Function{
+			Name: "file.deps",
+			// TODO: The return type is types.A, but it should be types.NewObject(...)
+			Decl: types.NewFunction(types.Args(types.S), types.A),
+		},
+		fsExtractDeps(res.Fs),
+	)
+}
+
+// BaseDependencyExtract is a rego function that extracts dependencies from a file
+// or subtree of the base filesystem in a pull_request or other diff context.
+// It takes one argument: the path to the file or subtree to be scanned.
+// It returns the extracted dependencies as an AST term in the form of a
+// protobom SBOM with the "nodes" fields but not "edges".
+// It's exposed as `base_file.deps`.
+func BaseDependencyExtract(res *interfaces.Result) func(*rego.Rego) {
+	return rego.Function1(
+		&rego.Function{
+			Name: "base_file.deps",
+			// TODO: The return type is types.A, but it should be types.NewObject(...)
+			Decl: types.NewFunction(types.Args(types.S), types.A),
+		},
+		fsExtractDeps(res.BaseFs),
+	)
+}
+
+// fsExtractDeps returns the builtin body shared by `file.deps` and
+// `base_file.deps`: it scans the file or directory at the given path of vfs
+// and returns the dependencies found there wrapped in an SBOM document.
+func fsExtractDeps(vfs billy.Filesystem) func(rego.BuiltinContext, *ast.Term) (*ast.Term, error) {
+	return func(bctx rego.BuiltinContext, op1 *ast.Term) (*ast.Term, error) {
+		var path string
+		if err := ast.As(op1.Value, &path); err != nil {
+			return nil, err
+		}
+
+		if vfs == nil {
+			return nil, fmt.Errorf("file system is not available")
+		}
+
+		// verify the file or path exists
+		target, err := vfs.Stat(path)
+		if err != nil {
+			if errors.Is(err, os.ErrNotExist) {
+				return nil, fmt.Errorf("file or path %q does not exist", path)
+			}
+			return nil, err
+		}
+
+		// vfs is captured by this closure and shared by every call of the
+		// builtin, so narrow the scan to a local filesystem instead of
+		// reassigning vfs.
+		scanFs := vfs
+		switch {
+		case !target.IsDir():
+			// vfs.Chroot() only works on directories, so if we have a file,
+			// copy it to a new vfs.
+			scanFs, err = singleFileFs(vfs, path)
+		case path != "":
+			scanFs, err = vfs.Chroot(path)
+		}
+		if err != nil {
+			return nil, err
+		}
+
+		// construct a scalibr extractor
+		extractor := scalibr.NewExtractor()
+
+		nodes, err := extractor.ScanFilesystem(bctx.Context, iofs.New(scanFs))
+		if err != nil {
+			return nil, fmt.Errorf("failed to scan filesystem: %w", err)
+		}
+		// put in an SBOM wrapper
+		doc := &sbom.Document{
+			NodeList: nodes,
+		}
+		astValue, err := ast.InterfaceToValue(doc)
+		if err != nil {
+			return nil, err
+		}
+
+		return &ast.Term{Value: astValue}, nil
+	}
+}
+
+// singleFileFs copies the file at path in vfs into a new in-memory filesystem
+// that holds only that file, stored under its base name.
+func singleFileFs(vfs billy.Filesystem, path string) (billy.Filesystem, error) {
+	src, err := vfs.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open file %q: %w", path, err)
+	}
+	defer src.Close()
+
+	single := memfs.New()
+	basename := filepath.Base(path)
+	dst, err := single.Create(basename)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create file %q: %w", basename, err)
+	}
+	defer dst.Close()
+
+	if _, err := io.Copy(dst, src); err != nil {
+		return nil, fmt.Errorf("failed to copy file %q: %w", path, err)
+	}
+	return single, nil
+}
diff --git a/internal/engine/eval/rego/lib_test.go b/internal/engine/eval/rego/lib_test.go
index 9052650c2d..1f3ed5600a 100644
--- a/internal/engine/eval/rego/lib_test.go
+++ b/internal/engine/eval/rego/lib_test.go
@@ -1197,3 +1197,108 @@ allow {
 		})
 	}
 }
+
+func TestExtractDeps(t *testing.T) {
+	t.Parallel()
+
+	scenario := []struct {
+		name         string
+		path         string
+		expectedDeps []string
+		expectedErr  error
+	}{{
+		name: "parse all",
+		expectedDeps: []string{
+			"example.com/othermodule",
+			"example.com/thismodule",
+			"example.com/thatmodule",
+			"stdlib", // Always part of golang binaries.
+			"PyYAML",
+		},
+	}, {
+		name: "parse go.mod",
+		path: "foo",
+		expectedDeps: []string{
+			"example.com/othermodule",
+			"example.com/thismodule",
+			"example.com/thatmodule",
+			"stdlib", // Always part of golang binaries.
+		},
+	}, {
+		name: "parse file",
+		path: "requirements.txt",
+		expectedDeps: []string{
+			"PyYAML",
+		},
+	}, {
+		name:        "parse non-existent file",
+		path:        "missing",
+		expectedErr: engerrors.NewErrEvaluationFailed("denied"),
+	}}
+
+	fs := memfs.New()
+	require.NoError(t, fs.MkdirAll("foo", 0755), "could not create directory")
+	// From https://go.dev/doc/modules/gomod-ref#example
+	goMod := `
+module example.com/mymodule
+
+go 1.14
+
+require (
+	example.com/othermodule v1.2.3
+	example.com/thismodule v1.2.3
+	example.com/thatmodule v1.2.3
+)
+`
+
+	require.NoError(t, billyutil.WriteFile(fs, "foo/go.mod", []byte(goMod), 0644))
+	require.NoError(t, billyutil.WriteFile(fs, "requirements.txt", []byte("PyYAML>=5.3.1"), 0644))
+
+	featureClient := &flags.FakeClient{}
+	featureClient.Data = map[string]any{"dependency_extract": true}
+	e, err := rego.NewRegoEvaluator(
+		&minderv1.RuleType_Definition_Eval_Rego{
+			Type: rego.DenyByDefaultEvaluationType.String(),
+			// TODO: update rego for different APIs
+			Def: `
+package minder
+import rego.v1
+
+deps := file.deps(input.profile.path)
+depsSet := { x | x = deps.node_list.nodes[_].name }
+expected := { x | x = input.profile.expected[_] }
+
+default allow = false
+allow if {
+	count(depsSet) > 0
+	count(depsSet - expected) == 0
+	count(expected - depsSet) == 0
+}
+`,
+		},
+		featureClient,
+	)
+	require.NoError(t, err, "could not create evaluator")
+
+	for _, tc := range scenario {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			policy := map[string]any{
+				"path":     tc.path,
+				"expected": tc.expectedDeps,
+			}
+
+			result, err := e.Eval(context.Background(), policy, nil, &interfaces.Result{
+				Fs: fs,
+			})
+
+			if tc.expectedErr == nil {
+				t.Logf("Result: %+v", result)
+				require.NoError(t, err, "could not evaluate")
+			} else {
+				require.EqualError(t, err, tc.expectedErr.Error())
+			}
+		})
+	}
+}
diff --git a/internal/flags/constants.go b/internal/flags/constants.go
index 0b3be69d78..3fbe4251c2 100644
--- a/internal/flags/constants.go
+++ b/internal/flags/constants.go
@@ -24,4 +24,6 @@ const (
 	// TarGzFunctions enables functions to produce tar.gz data in the rego
 	// evaluation environment.
 	TarGzFunctions Experiment = "tar_gz_functions"
+	// DependencyExtract enables functions to perform dependency extraction.
+	DependencyExtract Experiment = "dependency_extract"
 )