Skip to content

Commit

Permalink
Add dependency extraction functions
Browse files Browse the repository at this point in the history
  • Loading branch information
evankanderson committed Jan 18, 2025
1 parent 08238ba commit 3048a3d
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 0 deletions.
110 changes: 110 additions & 0 deletions internal/engine/eval/rego/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,20 @@ import (
"time"

"github.com/go-git/go-billy/v5"
"github.com/go-git/go-billy/v5/helper/iofs"
"github.com/go-git/go-billy/v5/memfs"
billyutil "github.com/go-git/go-billy/v5/util"
"github.com/open-feature/go-sdk/openfeature"
"github.com/open-policy-agent/opa/v1/ast"
"github.com/open-policy-agent/opa/v1/rego"
"github.com/open-policy-agent/opa/v1/types"
"github.com/pelletier/go-toml/v2"
"github.com/protobom/protobom/pkg/sbom"
"github.com/stacklok/frizbee/pkg/replacer"
"github.com/stacklok/frizbee/pkg/utils/config"
"gopkg.in/yaml.v3"

"github.com/mindersec/minder/internal/deps/scalibr"
"github.com/mindersec/minder/internal/flags"
"github.com/mindersec/minder/internal/util"
"github.com/mindersec/minder/pkg/engine/v1/interfaces"
Expand Down Expand Up @@ -60,6 +64,10 @@ var MinderRegoLibExperiments = map[flags.Experiment][]func(res *interfaces.Resul
BaseFileWalk,
BaseListGithubActions,
},
flags.DependencyExtract: {
DependencyExtract,
BaseDependencyExtract,
},
}

func instantiateRegoLib(ctx context.Context, featureFlags openfeature.IClient, res *interfaces.Result) []func(*rego.Rego) {
Expand Down Expand Up @@ -763,3 +771,105 @@ func ParseToml(_ *interfaces.Result) func(*rego.Rego) {
},
)
}

// DependencyExtract registers the `file.deps` rego builtin, which extracts
// dependencies from the filesystem under evaluation (res.Fs, as supplied by
// the ingester).
//
// The builtin takes a single string argument: the path of the file or
// subtree to scan. It returns the extracted dependencies as an AST term in
// the form of a protobom SBOM with the "nodes" fields populated but not
// "edges".
func DependencyExtract(res *interfaces.Result) func(*rego.Rego) {
	decl := &rego.Function{
		Name: "file.deps",
		// TODO: The return type is types.A, but it should be types.NewObject(...)
		Decl: types.NewFunction(types.Args(types.S), types.A),
	}
	impl := fsExtractDeps(res.Fs)
	return rego.Function1(decl, impl)
}

// BaseDependencyExtract registers the `base_file.deps` rego builtin, which
// extracts dependencies from the base filesystem (res.BaseFs) in a
// pull_request or other diff context.
//
// The builtin takes a single string argument: the path of the file or
// subtree to scan. It returns the extracted dependencies as an AST term in
// the form of a protobom SBOM with the "nodes" fields populated but not
// "edges".
func BaseDependencyExtract(res *interfaces.Result) func(*rego.Rego) {
	decl := &rego.Function{
		Name: "base_file.deps",
		// TODO: The return type is types.A, but it should be types.NewObject(...)
		Decl: types.NewFunction(types.Args(types.S), types.A),
	}
	impl := fsExtractDeps(res.BaseFs)
	return rego.Function1(decl, impl)
}

func fsExtractDeps(vfs billy.Filesystem) func (rego.BuiltinContext, *ast.Term) (*ast.Term, error) {

Check failure on line 809 in internal/engine/eval/rego/lib.go

View workflow job for this annotation

GitHub Actions / lint / Run golangci-lint

File is not properly formatted (gci)
return func(bctx rego.BuiltinContext, op1 *ast.Term) (*ast.Term, error) {
var path string
if err := ast.As(op1.Value, &path); err != nil {
return nil, err
}

if vfs == nil {
return nil, fmt.Errorf("file system is not available")
}

// verify the file or path exists
target, err := vfs.Stat(path)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("file or path %q does not exist", path)
}
return nil, err
}

// vfs.Chroot() only works on directories, so if we have a file, copy
// it to a new vfs.
if !target.IsDir() {
sourceFile, err := vfs.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open file %q", path)
}
defer sourceFile.Close()

newVfs := memfs.New()
basename := filepath.Base(path)
file, err := newVfs.Create(basename)
if err != nil {
return nil, fmt.Errorf("failed to create file %q", basename)
}
defer file.Close()
_, err = io.Copy(file, sourceFile)
if err != nil {
return nil, fmt.Errorf("failed to copy file %q", path)
}
vfs = newVfs
path = ""
}

// construct a scalibr extractor
extractor := scalibr.NewExtractor()

if path != "" {
vfs, err = vfs.Chroot(path)
if err != nil {
return nil, err
}
}

res, err := extractor.ScanFilesystem(bctx.Context, iofs.New(vfs))
if err != nil {
return nil, fmt.Errorf("failed to scan filesystem: %v", err)
}
// put in an SBOM wrapper
sbom := &sbom.Document{
NodeList: res,
}
astValue, err := ast.InterfaceToValue(sbom)

return &ast.Term{Value: astValue}, err
}
}

Check failure on line 875 in internal/engine/eval/rego/lib.go

View workflow job for this annotation

GitHub Actions / lint / Run golangci-lint

File is not properly formatted (gci)
106 changes: 106 additions & 0 deletions internal/engine/eval/rego/lib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1197,3 +1197,109 @@ allow {
})
}
}

// TestExtractDeps exercises the `file.deps` rego builtin against an
// in-memory filesystem holding a go.mod under foo/ and a requirements.txt at
// the root. Each case passes a path and the set of dependency names the
// policy should see; the policy allows only when the extracted names match
// the expected set exactly and the set is non-empty.
func TestExtractDeps(t *testing.T) {
	t.Parallel()

	type extractCase struct {
		name         string
		path         string
		expectedDeps []string
		expectedErr  error
	}

	cases := []extractCase{
		{
			name: "parse all",
			expectedDeps: []string{
				"example.com/othermodule",
				"example.com/thismodule",
				"example.com/thatmodule",
				"stdlib", // Always part of golang binaries.
				"PyYAML",
			},
		},
		{
			name: "parse go.mod",
			path: "foo",
			expectedDeps: []string{
				"example.com/othermodule",
				"example.com/thismodule",
				"example.com/thatmodule",
				"stdlib", // Always part of golang binaries.
			},
		},
		{
			name: "parse file",
			path: "requirements.txt",
			expectedDeps: []string{
				"PyYAML",
			},
		},
		{
			name:        "parse non-existent file",
			path:        "missing",
			expectedErr: engerrors.NewErrEvaluationFailed("denied"),
		},
	}

	// Fixture filesystem shared by every case.
	fs := memfs.New()
	require.NoError(t, fs.MkdirAll("foo", 0755), "could not create directory")
	// From https://go.dev/doc/modules/gomod-ref#example
	goMod := `
module example.com/mymodule
go 1.14
require (
example.com/othermodule v1.2.3
example.com/thismodule v1.2.3
example.com/thatmodule v1.2.3
)
`

	require.NoError(t, billyutil.WriteFile(fs, "foo/go.mod", []byte(goMod), 0644))
	require.NoError(t, billyutil.WriteFile(fs, "requirements.txt", []byte("PyYAML>=5.3.1"), 0644))

	// Turn on the experiment flag so the dependency builtins are registered.
	flagClient := &flags.FakeClient{}
	flagClient.Data = map[string]any{"dependency_extract": true}

	ruleDef := &minderv1.RuleType_Definition_Eval_Rego{
		Type: rego.DenyByDefaultEvaluationType.String(),
		// TODO: update rego for different APIs
		Def: `
package minder
import rego.v1
deps := file.deps(input.profile.path)
depsSet := { x | x = deps.node_list.nodes[_].name }
expected := { x | x = input.profile.expected[_] }
default allow = false
allow if {
count(depsSet) > 0
count(depsSet - expected) == 0
count(expected - depsSet) == 0
}
`,
	}
	evaluator, err := rego.NewRegoEvaluator(ruleDef, flagClient)
	require.NoError(t, err, "could not create evaluator")

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			profile := map[string]any{
				"path":     tc.path,
				"expected": tc.expectedDeps,
			}
			ingested := &interfaces.Result{
				Fs: fs,
			}

			result, err := evaluator.Eval(context.Background(), profile, nil, ingested)

			if tc.expectedErr != nil {
				require.EqualError(t, err, tc.expectedErr.Error())
				return
			}

			t.Logf("Result: %+v", result)
			require.NoError(t, err, "could not evaluate")
		})
	}
}
2 changes: 2 additions & 0 deletions internal/flags/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,6 @@ const (
// TarGzFunctions enables functions to produce tar.gz data in the rego
// evaluation environment.
TarGzFunctions Experiment = "tar_gz_functions"
// DependencyExtract enables functions to perform dependency extraction.
DependencyExtract Experiment = "dependency_extract"
)

0 comments on commit 3048a3d

Please sign in to comment.