From 4e1993e7ab726a313bfa417613bbb63834560f46 Mon Sep 17 00:00:00 2001 From: Arran Schlosberg Date: Fri, 21 Jun 2024 11:53:07 +0100 Subject: [PATCH 1/4] refactor: use `go list` instead of `packages.Load()` The latter is very buggy (even its own docs state this) and can't even reliably return `_test.go` files. --- .gitignore | 5 +- x/gethclone/gethclone.go | 92 ++++++------ x/gethclone/go.go | 292 +++++++++++++++++++++++++++++++++++++++ x/gethclone/go.mod | 4 +- x/gethclone/go.sum | 6 +- x/gethclone/main.go | 15 +- 6 files changed, 358 insertions(+), 56 deletions(-) create mode 100644 x/gethclone/go.go diff --git a/.gitignore b/.gitignore index bf7593c1cb..69fc6c8b05 100644 --- a/.gitignore +++ b/.gitignore @@ -54,4 +54,7 @@ cmd/simulator/simulator dist/ # Outputs of `scripts/diff_against.sh` -diffs/ \ No newline at end of file +diffs/ + +# gethclone output +*.gethclone diff --git a/x/gethclone/gethclone.go b/x/gethclone/gethclone.go index 1828df9c61..3c895cc2bd 100644 --- a/x/gethclone/gethclone.go +++ b/x/gethclone/gethclone.go @@ -7,7 +7,6 @@ import ( "go/format" "go/parser" "go/token" - "log" "os" "path" "path/filepath" @@ -15,10 +14,9 @@ import ( "github.com/ava-labs/avalanchego/utils/set" "github.com/ava-labs/subnet-evm/x/gethclone/astpatch" - "go.uber.org/multierr" + "go.uber.org/zap" "golang.org/x/mod/modfile" "golang.org/x/tools/go/ast/astutil" - "golang.org/x/tools/go/packages" _ "embed" @@ -30,8 +28,10 @@ type config struct { // Externally configurable (e.g. flags) packages []string outputGoMod string + goBinary string // Internal + log *zap.SugaredLogger outputModule *modfile.Module astPatches astpatch.PatchRegistry @@ -40,7 +40,14 @@ type config struct { const geth = "github.com/ethereum/go-ethereum" -func (c *config) run(ctx context.Context) error { +func (c *config) run(ctx context.Context, logOpts ...zap.Option) (retErr error) { + l, err := zap.NewDevelopment(logOpts...) + if err != nil { + return err + } + c.log = l.Sugar() + defer c.log.Sync() + for i, p := range c.packages { if !strings.HasPrefix(p, geth) { c.packages[i] = path.Join(geth, p) @@ -48,7 +55,6 @@ func (c *config) run(ctx context.Context) error { } mod, err := parseGoMod(c.outputGoMod) - fmt.Println(err) if err != nil { return nil } @@ -73,15 +79,10 @@ func (c *config) loadAndParse(ctx context.Context, fset *token.FileSet, patterns return nil } - pkgConfig := &packages.Config{ - Context: ctx, - Mode: packages.NeedName | packages.NeedCompiledGoFiles, - } - pkgs, err := packages.Load(pkgConfig, patterns...) + pkgs, err := c.goList(ctx, patterns...) if err != nil { - return fmt.Errorf("packages.Load(..., %q): %v", c.packages, err) + return err } - for _, pkg := range pkgs { if err := c.parse(ctx, pkg, fset); err != nil { return err @@ -96,31 +97,27 @@ var copyrightHeader string // parse parses all `pkg.Files` into `fset`, transforms each according to // semantic patches, and passes all geth imports back to `c.loadAndParse()` for // recursive handling. -func (c *config) parse(ctx context.Context, pkg *packages.Package, fset *token.FileSet) error { - if len(pkg.Errors) != 0 { - var err error - for _, e := range pkg.Errors { - multierr.AppendInto(&err, e) - } - return err - } - - if c.processed[pkg.PkgPath] { +func (c *config) parse(ctx context.Context, pkg *PackagePublic, fset *token.FileSet) error { + if c.processed[pkg.ImportPath] { + c.log.Debugf("Already processed %q", pkg.ImportPath) return nil } - c.processed[pkg.PkgPath] = true + c.processed[pkg.ImportPath] = true + + shortPkgPath := strings.TrimPrefix(pkg.ImportPath, geth) - outDir := filepath.Join( - filepath.Dir(c.outputGoMod), - strings.TrimPrefix(pkg.PkgPath, geth), - ) - log.Printf("Cloning %q into %q", pkg.PkgPath, outDir) + outDir := filepath.Join(filepath.Dir(c.outputGoMod), shortPkgPath) + if err := os.MkdirAll(outDir, 0755); err != nil { + return fmt.Errorf("create directory for %q: %v", shortPkgPath, err) + } + c.log.Infof("Cloning %q into %q", pkg.ImportPath, outDir) allGethImports := set.NewSet[string](0) - for _, fName := range pkg.CompiledGoFiles { - file, err := parser.ParseFile(fset, fName, nil, parser.ParseComments|parser.SkipObjectResolution) + for _, fName := range concat(pkg.GoFiles, pkg.TestGoFiles) { + fPath := filepath.Join(pkg.Dir, fName) + file, err := parser.ParseFile(fset, fPath, nil, parser.ParseComments|parser.SkipObjectResolution) if err != nil { - return fmt.Errorf("parser.ParseFile(... %q ...): %v", fName, err) + return fmt.Errorf("parser.ParseFile(... %q ...): %v", fPath, err) } gethImports, err := c.transformGethImports(fset, file) @@ -133,29 +130,32 @@ func (c *config) parse(ctx context.Context, pkg *packages.Package, fset *token.F List: []*ast.Comment{{Text: copyrightHeader}}, }}, file.Comments...) - if err := c.astPatches.Apply(pkg.PkgPath, file); err != nil { - return fmt.Errorf("apply AST patches to %q: %v", pkg.PkgPath, err) + if err := c.astPatches.Apply(pkg.ImportPath, file); err != nil { + return fmt.Errorf("apply AST patches to %q: %v", pkg.ImportPath, err) } - if false { - // TODO(arr4n): enable writing when a suitable strategy exists; in - // the meantime, this code is demonstrative of intent. Do we want to - // simply overwrite and then check `git diff`? Do we want to check - // the diffs here? - outFile := filepath.Join(outDir, filepath.Base(fName)) - f, err := os.Create(outFile) - if err != nil { - return err - } - if err := format.Node(f, fset, file); err != nil { - return err - } + outFile := fmt.Sprintf("%s.gethclone", filepath.Join(outDir, filepath.Base(fName))) + f, err := os.Create(outFile) + if err != nil { + return err } + if err := format.Node(f, fset, file); err != nil { + return fmt.Errorf("format.Node(..., %T): %v", file, err) + } + c.log.Infof("Cloned %q", filepath.Join(shortPkgPath, fName)) } return c.loadAndParse(ctx, fset, allGethImports.List()...) } +func concat(strs ...[]string) []string { + var out []string + for _, s := range strs { + out = append(out, s...) + } + return out +} + // transformGethImports finds all `ethereum/go-ethereum` imports in the file, // converts their path to `c.outputModule`, and returns the set of transformed // import paths. diff --git a/x/gethclone/go.go b/x/gethclone/go.go new file mode 100644 index 0000000000..b1c217514c --- /dev/null +++ b/x/gethclone/go.go @@ -0,0 +1,292 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" + "time" +) + +// goList runs `go list -json [patterns...]` and returns the parsed output. It +// returns an error if `PackagePublic.Err` is non-nil, but ignores +// `PackagePublic.DepsErrors` as they may pertain to a non-geth package that +// `gethclone` doesn't have available. Any dependency error pertaining to a geth +// dep will eventually be reached when traversing the import tree. +func (c *config) goList(ctx context.Context, patterns ...string) ([]*PackagePublic, error) { + if c.goBinary == "" { + // Although we could just use "go" as the command name later, this + // allows logging for debugging. + p, err := exec.LookPath("go") + if err != nil { + return nil, fmt.Errorf("finding `go` binary: %v", err) + } + c.log.Infof("Found `go` in PATH: %q", p) + c.goBinary = p + } + + var result []*PackagePublic + + // When `go list` finds more than one package, its output is not a JSON list + // but a new-line concatenation of each JSON object. It's simpler to run `go + // list` multiple times than it is to convert the output to a JSON list. + for _, p := range patterns { + cmd := exec.CommandContext(ctx, c.goBinary, "list", "-json", p) + + buf, err := cmd.Output() + if ee, ok := err.(*exec.ExitError); ok { + c.log.Errorf("stderr of `go list`:\n%s", ee.Stderr) + } + if err != nil { + return nil, fmt.Errorf("running `go list`: %v", err) + } + + pkg := new(PackagePublic) + if err := json.Unmarshal(buf, pkg); err != nil { + return nil, fmt.Errorf("unmarshal JSON output of `go list`: %v", err) + } + if pkg.Error != nil { + return nil, fmt.Errorf("package error encountered by `go list`: %v", pkg.Error) + } + result = append(result, pkg) + } + + return result, nil +} + +/* +Types included below are copied (almost) verbatim from the Go source code, under +the included license (ny modifications are documented before the respective +code). This is necessary because they're in `internal` packages that we can't +otherwise access. Despite the internal designation, their output is considered +part of the public API of the `go list` command so it is safe to assume +stability due to the Go 1 compatibility promise. +*/ + +/* +https://go.googlesource.com/go/+/refs/tags/go1.22.4/LICENSE + +Copyright (c) 2009 The Go Authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// https://go.googlesource.com/go/+/refs/tags/go1.22.4/src/cmd/go/internal/load/pkg.go#59 + +// The `Module` field has been modified to use a package-local type. + +type PackagePublic struct { + // Note: These fields are part of the go command's public API. + // See list.go. It is okay to add fields, but not to change or + // remove existing ones. Keep in sync with ../list/list.go + Dir string `json:",omitempty"` // directory containing package sources + ImportPath string `json:",omitempty"` // import path of package in dir + ImportComment string `json:",omitempty"` // path in import comment on package statement + Name string `json:",omitempty"` // package name + Doc string `json:",omitempty"` // package documentation string + Target string `json:",omitempty"` // installed target for this package (may be executable) + Shlib string `json:",omitempty"` // the shared library that contains this package (only set when -linkshared) + Root string `json:",omitempty"` // Go root, Go path dir, or module root dir containing this package + ConflictDir string `json:",omitempty"` // Dir is hidden by this other directory + ForTest string `json:",omitempty"` // package is only for use in named test + Export string `json:",omitempty"` // file containing export data (set by go list -export) + BuildID string `json:",omitempty"` // build ID of the compiled package (set by go list -export) + Module *ModulePublic `json:",omitempty"` // info about package's module, if any + Match []string `json:",omitempty"` // command-line patterns matching this package + Goroot bool `json:",omitempty"` // is this package found in the Go root? + Standard bool `json:",omitempty"` // is this package part of the standard Go library? + DepOnly bool `json:",omitempty"` // package is only as a dependency, not explicitly listed + BinaryOnly bool `json:",omitempty"` // package cannot be recompiled + Incomplete bool `json:",omitempty"` // was there an error loading this package or dependencies? + DefaultGODEBUG string `json:",omitempty"` // default GODEBUG setting (only for Name=="main") + // Stale and StaleReason remain here *only* for the list command. + // They are only initialized in preparation for list execution. + // The regular build determines staleness on the fly during action execution. + Stale bool `json:",omitempty"` // would 'go install' do anything for this package? + StaleReason string `json:",omitempty"` // why is Stale true? + // Source files + // If you add to this list you MUST add to p.AllFiles (below) too. + // Otherwise file name security lists will not apply to any new additions. + GoFiles []string `json:",omitempty"` // .go source files (excluding CgoFiles, TestGoFiles, XTestGoFiles) + CgoFiles []string `json:",omitempty"` // .go source files that import "C" + CompiledGoFiles []string `json:",omitempty"` // .go output from running cgo on CgoFiles + IgnoredGoFiles []string `json:",omitempty"` // .go source files ignored due to build constraints + InvalidGoFiles []string `json:",omitempty"` // .go source files with detected problems (parse error, wrong package name, and so on) + IgnoredOtherFiles []string `json:",omitempty"` // non-.go source files ignored due to build constraints + CFiles []string `json:",omitempty"` // .c source files + CXXFiles []string `json:",omitempty"` // .cc, .cpp and .cxx source files + MFiles []string `json:",omitempty"` // .m source files + HFiles []string `json:",omitempty"` // .h, .hh, .hpp and .hxx source files + FFiles []string `json:",omitempty"` // .f, .F, .for and .f90 Fortran source files + SFiles []string `json:",omitempty"` // .s source files + SwigFiles []string `json:",omitempty"` // .swig files + SwigCXXFiles []string `json:",omitempty"` // .swigcxx files + SysoFiles []string `json:",omitempty"` // .syso system object files added to package + // Embedded files + EmbedPatterns []string `json:",omitempty"` // //go:embed patterns + EmbedFiles []string `json:",omitempty"` // files matched by EmbedPatterns + // Cgo directives + CgoCFLAGS []string `json:",omitempty"` // cgo: flags for C compiler + CgoCPPFLAGS []string `json:",omitempty"` // cgo: flags for C preprocessor + CgoCXXFLAGS []string `json:",omitempty"` // cgo: flags for C++ compiler + CgoFFLAGS []string `json:",omitempty"` // cgo: flags for Fortran compiler + CgoLDFLAGS []string `json:",omitempty"` // cgo: flags for linker + CgoPkgConfig []string `json:",omitempty"` // cgo: pkg-config names + // Dependency information + Imports []string `json:",omitempty"` // import paths used by this package + ImportMap map[string]string `json:",omitempty"` // map from source import to ImportPath (identity entries omitted) + Deps []string `json:",omitempty"` // all (recursively) imported dependencies + // Error information + // Incomplete is above, packed into the other bools + Error *PackageError `json:",omitempty"` // error loading this package (not dependencies) + DepsErrors []*PackageError `json:",omitempty"` // errors loading dependencies, collected by go list before output + // Test information + // If you add to this list you MUST add to p.AllFiles (below) too. + // Otherwise file name security lists will not apply to any new additions. + TestGoFiles []string `json:",omitempty"` // _test.go files in package + TestImports []string `json:",omitempty"` // imports from TestGoFiles + TestEmbedPatterns []string `json:",omitempty"` // //go:embed patterns + TestEmbedFiles []string `json:",omitempty"` // files matched by TestEmbedPatterns + XTestGoFiles []string `json:",omitempty"` // _test.go files outside package + XTestImports []string `json:",omitempty"` // imports from XTestGoFiles + XTestEmbedPatterns []string `json:",omitempty"` // //go:embed patterns + XTestEmbedFiles []string `json:",omitempty"` // files matched by XTestEmbedPatterns +} + +// https://go.googlesource.com/go/+/refs/tags/go1.22.4/src/cmd/go/internal/load/pkg.go#455 + +// The `Err` field has been changed to a `jsonErr` to allow for JSON unmarshalling. +type jsonErr string + +func (e jsonErr) Error() string { return string(e) } + +// A PackageError describes an error loading information about a package. +type PackageError struct { + ImportStack []string // shortest path from package named on command line to this one + Pos string // position of error + Err jsonErr // the error itself + IsImportCycle bool // the error is an import cycle + Hard bool // whether the error is soft or hard; soft errors are ignored in some places + alwaysPrintStack bool // whether to always print the ImportStack +} + +func (p *PackageError) Error() string { + // TODO(#43696): decide when to print the stack or the position based on + // the error type and whether the package is in the main module. + // Document the rationale. + if p.Pos != "" && (len(p.ImportStack) == 0 || !p.alwaysPrintStack) { + // Omit import stack. The full path to the file where the error + // is the most important thing. + return p.Pos + ": " + p.Err.Error() + } + // If the error is an ImportPathError, and the last path on the stack appears + // in the error message, omit that path from the stack to avoid repetition. + // If an ImportPathError wraps another ImportPathError that matches the + // last path on the stack, we don't omit the path. An error like + // "package A imports B: error loading C caused by B" would not be clearer + // if "imports B" were omitted. + if len(p.ImportStack) == 0 { + return p.Err.Error() + } + var optpos string + if p.Pos != "" { + optpos = "\n\t" + p.Pos + } + return "package " + strings.Join(p.ImportStack, "\n\timports ") + optpos + ": " + p.Err.Error() +} +func (p *PackageError) Unwrap() error { return p.Err } + +// https://go.googlesource.com/go/+/refs/tags/go1.22.4/src/cmd/go/internal/modinfo/info.go#16 + +// The `Origin` field has been modified to use a package-local type. + +type ModulePublic struct { + Path string `json:",omitempty"` // module path + Version string `json:",omitempty"` // module version + Query string `json:",omitempty"` // version query corresponding to this version + Versions []string `json:",omitempty"` // available module versions + Replace *ModulePublic `json:",omitempty"` // replaced by this module + Time *time.Time `json:",omitempty"` // time version was created + Update *ModulePublic `json:",omitempty"` // available update (with -u) + Main bool `json:",omitempty"` // is this the main module? + Indirect bool `json:",omitempty"` // module is only indirectly needed by main module + Dir string `json:",omitempty"` // directory holding local copy of files, if any + GoMod string `json:",omitempty"` // path to go.mod file describing module, if any + GoVersion string `json:",omitempty"` // go version used in module + Retracted []string `json:",omitempty"` // retraction information, if any (with -retracted or -u) + Deprecated string `json:",omitempty"` // deprecation message, if any (with -u) + Error *ModuleError `json:",omitempty"` // error loading module + Origin *Origin `json:",omitempty"` // provenance of module + Reuse bool `json:",omitempty"` // reuse of old module info is safe +} +type ModuleError struct { + Err string // error text +} +type moduleErrorNoMethods ModuleError + +// UnmarshalJSON accepts both {"Err":"text"} and "text", +// so that the output of go mod download -json can still +// be unmarshalled into a ModulePublic during -reuse processing. +func (e *ModuleError) UnmarshalJSON(data []byte) error { + if len(data) > 0 && data[0] == '"' { + return json.Unmarshal(data, &e.Err) + } + return json.Unmarshal(data, (*moduleErrorNoMethods)(e)) +} + +// https://go.googlesource.com/go/+/refs/tags/go1.22.4/src/cmd/go/internal/modfetch/codehost/codehost.go#90 + +// An Origin describes the provenance of a given repo method result. +// It can be passed to CheckReuse (usually in a different go command invocation) +// to see whether the result remains up-to-date. +type Origin struct { + VCS string `json:",omitempty"` // "git" etc + URL string `json:",omitempty"` // URL of repository + Subdir string `json:",omitempty"` // subdirectory in repo + Hash string `json:",omitempty"` // commit hash or ID + // If TagSum is non-empty, then the resolution of this module version + // depends on the set of tags present in the repo, specifically the tags + // of the form TagPrefix + a valid semver version. + // If the matching repo tags and their commit hashes still hash to TagSum, + // the Origin is still valid (at least as far as the tags are concerned). + // The exact checksum is up to the Repo implementation; see (*gitRepo).Tags. + TagPrefix string `json:",omitempty"` + TagSum string `json:",omitempty"` + // If Ref is non-empty, then the resolution of this module version + // depends on Ref resolving to the revision identified by Hash. + // If Ref still resolves to Hash, the Origin is still valid (at least as far as Ref is concerned). + // For Git, the Ref is a full ref like "refs/heads/main" or "refs/tags/v1.2.3", + // and the Hash is the Git object hash the ref maps to. + // Other VCS might choose differently, but the idea is that Ref is the name + // with a mutable meaning while Hash is a name with an immutable meaning. + Ref string `json:",omitempty"` + // If RepoSum is non-empty, then the resolution of this module version + // failed due to the repo being available but the version not being present. + // This depends on the entire state of the repo, which RepoSum summarizes. + // For Git, this is a hash of all the refs and their hashes. + RepoSum string `json:",omitempty"` +} diff --git a/x/gethclone/go.mod b/x/gethclone/go.mod index 0c99cc4c69..d6e04f8ad6 100644 --- a/x/gethclone/go.mod +++ b/x/gethclone/go.mod @@ -8,7 +8,7 @@ require ( github.com/google/go-cmp v0.6.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 - go.uber.org/multierr v1.10.0 + go.uber.org/zap v1.26.0 golang.org/x/mod v0.18.0 golang.org/x/tools v0.22.0 ) @@ -21,9 +21,9 @@ require ( github.com/mr-tron/base58 v1.2.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect go.uber.org/mock v0.4.0 // indirect + go.uber.org/multierr v1.10.0 // indirect golang.org/x/crypto v0.21.0 // indirect golang.org/x/exp v0.0.0-20231127185646-65229373498e // indirect - golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.21.0 // indirect gonum.org/v1/gonum v0.11.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/x/gethclone/go.sum b/x/gethclone/go.sum index 48b7852757..f79ca393a4 100644 --- a/x/gethclone/go.sum +++ b/x/gethclone/go.sum @@ -33,18 +33,20 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/thepudds/fzgen v0.4.2 h1:HlEHl5hk2/cqEomf2uK5SA/FeJc12s/vIHmOG+FbACw= github.com/thepudds/fzgen v0.4.2/go.mod h1:kHCWdsv5tdnt32NIHYDdgq083m6bMtaY0M+ipiO9xWE= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20231127185646-65229373498e h1:Gvh4YaCaXNs6dKTlfgismwWZKyjVZXwOPfIyUaqU3No= golang.org/x/exp v0.0.0-20231127185646-65229373498e/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= diff --git a/x/gethclone/main.go b/x/gethclone/main.go index c24516f18f..1b770aea1f 100644 --- a/x/gethclone/main.go +++ b/x/gethclone/main.go @@ -4,7 +4,7 @@ package main import ( "context" - "log" + "fmt" "os" "github.com/ava-labs/subnet-evm/x/gethclone/astpatch" @@ -18,12 +18,17 @@ func main() { pflag.StringSliceVar(&c.packages, "packages", []string{"core/vm"}, `Geth packages to clone, with or without "github.com/ethereum/go-ethereum" prefix.`) pflag.StringVar(&c.outputGoMod, "output_go_mod", "", "go.mod file of the destination to which geth will be cloned.") + pflag.StringVar(&c.goBinary, "go_binary", "", "Location of `go` binary; uses system default if empty.") pflag.Parse() - log.SetOutput(os.Stderr) - log.Print("START") + stderr("START") if err := c.run(context.Background()); err != nil { - log.Fatal(err) + stderr(err) + os.Exit(1) } - log.Print("DONE") + stderr("DONE") +} + +func stderr(x ...any) { + fmt.Fprintln(os.Stderr, x...) } From e04d151aa273dcd7ac368ad52054ac7fda01a338 Mon Sep 17 00:00:00 2001 From: Arran Schlosberg Date: Fri, 21 Jun 2024 13:39:34 +0100 Subject: [PATCH 2/4] feat: script to diff `gethclone` outputs against current code --- .gitignore | 2 +- scripts/gethclone_diff.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100755 scripts/gethclone_diff.sh diff --git a/.gitignore b/.gitignore index 69fc6c8b05..7ea84427bb 100644 --- a/.gitignore +++ b/.gitignore @@ -56,5 +56,5 @@ dist/ # Outputs of `scripts/diff_against.sh` diffs/ -# gethclone output +# gethclone experimental output *.gethclone diff --git a/scripts/gethclone_diff.sh b/scripts/gethclone_diff.sh new file mode 100755 index 0000000000..af6149c673 --- /dev/null +++ b/scripts/gethclone_diff.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# +# Usage: scripts/gethclone_diff.sh {filepath} +# +# Example: `scripts/gethclone_diff.sh core/types/block.go | less` +# +# Convenience script for performing side-by-side diff of a file with the output +# of the `x/gethclone` command. +# + +set -eu; + +ROOT=$(git rev-parse --show-toplevel); +diff -y "${ROOT}/${1}" "${ROOT}/${1}.gethclone"; From 4e6e61a42c6a074ccedfb949297d50d774a8ddfc Mon Sep 17 00:00:00 2001 From: Arran Schlosberg Date: Mon, 24 Jun 2024 13:57:41 +0100 Subject: [PATCH 3/4] refactor: reduce wasted `go list` time --- x/gethclone/gethclone.go | 17 ++++++++++++----- x/gethclone/go.go | 6 +++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/x/gethclone/gethclone.go b/x/gethclone/gethclone.go index 3c895cc2bd..c94dac601b 100644 --- a/x/gethclone/gethclone.go +++ b/x/gethclone/gethclone.go @@ -35,7 +35,7 @@ type config struct { outputModule *modfile.Module astPatches astpatch.PatchRegistry - processed map[string]bool + processed set.Set[string] } const geth = "github.com/ethereum/go-ethereum" @@ -60,7 +60,7 @@ func (c *config) run(ctx context.Context, logOpts ...zap.Option) (retErr error) } c.outputModule = mod.Module - c.processed = make(map[string]bool) + c.processed = make(set.Set[string]) return c.loadAndParse(ctx, token.NewFileSet(), c.packages...) } @@ -79,10 +79,17 @@ func (c *config) loadAndParse(ctx context.Context, fset *token.FileSet, patterns return nil } - pkgs, err := c.goList(ctx, patterns...) + // TODO(arr4n): most of the time is spent here, listing patterns. Although + // the `processed` set gets rid of most of the duplication, occasionally a + // package is still `list`ed (but not parse()d) twice. If the overhead + // becomes problematic, this is where to look first. + ps := set.Of(patterns...) + ps.Difference(c.processed) + pkgs, err := c.goList(ctx, ps.List()...) if err != nil { return err } + for _, pkg := range pkgs { if err := c.parse(ctx, pkg, fset); err != nil { return err @@ -98,11 +105,11 @@ var copyrightHeader string // semantic patches, and passes all geth imports back to `c.loadAndParse()` for // recursive handling. func (c *config) parse(ctx context.Context, pkg *PackagePublic, fset *token.FileSet) error { - if c.processed[pkg.ImportPath] { + if c.processed.Contains(pkg.ImportPath) { c.log.Debugf("Already processed %q", pkg.ImportPath) return nil } - c.processed[pkg.ImportPath] = true + c.processed.Add(pkg.ImportPath) shortPkgPath := strings.TrimPrefix(pkg.ImportPath, geth) diff --git a/x/gethclone/go.go b/x/gethclone/go.go index b1c217514c..07d5fabde0 100644 --- a/x/gethclone/go.go +++ b/x/gethclone/go.go @@ -32,15 +32,19 @@ func (c *config) goList(ctx context.Context, patterns ...string) ([]*PackagePubl // but a new-line concatenation of each JSON object. It's simpler to run `go // list` multiple times than it is to convert the output to a JSON list. for _, p := range patterns { - cmd := exec.CommandContext(ctx, c.goBinary, "list", "-json", p) + // -find stops `go list` from traversing the dependency tree + cmd := exec.CommandContext(ctx, c.goBinary, "list", "-find", "-json", p) + start := time.Now() buf, err := cmd.Output() + end := time.Now() if ee, ok := err.(*exec.ExitError); ok { c.log.Errorf("stderr of `go list`:\n%s", ee.Stderr) } if err != nil { return nil, fmt.Errorf("running `go list`: %v", err) } + c.log.Debugf("`go list ... %q` ran in %s", p, end.Sub(start)) pkg := new(PackagePublic) if err := json.Unmarshal(buf, pkg); err != nil { From 1b7166a8d028d091f916640a08b5f343f35bc07a Mon Sep 17 00:00:00 2001 From: Arran Schlosberg Date: Mon, 24 Jun 2024 14:03:55 +0100 Subject: [PATCH 4/4] refactor: encapsulate `go list` in `goRunner` type --- x/gethclone/gethclone.go | 5 +++-- x/gethclone/go.go | 24 ++++++++++++++++-------- x/gethclone/main.go | 2 +- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/x/gethclone/gethclone.go b/x/gethclone/gethclone.go index c94dac601b..46d822a01b 100644 --- a/x/gethclone/gethclone.go +++ b/x/gethclone/gethclone.go @@ -28,7 +28,7 @@ type config struct { // Externally configurable (e.g. flags) packages []string outputGoMod string - goBinary string + runGo goRunner // Internal log *zap.SugaredLogger @@ -46,6 +46,7 @@ func (c *config) run(ctx context.Context, logOpts ...zap.Option) (retErr error) return err } c.log = l.Sugar() + c.runGo.log = c.log defer c.log.Sync() for i, p := range c.packages { @@ -85,7 +86,7 @@ func (c *config) loadAndParse(ctx context.Context, fset *token.FileSet, patterns // becomes problematic, this is where to look first. ps := set.Of(patterns...) ps.Difference(c.processed) - pkgs, err := c.goList(ctx, ps.List()...) + pkgs, err := c.runGo.list(ctx, ps.List()...) if err != nil { return err } diff --git a/x/gethclone/go.go b/x/gethclone/go.go index 07d5fabde0..ddc04fb119 100644 --- a/x/gethclone/go.go +++ b/x/gethclone/go.go @@ -7,23 +7,31 @@ import ( "os/exec" "strings" "time" + + "go.uber.org/zap" ) -// goList runs `go list -json [patterns...]` and returns the parsed output. It +// A goRunner runs the `go` binary. +type goRunner struct { + bin string // if empty, `go` will be found in $PATH + log *zap.SugaredLogger +} + +// list runs `go list -json [patterns...]` and returns the parsed output. It // returns an error if `PackagePublic.Err` is non-nil, but ignores // `PackagePublic.DepsErrors` as they may pertain to a non-geth package that // `gethclone` doesn't have available. Any dependency error pertaining to a geth // dep will eventually be reached when traversing the import tree. -func (c *config) goList(ctx context.Context, patterns ...string) ([]*PackagePublic, error) { - if c.goBinary == "" { +func (r *goRunner) list(ctx context.Context, patterns ...string) ([]*PackagePublic, error) { + if r.bin == "" { // Although we could just use "go" as the command name later, this // allows logging for debugging. p, err := exec.LookPath("go") if err != nil { return nil, fmt.Errorf("finding `go` binary: %v", err) } - c.log.Infof("Found `go` in PATH: %q", p) - c.goBinary = p + r.log.Infof("Found `go` in PATH: %q", p) + r.bin = p } var result []*PackagePublic @@ -33,18 +41,18 @@ func (c *config) goList(ctx context.Context, patterns ...string) ([]*PackagePubl // list` multiple times than it is to convert the output to a JSON list. for _, p := range patterns { // -find stops `go list` from traversing the dependency tree - cmd := exec.CommandContext(ctx, c.goBinary, "list", "-find", "-json", p) + cmd := exec.CommandContext(ctx, r.bin, "list", "-find", "-json", p) start := time.Now() buf, err := cmd.Output() end := time.Now() if ee, ok := err.(*exec.ExitError); ok { - c.log.Errorf("stderr of `go list`:\n%s", ee.Stderr) + r.log.Errorf("stderr of `go list`:\n%s", ee.Stderr) } if err != nil { return nil, fmt.Errorf("running `go list`: %v", err) } - c.log.Debugf("`go list ... %q` ran in %s", p, end.Sub(start)) + r.log.Debugf("`go list ... %q` ran in %s", p, end.Sub(start)) pkg := new(PackagePublic) if err := json.Unmarshal(buf, pkg); err != nil { diff --git a/x/gethclone/main.go b/x/gethclone/main.go index 1b770aea1f..68397821f1 100644 --- a/x/gethclone/main.go +++ b/x/gethclone/main.go @@ -18,7 +18,7 @@ func main() { pflag.StringSliceVar(&c.packages, "packages", []string{"core/vm"}, `Geth packages to clone, with or without "github.com/ethereum/go-ethereum" prefix.`) pflag.StringVar(&c.outputGoMod, "output_go_mod", "", "go.mod file of the destination to which geth will be cloned.") - pflag.StringVar(&c.goBinary, "go_binary", "", "Location of `go` binary; uses system default if empty.") + pflag.StringVar(&c.runGo.bin, "go_binary", "", "Location of `go` binary; uses system default if empty.") pflag.Parse() stderr("START")