From 80093d48967692b715459719802a5adf81475cf2 Mon Sep 17 00:00:00 2001 From: Reilly Brogan Date: Thu, 13 Jun 2024 18:37:10 -0500 Subject: [PATCH] git: Fix submodule fetch, rewrite logic The logic for this was quite broken and had several failure cases due to that. This commit re-writes that to be saner. Changes: - Individual exec commands are all split out to separate functions so that they can be more easily composed. No function does more than a single command - Control flow now happens entirely in the Fetch() function - Before there were several failure states around submodules due to the `git switch` command happening immediately after the `git fetch` if the repo was already cloned. This is fixed by moving to a unified control flow. - `Fetch()` will now run either the initial clone OR update refs before moving onto further steps - `git clone` no longer attempts to initialize submodules. Several features from `git submodules` aren't yet implemented by `git clone` and so submodule initialization is moved entirely into a `git submodule` command - `git switch` now happens after the clone/fetch but before `git submodule` - `git switch` now no longer runs with any submodule-related flags. Like `git clone` the features we need are just not implemented yet and as far as I can tell there are subtle behavioral differences that were causing issues. - `git submodule` will now use `--filter=blob:none` to specify that submodule checkouts should themselves be blobless when possible. As far as I can tell this works correctly with `--recursive` and recursive submodules. - Because we are now checking out submodules with blobless clones when possible the `fixPermissions()` logic has been updated to account for promisor files that could be in submodule `.git` directories --- builder/source/git.go | 73 ++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/builder/source/git.go b/builder/source/git.go index 9c6d5cd..437f24d 100644 --- a/builder/source/git.go +++ b/builder/source/git.go @@ -68,7 +68,19 @@ func NewGit(uri, ref string) (*GitSource, error) { // submodules will handle setup of the git submodules after a // reset has taken place. func (g *GitSource) submodules() error { - cmd := exec.Command("git", "submodule", "update", "--init", "--recursive") + // --init initializes the submodule if it hasn't been initialized alredy + cmd := exec.Command("git", "submodule", "update", "--init", "--filter=blob:none", "--recursive") + + cmd.Dir = g.ClonePath + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stdout + + return cmd.Run() +} + +// switch will switch to the given ref. +func (g *GitSource) switchRef() error { + cmd := exec.Command("git", "switch", "--discard-changes", "--detach", g.Ref) cmd.Dir = g.ClonePath cmd.Stdout = os.Stdout @@ -80,7 +92,7 @@ func (g *GitSource) submodules() error { // For some reason git blobless clones create pack files with 600 permissions. These break future operations // as those files cannot be read by non-root users. Fix those permissions so things work as they should. func (g *GitSource) fixPermissions() error { - cmd := exec.Command("bash", "-c", "chmod +r .git/objects/pack/*.promisor") + cmd := exec.Command("bash", "-c", "find . -name \"*.promisor\" -type f -exec chmod +r \"{}\" \\;") cmd.Dir = g.ClonePath @@ -88,21 +100,9 @@ func (g *GitSource) fixPermissions() error { } // clone shallow clones an upstream git repository to the local disk. -func clone(uri, path, ref string) error { - var cmd *exec.Cmd - +func clone(uri, path string) error { // Create a blobless clone without checking out a ref - initCmd := exec.Command("git", "clone", "--filter=blob:none", "--no-checkout", uri, path) - initCmd.Stdout = os.Stdout - initCmd.Stderr = os.Stdout - - if err := initCmd.Run(); err != nil { - return err - } - - // Checkout the ref we want - cmd = exec.Command("git", "switch", "--discard-changes", "--recurse-submodules", "--detach", ref) - cmd.Dir = path + cmd := exec.Command("git", "clone", "--filter=blob:none", "--no-checkout", uri, path) cmd.Stdout = os.Stdout cmd.Stderr = os.Stdout @@ -110,26 +110,15 @@ func clone(uri, path, ref string) error { return cmd.Run() } -// reset fetches the new git reference (a tag or commit SHA1 hash) and -// hard resets the repository on that reference. -func reset(path, ref string) error { - fetchArgs := []string{ - "fetch", - "--tags", - "origin", - } - - fetchCmd := exec.Command("git", fetchArgs...) - fetchCmd.Dir = path +// updateRefs checks the upstream for new refs and tags in case we need them for future git commands +func updateRefs(path string) error { + cmd := exec.Command("git", "fetch", "--tags", "origin") - if err := fetchCmd.Run(); err != nil { - return err - } - - resetCmd := exec.Command("git", "switch", "--discard-changes", "--recurse-submodules", "--detach", ref) - resetCmd.Dir = path + cmd.Dir = path + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stdout - return resetCmd.Run() + return cmd.Run() } // Fetch will attempt to download the git tree locally. If it already exists @@ -137,18 +126,24 @@ func reset(path, ref string) error { func (g *GitSource) Fetch() error { // First things first, make sure we have a destination if !PathExists(g.ClonePath) { - if err := clone(g.URI, g.ClonePath, g.Ref); err != nil { + if err := clone(g.URI, g.ClonePath); err != nil { return err } } else { - // Repo already exists locally, try to reset to the new reference - if err := reset(g.ClonePath, g.Ref); err != nil { + // Repo already exists locally, get the latest refs from origin + if err := updateRefs(g.ClonePath); err != nil { return err } } - // Check out submodules - err := g.submodules() + // Checkout the ref we want + err := g.switchRef() + if err != nil { + return err + } + + // Update or checkout submodules + err = g.submodules() if err != nil { return err }