Skip to content

Commit

Permalink
Merge pull request #100 from gadget-inc/yandu/faster-hardlink/fastwalk
Browse files Browse the repository at this point in the history
faster hardlink with fastwalk
  • Loading branch information
udnay authored Jan 13, 2025
2 parents 0a23dc3 + c6c135e commit 0dedbe8
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/actions/setup-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ runs:
using: composite
steps:
- name: Install nix
uses: nixbuild/nix-quick-install-action@v19
uses: nixbuild/nix-quick-install-action@v29
with:
nix_conf: experimental-features = nix-command flakes

Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ jobs:

- name: Start background services
run: dev &

- name: Install js modules for HardLink tests
run: make install-js

- name: Build Go binaries
run: make build
Expand All @@ -38,6 +41,9 @@ jobs:

- name: Start background services
run: dev &

- name: Install js modules for HardLink tests
run: make install-js

- name: Build Go binaries
run: make build
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ PROTO_FILES := $(shell find internal/pb/ -type f -name '*.proto')

MIGRATE_DIR := ./migrations
SERVICE := $(PROJECT).server
BENCH_PROFILE ?= ""

.PHONY: migrate migrate-create clean build lint release
.PHONY: test test-one test-fuzz test-js lint-js install-js build-js
Expand Down Expand Up @@ -105,6 +106,13 @@ else
cd test && go test -run $(name)
endif

bench: export DB_URI = postgres://$(DB_USER):$(DB_PASS)@$(DB_HOST):5432/dl_tests
bench: migrate
cd test && go test -bench . -run=^# $(BENCH_PROFILE)

bench/cpu: export BENCH_PROFILE = -cpuprofile cpu.pprof
bench/cpu: bench

test-fuzz: export DL_TOKEN=$(DEV_TOKEN_ADMIN)
test-fuzz: export DL_SKIP_SSL_VERIFICATION=1
test-fuzz: reset-db
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ require (
github.com/benbjohnson/clock v1.3.0 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/charlievieth/fastwalk v1.0.9 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dustin/go-humanize v1.0.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghf
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charlievieth/fastwalk v1.0.9 h1:Odb92AfoReO3oFBfDGT5J+nwgzQPF/gWAw6E6/lkor0=
github.com/charlievieth/fastwalk v1.0.9/go.mod h1:yGy1zbxog41ZVMcKA/i8ojXLFsuayX5VvwhQVoj9PBI=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
Expand Down
32 changes: 28 additions & 4 deletions internal/files/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"path/filepath"
"strings"

"github.com/charlievieth/fastwalk"
"github.com/gadget-inc/dateilager/internal/db"
"github.com/gadget-inc/dateilager/internal/pb"
"github.com/gobwas/glob"
Expand Down Expand Up @@ -201,15 +202,38 @@ func HardlinkDir(olddir, newdir string) error {
}
}

return filepath.Walk(olddir, func(oldpath string, info fs.FileInfo, err error) error {
rootInfo, err := os.Lstat(olddir)
if err != nil {
return fmt.Errorf("cannot stat olddir %v: %w", olddir, err)
}

err = os.MkdirAll(newdir, rootInfo.Mode())
if err != nil {
return fmt.Errorf("cannot create new root dir %v: %w", olddir, err)
}

fastwalkConf := fastwalk.DefaultConfig.Copy()
fastwalkConf.Sort = fastwalk.SortDirsFirst

return fastwalk.Walk(fastwalkConf, olddir, func(oldpath string, d os.DirEntry, err error) error {
if err != nil {
return fmt.Errorf("failed to walk dir: %v, %w", info, err)
return fmt.Errorf("failed to walk dir: %v, %w", oldpath, err)
}

newpath := filepath.Join(newdir, strings.TrimPrefix(oldpath, olddir))

if info.IsDir() {
err := os.MkdirAll(newpath, info.Mode())
// The new "root" already exists so don't recreate it
if newpath == newdir {
return nil
}

if d.IsDir() {
info, err := d.Info()
if err != nil {
return fmt.Errorf("unable to get directory info %v: %w", oldpath, err)
}

err = os.Mkdir(newpath, info.Mode())
if err != nil {
return fmt.Errorf("cannot create dir %v: %w", newpath, err)
}
Expand Down
54 changes: 54 additions & 0 deletions test/hardlink_dir_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package test

import (
"fmt"
"os"
"path"
"testing"

"github.com/gadget-inc/dateilager/internal/files"
"github.com/stretchr/testify/require"
)

// TestHardlinkDir hardlinks the js/node_modules tree (resolved relative to the
// test's working directory) into a fresh temporary directory using
// files.HardlinkDir, then verifies with CompareDirectories that the linked copy
// matches the original.
func TestHardlinkDir(t *testing.T) {
	wd, err := os.Getwd()
	require.NoError(t, err, "os.Getwd() failed")

	bigDir := path.Join(wd, "../js/node_modules")
	tmpDir := emptyTmpDir(t)
	defer os.RemoveAll(tmpDir)

	copyDir := path.Join(tmpDir, "node_modules")
	err = files.HardlinkDir(bigDir, copyDir)
	require.NoError(t, err, "HardlinkDir failed")

	err = CompareDirectories(bigDir, copyDir)
	// Name the directory that was actually compared (copyDir), not its parent.
	require.NoError(t, err, "compareDirectories %s vs %s failed", bigDir, copyDir)
}

// BenchmarkHardlinkDir measures files.HardlinkDir on the js/node_modules tree
// (resolved relative to the benchmark's working directory).
//
// Each iteration links into its own numbered subdirectory of one shared
// temporary directory. The CompareDirectories verification runs while the
// timer is stopped, so only the hardlinking itself is timed. Any failure aborts
// the benchmark immediately instead of timing further iterations against a
// bogus or partially linked tree.
func BenchmarkHardlinkDir(b *testing.B) {
	wd, err := os.Getwd()
	if err != nil {
		// Without a working directory the source path below would be meaningless.
		b.Fatal(err)
	}

	bigDir := path.Join(wd, "../js/node_modules")
	tmpDir := emptyTmpDir(b)
	defer os.RemoveAll(tmpDir)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		copyDir := path.Join(tmpDir, "node_modules", fmt.Sprintf("%d", n))
		err := files.HardlinkDir(bigDir, copyDir)
		b.StopTimer()
		if err != nil {
			b.Fatal(err)
		}

		if err := CompareDirectories(bigDir, copyDir); err != nil {
			b.Fatal(err)
		}
		b.StartTimer()
	}
}
99 changes: 97 additions & 2 deletions test/shared_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"archive/tar"
"bytes"
"context"
"crypto/sha256"
"fmt"
"io"
"io/fs"
Expand Down Expand Up @@ -275,9 +276,12 @@ func writeFile(t *testing.T, dir string, path string, content string) {
require.NoError(t, err, "write file %v", path)
}

func emptyTmpDir(t *testing.T) string {
func emptyTmpDir(t testing.TB) string {
dir, err := os.MkdirTemp("", "dateilager_tests_")
require.NoError(t, err, "create temp dir")

if err != nil {
t.Fatal(err)
}

return dir
}
Expand Down Expand Up @@ -771,3 +775,94 @@ func formatPtr(p *int64) string {
}
return fmt.Sprint(*p)
}

// CompareDirectories walks dir1 and dir2 recursively and checks that both hold
// the same set of relative paths. For every path that is not a directory on
// both sides it also checks that the permission bits and the file contents
// match.
//
// It returns nil when the trees match. Otherwise it returns an error describing
// the first mismatch met while walking dir2 or, when dir2 only lacks entries,
// the lexicographically smallest relative path that is missing from it.
func CompareDirectories(dir1, dir2 string) error {
	// Index every entry below dir1 by its path relative to dir1.
	files1 := make(map[string]os.FileInfo)
	err := filepath.Walk(dir1, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		relPath, err := filepath.Rel(dir1, path)
		if err != nil {
			return err
		}
		files1[relPath] = info
		return nil
	})
	if err != nil {
		return err
	}

	err = filepath.Walk(dir2, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		relPath, err := filepath.Rel(dir2, path)
		if err != nil {
			return err
		}

		origInfo, ok := files1[relPath]
		if !ok {
			return fmt.Errorf("extra file found in directory 2: %s", path)
		}
		// Matched entries are removed so that whatever is left afterwards is
		// known to be missing from dir2.
		delete(files1, relPath)

		// A directory on one side and anything else on the other is a mismatch
		// in its own right; do not let it slip through the content comparison.
		if info.IsDir() != origInfo.IsDir() {
			return fmt.Errorf("entry type differs for %s: directory in only one of the trees", relPath)
		}
		if info.IsDir() {
			// Directories are only compared for existence, not content.
			return nil
		}

		if !compareFileInfo(origInfo, info) {
			return fmt.Errorf("file permissions differ for %s: %o vs %o", relPath, origInfo.Mode()&os.ModePerm, info.Mode()&os.ModePerm)
		}
		equal, err := compareFileContents(filepath.Join(dir1, relPath), filepath.Join(dir2, relPath))
		if err != nil {
			return fmt.Errorf("error comparing contents of %s: %w", relPath, err)
		}
		if !equal {
			return fmt.Errorf("contents differ for %s", relPath)
		}
		return nil
	})
	if err != nil {
		return err
	}

	// Map iteration order is random; report the smallest missing path so the
	// error is the same on every run.
	missing, found := "", false
	for file := range files1 {
		if !found || file < missing {
			missing, found = file, true
		}
	}
	if found {
		return fmt.Errorf("file missing in directory 2: %s", missing)
	}
	return nil
}

// compareFileInfo reports whether two file infos carry the same permission
// bits. Only the os.ModePerm portion of each mode takes part in the comparison;
// type bits, size and timestamps are ignored.
func compareFileInfo(info1, info2 os.FileInfo) bool {
	perm1 := info1.Mode().Perm()
	perm2 := info2.Mode().Perm()
	return perm1 == perm2
}

// compareFileContents reports whether file1 and file2 have identical contents.
//
// Both paths are opened with os.Open. Two directories are reported as equal
// because directory contents are not inspected here; a directory paired with a
// non-directory is reported as different. For everything else the SHA-256
// digests of the two streams are compared.
//
// An error is returned when either path cannot be opened, stat'ed or read.
func compareFileContents(file1, file2 string) (bool, error) {
	f1, err := os.Open(file1)
	if err != nil {
		return false, err
	}
	defer f1.Close()
	f2, err := os.Open(file2)
	if err != nil {
		return false, err
	}
	defer f2.Close()

	// Check the Stat errors: a discarded error would leave a nil FileInfo
	// behind and the IsDir call below would panic.
	info1, err := f1.Stat()
	if err != nil {
		return false, err
	}
	info2, err := f2.Stat()
	if err != nil {
		return false, err
	}

	switch {
	case info1.IsDir() && info2.IsDir():
		return true, nil // Skip directories in content comparison
	case info1.IsDir() || info2.IsDir():
		return false, nil // A directory never equals a non-directory
	}

	hash1, hash2 := sha256.New(), sha256.New()
	if _, err := io.Copy(hash1, f1); err != nil {
		return false, err
	}
	if _, err := io.Copy(hash2, f2); err != nil {
		return false, err
	}

	return bytes.Equal(hash1.Sum(nil), hash2.Sum(nil)), nil
}

0 comments on commit 0dedbe8

Please sign in to comment.