Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fallback to LMT on missing source hash #2866

Merged
merged 9 commits into from
Jan 16, 2025
37 changes: 25 additions & 12 deletions cmd/syncComparator.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@ import (
)

const (
syncSkipReasonTime = "the source has an older LMT than the destination"
syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5"
syncSkipReasonSameHash = "the source has the same hash"
syncOverwriteReasonNewerHash = "the source has a differing hash"
syncOverwriteReasonNewerLMT = "the source is more recent than the destination"
syncStatusSkipped = "skipped"
syncStatusOverwritten = "overwritten"
syncOverwriteReasonDeleteDestinationFile = "the flag delete-destination-file is set to true"
syncSkipReasonTime = "the source has an older LMT than the destination"
syncSkipReasonTimeAndMissingHash = "the source lacks an associated hash (please upload with --put-md5 for hash comparison) and has an older LMT than the destination"
syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5"
syncSkipReasonSameHash = "the source has the same hash"
syncOverwriteReasonNewerHash = "the source has a differing hash"
syncOverwriteReasonNewerLMT = "the source is more recent than the destination"
syncOverwriteReasonNewerLMTAndMissingHash = "the source lacks an associated hash (please upload with --put-md5 for hash comparison) and is more recent than the destination"
syncStatusSkipped = "skipped"
syncStatusOverwritten = "overwritten"
)

func syncComparatorLog(fileName, status, skipReason string, stdout bool) {
Expand Down Expand Up @@ -98,8 +99,14 @@ func (f *syncDestinationComparator) processIfNecessary(destinationObject StoredO
switch f.comparisonHashType {
case common.ESyncHashType.MD5():
if sourceObjectInMap.md5 == nil {
syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonMissingHash, true)
return nil
if sourceObjectInMap.isMoreRecentThan(destinationObject, f.preferSMBTime) {
syncComparatorLog(sourceObjectInMap.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMTAndMissingHash, false)
return f.copyTransferScheduler(sourceObjectInMap)
} else {
// skip if dest is more recent
syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonTimeAndMissingHash, false)
return nil
}
}

if !reflect.DeepEqual(sourceObjectInMap.md5, destinationObject.md5) {
Expand Down Expand Up @@ -177,8 +184,14 @@ func (f *syncSourceComparator) processIfNecessary(sourceObject StoredObject) err
switch f.comparisonHashType {
case common.ESyncHashType.MD5():
if sourceObject.md5 == nil {
syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonMissingHash, true)
return nil
if sourceObject.isMoreRecentThan(destinationObjectInMap, f.preferSMBTime) {
syncComparatorLog(sourceObject.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMTAndMissingHash, false)
return f.copyTransferScheduler(sourceObject)
} else {
// skip if dest is more recent
syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonTimeAndMissingHash, false)
return nil
}
}

if !reflect.DeepEqual(sourceObject.md5, destinationObjectInMap.md5) {
Expand Down
175 changes: 175 additions & 0 deletions e2etest/zt_newe2e_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,178 @@ func (s *SyncTestSuite) Scenario_TestSyncDeleteDestinationIfNecessary(svm *Scena
},
}, true)
}

// Scenario_TestSyncHashTypeSourceHash syncs with MD5 hash comparison from a Blob source whose objects may lack a
// stored MD5, then checks both the final destination contents and which transfers were planned.
//
// Note : a local source always has a hash, because a hashProcessor created in zc_traverser_local computes it. A
// missing source hash can therefore only be set up on a remote source, so the source here is always Blob.
func (s *SyncTestSuite) Scenario_TestSyncHashTypeSourceHash(svm *ScenarioVariationManager) {
	// One object per case:
	//   hashequal     : hash present and equal            -> skip
	//   hashnotequal  : hash present and different        -> overwrite
	//   nohashdestsrc : no hash, src LMT after dest LMT   -> overwrite
	//   nohashsrcdest : no hash, src LMT before dest LMT  -> skip

	// Object bodies. The random bodies are created in a fixed order, followed by the all-zero filler.
	sameBody := NewRandomObjectContentContainer(512)
	changedBody := NewRandomObjectContentContainer(512)
	newerSrcBody := NewRandomObjectContentContainer(512)
	olderSrcBody := NewRandomObjectContentContainer(512)

	zeros := NewZeroObjectContentContainer(512)

	// The destination is created first; it varies between Blob and Local.
	dstLocations := []common.Location{common.ELocation.Blob(), common.ELocation.Local()}
	dstRoot := GetRootResource(svm, ResolveVariation(svm, dstLocations))
	dstDefinition := ResourceDefinitionContainer{
		Objects: ObjectResourceMappingFlat{
			"hashequal":     ResourceDefinitionObject{Body: sameBody},
			"hashnotequal":  ResourceDefinitionObject{Body: zeros},
			"nohashdestsrc": ResourceDefinitionObject{Body: newerSrcBody},
			"nohashsrcdest": ResourceDefinitionObject{Body: zeros},
		},
	}
	dst := CreateResource[ContainerResourceManager](svm, dstRoot, dstDefinition)

	// Wait so that everything created on the source below is newer than the destination.
	time.Sleep(10 * time.Second)

	sourceObjects := ObjectResourceMappingFlat{
		"hashequal":     ResourceDefinitionObject{Body: sameBody},
		"hashnotequal":  ResourceDefinitionObject{Body: changedBody},
		"nohashdestsrc": ResourceDefinitionObject{Body: newerSrcBody},
		"nohashsrcdest": ResourceDefinitionObject{Body: olderSrcBody},
	}

	sourceRoot := GetRootResource(svm, common.ELocation.Blob())
	source := CreateResource[ContainerResourceManager](svm, sourceRoot, ResourceDefinitionContainer{
		Objects: sourceObjects,
	})

	// The md5 has to be cleared by hand on the two "no hash" source objects.
	noMD5 := contentHeaders{contentMD5: nil}
	source.GetObject(svm, "nohashdestsrc", common.EEntityType.File()).SetHTTPHeaders(svm, noMD5)
	source.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).SetHTTPHeaders(svm, noMD5)

	// Wait again so that the destination object written below is newer than its source.
	time.Sleep(10 * time.Second)

	// Rewrite nohashsrcdest on the destination so that the src LMT is before the dest LMT. It is written with the
	// source's body, so the final content check still holds even though this object is expected to be skipped.
	dst.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).Create(svm, olderSrcBody, ObjectProperties{})

	syncCommand := AzCopyCommand{
		Verb:    AzCopyVerbSync,
		Targets: []ResourceManager{source, dst},
		Flags: SyncFlags{
			CopySyncCommonFlags: CopySyncCommonFlags{
				Recursive: pointerTo(true),
			},
			CompareHash: pointerTo(common.ESyncHashType.MD5()),
		},
	}
	output, _ := RunAzCopy(svm, syncCommand)

	// Every destination object should now match its source counterpart.
	wantDst := ResourceDefinitionContainer{Objects: sourceObjects}
	ValidateResource[ContainerResourceManager](svm, dst, wantDst, true)

	// The plan file should list only the transfers that were not skipped.
	wantPlan := ExpectedPlanFile{
		Objects: map[PlanFilePath]PlanFileObject{
			PlanFilePath{SrcPath: "/hashnotequal", DstPath: "/hashnotequal"}: PlanFileObject{
				Properties: ObjectProperties{},
			},
			PlanFilePath{SrcPath: "/nohashdestsrc", DstPath: "/nohashdestsrc"}: PlanFileObject{
				Properties: ObjectProperties{},
			},
		},
	}
	ValidatePlanFiles(svm, output, wantPlan)
}

// Scenario_TestSyncHashTypeDestinationHash syncs with MD5 hash comparison into a Blob destination whose objects may
// lack a stored MD5, then checks both the final destination contents and which transfers were planned.
//
// Note : a local destination always has a hash, because a hashProcessor created in zc_traverser_local computes it. A
// missing destination hash can therefore only be set up on a remote destination, so the destination here is always
// Blob.
func (s *SyncTestSuite) Scenario_TestSyncHashTypeDestinationHash(svm *ScenarioVariationManager) {
	// One object per case:
	//   hashequal     : hash present and equal            -> skip
	//   hashnotequal  : hash present and different        -> overwrite
	//   nohashdestsrc : no hash, src LMT after dest LMT   -> overwrite
	//   nohashsrcdest : no hash, src LMT before dest LMT  -> overwrite

	// Object bodies. The random bodies are created in a fixed order, followed by the all-zero filler.
	sameBody := NewRandomObjectContentContainer(512)
	changedBody := NewRandomObjectContentContainer(512)
	newerSrcBody := NewRandomObjectContentContainer(512)
	olderSrcBody := NewRandomObjectContentContainer(512)

	zeros := NewZeroObjectContentContainer(512)

	// The destination is created first, with zero-filled content everywhere except hashequal.
	dstRoot := GetRootResource(svm, common.ELocation.Blob())
	dstDefinition := ResourceDefinitionContainer{
		Objects: ObjectResourceMappingFlat{
			"hashequal":     ResourceDefinitionObject{Body: sameBody},
			"hashnotequal":  ResourceDefinitionObject{Body: zeros},
			"nohashdestsrc": ResourceDefinitionObject{Body: zeros},
			"nohashsrcdest": ResourceDefinitionObject{Body: zeros},
		},
	}
	dst := CreateResource[ContainerResourceManager](svm, dstRoot, dstDefinition)

	// Wait so that everything created on the source below is newer than the destination.
	time.Sleep(10 * time.Second)

	sourceObjects := ObjectResourceMappingFlat{
		"hashequal":     ResourceDefinitionObject{Body: sameBody},
		"hashnotequal":  ResourceDefinitionObject{Body: changedBody},
		"nohashdestsrc": ResourceDefinitionObject{Body: newerSrcBody},
		"nohashsrcdest": ResourceDefinitionObject{Body: olderSrcBody},
	}

	// The source varies between Blob and Local.
	sourceLocations := []common.Location{common.ELocation.Blob(), common.ELocation.Local()}
	sourceRoot := GetRootResource(svm, ResolveVariation(svm, sourceLocations))
	source := CreateResource[ContainerResourceManager](svm, sourceRoot, ResourceDefinitionContainer{
		Objects: sourceObjects,
	})

	// The md5 has to be cleared by hand on the two "no hash" destination objects.
	noMD5 := contentHeaders{contentMD5: nil}
	dst.GetObject(svm, "nohashdestsrc", common.EEntityType.File()).SetHTTPHeaders(svm, noMD5)
	dst.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).SetHTTPHeaders(svm, noMD5)

	// Wait again so that the destination object written below is newer than its source.
	time.Sleep(10 * time.Second)

	// Rewrite nohashsrcdest on the destination so that the src LMT is before the dest LMT.
	// NOTE(review): this Create runs after the md5 was cleared above and may store a fresh md5 on the object;
	// confirm that the "no hash" condition still holds for this case.
	dst.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).Create(svm, zeros, ObjectProperties{})

	syncCommand := AzCopyCommand{
		Verb:    AzCopyVerbSync,
		Targets: []ResourceManager{source, dst},
		Flags: SyncFlags{
			CopySyncCommonFlags: CopySyncCommonFlags{
				Recursive: pointerTo(true),
			},
			CompareHash: pointerTo(common.ESyncHashType.MD5()),
		},
	}
	output, _ := RunAzCopy(svm, syncCommand)

	// Every destination object should now match its source counterpart.
	wantDst := ResourceDefinitionContainer{Objects: sourceObjects}
	ValidateResource[ContainerResourceManager](svm, dst, wantDst, true)

	// The plan file should list only the transfers that were not skipped.
	wantPlan := ExpectedPlanFile{
		Objects: map[PlanFilePath]PlanFileObject{
			PlanFilePath{SrcPath: "/hashnotequal", DstPath: "/hashnotequal"}: PlanFileObject{
				Properties: ObjectProperties{},
			},
			PlanFilePath{SrcPath: "/nohashdestsrc", DstPath: "/nohashdestsrc"}: PlanFileObject{
				Properties: ObjectProperties{},
			},
			PlanFilePath{SrcPath: "/nohashsrcdest", DstPath: "/nohashsrcdest"}: PlanFileObject{
				Properties: ObjectProperties{},
			},
		},
	}
	ValidatePlanFiles(svm, output, wantPlan)
}
Loading