Skip to content

Commit

Permalink
Rework compare mode to include exit codes.
Browse files Browse the repository at this point in the history
- Adds exit codes to compare mode.
- Fixes a bug that returns the local action result instead of the rerun result.
- Makes sure that the total reruns set by rewrapper is > 2 or logs an error.

Bug: b/281684912
Test: Integration tests
Change-Id: I6a0fca2b2f50227c1862c8a5a4336154f25e8731
GitOrigin-RevId: 002a43d9f55ff7c2402a77b904965476e20b85d0
  • Loading branch information
andusy authored and copybara-github committed Aug 24, 2023
1 parent b1c25fe commit 5dd396f
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 130 deletions.
11 changes: 8 additions & 3 deletions api/log/log.proto
Original file line number Diff line number Diff line change
Expand Up @@ -235,21 +235,26 @@ message Verification {
// Determines if the action inherently non-deterministic.
bool non_deterministic = 4 [deprecated=true];

// Lists the various output digests we got from multiple remote-execution
// retries.
// A deduped list of output digests we got from remote-execution reruns.
repeated string remote_digests = 5;

// Action digest that produced the mismatching remote digests.
string action_digest = 6;

// Lists the various output digests we got from multiple local reruns
// A deduped list of output digests we got from local reruns.
repeated string local_digests = 7;

// If true, the mismatch is considered expected/known thus can be ignored.
bool ignored = 8;

// Indicates whether the action is inherently deterministic or not.
DeterminismStatus determinism = 9;

// A deduped list of exit codes we got from remote-execution reruns.
repeated int32 remote_exit_codes = 10;

// A deduped list of exit codes we got from local reruns.
repeated int32 local_exit_codes = 11;
}

// Any SHA mismatches in the action.
Expand Down
9 changes: 9 additions & 0 deletions cmd/rewrapper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,15 @@ func main() {
log.Fatalf("Current working directory (%q) is not under the exec root (%q), relative working dir = %q", wd, cOpts.ExecRoot, cOpts.WorkDir)
}

if cOpts.NumRemoteReruns < 0 {
log.Warningf("Expected num_remote_reruns to be at least 0, got %v. Defaulting num_remote_reruns to 0.", cOpts.NumRemoteReruns)
cOpts.NumRemoteReruns = 0
}
if cOpts.NumLocalReruns < 0 {
log.Warningf("Expected num_local_reruns to be at least 0, got %v. Defaulting num_local_reruns to 0.", cOpts.NumLocalReruns)
cOpts.NumLocalReruns = 0
}

// TODO (b/296409009): Add support for preserve true and download outputs false for downloading stubs.

resp, err := rewrapper.RunCommand(ctx, *dialTimeout, proxy, cmd, cOpts)
Expand Down
127 changes: 88 additions & 39 deletions internal/pkg/reproxy/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ import (

"team/foundry-x/re-client/internal/pkg/protoencoding"

"github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"

lpb "team/foundry-x/re-client/api/log"

log "github.com/golang/glog"
Expand All @@ -33,35 +31,53 @@ func compareAction(ctx context.Context, s *Server, a *action) {
return
}
mismatches := make(map[string]*lpb.Verification_Mismatch)
var localExitCodes []int32
var remoteExitCodes []int32
numVerified := 0

for _, rerun := range a.rec.LocalMetadata.RerunMetadata {
numVerified += findMismatches(rerun.OutputFileDigests, rerun.OutputDirectoryDigests, mismatches, a, false)
localExitCodes = append(localExitCodes, rerun.Result.ExitCode)
}

if a.rec.RemoteMetadata != nil {
for _, rerun := range a.rec.RemoteMetadata.RerunMetadata {
numVerified += findMismatches(rerun.OutputFileDigests, rerun.OutputDirectoryDigests, mismatches, a, true)
remoteExitCodes = append(remoteExitCodes, rerun.Result.ExitCode)
}
}

localExitCodes = dedupExitCodes(localExitCodes)
remoteExitCodes = dedupExitCodes(remoteExitCodes)

for fp, mismatch := range mismatches {
localMismatches := len(mismatch.LocalDigests)
remoteMismatches := len(mismatch.RemoteDigests)
mismatch.LocalExitCodes = localExitCodes
mismatch.RemoteExitCodes = remoteExitCodes

dgStatus, shouldLogDg := compareDigests(mismatch.LocalDigests, mismatch.RemoteDigests, a.numLocalReruns, a.numRemoteReruns)
ecStatus, shouldLogEc := compareExitCodes(mismatch.LocalExitCodes, mismatch.RemoteExitCodes, a.numLocalReruns, a.numRemoteReruns)

// Delete the records that we don't want to keep.
if !shouldLogDg && !shouldLogEc {
delete(mismatches, fp)
} else if !shouldLogDg {
mismatch.LocalDigests = nil
mismatch.RemoteDigests = nil
} else if !shouldLogEc {
mismatch.LocalExitCodes = nil
mismatch.RemoteExitCodes = nil
}

if localMismatches > 1 {
mismatch.Determinism = lpb.DeterminismStatus_NON_DETERMINISTIC
} else if remoteMismatches > 1 {
switch {
case dgStatus == lpb.DeterminismStatus_UNKNOWN || ecStatus == lpb.DeterminismStatus_UNKNOWN:
mismatch.Determinism = lpb.DeterminismStatus_UNKNOWN
case dgStatus == lpb.DeterminismStatus_REMOTE_NON_DETERMINISTIC || ecStatus == lpb.DeterminismStatus_REMOTE_NON_DETERMINISTIC:
mismatch.Determinism = lpb.DeterminismStatus_REMOTE_NON_DETERMINISTIC
} else if localMismatches == 1 && remoteMismatches == 1 && (a.numLocalReruns > 1 || a.numRemoteReruns > 1 || mismatch.LocalDigests[0] == mismatch.RemoteDigests[0]) {
case dgStatus == lpb.DeterminismStatus_NON_DETERMINISTIC || ecStatus == lpb.DeterminismStatus_NON_DETERMINISTIC:
mismatch.Determinism = lpb.DeterminismStatus_NON_DETERMINISTIC
default:
mismatch.Determinism = lpb.DeterminismStatus_DETERMINISTIC
if mismatch.LocalDigests[0] == mismatch.RemoteDigests[0] {
delete(mismatches, fp)
}
} else {
mismatch.Determinism = lpb.DeterminismStatus_UNKNOWN
}

}
verRes := mismatchesToProto(mismatches, numVerified)
a.rec.LocalMetadata.Verification = verRes
Expand Down Expand Up @@ -90,35 +106,54 @@ func mismatchesToProto(mismatches map[string]*lpb.Verification_Mismatch, numVeri
return res
}

func compareResults(localDigests, remoteDigests map[string]digest.Digest, actionDigest string) (map[string]*lpb.Verification_Mismatch, int) {
mismatches := make(map[string]*lpb.Verification_Mismatch)
numVerified := 0
for path, dl := range localDigests {
numVerified++
m := &lpb.Verification_Mismatch{
Path: path,
LocalDigest: dl.String(),
ActionDigest: actionDigest,
}
if dr, ok := remoteDigests[path]; ok {
delete(remoteDigests, path)
if dr != dl {
m.RemoteDigests = []string{dr.String()}
mismatches[path] = m
}
continue
// Returns the determinism status of the digests and whether we should log the mismatch.
func compareDigests(localDigests []string, remoteDigests []string, numLocalReruns int, numRemoteReruns int) (lpb.DeterminismStatus, bool) {
totalReruns := numLocalReruns + numRemoteReruns
localMismatches := len(localDigests)
remoteMismatches := len(remoteDigests)

if localMismatches > 1 || remoteMismatches > 1 {
if localMismatches == 1 {
return lpb.DeterminismStatus_REMOTE_NON_DETERMINISTIC, true
}
mismatches[path] = m
return lpb.DeterminismStatus_NON_DETERMINISTIC, true
}
for path, dr := range remoteDigests {
numVerified++
mismatches[path] = &lpb.Verification_Mismatch{
Path: path,
RemoteDigests: []string{dr.String()},
ActionDigest: actionDigest,

deterministicMismatch := localMismatches == 1 && remoteMismatches == 1 && localDigests[0] != remoteDigests[0] && totalReruns > 2
localDeterministic := localMismatches == 1 && numRemoteReruns == 0
remoteDeterministic := remoteMismatches == 1 && numLocalReruns == 0
deterministic := localMismatches == 1 && remoteMismatches == 1 && localDigests[0] == remoteDigests[0]

if localDeterministic || remoteDeterministic || deterministic || deterministicMismatch {
return lpb.DeterminismStatus_DETERMINISTIC, deterministicMismatch
}

return lpb.DeterminismStatus_UNKNOWN, true
}

// Returns the determinism status of the exit codes and whether we should log the mismatch.
func compareExitCodes(localExitCodes []int32, remoteExitCodes []int32, numLocalReruns int, numRemoteReruns int) (lpb.DeterminismStatus, bool) {
totalReruns := numLocalReruns + numRemoteReruns
localMismatches := len(localExitCodes)
remoteMismatches := len(remoteExitCodes)

if localMismatches > 1 || remoteMismatches > 1 {
if localMismatches == 1 {
return lpb.DeterminismStatus_REMOTE_NON_DETERMINISTIC, true
}
return lpb.DeterminismStatus_NON_DETERMINISTIC, true
}

deterministicMismatch := localMismatches == 1 && remoteMismatches == 1 && localExitCodes[0] != remoteExitCodes[0] && totalReruns > 2
localDeterministic := localMismatches == 1 && numRemoteReruns == 0
remoteDeterministic := remoteMismatches == 1 && numLocalReruns == 0
deterministic := localMismatches == 1 && remoteMismatches == 1 && localExitCodes[0] == remoteExitCodes[0]

if localDeterministic || remoteDeterministic || deterministic || deterministicMismatch {
return lpb.DeterminismStatus_DETERMINISTIC, deterministicMismatch
}
return mismatches, numVerified

return lpb.DeterminismStatus_UNKNOWN, true
}

func findMismatches(fileDg map[string]string, dirDg map[string]string, mismatches map[string]*lpb.Verification_Mismatch, a *action, remote bool) int {
Expand Down Expand Up @@ -149,3 +184,17 @@ func findMismatches(fileDg map[string]string, dirDg map[string]string, mismatche
}
return numVerified
}

func dedupExitCodes(list []int32) []int32 {
var res []int32
seen := make(map[int32]bool)

for _, n := range list {
if _, ok := seen[n]; !ok {
seen[n] = true
res = append(res, n)
}
}

return res
}
29 changes: 12 additions & 17 deletions internal/pkg/reproxy/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,20 +425,10 @@ func (s *Server) RunCommand(ctx context.Context, req *ppb.RunRequest) (*ppb.RunR
}
rec.LocalMetadata.EventTimes[k] = t
}
numLocalRerun := int(req.GetExecutionOptions().GetNumLocalReruns())

numRemoteRerun := int(req.GetExecutionOptions().GetNumRemoteReruns())
if req.GetExecutionOptions().GetCompareWithLocal() {
if numLocalRerun == 0 {
numLocalRerun = 1
}
if numRemoteRerun == 0 {
numRetries := int(req.GetExecutionOptions().GetNumRetriesIfMismatched())
if numRetries > 0 {
numRemoteRerun = numRetries
} else {
numRemoteRerun = 1
}
}
if numRemoteRerun == 0 {
numRemoteRerun = int(req.GetExecutionOptions().GetNumRetriesIfMismatched())
}

reclientTimeout := int(req.GetExecutionOptions().GetReclientTimeout())
Expand All @@ -454,7 +444,7 @@ func (s *Server) RunCommand(ctx context.Context, req *ppb.RunRequest) (*ppb.RunR
lOpt: req.GetExecutionOptions().GetLocalExecutionOptions(),
execStrategy: req.GetExecutionOptions().GetExecutionStrategy(),
compare: req.GetExecutionOptions().GetCompareWithLocal(),
numLocalReruns: numLocalRerun,
numLocalReruns: int(req.GetExecutionOptions().GetNumLocalReruns()),
numRemoteReruns: numRemoteRerun,
reclientTimeout: reclientTimeout,
oe: outerr.NewRecordingOutErr(),
Expand All @@ -475,8 +465,13 @@ func (s *Server) RunCommand(ctx context.Context, req *ppb.RunRequest) (*ppb.RunR
a.rOpt.AcceptCached = false
}
s.runAction(aCtx, a)
if (a.numLocalReruns + a.numRemoteReruns) > 0 {
s.rerunAction(aCtx, a)

if a.compare {
if (a.numLocalReruns + a.numRemoteReruns) > 1 {
s.rerunAction(aCtx, a)
} else {
log.Errorf("%v: Invalid number of reruns. Total reruns of >= 2 expected. Got num_local_reruns=%v and num_remote_reruns=%v", executionID, a.numLocalReruns, a.numRemoteReruns)
}
}
s.logRecord(a, start)
oe := a.oe.(*outerr.RecordingOutErr)
Expand Down Expand Up @@ -751,7 +746,7 @@ func (s *Server) rerunAction(ctx context.Context, a *action) {
}
localRerunMetaData := &lpb.RerunMetadata{
Attempt: int64(attemptNum),
Result: command.ResultToProto(a.res),
Result: command.ResultToProto(act.res),
EventTimes: act.rec.LocalMetadata.EventTimes,
OutputFileDigests: fileDgs,
OutputDirectoryDigests: dirDgs,
Expand Down
Loading

0 comments on commit 5dd396f

Please sign in to comment.