-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Data race in semi-join #17417
base: main
Are you sure you want to change the base?
Fix Data race in semi-join #17417
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ package engine | |
|
||
import ( | ||
"context" | ||
"sync/atomic" | ||
|
||
"vitess.io/vitess/go/sqltypes" | ||
querypb "vitess.io/vitess/go/vt/proto/query" | ||
|
@@ -62,24 +63,26 @@ func (jn *SemiJoin) TryExecute(ctx context.Context, vcursor VCursor, bindVars ma | |
|
||
// TryStreamExecute performs a streaming exec. | ||
func (jn *SemiJoin) TryStreamExecute(ctx context.Context, vcursor VCursor, bindVars map[string]*querypb.BindVariable, wantfields bool, callback func(*sqltypes.Result) error) error { | ||
joinVars := make(map[string]*querypb.BindVariable) | ||
err := vcursor.StreamExecutePrimitive(ctx, jn.Left, bindVars, wantfields, func(lresult *sqltypes.Result) error { | ||
joinVars := make(map[string]*querypb.BindVariable) | ||
result := &sqltypes.Result{Fields: lresult.Fields} | ||
for _, lrow := range lresult.Rows { | ||
for k, col := range jn.Vars { | ||
joinVars[k] = sqltypes.ValueBindVariable(lrow[col]) | ||
} | ||
rowAdded := false | ||
var rowAdded atomic.Bool | ||
err := vcursor.StreamExecutePrimitive(ctx, jn.Right, combineVars(bindVars, joinVars), false, func(rresult *sqltypes.Result) error { | ||
if len(rresult.Rows) > 0 && !rowAdded { | ||
result.Rows = append(result.Rows, lrow) | ||
rowAdded = true | ||
if len(rresult.Rows) > 0 { | ||
rowAdded.Store(true) | ||
} | ||
return nil | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
if rowAdded.Load() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this one atomic? If this needs an atomic since it otherwise races, I imagine that the […] If this doesn't race, it doesn't need to be atomic? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I know. I made the changes so that I could avoid using this atomic, but in the callback right above we are writing to it. We are only setting it to true, but Go still complains in a […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Both are racy as the internal […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @harshit-gangal Right, so that means we need to guard the […] |
||
result.Rows = append(result.Rows, lrow) | ||
} | ||
} | ||
return callback(result) | ||
}) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,3 +159,60 @@ func TestSemiJoinStreamExecute(t *testing.T) { | |
"4|d|dd", | ||
)) | ||
} | ||
|
||
// TestSemiJoinStreamExecuteParallelExecution tests SemiJoin stream execution with parallel execution | ||
// to ensure we have no data races. | ||
func TestSemiJoinStreamExecuteParallelExecution(t *testing.T) { | ||
leftPrim := &fakePrimitive{ | ||
results: []*sqltypes.Result{ | ||
sqltypes.MakeTestResult( | ||
sqltypes.MakeTestFields( | ||
"col1|col2|col3", | ||
"int64|varchar|varchar", | ||
), | ||
"1|a|aa", | ||
"2|b|bb", | ||
), sqltypes.MakeTestResult( | ||
sqltypes.MakeTestFields( | ||
"col1|col2|col3", | ||
"int64|varchar|varchar", | ||
), | ||
"3|c|cc", | ||
"4|d|dd", | ||
), | ||
}, | ||
async: true, | ||
} | ||
rightFields := sqltypes.MakeTestFields( | ||
"col4|col5|col6", | ||
"int64|varchar|varchar", | ||
) | ||
rightPrim := &fakePrimitive{ | ||
// we'll return non-empty results for rows 2 and 4 | ||
results: sqltypes.MakeTestStreamingResults(rightFields, | ||
"4|d|dd", | ||
"---", | ||
"---", | ||
"5|e|ee", | ||
"6|f|ff", | ||
"7|g|gg", | ||
), | ||
async: true, | ||
noLog: true, | ||
} | ||
|
||
jn := &SemiJoin{ | ||
Left: leftPrim, | ||
Right: rightPrim, | ||
Vars: map[string]int{ | ||
"bv": 1, | ||
}, | ||
} | ||
err := jn.TryStreamExecute(context.Background(), &noopVCursor{}, map[string]*querypb.BindVariable{}, true, func(result *sqltypes.Result) error { | ||
return nil | ||
}) | ||
Comment on lines +211 to +213
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the results be validated? |
||
require.NoError(t, err) | ||
leftPrim.ExpectLog(t, []string{ | ||
`StreamExecute true`, | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is another issue I mentioned: during a transaction, we might end up opening two connections, leaving one of them in limbo.
Do you have another PR that fixes it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will open a separate issue for it, with its own tests and PR, after this one gets merged.