Skip to content

Commit

Permalink
[Fix](merge-on-write) Fix `MergeIndexDeleteBitmapCalculator::calculat…
Browse files Browse the repository at this point in the history
…e_one()` coredump (apache#44284)

### What problem does this PR solve?

Problem Summary:

`MergeIndexDeleteBitmapCalculatorContext::get_current_key()` may return
non-OK status when encounter memory allocation failure, which makes
`MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()`
returns incorrect result and break some assumptions during the process
of multiway merging, which leads to coredump.

```
 1# 0x00007F507D0B3520 in /lib/x86_64-linux-gnu/libc.so.6
 2# pthread_kill at ./nptl/pthread_kill.c:89
 3# raise at ../sysdeps/posix/raise.c:27
 4# abort at ./stdlib/abort.c:81
 5# 0x000055E3A805DD7D in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
 6# 0x000055E3A805047A in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
 7# google::LogMessage::SendToLog() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
 8# google::LogMessage::Flush() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
 9# google::LogMessageFatal::~LogMessageFatal() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
10# doris::MergeIndexDeleteBitmapCalculatorContext::seek_at_or_after(doris::Slice const&) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
11# doris::MergeIndexDeleteBitmapCalculator::calculate_one(doris::RowLocation&) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/delete_bitmap_calculator.cpp:197
12# doris::MergeIndexDeleteBitmapCalculator::calculate_all(std::shared_ptr) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
13# doris::Tablet::calc_delete_bitmap_between_segments(std::shared_ptr, std::vector, std::allocator > > const&, std::shared_ptr) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/tablet.cpp:4075
14# doris::Tablet::update_delete_bitmap_without_lock(std::shared_ptr const&, std::vector, std::allocator > > const*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/tablet.cpp:3468
15# doris::Tablet::revise_tablet_meta(std::vector, std::allocator > > const&, std::vector, std::allocator > > const&, bool) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/tablet.cpp:415
16# doris::EngineCloneTask::_finish_incremental_clone(doris::Tablet*, std::shared_ptr const&, long) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/task/engine_clone_task.cpp:795
17# doris::EngineCloneTask::_finish_clone(doris::Tablet*, std::__cxx11::basic_string, std::allocator > const&, long, bool) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
18# doris::EngineCloneTask::_do_clone() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
19# doris::EngineCloneTask::execute() at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/task/engine_clone_task.cpp:159
20# doris::clone_callback(doris::StorageEngine&, doris::TMasterInfo const&, doris::TAgentTaskRequest const&) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be
21# std::_Function_handler(doris::TAgentTaskRequest const&) const::{lambda()#1}>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291
22# doris::ThreadPool::dispatch_thread() at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/util/threadpool.cpp:551
23# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/util/thread.cpp:499
24# start_thread at ./nptl/pthread_create.c:442
25# 0x00007F507D197850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
```
  • Loading branch information
bobhan1 authored Nov 20, 2024
1 parent 2a4885e commit 1601d75
Showing 1 changed file with 38 additions and 32 deletions.
70 changes: 38 additions & 32 deletions be/src/olap/delete_bitmap_calculator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
// std::proiroty_queue is a max heap, and function should return the result of `lhs < rhs`
// so if the result of the function is true, rhs will be popped before lhs
Slice key1, key2;
RETURN_IF_ERROR(lhs->get_current_key(key1));
RETURN_IF_ERROR(rhs->get_current_key(key2));
// MergeIndexDeleteBitmapCalculatorContext::get_current_key may return non-OK status if encounter
// memory allocation failure, we can only throw exception here to propagate error in this situation
THROW_IF_ERROR(lhs->get_current_key(key1));
THROW_IF_ERROR(rhs->get_current_key(key2));
if (_sequence_length == 0 && _rowid_length == 0) {
auto cmp_result = key1.compare(key2);
// when key1 is the same as key2,
Expand Down Expand Up @@ -135,28 +137,30 @@ Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id,
std::vector<SegmentSharedPtr> const& segments,
size_t seq_col_length, size_t rowdid_length,
size_t max_batch_size) {
_rowset_id = rowset_id;
_seq_col_length = seq_col_length;
_rowid_length = rowdid_length;
_comparator =
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length, _rowid_length);
_contexts.reserve(segments.size());
_heap = std::make_unique<Heap>(_comparator);
RETURN_IF_CATCH_EXCEPTION({
_rowset_id = rowset_id;
_seq_col_length = seq_col_length;
_rowid_length = rowdid_length;
_comparator =
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length, _rowid_length);
_contexts.reserve(segments.size());
_heap = std::make_unique<Heap>(_comparator);

for (auto& segment : segments) {
RETURN_IF_ERROR(segment->load_index());
auto pk_idx = segment->get_primary_key_index();
std::unique_ptr<segment_v2::IndexedColumnIterator> index;
RETURN_IF_ERROR(pk_idx->new_iterator(&index));
auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
pk_idx->type_info()->type(), 1, 0);
_contexts.emplace_back(std::move(index), index_type, segment->id(), pk_idx->num_rows());
_heap->push(&_contexts.back());
}
if (_rowid_length > 0) {
_rowid_coder = get_key_coder(
get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT>()->type());
}
for (auto& segment : segments) {
RETURN_IF_ERROR(segment->load_index());
auto pk_idx = segment->get_primary_key_index();
std::unique_ptr<segment_v2::IndexedColumnIterator> index;
RETURN_IF_ERROR(pk_idx->new_iterator(&index));
auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
pk_idx->type_info()->type(), 1, 0);
_contexts.emplace_back(std::move(index), index_type, segment->id(), pk_idx->num_rows());
_heap->push(&_contexts.back());
}
if (_rowid_length > 0) {
_rowid_coder = get_key_coder(
get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT>()->type());
}
});
return Status::OK();
}

Expand Down Expand Up @@ -209,16 +213,18 @@ Status MergeIndexDeleteBitmapCalculator::calculate_one(RowLocation& loc) {
}

Status MergeIndexDeleteBitmapCalculator::calculate_all(DeleteBitmapPtr delete_bitmap) {
RowLocation loc;
while (true) {
auto st = calculate_one(loc);
if (st.is<ErrorCode::END_OF_FILE>()) {
break;
RETURN_IF_CATCH_EXCEPTION({
RowLocation loc;
while (true) {
auto st = calculate_one(loc);
if (st.is<ErrorCode::END_OF_FILE>()) {
break;
}
RETURN_IF_ERROR(st);
delete_bitmap->add({_rowset_id, loc.segment_id, DeleteBitmap::TEMP_VERSION_COMMON},
loc.row_id);
}
RETURN_IF_ERROR(st);
delete_bitmap->add({_rowset_id, loc.segment_id, DeleteBitmap::TEMP_VERSION_COMMON},
loc.row_id);
}
});
return Status::OK();
}

Expand Down

0 comments on commit 1601d75

Please sign in to comment.