Merge branch 'main' into bugfixes
lnkuiper committed Aug 8, 2024
2 parents 0ca6be0 + dd9e6ab commit bffce7f
Showing 52 changed files with 1,215 additions and 360 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yml
@@ -19,7 +19,7 @@ body:
* [duckdb-wasm](https://github.com/duckdb/duckdb-wasm/issues/new)
* [go-duckdb](https://github.com/marcboeker/go-duckdb/issues/new)
* Extensions:
* [Arrow extension](https://github.com/duckdb/duckdb_arrow/issues/new)
* [Arrow extension](https://github.com/duckdb/arrow/issues/new)
* [AWS extension](https://github.com/duckdb/duckdb_aws/issues/new)
* [Azure extension](https://github.com/duckdb/duckdb_azure/issues/new)
* [Delta extension](https://github.com/duckdb/duckdb_delta/issues/new)
16 changes: 16 additions & 0 deletions benchmark/micro/list/list_extract.benchmark
@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_extract.benchmark
# description: Benchmark for the list_extract function
# group: [list]

name list_extract micro
group micro
subgroup list

load
CREATE TABLE t1 as SELECT range(0,1000) as l FROM range(0,10000) as r(e);

run
SELECT sum(list_extract(l, 500)) FROM t1;

result I
4990000
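
For reference, the expected value follows from list_extract's one-based indexing: range(0,1000) builds the list [0, 1, ..., 999], its 500th element is 499, and the sum over 10,000 identical rows is 499 * 10000 = 4,990,000. A quick check, not part of the benchmark file:

-- one-based indexing: the 500th element of [0, 1, ..., 999] is 499
SELECT list_extract(range(0, 1000), 500);  -- returns 499
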
16 changes: 16 additions & 0 deletions benchmark/micro/list/list_extract_null.benchmark
@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_extract_null.benchmark
# description: Benchmark for the list_extract function
# group: [list]

name list_extract micro
group micro
subgroup list

load
CREATE TABLE t1 as SELECT list_transform(range(0,1000), a -> if(e % a = 0, null, a)) as l FROM range(0,10000) as r(e);

run
SELECT count(list_extract(l, 5)) FROM t1;

result I
7500
17 changes: 17 additions & 0 deletions benchmark/micro/list/list_extract_struct.benchmark
@@ -0,0 +1,17 @@
# name: benchmark/micro/list/list_extract_struct.benchmark
# description: Benchmark for the list_extract function
# group: [list]

name list_extract micro
group micro
subgroup list

load
CREATE TABLE t1 as SELECT list_transform(range(0,1000), x -> {'foo': x, 'bar': (-x)::VARCHAR}) as l
FROM range(0,10000) as r(e);

run
SELECT sum(list_extract(l, 500).foo) FROM t1;

result I
4990000
17 changes: 17 additions & 0 deletions benchmark/micro/list/list_extract_struct_null.benchmark
@@ -0,0 +1,17 @@
# name: benchmark/micro/list/list_extract_struct_null.benchmark
# description: Benchmark for the list_extract function
# group: [list]

name list_extract micro
group micro
subgroup list

load
CREATE TABLE t1 as SELECT list_transform(range(0,1000), x -> if(e % x = 0, null, {'foo': x, 'bar': (-x)::VARCHAR})) as l
FROM range(0,10000) as r(e);

run
SELECT sum(list_extract(l, 500).foo) FROM t1;

result I
4979521
6 changes: 4 additions & 2 deletions extension/parquet/parquet_extension.cpp
@@ -1120,8 +1120,10 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
}
names += name.first;
}
throw BinderException("Column name \"%s\" specified in FIELD_IDS not found. Available column names: [%s]",
col_name, names);
throw BinderException(
"Column name \"%s\" specified in FIELD_IDS not found. Consider using WRITE_PARTITION_COLUMNS if this "
"column is a partition column. Available column names: [%s]",
col_name, names);
}
D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys

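
The reworded exception points users at the WRITE_PARTITION_COLUMNS copy option when a FIELD_IDS entry names a partition column. A minimal SQL sketch of that situation, with made-up table contents and output paths (the option spelling is taken on trust from the message in the diff):

-- Partition columns are hive-encoded into directory names and, by default, dropped from
-- the written Parquet files, so a FIELD_IDS entry for one would trigger this error:
COPY (SELECT 1 AS part_col, 2 AS val)
TO 'out_dir' (FORMAT PARQUET, PARTITION_BY (part_col), FIELD_IDS {part_col: 42, val: 43});

-- Keeping the partition column inside the files avoids it:
COPY (SELECT 1 AS part_col, 2 AS val)
TO 'out_dir' (FORMAT PARQUET, PARTITION_BY (part_col), FIELD_IDS {part_col: 42, val: 43},
              WRITE_PARTITION_COLUMNS TRUE);
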
5 changes: 5 additions & 0 deletions src/common/enum_util.cpp
@@ -4307,6 +4307,8 @@ const char* EnumUtil::ToChars<MetricsType>(MetricsType value) {
return "CPU_TIME";
case MetricsType::EXTRA_INFO:
return "EXTRA_INFO";
case MetricsType::CUMULATIVE_CARDINALITY:
return "CUMULATIVE_CARDINALITY";
case MetricsType::OPERATOR_CARDINALITY:
return "OPERATOR_CARDINALITY";
case MetricsType::OPERATOR_TIMING:
@@ -4324,6 +4326,9 @@ MetricsType EnumUtil::FromString<MetricsType>(const char *value) {
if (StringUtil::Equals(value, "EXTRA_INFO")) {
return MetricsType::EXTRA_INFO;
}
if (StringUtil::Equals(value, "CUMULATIVE_CARDINALITY")) {
return MetricsType::CUMULATIVE_CARDINALITY;
}
if (StringUtil::Equals(value, "OPERATOR_CARDINALITY")) {
return MetricsType::OPERATOR_CARDINALITY;
}
13 changes: 9 additions & 4 deletions src/common/render_tree.cpp
@@ -118,14 +118,19 @@ static unique_ptr<RenderTreeNode> CreateNode(const PipelineRenderNode &op) {
static unique_ptr<RenderTreeNode> CreateNode(const ProfilingNode &op) {
InsertionOrderPreservingMap<string> extra_info;
if (op.GetProfilingInfo().Enabled(MetricsType::EXTRA_INFO)) {
extra_info = op.GetProfilingInfo().metrics.extra_info;
extra_info = op.GetProfilingInfo().extra_info;
}

auto node_name = op.GetName();
auto result = make_uniq<RenderTreeNode>(node_name, extra_info);
result->extra_text["Cardinality"] = to_string(op.GetProfilingInfo().metrics.operator_cardinality);
string timing = StringUtil::Format("%.2f", op.GetProfilingInfo().metrics.operator_timing);
result->extra_text["Timing"] = timing + "s";
if (op.GetProfilingInfo().Enabled(MetricsType::OPERATOR_CARDINALITY)) {
result->extra_text["Cardinality"] = op.GetProfilingInfo().GetMetricAsString(MetricsType::OPERATOR_CARDINALITY);
}
if (op.GetProfilingInfo().Enabled(MetricsType::OPERATOR_TIMING)) {
string timing = StringUtil::Format(
"%.2f", op.GetProfilingInfo().metrics.at(MetricsType::OPERATOR_TIMING).GetValue<double>());
result->extra_text["Timing"] = timing + "s";
}
return result;
}

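
With this change, CreateNode attaches the Cardinality and Timing entries only when the corresponding metrics are enabled, instead of reading them from a fixed metrics struct. These are the per-operator annotations in profiled output; a rough sketch of toggling them from SQL (custom_profiling_settings is an assumed knob name here, EXPLAIN ANALYZE is standard):

-- assumed per-metric toggle: disabling OPERATOR_TIMING should drop the Timing line
SET custom_profiling_settings = '{"OPERATOR_TIMING": "false", "OPERATOR_CARDINALITY": "true"}';
EXPLAIN ANALYZE SELECT sum(i) FROM range(1000) t(i);
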
26 changes: 25 additions & 1 deletion src/execution/operator/persistent/physical_copy_to_file.cpp
@@ -7,6 +7,7 @@
#include "duckdb/common/types/uuid.hpp"
#include "duckdb/common/value_operations/value_operations.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/planner/operator/logical_copy_to_file.hpp"

#include <algorithm>

@@ -228,6 +229,22 @@ class CopyToFunctionLocalState : public LocalSinkState {
append_count = 0;
}

void SetDataWithoutPartitions(DataChunk &chunk, const DataChunk &source, const vector<LogicalType> &col_types,
const vector<idx_t> &part_cols) {
D_ASSERT(source.ColumnCount() == col_types.size());
auto types = LogicalCopyToFile::GetTypesWithoutPartitions(col_types, part_cols, false);
chunk.InitializeEmpty(types);
set<idx_t> part_col_set(part_cols.begin(), part_cols.end());
idx_t new_col_id = 0;
for (idx_t col_idx = 0; col_idx < source.ColumnCount(); col_idx++) {
if (part_col_set.find(col_idx) == part_col_set.end()) {
chunk.data[new_col_id].Reference(source.data[col_idx]);
new_col_id++;
}
}
chunk.SetCardinality(source.size());
}

void FlushPartitions(ExecutionContext &context, const PhysicalCopyToFile &op, CopyToFunctionGlobalState &g) {
if (!part_buffer) {
return;
@@ -247,7 +264,14 @@ class CopyToFunctionLocalState : public LocalSinkState {
auto local_copy_state = op.function.copy_to_initialize_local(context, *op.bind_data);
// push the chunks into the write state
for (auto &chunk : partitions[i]->Chunks()) {
op.function.copy_to_sink(context, *op.bind_data, *info.global_state, *local_copy_state, chunk);
if (op.write_partition_columns) {
op.function.copy_to_sink(context, *op.bind_data, *info.global_state, *local_copy_state, chunk);
} else {
DataChunk filtered_chunk;
SetDataWithoutPartitions(filtered_chunk, chunk, op.expected_types, op.partition_columns);
op.function.copy_to_sink(context, *op.bind_data, *info.global_state, *local_copy_state,
filtered_chunk);
}
}
op.function.copy_to_combine(context, *op.bind_data, *info.global_state, *local_copy_state);
local_copy_state.reset();
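
The new SetDataWithoutPartitions helper mirrors the user-facing behaviour of hive-partitioned writes: unless write_partition_columns is set, partition values live only in the directory names, so each buffered chunk is narrowed to the non-partition columns before it reaches the copy sink. A minimal sketch with made-up table and paths (option spelling assumed from the related Parquet change):

CREATE TABLE measurements AS SELECT x % 3 AS region, x AS reading FROM range(9) t(x);

-- default: files under out/region=0/, out/region=1/, ... contain only the 'reading' column
COPY measurements TO 'out' (FORMAT PARQUET, PARTITION_BY (region));

-- with the option enabled, 'region' is also written inside the files themselves
COPY measurements TO 'out_full' (FORMAT PARQUET, PARTITION_BY (region), WRITE_PARTITION_COLUMNS TRUE);
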
2 changes: 2 additions & 0 deletions src/execution/physical_plan/plan_copy_to_file.cpp
@@ -39,6 +39,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
copy->return_type = op.return_type;
return std::move(copy);
}

// COPY from select statement to file
auto copy = make_uniq<PhysicalCopyToFile>(op.types, op.function, std::move(op.bind_data), op.estimated_cardinality);
copy->file_path = op.file_path;
@@ -54,6 +55,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
copy->return_type = op.return_type;
copy->partition_output = op.partition_output;
copy->partition_columns = op.partition_columns;
copy->write_partition_columns = op.write_partition_columns;
copy->names = op.names;
copy->expected_types = op.expected_types;
copy->parallel = mode == CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE;
7 changes: 4 additions & 3 deletions src/execution/window_executor.cpp
@@ -1682,9 +1682,10 @@ void WindowLeadLagExecutor::EvaluateInternal(WindowExecutorGlobalState &gstate,
i += width;
row_idx += width;
} else if (wexpr.default_expr) {
llstate.leadlag_default.CopyCell(result, i, delta);
i += delta;
row_idx += delta;
const auto width = MinValue(delta, count - i);
llstate.leadlag_default.CopyCell(result, i, width);
i += width;
row_idx += width;
} else {
for (idx_t nulls = MinValue(delta, count - i); nulls--; ++i, ++row_idx) {
FlatVector::SetNull(result, i, true);
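
The fix clamps the default-value fill to the rows remaining in the chunk (count - i), matching the NULL-fill branch below it, instead of always advancing by the full delta. The code path it protects is the three-argument LEAD/LAG form, where offsets that run past the end of the partition are filled with the default expression; a small illustration on made-up data:

-- rows whose offset falls past the end of the partition receive the default (-1)
SELECT i, lead(i, 3, -1) OVER (ORDER BY i) AS l FROM range(5) t(i);
-- i = 0..4  ->  l = 3, 4, -1, -1, -1
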