From 7c0fedca84d276172bb4c4f671ddcc2d5c419f5f Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 30 Sep 2024 14:11:55 +0200 Subject: [PATCH 01/20] wip on globbing --- data/csv/glob_dif_dialect/14166/__2000.csv | 3 ++ data/csv/glob_dif_dialect/14166/__2001.csv | 1 + .../buffer_manager/csv_buffer_manager.cpp | 8 ++-- .../csv_scanner/scanner/base_scanner.cpp | 2 +- .../csv_scanner/scanner/csv_schema.cpp | 42 ++++++++++++++--- .../csv_scanner/sniffer/csv_sniffer.cpp | 45 ++++++++++--------- .../csv_scanner/sniffer/dialect_detection.cpp | 2 +- .../csv_scanner/sniffer/header_detection.cpp | 2 +- .../csv_scanner/sniffer/type_detection.cpp | 2 +- .../csv_scanner/sniffer/type_refinement.cpp | 2 +- .../csv_scanner/sniffer/type_replacement.cpp | 2 +- .../state_machine/csv_state_machine.cpp | 2 +- .../state_machine/csv_state_machine_cache.cpp | 2 +- .../table_function/csv_file_scanner.cpp | 2 +- .../table_function/global_csv_state.cpp | 2 +- src/function/table/copy_csv.cpp | 2 +- src/function/table/read_csv.cpp | 2 +- src/function/table/sniff_csv.cpp | 2 +- .../csv_scanner/csv_buffer_manager.hpp | 8 ++-- .../operator/csv_scanner/csv_schema.hpp | 5 ++- .../csv_scanner/{ => sniffer}/csv_sniffer.hpp | 17 ++----- .../csv_scanner/sniffer/sniff_result.hpp | 34 ++++++++++++++ src/main/relation/read_csv_relation.cpp | 2 +- test/sql/copy/csv/glob/read_csv_glob.test | 11 +++++ 24 files changed, 138 insertions(+), 64 deletions(-) create mode 100644 data/csv/glob_dif_dialect/14166/__2000.csv create mode 100644 data/csv/glob_dif_dialect/14166/__2001.csv rename src/include/duckdb/execution/operator/csv_scanner/{ => sniffer}/csv_sniffer.hpp (95%) create mode 100644 src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp diff --git a/data/csv/glob_dif_dialect/14166/__2000.csv b/data/csv/glob_dif_dialect/14166/__2000.csv new file mode 100644 index 00000000000..a6885b98e8f --- /dev/null +++ b/data/csv/glob_dif_dialect/14166/__2000.csv @@ -0,0 +1,3 @@ 
+date_col,int_col,double_col +2000-01-01,10,80.9189441112103 +2000-01-02,5,109.16581782022259 diff --git a/data/csv/glob_dif_dialect/14166/__2001.csv b/data/csv/glob_dif_dialect/14166/__2001.csv new file mode 100644 index 00000000000..bb6ef187f95 --- /dev/null +++ b/data/csv/glob_dif_dialect/14166/__2001.csv @@ -0,0 +1 @@ +date_col,int_col,double_col diff --git a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp index c8a7d167678..064595f3c43 100644 --- a/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +++ b/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp @@ -119,15 +119,15 @@ void CSVBufferManager::ResetBuffer(const idx_t buffer_idx) { } } -idx_t CSVBufferManager::GetBufferSize() { +idx_t CSVBufferManager::GetBufferSize() const { return buffer_size; } -idx_t CSVBufferManager::BufferCount() { +idx_t CSVBufferManager::BufferCount() const { return cached_buffers.size(); } -bool CSVBufferManager::Done() { +bool CSVBufferManager::Done() const { return done; } @@ -144,7 +144,7 @@ void CSVBufferManager::ResetBufferManager() { } } -string CSVBufferManager::GetFilePath() { +string CSVBufferManager::GetFilePath() const { return file_path; } diff --git a/src/execution/operator/csv_scanner/scanner/base_scanner.cpp b/src/execution/operator/csv_scanner/scanner/base_scanner.cpp index 63e93edaee2..757598e140e 100644 --- a/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/base_scanner.cpp @@ -1,6 +1,6 @@ #include "duckdb/execution/operator/csv_scanner/base_scanner.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp" namespace duckdb { diff --git a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp 
b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp index 5d6a9b0d2ee..1eb0b218974 100644 --- a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +++ b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp @@ -60,14 +60,46 @@ bool CSVSchema::Empty() const { return columns.empty(); } -bool CSVSchema::SchemasMatch(string &error_message, vector &names, vector &types, - const string &cur_file_path) { - D_ASSERT(names.size() == types.size()); +bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_result, + const string &cur_file_path, bool is_minimal_sniffer) const{ + D_ASSERT(sniffer_result->names.size() == sniffer_result->return_types.size()); bool match = true; unordered_map current_schema; - for (idx_t i = 0; i < names.size(); i++) { + + for (idx_t i = 0; i < sniffer_result->names.size(); i++) { // Populate our little schema - current_schema[names[i]] = {types[i], i}; + current_schema[sniffer_result->names[i]] = {sniffer_result->return_types[i], i}; + } + if (is_minimal_sniffer) { + auto min_sniffer = static_cast(sniffer_result); + if (!min_sniffer->more_than_one_row) { + bool min_sniff_match = true; + // If we don't have more than one row, either the names must match or the types must match. + for (auto &column : columns) { + if (current_schema.find(column.name) == current_schema.end()) { + min_sniff_match = false; + break; + } + } + if (min_sniff_match) { + return true; + } + // Otherwise, the types must match. + min_sniff_match = true; + if (sniffer_result->return_types.size() == current_schema.size()) { + idx_t return_type_idx = 0; + for (auto &column : current_schema) { + if (column.second.type != sniffer_result->return_types[return_type_idx++]) { + min_sniff_match = false; + break; + } + } + } + if (min_sniff_match) { + return true; + } + } + // If we got to this point, the minimal sniffer doesn't match, we throw an error. 
} // Here we check if the schema of a given file matched our original schema // We consider it's not a match if: diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index baefcff45fa..935aa27136b 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -1,4 +1,4 @@ -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/common/types/value.hpp" namespace duckdb { @@ -88,15 +88,14 @@ void CSVSniffer::SetResultOptions() { options.dialect_options.rows_until_header = best_candidate->GetStateMachine().dialect_options.rows_until_header; } -SnifferResult CSVSniffer::MinimalSniff() { +AdaptiveSnifferResult CSVSniffer::MinimalSniff() { if (set_columns.IsSet()) { // Nothing to see here - return SnifferResult(*set_columns.types, *set_columns.names); + return AdaptiveSnifferResult(*set_columns.types, *set_columns.names, true); } // Return Types detected vector return_types; // Column Names detected - vector names; buffer_manager->sniffing = true; constexpr idx_t result_size = 2; @@ -106,7 +105,8 @@ SnifferResult CSVSniffer::MinimalSniff() { ColumnCountScanner count_scanner(buffer_manager, state_machine, error_handler, result_size); auto &sniffed_column_counts = count_scanner.ParseChunk(); if (sniffed_column_counts.result_position == 0) { - return {{}, {}}; + // The file is an empty file, we just return + return {{}, {}, false}; } state_machine->dialect_options.num_cols = sniffed_column_counts[0].number_of_columns; @@ -130,20 +130,20 @@ SnifferResult CSVSniffer::MinimalSniff() { // Possibly Gather Header vector potential_header; - if (start_row != 0) { - for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) { - auto &cur_vector = data_chunk.data[col_idx]; - auto vector_data = 
FlatVector::GetData(cur_vector); - auto &validity = FlatVector::Validity(cur_vector); - HeaderValue val; - if (validity.RowIsValid(0)) { - val = HeaderValue(vector_data[0]); - } - potential_header.emplace_back(val); + + for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) { + auto &cur_vector = data_chunk.data[col_idx]; + auto vector_data = FlatVector::GetData(cur_vector); + auto &validity = FlatVector::Validity(cur_vector); + HeaderValue val; + if (validity.RowIsValid(0)) { + val = HeaderValue(vector_data[0]); } + potential_header.emplace_back(val); } - names = DetectHeaderInternal(buffer_manager->context, potential_header, *state_machine, set_columns, - best_sql_types_candidates_per_column_idx, options, *error_handler); + + vector names = DetectHeaderInternal(buffer_manager->context, potential_header, *state_machine, set_columns, + best_sql_types_candidates_per_column_idx, options, *error_handler); for (idx_t column_idx = 0; column_idx < best_sql_types_candidates_per_column_idx.size(); column_idx++) { LogicalType d_type = best_sql_types_candidates_per_column_idx[column_idx].back(); @@ -153,10 +153,11 @@ SnifferResult CSVSniffer::MinimalSniff() { detected_types.push_back(d_type); } - return {detected_types, names}; + return {detected_types, names, sniffed_column_counts.result_position > 2}; } -SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) { + +SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { auto min_sniff_res = MinimalSniff(); bool run_full = error_handler->AnyErrors() || detection_error_handler->AnyErrors(); // Check if we are happy with the result or if we need to do more sniffing @@ -165,7 +166,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) { if (!set_columns.IsSet() && !options.file_options.AnySet()) { string error; run_full = - !file_schema.SchemasMatch(error, min_sniff_res.names, min_sniff_res.return_types, options.file_path); + !file_schema.SchemasMatch(error, &min_sniff_res, 
options.file_path, true); } } if (run_full) { @@ -173,14 +174,14 @@ SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) { auto full_sniffer = SniffCSV(); if (!set_columns.IsSet() && !options.file_options.AnySet()) { string error; - if (!file_schema.SchemasMatch(error, full_sniffer.names, full_sniffer.return_types, options.file_path) && + if (!file_schema.SchemasMatch(error, &full_sniffer, options.file_path, false) && !options.ignore_errors.GetValue()) { throw InvalidInputException(error); } } return full_sniffer; } - return min_sniff_res; + return static_cast(min_sniff_res); } SnifferResult CSVSniffer::SniffCSV(bool force_match) { buffer_manager->sniffing = true; diff --git a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp index bf142a93010..c335f03ce74 100644 --- a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp @@ -1,5 +1,5 @@ #include "duckdb/common/shared_ptr.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/main/client_data.hpp" #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp" diff --git a/src/execution/operator/csv_scanner/sniffer/header_detection.cpp b/src/execution/operator/csv_scanner/sniffer/header_detection.cpp index cf42f56dd3a..9475f5942da 100644 --- a/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/header_detection.cpp @@ -1,5 +1,5 @@ #include "duckdb/common/types/cast_helpers.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp" #include "utf8proc.hpp" diff --git 
a/src/execution/operator/csv_scanner/sniffer/type_detection.cpp b/src/execution/operator/csv_scanner/sniffer/type_detection.cpp index df2a9540edd..89347630fae 100644 --- a/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/type_detection.cpp @@ -4,7 +4,7 @@ #include "duckdb/common/operator/integer_cast_operator.hpp" #include "duckdb/common/string.hpp" #include "duckdb/common/types/time.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" namespace duckdb { struct TryCastFloatingOperator { diff --git a/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp b/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp index 43d69318307..8d3e268450c 100644 --- a/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +++ b/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp @@ -1,4 +1,4 @@ -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/csv_scanner/csv_casting.hpp" namespace duckdb { diff --git a/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp b/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp index 34fa41469b3..a693144decc 100644 --- a/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +++ b/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp @@ -1,4 +1,4 @@ -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" namespace duckdb { void CSVSniffer::ReplaceTypes() { diff --git a/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp b/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp index 665c5b392f2..eae140f7d36 100644 --- 
a/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +++ b/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp @@ -1,5 +1,5 @@ #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "utf8proc_wrapper.hpp" #include "duckdb/main/error_manager.hpp" #include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp" diff --git a/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp b/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp index 6c93cc939f3..929892c3b7b 100644 --- a/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +++ b/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp @@ -1,6 +1,6 @@ #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp" #include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" namespace duckdb { diff --git a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp index e35894861d3..3e457580c59 100644 --- a/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +++ b/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp @@ -1,6 +1,6 @@ #include "duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp" #include "duckdb/function/table/read_csv.hpp" diff --git a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp 
b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp index 4f3e9dce824..cefb134138c 100644 --- a/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +++ b/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp @@ -1,6 +1,6 @@ #include "duckdb/execution/operator/csv_scanner/global_csv_state.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/csv_scanner/scanner_boundary.hpp" #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp" #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp" diff --git a/src/function/table/copy_csv.cpp b/src/function/table/copy_csv.cpp index b2c16a67172..26d04f89df7 100644 --- a/src/function/table/copy_csv.cpp +++ b/src/function/table/copy_csv.cpp @@ -7,7 +7,7 @@ #include "duckdb/common/types/column/column_data_collection.hpp" #include "duckdb/common/types/string_type.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/function/copy_function.hpp" #include "duckdb/function/scalar/string_functions.hpp" #include "duckdb/function/table/read_csv.hpp" diff --git a/src/function/table/read_csv.cpp b/src/function/table/read_csv.cpp index 58948af7f03..02e571b0552 100644 --- a/src/function/table/read_csv.cpp +++ b/src/function/table/read_csv.cpp @@ -8,7 +8,7 @@ #include "duckdb/common/union_by_name.hpp" #include "duckdb/execution/operator/csv_scanner/global_csv_state.hpp" #include "duckdb/execution/operator/csv_scanner/csv_error.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp" #include 
"duckdb/function/function_set.hpp" #include "duckdb/main/client_context.hpp" diff --git a/src/function/table/sniff_csv.cpp b/src/function/table/sniff_csv.cpp index 2a1d1b98437..ae38e5587a9 100644 --- a/src/function/table/sniff_csv.cpp +++ b/src/function/table/sniff_csv.cpp @@ -1,7 +1,7 @@ #include "duckdb/function/built_in_functions.hpp" #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp" #include "duckdb/common/types/data_chunk.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp" #include "duckdb/function/table_function.hpp" #include "duckdb/main/client_context.hpp" diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp index 396c60c9ba2..5bcf0af6511 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp @@ -35,15 +35,15 @@ class CSVBufferManager { void UnpinBuffer(const idx_t cache_idx); //! Returns the buffer size set for this CSV buffer manager - idx_t GetBufferSize(); + idx_t GetBufferSize() const; //! Returns the number of buffers in the cached_buffers cache - idx_t BufferCount(); + idx_t BufferCount() const; //! If this buffer manager is done. In the context of a buffer manager it means that it read all buffers at least //! once. 
- bool Done(); + bool Done() const; void ResetBufferManager(); - string GetFilePath(); + string GetFilePath() const; ClientContext &context; idx_t skip_rows = 0; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp index cf56f75f9ad..e2693517e2c 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp @@ -9,6 +9,7 @@ #pragma once #include "duckdb/common/types.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp" namespace duckdb { //! Basic CSV Column Info @@ -23,8 +24,8 @@ struct CSVColumnInfo { struct CSVSchema { void Initialize(vector &names, vector &types, const string &file_path); bool Empty() const; - bool SchemasMatch(string &error_message, vector &names, vector &types, - const string &cur_file_path); + bool SchemasMatch(string &error_message, SnifferResult *sniffer_resilt, + const string &cur_file_path, bool is_minimal_sniffer) const; private: static bool CanWeCastIt(LogicalTypeId source, LogicalTypeId destination); diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp similarity index 95% rename from src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp rename to src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp index fa463788e36..6de097deaa1 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp @@ -1,7 +1,7 @@ //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +// duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp // // //===----------------------------------------------------------------------===// @@ 
-14,6 +14,7 @@ #include "duckdb/execution/operator/csv_scanner/column_count_scanner.hpp" #include "duckdb/execution/operator/csv_scanner/csv_schema.hpp" #include "duckdb/execution/operator/csv_scanner/header_value.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp" namespace duckdb { struct DateTimestampSniffing { @@ -22,16 +23,6 @@ struct DateTimestampSniffing { vector format; idx_t initial_size; }; -//! Struct to store the result of the Sniffer -struct SnifferResult { - SnifferResult(vector return_types_p, vector names_p) - : return_types(std::move(return_types_p)), names(std::move(names_p)) { - } - //! Return Types that were detected - vector return_types; - //! Column Names that were detected - vector names; -}; //! All the options that will be used to sniff the dialect of the CSV file struct DialectCandidates { @@ -134,10 +125,10 @@ class CSVSniffer { //! data types It does this considering a priorly set CSV schema. If there is a mismatch of the schema it runs the //! full on blazing all guns sniffer, if that still fails it tells the user to union_by_name. //! It returns the projection order. - SnifferResult AdaptiveSniff(CSVSchema &file_schema); + SnifferResult AdaptiveSniff(const CSVSchema &file_schema); //! 
Function that only sniffs the first two rows, to verify if a header exists and what are the data types - SnifferResult MinimalSniff(); + AdaptiveSnifferResult MinimalSniff(); static NewLineIdentifier DetectNewLineDelimiter(CSVBufferManager &buffer_manager); diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp new file mode 100644 index 00000000000..a31d510330b --- /dev/null +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp" +#include "duckdb/common/vector.hpp" + + +namespace duckdb { + +//! Struct to store the result of the Sniffer +struct SnifferResult { + SnifferResult(vector return_types_p, vector names_p) + : return_types(std::move(return_types_p)), names(std::move(names_p)) { + } + //! Return Types that were detected + vector return_types; + //! 
Column Names that were detected + vector names; +}; + +struct AdaptiveSnifferResult: SnifferResult { + AdaptiveSnifferResult(vector return_types_p, vector names_p, bool more_than_one_row_p) + : SnifferResult(std::move(return_types_p),std::move(names_p)), more_than_one_row(more_than_one_row_p) { + } + bool more_than_one_row; +}; +} \ No newline at end of file diff --git a/src/main/relation/read_csv_relation.cpp b/src/main/relation/read_csv_relation.cpp index a23091eee5a..56eb0d6f5cc 100644 --- a/src/main/relation/read_csv_relation.cpp +++ b/src/main/relation/read_csv_relation.cpp @@ -1,7 +1,7 @@ #include "duckdb/main/relation/read_csv_relation.hpp" #include "duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp" -#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp" +#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp" #include "duckdb/parser/expression/columnref_expression.hpp" #include "duckdb/parser/expression/comparison_expression.hpp" #include "duckdb/parser/expression/constant_expression.hpp" diff --git a/test/sql/copy/csv/glob/read_csv_glob.test b/test/sql/copy/csv/glob/read_csv_glob.test index 8b7b693c42a..784bef427a2 100644 --- a/test/sql/copy/csv/glob/read_csv_glob.test +++ b/test/sql/copy/csv/glob/read_csv_glob.test @@ -5,6 +5,12 @@ statement ok PRAGMA enable_verification +query III +FROM 'data/csv/glob_dif_dialect/14166/__200*.csv'; +---- +2000-01-01 10 80.9189441112103 +2000-01-02 5 109.16581782022259 + # Globbing with different dialects query III FROM 'data/csv/glob_dif_dialect/f_*.csv' @@ -17,6 +23,11 @@ FROM 'data/csv/glob_dif_dialect/f_*.csv' 4 pedro pedro@email.com 5 tim tim@email.com +query III +FROM read_csv(['data/csv/glob_dif_dialect/f_1.csv', 'data/csv/glob_dif_dialect/empty.csv']) +---- + + # simple globbing query I SELECT * FROM read_csv('data/csv/glob/a?/*.csv') ORDER BY 1 From 7aef75b7a2b31a5329e40a9eaa96335d2360f8cb Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 30 Sep 2024 15:01:38 +0200 
Subject: [PATCH 02/20] More tests/fixes --- .../csv_scanner/scanner/csv_schema.cpp | 14 +++--- .../csv_scanner/sniffer/csv_sniffer.cpp | 6 +-- .../operator/csv_scanner/csv_schema.hpp | 4 +- .../csv_scanner/sniffer/sniff_result.hpp | 7 ++- test/sql/copy/csv/glob/read_csv_glob.test | 23 ---------- .../sql/copy/csv/glob/test_unmatch_globs.test | 24 ++++++++++ test/sql/copy/csv/test_mismatch_schemas.test | 44 +++++++++---------- 7 files changed, 61 insertions(+), 61 deletions(-) create mode 100644 test/sql/copy/csv/glob/test_unmatch_globs.test diff --git a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp index 1eb0b218974..b2fe12f4ef9 100644 --- a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +++ b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp @@ -60,8 +60,8 @@ bool CSVSchema::Empty() const { return columns.empty(); } -bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_result, - const string &cur_file_path, bool is_minimal_sniffer) const{ +bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_result, const string &cur_file_path, + bool is_minimal_sniffer) const { D_ASSERT(sniffer_result->names.size() == sniffer_result->return_types.size()); bool match = true; unordered_map current_schema; @@ -71,7 +71,7 @@ bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_resul current_schema[sniffer_result->names[i]] = {sniffer_result->return_types[i], i}; } if (is_minimal_sniffer) { - auto min_sniffer = static_cast(sniffer_result); + auto min_sniffer = static_cast(sniffer_result); if (!min_sniffer->more_than_one_row) { bool min_sniff_match = true; // If we don't have more than one row, either the names must match or the types must match. @@ -86,14 +86,16 @@ bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_resul } // Otherwise, the types must match. 
min_sniff_match = true; - if (sniffer_result->return_types.size() == current_schema.size()) { + if (sniffer_result->return_types.size() == columns.size()) { idx_t return_type_idx = 0; - for (auto &column : current_schema) { - if (column.second.type != sniffer_result->return_types[return_type_idx++]) { + for (auto &column : columns) { + if (column.type != sniffer_result->return_types[return_type_idx++]) { min_sniff_match = false; break; } } + } else { + min_sniff_match = false; } if (min_sniff_match) { return true; diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index 935aa27136b..a40c6724818 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -153,10 +153,9 @@ AdaptiveSnifferResult CSVSniffer::MinimalSniff() { detected_types.push_back(d_type); } - return {detected_types, names, sniffed_column_counts.result_position > 2}; + return {detected_types, names, sniffed_column_counts.result_position > 1}; } - SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { auto min_sniff_res = MinimalSniff(); bool run_full = error_handler->AnyErrors() || detection_error_handler->AnyErrors(); @@ -165,8 +164,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { // If we got no errors, we also run full if schemas do not match. 
if (!set_columns.IsSet() && !options.file_options.AnySet()) { string error; - run_full = - !file_schema.SchemasMatch(error, &min_sniff_res, options.file_path, true); + run_full = !file_schema.SchemasMatch(error, &min_sniff_res, options.file_path, true); } } if (run_full) { diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp index e2693517e2c..1e76c3a13f2 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp @@ -24,8 +24,8 @@ struct CSVColumnInfo { struct CSVSchema { void Initialize(vector &names, vector &types, const string &file_path); bool Empty() const; - bool SchemasMatch(string &error_message, SnifferResult *sniffer_resilt, - const string &cur_file_path, bool is_minimal_sniffer) const; + bool SchemasMatch(string &error_message, SnifferResult *sniffer_resilt, const string &cur_file_path, + bool is_minimal_sniffer) const; private: static bool CanWeCastIt(LogicalTypeId source, LogicalTypeId destination); diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp index a31d510330b..f7f2fe6d260 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp @@ -11,7 +11,6 @@ #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp" #include "duckdb/common/vector.hpp" - namespace duckdb { //! 
Struct to store the result of the Sniffer @@ -25,10 +24,10 @@ struct SnifferResult { vector names; }; -struct AdaptiveSnifferResult: SnifferResult { +struct AdaptiveSnifferResult : SnifferResult { AdaptiveSnifferResult(vector return_types_p, vector names_p, bool more_than_one_row_p) - : SnifferResult(std::move(return_types_p),std::move(names_p)), more_than_one_row(more_than_one_row_p) { + : SnifferResult(std::move(return_types_p), std::move(names_p)), more_than_one_row(more_than_one_row_p) { } bool more_than_one_row; }; -} \ No newline at end of file +} // namespace duckdb diff --git a/test/sql/copy/csv/glob/read_csv_glob.test b/test/sql/copy/csv/glob/read_csv_glob.test index 784bef427a2..c69b64c67d4 100644 --- a/test/sql/copy/csv/glob/read_csv_glob.test +++ b/test/sql/copy/csv/glob/read_csv_glob.test @@ -5,29 +5,6 @@ statement ok PRAGMA enable_verification -query III -FROM 'data/csv/glob_dif_dialect/14166/__200*.csv'; ----- -2000-01-01 10 80.9189441112103 -2000-01-02 5 109.16581782022259 - -# Globbing with different dialects -query III -FROM 'data/csv/glob_dif_dialect/f_*.csv' ----- -1 alice alice@email.com -2 eve eve@email.com -3r bob NULL -1 alice alice@email.com -3 bob bob@email.com -4 pedro pedro@email.com -5 tim tim@email.com - -query III -FROM read_csv(['data/csv/glob_dif_dialect/f_1.csv', 'data/csv/glob_dif_dialect/empty.csv']) ----- - - # simple globbing query I SELECT * FROM read_csv('data/csv/glob/a?/*.csv') ORDER BY 1 diff --git a/test/sql/copy/csv/glob/test_unmatch_globs.test b/test/sql/copy/csv/glob/test_unmatch_globs.test new file mode 100644 index 00000000000..8770f6d39ec --- /dev/null +++ b/test/sql/copy/csv/glob/test_unmatch_globs.test @@ -0,0 +1,24 @@ +# name: test/sql/copy/csv/glob/test_unmatch_globs.test +# description: Test globbing CSVs +# group: [glob] + +statement ok +PRAGMA enable_verification + +#query III +#FROM 'data/csv/glob_dif_dialect/14166/__200*.csv'; +#---- +#2000-01-01 10 80.9189441112103 +#2000-01-02 5 109.16581782022259 + +# 
Globbing with different dialects +query III +FROM 'data/csv/glob_dif_dialect/f_*.csv' +---- +1 alice alice@email.com +2 eve eve@email.com +3r bob NULL +1 alice alice@email.com +3 bob bob@email.com +4 pedro pedro@email.com +5 tim tim@email.com diff --git a/test/sql/copy/csv/test_mismatch_schemas.test b/test/sql/copy/csv/test_mismatch_schemas.test index 57b73edc634..572a297acb8 100644 --- a/test/sql/copy/csv/test_mismatch_schemas.test +++ b/test/sql/copy/csv/test_mismatch_schemas.test @@ -5,28 +5,28 @@ statement ok PRAGMA enable_verification -# We will use the header from the first file (the one identified in the binder) as our schema. If other files have extra columns, these columns will be ignored -query III -SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', -'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_4.csv']) -ORDER BY ALL; ----- -1 2 3 -1 2 3 -1 2 3 -1 2 3 -1 2 3 -1 2 3 -1 2 3 -1 2 3 - -# If there is a mismatch, all files must have a header, or error. -statement error -SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', -'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_no_header.csv']) -ORDER BY ALL; ----- -Current file: data/csv/multiple_files/more_columns/file_no_header.csv +## We will use the header from the first file (the one identified in the binder) as our schema. 
If other files have extra columns, these columns will be ignored +#query III +#SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', +#'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_4.csv']) +#ORDER BY ALL; +#---- +#1 2 3 +#1 2 3 +#1 2 3 +#1 2 3 +#1 2 3 +#1 2 3 +#1 2 3 +#1 2 3 +# +## If there is a mismatch, all files must have a header, or error. +#statement error +#SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', +#'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_no_header.csv']) +#ORDER BY ALL; +#---- +#Current file: data/csv/multiple_files/more_columns/file_no_header.csv # What if we add file 5, has 3 columns but with a name mismatch statement error From efc2b0f80eadc6a0bf497d384e985b499a378a8c Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 30 Sep 2024 15:03:50 +0200 Subject: [PATCH 03/20] fix tests --- .../sql/copy/csv/glob/test_unmatch_globs.test | 10 ++--- test/sql/copy/csv/test_mismatch_schemas.test | 44 +++++++++---------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/test/sql/copy/csv/glob/test_unmatch_globs.test b/test/sql/copy/csv/glob/test_unmatch_globs.test index 8770f6d39ec..f267e00093e 100644 --- a/test/sql/copy/csv/glob/test_unmatch_globs.test +++ b/test/sql/copy/csv/glob/test_unmatch_globs.test @@ -5,11 +5,11 @@ statement ok PRAGMA enable_verification -#query III -#FROM 'data/csv/glob_dif_dialect/14166/__200*.csv'; -#---- -#2000-01-01 10 80.9189441112103 -#2000-01-02 5 109.16581782022259 +query III +FROM 'data/csv/glob_dif_dialect/14166/__200*.csv'; +---- +2000-01-01 10 80.9189441112103 +2000-01-02 5 109.16581782022259 # Globbing with different dialects query III diff --git a/test/sql/copy/csv/test_mismatch_schemas.test b/test/sql/copy/csv/test_mismatch_schemas.test index 
572a297acb8..57b73edc634 100644 --- a/test/sql/copy/csv/test_mismatch_schemas.test +++ b/test/sql/copy/csv/test_mismatch_schemas.test @@ -5,28 +5,28 @@ statement ok PRAGMA enable_verification -## We will use the header from the first file (the one identified in the binder) as our schema. If other files have extra columns, these columns will be ignored -#query III -#SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', -#'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_4.csv']) -#ORDER BY ALL; -#---- -#1 2 3 -#1 2 3 -#1 2 3 -#1 2 3 -#1 2 3 -#1 2 3 -#1 2 3 -#1 2 3 -# -## If there is a mismatch, all files must have a header, or error. -#statement error -#SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', -#'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_no_header.csv']) -#ORDER BY ALL; -#---- -#Current file: data/csv/multiple_files/more_columns/file_no_header.csv +# We will use the header from the first file (the one identified in the binder) as our schema. If other files have extra columns, these columns will be ignored +query III +SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', +'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_4.csv']) +ORDER BY ALL; +---- +1 2 3 +1 2 3 +1 2 3 +1 2 3 +1 2 3 +1 2 3 +1 2 3 +1 2 3 + +# If there is a mismatch, all files must have a header, or error. 
+statement error +SELECT * FROM read_csv(['data/csv/multiple_files/more_columns/file_1.csv','data/csv/multiple_files/more_columns/file_2.csv', +'data/csv/multiple_files/more_columns/file_3.csv','data/csv/multiple_files/more_columns/file_no_header.csv']) +ORDER BY ALL; +---- +Current file: data/csv/multiple_files/more_columns/file_no_header.csv # What if we add file 5, has 3 columns but with a name mismatch statement error From 124d68becffe0d437ddd2e969726e7392fd015fa Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Mon, 30 Sep 2024 15:27:46 +0200 Subject: [PATCH 04/20] Adding a test with a single type line --- data/csv/glob_dif_dialect/14166/empty.csv | 0 data/csv/glob_dif_dialect/14166/matching_types.csv | 1 + .../operator/csv_scanner/scanner/csv_schema.cpp | 5 +++++ test/sql/copy/csv/glob/test_unmatch_globs.test | 12 ++++++++++++ 4 files changed, 18 insertions(+) create mode 100644 data/csv/glob_dif_dialect/14166/empty.csv create mode 100644 data/csv/glob_dif_dialect/14166/matching_types.csv diff --git a/data/csv/glob_dif_dialect/14166/empty.csv b/data/csv/glob_dif_dialect/14166/empty.csv new file mode 100644 index 00000000000..e69de29bb2d diff --git a/data/csv/glob_dif_dialect/14166/matching_types.csv b/data/csv/glob_dif_dialect/14166/matching_types.csv new file mode 100644 index 00000000000..87f3c093dad --- /dev/null +++ b/data/csv/glob_dif_dialect/14166/matching_types.csv @@ -0,0 +1 @@ +2003-01-02,5,109.16581782022259 \ No newline at end of file diff --git a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp index b2fe12f4ef9..b1e9399eeb5 100644 --- a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +++ b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp @@ -98,6 +98,11 @@ bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_resul min_sniff_match = false; } if (min_sniff_match) { + // If we got here, we have the right types but the wrong names, lets fix 
the names + idx_t sniff_name_idx = 0; + for (auto &column : columns) { + sniffer_result->names[sniff_name_idx++] = column.name; + } return true; } } diff --git a/test/sql/copy/csv/glob/test_unmatch_globs.test b/test/sql/copy/csv/glob/test_unmatch_globs.test index f267e00093e..3b9e2c0761a 100644 --- a/test/sql/copy/csv/glob/test_unmatch_globs.test +++ b/test/sql/copy/csv/glob/test_unmatch_globs.test @@ -11,6 +11,18 @@ FROM 'data/csv/glob_dif_dialect/14166/__200*.csv'; 2000-01-01 10 80.9189441112103 2000-01-02 5 109.16581782022259 +query III +FROM read_csv(['data/csv/glob_dif_dialect/14166/__2000.csv', 'data/csv/glob_dif_dialect/14166/__2001.csv', 'data/csv/glob_dif_dialect/14166/empty.csv']); +---- +2000-01-01 10 80.9189441112103 +2000-01-02 5 109.16581782022259 + +query III +FROM read_csv(['data/csv/glob_dif_dialect/14166/__2000.csv','data/csv/glob_dif_dialect/14166/matching_types.csv']); +---- +2000-01-01 10 80.9189441112103 +2000-01-02 5 109.16581782022259 + # Globbing with different dialects query III FROM 'data/csv/glob_dif_dialect/f_*.csv' From 7894773caa72a33a0d227f2abb7c7861b4860c11 Mon Sep 17 00:00:00 2001 From: pdet Date: Mon, 30 Sep 2024 16:37:45 +0200 Subject: [PATCH 05/20] avoid slicing --- src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp | 2 +- .../execution/operator/csv_scanner/sniffer/sniff_result.hpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index a40c6724818..d3017f111a5 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -179,7 +179,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { } return full_sniffer; } - return static_cast(min_sniff_res); + return min_sniff_res.ToSnifferResult(); } SnifferResult CSVSniffer::SniffCSV(bool force_match) { buffer_manager->sniffing = true; diff 
--git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp index f7f2fe6d260..4d3b18932b8 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp @@ -29,5 +29,8 @@ struct AdaptiveSnifferResult : SnifferResult { : SnifferResult(std::move(return_types_p), std::move(names_p)), more_than_one_row(more_than_one_row_p) { } bool more_than_one_row; + SnifferResult ToSnifferResult() { + return {return_types,names}; + } }; } // namespace duckdb From c4f27cca3b259a0509ac771f2c7ed58916d021f7 Mon Sep 17 00:00:00 2001 From: pdet Date: Wed, 2 Oct 2024 14:26:12 +0200 Subject: [PATCH 06/20] Format --- .../execution/operator/csv_scanner/sniffer/sniff_result.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp index 4d3b18932b8..2a8e0de19fa 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp @@ -30,7 +30,7 @@ struct AdaptiveSnifferResult : SnifferResult { } bool more_than_one_row; SnifferResult ToSnifferResult() { - return {return_types,names}; + return {return_types, names}; } }; } // namespace duckdb From 53fec90563acd99dc27fe51e2ae50f9e304f4da7 Mon Sep 17 00:00:00 2001 From: pdet Date: Thu, 10 Oct 2024 12:19:38 +0200 Subject: [PATCH 07/20] Remove raw ptr --- .../csv_scanner/scanner/csv_schema.cpp | 10 +++--- .../csv_scanner/sniffer/csv_sniffer.cpp | 33 +++++++++++++------ .../operator/csv_scanner/csv_schema.hpp | 2 +- .../csv_scanner/sniffer/csv_sniffer.hpp | 5 +-- .../csv_scanner/sniffer/sniff_result.hpp | 3 +- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git 
a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp index b1e9399eeb5..fc6115cdc5f 100644 --- a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +++ b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp @@ -60,8 +60,10 @@ bool CSVSchema::Empty() const { return columns.empty(); } -bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_result, const string &cur_file_path, - bool is_minimal_sniffer) const { +bool CSVSchema::SchemasMatch(string &error_message, weak_ptr sniffer_result_ptr, + const string &cur_file_path, bool is_minimal_sniffer) const { + auto sniffer_result = sniffer_result_ptr.lock(); + D_ASSERT(sniffer_result); D_ASSERT(sniffer_result->names.size() == sniffer_result->return_types.size()); bool match = true; unordered_map current_schema; @@ -71,8 +73,8 @@ bool CSVSchema::SchemasMatch(string &error_message, SnifferResult *sniffer_resul current_schema[sniffer_result->names[i]] = {sniffer_result->return_types[i], i}; } if (is_minimal_sniffer) { - auto min_sniffer = static_cast(sniffer_result); - if (!min_sniffer->more_than_one_row) { + auto min_sniffer = static_cast(sniffer_result.get()); + if (min_sniffer && !min_sniffer->more_than_one_row) { bool min_sniff_match = true; // If we don't have more than one row, either the names must match or the types must match. 
for (auto &column : columns) { diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index d3017f111a5..f25d8675bde 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -88,10 +88,10 @@ void CSVSniffer::SetResultOptions() { options.dialect_options.rows_until_header = best_candidate->GetStateMachine().dialect_options.rows_until_header; } -AdaptiveSnifferResult CSVSniffer::MinimalSniff() { +shared_ptr CSVSniffer::MinimalSniff() { if (set_columns.IsSet()) { // Nothing to see here - return AdaptiveSnifferResult(*set_columns.types, *set_columns.names, true); + return make_shared_ptr(*set_columns.types, *set_columns.names, true); } // Return Types detected vector return_types; @@ -106,7 +106,7 @@ AdaptiveSnifferResult CSVSniffer::MinimalSniff() { auto &sniffed_column_counts = count_scanner.ParseChunk(); if (sniffed_column_counts.result_position == 0) { // The file is an empty file, we just return - return {{}, {}, false}; + return make_shared_ptr(); } state_machine->dialect_options.num_cols = sniffed_column_counts[0].number_of_columns; @@ -153,7 +153,7 @@ AdaptiveSnifferResult CSVSniffer::MinimalSniff() { detected_types.push_back(d_type); } - return {detected_types, names, sniffed_column_counts.result_position > 1}; + return make_shared_ptr(detected_types, names, sniffed_column_counts.result_position > 1); } SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { @@ -164,24 +164,25 @@ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { // If we got no errors, we also run full if schemas do not match. 
if (!set_columns.IsSet() && !options.file_options.AnySet()) { string error; - run_full = !file_schema.SchemasMatch(error, &min_sniff_res, options.file_path, true); + run_full = !file_schema.SchemasMatch(error, min_sniff_res, options.file_path, true); } } if (run_full) { // We run full sniffer - auto full_sniffer = SniffCSV(); + auto full_sniffer = SniffCSVShared(); if (!set_columns.IsSet() && !options.file_options.AnySet()) { string error; - if (!file_schema.SchemasMatch(error, &full_sniffer, options.file_path, false) && + if (!file_schema.SchemasMatch(error, full_sniffer, options.file_path, false) && !options.ignore_errors.GetValue()) { throw InvalidInputException(error); } } - return full_sniffer; + return *full_sniffer; } - return min_sniff_res.ToSnifferResult(); + return min_sniff_res->ToSnifferResult(); } -SnifferResult CSVSniffer::SniffCSV(bool force_match) { + +void CSVSniffer::SniffCSVInternal(bool force_match) { buffer_manager->sniffing = true; // 1. Dialect Detection DetectDialect(); @@ -266,10 +267,22 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) { throw InvalidInputException(error); } options.was_type_manually_set = manually_set; +} + +SnifferResult CSVSniffer::SniffCSV(bool force_match) { + SniffCSVInternal(force_match); if (set_columns.IsSet()) { return SnifferResult(*set_columns.types, *set_columns.names); } return SnifferResult(detected_types, names); } +shared_ptr CSVSniffer::SniffCSVShared(bool force_match) { + SniffCSVInternal(force_match); + if (set_columns.IsSet()) { + return make_shared_ptr(*set_columns.types, *set_columns.names); + } + return make_shared_ptr(detected_types, names); +} + } // namespace duckdb diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp index 1e76c3a13f2..334ba6e41eb 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp @@ 
-24,7 +24,7 @@ struct CSVColumnInfo { struct CSVSchema { void Initialize(vector &names, vector &types, const string &file_path); bool Empty() const; - bool SchemasMatch(string &error_message, SnifferResult *sniffer_resilt, const string &cur_file_path, + bool SchemasMatch(string &error_message, weak_ptr sniffer_result, const string &cur_file_path, bool is_minimal_sniffer) const; private: diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp index 6de097deaa1..0b9aaa459b4 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp @@ -118,8 +118,9 @@ class CSVSniffer { //! 3. Type Refinement: Refines the types of the columns for the remaining chunks //! 4. Header Detection: Figures out if the CSV file has a header and produces the names of the columns //! 5. Type Replacement: Replaces the types of the columns if the user specified them + void SniffCSVInternal(bool force_match); + shared_ptr SniffCSVShared(bool force_match = false); SnifferResult SniffCSV(bool force_match = false); - //! I call it adaptive, since that's a sexier term. //! In practice this Function that only sniffs the first two rows, to verify if a header exists and what are the //! data types It does this considering a priorly set CSV schema. If there is a mismatch of the schema it runs the @@ -128,7 +129,7 @@ class CSVSniffer { SnifferResult AdaptiveSniff(const CSVSchema &file_schema); //! 
Function that only sniffs the first two rows, to verify if a header exists and what are the data types - AdaptiveSnifferResult MinimalSniff(); + shared_ptr MinimalSniff(); static NewLineIdentifier DetectNewLineDelimiter(CSVBufferManager &buffer_manager); diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp index 2a8e0de19fa..405b34f69f1 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp @@ -25,7 +25,8 @@ struct SnifferResult { }; struct AdaptiveSnifferResult : SnifferResult { - AdaptiveSnifferResult(vector return_types_p, vector names_p, bool more_than_one_row_p) + explicit AdaptiveSnifferResult(vector return_types_p = {}, vector names_p = {}, + bool more_than_one_row_p = false) : SnifferResult(std::move(return_types_p), std::move(names_p)), more_than_one_row(more_than_one_row_p) { } bool more_than_one_row; From e7759b9c12b8b7350367887cee61177478ad7fb3 Mon Sep 17 00:00:00 2001 From: pdet Date: Thu, 10 Oct 2024 17:22:31 +0200 Subject: [PATCH 08/20] clang --- src/execution/operator/csv_scanner/scanner/csv_schema.cpp | 2 +- .../duckdb/execution/operator/csv_scanner/csv_schema.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp index fc6115cdc5f..52ddd07dbdc 100644 --- a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +++ b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp @@ -60,7 +60,7 @@ bool CSVSchema::Empty() const { return columns.empty(); } -bool CSVSchema::SchemasMatch(string &error_message, weak_ptr sniffer_result_ptr, +bool CSVSchema::SchemasMatch(string &error_message, const weak_ptr &sniffer_result_ptr, const string &cur_file_path, bool is_minimal_sniffer) const { auto 
sniffer_result = sniffer_result_ptr.lock(); D_ASSERT(sniffer_result); diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp index 334ba6e41eb..e351cbbec9e 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp @@ -24,7 +24,7 @@ struct CSVColumnInfo { struct CSVSchema { void Initialize(vector &names, vector &types, const string &file_path); bool Empty() const; - bool SchemasMatch(string &error_message, weak_ptr sniffer_result, const string &cur_file_path, + bool SchemasMatch(string &error_message, const weak_ptr &sniffer_result, const string &cur_file_path, bool is_minimal_sniffer) const; private: From 46a0f292081670f1733c0b3b6cf4aa7d3ace2fa4 Mon Sep 17 00:00:00 2001 From: pdet Date: Fri, 11 Oct 2024 16:57:01 +0200 Subject: [PATCH 09/20] Referenec --- .../csv_scanner/scanner/csv_schema.cpp | 22 +++++++------- .../csv_scanner/sniffer/csv_sniffer.cpp | 29 +++++-------------- .../operator/csv_scanner/csv_schema.hpp | 2 +- .../csv_scanner/sniffer/csv_sniffer.hpp | 5 ++-- .../csv_scanner/sniffer/sniff_result.hpp | 3 +- 5 files changed, 22 insertions(+), 39 deletions(-) diff --git a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp index 52ddd07dbdc..139398d7180 100644 --- a/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +++ b/src/execution/operator/csv_scanner/scanner/csv_schema.cpp @@ -60,21 +60,19 @@ bool CSVSchema::Empty() const { return columns.empty(); } -bool CSVSchema::SchemasMatch(string &error_message, const weak_ptr &sniffer_result_ptr, - const string &cur_file_path, bool is_minimal_sniffer) const { - auto sniffer_result = sniffer_result_ptr.lock(); - D_ASSERT(sniffer_result); - D_ASSERT(sniffer_result->names.size() == sniffer_result->return_types.size()); +bool CSVSchema::SchemasMatch(string 
&error_message, SnifferResult &sniffer_result, const string &cur_file_path, + bool is_minimal_sniffer) const { + D_ASSERT(sniffer_result.names.size() == sniffer_result.return_types.size()); bool match = true; unordered_map current_schema; - for (idx_t i = 0; i < sniffer_result->names.size(); i++) { + for (idx_t i = 0; i < sniffer_result.names.size(); i++) { // Populate our little schema - current_schema[sniffer_result->names[i]] = {sniffer_result->return_types[i], i}; + current_schema[sniffer_result.names[i]] = {sniffer_result.return_types[i], i}; } if (is_minimal_sniffer) { - auto min_sniffer = static_cast(sniffer_result.get()); - if (min_sniffer && !min_sniffer->more_than_one_row) { + auto min_sniffer = static_cast(sniffer_result); + if (!min_sniffer.more_than_one_row) { bool min_sniff_match = true; // If we don't have more than one row, either the names must match or the types must match. for (auto &column : columns) { @@ -88,10 +86,10 @@ bool CSVSchema::SchemasMatch(string &error_message, const weak_ptrreturn_types.size() == columns.size()) { + if (sniffer_result.return_types.size() == columns.size()) { idx_t return_type_idx = 0; for (auto &column : columns) { - if (column.type != sniffer_result->return_types[return_type_idx++]) { + if (column.type != sniffer_result.return_types[return_type_idx++]) { min_sniff_match = false; break; } @@ -103,7 +101,7 @@ bool CSVSchema::SchemasMatch(string &error_message, const weak_ptrnames[sniff_name_idx++] = column.name; + sniffer_result.names[sniff_name_idx++] = column.name; } return true; } diff --git a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp index f25d8675bde..950d74891b0 100644 --- a/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +++ b/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp @@ -88,10 +88,10 @@ void CSVSniffer::SetResultOptions() { options.dialect_options.rows_until_header = 
best_candidate->GetStateMachine().dialect_options.rows_until_header; } -shared_ptr CSVSniffer::MinimalSniff() { +AdaptiveSnifferResult CSVSniffer::MinimalSniff() { if (set_columns.IsSet()) { // Nothing to see here - return make_shared_ptr(*set_columns.types, *set_columns.names, true); + return AdaptiveSnifferResult(*set_columns.types, *set_columns.names, true); } // Return Types detected vector return_types; @@ -106,7 +106,7 @@ shared_ptr CSVSniffer::MinimalSniff() { auto &sniffed_column_counts = count_scanner.ParseChunk(); if (sniffed_column_counts.result_position == 0) { // The file is an empty file, we just return - return make_shared_ptr(); + return {{}, {}, false}; } state_machine->dialect_options.num_cols = sniffed_column_counts[0].number_of_columns; @@ -153,7 +153,7 @@ shared_ptr CSVSniffer::MinimalSniff() { detected_types.push_back(d_type); } - return make_shared_ptr(detected_types, names, sniffed_column_counts.result_position > 1); + return {detected_types, names, sniffed_column_counts.result_position > 1}; } SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { @@ -169,7 +169,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { } if (run_full) { // We run full sniffer - auto full_sniffer = SniffCSVShared(); + auto full_sniffer = SniffCSV(); if (!set_columns.IsSet() && !options.file_options.AnySet()) { string error; if (!file_schema.SchemasMatch(error, full_sniffer, options.file_path, false) && @@ -177,12 +177,11 @@ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) { throw InvalidInputException(error); } } - return *full_sniffer; + return full_sniffer; } - return min_sniff_res->ToSnifferResult(); + return min_sniff_res.ToSnifferResult(); } - -void CSVSniffer::SniffCSVInternal(bool force_match) { +SnifferResult CSVSniffer::SniffCSV(bool force_match) { buffer_manager->sniffing = true; // 1. 
Dialect Detection DetectDialect(); @@ -267,22 +266,10 @@ void CSVSniffer::SniffCSVInternal(bool force_match) { throw InvalidInputException(error); } options.was_type_manually_set = manually_set; -} - -SnifferResult CSVSniffer::SniffCSV(bool force_match) { - SniffCSVInternal(force_match); if (set_columns.IsSet()) { return SnifferResult(*set_columns.types, *set_columns.names); } return SnifferResult(detected_types, names); } -shared_ptr CSVSniffer::SniffCSVShared(bool force_match) { - SniffCSVInternal(force_match); - if (set_columns.IsSet()) { - return make_shared_ptr(*set_columns.types, *set_columns.names); - } - return make_shared_ptr(detected_types, names); -} - } // namespace duckdb diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp index e351cbbec9e..39e1b809a9c 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp @@ -24,7 +24,7 @@ struct CSVColumnInfo { struct CSVSchema { void Initialize(vector &names, vector &types, const string &file_path); bool Empty() const; - bool SchemasMatch(string &error_message, const weak_ptr &sniffer_result, const string &cur_file_path, + bool SchemasMatch(string &error_message, SnifferResult &sniffer_result, const string &cur_file_path, bool is_minimal_sniffer) const; private: diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp index 0b9aaa459b4..6de097deaa1 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp @@ -118,9 +118,8 @@ class CSVSniffer { //! 3. Type Refinement: Refines the types of the columns for the remaining chunks //! 4. 
Header Detection: Figures out if the CSV file has a header and produces the names of the columns //! 5. Type Replacement: Replaces the types of the columns if the user specified them - void SniffCSVInternal(bool force_match); - shared_ptr SniffCSVShared(bool force_match = false); SnifferResult SniffCSV(bool force_match = false); + //! I call it adaptive, since that's a sexier term. //! In practice this Function that only sniffs the first two rows, to verify if a header exists and what are the //! data types It does this considering a priorly set CSV schema. If there is a mismatch of the schema it runs the @@ -129,7 +128,7 @@ class CSVSniffer { SnifferResult AdaptiveSniff(const CSVSchema &file_schema); //! Function that only sniffs the first two rows, to verify if a header exists and what are the data types - shared_ptr MinimalSniff(); + AdaptiveSnifferResult MinimalSniff(); static NewLineIdentifier DetectNewLineDelimiter(CSVBufferManager &buffer_manager); diff --git a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp index 405b34f69f1..2a8e0de19fa 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp @@ -25,8 +25,7 @@ struct SnifferResult { }; struct AdaptiveSnifferResult : SnifferResult { - explicit AdaptiveSnifferResult(vector return_types_p = {}, vector names_p = {}, - bool more_than_one_row_p = false) + AdaptiveSnifferResult(vector return_types_p, vector names_p, bool more_than_one_row_p) : SnifferResult(std::move(return_types_p), std::move(names_p)), more_than_one_row(more_than_one_row_p) { } bool more_than_one_row; From cf510e0eccfdede3abd77d064f3cd689a84470f6 Mon Sep 17 00:00:00 2001 From: Richard Wesley <13156216+hawkfish@users.noreply.github.com> Date: Fri, 11 Oct 2024 13:47:54 -0700 Subject: [PATCH 10/20] Internal #3251: DateDiff Across Epoch 
Integer division in C++ rounds towards 0, not down. This was impacting all resolutions hour and below. fixes: duckdblabs/duckdb-internal#3251 --- src/core_functions/scalar/date/date_diff.cpp | 18 +++-- .../timestamp/test_date_diff_epoch.test | 66 +++++++++++++++++++ 2 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 test/sql/function/timestamp/test_date_diff_epoch.test diff --git a/src/core_functions/scalar/date/date_diff.cpp b/src/core_functions/scalar/date/date_diff.cpp index 6266dda3503..36376a2b571 100644 --- a/src/core_functions/scalar/date/date_diff.cpp +++ b/src/core_functions/scalar/date/date_diff.cpp @@ -28,6 +28,14 @@ struct DateDiff { }); } + // We need to truncate down, not towards 0 + static inline int64_t Truncate(int64_t value, int64_t units) { + return (value + (value < 0)) / units - (value < 0); + } + static inline int64_t Diff(int64_t start, int64_t end, int64_t units) { + return Truncate(end, units) - Truncate(start, units); + } + struct YearOperator { template static inline TR Operation(TA startdate, TB enddate) { @@ -204,30 +212,28 @@ template <> int64_t DateDiff::MillisecondsOperator::Operation(timestamp_t startdate, timestamp_t enddate) { D_ASSERT(Timestamp::IsFinite(startdate)); D_ASSERT(Timestamp::IsFinite(enddate)); - return Timestamp::GetEpochMs(enddate) - Timestamp::GetEpochMs(startdate); + return Diff(startdate.value, enddate.value, Interval::MICROS_PER_MSEC); } template <> int64_t DateDiff::SecondsOperator::Operation(timestamp_t startdate, timestamp_t enddate) { D_ASSERT(Timestamp::IsFinite(startdate)); D_ASSERT(Timestamp::IsFinite(enddate)); - return Timestamp::GetEpochSeconds(enddate) - Timestamp::GetEpochSeconds(startdate); + return Diff(startdate.value, enddate.value, Interval::MICROS_PER_SEC); } template <> int64_t DateDiff::MinutesOperator::Operation(timestamp_t startdate, timestamp_t enddate) { D_ASSERT(Timestamp::IsFinite(startdate)); D_ASSERT(Timestamp::IsFinite(enddate)); - return 
Timestamp::GetEpochSeconds(enddate) / Interval::SECS_PER_MINUTE - - Timestamp::GetEpochSeconds(startdate) / Interval::SECS_PER_MINUTE; + return Diff(startdate.value, enddate.value, Interval::MICROS_PER_MINUTE); } template <> int64_t DateDiff::HoursOperator::Operation(timestamp_t startdate, timestamp_t enddate) { D_ASSERT(Timestamp::IsFinite(startdate)); D_ASSERT(Timestamp::IsFinite(enddate)); - return Timestamp::GetEpochSeconds(enddate) / Interval::SECS_PER_HOUR - - Timestamp::GetEpochSeconds(startdate) / Interval::SECS_PER_HOUR; + return Diff(startdate.value, enddate.value, Interval::MICROS_PER_HOUR); } // TIME specialisations diff --git a/test/sql/function/timestamp/test_date_diff_epoch.test b/test/sql/function/timestamp/test_date_diff_epoch.test new file mode 100644 index 00000000000..4402462e3be --- /dev/null +++ b/test/sql/function/timestamp/test_date_diff_epoch.test @@ -0,0 +1,66 @@ +# name: test/sql/function/timestamp/test_date_diff_epoch.test +# description: Test equal period timestamp diffs across the Unix epoch +# group: [timestamp] + +statement ok +PRAGMA enable_verification + +query III +SELECT + start_ts, + end_ts, + DATEDIFF('day', start_ts, end_ts) AS dd_hour +FROM VALUES ( + '1970-01-03 12:12:12'::TIMESTAMP, + '1969-12-25 05:05:05'::TIMESTAMP + ) x(start_ts, end_ts); +---- +1970-01-03 12:12:12 1969-12-25 05:05:05 -9 + +query III +SELECT + start_ts, + end_ts, + DATEDIFF('hour', start_ts, end_ts) AS dd_hour +FROM VALUES ( + '1970-01-01 12:12:12'::TIMESTAMP, + '1969-12-31 05:05:05'::TIMESTAMP + ) x(start_ts, end_ts); +---- +1970-01-01 12:12:12 1969-12-31 05:05:05 -31 + +query III +SELECT + start_ts, + end_ts, + DATEDIFF('minute', start_ts, end_ts) AS dd_minute +FROM VALUES ( + '1970-01-01 00:12:12'::TIMESTAMP, + '1969-12-31 23:05:05'::TIMESTAMP + ) x(start_ts, end_ts); +---- +1970-01-01 00:12:12 1969-12-31 23:05:05 -67 + +query III +SELECT + start_ts, + end_ts, + DATEDIFF('second', start_ts, end_ts) AS dd_second +FROM VALUES ( + '1970-01-01 
00:00:12.456'::TIMESTAMP, + '1969-12-31 23:59:05.123'::TIMESTAMP + ) x(start_ts, end_ts); +---- +1970-01-01 00:00:12.456 1969-12-31 23:59:05.123 -67 + +query III +SELECT + start_ts, + end_ts, + DATEDIFF('millisecond', start_ts, end_ts) AS dd_second +FROM VALUES ( + '1970-01-01 00:00:12.456789'::TIMESTAMP, + '1969-12-31 23:59:05.123456'::TIMESTAMP + ) x(start_ts, end_ts); +---- +1970-01-01 00:00:12.456789 1969-12-31 23:59:05.123456 -67333 From 00848e12a928864f2c8e700594269472ba4d704e Mon Sep 17 00:00:00 2001 From: Tishj Date: Sat, 12 Oct 2024 19:24:43 +0200 Subject: [PATCH 11/20] add test with generated columns --- src/include/duckdb/main/table_description.hpp | 14 +++++ src/main/appender.cpp | 3 + src/main/client_context.cpp | 10 ++- test/appender/test_appender.cpp | 63 +++++++++++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/src/include/duckdb/main/table_description.hpp b/src/include/duckdb/main/table_description.hpp index 2a35b4f1669..151592f4935 100644 --- a/src/include/duckdb/main/table_description.hpp +++ b/src/include/duckdb/main/table_description.hpp @@ -13,12 +13,26 @@ namespace duckdb { struct TableDescription { +public: //! The schema of the table string schema; //! The table name of the table string table; //! The columns of the table vector columns; + +public: + idx_t PhysicalColumnCount() const { + idx_t count = 0; + for (auto &column : columns) { + if (column.Generated()) { + continue; + } + count++; + } + D_ASSERT(count != 0); + return count; + } }; } // namespace duckdb diff --git a/src/main/appender.cpp b/src/main/appender.cpp index ae360852414..33ef17bf444 100644 --- a/src/main/appender.cpp +++ b/src/main/appender.cpp @@ -62,6 +62,9 @@ Appender::Appender(Connection &con, const string &schema_name, const string &tab } vector> defaults; for (auto &column : description->columns) { + if (column.Generated()) { + continue; + } types.push_back(column.Type()); defaults.push_back(column.HasDefaultValue() ? 
&column.DefaultValue() : nullptr); } diff --git a/src/main/client_context.cpp b/src/main/client_context.cpp index 467f28dacde..90c029ab498 100644 --- a/src/main/client_context.cpp +++ b/src/main/client_context.cpp @@ -1132,13 +1132,19 @@ void ClientContext::Append(TableDescription &description, ColumnDataCollection & auto &table_entry = Catalog::GetEntry(*this, INVALID_CATALOG, description.schema, description.table); // verify that the table columns and types match up - if (description.columns.size() != table_entry.GetColumns().PhysicalColumnCount()) { + if (description.PhysicalColumnCount() != table_entry.GetColumns().PhysicalColumnCount()) { throw InvalidInputException("Failed to append: table entry has different number of columns!"); } + idx_t table_entry_col_idx = 0; for (idx_t i = 0; i < description.columns.size(); i++) { - if (description.columns[i].Type() != table_entry.GetColumns().GetColumn(PhysicalIndex(i)).Type()) { + auto &column = description.columns[i]; + if (column.Generated()) { + continue; + } + if (column.Type() != table_entry.GetColumns().GetColumn(PhysicalIndex(table_entry_col_idx)).Type()) { throw InvalidInputException("Failed to append: table entry has different number of columns!"); } + table_entry_col_idx++; } auto binder = Binder::CreateBinder(*this); auto bound_constraints = binder->BindConstraints(table_entry); diff --git a/test/appender/test_appender.cpp b/test/appender/test_appender.cpp index bb659982e2c..e20d498c883 100644 --- a/test/appender/test_appender.cpp +++ b/test/appender/test_appender.cpp @@ -169,6 +169,69 @@ TEST_CASE("Test AppendRow", "[appender]") { REQUIRE(CHECK_COLUMN(result, 2, {Value::TIMESTAMP(1992, 1, 1, 1, 1, 1, 0)})); } +TEST_CASE("Test appender with generated column", "[appender]") { + DuckDB db(nullptr); // Create an in-memory DuckDB database + Connection con(db); // Create a connection to the database + + SECTION("Insert into table with generated column first") { + // Try to create a table with a generated column 
+ REQUIRE_NOTHROW(con.Query(R"( + CREATE TABLE tbl ( + b VARCHAR GENERATED ALWAYS AS (a), + a VARCHAR + ) + )")); + + Appender appender(con, "tbl"); + REQUIRE_NOTHROW(appender.BeginRow()); + REQUIRE_NOTHROW(appender.Append("a")); + + // Column 'b' is generated from 'a', so it does not need to be explicitly appended + // End the row + REQUIRE_NOTHROW(appender.EndRow()); + + // Close the appender + REQUIRE_NOTHROW(appender.Close()); + + // Query the table to verify that the row was inserted correctly + auto result = con.Query("SELECT * FROM tbl"); + REQUIRE_NO_FAIL(*result); + + // Check that the column 'a' contains "a" and 'b' contains the generated value "a" + REQUIRE(CHECK_COLUMN(result, 0, {Value("a")})); + REQUIRE(CHECK_COLUMN(result, 1, {Value("a")})); + } + + SECTION("Insert into table with generated column second") { + // Try to create a table with a generated column + REQUIRE_NOTHROW(con.Query(R"( + CREATE TABLE tbl ( + a VARCHAR, + b VARCHAR GENERATED ALWAYS AS (a) + ) + )")); + + Appender appender(con, "tbl"); + REQUIRE_NOTHROW(appender.BeginRow()); + REQUIRE_NOTHROW(appender.Append("a")); + + // Column 'b' is generated from 'a', so it does not need to be explicitly appended + // End the row + REQUIRE_NOTHROW(appender.EndRow()); + + // Close the appender + REQUIRE_NOTHROW(appender.Close()); + + // Query the table to verify that the row was inserted correctly + auto result = con.Query("SELECT * FROM tbl"); + REQUIRE_NO_FAIL(*result); + + // Check that the column 'a' contains "a" and 'b' contains the generated value "a" + REQUIRE(CHECK_COLUMN(result, 0, {Value("a")})); + REQUIRE(CHECK_COLUMN(result, 1, {Value("a")})); + } +} + TEST_CASE("Test default value appender", "[appender]") { duckdb::unique_ptr result; DuckDB db(nullptr); From 4d78c47390720a37692f1b623703c8a930c58f0b Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Sat, 12 Oct 2024 21:50:45 +0200 Subject: [PATCH 12/20] Never generate maps if map_inference_threshold is -1 --- 
extension/json/json_functions/json_structure.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extension/json/json_functions/json_structure.cpp b/extension/json/json_functions/json_structure.cpp index d83ccf33bfb..7982003fc13 100644 --- a/extension/json/json_functions/json_structure.cpp +++ b/extension/json/json_functions/json_structure.cpp @@ -716,7 +716,8 @@ static LogicalType StructureToTypeObject(ClientContext &context, const JSONStruc } // If it's an inconsistent object we also just do MAP with the best-possible, recursively-merged value type - if (IsStructureInconsistent(desc, node.count, node.null_count, field_appearance_threshold)) { + if (map_inference_threshold != DConstants::INVALID_INDEX && + IsStructureInconsistent(desc, node.count, node.null_count, field_appearance_threshold)) { return LogicalType::MAP(LogicalType::VARCHAR, GetMergedType(context, node, max_depth, field_appearance_threshold, map_inference_threshold, depth + 1, null_type)); From a736a346d9d5e4d16959eda64cece5d13787d1a3 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sun, 13 Oct 2024 14:29:13 +0200 Subject: [PATCH 13/20] Bump spatial to 3f94d52aa9f7d67b1a30e6cea642bbb790c04aa2 --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 7a7a82fb065..ef5605066c4 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -101,7 +101,7 @@ endif() duckdb_extension_load(spatial DONT_LINK LOAD_TESTS GIT_URL https://github.com/duckdb/duckdb_spatial.git - GIT_TAG bb9c829693965f029eb5a312aefed4c538fad781 + GIT_TAG 3f94d52aa9f7d67b1a30e6cea642bbb790c04aa2 INCLUDE_DIR spatial/include TEST_DIR test/sql ) From bba96ac0d56d2ad33ba57714930b1ad2c0ecc94a Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sun, 13 Oct 2024 14:36:18 +0200 Subject: [PATCH 14/20] Bump azure and delta 
extensions commits --- .github/config/out_of_tree_extensions.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 7a7a82fb065..131484a3885 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -38,7 +38,7 @@ if (NOT MINGW) duckdb_extension_load(azure LOAD_TESTS GIT_URL https://github.com/duckdb/duckdb_azure - GIT_TAG b0ffe7ada20cdbd0bee2bbe5461ecd22fb468062 + GIT_TAG a40ecb7bc9036eb8ecc5bf30db935a31b78011f5 ) endif() @@ -49,7 +49,7 @@ if (NOT MINGW AND NOT "${OS_NAME}" STREQUAL "linux") duckdb_extension_load(delta LOAD_TESTS GIT_URL https://github.com/duckdb/duckdb_delta - GIT_TAG 3933ebd800ad06a64656c9aef6ca7d62897fa4db + GIT_TAG 811db25f5bd405dea186d6c461a642a387502ad8 ) endif() From 50535f75788faba2355776d73d353230dbdb3b4c Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sun, 13 Oct 2024 14:37:29 +0200 Subject: [PATCH 15/20] Bump iceberg to latest main --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 7a7a82fb065..18469621b60 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -73,7 +73,7 @@ if (NOT MINGW) duckdb_extension_load(iceberg ${LOAD_ICEBERG_TESTS} GIT_URL https://github.com/duckdb/duckdb_iceberg - GIT_TAG 3f6d753787252e3da1d12157910b62edf729fc6e + GIT_TAG 8b48d1261564613274ac8e9fae01e572d965c99d ) endif() From 75ac4a6307cc1025cb49090493b3a00053ea18f2 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sun, 13 Oct 2024 14:37:44 +0200 Subject: [PATCH 16/20] Bump sqlite_scanner to latest main --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake
b/.github/config/out_of_tree_extensions.cmake index 18469621b60..2554a61bb6d 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -117,7 +117,7 @@ endif() duckdb_extension_load(sqlite_scanner ${STATIC_LINK_SQLITE} LOAD_TESTS GIT_URL https://github.com/duckdb/sqlite_scanner - GIT_TAG 315861963c8106397af36cbda10faebc8dae485a + GIT_TAG d5d62657702d33cb44a46cddc7ffc4b67bf7e961 ) duckdb_extension_load(sqlsmith From e88ac0a07dfa614c5853828f828d0e6c98237b10 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Sun, 13 Oct 2024 14:37:52 +0200 Subject: [PATCH 17/20] Bump vss to latest main --- .github/config/out_of_tree_extensions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 2554a61bb6d..befca34b15d 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -141,7 +141,7 @@ duckdb_extension_load(vss LOAD_TESTS DONT_LINK GIT_URL https://github.com/duckdb/duckdb_vss - GIT_TAG 77739ea5382cce3220af83803ac0b1e98b3ab7d8 + GIT_TAG dd880d6121c0f3dff27131e54e057c9db0f1c710 TEST_DIR test/sql ) From c1b37dea152e7197155ca7d2dd4475ae68f758cc Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Mon, 14 Oct 2024 09:10:21 +0200 Subject: [PATCH 18/20] Add profiling for extension operators --- src/main/query_profiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/query_profiler.cpp b/src/main/query_profiler.cpp index 01c777c0a57..4f91d46d536 100644 --- a/src/main/query_profiler.cpp +++ b/src/main/query_profiler.cpp @@ -121,6 +121,7 @@ bool QueryProfiler::OperatorRequiresProfiling(PhysicalOperatorType op_type) { case PhysicalOperatorType::UNION: case PhysicalOperatorType::RECURSIVE_CTE: case PhysicalOperatorType::EMPTY_RESULT: + case PhysicalOperatorType::EXTENSION: return true; default: return false; From 28333fa97229ea134e0093c25e577172597915f3 Mon 
Sep 17 00:00:00 2001 From: Laurens Kuiper Date: Tue, 15 Oct 2024 10:33:48 +0200 Subject: [PATCH 19/20] increase bounds --- test/sql/copy/file_size_bytes.test | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/sql/copy/file_size_bytes.test b/test/sql/copy/file_size_bytes.test index bfd27106dce..a16e5389786 100644 --- a/test/sql/copy/file_size_bytes.test +++ b/test/sql/copy/file_size_bytes.test @@ -135,7 +135,6 @@ SELECT count(*) > 1 FROM glob('__TEST_DIR__/file_size_bytes_csv5/*.csv') true # each thread sees ~240kb if it's balanced, what about a 190kb limit -# even in the case of extreme thread imbalance, this always yield around 8 files statement ok COPY (FROM bigdata) TO '__TEST_DIR__/file_size_bytes_csv6' (FORMAT CSV, FILE_SIZE_BYTES '190kb', PER_THREAD_OUTPUT TRUE); @@ -144,9 +143,9 @@ SELECT COUNT(*) FROM read_csv_auto('__TEST_DIR__/file_size_bytes_csv6/*.csv') ---- 100000 -# ~2 files per thread, around 8 in total +# ~2 files per thread, around 8 in total (5 output files in case of extreme thread imbalance and only 1 thread runs) query I -SELECT count(*) BETWEEN 6 AND 10 FROM glob('__TEST_DIR__/file_size_bytes_csv6/*.csv') +SELECT count(*) BETWEEN 5 AND 10 FROM glob('__TEST_DIR__/file_size_bytes_csv6/*.csv') ---- 1 From 1aac7adf60de3acf6480b529ba8abb8ed05c517d Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Tue, 15 Oct 2024 11:09:52 +0200 Subject: [PATCH 20/20] Move _rtools platform to be equivalent to _mingw --- .github/config/distribution_matrix.json | 2 +- .github/workflows/R.yml | 4 ++-- src/include/duckdb/common/platform.hpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/config/distribution_matrix.json b/.github/config/distribution_matrix.json index 5de0ce0e5b1..77c2d4dc644 100644 --- a/.github/config/distribution_matrix.json +++ b/.github/config/distribution_matrix.json @@ -39,7 +39,7 @@ "vcpkg_triplet": "x64-windows-static-md" }, { - "duckdb_arch": "windows_amd64_rtools", + "duckdb_arch": 
"windows_amd64_mingw", "vcpkg_triplet": "x64-mingw-static" } ] diff --git a/.github/workflows/R.yml b/.github/workflows/R.yml index 8dbfc032b0d..17fad947801 100644 --- a/.github/workflows/R.yml +++ b/.github/workflows/R.yml @@ -62,8 +62,8 @@ jobs: - uses: ./.github/actions/build_extensions with: - deploy_as: windows_amd64_rtools - duckdb_arch: windows_amd64_rtools + deploy_as: windows_amd64_mingw + duckdb_arch: windows_amd64_mingw vcpkg_target_triplet: x64-mingw-static treat_warn_as_error: 0 s3_id: ${{ secrets.S3_ID }} diff --git a/src/include/duckdb/common/platform.hpp b/src/include/duckdb/common/platform.hpp index 3166ff9684e..94eea304dec 100644 --- a/src/include/duckdb/common/platform.hpp +++ b/src/include/duckdb/common/platform.hpp @@ -48,9 +48,9 @@ std::string DuckDBPlatform() { // NOLINT: allow definition in header #ifdef __MINGW32__ postfix = "_mingw"; #endif -// this is used for the windows R builds which use a separate build environment +// this is used for the windows R builds which use `mingw` equivalent extensions #ifdef DUCKDB_PLATFORM_RTOOLS - postfix = "_rtools"; + postfix = "_mingw"; #endif return os + "_" + arch + postfix; }