From 1f22e768b1ea4bef20939a67049ae2435fad210c Mon Sep 17 00:00:00 2001
From: Laurens Kuiper <laurens.kuiper@cwi.nl>
Date: Wed, 13 Nov 2024 10:47:38 +0100
Subject: [PATCH] implement all wide variants

---
 benchmark/fivetran/fivetran.benchmark.in |  2 ++
 benchmark/fivetran/q51.benchmark         | 10 ++++++
 benchmark/fivetran/q52.benchmark         | 10 ++++++
 benchmark/fivetran/q53.benchmark         | 10 ++++++
 benchmark/fivetran/q54.benchmark         | 10 ++++++
 benchmark/fivetran/q55.benchmark         | 10 ++++++
 benchmark/fivetran/q56.benchmark         | 10 ++++++
 benchmark/fivetran/q57.benchmark         | 10 ++++++
 benchmark/fivetran/q58.benchmark         | 10 ++++++
 benchmark/fivetran/q59.benchmark         | 10 ++++++
 benchmark/fivetran/q60.benchmark         | 10 ++++++
 benchmark/fivetran/q61.benchmark         | 10 ++++++
 benchmark/fivetran/q62.benchmark         | 10 ++++++
 benchmark/fivetran/queries/q10.sql       |  2 +-
 benchmark/fivetran/queries/q14.sql       |  2 +-
 benchmark/fivetran/queries/q51.sql       | 28 +++++++++++++++++
 benchmark/fivetran/queries/q52.sql       | 29 ++++++++++++++++++
 benchmark/fivetran/queries/q53.sql       | 29 ++++++++++++++++++
 benchmark/fivetran/queries/q54.sql       | 30 ++++++++++++++++++
 benchmark/fivetran/queries/q55.sql       | 37 ++++++++++++++++++++++
 benchmark/fivetran/queries/q56.sql       | 38 +++++++++++++++++++++++
 benchmark/fivetran/queries/q57.sql       | 38 +++++++++++++++++++++++
 benchmark/fivetran/queries/q58.sql       | 39 ++++++++++++++++++++++++
 benchmark/fivetran/queries/q59.sql       | 18 +++++++++++
 benchmark/fivetran/queries/q60.sql       | 19 ++++++++++++
 benchmark/fivetran/queries/q61.sql       | 19 ++++++++++++
 benchmark/fivetran/queries/q62.sql       | 20 ++++++++++++
 27 files changed, 468 insertions(+), 2 deletions(-)
 create mode 100644 benchmark/fivetran/q51.benchmark
 create mode 100644 benchmark/fivetran/q52.benchmark
 create mode 100644 benchmark/fivetran/q53.benchmark
 create mode 100644 benchmark/fivetran/q54.benchmark
 create mode 100644 benchmark/fivetran/q55.benchmark
 create mode 100644 benchmark/fivetran/q56.benchmark
 create mode 100644 benchmark/fivetran/q57.benchmark
 create mode 100644 benchmark/fivetran/q58.benchmark
 create mode 100644 benchmark/fivetran/q59.benchmark
 create mode 100644 benchmark/fivetran/q60.benchmark
 create mode 100644 benchmark/fivetran/q61.benchmark
 create mode 100644 benchmark/fivetran/q62.benchmark
 create mode 100644 benchmark/fivetran/queries/q51.sql
 create mode 100644 benchmark/fivetran/queries/q52.sql
 create mode 100644 benchmark/fivetran/queries/q53.sql
 create mode 100644 benchmark/fivetran/queries/q54.sql
 create mode 100644 benchmark/fivetran/queries/q55.sql
 create mode 100644 benchmark/fivetran/queries/q56.sql
 create mode 100644 benchmark/fivetran/queries/q57.sql
 create mode 100644 benchmark/fivetran/queries/q58.sql
 create mode 100644 benchmark/fivetran/queries/q59.sql
 create mode 100644 benchmark/fivetran/queries/q60.sql
 create mode 100644 benchmark/fivetran/queries/q61.sql
 create mode 100644 benchmark/fivetran/queries/q62.sql

diff --git a/benchmark/fivetran/fivetran.benchmark.in b/benchmark/fivetran/fivetran.benchmark.in
index 7d76666e284..827d4d92aeb 100644
--- a/benchmark/fivetran/fivetran.benchmark.in
+++ b/benchmark/fivetran/fivetran.benchmark.in
@@ -30,6 +30,8 @@ cache fivetran.duckdb no_connect
 # we also create an empty data/update file that doesn't match anything (try to mess with parquet cardinality estimation)
 # after generating thin variant, repeat entire data generation for wide variant
 # for the wide variant we modulo 4 to reduce the size of the data (so the number of parquet files is similar to thin)
+# for the wide variant queries we number the equivalent queries with number +50
+# this makes it so the thin/wide queries aren't interleaved, and makes it easy to compare
 
 load benchmark/fivetran/init/load.sql
 
diff --git a/benchmark/fivetran/q51.benchmark b/benchmark/fivetran/q51.benchmark
new file mode 100644
index 00000000000..a9b7530639c
--- /dev/null
+++ b/benchmark/fivetran/q51.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q51.benchmark
+# description: Run query 51 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=51
+QUERY_NUMBER_PADDED=51
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet') - (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q52.benchmark b/benchmark/fivetran/q52.benchmark
new file mode 100644
index 00000000000..b7d7d4970b0
--- /dev/null
+++ b/benchmark/fivetran/q52.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q52.benchmark
+# description: Run query 52 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=52
+QUERY_NUMBER_PADDED=52
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet') - (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q53.benchmark b/benchmark/fivetran/q53.benchmark
new file mode 100644
index 00000000000..7c37309ca31
--- /dev/null
+++ b/benchmark/fivetran/q53.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q53.benchmark
+# description: Run query 53 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=53
+QUERY_NUMBER_PADDED=53
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet') - (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q54.benchmark b/benchmark/fivetran/q54.benchmark
new file mode 100644
index 00000000000..5577fd37b0f
--- /dev/null
+++ b/benchmark/fivetran/q54.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q54.benchmark
+# description: Run query 54 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=54
+QUERY_NUMBER_PADDED=54
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet') - (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q55.benchmark b/benchmark/fivetran/q55.benchmark
new file mode 100644
index 00000000000..7621fc498f0
--- /dev/null
+++ b/benchmark/fivetran/q55.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q55.benchmark
+# description: Run query 55 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=55
+QUERY_NUMBER_PADDED=55
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q56.benchmark b/benchmark/fivetran/q56.benchmark
new file mode 100644
index 00000000000..1fe6d8cd25c
--- /dev/null
+++ b/benchmark/fivetran/q56.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q56.benchmark
+# description: Run query 56 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=56
+QUERY_NUMBER_PADDED=56
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q57.benchmark b/benchmark/fivetran/q57.benchmark
new file mode 100644
index 00000000000..cc972ccfb73
--- /dev/null
+++ b/benchmark/fivetran/q57.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q57.benchmark
+# description: Run query 57 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=57
+QUERY_NUMBER_PADDED=57
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q58.benchmark b/benchmark/fivetran/q58.benchmark
new file mode 100644
index 00000000000..c49a3b2d583
--- /dev/null
+++ b/benchmark/fivetran/q58.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q58.benchmark
+# description: Run query 58 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=58
+QUERY_NUMBER_PADDED=58
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/output/*.parquet') IS NOT DISTINCT FROM (SELECT count(DISTINCT string_pk) FROM 'duckdb_benchmark_data/fivetran/wide/existing/*.parquet')
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q59.benchmark b/benchmark/fivetran/q59.benchmark
new file mode 100644
index 00000000000..1d95c159beb
--- /dev/null
+++ b/benchmark/fivetran/q59.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q59.benchmark
+# description: Run query 59 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=59
+QUERY_NUMBER_PADDED=59
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(*) FROM __answer)/(SELECT count(*) FROM glob('duckdb_benchmark_data/fivetran/wide/existing/*.parquet')) BETWEEN 0.3 AND 0.5
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q60.benchmark b/benchmark/fivetran/q60.benchmark
new file mode 100644
index 00000000000..6e645714229
--- /dev/null
+++ b/benchmark/fivetran/q60.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q60.benchmark
+# description: Run query 60 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=60
+QUERY_NUMBER_PADDED=60
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(*) FROM __answer)/(SELECT count(*) FROM glob('duckdb_benchmark_data/fivetran/wide/existing/*.parquet')) BETWEEN 0.3 AND 0.5
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q61.benchmark b/benchmark/fivetran/q61.benchmark
new file mode 100644
index 00000000000..1544913215b
--- /dev/null
+++ b/benchmark/fivetran/q61.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q61.benchmark
+# description: Run query 61 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=61
+QUERY_NUMBER_PADDED=61
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(*) FROM __answer)/(SELECT count(*) FROM glob('duckdb_benchmark_data/fivetran/wide/existing/*.parquet')) BETWEEN 0.3 AND 0.5
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/q62.benchmark b/benchmark/fivetran/q62.benchmark
new file mode 100644
index 00000000000..2567ac330ca
--- /dev/null
+++ b/benchmark/fivetran/q62.benchmark
@@ -0,0 +1,10 @@
+# name: benchmark/fivetran/q62.benchmark
+# description: Run query 62 from the Fivetran benchmarks
+# group: [fivetran]
+
+template benchmark/fivetran/fivetran.benchmark.in
+QUERY_NUMBER=62
+QUERY_NUMBER_PADDED=62
+RESULT_COLUMNS=I
+RESULT_QUERY=SELECT (SELECT count(*) FROM __answer)/(SELECT count(*) FROM glob('duckdb_benchmark_data/fivetran/wide/existing/*.parquet')) BETWEEN 0.3 AND 0.5
+RESULT_ANSWER=1
diff --git a/benchmark/fivetran/queries/q10.sql b/benchmark/fivetran/queries/q10.sql
index ef5005e8118..b896966bd47 100644
--- a/benchmark/fivetran/queries/q10.sql
+++ b/benchmark/fivetran/queries/q10.sql
@@ -3,7 +3,7 @@ SELECT
     DISTINCT("_fivetran_filename")
 FROM
     read_parquet([
-         'duckdb_benchmark_data/fivetran/thin/tiny_data.parquet',
+        'duckdb_benchmark_data/fivetran/thin/tiny_data.parquet',
         'duckdb_benchmark_data/fivetran/thin/existing/*.parquet'
     ], filename=_fivetran_filename) AS existing
 WHERE EXISTS (
diff --git a/benchmark/fivetran/queries/q14.sql b/benchmark/fivetran/queries/q14.sql
index a3a641ce912..151e97b305a 100644
--- a/benchmark/fivetran/queries/q14.sql
+++ b/benchmark/fivetran/queries/q14.sql
@@ -3,7 +3,7 @@ SELECT
     DISTINCT("_fivetran_filename")
 FROM
     read_parquet([
-         'duckdb_benchmark_data/fivetran/thin/tiny_data.parquet',
+        'duckdb_benchmark_data/fivetran/thin/tiny_data.parquet',
         'duckdb_benchmark_data/fivetran/thin/existing/*.parquet'
     ], filename=_fivetran_filename) AS existing
 WHERE EXISTS (
diff --git a/benchmark/fivetran/queries/q51.sql b/benchmark/fivetran/queries/q51.sql
new file mode 100644
index 00000000000..82f6431c30b
--- /dev/null
+++ b/benchmark/fivetran/queries/q51.sql
@@ -0,0 +1,28 @@
+-- wide delete scenario with string + instant pk: copy every existing row that has no staging row with the same (string_pk, instant_id)
+COPY (
+	SELECT
+		*
+	FROM
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+        ]) AS existing
+	WHERE NOT EXISTS ( -- anti-join against the incoming (staging) files
+		SELECT
+			TRUE
+		FROM
+			read_parquet([
+                'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+            ]) AS staging
+		WHERE
+			"existing"."string_pk" = "staging"."string_pk"
+        AND "existing"."instant_id" = "staging"."instant_id"
+	)
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- surviving rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q52.sql b/benchmark/fivetran/queries/q52.sql
new file mode 100644
index 00000000000..6c185c97b14
--- /dev/null
+++ b/benchmark/fivetran/queries/q52.sql
@@ -0,0 +1,29 @@
+-- wide delete scenario with string + instant pk (with tiny existing file): copy every existing row that has no staging row with the same (string_pk, instant_id)
+COPY (
+	SELECT
+		*
+	FROM
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/existing/*.parquet',
+            'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet'
+        ]) AS existing -- the tiny file is scanned together with the regular existing files
+	WHERE NOT EXISTS ( -- anti-join against the incoming (staging) files
+		SELECT
+			TRUE
+		FROM
+			read_parquet([
+                'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+            ]) AS staging
+		WHERE
+			"existing"."string_pk" = "staging"."string_pk"
+        AND "existing"."instant_id" = "staging"."instant_id"
+	)
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- surviving rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q53.sql b/benchmark/fivetran/queries/q53.sql
new file mode 100644
index 00000000000..bb2ec0223fa
--- /dev/null
+++ b/benchmark/fivetran/queries/q53.sql
@@ -0,0 +1,29 @@
+-- wide delete scenario with string + instant pk (with tiny incoming file): copy every existing row that has no staging row with the same (string_pk, instant_id)
+COPY (
+	SELECT
+		*
+	FROM
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+        ]) AS existing
+	WHERE NOT EXISTS ( -- anti-join against the incoming (staging) files
+		SELECT
+			TRUE
+		FROM
+			read_parquet([
+                'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+                'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+            ]) AS staging -- the tiny file is scanned together with the regular incoming files
+		WHERE
+			"existing"."string_pk" = "staging"."string_pk"
+        AND "existing"."instant_id" = "staging"."instant_id"
+	)
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- surviving rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q54.sql b/benchmark/fivetran/queries/q54.sql
new file mode 100644
index 00000000000..6d376737df4
--- /dev/null
+++ b/benchmark/fivetran/queries/q54.sql
@@ -0,0 +1,30 @@
+-- wide delete scenario with string + instant pk (with tiny existing and incoming file): copy every existing row that has no staging row with the same (string_pk, instant_id)
+COPY (
+	SELECT
+		*
+	FROM
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+            'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+        ]) AS existing -- the tiny file is scanned together with the regular existing files
+	WHERE NOT EXISTS ( -- anti-join against the incoming (staging) files
+		SELECT
+			TRUE
+		FROM
+			read_parquet([
+                'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+                'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+            ]) AS staging -- the same tiny file is also part of the staging side
+		WHERE
+			"existing"."string_pk" = "staging"."string_pk"
+        AND "existing"."instant_id" = "staging"."instant_id"
+	)
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- surviving rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q55.sql b/benchmark/fivetran/queries/q55.sql
new file mode 100644
index 00000000000..40168ca051a
--- /dev/null
+++ b/benchmark/fivetran/queries/q55.sql
@@ -0,0 +1,37 @@
+-- wide update scenario with string + instant pk: left-join existing to staging on (string_pk, instant_id, _fivetran_start) and rewrite ten string columns through update_macro
+COPY (
+	SELECT
+		"existing".* REPLACE (
+			update_macro("existing"."string_0", "staging"."string_0", 1, "_fivetran_updated_cols") AS "string_0",
+			update_macro("existing"."string_1", "staging"."string_1", 2, "_fivetran_updated_cols") AS "string_1",
+			update_macro("existing"."string_10", "staging"."string_10", 3, "_fivetran_updated_cols") AS "string_10",
+			update_macro("existing"."string_11", "staging"."string_11", 3, "_fivetran_updated_cols") AS "string_11", -- NOTE(review): index 3 repeats the one used for string_10 - confirm intended
+			update_macro("existing"."string_20", "staging"."string_20", 4, "_fivetran_updated_cols") AS "string_20",
+			update_macro("existing"."string_21", "staging"."string_21", 5, "_fivetran_updated_cols") AS "string_21",
+			update_macro("existing"."string_30", "staging"."string_30", 6, "_fivetran_updated_cols") AS "string_30",
+			update_macro("existing"."string_31", "staging"."string_31", 7, "_fivetran_updated_cols") AS "string_31",
+			update_macro("existing"."string_40", "staging"."string_40", 7, "_fivetran_updated_cols") AS "string_40", -- NOTE(review): index 7 repeats the one used for string_31 - confirm intended
+			update_macro("existing"."string_41", "staging"."string_41", 8, "_fivetran_updated_cols") AS "string_41",
+			CASE WHEN "staging"."_fivetran_synced" IS NULL THEN "existing"."_fivetran_synced" ELSE "staging"."_fivetran_synced" END AS "_fivetran_synced",
+		)
+	FROM
+		read_parquet([
+			'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+		]) AS "existing"
+	LEFT JOIN -- every existing row is kept, staging columns are NULL when no update row matches
+		read_parquet([
+			'duckdb_benchmark_data/fivetran/wide/update/*.parquet'
+		]) AS "staging"
+	ON
+		"existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+	AND "existing"."_fivetran_start" = "staging"."_fivetran_start"
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- merged rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q56.sql b/benchmark/fivetran/queries/q56.sql
new file mode 100644
index 00000000000..b3951009d9e
--- /dev/null
+++ b/benchmark/fivetran/queries/q56.sql
@@ -0,0 +1,38 @@
+-- wide update scenario with string + instant pk (with tiny existing file): left-join existing to staging on (string_pk, instant_id, _fivetran_start) and rewrite ten string columns through update_macro
+COPY (
+	SELECT
+		"existing".* REPLACE (
+			update_macro("existing"."string_0", "staging"."string_0", 1, "_fivetran_updated_cols") AS "string_0",
+			update_macro("existing"."string_1", "staging"."string_1", 2, "_fivetran_updated_cols") AS "string_1",
+			update_macro("existing"."string_10", "staging"."string_10", 3, "_fivetran_updated_cols") AS "string_10",
+			update_macro("existing"."string_11", "staging"."string_11", 3, "_fivetran_updated_cols") AS "string_11", -- NOTE(review): index 3 repeats the one used for string_10 - confirm intended
+			update_macro("existing"."string_20", "staging"."string_20", 4, "_fivetran_updated_cols") AS "string_20",
+			update_macro("existing"."string_21", "staging"."string_21", 5, "_fivetran_updated_cols") AS "string_21",
+			update_macro("existing"."string_30", "staging"."string_30", 6, "_fivetran_updated_cols") AS "string_30",
+			update_macro("existing"."string_31", "staging"."string_31", 7, "_fivetran_updated_cols") AS "string_31",
+			update_macro("existing"."string_40", "staging"."string_40", 7, "_fivetran_updated_cols") AS "string_40", -- NOTE(review): index 7 repeats the one used for string_31 - confirm intended
+			update_macro("existing"."string_41", "staging"."string_41", 8, "_fivetran_updated_cols") AS "string_41",
+			CASE WHEN "staging"."_fivetran_synced" IS NULL THEN "existing"."_fivetran_synced" ELSE "staging"."_fivetran_synced" END AS "_fivetran_synced",
+		)
+	FROM
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+			'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+		]) AS "existing" -- the tiny file is scanned together with the regular existing files
+	LEFT JOIN -- every existing row is kept, staging columns are NULL when no update row matches
+		read_parquet([
+			'duckdb_benchmark_data/fivetran/wide/update/*.parquet'
+		]) AS "staging"
+	ON
+		"existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+	AND "existing"."_fivetran_start" = "staging"."_fivetran_start"
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- merged rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q57.sql b/benchmark/fivetran/queries/q57.sql
new file mode 100644
index 00000000000..1126fc5a912
--- /dev/null
+++ b/benchmark/fivetran/queries/q57.sql
@@ -0,0 +1,38 @@
+-- wide update scenario with string + instant pk (with tiny incoming file): left-join existing to staging on (string_pk, instant_id, _fivetran_start) and rewrite ten string columns through update_macro
+COPY (
+	SELECT
+		"existing".* REPLACE (
+			update_macro("existing"."string_0", "staging"."string_0", 1, "_fivetran_updated_cols") AS "string_0",
+			update_macro("existing"."string_1", "staging"."string_1", 2, "_fivetran_updated_cols") AS "string_1",
+			update_macro("existing"."string_10", "staging"."string_10", 3, "_fivetran_updated_cols") AS "string_10",
+			update_macro("existing"."string_11", "staging"."string_11", 3, "_fivetran_updated_cols") AS "string_11", -- NOTE(review): index 3 repeats the one used for string_10 - confirm intended
+			update_macro("existing"."string_20", "staging"."string_20", 4, "_fivetran_updated_cols") AS "string_20",
+			update_macro("existing"."string_21", "staging"."string_21", 5, "_fivetran_updated_cols") AS "string_21",
+			update_macro("existing"."string_30", "staging"."string_30", 6, "_fivetran_updated_cols") AS "string_30",
+			update_macro("existing"."string_31", "staging"."string_31", 7, "_fivetran_updated_cols") AS "string_31",
+			update_macro("existing"."string_40", "staging"."string_40", 7, "_fivetran_updated_cols") AS "string_40", -- NOTE(review): index 7 repeats the one used for string_31 - confirm intended
+			update_macro("existing"."string_41", "staging"."string_41", 8, "_fivetran_updated_cols") AS "string_41",
+			CASE WHEN "staging"."_fivetran_synced" IS NULL THEN "existing"."_fivetran_synced" ELSE "staging"."_fivetran_synced" END AS "_fivetran_synced",
+		)
+	FROM
+		read_parquet([
+			'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+		]) AS "existing"
+	LEFT JOIN -- every existing row is kept, staging columns are NULL when no update row matches
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_update.parquet',
+			'duckdb_benchmark_data/fivetran/wide/update/*.parquet'
+		]) AS "staging" -- the tiny update file is scanned together with the regular update files
+	ON
+		"existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+	AND "existing"."_fivetran_start" = "staging"."_fivetran_start"
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- merged rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q58.sql b/benchmark/fivetran/queries/q58.sql
new file mode 100644
index 00000000000..0b2a9201959
--- /dev/null
+++ b/benchmark/fivetran/queries/q58.sql
@@ -0,0 +1,39 @@
+-- wide update scenario with string + instant pk (with tiny existing and incoming file): left-join existing to staging on (string_pk, instant_id, _fivetran_start) and rewrite ten string columns through update_macro
+COPY (
+	SELECT
+		"existing".* REPLACE (
+			update_macro("existing"."string_0", "staging"."string_0", 1, "_fivetran_updated_cols") AS "string_0",
+			update_macro("existing"."string_1", "staging"."string_1", 2, "_fivetran_updated_cols") AS "string_1",
+			update_macro("existing"."string_10", "staging"."string_10", 3, "_fivetran_updated_cols") AS "string_10",
+			update_macro("existing"."string_11", "staging"."string_11", 3, "_fivetran_updated_cols") AS "string_11", -- NOTE(review): index 3 repeats the one used for string_10 - confirm intended
+			update_macro("existing"."string_20", "staging"."string_20", 4, "_fivetran_updated_cols") AS "string_20",
+			update_macro("existing"."string_21", "staging"."string_21", 5, "_fivetran_updated_cols") AS "string_21",
+			update_macro("existing"."string_30", "staging"."string_30", 6, "_fivetran_updated_cols") AS "string_30",
+			update_macro("existing"."string_31", "staging"."string_31", 7, "_fivetran_updated_cols") AS "string_31",
+			update_macro("existing"."string_40", "staging"."string_40", 7, "_fivetran_updated_cols") AS "string_40", -- NOTE(review): index 7 repeats the one used for string_31 - confirm intended
+			update_macro("existing"."string_41", "staging"."string_41", 8, "_fivetran_updated_cols") AS "string_41",
+			CASE WHEN "staging"."_fivetran_synced" IS NULL THEN "existing"."_fivetran_synced" ELSE "staging"."_fivetran_synced" END AS "_fivetran_synced",
+		)
+	FROM
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+			'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+		]) AS "existing" -- the tiny file is scanned together with the regular existing files
+	LEFT JOIN -- every existing row is kept, staging columns are NULL when no update row matches
+		read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_update.parquet',
+			'duckdb_benchmark_data/fivetran/wide/update/*.parquet'
+		]) AS "staging" -- the tiny update file is scanned together with the regular update files
+	ON
+		"existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+	AND "existing"."_fivetran_start" = "staging"."_fivetran_start"
+) TO 'duckdb_benchmark_data/fivetran/output' ( -- merged rows are written out as ZSTD-compressed Parquet
+	FORMAT PARQUET,
+	COMPRESSION ZSTD,
+	COMPRESSION_LEVEL 1,
+	PER_THREAD_OUTPUT TRUE,
+	ROW_GROUP_SIZE_BYTES '64 MB',
+	ROW_GROUPS_PER_FILE 1,
+	OVERWRITE TRUE
+);
diff --git a/benchmark/fivetran/queries/q59.sql b/benchmark/fivetran/queries/q59.sql
new file mode 100644
index 00000000000..afe340dcba4
--- /dev/null
+++ b/benchmark/fivetran/queries/q59.sql
@@ -0,0 +1,18 @@
+-- wide recon scenario with string + instant pk: list the distinct source files of existing rows that have a staging row with the same (string_pk, instant_id)
+SELECT
+    DISTINCT("_fivetran_filename")
+FROM
+    read_parquet([
+        'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+    ], filename=_fivetran_filename) AS existing -- presumably names the per-row source-file column selected above - confirm against read_parquet docs
+WHERE EXISTS ( -- semi-join against the incoming (staging) files
+    SELECT
+        TRUE
+    FROM
+        read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+        ]) AS staging
+    WHERE
+        "existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+);
diff --git a/benchmark/fivetran/queries/q60.sql b/benchmark/fivetran/queries/q60.sql
new file mode 100644
index 00000000000..13ce46d6aa2
--- /dev/null
+++ b/benchmark/fivetran/queries/q60.sql
@@ -0,0 +1,19 @@
+-- wide recon scenario with string + instant pk (with tiny existing file): list the distinct source files of existing rows that have a staging row with the same (string_pk, instant_id)
+SELECT
+    DISTINCT("_fivetran_filename")
+FROM
+    read_parquet([
+        'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+        'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+    ], filename=_fivetran_filename) AS existing -- presumably names the per-row source-file column selected above - confirm against read_parquet docs
+WHERE EXISTS ( -- semi-join against the incoming (staging) files
+    SELECT
+        TRUE
+    FROM
+        read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+        ]) AS staging
+    WHERE
+        "existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+);
diff --git a/benchmark/fivetran/queries/q61.sql b/benchmark/fivetran/queries/q61.sql
new file mode 100644
index 00000000000..32c8ad102f8
--- /dev/null
+++ b/benchmark/fivetran/queries/q61.sql
@@ -0,0 +1,19 @@
+-- wide recon scenario with string + instant pk (with tiny incoming file): list the distinct source files of existing rows that have a staging row with the same (string_pk, instant_id)
+SELECT
+    DISTINCT("_fivetran_filename")
+FROM
+    read_parquet([
+        'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+    ], filename=_fivetran_filename) AS existing -- must scan the wide data set, the result check counts files under wide/existing
+WHERE EXISTS ( -- semi-join against the incoming (staging) files
+    SELECT
+        TRUE
+    FROM
+        read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+            'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+        ]) AS staging -- the tiny file is scanned together with the regular incoming files
+    WHERE
+        "existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+);
diff --git a/benchmark/fivetran/queries/q62.sql b/benchmark/fivetran/queries/q62.sql
new file mode 100644
index 00000000000..511fd2683ff
--- /dev/null
+++ b/benchmark/fivetran/queries/q62.sql
@@ -0,0 +1,20 @@
+-- wide recon scenario with string + instant pk (with tiny existing and incoming file): list the distinct source files of existing rows that have a staging row with the same (string_pk, instant_id)
+SELECT
+    DISTINCT("_fivetran_filename")
+FROM
+    read_parquet([
+        'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+        'duckdb_benchmark_data/fivetran/wide/existing/*.parquet'
+    ], filename=_fivetran_filename) AS existing -- presumably names the per-row source-file column selected above - confirm against read_parquet docs
+WHERE EXISTS ( -- semi-join against the incoming (staging) files
+    SELECT
+        TRUE
+    FROM
+        read_parquet([
+            'duckdb_benchmark_data/fivetran/wide/tiny_data.parquet',
+            'duckdb_benchmark_data/fivetran/wide/incoming/*.parquet'
+        ]) AS staging -- the same tiny file is also part of the staging side
+    WHERE
+        "existing"."string_pk" = "staging"."string_pk"
+	AND "existing"."instant_id" = "staging"."instant_id"
+);