From b09a87cc3f4a6f932ae208ce37ee77cd14708d17 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Wed, 5 Jun 2024 17:35:27 +0200 Subject: [PATCH] Rework JSON page examples --- data/arrays.json | 3 ++ data/records-in-array.json | 5 +++ data/records.json | 3 ++ data/unstructured.json | 12 ++++++ docs/data/json/overview.md | 83 ++++++++++++++++++++------------------ docs/extensions/json.md | 6 ++- 6 files changed, 71 insertions(+), 41 deletions(-) create mode 100644 data/arrays.json create mode 100644 data/records-in-array.json create mode 100644 data/records.json create mode 100644 data/unstructured.json diff --git a/data/arrays.json b/data/arrays.json new file mode 100644 index 00000000000..c0c88ce419e --- /dev/null +++ b/data/arrays.json @@ -0,0 +1,3 @@ +[1, 2, 3] +[4, 5, 6] +[7, 8, 9] diff --git a/data/records-in-array.json b/data/records-in-array.json new file mode 100644 index 00000000000..5be8a470eb3 --- /dev/null +++ b/data/records-in-array.json @@ -0,0 +1,5 @@ +[ + {"key1":"value1", "key2": "value1"}, + {"key1":"value2", "key2": "value2"}, + {"key1":"value3", "key2": "value3"} +] diff --git a/data/records.json b/data/records.json new file mode 100644 index 00000000000..c4a4fd7d634 --- /dev/null +++ b/data/records.json @@ -0,0 +1,3 @@ +{"key1":"value1", "key2": "value1"} +{"key1":"value2", "key2": "value2"} +{"key1":"value3", "key2": "value3"} diff --git a/data/unstructured.json b/data/unstructured.json new file mode 100644 index 00000000000..77e80cfc212 --- /dev/null +++ b/data/unstructured.json @@ -0,0 +1,12 @@ +{ + "key1":"value1", + "key2":"value1" +} +{ + "key1":"value2", + "key2":"value2" +} +{ + "key1":"value3", + "key2":"value3" +} diff --git a/docs/data/json/overview.md b/docs/data/json/overview.md index d06abcecfe6..9a0da677470 100644 --- a/docs/data/json/overview.md +++ b/docs/data/json/overview.md @@ -7,13 +7,13 @@ redirect_from: ## Examples -Read a JSON file from disk, auto-infer options. 
+Read a JSON file from disk, auto-infer options:
```sql
SELECT *
FROM 'todos.json';
```
-Use the `read_json` function with custom options.
+Use the `read_json` function with custom options:
```sql
SELECT *
@@ -31,20 +31,21 @@ Read a JSON file from stdin, auto-infer options:
cat data/json/todos.json | duckdb -c "SELECT * FROM read_json_auto('/dev/stdin')"
```
-Read a JSON file into a table.
+Read a JSON file into a table:
```sql
CREATE TABLE todos (userId UBIGINT, id UBIGINT, title VARCHAR, completed BOOLEAN);
COPY todos FROM 'todos.json';
```
-Alternatively, create a table without specifying the schema manually.
+Alternatively, create a table without specifying the schema manually with a [`CREATE TABLE ... AS SELECT` clause](../../sql/statements/create_table#create-table--as-select-ctas):
```sql
-CREATE TABLE todos AS SELECT * FROM 'todos.json';
+CREATE TABLE todos AS
+    SELECT * FROM 'todos.json';
```
-Write the result of a query to a JSON file.
+Write the result of a query to a JSON file:
```sql
COPY (SELECT * FROM todos) TO 'todos.json';
@@ -99,6 +100,8 @@ FROM filename.json;
With `format = 'newline_delimited'` newline-delimited JSON can be parsed. Each line is a JSON object.
+We use the example file [`records.json`](/data/records.json) with the following content:
+
```json
{"key1":"value1", "key2": "value1"}
{"key1":"value2", "key2": "value2"}
{"key1":"value3", "key2": "value3"}
@@ -112,15 +115,16 @@ FROM read_json_auto('records.json', format = 'newline_delimited');
-| key1 | key2 |
-|----------|----------|
-| `value1` | `value1` |
-| `value2` | `value2` |
-| `value3` | `value3` |
+| key1 | key2 |
+|--------|--------|
+| value1 | value1 |
+| value2 | value2 |
+| value3 | value3 |
### Format: `array`
If the JSON file contains a JSON array of objects (pretty-printed or not), `array` may be used.
+To demonstrate its use, we use the example file [`records-in-array.json`](/data/records-in-array.json):
```json
[
    {"key1":"value1", "key2": "value1"},
    {"key1":"value2", "key2": "value2"},
    {"key1":"value3", "key2": "value3"}
]
```
```sql
SELECT *
-FROM read_json_auto('array.json', format = 'array');
+FROM read_json_auto('records-in-array.json', format = 'array');
```
-| key1 | key2 | -|----------|----------| -| `value1` | `value1` | -| `value2` | `value2` | -| `value3` | `value3` | +| key1 | key2 | +|--------|--------| +| value1 | value1 | +| value2 | value2 | +| value3 | value3 | ### Format: `unstructured` If the JSON file contains JSON that is not newline-delimited or an array, `unstructured` may be used. +To demonstrate its use, we use the example file [`unstructured.json`](/data/unstructured.json): ```json { @@ -169,18 +174,18 @@ FROM read_json_auto('unstructured.json', format = 'unstructured');
-| key1 | key2 | -|----------|----------| -| `value1` | `value1` | -| `value2` | `value2` | -| `value3` | `value3` | +| key1 | key2 | +|--------|--------| +| value1 | value1 | +| value2 | value2 | +| value3 | value3 | ## Examples of Records Settings The JSON extension can attempt to determine whether a JSON file contains records when setting `records = auto`. When `records = true`, the JSON extension expects JSON objects, and will unpack the fields of JSON objects into individual columns. -Continuing with the same example file from before: +Continuing with the same example file, [`records.json`](/data/records.json): ```json {"key1":"value1", "key2": "value1"} @@ -195,11 +200,11 @@ FROM read_json_auto('records.json', records = true);
-| key1 | key2 | -|----------|----------| -| `value1` | `value1` | -| `value2` | `value2` | -| `value3` | `value3` | +| key1 | key2 | +|--------|--------| +| value1 | value1 | +| value2 | value2 | +| value3 | value3 | When `records = false`, the JSON extension will not unpack the top-level objects, and create `STRUCT`s instead: @@ -210,13 +215,13 @@ FROM read_json_auto('records.json', records = false);
-| json | -|------------------------------------| -| `{'key1': value1, 'key2': value1}` | -| `{'key1': value2, 'key2': value2}` | -| `{'key1': value3, 'key2': value3}` | +| json | +|----------------------------------| +| {'key1': value1, 'key2': value1} | +| {'key1': value2, 'key2': value2} | +| {'key1': value3, 'key2': value3} | -This is especially useful if we have non-object JSON, for example: +This is especially useful if we have non-object JSON, for example, [`arrays.json`](/data/arrays.json): ```json [1, 2, 3] @@ -231,11 +236,11 @@ FROM read_json_auto('arrays.json', records = false);
-| json | -|-------------| -| `[1, 2, 3]` | -| `[4, 5, 6]` | -| `[7, 8, 9]` | +| json | +|-----------| +| [1, 2, 3] | +| [4, 5, 6] | +| [7, 8, 9] | ## Writing diff --git a/docs/extensions/json.md b/docs/extensions/json.md index c1885670ad8..d1eff65391f 100644 --- a/docs/extensions/json.md +++ b/docs/extensions/json.md @@ -251,8 +251,10 @@ DuckDB can convert JSON arrays directly to its internal `LIST` type, and missing ```sql SELECT * -FROM read_json(['my_file1.json', 'my_file2.json'], - columns = {duck: 'INTEGER', goose: 'INTEGER[]', swan: 'DOUBLE'}); +FROM read_json( + ['my_file1.json', 'my_file2.json'], + columns = {duck: 'INTEGER', goose: 'INTEGER[]', swan: 'DOUBLE'} + ); ```