From b09a87cc3f4a6f932ae208ce37ee77cd14708d17 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Wed, 5 Jun 2024 17:35:27 +0200 Subject: [PATCH] Rework JSON page examples --- data/arrays.json | 3 ++ data/records-in-array.json | 5 +++ data/records.json | 3 ++ data/unstructured.json | 12 ++++++ docs/data/json/overview.md | 83 ++++++++++++++++++++------------------ docs/extensions/json.md | 6 ++- 6 files changed, 71 insertions(+), 41 deletions(-) create mode 100644 data/arrays.json create mode 100644 data/records-in-array.json create mode 100644 data/records.json create mode 100644 data/unstructured.json diff --git a/data/arrays.json b/data/arrays.json new file mode 100644 index 00000000000..c0c88ce419e --- /dev/null +++ b/data/arrays.json @@ -0,0 +1,3 @@ +[1, 2, 3] +[4, 5, 6] +[7, 8, 9] diff --git a/data/records-in-array.json b/data/records-in-array.json new file mode 100644 index 00000000000..5be8a470eb3 --- /dev/null +++ b/data/records-in-array.json @@ -0,0 +1,5 @@ +[ + {"key1":"value1", "key2": "value1"}, + {"key1":"value2", "key2": "value2"}, + {"key1":"value3", "key2": "value3"} +] diff --git a/data/records.json b/data/records.json new file mode 100644 index 00000000000..c4a4fd7d634 --- /dev/null +++ b/data/records.json @@ -0,0 +1,3 @@ +{"key1":"value1", "key2": "value1"} +{"key1":"value2", "key2": "value2"} +{"key1":"value3", "key2": "value3"} diff --git a/data/unstructured.json b/data/unstructured.json new file mode 100644 index 00000000000..77e80cfc212 --- /dev/null +++ b/data/unstructured.json @@ -0,0 +1,12 @@ +{ + "key1":"value1", + "key2":"value1" +} +{ + "key1":"value2", + "key2":"value2" +} +{ + "key1":"value3", + "key2":"value3" +} diff --git a/docs/data/json/overview.md b/docs/data/json/overview.md index d06abcecfe6..9a0da677470 100644 --- a/docs/data/json/overview.md +++ b/docs/data/json/overview.md @@ -7,13 +7,13 @@ redirect_from: ## Examples -Read a JSON file from disk, auto-infer options. 
+Read a JSON file from disk, auto-infer options:
```sql
SELECT *
FROM 'todos.json';
```
-Use the `read_json` function with custom options.
+Use the `read_json` function with custom options:
```sql
SELECT *
@@ -31,20 +31,21 @@ Read a JSON file from stdin, auto-infer options:
cat data/json/todos.json | duckdb -c "SELECT * FROM read_json_auto('/dev/stdin')"
```
-Read a JSON file into a table.
+Read a JSON file into a table:
```sql
CREATE TABLE todos (userId UBIGINT, id UBIGINT, title VARCHAR, completed BOOLEAN);
COPY todos FROM 'todos.json';
```
-Alternatively, create a table without specifying the schema manually.
+Alternatively, create a table without specifying the schema manually with a [`CREATE TABLE ... AS SELECT` clause](../../sql/statements/create_table#create-table--as-select-ctas):
```sql
-CREATE TABLE todos AS SELECT * FROM 'todos.json';
+CREATE TABLE todos AS
+    SELECT * FROM 'todos.json';
```
-Write the result of a query to a JSON file.
+Write the result of a query to a JSON file:
```sql
COPY (SELECT * FROM todos) TO 'todos.json';
@@ -99,6 +100,8 @@ FROM filename.json;
With `format = 'newline_delimited'` newline-delimited JSON can be parsed. Each line is a JSON object.
+We use the example file [`records.json`](/data/records.json) with the following content:
+
```json
{"key1":"value1", "key2": "value1"}
{"key1":"value2", "key2": "value2"}
{"key1":"value3", "key2": "value3"}
@@ -112,15 +115,16 @@ FROM read_json_auto('records.json', format = 'newline_delimited');
-| key1 | key2 |
-|----------|----------|
-| `value1` | `value1` |
-| `value2` | `value2` |
-| `value3` | `value3` |
+| key1 | key2 |
+|--------|--------|
+| value1 | value1 |
+| value2 | value2 |
+| value3 | value3 |
### Format: `array`
If the JSON file contains a JSON array of objects (pretty-printed or not), `array` may be used.
+To demonstrate its use, we use the example file [`records-in-array.json`](/data/records-in-array.json):
```json
[
    {"key1":"value1", "key2": "value1"},
    {"key1":"value2", "key2": "value2"},
    {"key1":"value3", "key2": "value3"}
]
```
```sql
SELECT *
-FROM read_json_auto('array.json', format = 'array');
+FROM read_json_auto('records-in-array.json', format = 'array');
```
-| key1 | key2 | -|----------|----------| -| `value1` | `value1` | -| `value2` | `value2` | -| `value3` | `value3` | +| key1 | key2 | +|--------|--------| +| value1 | value1 | +| value2 | value2 | +| value3 | value3 | ### Format: `unstructured` If the JSON file contains JSON that is not newline-delimited or an array, `unstructured` may be used. +To demonstrate its use, we use the example file [`unstructured.json`](/data/unstructured.json): ```json { @@ -169,18 +174,18 @@ FROM read_json_auto('unstructured.json', format = 'unstructured');
-| key1 | key2 | -|----------|----------| -| `value1` | `value1` | -| `value2` | `value2` | -| `value3` | `value3` | +| key1 | key2 | +|--------|--------| +| value1 | value1 | +| value2 | value2 | +| value3 | value3 | ## Examples of Records Settings The JSON extension can attempt to determine whether a JSON file contains records when setting `records = auto`. When `records = true`, the JSON extension expects JSON objects, and will unpack the fields of JSON objects into individual columns. -Continuing with the same example file from before: +Continuing with the same example file, [`records.json`](/data/records.json): ```json {"key1":"value1", "key2": "value1"} @@ -195,11 +200,11 @@ FROM read_json_auto('records.json', records = true);
-| key1 | key2 | -|----------|----------| -| `value1` | `value1` | -| `value2` | `value2` | -| `value3` | `value3` | +| key1 | key2 | +|--------|--------| +| value1 | value1 | +| value2 | value2 | +| value3 | value3 | When `records = false`, the JSON extension will not unpack the top-level objects, and create `STRUCT`s instead: @@ -210,13 +215,13 @@ FROM read_json_auto('records.json', records = false);
-| json | -|------------------------------------| -| `{'key1': value1, 'key2': value1}` | -| `{'key1': value2, 'key2': value2}` | -| `{'key1': value3, 'key2': value3}` | +| json | +|----------------------------------| +| {'key1': value1, 'key2': value1} | +| {'key1': value2, 'key2': value2} | +| {'key1': value3, 'key2': value3} | -This is especially useful if we have non-object JSON, for example: +This is especially useful if we have non-object JSON, for example, [`arrays.json`](/data/arrays.json): ```json [1, 2, 3] @@ -231,11 +236,11 @@ FROM read_json_auto('arrays.json', records = false);
-| json | -|-------------| -| `[1, 2, 3]` | -| `[4, 5, 6]` | -| `[7, 8, 9]` | +| json | +|-----------| +| [1, 2, 3] | +| [4, 5, 6] | +| [7, 8, 9] | ## Writing diff --git a/docs/extensions/json.md b/docs/extensions/json.md index c1885670ad8..d1eff65391f 100644 --- a/docs/extensions/json.md +++ b/docs/extensions/json.md @@ -251,8 +251,10 @@ DuckDB can convert JSON arrays directly to its internal `LIST` type, and missing ```sql SELECT * -FROM read_json(['my_file1.json', 'my_file2.json'], - columns = {duck: 'INTEGER', goose: 'INTEGER[]', swan: 'DOUBLE'}); +FROM read_json( + ['my_file1.json', 'my_file2.json'], + columns = {duck: 'INTEGER', goose: 'INTEGER[]', swan: 'DOUBLE'} + ); ```