From 14687eca33b9d77417fbb76d553d253145955fb9 Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Tue, 26 Dec 2023 22:07:03 -0500
Subject: [PATCH 01/14] adds unnest_wider/longer, nest/by

---
 docs/examples/UserGuide/unnest.jl |  29 ++++
 docs/mkdocs.yml                   |   1 +
 src/TidierData.jl                 |   4 +-
 src/docstrings.jl                 | 179 ++++++++++++++++++++-
 src/nests.jl                      | 252 ++++++++++++++++++++++++++++++
 src/separate_unite.jl             |  33 ++--
 6 files changed, 481 insertions(+), 17 deletions(-)
 create mode 100644 docs/examples/UserGuide/unnest.jl
 create mode 100644 src/nests.jl

diff --git a/docs/examples/UserGuide/unnest.jl b/docs/examples/UserGuide/unnest.jl
new file mode 100644
index 00000000..9dc4cd19
--- /dev/null
+++ b/docs/examples/UserGuide/unnest.jl
@@ -0,0 +1,29 @@
+# ## `@unnest_longer`
+
+# `@unnest_longer` adds one row per entry of an array, lengthening dataframe by flattening the column or columns. 
+
+df = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]]);
+
+@chain df begin
+    @unnest_longer(y)
+end
+
+# If there are rows with empty arrays, `keep_empty` will prevent these rows from being dropped. `indices_include` will add a new column for each flattened column that logs the position of each entry in the array.
+
+@chain df begin
+    @unnest_longer(y, keep_empty = true, indices_include = true)
+end
+
+# ## @unnest_wider
+
+# `@unnest_wider` will widen a column of Dicts or a column(s) of arrays into multiple columns.
+
+df2 = DataFrame(
+           name = ["Zaki", "Farida"],
+           attributes = [
+               Dict("age" => 25, "city" => "New York"),
+               Dict("age" => 30, "city" => "Los Angeles")]);
+
+@chain df2 begin
+    @unnest_wider(attributes)
+end
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 751c5499..f48693c9 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -132,6 +132,7 @@ nav:
   - "Binding" : "examples/generated/UserGuide/binding.md" 
   - "Pivoting": "examples/generated/UserGuide/pivots.md"
   - "Separating" : "examples/generated/UserGuide/sep_unite.md"
+  - "Unnesting" : "examples/generated/UserGuide/unnest.md"
   - "@summary" : "examples/generated/UserGuide/summary.md"
   - "Column names": "examples/generated/UserGuide/column_names.md"
   - "Interpolation" : "examples/generated/UserGuide/interpolation.md"
diff --git a/src/TidierData.jl b/src/TidierData.jl
index c64f9538..25e00b10 100644
--- a/src/TidierData.jl
+++ b/src/TidierData.jl
@@ -20,7 +20,8 @@ export TidierData_set, across, desc, n, row_number, everything, starts_with, end
       @select, @transmute, @rename, @mutate, @summarize, @summarise, @filter,
       @group_by, @ungroup, @slice, @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, @anti_join, @semi_join,
       @pivot_wider, @pivot_longer, @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_missing, @glimpse, @separate,
-      @unite, @summary, @fill_missing, @slice_sample, @slice_min, @slice_max, @slice_head, @slice_tail, @rename_with, @separate_rows
+      @unite, @summary, @fill_missing, @slice_sample, @slice_min, @slice_max, @slice_head, @slice_tail, @rename_with, @separate_rows,
+      @unnest_longer, @unnest_wider, @nest, @nest_by
 
 # Package global variables
 const code = Ref{Bool}(false) # output DataFrames.jl code?
@@ -51,6 +52,7 @@ include("separate_unite.jl")
 include("summary.jl")
 include("is_type.jl")
 include("missings.jl")
+include("nests.jl")
 
 # Function to set global variables
 """
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 10b2e3e8..5b66f4e2 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3077,4 +3077,181 @@ julia> @separate_rows(df, b:d, ";" )
    5 │     3  dd         5          11
    6 │     3  ee         6          12
 ```
-"""
\ No newline at end of file
+"""
+
+const docstring_unnest_wider =
+"""
+    @unnest_wider(df, columns, names_sep=)
+
+Unnest specified columns of arrays or dictionaries into wider format dataframe with individual columns.
+
+# Arguments
+- `df`: A DataFrame.
+- `columns`: Columns to be unnested. These columns should contain arrays or dictionaries. Dictionarys headings will be converted to column names.
+- `names_sep`: An optional string to specify the separator for creating new column names. If not provided, defaults to no separator.
+
+# Examples
+```jldoctest
+julia> df = DataFrame(name = ["Zaki", "Farida"], attributes = [
+               Dict("age" => 25, "city" => "New York"),
+               Dict("age" => 30, "city" => "Los Angeles")]);
+
+julia> @unnest_wider(df, attributes)
+2×3 DataFrame
+ Row │ name    city         age   
+     │ String  String       Int64 
+─────┼────────────────────────────
+   1 │ Zaki    New York        25
+   2 │ Farida  Los Angeles     30
+
+julia> df2 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
+2×3 DataFrame
+ Row │ a      b       c      
+     │ Int64  Array…  Array… 
+─────┼───────────────────────
+   1 │     1  [1, 2]  [5, 6]
+   2 │     2  [3, 4]  [7, 8]
+
+julia> @unnest_wider(df1, b:c, names_sep = "_")
+2×5 DataFrame
+ Row │ a      b_1    b_2    c_1    c_2   
+     │ Int64  Int64  Int64  Int64  Int64 
+─────┼───────────────────────────────────
+   1 │     1      1      2      5      6
+   2 │     2      3      4      7      8
+```
+"""
+
+const docstring_unnest_longer =
+"""
+    @unnest_longer(df, columns, indices_include=false, keep_empty=false)
+
+Unnest arrays in columns from a DataFrame to create a longer DataFrame with one row for each entry of the array.
+
+# Arguments
+- `df`: A DataFrame.
+- `columnss`: Columns to unnest. Can be a column symbols or a range. 
+- `indices_include`: Optional. When set to `true`, adds an index column for each unnested column, which logs the position of each array entry.
+- `keep_empty`: Optional. When set to `true`, rows with empty arrays are kept, not skipped, and unnested as missing. 
+
+# Examples
+```jldoctest
+julia> df = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
+2×3 DataFrame
+ Row │ a      b       c      
+     │ Int64  Array…  Array… 
+─────┼───────────────────────
+   1 │     1  [1, 2]  [5, 6]
+   2 │     2  [3, 4]  [7, 8]
+
+julia> @unnest_longer(df, 2)
+4×3 DataFrame
+ Row │ a      b      c      
+     │ Int64  Int64  Array… 
+─────┼──────────────────────
+   1 │     1      1  [5, 6]
+   2 │     1      2  [5, 6]
+   3 │     2      3  [7, 8]
+   4 │     2      4  [7, 8]
+
+julia> @unnest_longer(df, b:c, indices_include=true)
+6×5 DataFrame
+ Row │ a     b      c      b_id  c_id 
+     │ Int64 Int64  Int64  Int64 Int64
+─────┼────────────────────────────────
+   1 │     1     1      5      1     1
+   2 │     1     2      6      2     2
+   3 │     2     3      7      1     1
+   4 │     2     4      8      2     2
+
+julia> df2 = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]])
+4×2 DataFrame
+ Row │ x      y            
+     │ Int64  Array…       
+─────┼─────────────────────
+   1 │     1  Any[]
+   2 │     2  Any[1, 2, 3]
+   3 │     3  Any[4, 5]
+   4 │     4  Any[]
+
+julia> @unnest_longer(df2, y, keep_empty = true)
+7×2 DataFrame
+ Row │ x      y       
+     │ Int64  Any     
+─────┼────────────────
+   1 │     1  missing 
+   2 │     2  1
+   3 │     2  2
+   4 │     2  3
+   5 │     3  4
+   6 │     3  5
+   7 │     4  missing 
+```
+"""
+
+const docstring_nest =
+"""
+    @nest(df, new_column = nesting_columns)
+
+Multiple columns are nested into one or more new columns in a DataFrame. 
+# Arguments
+- `df`: A DataFrame 
+- `new_column`: New column name 
+- `nesting_columns`: Columns to be nested into the new_column  
+# Examples
+```jldoctest
+julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, ab = 12:-1:7);
+
+julia> @nest(df, n2 = starts_with("a"), n3 = (x:z))
+6×2 DataFrame
+ Row │ n2       n3         
+     │ Array…   Array…     
+─────┼─────────────────────
+   1 │ [7, 12]  [1, 1, 13]
+   2 │ [8, 11]  [1, 2, 14]
+   3 │ [9, 10]  [1, 3, 15]
+   4 │ [10, 9]  [2, 4, 16]
+   5 │ [11, 8]  [2, 5, 17]
+   6 │ [12, 7]  [3, 6, 18]
+```
+"""
+
+const docstring_nest_by =
+"""
+   @nest_by(df, by; key)
+
+Nest by a column or set of columns, meaning all columns not selected in the `by` argument are nested into one column. This is not a group_by and then nest.
+# Arguments
+- `df`: A DataFrame 
+- `by`: column or columns to remain in the outer dataframe, while the others are nested into one column
+- `key`: optional argument to determine new column name when using `by`
+
+# Examples
+```jldoctest
+julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, b = 12:-1:7);
+
+julia> @nest_by(df, z)
+6×2 DataFrame
+ Row │ z      data          
+     │ Int64  Array…        
+─────┼──────────────────────
+   1 │    13  [1, 1, 7, 12]
+   2 │    14  [1, 2, 8, 11]
+   3 │    15  [1, 3, 9, 10]
+   4 │    16  [2, 4, 10, 9]
+   5 │    17  [2, 5, 11, 8]
+   6 │    18  [3, 6, 12, 7]
+
+julia> @nest_by(df, (a,z), new_column)
+6×3 DataFrame
+ Row │ a      z      new_column 
+     │ Int64  Int64  Array…     
+─────┼──────────────────────────
+   1 │     7     13  [1, 1, 12]
+   2 │     8     14  [1, 2, 11]
+   3 │     9     15  [1, 3, 10]
+   4 │    10     16  [2, 4, 9]
+   5 │    11     17  [2, 5, 8]
+   6 │    12     18  [3, 6, 7]
+```
+"""
diff --git a/src/nests.jl b/src/nests.jl
new file mode 100644
index 00000000..a22ff6a6
--- /dev/null
+++ b/src/nests.jl
@@ -0,0 +1,252 @@
+function unnest_wider(df::Union{DataFrame, GroupedDataFrame}, cols; names_sep::Union{String, Nothing}=nothing)
+  is_grouped = df isa GroupedDataFrame
+  grouping_columns = is_grouped ? groupcols(df) : Symbol[]
+  # Ungroup if necessary
+  df_copy = copy(is_grouped ? parent(df) : df)
+  # getting column names from parse tidy
+  cols_expr = cols isa Expr ? (cols,) : cols
+  column_symbols = names(df_copy, Cols(cols_expr...)) 
+
+  for col in column_symbols
+      col_type = typeof(df_copy[1, col])
+      if col_type <: Dict
+          keys_set = Set{String}()
+          for item in df_copy[!, col]
+              union!(keys_set, keys(item))
+          end
+
+          for key in keys_set
+              new_col_name = names_sep === nothing ? Symbol(key) : Symbol(string(col, names_sep, key))
+              df_copy[!, new_col_name] = getindex.(df_copy[!, col], key)
+          end
+      elseif col_type <: Array
+          n = length(first(df_copy[!, col]))
+          for i in 1:n
+              new_col_name = names_sep === nothing ? Symbol(string(col, i)) : Symbol(string(col, names_sep, i))
+              df_copy[!, new_col_name] = getindex.(df_copy[!, col], i)
+          end
+      else
+          error("Column $col contains neither dictionaries nor arrays")
+      end
+      select!(df_copy, Not(col))
+  end
+   if is_grouped
+    df_copy = groupby(df_copy, grouping_columns)
+   end
+  return df_copy
+end
+
+"""
+$docstring_unnest_wider
+"""
+macro unnest_wider(df, exprs...)
+  names_sep = :(nothing) 
+  if length(exprs) >= 2 && isa(exprs[end], Expr) && exprs[end].head == :(=) && exprs[end].args[1] == :names_sep
+    names_sep = esc(exprs[end].args[2]) 
+    exprs = exprs[1:end-1] 
+  end
+
+  interpolated_exprs = parse_interpolation.(exprs)
+  tidy_exprs = [parse_tidy(i[1]) for i in interpolated_exprs]
+
+  df_expr = quote
+      unnest_wider($(esc(df)), [$(tidy_exprs...)], names_sep=$names_sep)
+  end
+
+  return df_expr
+end
+
+function unnest_longer(df::Union{DataFrame, GroupedDataFrame}, cols; indices_include::Union{Nothing, Bool}=nothing, keep_empty::Bool=false)
+  is_grouped = df isa GroupedDataFrame
+  grouping_columns = is_grouped ? groupcols(df) : Symbol[]
+  df_copy = copy(is_grouped ? parent(df) : df)
+  
+  cols_expr = cols isa Expr ? (cols,) : cols 
+  column_symbols = names(df_copy, Cols(cols_expr...))
+
+  # Handle empty arrays if keep_empty is true
+    if keep_empty && keep_empty === true
+        for col in column_symbols
+         df_copy[!, col] = [isempty(arr) || arr === nothing ? [missing] : arr for arr in df_copy[!, col]]
+        end
+         flattened_df = flatten(df_copy, column_symbols, scalar=Missing)
+        else
+         flattened_df = flatten(df_copy, column_symbols)
+    end 
+
+    if indices_include === true
+        for col in column_symbols
+            col_indices = Symbol(string(col), "_id")
+            indices = [j for sublist in df_copy[!, col] for j in 1:length(sublist)]
+            flattened_df[!, col_indices] = indices
+        end
+    end
+
+    if is_grouped
+        flattened_df = groupby(flattened_df, grouping_columns)
+    end
+
+    return flattened_df
+end
+  
+"""
+$docstring_unnest_longer
+"""
+macro unnest_longer(df, exprs...)
+    indices_include = :(nothing)  
+    keep_empty = :(false)         
+  
+    named_args = filter(e -> isa(e, Expr) && e.head == :(=), exprs)
+    for arg in named_args
+        if arg.args[1] == :indices_include
+            indices_include = esc(arg.args[2])
+        elseif arg.args[1] == :keep_empty
+            keep_empty = esc(arg.args[2])
+        end
+    end
+    column_exprs = filter(e -> !(isa(e, Expr) && e.head == :(=)), exprs)
+  
+    interpolated_exprs = parse_interpolation.(column_exprs)
+    tidy_exprs = [parse_tidy(i[1]) for i in interpolated_exprs]
+  
+    df_expr = quote
+      unnest_longer($(esc(df)), [$(tidy_exprs...)], indices_include=$indices_include, keep_empty = $keep_empty)
+    end
+  
+    return df_expr
+end
+
+
+function nest_by(df::DataFrame; by, key = :data)
+    by_expr = by isa Expr ? (by,) : (by,)
+    by_symbols = names(df, Cols(by_expr...))
+  
+    cols_to_nest = setdiff(names(df), by_symbols)
+  
+    nested_data = map(eachrow(df)) do row
+        [row[c] for c in cols_to_nest]
+    end
+  
+    nested_df = DataFrame()
+    for sym in by_symbols
+        nested_df[!, sym] = df[!, sym]
+    end
+    nested_df[!, key] = nested_data
+  
+    return nested_df
+end
+  
+"""
+$docstring_nest_by
+"""
+macro nest_by(df, args...)
+    if length(args) == 2
+        by_cols, new_col = args
+        new_col_quoted = QuoteNode(new_col)
+    elseif length(args) == 1
+        by_cols = args[1]
+        new_col_quoted = :(:data)  
+    else
+        error("Incorrect number of arguments provided to @nest")
+    end
+  
+    interpolated_by_cols, _, _ = parse_interpolation(by_cols)
+    interpolated_by_cols = parse_tidy(interpolated_by_cols)
+  
+    if @capture(interpolated_by_cols, (first_col:last_col))
+        by_cols_expr = :($(first_col):$(last_col))
+    elseif @capture(interpolated_by_cols, (args__,)) || @capture(interpolated_by_cols, [args__])
+        args = QuoteNode.(args)
+        by_cols_expr = :[$(args...)]
+    else
+        by_cols_expr = quote
+            if typeof($interpolated_by_cols) <: Tuple
+                collect(Symbol.($interpolated_by_cols))
+            else
+                $interpolated_by_cols
+            end
+        end
+    end
+  
+    return quote
+        nest_by($(esc(df)), by = $by_cols_expr, key = $new_col_quoted)
+    end
+end
+
+function nest_pairs(df::DataFrame; kwargs...)
+  result_df = copy(df)
+
+  for (new_col_name, cols) in kwargs
+      if isa(cols, Expr) && cols.head == :(:) && length(cols.args) == 2
+          start_col, end_col = cols.args
+          # Get index range of columns
+          start_idx = findfirst(==(start_col), names(df))
+          end_idx = findfirst(==(end_col), names(df))
+          if isnothing(start_idx) || isnothing(end_idx)
+              throw(ArgumentError("Column range $cols is invalid"))
+          end
+          # Convert range into a list of column names
+          cols = names(df)[start_idx:end_idx]
+      elseif isa(cols, Symbol)
+          cols = [cols]  # Convert single column name into a list
+      end
+
+      # Get the column symbols
+      column_symbols = names(df, Cols(cols))
+
+      # Nest the specified columns into an array
+      nested_column = map(eachrow(df)) do row
+          [row[c] for c in column_symbols]
+      end
+
+      # Add the new nested column
+      result_df[!, new_col_name] = nested_column
+
+      # Optionally remove the original columns that were nested
+       select!(result_df, Not(column_symbols))
+  end
+
+  return result_df
+end
+
+"""
+$docstring_nest
+"""
+macro nest(df, args...)
+  kwargs_exprs = []
+
+  for arg in args
+      if isa(arg, Expr) && arg.head == :(=)
+          key = esc(arg.args[1])  # Extract and escape the key
+
+          # Check if the argument is a range expression
+          if isa(arg.args[2], Expr) && arg.args[2].head == :(:) && length(arg.args[2].args) == 2
+              # Handle range expressions as Between selectors
+              first_col, last_col = arg.args[2].args
+              value_expr = Expr(:call, :Between, esc(first_col), esc(last_col))
+          else
+              # Apply parse_interpolation and parse_tidy to the value
+              interpolated_value, _, _ = parse_interpolation(arg.args[2])
+              tidy_value = parse_tidy(interpolated_value)
+
+              # Use the existing logic for non-range expressions
+              if @capture(tidy_value, (args__,)) || @capture(tidy_value, [args__])
+                  args = QuoteNode.(args)
+                  value_expr = :[$(args...)]
+              else
+                  value_expr = tidy_value
+              end
+          end
+
+          # Construct the keyword argument expression
+          push!(kwargs_exprs, Expr(:kw, key, value_expr))
+      else
+          println("Argument is not recognized as a keyword argument: ", arg)
+      end
+  end
+
+  # Construct the function call to nest24 with keyword arguments
+  return quote
+    nest_pairs($(esc(df)), $(kwargs_exprs...))
+  end
+end
\ No newline at end of file
diff --git a/src/separate_unite.jl b/src/separate_unite.jl
index be34947e..ebcf42d3 100644
--- a/src/separate_unite.jl
+++ b/src/separate_unite.jl
@@ -56,29 +56,32 @@ $docstring_unite
 macro unite(df, new_col, from_cols, sep)
     new_col_quoted = QuoteNode(new_col)
     interpolated_from_cols, _, _ = parse_interpolation(from_cols)
+    interpolated_from_cols = parse_tidy(interpolated_from_cols)
 
-    if @capture(interpolated_from_cols, (args__,)) || @capture(interpolated_from_cols, [args__])
-        args = QuoteNode.(args)
-        from_cols_expr = :[$(args...)]
+    if @capture(interpolated_from_cols, (first_col:last_col))
+      from_cols_expr = :($(first_col):$(last_col))
+    elseif @capture(interpolated_from_cols, (args__,)) || @capture(interpolated_from_cols, [args__])
+      args = QuoteNode.(args)
+      from_cols_expr = :[$(args...)]
     else
-        from_cols_expr = quote
-            if typeof($interpolated_from_cols) <: Tuple
-                collect(Symbol.($interpolated_from_cols))
-
-            else
-                $interpolated_from_cols
-            end
-        end
+      from_cols_expr = quote
+          if typeof($interpolated_from_cols) <: Tuple
+              collect(Symbol.($interpolated_from_cols))
+          else
+            $interpolated_from_cols
+          end
+      end
     end
-    
     return quote
-        unite($(esc(df)), $new_col_quoted, $(from_cols_expr), $(esc(sep)))
+        unite($(esc(df)), $new_col_quoted, [$(from_cols_expr)], $(esc(sep)))
     end
 end
 
-function unite(df::DataFrame, new_col_name::Symbol, cols::Vector{Symbol}, sep::String="_")
+function unite(df::DataFrame, new_col_name::Symbol, columns, sep::String="_")
   new_df = df[:, :]
-  new_df[:, new_col_name] = [join(skipmissing(row), sep) for row in eachrow(df[:, cols])]
+  cols_expr = columns isa Expr ? (columns,) : columns
+  column_symbols = names(df, Cols(cols_expr...)) 
+  new_df[:, new_col_name] = [join(skipmissing(row), sep) for row in eachrow(df[:, column_symbols])]
   return new_df
 end
 

From 913000635cf4450149cb4aab3212b5c6981395f2 Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Tue, 26 Dec 2023 22:56:48 -0500
Subject: [PATCH 02/14] unnest docstring  fixes

---
 src/docstrings.jl | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/docstrings.jl b/src/docstrings.jl
index 5b66f4e2..43d43a1d 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3112,7 +3112,7 @@ julia> df2 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
    1 │     1  [1, 2]  [5, 6]
    2 │     2  [3, 4]  [7, 8]
 
-julia> @unnest_wider(df1, b:c, names_sep = "_")
+julia> @unnest_wider(df2, b:c, names_sep = "_")
 2×5 DataFrame
  Row │ a      b_1    b_2    c_1    c_2   
      │ Int64  Int64  Int64  Int64  Int64 
@@ -3155,14 +3155,14 @@ julia> @unnest_longer(df, 2)
    4 │     2      4  [7, 8]
 
 julia> @unnest_longer(df, b:c, indices_include=true)
-6×5 DataFrame
- Row │ a     b      c      b_id  c_id 
-     │ Int64 Int64  Int64  Int64 Int64
-─────┼────────────────────────────────
-   1 │     1     1      5      1     1
-   2 │     1     2      6      2     2
-   3 │     2     3      7      1     1
-   4 │     2     4      8      2     2
+4×5 DataFrame
+ Row │ a      b      c      b_id   c_id  
+     │ Int64  Int64  Int64  Int64  Int64 
+─────┼───────────────────────────────────
+   1 │     1      1      5      1      1
+   2 │     1      2      6      2      2
+   3 │     2      3      7      1      1
+   4 │     2      4      8      2      2
 
 julia> df2 = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]])
 4×2 DataFrame

From 665fbdf83edbbcc16ac3415ab108f683ccefd67e Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Tue, 26 Dec 2023 23:07:03 -0500
Subject: [PATCH 03/14] fixed nesting documentation

---
 docs/examples/UserGuide/unnest.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/examples/UserGuide/unnest.jl b/docs/examples/UserGuide/unnest.jl
index 9dc4cd19..8606442e 100644
--- a/docs/examples/UserGuide/unnest.jl
+++ b/docs/examples/UserGuide/unnest.jl
@@ -2,6 +2,7 @@
 
 # `@unnest_longer` adds one row per entry of an array, lengthening dataframe by flattening the column or columns. 
 
+using TidierData
 df = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]]);
 
 @chain df begin

From 7096d70f33561131a452516ed0aae95cc8da316e Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sat, 30 Dec 2023 10:37:34 -0500
Subject: [PATCH 04/14] switched nest to dfs, added unnest df support

---
 docs/examples/UserGuide/unnest.jl |  33 +++-
 src/TidierData.jl                 |   2 +-
 src/docstrings.jl                 |  46 +----
 src/nests.jl                      | 306 ++++++++++++++++++------------
 4 files changed, 219 insertions(+), 168 deletions(-)

diff --git a/docs/examples/UserGuide/unnest.jl b/docs/examples/UserGuide/unnest.jl
index 8606442e..51aeeca3 100644
--- a/docs/examples/UserGuide/unnest.jl
+++ b/docs/examples/UserGuide/unnest.jl
@@ -1,6 +1,6 @@
 # ## `@unnest_longer`
 
-# `@unnest_longer` adds one row per entry of an array, lengthening dataframe by flattening the column or columns. 
+# `@unnest_longer` adds one row per entry of an array or dataframe, lengthening dataframe by flattening the column or columns. 
 
 using TidierData
 df = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]]);
@@ -15,9 +15,9 @@ end
     @unnest_longer(y, keep_empty = true, indices_include = true)
 end
 
-# ## @unnest_wider
+# ## `@unnest_wider`
 
-# `@unnest_wider` will widen a column of Dicts or a column(s) of arrays into multiple columns.
+# `@unnest_wider` will widen one or more columns of Dicts, Arrays, Tuples or DataFrames into multiple columns.
 
 df2 = DataFrame(
            name = ["Zaki", "Farida"],
@@ -28,3 +28,30 @@ df2 = DataFrame(
 @chain df2 begin
     @unnest_wider(attributes)
 end
+
+
+# ## Unnesting nested Dataframes with different lengths which contain arrays
+
+df3 = DataFrame(
+    x = 1:3,
+    y = Any[
+        DataFrame(),
+        DataFrame(a = ["A"], b = [14]),
+        DataFrame(a = ["A", "B", "C"], b = [13, 12, 11], c = [4, 4, 4])
+    ]
+)
+# `df3` contains dataframes with different widths that also contain arrays. Chaining together `@unnest_wider` and `@unnest_longer` will unnest the columns to tuples first and then they will be fully unnested after.
+
+@chain df3 begin 
+    @unnest_wider(y)
+    @unnest_longer(a:c, keep_empty = true)
+end
+
+
+# ## `@nest`
+
+# `@nest` nests the selected columns into a dataframe stored in a new column.
+
+df4 = DataFrame(x = ["a", "b", "a", "b", "C", "a"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)
+
+@nest(df4, n2 = starts_with("y"), n3 = a:ab)
\ No newline at end of file
diff --git a/src/TidierData.jl b/src/TidierData.jl
index 25e00b10..c56d6a4f 100644
--- a/src/TidierData.jl
+++ b/src/TidierData.jl
@@ -21,7 +21,7 @@ export TidierData_set, across, desc, n, row_number, everything, starts_with, end
       @group_by, @ungroup, @slice, @arrange, @distinct, @pull, @left_join, @right_join, @inner_join, @full_join, @anti_join, @semi_join,
       @pivot_wider, @pivot_longer, @bind_rows, @bind_cols, @clean_names, @count, @tally, @drop_missing, @glimpse, @separate,
       @unite, @summary, @fill_missing, @slice_sample, @slice_min, @slice_max, @slice_head, @slice_tail, @rename_with, @separate_rows,
-      @unnest_longer, @unnest_wider, @nest, @nest_by
+      @unnest_longer, @unnest_wider, @nest
 
 # Package global variables
 const code = Ref{Bool}(false) # output DataFrames.jl code?
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 43d43a1d..6e9dad36 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3087,7 +3087,7 @@ Unnest specified columns of arrays or dictionaries into wider format dataframe w
 
 # Arguments
 - `df`: A DataFrame.
-- `columns`: Columns to be unnested. These columns should contain arrays or dictionaries. Dictionarys headings will be converted to column names.
+- `columns`: Columns to be unnested. These columns should contain arrays, dictionaries, dataframes, or tuples. Dictionary keys will be converted to column names.
 - `names_sep`: An optional string to specify the separator for creating new column names. If not provided, defaults to no separator.
 
 # Examples
@@ -3130,7 +3130,7 @@ Unnest arrays in columns from a DataFrame to create a longer DataFrame with one
 
 # Arguments
 - `df`: A DataFrame.
-- `columnss`: Columns to unnest. Can be a column symbols or a range. 
+- `columns`: Columns to unnest. Can be a single column or a range of columns, provided the columns have the same number of values in each row.
 - `indices_include`: Optional. When set to `true`, adds an index column for each unnested column, which logs the position of each array entry.
 - `keep_empty`: Optional. When set to `true`, rows with empty arrays are kept, not skipped, and unnested as missing. 
 
@@ -3214,44 +3214,4 @@ julia> @nest(df, n2 = starts_with("a"), n3 = (x:z))
    5 │ [11, 8]  [2, 5, 17]
    6 │ [12, 7]  [3, 6, 18]
 ```
-"""
-
-const docstring_nest_by =
-"""
-   @nest_by(df, by; key)
-
-Nest by a column or set of columns, meaning all columns not selected in the `by` argument are nested into one column. This is not a group_by and then nest.
-# Arguments
-- `df`: A DataFrame 
-- `by`: column or columns to remain in the outer dataframe, while the others are nested into one column
-- `key`: optional argument to determine new column name when using `by`
-
-# Examples
-```jldoctest
-julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, b = 12:-1:7);
-
-julia> @nest_by(df, z)
-6×2 DataFrame
- Row │ z      data          
-     │ Int64  Array…        
-─────┼──────────────────────
-   1 │    13  [1, 1, 7, 12]
-   2 │    14  [1, 2, 8, 11]
-   3 │    15  [1, 3, 9, 10]
-   4 │    16  [2, 4, 10, 9]
-   5 │    17  [2, 5, 11, 8]
-   6 │    18  [3, 6, 12, 7]
-
-julia> @nest_by(df, (a,z), new_column)
-6×3 DataFrame
- Row │ a      z      new_column 
-     │ Int64  Int64  Array…     
-─────┼──────────────────────────
-   1 │     7     13  [1, 1, 12]
-   2 │     8     14  [1, 2, 11]
-   3 │     9     15  [1, 3, 10]
-   4 │    10     16  [2, 4, 9]
-   5 │    11     17  [2, 5, 8]
-   6 │    12     18  [3, 6, 7]
-```
-"""
+"""
\ No newline at end of file
diff --git a/src/nests.jl b/src/nests.jl
index a22ff6a6..63d7232f 100644
--- a/src/nests.jl
+++ b/src/nests.jl
@@ -1,39 +1,79 @@
 function unnest_wider(df::Union{DataFrame, GroupedDataFrame}, cols; names_sep::Union{String, Nothing}=nothing)
-  is_grouped = df isa GroupedDataFrame
-  grouping_columns = is_grouped ? groupcols(df) : Symbol[]
-  # Ungroup if necessary
-  df_copy = copy(is_grouped ? parent(df) : df)
-  # getting column names from parse tidy
-  cols_expr = cols isa Expr ? (cols,) : cols
-  column_symbols = names(df_copy, Cols(cols_expr...)) 
-
-  for col in column_symbols
-      col_type = typeof(df_copy[1, col])
-      if col_type <: Dict
-          keys_set = Set{String}()
+    is_grouped = df isa GroupedDataFrame
+    grouping_columns = is_grouped ? groupcols(df) : Symbol[]
+    df_copy = copy(is_grouped ? parent(df) : df)
+  
+    cols_expr = cols isa Expr ? (cols,) : cols
+    column_symbols = names(df_copy, Cols(cols_expr...))
+  
+    for col in column_symbols
+        col_type = typeof(df_copy[1, col])
+    
+        if col_type <: DataFrame
+          # Handling DataFrames
+          nested_col_names = unique([name for i in 1:nrow(df_copy) for name in names(df_copy[i, col])])
+    
+          for nested_col in nested_col_names
+              new_col_name = names_sep === nothing ? nested_col : Symbol(string(col, names_sep, nested_col))
+              combined_nested_col = Any[missing for _ in 1:nrow(df_copy)]
+    
+              for row in 1:nrow(df_copy)
+                  nested_df = df_copy[row, col]
+                  if ncol(nested_df) > 0 && haskey(nested_df[1, :], nested_col)
+                      combined_nested_col[row] = nested_df[!, nested_col]
+                      # Extract single value if there's only one element
+                      if length(combined_nested_col[row]) == 1
+                          combined_nested_col[row] = combined_nested_col[row][1]
+                      end
+                  end
+              end
+              df_copy[!, new_col_name] = combined_nested_col
+          end
+      elseif col_type <: NamedTuple || col_type <: Union{NamedTuple, Missing}
+          # Handling NamedTuples and missing values
+          keys_set = Set{Symbol}()
           for item in df_copy[!, col]
-              union!(keys_set, keys(item))
+              if item !== missing
+                  union!(keys_set, keys(item))
+              end
           end
-
+    
           for key in keys_set
-              new_col_name = names_sep === nothing ? Symbol(key) : Symbol(string(col, names_sep, key))
-              df_copy[!, new_col_name] = getindex.(df_copy[!, col], key)
-          end
-      elseif col_type <: Array
-          n = length(first(df_copy[!, col]))
-          for i in 1:n
-              new_col_name = names_sep === nothing ? Symbol(string(col, i)) : Symbol(string(col, names_sep, i))
-              df_copy[!, new_col_name] = getindex.(df_copy[!, col], i)
+              new_col_name = names_sep === nothing ? key : Symbol(string(col, names_sep, key))
+              df_copy[!, new_col_name] = [item !== missing ? get(item, key, missing) : missing for item in df_copy[!, col]]
           end
-      else
-          error("Column $col contains neither dictionaries nor arrays")
-      end
-      select!(df_copy, Not(col))
-  end
-   if is_grouped
-    df_copy = groupby(df_copy, grouping_columns)
-   end
-  return df_copy
+      
+  
+        elseif col_type <: Dict
+            keys_set = Set{String}()
+            for item in df_copy[!, col]
+                union!(keys_set, keys(item))
+            end
+  
+            for key in keys_set
+                new_col_name = names_sep === nothing ? Symbol(key) : Symbol(string(col, names_sep, key))
+                df_copy[!, new_col_name] = getindex.(df_copy[!, col], key)
+            end
+  
+        elseif col_type <: Array
+            n = length(first(df_copy[!, col]))
+            for i in 1:n
+                new_col_name = names_sep === nothing ? Symbol(string(col, i)) : Symbol(string(col, names_sep, i))
+                df_copy[!, new_col_name] = getindex.(df_copy[!, col], i)
+            end
+  
+        else
+            error("Column $col contains neither dictionaries nor arrays nor DataFrames")
+        end
+  
+        select!(df_copy, Not(col))
+    end
+  
+    if is_grouped
+        df_copy = groupby(df_copy, grouping_columns)
+    end
+  
+    return df_copy
 end
 
 """
@@ -57,35 +97,40 @@ macro unnest_wider(df, exprs...)
 end
 
 function unnest_longer(df::Union{DataFrame, GroupedDataFrame}, cols; indices_include::Union{Nothing, Bool}=nothing, keep_empty::Bool=false)
-  is_grouped = df isa GroupedDataFrame
-  grouping_columns = is_grouped ? groupcols(df) : Symbol[]
-  df_copy = copy(is_grouped ? parent(df) : df)
+    is_grouped = df isa GroupedDataFrame
+    grouping_columns = is_grouped ? groupcols(df) : Symbol[]
+    df_copy = copy(is_grouped ? parent(df) : df)
+  
+    cols_expr = cols isa Expr ? (cols,) : cols 
+    column_symbols = names(df_copy, Cols(cols_expr...))
+  
+    # Preprocess columns
+    for col in column_symbols
+        df_copy[!, col] = [ismissing(x) ? (keep_empty ? [missing] : missing) :
+                           isa(x, DataFrame) ? (nrow(x) > 0 ? Tables.rowtable(x) : (keep_empty ? [missing] : [])) :
+                           isempty(x) ? (keep_empty ? [missing] : x) : 
+                           x for x in df_copy[!, col]]
+    end
+  
+    # Apply filter if keep_empty is false
+    if !keep_empty
+      df_copy = filter(row -> !any(ismissing, [row[col] for col in column_symbols]), df_copy)
+    end
+    # Flatten the dataframe
+    flattened_df = flatten(df_copy, column_symbols)
   
-  cols_expr = cols isa Expr ? (cols,) : cols 
-  column_symbols = names(df_copy, Cols(cols_expr...))
-
-  # Handle empty arrays if keep_empty is true
-    if keep_empty && keep_empty === true
-        for col in column_symbols
-         df_copy[!, col] = [isempty(arr) || arr === nothing ? [missing] : arr for arr in df_copy[!, col]]
-        end
-         flattened_df = flatten(df_copy, column_symbols, scalar=Missing)
-        else
-         flattened_df = flatten(df_copy, column_symbols)
-    end 
-
     if indices_include === true
         for col in column_symbols
             col_indices = Symbol(string(col), "_id")
-            indices = [j for sublist in df_copy[!, col] for j in 1:length(sublist)]
+            indices = [j for i in 1:nrow(df_copy) for j in 1:length(df_copy[i, col])]
             flattened_df[!, col_indices] = indices
         end
     end
-
+  
     if is_grouped
         flattened_df = groupby(flattened_df, grouping_columns)
     end
-
+  
     return flattened_df
 end
   
@@ -117,66 +162,12 @@ macro unnest_longer(df, exprs...)
 end
 
 
-function nest_by(df::DataFrame; by, key = :data)
-    by_expr = by isa Expr ? (by,) : (by,)
-    by_symbols = names(df, Cols(by_expr...))
-  
-    cols_to_nest = setdiff(names(df), by_symbols)
+function nest_pairs(df; kwargs...) 
+    df_copy = copy(df)
   
-    nested_data = map(eachrow(df)) do row
-        [row[c] for c in cols_to_nest]
-    end
-  
-    nested_df = DataFrame()
-    for sym in by_symbols
-        nested_df[!, sym] = df[!, sym]
-    end
-    nested_df[!, key] = nested_data
-  
-    return nested_df
-end
-  
-"""
-$docstring_nest_by
-"""
-macro nest_by(df, args...)
-    if length(args) == 2
-        by_cols, new_col = args
-        new_col_quoted = QuoteNode(new_col)
-    elseif length(args) == 1
-        by_cols = args[1]
-        new_col_quoted = :(:data)  
-    else
-        error("Incorrect number of arguments provided to @nest")
-    end
-  
-    interpolated_by_cols, _, _ = parse_interpolation(by_cols)
-    interpolated_by_cols = parse_tidy(interpolated_by_cols)
-  
-    if @capture(interpolated_by_cols, (first_col:last_col))
-        by_cols_expr = :($(first_col):$(last_col))
-    elseif @capture(interpolated_by_cols, (args__,)) || @capture(interpolated_by_cols, [args__])
-        args = QuoteNode.(args)
-        by_cols_expr = :[$(args...)]
-    else
-        by_cols_expr = quote
-            if typeof($interpolated_by_cols) <: Tuple
-                collect(Symbol.($interpolated_by_cols))
-            else
-                $interpolated_by_cols
-            end
-        end
-    end
-  
-    return quote
-        nest_by($(esc(df)), by = $by_cols_expr, key = $new_col_quoted)
-    end
-end
-
-function nest_pairs(df::DataFrame; kwargs...)
-  result_df = copy(df)
-
-  for (new_col_name, cols) in kwargs
+    for (new_col_name, cols) in kwargs
+      # This section here was unavoidable to maintain tidy selection
+      # Check if cols is a range expression (e.g., :z:b)
       if isa(cols, Expr) && cols.head == :(:) && length(cols.args) == 2
           start_col, end_col = cols.args
           # Get index range of columns
@@ -190,25 +181,41 @@ function nest_pairs(df::DataFrame; kwargs...)
       elseif isa(cols, Symbol)
           cols = [cols]  # Convert single column name into a list
       end
-
+  
       # Get the column symbols
       column_symbols = names(df, Cols(cols))
-
+  
       # Nest the specified columns into an array
       nested_column = map(eachrow(df)) do row
-          [row[c] for c in column_symbols]
+        DataFrame(Dict(c => [row[c]] for c in column_symbols))
       end
-
+  
       # Add the new nested column
-      result_df[!, new_col_name] = nested_column
-
-      # Optionally remove the original columns that were nested
-       select!(result_df, Not(column_symbols))
-  end
+      df_copy[!, new_col_name] = nested_column
+  
+       select!(df_copy, Not(column_symbols))
+    end
+  
+    return df_copy
+end
 
-  return result_df
+# For groups. Its a little bit slow i think but it works. 
+# I am not sure if this is something that could ungroup -> regroup
+# so for now I have opted for the safer strategy
+function nest_pairs(gdf::GroupedDataFrame; kwargs...)
+    group_cols = groupcols(gdf)
+    results = []
+    for group in gdf
+        # Convert the group to a DataFrame
+        df_group = DataFrame(group)
+        processed_group = nest_pairs(df_group; kwargs...)
+        push!(results, processed_group)
+    end
+    combined_df = vcat(results...)
+    return groupby(combined_df, group_cols)
 end
 
+
 """
 $docstring_nest
 """
@@ -218,7 +225,7 @@ macro nest(df, args...)
   for arg in args
       if isa(arg, Expr) && arg.head == :(=)
           key = esc(arg.args[1])  # Extract and escape the key
-
+          # this extra processing was unavoidable for some reason to enable tidy selection
           # Check if the argument is a range expression
           if isa(arg.args[2], Expr) && arg.args[2].head == :(:) && length(arg.args[2].args) == 2
               # Handle range expressions as Between selectors
@@ -249,4 +256,61 @@ macro nest(df, args...)
   return quote
     nest_pairs($(esc(df)), $(kwargs_exprs...))
   end
-end
\ No newline at end of file
+end
+
+
+#function nest_by(df::DataFrame; by, key = :data)
+#    by_expr = by isa Expr ? (by,) : (by,)
+#    by_symbols = names(df, Cols(by_expr...))
+  
+#    cols_to_nest = setdiff(names(df), by_symbols)
+  
+#    nested_data = map(eachrow(df)) do row
+#        [row[c] for c in cols_to_nest]
+#    end
+  
+#    nested_df = DataFrame()
+#    for sym in by_symbols
+#        nested_df[!, sym] = df[!, sym]
+#    end
+#    nested_df[!, key] = nested_data
+#  
+#    return nested_df
+#end
+  
+#"""
+#$docstring_nest_by
+#"""
+#macro nest_by(df, args...)
+#    if length(args) == 2
+#        by_cols, new_col = args
+#        new_col_quoted = QuoteNode(new_col)
+#    elseif length(args) == 1
+#        by_cols = args[1]
+#        new_col_quoted = :(:data)  
+#    else
+#        error("Incorrect number of arguments provided to @nest")
+#    end
+#  
+#    interpolated_by_cols, _, _ = parse_interpolation(by_cols)
+#    interpolated_by_cols = parse_tidy(interpolated_by_cols)
+#  
+#    if @capture(interpolated_by_cols, (first_col:last_col))
+#        by_cols_expr = :($(first_col):$(last_col))
+#    elseif @capture(interpolated_by_cols, (args__,)) || @capture(interpolated_by_cols, [args__])
+#        args = QuoteNode.(args)
+#        by_cols_expr = :[$(args...)]
+#    else
+#        by_cols_expr = quote
+#            if typeof($interpolated_by_cols) <: Tuple
+#                collect(Symbol.($interpolated_by_cols))
+#            else
+#                $interpolated_by_cols
+#            end
+#        end
+#    end
+#  
+#    return quote
+#        nest_by($(esc(df)), by = $by_cols_expr, key = $new_col_quoted)
+#    end
+#end
\ No newline at end of file

From 934d14bf5036e3604f872cd8a33843f890a66d6d Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sat, 30 Dec 2023 10:43:59 -0500
Subject: [PATCH 05/14] updated nest docstring

---
 src/docstrings.jl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/docstrings.jl b/src/docstrings.jl
index 6e9dad36..d10fa0b7 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3202,16 +3202,16 @@ Multiple columns are nested into one or more new columns in a DataFrame.
 ```jldoctest
 julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, ab = 12:-1:7);
 
-julia> @nest(df, n2 = starts_with("a"), n3 = (x:z))
-6×2 DataFrame
- Row │ n2       n3         
-     │ Array…   Array…     
-─────┼─────────────────────
-   1 │ [7, 12]  [1, 1, 13]
-   2 │ [8, 11]  [1, 2, 14]
-   3 │ [9, 10]  [1, 3, 15]
-   4 │ [10, 9]  [2, 4, 16]
-   5 │ [11, 8]  [2, 5, 17]
-   6 │ [12, 7]  [3, 6, 18]
+julia> @nest(df, n2 = starts_with("a"), n3 = (y:z))
+6×3 DataFrame
+ Row │ x      n2             n3            
+     │ Int64  DataFrame      DataFrame     
+─────┼─────────────────────────────────────
+   1 │     1  1×2 DataFrame  1×2 DataFrame 
+   2 │     1  1×2 DataFrame  1×2 DataFrame 
+   3 │     1  1×2 DataFrame  1×2 DataFrame 
+   4 │     2  1×2 DataFrame  1×2 DataFrame 
+   5 │     2  1×2 DataFrame  1×2 DataFrame 
+   6 │     3  1×2 DataFrame  1×2 DataFrame 
 ```
 """
\ No newline at end of file

From ed1b5eeb2dde4e81faa712b6ce63fa36376e6a57 Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sat, 30 Dec 2023 11:11:56 -0500
Subject: [PATCH 06/14] bumps version, updates news.

---
 NEWS.md      | 6 ++++++
 Project.toml | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 83745563..8febdb2b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,11 @@
 # TidierData.jl updates
 
+## v0.14.4 - 2023-12-30
+- Adds `@unnest_wider()`
+- Adds `@unnest_longer()`
+- Adds `@nest()`
+- Fixes tidy selection in `@unite()`
+
 ## v0.14.3 - 2023-12-22
 - Adds support for interpolation and tidy selection in `@fill_missing`
 - Fixes tidy selection in `@separate_rows()`
diff --git a/Project.toml b/Project.toml
index 20b11173..2bc6ccb4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierData"
 uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
 authors = ["Karandeep Singh"]
-version = "0.14.3"
+version = "0.14.4"
 
 [deps]
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

From f75ec0212b109fe96443a004fce3c6c8a63a728a Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sat, 30 Dec 2023 11:14:55 -0500
Subject: [PATCH 07/14] update readme

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index ec736967..2f178683 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,7 @@ TidierData.jl currently supports the following top-level macros:
 - `@pivot_wider()` and `@pivot_longer()`
 - `@separate()`, `@separate_rows()`, and `@unite()`
 - `@drop_missing()` and `@fill_missing()`
+- `@unnest_longer()`, `@unnest_wider()`, `@nest()`
 - `@clean_names()` (as in R's `janitor::clean_names()` function)
 - `@summary()` (as in R's `summary()` function)
 

From 2b95aeb0ccc8c9d6a9b4107fa14d92d60a09baef Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sun, 31 Dec 2023 14:01:20 -0500
Subject: [PATCH 08/14] corrected nest function

---
 docs/examples/UserGuide/unnest.jl |  2 +-
 src/docstrings.jl                 | 13 +++-----
 src/nests.jl                      | 55 ++++++++++++++++---------------
 3 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/docs/examples/UserGuide/unnest.jl b/docs/examples/UserGuide/unnest.jl
index 51aeeca3..1c7c7934 100644
--- a/docs/examples/UserGuide/unnest.jl
+++ b/docs/examples/UserGuide/unnest.jl
@@ -54,4 +54,4 @@ end
 
 df4 = DataFrame(x = ["a", "b", "a", "b", "C", "a"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)
 
-@nest(df4, n2 = starts_with("y"), n3 = a:ab)
\ No newline at end of file
+@nest(df4, n2 = starts_with("b"))
\ No newline at end of file
diff --git a/src/docstrings.jl b/src/docstrings.jl
index d10fa0b7..d2cdacd0 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3203,15 +3203,12 @@ Multiple columns are nested into one or more new columns in a DataFrame.
 julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, ab = 12:-1:7);
 
 julia> @nest(df, n2 = starts_with("a"), n3 = (y:z))
-6×3 DataFrame
- Row │ x      n2             n3            
+3×3 DataFrame
+ Row │ x      n3             n2            
      │ Int64  DataFrame      DataFrame     
 ─────┼─────────────────────────────────────
-   1 │     1  1×2 DataFrame  1×2 DataFrame 
-   2 │     1  1×2 DataFrame  1×2 DataFrame 
-   3 │     1  1×2 DataFrame  1×2 DataFrame 
-   4 │     2  1×2 DataFrame  1×2 DataFrame 
-   5 │     2  1×2 DataFrame  1×2 DataFrame 
-   6 │     3  1×2 DataFrame  1×2 DataFrame 
+   1 │     1  3×2 DataFrame  1×2 DataFrame 
+   2 │     2  2×2 DataFrame  1×2 DataFrame 
+   3 │     3  1×2 DataFrame  1×2 DataFrame 
 ```
 """
\ No newline at end of file
diff --git a/src/nests.jl b/src/nests.jl
index 63d7232f..c1c27dcd 100644
--- a/src/nests.jl
+++ b/src/nests.jl
@@ -162,42 +162,43 @@ macro unnest_longer(df, exprs...)
 end
 
 
-function nest_pairs(df; kwargs...) 
+function nest_pairs(df; kwargs...)
     df_copy = copy(df)
+    nested_dataframes = Dict()
+    grouping_columns = names(df)
   
     for (new_col_name, cols) in kwargs
-      # This section here was unavoidable to maintain tidy selection
-      # Check if cols is a range expression (e.g., :z:b)
-      if isa(cols, Expr) && cols.head == :(:) && length(cols.args) == 2
-          start_col, end_col = cols.args
-          # Get index range of columns
-          start_idx = findfirst(==(start_col), names(df))
-          end_idx = findfirst(==(end_col), names(df))
-          if isnothing(start_idx) || isnothing(end_idx)
-              throw(ArgumentError("Column range $cols is invalid"))
-          end
-          # Convert range into a list of column names
-          cols = names(df)[start_idx:end_idx]
-      elseif isa(cols, Symbol)
-          cols = [cols]  # Convert single column name into a list
-      end
+        if isa(cols, Expr) && cols.head == :(:) && length(cols.args) == 2
+            start_col, end_col = cols.args
+            start_idx = findfirst(==(start_col), names(df))
+            end_idx = findfirst(==(end_col), names(df))
+            if isnothing(start_idx) || isnothing(end_idx)
+                throw(ArgumentError("Column range $cols is invalid"))
+            end
+            cols = names(df)[start_idx:end_idx]
+        elseif isa(cols, Symbol)
+            cols = [cols]  
+        end
   
-      # Get the column symbols
-      column_symbols = names(df, Cols(cols))
+        column_symbols = names(df, Cols(cols))
+        grouping_columns = setdiff(grouping_columns, column_symbols)
+        grouped_df = groupby(df, grouping_columns)
   
-      # Nest the specified columns into an array
-      nested_column = map(eachrow(df)) do row
-        DataFrame(Dict(c => [row[c]] for c in column_symbols))
-      end
+        nested_dataframes[new_col_name] = [DataFrame(select(sub_df, column_symbols)) for sub_df in grouped_df]
+    end
   
-      # Add the new nested column
-      df_copy[!, new_col_name] = nested_column
+    # Creating a new DataFrame with all grouping columns
+    unique_groups = unique(df[:, grouping_columns])
+    new_df = DataFrame(unique_groups)
   
-       select!(df_copy, Not(column_symbols))
+    # Aligning and adding the nested DataFrame columns
+    for (new_col_name, nested_df_list) in nested_dataframes
+        aligned_nested_df = [nested_df_list[i] for i in 1:nrow(new_df)]
+        new_df[!, new_col_name] = aligned_nested_df
     end
   
-    return df_copy
-end
+    return new_df
+  end
 
 # For groups. Its a little bit slow i think but it works. 
 # I am not sure if this is something that could ungroup -> regroup

From bca8d9129ac7301d6d706554a7a6058ecd33825e Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sun, 31 Dec 2023 21:34:19 -0500
Subject: [PATCH 09/14] fixed unnest_wider

---
 src/nests.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/nests.jl b/src/nests.jl
index c1c27dcd..f1530d03 100644
--- a/src/nests.jl
+++ b/src/nests.jl
@@ -68,7 +68,11 @@ function unnest_wider(df::Union{DataFrame, GroupedDataFrame}, cols; names_sep::U
   
         select!(df_copy, Not(col))
     end
-  
+    # if there are arrays of obersvations following a nest and now they are being unnested, 
+    # this will flatten them to the original dataframe. 
+    new_cols = setdiff(names(df_copy), names(df))
+    df_copy = flatten(df_copy, new_cols)
+
     if is_grouped
         df_copy = groupby(df_copy, grouping_columns)
     end

From fc583198211faff0f7178560039faf7daeea1ecf Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Sun, 31 Dec 2023 21:44:00 -0500
Subject: [PATCH 10/14] actually fixed unnest_wider

---
 src/nests.jl | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/nests.jl b/src/nests.jl
index f1530d03..759f4026 100644
--- a/src/nests.jl
+++ b/src/nests.jl
@@ -70,9 +70,14 @@ function unnest_wider(df::Union{DataFrame, GroupedDataFrame}, cols; names_sep::U
     end
     # if there are arrays of obersvations following a nest and now they are being unnested, 
     # this will flatten them to the original dataframe. 
-    new_cols = setdiff(names(df_copy), names(df))
-    df_copy = flatten(df_copy, new_cols)
+      new_cols = setdiff(names(df_copy), names(df))
+     # df_copy = flatten(df_copy, new_cols)
+     cols_to_flatten = [col for col in new_cols if any(cell -> cell isa Array, df_copy[!, col])]
 
+     # Apply flatten selectively
+     if !isempty(cols_to_flatten)
+       df_copy = flatten(df_copy, cols_to_flatten)
+    end
     if is_grouped
         df_copy = groupby(df_copy, grouping_columns)
     end

From 60447e602b2758f9dd415c5d896eebf48eab026d Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Mon, 1 Jan 2024 09:19:17 -0500
Subject: [PATCH 11/14] properly fixed nest, reverted unnest_wider

---
 docs/examples/UserGuide/unnest.jl |  9 ++++++++-
 src/docstrings.jl                 | 12 +++++------
 src/nests.jl                      | 33 +++++++++++++++++++------------
 3 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/docs/examples/UserGuide/unnest.jl b/docs/examples/UserGuide/unnest.jl
index 1c7c7934..e879576a 100644
--- a/docs/examples/UserGuide/unnest.jl
+++ b/docs/examples/UserGuide/unnest.jl
@@ -54,4 +54,11 @@ end
 
 df4 = DataFrame(x = ["a", "b", "a", "b", "C", "a"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)
 
-@nest(df4, n2 = starts_with("b"))
\ No newline at end of file
+nested_df = @nest(df4, n2 = starts_with("a"), n3 = y:yz)
+
+# To return to the original dataframe
+
+@chain nested_df begin
+    @unnest_wider(n3:n2)
+    @unnest_longer(y:ab)
+  end
\ No newline at end of file
diff --git a/src/docstrings.jl b/src/docstrings.jl
index d2cdacd0..aee9866b 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3204,11 +3204,11 @@ julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, ab =
 
 julia> @nest(df, n2 = starts_with("a"), n3 = (y:z))
 3×3 DataFrame
- Row │ x      n3             n2            
-     │ Int64  DataFrame      DataFrame     
-─────┼─────────────────────────────────────
-   1 │     1  3×2 DataFrame  1×2 DataFrame 
-   2 │     2  2×2 DataFrame  1×2 DataFrame 
-   3 │     3  1×2 DataFrame  1×2 DataFrame 
+ Row │ x       n3             n2            
+     │ String  DataFrame      DataFrame     
+─────┼──────────────────────────────────────
+   1 │ a       3×2 DataFrame  3×2 DataFrame 
+   2 │ b       2×2 DataFrame  2×2 DataFrame 
+   3 │ C       1×2 DataFrame  1×2 DataFrame 
 ```
 """
\ No newline at end of file
diff --git a/src/nests.jl b/src/nests.jl
index 759f4026..061aab89 100644
--- a/src/nests.jl
+++ b/src/nests.jl
@@ -68,16 +68,7 @@ function unnest_wider(df::Union{DataFrame, GroupedDataFrame}, cols; names_sep::U
   
         select!(df_copy, Not(col))
     end
-    # if there are arrays of obersvations following a nest and now they are being unnested, 
-    # this will flatten them to the original dataframe. 
-      new_cols = setdiff(names(df_copy), names(df))
-     # df_copy = flatten(df_copy, new_cols)
-     cols_to_flatten = [col for col in new_cols if any(cell -> cell isa Array, df_copy[!, col])]
 
-     # Apply flatten selectively
-     if !isempty(cols_to_flatten)
-       df_copy = flatten(df_copy, cols_to_flatten)
-    end
     if is_grouped
         df_copy = groupby(df_copy, grouping_columns)
     end
@@ -176,7 +167,8 @@ function nest_pairs(df; kwargs...)
     nested_dataframes = Dict()
     grouping_columns = names(df)
   
-    for (new_col_name, cols) in kwargs
+    # Determine grouping columns based on all specified column sets
+    for (_, cols) in kwargs
         if isa(cols, Expr) && cols.head == :(:) && length(cols.args) == 2
             start_col, end_col = cols.args
             start_idx = findfirst(==(start_col), names(df))
@@ -186,13 +178,28 @@ function nest_pairs(df; kwargs...)
             end
             cols = names(df)[start_idx:end_idx]
         elseif isa(cols, Symbol)
-            cols = [cols]  
+            cols = [cols]
         end
   
         column_symbols = names(df, Cols(cols))
         grouping_columns = setdiff(grouping_columns, column_symbols)
-        grouped_df = groupby(df, grouping_columns)
+    end
+  
+    # Group the DataFrame once using these grouping columns
+    grouped_df = groupby(df_copy, grouping_columns)
+  
+    # Nest each specified set of columns based on the single grouped DataFrame
+    for (new_col_name, cols) in kwargs
+        if isa(cols, Expr) && cols.head == :(:) && length(cols.args) == 2
+            start_col, end_col = cols.args
+            start_idx = findfirst(==(start_col), names(df))
+            end_idx = findfirst(==(end_col), names(df))
+            cols = names(df)[start_idx:end_idx]
+        elseif isa(cols, Symbol)
+            cols = [cols]
+        end
   
+        column_symbols = names(df, Cols(cols))
         nested_dataframes[new_col_name] = [DataFrame(select(sub_df, column_symbols)) for sub_df in grouped_df]
     end
   
@@ -207,7 +214,7 @@ function nest_pairs(df; kwargs...)
     end
   
     return new_df
-  end
+end
 
 # For groups. Its a little bit slow i think but it works. 
 # I am not sure if this is something that could ungroup -> regroup

From 8f45b2c8c730d413ec6407b1b13119037aac9923 Mon Sep 17 00:00:00 2001
From: drizk1 <rizkytennis@gmail.com>
Date: Mon, 1 Jan 2024 09:26:04 -0500
Subject: [PATCH 12/14] fixed docstring spacing

---
 src/docstrings.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/docstrings.jl b/src/docstrings.jl
index aee9866b..2411d2c6 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -3202,13 +3202,13 @@ Multiple columns are nested into one or more new columns in a DataFrame.
 ```jldoctest
 julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, ab = 12:-1:7);
 
-julia> @nest(df, n2 = starts_with("a"), n3 = (y:z))
+julia> @nest(df, n3 = (y:z), n2 = starts_with("a"))
 3×3 DataFrame
- Row │ x       n3             n2            
-     │ String  DataFrame      DataFrame     
-─────┼──────────────────────────────────────
-   1 │ a       3×2 DataFrame  3×2 DataFrame 
-   2 │ b       2×2 DataFrame  2×2 DataFrame 
-   3 │ C       1×2 DataFrame  1×2 DataFrame 
+ Row │ x      n3             n2            
+     │ Int64  DataFrame      DataFrame     
+─────┼─────────────────────────────────────
+   1 │     1  3×2 DataFrame  3×2 DataFrame 
+   2 │     2  2×2 DataFrame  2×2 DataFrame 
+   3 │     3  1×2 DataFrame  1×2 DataFrame 
 ```
 """
\ No newline at end of file

From 9bf411ddbb8c5621c4aecfa674fce47695470b03 Mon Sep 17 00:00:00 2001
From: Karandeep Singh <karandeep@gmail.com>
Date: Tue, 2 Jan 2024 18:00:46 -0500
Subject: [PATCH 13/14] Minor clean-up to docstrings, README.md, and
 documentation.

---
 README.md                                     |   2 +-
 .../UserGuide/{unnest.jl => nesting.jl}       |  40 ++---
 docs/mkdocs.yml                               |   2 +-
 docs/src/index.md                             |   3 +-
 src/docstrings.jl                             | 144 +++++++++++++++---
 5 files changed, 151 insertions(+), 40 deletions(-)
 rename docs/examples/UserGuide/{unnest.jl => nesting.jl} (90%)

diff --git a/README.md b/README.md
index 2f178683..66b021f1 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ TidierData.jl currently supports the following top-level macros:
 - `@pivot_wider()` and `@pivot_longer()`
 - `@separate()`, `@separate_rows()`, and `@unite()`
 - `@drop_missing()` and `@fill_missing()`
-- `@unnest_longer()`, `@unnest_wider()`, `@nest()`
+- `@unnest_longer()`, `@unnest_wider()`, and `@nest()`
 - `@clean_names()` (as in R's `janitor::clean_names()` function)
 - `@summary()` (as in R's `summary()` function)
 
diff --git a/docs/examples/UserGuide/unnest.jl b/docs/examples/UserGuide/nesting.jl
similarity index 90%
rename from docs/examples/UserGuide/unnest.jl
rename to docs/examples/UserGuide/nesting.jl
index e879576a..001be5ee 100644
--- a/docs/examples/UserGuide/unnest.jl
+++ b/docs/examples/UserGuide/nesting.jl
@@ -1,3 +1,25 @@
+# ## `@nest`
+
+# `@nest` packs each selected set of columns into a new column of dataframes, leaving one row per unique combination of the remaining columns.
+
+df4 = DataFrame(x = ["a", "b", "a", "b", "C", "a"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)
+
+nested_df = @nest(df4, n2 = starts_with("a"), n3 = y:yz)
+
+# To return to the original dataframe, you can unnest wider and then longer.
+
+@chain nested_df begin
+    @unnest_wider(n3:n2)
+    @unnest_longer(y:ab)
+end
+
+# Or you can unnest longer and then wider.
+
+@chain nested_df begin
+  @unnest_longer(n3:n2)
+  @unnest_wider(n3:n2)
+end
+
 # ## `@unnest_longer`
 
 # `@unnest_longer` adds one row per entry of an array or dataframe, lengthening dataframe by flattening the column or columns. 
@@ -45,20 +67,4 @@ df3 = DataFrame(
 @chain df3 begin 
     @unnest_wider(y)
     @unnest_longer(a:c, keep_empty = true)
-end
-
-
-# ## `@nest`
-
-# Nest columns into a dataframe nested into a new column
-
-df4 = DataFrame(x = ["a", "b", "a", "b", "C", "a"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)
-
-nested_df = @nest(df4, n2 = starts_with("a"), n3 = y:yz)
-
-# To return to the original dataframe
-
-@chain nested_df begin
-    @unnest_wider(n3:n2)
-    @unnest_longer(y:ab)
-  end
\ No newline at end of file
+end
\ No newline at end of file
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index f48693c9..a745da80 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -132,7 +132,7 @@ nav:
   - "Binding" : "examples/generated/UserGuide/binding.md" 
   - "Pivoting": "examples/generated/UserGuide/pivots.md"
   - "Separating" : "examples/generated/UserGuide/sep_unite.md"
-  - "Unnesting" : "examples/generated/UserGuide/unnest.md"
+  - "Nesting" : "examples/generated/UserGuide/nesting.md"
   - "@summary" : "examples/generated/UserGuide/summary.md"
   - "Column names": "examples/generated/UserGuide/column_names.md"
   - "Interpolation" : "examples/generated/UserGuide/interpolation.md"
diff --git a/docs/src/index.md b/docs/src/index.md
index 6d7540fa..7425e442 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -104,7 +104,8 @@ TidierData.jl currently supports the following top-level macros:
     - `@bind_rows()` and `@bind_cols()`
     - `@pivot_wider()` and `@pivot_longer()`
     - `@separate()`, `@separate_rows()`, and `@unite()`
-    - `@drop_missing()` and `@fill_missing`
+    - `@drop_missing()` and `@fill_missing()`
+    - `@unnest_longer()`, `@unnest_wider()`, and `@nest()`
     - `@clean_names()` (as in R's `janitor::clean_names()` function)
     - `@summary()` (as in R's `summary()` function)
 ```
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 2411d2c6..a904b81a 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -28,7 +28,7 @@ This function should only be called inside of TidierData.jl macros.
 
 # Examples
 ```jldoctest
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @summarize(across(b, minimum))
@@ -98,7 +98,7 @@ This function should only be called inside of TidierData.jl macros.
 
 # Examples
 ```jldoctest
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @select(where(is_number))
@@ -203,7 +203,7 @@ Select variables in a DataFrame.
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df @select(a, b, c)
 5×3 DataFrame
@@ -360,7 +360,7 @@ Create a new DataFrame with only computed columns.
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @transmute(d = b + c)
@@ -390,7 +390,7 @@ to rename and select columns.
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @rename(d = b, e = c)
@@ -421,7 +421,7 @@ rows as `df`.
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @mutate(d = b + c, b_minus_mean_b = b - mean(b))
@@ -508,7 +508,7 @@ Create a new DataFrame with one row that aggregating all observations from the i
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @summarize(mean_b = mean(b), median_b = median(b))
@@ -560,7 +560,7 @@ Subset a DataFrame and return a copy of DataFrame where specified conditions are
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @filter(b >= mean(b))
@@ -608,7 +608,7 @@ sets of `cols`.
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @group_by(a)
@@ -653,7 +653,7 @@ If this is applied to a `GroupedDataFrame`, then it removes the grouping. If thi
 
 # Examples
 ```jldoctest 
-julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
+julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
 
 julia> @chain df begin
          @group_by(a)
@@ -3200,15 +3200,119 @@ Multiple columns are nested into one or more new columns in a DataFrame.
 - `nesting_columns`: Columns to be nested into the new_column  
 # Examples
 ```jldoctest
-julia> df = DataFrame(x = [1, 1, 1, 2, 2, 3], y = 1:6, z = 13:18, a = 7:12, ab = 12:-1:7);
+julia> df = DataFrame(a = repeat('a':'e', inner = 3),
+                      b = 1:15,
+                      c_1 = 16:30,
+                      c_2 = 31:45);
 
-julia> @nest(df, n3 = (y:z), n2 = starts_with("a"))
-3×3 DataFrame
- Row │ x      n3             n2            
-     │ Int64  DataFrame      DataFrame     
-─────┼─────────────────────────────────────
-   1 │     1  3×2 DataFrame  3×2 DataFrame 
-   2 │     2  2×2 DataFrame  2×2 DataFrame 
-   3 │     3  1×2 DataFrame  1×2 DataFrame 
+julia> @nest(df, data = b:c_2)
+5×2 DataFrame
+ Row │ a     data          
+     │ Char  DataFrame     
+─────┼─────────────────────
+   1 │ a     3×3 DataFrame 
+   2 │ b     3×3 DataFrame 
+   3 │ c     3×3 DataFrame 
+   4 │ d     3×3 DataFrame 
+   5 │ e     3×3 DataFrame 
+
+julia> @nest(df, data_1 = b, data_2 = starts_with("c"))
+5×3 DataFrame
+ Row │ a     data_1         data_2        
+     │ Char  DataFrame      DataFrame     
+─────┼────────────────────────────────────
+   1 │ a     3×1 DataFrame  3×2 DataFrame 
+   2 │ b     3×1 DataFrame  3×2 DataFrame 
+   3 │ c     3×1 DataFrame  3×2 DataFrame 
+   4 │ d     3×1 DataFrame  3×2 DataFrame 
+   5 │ e     3×1 DataFrame  3×2 DataFrame 
+
+julia> @chain df begin
+         @nest(data = b:c_2)
+         @unnest_longer(data)
+       end
+15×2 DataFrame
+ Row │ a     data                         
+     │ Char  NamedTup…                    
+─────┼────────────────────────────────────
+   1 │ a     (b = 1, c_1 = 16, c_2 = 31)
+   2 │ a     (b = 2, c_1 = 17, c_2 = 32)
+   3 │ a     (b = 3, c_1 = 18, c_2 = 33)
+   4 │ b     (b = 4, c_1 = 19, c_2 = 34)
+   5 │ b     (b = 5, c_1 = 20, c_2 = 35)
+   6 │ b     (b = 6, c_1 = 21, c_2 = 36)
+   7 │ c     (b = 7, c_1 = 22, c_2 = 37)
+   8 │ c     (b = 8, c_1 = 23, c_2 = 38)
+   9 │ c     (b = 9, c_1 = 24, c_2 = 39)
+  10 │ d     (b = 10, c_1 = 25, c_2 = 40)
+  11 │ d     (b = 11, c_1 = 26, c_2 = 41)
+  12 │ d     (b = 12, c_1 = 27, c_2 = 42)
+  13 │ e     (b = 13, c_1 = 28, c_2 = 43)
+  14 │ e     (b = 14, c_1 = 29, c_2 = 44)
+  15 │ e     (b = 15, c_1 = 30, c_2 = 45)
+
+julia> @chain df begin
+         @nest(data = b:c_2)
+         @unnest_wider(data)
+       end
+5×4 DataFrame
+ Row │ a     b             c_1           c_2          
+     │ Char  Any           Any           Any          
+─────┼────────────────────────────────────────────────
+   1 │ a     [1, 2, 3]     [16, 17, 18]  [31, 32, 33]
+   2 │ b     [4, 5, 6]     [19, 20, 21]  [34, 35, 36]
+   3 │ c     [7, 8, 9]     [22, 23, 24]  [37, 38, 39]
+   4 │ d     [10, 11, 12]  [25, 26, 27]  [40, 41, 42]
+   5 │ e     [13, 14, 15]  [28, 29, 30]  [43, 44, 45]
+
+julia> @chain df begin
+         @nest(data = -a)
+         @unnest_wider(data) # wider first
+         @unnest_longer(-a)  # then longer
+       end
+15×4 DataFrame
+ Row │ a     b      c_1    c_2   
+     │ Char  Int64  Int64  Int64 
+─────┼───────────────────────────
+   1 │ a         1     16     31
+   2 │ a         2     17     32
+   3 │ a         3     18     33
+   4 │ b         4     19     34
+   5 │ b         5     20     35
+   6 │ b         6     21     36
+   7 │ c         7     22     37
+   8 │ c         8     23     38
+   9 │ c         9     24     39
+  10 │ d        10     25     40
+  11 │ d        11     26     41
+  12 │ d        12     27     42
+  13 │ e        13     28     43
+  14 │ e        14     29     44
+  15 │ e        15     30     45
+
+julia> @chain df begin
+         @nest(data = -a)
+         @unnest_longer(data) # longer first
+         @unnest_wider(-a)    # then wider
+       end
+15×4 DataFrame
+ Row │ a     b      c_2    c_1   
+     │ Char  Int64  Int64  Int64 
+─────┼───────────────────────────
+   1 │ a         1     31     16
+   2 │ a         2     32     17
+   3 │ a         3     33     18
+   4 │ b         4     34     19
+   5 │ b         5     35     20
+   6 │ b         6     36     21
+   7 │ c         7     37     22
+   8 │ c         8     38     23
+   9 │ c         9     39     24
+  10 │ d        10     40     25
+  11 │ d        11     41     26
+  12 │ d        12     42     27
+  13 │ e        13     43     28
+  14 │ e        14     44     29
+  15 │ e        15     45     30
 ```
-"""
\ No newline at end of file
+"""

From 94d418b06386bb38f2ab636d8904401abbbde64f Mon Sep 17 00:00:00 2001
From: Karandeep Singh <karandeep@gmail.com>
Date: Tue, 2 Jan 2024 18:29:11 -0500
Subject: [PATCH 14/14] Fixed docs error that I introduced.

---
 docs/examples/UserGuide/nesting.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/examples/UserGuide/nesting.jl b/docs/examples/UserGuide/nesting.jl
index 001be5ee..585f7c7e 100644
--- a/docs/examples/UserGuide/nesting.jl
+++ b/docs/examples/UserGuide/nesting.jl
@@ -2,6 +2,8 @@
 
 # Nest columns into a dataframe nested into a new column
 
+using TidierData
+
 df4 = DataFrame(x = ["a", "b", "a", "b", "C", "a"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)
 
 nested_df = @nest(df4, n2 = starts_with("a"), n3 = y:yz)
@@ -24,7 +26,6 @@ end
 
 # `@unnest_longer` adds one row per entry of an array or dataframe, lengthening dataframe by flattening the column or columns. 
 
-using TidierData
 df = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]]);
 
 @chain df begin