Skip to content
This repository has been archived by the owner on Sep 26, 2023. It is now read-only.

Commit

Permalink
Add rust examples for expressions/{lists.rs,structs.rs}
Browse files Browse the repository at this point in the history
  • Loading branch information
Atreyagaurav authored and stinodego committed Sep 13, 2023
1 parent eb4859c commit 4af7ec9
Show file tree
Hide file tree
Showing 2 changed files with 261 additions and 0 deletions.
162 changes: 162 additions & 0 deletions docs/src/rust/user-guide/expressions/lists.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// --8<-- [start:setup]
use polars::prelude::*;
// --8<-- [end:setup]
// Runnable examples for the "lists" section of the expressions user guide.
//
// Each region delimited by `--8<-- [start:NAME]` / `--8<-- [end:NAME]` comments is a
// named snippet; keep the markers intact when editing. Every snippet builds a result,
// prints it, and propagates any error out of `main` via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Build the base frame: five station names ("Station 1" .. "Station 5") and one
// space-separated string of readings per station. Some tokens (E1, E2, ...) are
// not numeric.
// --8<-- [start:weather_df]
let stns: Vec<String> = (1..6).map(|i| format!("Station {i}")).collect();
let weather = df!(
"station"=> &stns,
"temperatures"=> &[
"20 5 5 E1 7 13 19 9 6 20",
"18 8 16 11 23 E2 8 E2 E2 E2 90 70 40",
"19 24 E9 16 6 12 10 22",
"E2 E0 15 7 8 10 E1 24 17 13 6",
"14 8 E0 16 22 24 E1",
],
)?;
println!("{}", &weather);
// --8<-- [end:weather_df]

// Split each temperatures string on a single space.
// --8<-- [start:string_to_list]
let out = weather
.clone()
.lazy()
.with_columns([col("temperatures").str().split(" ")])
.collect()?;
println!("{}", &out);
// --8<-- [end:string_to_list]

// Same split as above, followed by an explode of the "temperatures" column.
// --8<-- [start:explode_to_atomic]
let out = weather
.clone()
.lazy()
.with_columns([col("temperatures").str().split(" ")])
.explode(["temperatures"])
.collect()?;
println!("{}", &out);
// --8<-- [end:explode_to_atomic]

// List-namespace operations on the split column: `head(3)` as "top3",
// `slice(-3, 3)` as "bottom_3" (presumably the last three entries; verify the
// offset/length semantics against the polars docs), and `lengths()` as "obs".
// --8<-- [start:list_ops]
let out = weather
.clone()
.lazy()
.with_columns([col("temperatures").str().split(" ")])
.with_columns([
col("temperatures").list().head(lit(3)).alias("top3"),
col("temperatures")
.list()
.slice(lit(-3), lit(3))
.alias("bottom_3"),
col("temperatures").list().lengths().alias("obs"),
])
.collect()?;
println!("{}", &out);
// --8<-- [end:list_ops]

// Count bad readings per station: inside `list().eval`, each element is cast to
// Int64 and tested with `is_null`, then the per-row flags are summed into "errors".
// NOTE(review): this assumes `col("")` addresses the list element and that a failed
// cast yields null rather than an error; confirm for the pinned polars version.
// --8<-- [start:count_errors]
let out = weather
.clone()
.lazy()
.with_columns([col("temperatures")
.str()
.split(" ")
.list()
.eval(col("").cast(DataType::Int64).is_null(), false)
.list()
.sum()
.alias("errors")])
.collect()?;
println!("{}", &out);
// --8<-- [end:count_errors]

// Alternative error count: flag elements matching the pattern "(?i)[a-z]"
// instead of relying on the cast, then sum the flags into "errors".
// --8<-- [start:count_errors_regex]
let out = weather
.clone()
.lazy()
.with_columns([col("temperatures")
.str()
.split(" ")
.list()
.eval(col("").str().contains(lit("(?i)[a-z]"), false), false)
.list()
.sum()
.alias("errors")])
.collect()?;
println!("{}", &out);
// --8<-- [end:count_errors_regex]

// A second frame: ten stations ("Station 1" .. "Station 10") with one integer
// column per day (day_1 .. day_3).
// --8<-- [start:weather_by_day]
let stns: Vec<String> = (1..11).map(|i| format!("Station {i}")).collect();
let weather_by_day = df!(
"station" => &stns,
"day_1" => &[17, 11, 8, 22, 9, 21, 20, 8, 8, 17],
"day_2" => &[15, 11, 10, 8, 7, 14, 18, 21, 15, 13],
"day_3" => &[16, 15, 24, 24, 8, 23, 19, 23, 16, 10],
)?;
println!("{}", &weather_by_day);
// --8<-- [end:weather_by_day]

// `rank_pct` is the expression evaluated per list below: average-method rank in
// descending order, cast to Float32, divided by the element count (also Float32),
// rounded to 2 decimals. The day columns are first packed into "all_temps" with
// `concat_list`; the final select drops that helper column and keeps the result
// as "temps_rank".
// --8<-- [start:weather_by_day_rank]
let rank_pct = (col("")
.rank(
RankOptions {
method: RankMethod::Average,
descending: true,
},
None,
)
.cast(DataType::Float32)
/ col("*").count().cast(DataType::Float32))
.round(2);

let out = weather_by_day
.clone()
.lazy()
.with_columns(
// create the list of homogeneous data
[concat_list([all().exclude(["station"])])?.alias("all_temps")],
)
.select(
// select all columns except the intermediate list
[
all().exclude(["all_temps"]),
// compute the rank by calling `list.eval`
col("all_temps")
.list()
.eval(rank_pct, true)
.alias("temps_rank"),
],
)
.collect()?;

println!("{}", &out);
// --8<-- [end:weather_by_day_rank]

// Build two Int32 list columns with `ListPrimitiveChunkedBuilder`, two rows each
// ("Array_1" rows have 2 values, "Array_2" rows have 3).
// NOTE(review): the two `8` arguments look like capacity hints; confirm against
// the builder's signature.
// --8<-- [start:array_df]
let mut col1: ListPrimitiveChunkedBuilder<Int32Type> =
ListPrimitiveChunkedBuilder::new("Array_1", 8, 8, DataType::Int32);
col1.append_slice(&[1, 3]);
col1.append_slice(&[2, 5]);
let mut col2: ListPrimitiveChunkedBuilder<Int32Type> =
ListPrimitiveChunkedBuilder::new("Array_2", 8, 8, DataType::Int32);
col2.append_slice(&[1, 7, 3]);
col2.append_slice(&[8, 1, 0]);
let array_df = DataFrame::new([col1.finish(), col2.finish()].into())?;

println!("{}", &array_df);
// --8<-- [end:array_df]

// Per-row aggregations on the list columns: min of "Array_1" and sum of
// "Array_2", with "_min" / "_sum" appended to the output column names.
// --8<-- [start:array_ops]
let out = array_df
.clone()
.lazy()
.select([
col("Array_1").list().min().suffix("_min"),
col("Array_2").list().sum().suffix("_sum"),
])
.collect()?;
println!("{}", &out);
// --8<-- [end:array_ops]

Ok(())
}
99 changes: 99 additions & 0 deletions docs/src/rust/user-guide/expressions/structs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// --8<-- [start:setup]
use polars::{lazy::dsl::count, prelude::*};
// --8<-- [end:setup]
// Runnable examples for the "structs" section of the expressions user guide.
//
// Each region delimited by `--8<-- [start:NAME]` / `--8<-- [end:NAME]` comments is a
// named snippet; keep the markers intact when editing. Every snippet builds a result,
// prints it, and propagates any error out of `main` via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Base frame: ten rating rows with columns Movie, Theatre, Avg_Rating and Count.
// --8<-- [start:ratings_df]
let ratings = df!(
"Movie"=> &["Cars", "IT", "ET", "Cars", "Up", "IT", "Cars", "ET", "Up", "ET"],
"Theatre"=> &["NE", "ME", "IL", "ND", "NE", "SD", "NE", "IL", "IL", "SD"],
"Avg_Rating"=> &[4.5, 4.4, 4.6, 4.3, 4.8, 4.7, 4.7, 4.9, 4.7, 4.6],
"Count"=> &[30, 27, 26, 29, 31, 28, 28, 26, 33, 26],

)?;
println!("{}", &ratings);
// --8<-- [end:ratings_df]

// `value_counts` on "Theatre"; the result is unnested in the next snippet, so it
// is a struct column.
// NOTE(review): confirm the meaning of the two boolean arguments for the pinned
// polars version.
// --8<-- [start:state_value_counts]
let out = ratings
.clone()
.lazy()
.select([col("Theatre").value_counts(true, true)])
.collect()?;
println!("{}", &out);
// --8<-- [end:state_value_counts]

// Same value counts, with the "Theatre" struct column unnested into separate columns.
// --8<-- [start:struct_unnest]
let out = ratings
.clone()
.lazy()
.select([col("Theatre").value_counts(true, true)])
.unnest(["Theatre"])
.collect()?;
println!("{}", &out);
// --8<-- [end:struct_unnest]

// --8<-- [start:series_struct]
// Don't think we can make it the same way in Rust, but this works:
// build a DataFrame and convert it into a struct Series named "ratings".
let rating_series = df!(
"Movie" => &["Cars", "Toy Story"],
"Theatre" => &["NE", "ME"],
"Avg_Rating" => &[4.5, 4.9],
)?
.into_struct("ratings")
.into_series();
println!("{}", &rating_series);
// --8<-- [end:series_struct]

// Pull the "Movie" field out of the struct Series by name.
// --8<-- [start:series_struct_extract]
let out = rating_series.struct_()?.field_by_name("Movie")?;
println!("{}", &out);
// --8<-- [end:series_struct_extract]

// Rename the struct fields to Film / State / Value, then unnest "ratings" so the
// new names become column names. `rating_series` is moved into the DataFrame here.
// --8<-- [start:series_struct_rename]
let out = DataFrame::new([rating_series].into())?
.lazy()
.select([col("ratings")
.struct_()
.rename_fields(["Film".into(), "State".into(), "Value".into()].to_vec())])
.unnest(["ratings"])
.collect()?;

println!("{}", &out);
// --8<-- [end:series_struct_rename]

// Keep only rows whose (Movie, Theatre) pair occurs more than once, using a
// windowed count as a stand-in for `is_duplicated` on a struct (see the inline note).
// --8<-- [start:struct_duplicates]
let out = ratings
.clone()
.lazy()
// .filter(as_struct(&[col("Movie"), col("Theatre")]).is_duplicated())
// Error: .is_duplicated() not available if you try that
// https://github.com/pola-rs/polars/issues/3803
.filter(count().over([col("Movie"), col("Theatre")]).gt(lit(1)))
.collect()?;
println!("{}", &out);
// --8<-- [end:struct_duplicates]

// Dense-rank the (Count, Avg_Rating) struct within each (Movie, Theatre) group as
// "Rank", then apply the same duplicate filter as the previous snippet.
// NOTE(review): `descending: false` is used here; confirm that this ordering matches
// the ranking described in the accompanying guide text.
// --8<-- [start:struct_ranking]
let out = ratings
.clone()
.lazy()
.with_columns([as_struct(&[col("Count"), col("Avg_Rating")])
.rank(
RankOptions {
method: RankMethod::Dense,
descending: false,
},
None,
)
.over([col("Movie"), col("Theatre")])
.alias("Rank")])
// .filter(as_struct(&[col("Movie"), col("Theatre")]).is_duplicated())
// Error: .is_duplicated() not available if you try that
// https://github.com/pola-rs/polars/issues/3803
.filter(count().over([col("Movie"), col("Theatre")]).gt(lit(1)))
.collect()?;
println!("{}", &out);
// --8<-- [end:struct_ranking]

Ok(())
}

0 comments on commit 4af7ec9

Please sign in to comment.