Skip to content
This repository has been archived by the owner on Sep 26, 2023. It is now read-only.

Commit

Permalink
Add rust examples for transformations/time-series (partial)
Browse files Browse the repository at this point in the history
  • Loading branch information
Atreyagaurav authored and stinodego committed Sep 13, 2023
1 parent 75f0a73 commit d741489
Show file tree
Hide file tree
Showing 3 changed files with 266 additions and 0 deletions.
61 changes: 61 additions & 0 deletions docs/src/rust/user-guide/transformations/time-series/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// --8<-- [start:setup]
use chrono::prelude::*;
use polars::io::prelude::*;
use polars::lazy::dsl::StrptimeOptions;
use polars::prelude::*;
// --8<-- [end:setup]

/// Runs the time-series filtering examples: load the stock CSV, filter on a
/// single date, filter on a date range, and filter on negative years.
///
/// The `// --8<-- [start:…]` / `[end:…]` comments bracket named regions of this
/// file; keep each pair wrapped around the code it currently encloses.
///
/// # Errors
/// Propagates any error from reading the CSV, building a frame with `df!`, or
/// collecting a lazy query, rather than panicking.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // --8<-- [start:df]
    // Reading a file can fail at runtime, so propagate with `?` like the
    // `.collect()?` calls below instead of unwrapping.
    let df = CsvReader::from_path("docs/data/apple_stock.csv")?
        .with_try_parse_dates(true)
        .finish()?;
    println!("{}", &df);
    // --8<-- [end:df]

    // --8<-- [start:filter]
    // Keep only the rows whose "Date" equals one specific day.
    // The date literal is a valid calendar date, so `unwrap` cannot fail here.
    let filtered_df = df
        .clone()
        .lazy()
        .filter(col("Date").eq(lit(NaiveDate::from_ymd_opt(1995, 10, 16).unwrap())))
        .collect()?;
    println!("{}", &filtered_df);
    // --8<-- [end:filter]

    // --8<-- [start:range]
    // Keep the rows strictly between the two bounds (both ends exclusive).
    let filtered_range_df = df
        .clone()
        .lazy()
        .filter(
            col("Date")
                .gt(lit(NaiveDate::from_ymd_opt(1995, 7, 1).unwrap()))
                .and(col("Date").lt(lit(NaiveDate::from_ymd_opt(1995, 11, 1).unwrap()))),
        )
        .collect()?;
    println!("{}", &filtered_range_df);
    // --8<-- [end:range]

    // --8<-- [start:negative]
    // Build a small frame of date strings with negative years and parse the
    // "ts" column into dates using the default strptime options.
    let negative_dates_df = df!(
        "ts"=> &["-1300-05-23", "-1400-03-02"],
        "values"=> &[3, 4])?
    .lazy()
    .with_column(
        col("ts")
            .str()
            .strptime(DataType::Date, StrptimeOptions::default()),
    )
    .collect()?;

    // Keep only the rows whose year is below -1300.
    let negative_dates_filtered_df = negative_dates_df
        .clone()
        .lazy()
        .filter(col("ts").dt().year().lt(-1300))
        .collect()?;
    println!("{}", &negative_dates_filtered_df);
    // --8<-- [end:negative]

    Ok(())
}
75 changes: 75 additions & 0 deletions docs/src/rust/user-guide/transformations/time-series/parsing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// --8<-- [start:setup]
use polars::io::prelude::*;
use polars::lazy::dsl::StrptimeOptions;
use polars::prelude::*;
// --8<-- [end:setup]

/// Runs the time-series parsing examples: read dates automatically from CSV,
/// parse a string column into dates, extract the year, and parse timestamps
/// that carry mixed UTC offsets.
///
/// The `// --8<-- [start:…]` / `[end:…]` comments bracket named regions of this
/// file; keep each pair wrapped around the code it currently encloses.
///
/// # Errors
/// Propagates any error from reading the CSV, building a frame with `df!`, or
/// collecting a lazy query, rather than panicking.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // --8<-- [start:df]
    // Reading a file can fail at runtime, so propagate with `?` like the
    // `.collect()?` calls below instead of unwrapping.
    let df = CsvReader::from_path("docs/data/apple_stock.csv")?
        .with_try_parse_dates(true)
        .finish()?;
    println!("{}", &df);
    // --8<-- [end:df]

    // --8<-- [start:cast]
    // Re-read the file with date parsing disabled, then convert the "Date"
    // column explicitly with the default strptime options.
    let df = CsvReader::from_path("docs/data/apple_stock.csv")?
        .with_try_parse_dates(false)
        .finish()?;
    // `df` is shadowed right away, so it can be moved into the lazy query
    // without cloning.
    let df = df
        .lazy()
        .with_columns([col("Date")
            .str()
            .strptime(DataType::Date, StrptimeOptions::default())])
        .collect()?;
    println!("{}", &df);
    // --8<-- [end:cast]

    // --8<-- [start:df3]
    let df_with_year = df
        .clone()
        .lazy()
        .with_columns([col("Date").dt().year().alias("year")])
        .collect()?;
    println!("{}", &df_with_year);
    // --8<-- [end:df3]

    // --8<-- [start:extract]
    // Add a "year" column derived from the parsed "Date" column.
    let df_with_year = df
        .clone()
        .lazy()
        .with_columns([col("Date").dt().year().alias("year")])
        .collect()?;
    println!("{}", &df_with_year);
    // --8<-- [end:extract]

    // --8<-- [start:mixed]
    // Timestamps whose UTC offset changes partway through (+0100 then +0200).
    let data = [
        "2021-03-27T00:00:00+0100",
        "2021-03-28T00:00:00+0100",
        "2021-03-29T00:00:00+0200",
        "2021-03-30T00:00:00+0200",
    ];
    // Parse with an explicit format that includes the `%z` offset, then
    // convert the result to the Europe/Brussels time zone.
    let q = col("date")
        .str()
        .strptime(
            DataType::Datetime(TimeUnit::Microseconds, None),
            StrptimeOptions {
                format: Some("%Y-%m-%dT%H:%M:%S%z".to_string()),
                ..Default::default()
            },
        )
        .dt()
        .convert_time_zone("Europe/Brussels".to_string());
    let mixed_parsed = df!("date" => &data)?.lazy().select([q]).collect()?;

    println!("{}", &mixed_parsed);
    // --8<-- [end:mixed]

    Ok(())
}
130 changes: 130 additions & 0 deletions docs/src/rust/user-guide/transformations/time-series/rolling.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// --8<-- [start:setup]
use chrono::prelude::*;
use polars::io::prelude::*;
use polars::lazy::dsl::GetOutput;
use polars::prelude::*;
use polars::time::prelude::*;
// --8<-- [end:setup]

/// Runs the time-series grouping examples: yearly averages over the stock
/// data, per-month statistics over a generated daily range, and hourly
/// windows combined with an extra grouping column.
///
/// The `// --8<-- [start:…]` / `[end:…]` comments bracket named regions of this
/// file; keep each pair wrapped around the code it currently encloses.
///
/// # Errors
/// Propagates any error from reading or sorting the CSV, generating a date
/// range, building a frame with `df!`, or collecting a lazy query, rather
/// than panicking.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // --8<-- [start:df]
    // Reading a file can fail at runtime, so propagate with `?` like the
    // `.sort(...)?` and `.collect()?` calls instead of unwrapping.
    let df = CsvReader::from_path("docs/data/apple_stock.csv")?
        .with_try_parse_dates(true)
        .finish()?
        .sort(["Date"], false, true)?;
    println!("{}", &df);
    // --8<-- [end:df]

    // --8<-- [start:group_by]
    // One window per year ("1y" every / "1y" period, no offset), averaging
    // the "Close" column inside each window.
    let annual_average_df = df
        .clone()
        .lazy()
        .groupby_dynamic(
            col("Date"),
            [],
            DynamicGroupOptions {
                every: Duration::parse("1y"),
                period: Duration::parse("1y"),
                offset: Duration::parse("0"),
                ..Default::default()
            },
        )
        .agg([col("Close").mean()])
        .collect()?;

    let df_with_year = annual_average_df
        .lazy()
        .with_columns([col("Date").dt().year().alias("year")])
        .collect()?;
    println!("{}", &df_with_year);
    // --8<-- [end:group_by]

    // --8<-- [start:group_by_dyn]
    // Daily range covering 2021-01-01 through 2021-12-31 (both ends included),
    // cast down to plain dates.
    let df = df!(
        "time" => date_range(
            "time",
            NaiveDate::from_ymd_opt(2021, 1, 1).unwrap().and_hms_opt(0, 0, 0).unwrap(),
            NaiveDate::from_ymd_opt(2021, 12, 31).unwrap().and_hms_opt(0, 0, 0).unwrap(),
            Duration::parse("1d"),
            ClosedWindow::Both,
            TimeUnit::Milliseconds,
            None,
        )?
        .cast(&DataType::Date)?
    )?;

    // Divisor used to turn the raw duration values into whole days.
    // NOTE(review): this assumes the duration values are in milliseconds, as
    // the original `/ 1000 / 24 / 60 / 60` chain implied — confirm.
    const MS_PER_DAY: i64 = 1000 * 60 * 60 * 24;

    let out = df
        .clone()
        .lazy()
        .groupby_dynamic(
            col("time"),
            [],
            DynamicGroupOptions {
                every: Duration::parse("1mo"),
                period: Duration::parse("1mo"),
                offset: Duration::parse("0"),
                closed_window: ClosedWindow::Left,
                ..Default::default()
            },
        )
        .agg([
            col("time")
                // NOTE(review): the Python version of this example passes
                // `false` here — confirm both render the same output.
                .cumcount(true)
                .reverse()
                .head(Some(3))
                .alias("day/eom"),
            ((col("time").last() - col("time").first()).map(
                // had to use map as .duration().days() is not available
                |s| {
                    Ok(Some(
                        s.duration()?
                            .into_iter()
                            .map(|d| d.map(|v| v / MS_PER_DAY))
                            .collect::<Int64Chunked>()
                            .into_series(),
                    ))
                },
                GetOutput::from_type(DataType::Int64),
            ) + lit(1)) // last - first spans one day less than the window holds
            .alias("days_in_month"),
        ])
        .explode([col("day/eom")])
        .collect()?;
    println!("{}", &out);
    // --8<-- [end:group_by_dyn]

    // --8<-- [start:group_by_roll]
    // Seven half-hourly timestamps (00:00 through 03:00 inclusive), each
    // paired with a group label.
    let df = df!(
        "time" => date_range(
            "time",
            NaiveDate::from_ymd_opt(2021, 12, 16).unwrap().and_hms_opt(0, 0, 0).unwrap(),
            NaiveDate::from_ymd_opt(2021, 12, 16).unwrap().and_hms_opt(3, 0, 0).unwrap(),
            Duration::parse("30m"),
            ClosedWindow::Both,
            TimeUnit::Milliseconds,
            None,
        )?,
        "groups"=> ["a", "a", "a", "b", "b", "a", "a"],
    )?;
    println!("{}", &df);
    // --8<-- [end:group_by_roll]

    // --8<-- [start:group_by_dyn2]
    // Hourly windows, additionally split by "groups"; both window edges are
    // closed and the window boundaries are included in the output.
    let out = df
        .clone()
        .lazy()
        .groupby_dynamic(
            col("time"),
            [col("groups")],
            DynamicGroupOptions {
                every: Duration::parse("1h"),
                period: Duration::parse("1h"),
                offset: Duration::parse("0"),
                include_boundaries: true,
                closed_window: ClosedWindow::Both,
                ..Default::default()
            },
        )
        .agg([count()])
        .collect()?;
    println!("{}", &out);
    // --8<-- [end:group_by_dyn2]

    Ok(())
}

0 comments on commit d741489

Please sign in to comment.