From ef71e8665ec0a738e046c25a291bf530e4a0e48d Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 18 Jul 2023 16:47:38 +0800 Subject: [PATCH] feat: Ser/de partition spec --- src/types/in_memory.rs | 15 +++++ src/types/on_disk/partition_spec.rs | 100 +++++++++++++++++++--------- 2 files changed, 83 insertions(+), 32 deletions(-) diff --git a/src/types/in_memory.rs b/src/types/in_memory.rs index 4805b13..cab117c 100644 --- a/src/types/in_memory.rs +++ b/src/types/in_memory.rs @@ -392,6 +392,21 @@ impl Transform { } } +impl<'a> ToString for &'a Transform { + fn to_string(&self) -> String { + match self { + Transform::Identity => "identity".to_string(), + Transform::Year => "year".to_string(), + Transform::Month => "month".to_string(), + Transform::Day => "day".to_string(), + Transform::Hour => "hour".to_string(), + Transform::Void => "void".to_string(), + Transform::Bucket(length) => format!("bucket[{}]", length), + Transform::Truncate(width) => format!("truncate[{}]", width), + } + } +} + /// Data files are stored in manifests with a tuple of partition values /// that are used in scans to filter out files that cannot contain records /// that match the scan’s filter predicate. diff --git a/src/types/on_disk/partition_spec.rs b/src/types/on_disk/partition_spec.rs index 1aadf8f..1341a84 100644 --- a/src/types/on_disk/partition_spec.rs +++ b/src/types/on_disk/partition_spec.rs @@ -1,9 +1,9 @@ use serde::{Deserialize, Serialize}; use super::transform::parse_transform; +use crate::types; use crate::Error; use crate::Result; -use crate::{types, ErrorKind}; /// Parse schema from json bytes. pub fn parse_partition_spec(bs: &[u8]) -> Result { @@ -11,6 +11,17 @@ pub fn parse_partition_spec(bs: &[u8]) -> Result { t.try_into() } +/// Serialize partition spec to json bytes. +pub fn serialize_partition_spec(spec: &types::PartitionSpec) -> Result { + let t = PartitionSpec::try_from(spec)?; + Ok(serde_json::to_string(&t)?) +} + +pub fn serialize_partition_spec_fields(spec: &types::PartitionSpec) -> Result { + let t = PartitionSpec::try_from(spec)?; + Ok(serde_json::to_string(&t.fields)?) +} + #[derive(Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct PartitionSpec { @@ -36,11 +47,15 @@ impl TryFrom for types::PartitionSpec { impl<'a> TryFrom<&'a types::PartitionSpec> for PartitionSpec { type Error = Error; - fn try_from(_value: &'a types::PartitionSpec) -> Result { - Err(Error::new( - ErrorKind::IcebergFeatureUnsupported, - "Serializing partition spec!", - )) + fn try_from(v: &'a types::PartitionSpec) -> Result { + Ok(Self { + spec_id: v.spec_id, + fields: v + .fields + .iter() + .map(PartitionField::try_from) + .collect::>>()?, + }) } } @@ -71,11 +86,13 @@ impl TryFrom for types::PartitionField { impl<'a> TryFrom<&'a types::PartitionField> for PartitionField { type Error = Error; - fn try_from(_v: &'a types::PartitionField) -> Result { - Err(Error::new( - ErrorKind::IcebergFeatureUnsupported, - "Serializing partition field!", - )) + fn try_from(v: &'a types::PartitionField) -> Result { + Ok(Self { + source_id: v.source_column_id, + field_id: v.partition_field_id, + name: v.name.clone(), + transform: (&v.transform).to_string(), + }) } } @@ -83,6 +100,27 @@ impl<'a> TryFrom<&'a types::PartitionField> for PartitionField { mod tests { use super::*; + fn check_partition_spec_conversion(json: &str, expected_partition_spec: types::PartitionSpec) { + let parsed = parse_partition_spec(json.as_bytes()).unwrap(); + assert_eq!(expected_partition_spec, parsed); + + let serialized_json = serialize_partition_spec(&expected_partition_spec).unwrap(); + let parsed = parse_partition_spec(serialized_json.as_bytes()).unwrap(); + assert_eq!(expected_partition_spec, parsed); + + let serialized_fields_json = + serialize_partition_spec_fields(&expected_partition_spec).unwrap(); + let parse_fields: Vec = + serde_json::from_slice(serialized_fields_json.as_bytes()).unwrap(); + let parse_type_fields = parse_fields + .into_iter() + .map(types::PartitionField::try_from) + .collect::>>() + .unwrap(); + + assert_eq!(expected_partition_spec.fields, parse_type_fields); + } + #[test] fn test_parse_partition_spec() { let content = r#" @@ -102,27 +140,25 @@ mod tests { } "#; - let v = parse_partition_spec(content.as_bytes()).unwrap(); - - assert_eq!(v.spec_id, 1); - assert_eq!(v.fields.len(), 2); - assert_eq!( - v.fields[0], - types::PartitionField { - source_column_id: 4, - partition_field_id: 1000, - transform: types::Transform::Day, - name: "ts_day".to_string(), - } + check_partition_spec_conversion( + content, + types::PartitionSpec { + spec_id: 1, + fields: vec![ + types::PartitionField { + source_column_id: 4, + partition_field_id: 1000, + transform: types::Transform::Day, + name: "ts_day".to_string(), + }, + types::PartitionField { + source_column_id: 1, + partition_field_id: 1001, + transform: types::Transform::Bucket(16), + name: "id_bucket".to_string(), + }, + ], + }, ); - assert_eq!( - v.fields[1], - types::PartitionField { - source_column_id: 1, - partition_field_id: 1001, - transform: types::Transform::Bucket(16), - name: "id_bucket".to_string(), - } - ) } }