diff --git a/crates/paimon/src/spec/data_file.rs b/crates/paimon/src/spec/data_file.rs
index 4f6c41f..e9e1206 100644
--- a/crates/paimon/src/spec/data_file.rs
+++ b/crates/paimon/src/spec/data_file.rs
@@ -45,7 +45,7 @@ pub const EMPTY_BINARY_ROW: BinaryRow = BinaryRow::new(0);
 /// An implementation of InternalRow.
 ///
 /// Impl Reference:
-#[derive(Debug, Eq, PartialEq, Serialize, Deserialize)]
+#[derive(Debug, Eq, PartialEq, Serialize, Deserialize, Copy, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct BinaryRow {
     arity: i32,
diff --git a/crates/paimon/src/spec/manifest.rs b/crates/paimon/src/spec/manifest.rs
new file mode 100644
index 0000000..3d12e1c
--- /dev/null
+++ b/crates/paimon/src/spec/manifest.rs
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::spec::manifest::FileKind::{ADD, DELETE};
+use crate::spec::{BinaryRow, DataFileMeta, RowType, SchemaManager};
+
+/// Kind of change recorded by a [`ManifestEntry`] (stored in its `kind` field).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum FileKind {
+    /// Encoded as byte `0`.
+    ADD,
+    /// Encoded as byte `1`.
+    DELETE,
+}
+
+impl FileKind {
+    /// Returns the byte encoding of this kind: `0` for `ADD`, `1` for `DELETE`.
+    pub fn byte_value(&self) -> u8 {
+        // Qualified paths keep these arms real variant patterns: a bare `ADD` only matches
+        // the variant while the `FileKind::{ADD, DELETE}` import exists, and would otherwise
+        // silently turn into a catch-all binding.
+        match self {
+            Self::ADD => 0,
+            Self::DELETE => 1,
+        }
+    }
+}
+
+impl From<u8> for FileKind {
+    /// Decodes a kind from its byte encoding, the inverse of [`FileKind::byte_value`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `value` is neither `0` nor `1`.
+    fn from(value: u8) -> Self {
+        match value {
+            0 => ADD,
+            1 => DELETE,
+            other => panic!("unsupported FileKind byte value: {}", other),
+        }
+    }
+}
+
+/// Identifying fields of a file entry: partition, bucket, level and file name.
+///
+/// Declared `pub` because it is the return type of [`FileEntry::identifier`], a method of a
+/// `pub` trait.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Identifier {
+    partition: BinaryRow,
+    bucket: i32,
+    level: i32,
+    file_name: String,
+}
+
+impl Identifier {
+    /// Creates an identifier from its four components.
+    pub fn new(partition: BinaryRow, bucket: i32, level: i32, file_name: String) -> Self {
+        Self {
+            partition,
+            bucket,
+            level,
+            file_name,
+        }
+    }
+}
+
+/// Accessors for the fields of an entry that describes one file.
+pub trait FileEntry {
+    /// Returns the partition of this entry.
+    fn partition(&self) -> BinaryRow;
+
+    /// Returns the bucket of this entry.
+    fn bucket(&self) -> i32;
+
+    /// Returns the level of this entry's file.
+    fn level(&self) -> i32;
+
+    /// Returns the name of this entry's file as an owned `String`.
+    fn file_name(&self) -> String;
+
+    /// Builds the [`Identifier`] of this entry from its partition, bucket, level and file name.
+    ///
+    /// Provided as a default method; implementors may still override it.
+    fn identifier(&self) -> Identifier {
+        Identifier::new(self.partition(), self.bucket(), self.level(), self.file_name())
+    }
+
+    /// Returns the minimum key of this entry's file.
+    fn min_key(&self) -> BinaryRow;
+
+    /// Returns the maximum key of this entry's file.
+    fn max_key(&self) -> BinaryRow;
+
+    // TODO: implement the remaining default methods.
+}
+
+/// One entry of a manifest: a [`FileKind`] together with the partition, bucket and
+/// [`DataFileMeta`] of a single file.
+struct ManifestEntry {
+    kind: FileKind,
+    partition: BinaryRow,
+    bucket: i32,
+    total_buckets: i32,
+    file: DataFileMeta,
+}
+
+impl FileEntry for ManifestEntry {
+    // `BinaryRow` derives `Copy`, so the row getters below return it by value without cloning.
+    fn partition(&self) -> BinaryRow {
+        self.partition
+    }
+
+    fn bucket(&self) -> i32 {
+        self.bucket
+    }
+
+    fn level(&self) -> i32 {
+        self.file.level
+    }
+
+    fn file_name(&self) -> String {
+        self.file.file_name.clone()
+    }
+
+    fn min_key(&self) -> BinaryRow {
+        self.file.min_key
+    }
+
+    fn max_key(&self) -> BinaryRow {
+        self.file.max_key
+    }
+}
+
+/// Metadata describing a manifest file.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct ManifestFileMeta {
+    file_name: String,
+    file_size: i64,
+    num_added_files: i64,
+    num_deleted_files: i64,
+    // FIXME: add missing SimpleStats
+    schema_id: i64,
+}
+
+/// A manifest file includes several [`ManifestEntry`] values, representing the additional
+/// changes since the last snapshot.
+struct ManifestFile {
+    row_type: RowType,
+    suggested_file_size: u64,
+    schema_manager: SchemaManager,
+}
diff --git a/crates/paimon/src/spec/mod.rs b/crates/paimon/src/spec/mod.rs
index fc09dcd..a61811a 100644
--- a/crates/paimon/src/spec/mod.rs
+++ b/crates/paimon/src/spec/mod.rs
@@ -30,3 +30,4 @@ pub use snapshot::*;
 
 mod types;
 pub use types::*;
+mod manifest;
diff --git a/crates/paimon/src/spec/schema.rs b/crates/paimon/src/spec/schema.rs
index 7a9b0d0..e6bdcf1 100644
--- a/crates/paimon/src/spec/schema.rs
+++ b/crates/paimon/src/spec/schema.rs
@@ -51,3 +51,12 @@ pub struct DataField {
     typ: DataType,
     description: Option,
 }
+
+/// Schema Manager to manage schema versions.
+///
+/// Impl Reference:
+// FIXME: This struct is partially implemented
+#[derive(Debug, Clone)]
+pub struct SchemaManager {
+    branch: String,
+}