From fd7f7333e10ec020adbcedad92da0c868da8b9d4 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 1 Sep 2023 12:37:36 +0800 Subject: [PATCH 01/12] feat: Add Catalog API Signed-off-by: Xuanwo --- crates/iceberg/Cargo.toml | 1 + crates/iceberg/src/catalog.rs | 163 ++++++++++++++++++++++++++++++++++ crates/iceberg/src/lib.rs | 5 ++ 3 files changed, 169 insertions(+) create mode 100644 crates/iceberg/src/catalog.rs diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 665ccad4d..75f073c5a 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -45,6 +45,7 @@ bimap = "0.6" derive_builder = "0.12.0" either = "1" lazy_static = "1" +async-trait = "0.1" [dev-dependencies] diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs new file mode 100644 index 000000000..fe872ee3d --- /dev/null +++ b/crates/iceberg/src/catalog.rs @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Catalog API for Apache Iceberg + +use crate::spec::{PartitionSpec, Schema, SortOrder, TableMetadata}; +use crate::Result; +use async_trait::async_trait; +use std::collections::HashMap; + +/// The catalog API for Iceberg Rust. +#[async_trait] +pub trait Catalog { + /// Get the catalog configuration for specified warehouse. + async fn get_config(&self, warehouse: Option<&str>) -> Result; + + /// List namespaces from table. + async fn list_namespaces(&self, parent: Option) -> Result>; + + /// Create a new namespace inside the catalog. + async fn create_namespace( + &self, + namespace: NamespaceIdent, + properties: HashMap, + ) -> Result; + + /// Get a namespace information from the catalog. + async fn get_namespace(&self, namespace: NamespaceIdent) -> Result; + + /// Update a namespace inside the catalog. + /// + /// # Behavior + /// + /// The properties must be the full set of namespace. + async fn update_namespace( + &self, + namespace: NamespaceIdent, + properties: HashMap, + ) -> Result<()>; + + /// Drop a namespace from the catalog. + async fn drop_namespace(&self, namespace: NamespaceIdent) -> Result<()>; + + /// List tables from namespace. + async fn list_tables(&self, namespace: NamespaceIdent) -> Result>; + + /// Create a new table inside the namespace. + async fn create_table( + &self, + namespace: NamespaceIdent, + creation: TableCreation, + ) -> Result; + + /// Load table from the catalog. + async fn load_table(&self, table: TableIdent) -> Result
; + + /// Drop a table from the catalog. + async fn drop_table(&self, table: TableIdent) -> Result<()>; + + /// Check if a table exists in the catalog. + async fn stat_table(&self, table: TableIdent) -> Result; + + /// Rename a table in the catalog. + async fn rename_table(&self, src: TableIdent, dest: TableIdent) -> Result<()>; + + /// Commit a table to the catalog. + async fn commit_table(&self, table: TableIdent, commit: TableCommit) -> Result
; + + /// Commit multiple tables to the catalog as an atomic operation. + async fn commit_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; +} + +/// The config the catalog. +pub struct CatalogConfig { + defaults: HashMap, + overrides: HashMap, +} + +/// NamespaceIdent represents the identifier of a namespace in the catalog. +pub struct NamespaceIdent(Vec); + +/// Namespace represents a namespace in the catalog. +pub struct Namespace { + name: NamespaceIdent, + properties: HashMap, +} + +/// TableIdent represents the identifier of a table in the catalog. +pub struct TableIdent { + namespace: NamespaceIdent, + name: String, +} + +/// Table represents a table in the catalog. +pub struct Table { + metadata: TableMetadata, + location: String, + config: HashMap, +} + +/// TableCreation represents the creation of a table in the catalog. +pub struct TableCreation { + name: String, + location: String, + schema: Schema, + partition_spec: PartitionSpec, + sort_order: SortOrder, + properties: HashMap, +} + +/// TableCommit represents the commit of a table in the catalog. +pub struct TableCommit { + ident: TableIdent, + requirements: Vec, + updates: Vec, +} + +/// TableRequirement represents a requirement for a table in the catalog. +pub enum TableRequirement { + /// The table must not already exist; used for create transactions + NotExist, + /// The table UUID must match the requirement. + UuidMatch(String), + /// The table branch or tag identified by the requirement's `reference` must + /// reference the requirement's `snapshot-id`. + RefSnapshotIdMatch { + /// The reference of the table to assert. + reference: String, + /// The snapshot id of the table to assert. + /// If the id is `None`, the ref must not already exist. + snapshot_id: Option, + }, + /// The table's last assigned column id must match the requirement. + LastAssignedFieldIdMatch(i64), + /// The table's current schema id must match the requirement. + CurrentSchemaIdMatch(i64), + /// The table's last assigned partition id must match the + /// requirement. + LastAssignedPartitionIdMatch(i64), + /// The table's default spec id must match the requirement. + DefaultSpecIdMatch(i64), + /// The table's default sort order id must match the requirement. + DefaultSortOrderIdMatch(i64), +} + +/// TableUpdate represents an update to a table in the catalog. +/// +/// TODO: we should fill with UpgradeFormatVersionUpdate, AddSchemaUpdate and so on. +pub enum TableUpdate {} diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 5ef9ad300..e962b0852 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -27,5 +27,10 @@ pub use error::Error; pub use error::ErrorKind; pub use error::Result; +/// There is no implementation for this trait, allow dead code for now, should +/// be removed after we have one. +#[allow(dead_code)] +pub mod catalog; + mod avro; pub mod spec; From b57521b1685516709d5b7a9af4861c1f67751eac Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 1 Sep 2023 16:11:14 +0800 Subject: [PATCH 02/12] remove get config Signed-off-by: Xuanwo --- crates/iceberg/src/catalog.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs index fe872ee3d..e0ff8d50c 100644 --- a/crates/iceberg/src/catalog.rs +++ b/crates/iceberg/src/catalog.rs @@ -25,9 +25,6 @@ use std::collections::HashMap; /// The catalog API for Iceberg Rust. #[async_trait] pub trait Catalog { - /// Get the catalog configuration for specified warehouse. - async fn get_config(&self, warehouse: Option<&str>) -> Result; - /// List namespaces from table. async fn list_namespaces(&self, parent: Option) -> Result>; @@ -84,12 +81,6 @@ pub trait Catalog { async fn commit_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; } -/// The config the catalog. -pub struct CatalogConfig { - defaults: HashMap, - overrides: HashMap, -} - /// NamespaceIdent represents the identifier of a namespace in the catalog. pub struct NamespaceIdent(Vec); From 921ce802fe7dd8c1afbbcd17e7b3aa9a668ab63c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 1 Sep 2023 16:11:49 +0800 Subject: [PATCH 03/12] Fix naming Signed-off-by: Xuanwo --- crates/iceberg/src/catalog.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs index e0ff8d50c..0e0514c9a 100644 --- a/crates/iceberg/src/catalog.rs +++ b/crates/iceberg/src/catalog.rs @@ -74,11 +74,11 @@ pub trait Catalog { /// Rename a table in the catalog. async fn rename_table(&self, src: TableIdent, dest: TableIdent) -> Result<()>; - /// Commit a table to the catalog. - async fn commit_table(&self, table: TableIdent, commit: TableCommit) -> Result
; + /// Update a table to the catalog. + async fn update_table(&self, table: TableIdent, commit: TableCommit) -> Result
; - /// Commit multiple tables to the catalog as an atomic operation. - async fn commit_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; + /// Update multiple tables to the catalog as an atomic operation. + async fn update_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; } /// NamespaceIdent represents the identifier of a namespace in the catalog. From 7b7243ce4216862e05aacafbba673ad4f7c2d34f Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 1 Sep 2023 16:13:13 +0800 Subject: [PATCH 04/12] Use ref instead Signed-off-by: Xuanwo --- crates/iceberg/src/catalog.rs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs index 0e0514c9a..4266d5e5b 100644 --- a/crates/iceberg/src/catalog.rs +++ b/crates/iceberg/src/catalog.rs @@ -26,17 +26,18 @@ use std::collections::HashMap; #[async_trait] pub trait Catalog { /// List namespaces from table. - async fn list_namespaces(&self, parent: Option) -> Result>; + async fn list_namespaces(&self, parent: Option<&NamespaceIdent>) + -> Result>; /// Create a new namespace inside the catalog. async fn create_namespace( &self, - namespace: NamespaceIdent, + namespace: &NamespaceIdent, properties: HashMap, ) -> Result; /// Get a namespace information from the catalog. - async fn get_namespace(&self, namespace: NamespaceIdent) -> Result; + async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result; /// Update a namespace inside the catalog. /// @@ -45,40 +46,40 @@ pub trait Catalog { /// The properties must be the full set of namespace. async fn update_namespace( &self, - namespace: NamespaceIdent, + namespace: &NamespaceIdent, properties: HashMap, ) -> Result<()>; /// Drop a namespace from the catalog. - async fn drop_namespace(&self, namespace: NamespaceIdent) -> Result<()>; + async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()>; /// List tables from namespace. - async fn list_tables(&self, namespace: NamespaceIdent) -> Result>; + async fn list_tables(&self, namespace: &NamespaceIdent) -> Result>; /// Create a new table inside the namespace. async fn create_table( &self, - namespace: NamespaceIdent, + namespace: &NamespaceIdent, creation: TableCreation, ) -> Result
; /// Load table from the catalog. - async fn load_table(&self, table: TableIdent) -> Result
; + async fn load_table(&self, table: &TableIdent) -> Result
; /// Drop a table from the catalog. - async fn drop_table(&self, table: TableIdent) -> Result<()>; + async fn drop_table(&self, table: &TableIdent) -> Result<()>; /// Check if a table exists in the catalog. - async fn stat_table(&self, table: TableIdent) -> Result; + async fn stat_table(&self, table: &TableIdent) -> Result; /// Rename a table in the catalog. - async fn rename_table(&self, src: TableIdent, dest: TableIdent) -> Result<()>; + async fn rename_table(&self, src: &TableIdent, dest: &TableIdent) -> Result<()>; /// Update a table to the catalog. - async fn update_table(&self, table: TableIdent, commit: TableCommit) -> Result
; + async fn update_table(&self, table: &TableIdent, commit: TableCommit) -> Result
; /// Update multiple tables to the catalog as an atomic operation. - async fn update_tables(&self, tables: Vec<(TableIdent, TableCommit)>) -> Result<()>; + async fn update_tables(&self, tables: &[(TableIdent, TableCommit)]) -> Result<()>; } /// NamespaceIdent represents the identifier of a namespace in the catalog. From 7436b973eb880236b078b30f36adb6e8e8db7630 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 1 Sep 2023 16:14:35 +0800 Subject: [PATCH 05/12] Move table out Signed-off-by: Xuanwo --- crates/iceberg/src/catalog.rs | 10 ++-------- crates/iceberg/src/lib.rs | 2 ++ crates/iceberg/src/table.rs | 28 ++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 crates/iceberg/src/table.rs diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs index 4266d5e5b..fb7d1d38f 100644 --- a/crates/iceberg/src/catalog.rs +++ b/crates/iceberg/src/catalog.rs @@ -17,7 +17,8 @@ //! Catalog API for Apache Iceberg -use crate::spec::{PartitionSpec, Schema, SortOrder, TableMetadata}; +use crate::spec::{PartitionSpec, Schema, SortOrder}; +use crate::table::Table; use crate::Result; use async_trait::async_trait; use std::collections::HashMap; @@ -97,13 +98,6 @@ pub struct TableIdent { name: String, } -/// Table represents a table in the catalog. -pub struct Table { - metadata: TableMetadata, - location: String, - config: HashMap, -} - /// TableCreation represents the creation of a table in the catalog. pub struct TableCreation { name: String, diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index e962b0852..71e52cdc0 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -31,6 +31,8 @@ pub use error::Result; /// be removed after we have one. #[allow(dead_code)] pub mod catalog; +#[allow(dead_code)] +pub mod table; mod avro; pub mod spec; diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs new file mode 100644 index 000000000..e2f07ea41 --- /dev/null +++ b/crates/iceberg/src/table.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Table API for Apache Iceberg + +use crate::spec::TableMetadata; +use std::collections::HashMap; + +/// Table represents a table in the catalog. +pub struct Table { + metadata: TableMetadata, + location: String, + config: HashMap, +} From f2ac4d6c468f31626a86c3ca562e6f7a27b3ee10 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 1 Sep 2023 16:15:03 +0800 Subject: [PATCH 06/12] Fix typo Signed-off-by: Xuanwo --- crates/iceberg/src/spec/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/schema.rs b/crates/iceberg/src/spec/schema.rs index 2e9ead29b..c7f3b553c 100644 --- a/crates/iceberg/src/spec/schema.rs +++ b/crates/iceberg/src/spec/schema.rs @@ -60,7 +60,7 @@ pub struct SchemaBuilder { } impl SchemaBuilder { - /// Add fields to schem builder. + /// Add fields to scheme builder. pub fn with_fields(mut self, fields: impl IntoIterator) -> Self { self.fields.extend(fields); self From d4c4e4516b1a3f2b3e76bde7f4a14b3a334a6951 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 4 Sep 2023 10:37:56 +0800 Subject: [PATCH 07/12] Update crates/iceberg/src/spec/schema.rs Co-authored-by: Renjie Liu --- crates/iceberg/src/spec/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/schema.rs b/crates/iceberg/src/spec/schema.rs index c7f3b553c..cef2dccbd 100644 --- a/crates/iceberg/src/spec/schema.rs +++ b/crates/iceberg/src/spec/schema.rs @@ -60,7 +60,7 @@ pub struct SchemaBuilder { } impl SchemaBuilder { - /// Add fields to scheme builder. + /// Add fields to schema builder. pub fn with_fields(mut self, fields: impl IntoIterator) -> Self { self.fields.extend(fields); self From 64aedca519dc9116ea6d4c8ed95a86fb367520d7 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 14 Sep 2023 12:25:19 +0800 Subject: [PATCH 08/12] Make partition_spec optional Signed-off-by: Xuanwo --- crates/iceberg/src/catalog.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs index fb7d1d38f..f5255391c 100644 --- a/crates/iceberg/src/catalog.rs +++ b/crates/iceberg/src/catalog.rs @@ -103,7 +103,7 @@ pub struct TableCreation { name: String, location: String, schema: Schema, - partition_spec: PartitionSpec, + partition_spec: Option, sort_order: SortOrder, properties: HashMap, } From 5cd53243372d2713a04166cd5899113b8f4d48ca Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 14 Sep 2023 12:25:49 +0800 Subject: [PATCH 09/12] Update crates/iceberg/src/table.rs Co-authored-by: Fokko Driesprong --- crates/iceberg/src/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index e2f07ea41..ef071ffd8 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -22,7 +22,7 @@ use std::collections::HashMap; /// Table represents a table in the catalog. pub struct Table { + metadata_location: String, metadata: TableMetadata, - location: String, config: HashMap, } From a8e1bb6b86fdd23777fb19d63b52992a7189dba9 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 14 Sep 2023 12:55:16 +0800 Subject: [PATCH 10/12] Fix sort Signed-off-by: Xuanwo --- crates/iceberg/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 513367e08..db23fbd13 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -29,6 +29,7 @@ keywords = ["iceberg"] [dependencies] anyhow = "1.0.72" apache-avro = "0.15" +async-trait = "0.1" bimap = "0.6" bitvec = "1.0.1" chrono = "0.4" @@ -36,7 +37,6 @@ derive_builder = "0.12.0" either = "1" itertools = "0.11" lazy_static = "1" -async-trait = "0.1" once_cell = "1" ordered-float = "3.7.0" rust_decimal = "1.31.0" From 0d3c93614bd9c0124e9708aeb1ce4a8f9a9f38fd Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 20 Sep 2023 22:00:46 +0800 Subject: [PATCH 11/12] Remove config Signed-off-by: Xuanwo --- crates/iceberg/src/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index ef071ffd8..13a7e6656 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -24,5 +24,4 @@ use std::collections::HashMap; pub struct Table { metadata_location: String, metadata: TableMetadata, - config: HashMap, } From 56c3cb70e7ddc43a5232432a370de77b2315f951 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 21 Sep 2023 11:56:26 +0800 Subject: [PATCH 12/12] Make clippy happy Signed-off-by: Xuanwo --- crates/iceberg/src/table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index 13a7e6656..ebe6753ee 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -18,7 +18,6 @@ //! Table API for Apache Iceberg use crate::spec::TableMetadata; -use std::collections::HashMap; /// Table represents a table in the catalog. pub struct Table {