diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 007c5c2d2..4a4839d09 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -29,6 +29,7 @@ keywords = ["iceberg"] [dependencies] anyhow = "1.0.72" apache-avro = "0.15" +async-trait = "0.1" bimap = "0.6" bitvec = "1.0.1" chrono = "0.4" diff --git a/crates/iceberg/src/catalog.rs b/crates/iceberg/src/catalog.rs new file mode 100644 index 000000000..f5255391c --- /dev/null +++ b/crates/iceberg/src/catalog.rs @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Catalog API for Apache Iceberg + +use crate::spec::{PartitionSpec, Schema, SortOrder}; +use crate::table::Table; +use crate::Result; +use async_trait::async_trait; +use std::collections::HashMap; + +/// The catalog API for Iceberg Rust. +#[async_trait] +pub trait Catalog { + /// List namespaces from the catalog. + async fn list_namespaces(&self, parent: Option<&NamespaceIdent>) + -> Result>; + + /// Create a new namespace inside the catalog. + async fn create_namespace( + &self, + namespace: &NamespaceIdent, + properties: HashMap, + ) -> Result; + + /// Get a namespace's information from the catalog. 
+ async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result; + + /// Update a namespace inside the catalog. + /// + /// # Behavior + /// + /// The properties must be the full set of the namespace's properties. + async fn update_namespace( + &self, + namespace: &NamespaceIdent, + properties: HashMap, + ) -> Result<()>; + + /// Drop a namespace from the catalog. + async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()>; + + /// List tables from the namespace. + async fn list_tables(&self, namespace: &NamespaceIdent) -> Result>; + + /// Create a new table inside the namespace. + async fn create_table( + &self, + namespace: &NamespaceIdent, + creation: TableCreation, + ) -> Result; + + /// Load a table from the catalog. + async fn load_table(&self, table: &TableIdent) -> Result
; + + /// Drop a table from the catalog. + async fn drop_table(&self, table: &TableIdent) -> Result<()>; + + /// Check if a table exists in the catalog. + async fn stat_table(&self, table: &TableIdent) -> Result; + + /// Rename a table in the catalog. + async fn rename_table(&self, src: &TableIdent, dest: &TableIdent) -> Result<()>; + + /// Update a table in the catalog. + async fn update_table(&self, table: &TableIdent, commit: TableCommit) -> Result
; + + /// Update multiple tables in the catalog as an atomic operation. + async fn update_tables(&self, tables: &[(TableIdent, TableCommit)]) -> Result<()>; +} + +/// NamespaceIdent represents the identifier of a namespace in the catalog. +pub struct NamespaceIdent(Vec); + +/// Namespace represents a namespace in the catalog. +pub struct Namespace { + name: NamespaceIdent, + properties: HashMap, +} + +/// TableIdent represents the identifier of a table in the catalog. +pub struct TableIdent { + namespace: NamespaceIdent, + name: String, +} + +/// TableCreation represents the creation of a table in the catalog. +pub struct TableCreation { + name: String, + location: String, + schema: Schema, + partition_spec: Option, + sort_order: SortOrder, + properties: HashMap, +} + +/// TableCommit represents the commit of a table in the catalog. +pub struct TableCommit { + ident: TableIdent, + requirements: Vec, + updates: Vec, +} + +/// TableRequirement represents a requirement for a table in the catalog. +pub enum TableRequirement { + /// The table must not already exist; used for create transactions + NotExist, + /// The table UUID must match the requirement. + UuidMatch(String), + /// The table branch or tag identified by the requirement's `reference` must + /// reference the requirement's `snapshot-id`. + RefSnapshotIdMatch { + /// The reference of the table to assert. + reference: String, + /// The snapshot id of the table to assert. + /// If the id is `None`, the ref must not already exist. + snapshot_id: Option, + }, + /// The table's last assigned column id must match the requirement. + LastAssignedFieldIdMatch(i64), + /// The table's current schema id must match the requirement. + CurrentSchemaIdMatch(i64), + /// The table's last assigned partition id must match the + /// requirement. + LastAssignedPartitionIdMatch(i64), + /// The table's default spec id must match the requirement. 
+ DefaultSpecIdMatch(i64), + /// The table's default sort order id must match the requirement. + DefaultSortOrderIdMatch(i64), +} + +/// TableUpdate represents an update to a table in the catalog. +/// +/// TODO: fill in variants such as UpgradeFormatVersionUpdate, AddSchemaUpdate and so on. +pub enum TableUpdate {} diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 93413d75b..573b58ef2 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -27,6 +27,13 @@ pub use error::Error; pub use error::ErrorKind; pub use error::Result; +/// There is no implementation of the catalog trait yet, so dead code is allowed for now; +/// remove these `allow(dead_code)` attributes once an implementation exists. +#[allow(dead_code)] +pub mod catalog; +#[allow(dead_code)] +pub mod table; + mod avro; pub mod io; pub mod spec; diff --git a/crates/iceberg/src/spec/schema.rs b/crates/iceberg/src/spec/schema.rs index 2e9ead29b..cef2dccbd 100644 --- a/crates/iceberg/src/spec/schema.rs +++ b/crates/iceberg/src/spec/schema.rs @@ -60,7 +60,7 @@ pub struct SchemaBuilder { } impl SchemaBuilder { - /// Add fields to schem builder. + /// Add fields to schema builder. pub fn with_fields(mut self, fields: impl IntoIterator) -> Self { self.fields.extend(fields); self diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs new file mode 100644 index 000000000..ebe6753ee --- /dev/null +++ b/crates/iceberg/src/table.rs @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Table API for Apache Iceberg + +use crate::spec::TableMetadata; + +/// Table represents a table in the catalog. +pub struct Table { + metadata_location: String, + metadata: TableMetadata, +}