diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs index fa44188e2..99a0d8605 100644 --- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs @@ -137,8 +137,6 @@ impl<'a> InclusiveMetricsEvaluator<'a> { } } -// Remove this annotation once all todos have been removed -#[allow(unused_variables)] impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> { type T = bool; @@ -303,8 +301,8 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> { fn not_eq( &mut self, - reference: &BoundReference, - datum: &Datum, + _reference: &BoundReference, + _datum: &Datum, _predicate: &BoundPredicate, ) -> crate::Result<bool> { // Because the bounds are not necessarily a min or max value, @@ -494,8 +492,8 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> { fn not_in( &mut self, - reference: &BoundReference, - literals: &FnvHashSet<Datum>, + _reference: &BoundReference, + _literals: &FnvHashSet<Datum>, _predicate: &BoundPredicate, ) -> crate::Result<bool> { // Because the bounds are not necessarily a min or max value, @@ -508,14 +506,23 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> { #[cfg(test)] mod test { use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator; - use crate::expr::PredicateOperator::NotNull; - use crate::expr::{Bind, BoundPredicate, Predicate, Reference, UnaryExpression}; + use crate::expr::PredicateOperator::{ + Eq, GreaterThan, GreaterThanOrEq, IsNan, IsNull, LessThan, LessThanOrEq, NotNan, NotNull, + NotStartsWith, StartsWith, NotEq, + }; + use crate::expr::{ + BinaryExpression, Bind, BoundPredicate, Predicate, Reference, UnaryExpression, + }; use crate::spec::{ - DataContentType, DataFile, DataFileFormat, FieldSummary, NestedField, PartitionField, - PartitionSpec, PrimitiveType, Schema, Struct, Transform, Type, + DataContentType, DataFile, 
DataFileFormat, Datum, FieldSummary, Literal, NestedField, + PartitionField, PartitionSpec, PrimitiveType, Schema, Struct, Transform, Type, }; + use std::collections::HashMap; use std::sync::Arc; + const INT_MIN_VALUE: i32 = 30; + const INT_MAX_VALUE: i32 = 79; + #[test] fn test_data_file_no_partitions() { let (table_schema_ref, _partition_spec_ref) = create_test_schema_and_partition_spec(); @@ -537,25 +544,251 @@ mod test { #[test] fn test_all_nulls() { - let result = InclusiveMetricsEvaluator::eval(&not_null("all_nulls"), get_test_file(), true).unwrap(); - + let result = + InclusiveMetricsEvaluator::eval(&not_null("all_nulls"), &get_test_file_1(), true) + .unwrap(); assert!(!result, "Should skip: no non-null value in all null column"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than("all_nulls", "a"), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: LessThan on an all null column"); + + let result = InclusiveMetricsEvaluator::eval( + &less_than_or_equal("all_nulls", "a"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!( + !result, + "Should skip: LessThanOrEqual on an all null column" + ); + + let result = InclusiveMetricsEvaluator::eval( + &greater_than("all_nulls", "a"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!(!result, "Should skip: GreaterThan on an all null column"); + + let result = InclusiveMetricsEvaluator::eval( + &greater_than_or_equal("all_nulls", "a"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!( + !result, + "Should skip: GreaterThanOrEqual on an all null column" + ); + + let result = + InclusiveMetricsEvaluator::eval(&equal("all_nulls", "a"), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: Equal on an all null column"); + + let result = InclusiveMetricsEvaluator::eval( + &starts_with("all_nulls", "a"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!(!result, "Should skip: StartsWith on an all null column"); + + let result = 
InclusiveMetricsEvaluator::eval( + &not_starts_with("all_nulls", "a"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!(result, "Should read: NotStartsWith on an all null column"); + + let result = + InclusiveMetricsEvaluator::eval(&not_null("some_nulls"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with some nulls could contain a non-null value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&not_null("no_nulls"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with all nulls contains a non-null value" + ); } #[test] - fn test_no_nulls() {} + fn test_no_nulls() { + let result = + InclusiveMetricsEvaluator::eval(&is_null("all_nulls"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with all nulls contains a non-null value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_null("some_nulls"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with some nulls could contain a non-null value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_null("no_nulls"), &get_test_file_1(), true) + .unwrap(); + assert!( + !result, + "Should skip: col with no nulls can't contains a non-null value" + ); + } #[test] - fn test_is_nan() {} + fn test_is_nan() { + let result = + InclusiveMetricsEvaluator::eval(&is_nan("all_nans"), &get_test_file_1(), true).unwrap(); + assert!( + result, + "Should read: col with all nans must contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_nan("some_nans"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with some nans could contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_nan("no_nans"), &get_test_file_1(), true).unwrap(); + assert!( + !result, + "Should skip: col with no nans can't contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_nan("all_nulls_double"), 
&get_test_file_1(), true) + .unwrap(); + assert!( + !result, + "Should skip: col with no nans can't contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_nan("no_nan_stats"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: no guarantee col is nan-free without nan stats" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_nan("all_nans_v1_stats"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with all nans must contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&is_nan("nan_and_null_only"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with nans and nulls must contain a nan value" + ); + } #[test] - fn test_not_nan() {} + fn test_not_nan() { + let result = + InclusiveMetricsEvaluator::eval(&not_nan("all_nans"), &get_test_file_1(), true) + .unwrap(); + assert!( + !result, + "Should read: col with all nans must contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&not_nan("some_nans"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with some nans could contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&not_nan("no_nans"), &get_test_file_1(), true).unwrap(); + assert!( + result, + "Should read: col with no nans might contains a non-nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&not_nan("all_nulls_double"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: col with no nans can't contains a nan value" + ); + + let result = + InclusiveMetricsEvaluator::eval(&not_nan("no_nan_stats"), &get_test_file_1(), true) + .unwrap(); + assert!( + result, + "Should read: no guarantee col is nan-free without nan stats" + ); + + let result = InclusiveMetricsEvaluator::eval( + &not_nan("all_nans_v1_stats"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!( + result, + "Should read: col with all nans must 
contains a nan value" + ); + + let result = InclusiveMetricsEvaluator::eval( + &not_nan("nan_and_null_only"), + &get_test_file_1(), + true, + ) + .unwrap(); + assert!( + result, + "Should read: col with nans and nulls may contain a non-nan value" + ); + } #[test] - fn test_required_column() {} + fn test_required_column() { + let result = + InclusiveMetricsEvaluator::eval(&not_null("required"), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: required columns are always non-null"); + + let result = + InclusiveMetricsEvaluator::eval(&is_null("required"), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: required columns are always non-null"); + } #[test] - fn test_missing_column() {} + #[should_panic] + fn test_missing_column() { + let _result = + InclusiveMetricsEvaluator::eval(&less_than("missing", "a"), &get_test_file_1(), true); + } #[test] fn test_missing_stats() {} @@ -563,6 +796,12 @@ mod test { #[test] fn test_zero_record_file() {} + #[test] + fn test_not() { + // Not sure if we need a test for this, as we'd expect as a precondition + // that rewrite-not has already been applied + } + #[test] fn test_and() {} @@ -570,37 +809,327 @@ mod test { fn test_or() {} #[test] - fn test_integer_lt() {} + fn test_integer_lt() { + let result = + InclusiveMetricsEvaluator::eval(&less_than_int("id", INT_MIN_VALUE -25), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range below lower bound (5 < 30)"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than_int("id", INT_MIN_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range below lower bound (30 is not < 30)"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than_int("id", INT_MIN_VALUE + 1), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: one possible id"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than_int("id", INT_MAX_VALUE), &get_test_file_1(), true) + .unwrap(); + 
assert!(result, "Should read: many possible ids"); + } #[test] - fn test_integer_lt_eq() {} + fn test_integer_lt_eq() { + let result = + InclusiveMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MIN_VALUE -25), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range below lower bound (5 < 30)"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MIN_VALUE - 1), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range below lower bound (29 < 30)"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MIN_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: one possible id"); + + let result = + InclusiveMetricsEvaluator::eval(&less_than_or_equal_int("id", INT_MAX_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: many possible ids"); + } #[test] - fn test_integer_gt() {} + fn test_integer_gt() { + let result = + InclusiveMetricsEvaluator::eval(&greater_than_int("id", INT_MAX_VALUE + 6), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range above upper bound (85 < 79)"); + + let result = + InclusiveMetricsEvaluator::eval(&greater_than_int("id", INT_MAX_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range above upper bound (79 is not > 79)"); + + let result = + InclusiveMetricsEvaluator::eval(&greater_than_int("id", INT_MAX_VALUE - 1), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: one possible id"); + + let result = + InclusiveMetricsEvaluator::eval(&greater_than_int("id", INT_MAX_VALUE - 4), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: many possible ids"); + } #[test] - fn test_integer_gt_eq() {} + fn test_integer_gt_eq() { + let result = + InclusiveMetricsEvaluator::eval(&greater_than_or_equal_int("id", INT_MAX_VALUE + 6), &get_test_file_1(), true) + .unwrap(); + assert!(!result, 
"Should skip: id range above upper bound (85 < 79)"); + + let result = + InclusiveMetricsEvaluator::eval(&greater_than_or_equal_int("id", INT_MAX_VALUE + 1), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id range above upper bound (80 > 79)"); + + let result = + InclusiveMetricsEvaluator::eval(&greater_than_or_equal_int("id", INT_MAX_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: one possible id"); + + let result = + InclusiveMetricsEvaluator::eval(&greater_than_or_equal_int("id", INT_MAX_VALUE - 4), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: many possible ids"); + } #[test] - fn test_integer_eq() {} + fn test_integer_eq() { + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MIN_VALUE - 25), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id below lower bound"); + + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MIN_VALUE - 1), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id below lower bound"); + + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MIN_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id equal to lower bound"); + + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MAX_VALUE - 4), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id between lower and upper bounds"); + + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MAX_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id equal to upper bound"); + + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MAX_VALUE + 1), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id above upper bound"); + + let result = + InclusiveMetricsEvaluator::eval(&equal_int("id", INT_MAX_VALUE + 6), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: id above 
upper bound"); + } #[test] - fn test_integer_not_eq() {} + fn test_integer_not_eq() { + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE - 25), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id below lower bound"); + + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE - 1), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id below lower bound"); + + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id equal to lower bound"); + + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE - 4), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id between lower and upper bounds"); + + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id equal to upper bound"); + + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE + 1), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id above upper bound"); + + let result = + InclusiveMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE + 6), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: id above upper bound"); + } #[test] - fn test_integer_not_eq_rewritten() {} + fn test_integer_not_eq_rewritten() { + // Do we need this? 
+ } #[test] fn test_case_insensitive_integer_not_eq_rewritten() {} #[test] - fn test_case_sensitive_integer_not_eq_rewritten() {} + #[should_panic] + fn test_case_sensitive_integer_not_eq_rewritten() { + let result = + InclusiveMetricsEvaluator::eval(&not(equal_int("ID", 5)), &get_test_file_1(), true) + .unwrap(); + } #[test] - fn test_string_starts_with() {} + fn test_string_starts_with() { + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "a"), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: no stats"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "a"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "aa"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "aaa"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "1s"), &get_test_file_3(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "1str1x"), &get_test_file_3(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "ff"), &get_test_file_4(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "aB"), &get_test_file_2(), true) + .unwrap(); + assert!(!result, "Should skip: range does not match"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "dWX"), &get_test_file_2(), true) + .unwrap(); + assert!(!result, "Should skip: range does not match"); + + let result = + 
InclusiveMetricsEvaluator::eval(&starts_with("required", "5"), &get_test_file_3(), true) + .unwrap(); + assert!(!result, "Should skip: range does not match"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", "3str3x"), &get_test_file_3(), true) + .unwrap(); + assert!(!result, "Should skip: range does not match"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("some_empty", "房东整租霍"), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: range does matches"); + + let result = + InclusiveMetricsEvaluator::eval(&starts_with("all_nulls", ""), &get_test_file_1(), true) + .unwrap(); + assert!(!result, "Should skip: range does not match"); + + let above_max = "イロハニホヘト"[..4]; + let result = + InclusiveMetricsEvaluator::eval(&starts_with("required", &above_max), &get_test_file_4(), true) + .unwrap(); + assert!(!result, "Should skip: range does not match"); + } #[test] - fn test_string_not_starts_with() {} + fn test_string_not_starts_with() { + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "a"), &get_test_file_1(), true) + .unwrap(); + assert!(result, "Should read: no stats"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "a"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "aa"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "aaa"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "1s"), &get_test_file_3(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "1str1x"), &get_test_file_3(), true) + .unwrap(); + 
assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "ff"), &get_test_file_4(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "aB"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "dWX"), &get_test_file_2(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "5"), &get_test_file_3(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", "3str3x"), &get_test_file_3(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + + + let above_max = "イロハニホヘト"[..4]; + let result = + InclusiveMetricsEvaluator::eval(&not_starts_with("required", &above_max), &get_test_file_4(), true) + .unwrap(); + assert!(result, "Should read: range matches"); + } #[test] fn test_integer_in() {} @@ -639,6 +1168,154 @@ mod test { filter.bind(schema.clone(), true).unwrap() } + fn is_null(reference: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Unary(UnaryExpression::new(IsNull, Reference::new(reference))); + filter.bind(schema.clone(), true).unwrap() + } + + fn not_nan(reference: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Unary(UnaryExpression::new(NotNan, Reference::new(reference))); + filter.bind(schema.clone(), true).unwrap() + } + + fn is_nan(reference: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Unary(UnaryExpression::new(IsNan, Reference::new(reference))); + filter.bind(schema.clone(), true).unwrap() + } + + fn less_than(reference: &str, str_literal: &str) -> BoundPredicate { + let 
schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + LessThan, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn less_than_or_equal(reference: &str, str_literal: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + LessThanOrEq, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn greater_than(reference: &str, str_literal: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + GreaterThan, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn greater_than_or_equal(reference: &str, str_literal: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + GreaterThanOrEq, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn equal(reference: &str, str_literal: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + Eq, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn less_than_int(reference: &str, int_literal: i32) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + LessThan, + Reference::new(reference), + Datum::int(int_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn less_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + LessThanOrEq, + Reference::new(reference), + Datum::int(int_literal), + )); + filter.bind(schema.clone(), 
true).unwrap() + } + + fn greater_than_int(reference: &str, int_literal: i32) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + GreaterThan, + Reference::new(reference), + Datum::int(int_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn greater_than_or_equal_int(reference: &str, int_literal: i32) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + GreaterThanOrEq, + Reference::new(reference), + Datum::int(int_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn equal_int(reference: &str, int_literal: i32) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + Eq, + Reference::new(reference), + Datum::int(int_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn not_equal_int(reference: &str, int_literal: i32) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + NotEq, + Reference::new(reference), + Datum::int(int_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn starts_with(reference: &str, str_literal: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + StartsWith, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + + fn not_starts_with(reference: &str, str_literal: &str) -> BoundPredicate { + let schema = create_test_schema(); + let filter = Predicate::Binary(BinaryExpression::new( + NotStartsWith, + Reference::new(reference), + Datum::string(str_literal), + )); + filter.bind(schema.clone(), true).unwrap() + } + fn create_test_schema() -> Arc<Schema> { let table_schema = Schema::builder() .with_fields(vec![ @@ -716,9 +1393,7 @@ mod test { .build() .unwrap(); - let table_schema_ref = Arc::new(table_schema); - - 
table_schema_ref + Arc::new(table_schema) } fn create_test_data_file(_manifest_file_partitions: Vec) -> DataFile { @@ -741,4 +1416,66 @@ mod test { sort_order_id: None, } } + + fn get_test_file_1() -> DataFile { + DataFile { + content: DataContentType::Data, + file_path: "/test/path".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 50, + file_size_in_bytes: 10, + + value_counts: HashMap::from([ + (4, 50), + (5, 50), + (6, 50), + (7, 50), + (8, 50), + (9, 50), + (10, 50), + (11, 50), + (12, 50), + (13, 50), + (14, 50), + ]), + + null_value_counts: HashMap::from([ + (4, 50), + (5, 10), + (6, 0), + (10, 50), + (11, 0), + (12, 1), + (14, 0), + ]), + + nan_value_counts: HashMap::from([(7, 50), (8, 10), (9, 0)]), + + lower_bounds: HashMap::from([ + (1, Literal::int(INT_MIN_VALUE)), + (11, Literal::float(f32::NAN)), + (12, Literal::double(f64::NAN)), + (14, Literal::string("")), + ]), + + upper_bounds: HashMap::from([ + (1, Literal::int(INT_MAX_VALUE)), + (11, Literal::float(f32::NAN)), + (12, Literal::double(f64::NAN)), + ( + 14, + Literal::string( + "აისბერგი გასაოცარია. განსაკუთრებით მაშინ, როცა ჟანგით არის დაწერილი", + ), + ), + ]), + + column_sizes: Default::default(), + key_metadata: vec![], + split_offsets: vec![], + equality_ids: vec![], + sort_order_id: None, + } + } }