Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

k_smallest variants (2) #885

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/k_smallest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ where
}

#[inline]
pub(crate) fn key_to_cmp<T, K, F>(key: F) -> impl Fn(&T, &T) -> Ordering
pub(crate) fn key_to_cmp<T, K, F>(mut key: F) -> impl FnMut(&T, &T) -> Ordering
where
F: Fn(&T) -> K,
F: FnMut(&T) -> K,
K: Ord,
{
move |a, b| key(a).cmp(&key(b))
Expand Down
110 changes: 95 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2981,36 +2981,79 @@ pub trait Itertools: Iterator {

/// Sort the k smallest elements into a new iterator using the provided comparison.
///
/// The sorted iterator, if directly collected to a `Vec`, is converted
/// without any extra copying or allocation cost.
///
/// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
/// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
/// [`k_smallest`](Itertools::k_smallest) corresponds to `self.sorted().take(k)`,
/// in both semantics and complexity.
///
/// Particularly, a custom heap implementation ensures the comparison is not cloned.
///
/// ```
/// use itertools::Itertools;
///
/// // A random permutation of 0..15
/// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5];
///
/// let five_smallest = numbers
/// .into_iter()
/// .k_smallest_by(5, |a, b| (a % 7).cmp(&(b % 7)).then(a.cmp(b)));
///
/// itertools::assert_equal(five_smallest, vec![0, 7, 14, 1, 8]);
/// ```
#[cfg(feature = "use_alloc")]
fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
where
Self: Sized,
F: Fn(&Self::Item, &Self::Item) -> Ordering,
F: FnMut(&Self::Item, &Self::Item) -> Ordering,
{
k_smallest::k_smallest_general(self, k, cmp).into_iter()
}

/// Return the elements producing the k smallest outputs of the provided function
/// Return the elements producing the k smallest outputs of the provided function.
///
/// This corresponds to `self.sorted_by_key(cmp).take(k)` in the same way that
/// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and time complexity.
/// The sorted iterator, if directly collected to a `Vec`, is converted
/// without any extra copying or allocation cost.
///
/// This corresponds to `self.sorted_by_key(key).take(k)` in the same way that
/// [`k_smallest`](Itertools::k_smallest) corresponds to `self.sorted().take(k)`,
/// in both semantics and complexity.
///
/// Particularly, a custom heap implementation ensures the comparison is not cloned.
///
/// ```
/// use itertools::Itertools;
///
/// // A random permutation of 0..15
/// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5];
///
/// let five_smallest = numbers
/// .into_iter()
/// .k_smallest_by_key(5, |n| (n % 7, *n));
///
/// itertools::assert_equal(five_smallest, vec![0, 7, 14, 1, 8]);
/// ```
#[cfg(feature = "use_alloc")]
fn k_smallest_by_key<F, K>(self, k: usize, key: F) -> VecIntoIter<Self::Item>
where
Self: Sized,
F: Fn(&Self::Item) -> K,
F: FnMut(&Self::Item) -> K,
K: Ord,
{
self.k_smallest_by(k, k_smallest::key_to_cmp(key))
}

/// Sort the k largest elements into a new iterator, in descending order.
/// Semantically equivalent to `k_smallest` with a reversed `Ord`
/// However, this is implemented by way of a custom binary heap
/// which does not have the same performance characteristics for very large `Self::Item`
///
/// The sorted iterator, if directly collected to a `Vec`, is converted
/// without any extra copying or allocation cost.
///
/// It is semantically equivalent to [`k_smallest`](Itertools::k_smallest)
/// with a reversed `Ord`.
/// However, this is implemented with a custom binary heap which does not
/// have the same performance characteristics for very large `Self::Item`.
///
/// ```
/// use itertools::Itertools;
///
Expand All @@ -3021,7 +3064,7 @@ pub trait Itertools: Iterator {
/// .into_iter()
/// .k_largest(5);
///
/// itertools::assert_equal(five_largest, vec![14,13,12,11,10]);
/// itertools::assert_equal(five_largest, vec![14, 13, 12, 11, 10]);
/// ```
#[cfg(feature = "use_alloc")]
fn k_largest(self, k: usize) -> VecIntoIter<Self::Item>
Expand All @@ -3033,22 +3076,59 @@ pub trait Itertools: Iterator {
}

/// Sort the k largest elements into a new iterator using the provided comparison.
/// Functionally equivalent to `k_smallest_by` with a reversed `Ord`
///
/// The sorted iterator, if directly collected to a `Vec`, is converted
/// without any extra copying or allocation cost.
///
/// Functionally equivalent to [`k_smallest_by`](Itertools::k_smallest_by)
/// with a reversed `Ord`.
///
/// ```
/// use itertools::Itertools;
///
/// // A random permutation of 0..15
/// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5];
///
/// let five_largest = numbers
/// .into_iter()
/// .k_largest_by(5, |a, b| (a % 7).cmp(&(b % 7)).then(a.cmp(b)));
///
/// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]);
/// ```
#[cfg(feature = "use_alloc")]
fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
fn k_largest_by<F>(self, k: usize, mut cmp: F) -> VecIntoIter<Self::Item>
where
Self: Sized,
F: Fn(&Self::Item, &Self::Item) -> Ordering,
F: FnMut(&Self::Item, &Self::Item) -> Ordering,
{
self.k_smallest_by(k, move |a, b| cmp(b, a))
}

/// Return the elements producing the k largest outputs of the provided function
/// Return the elements producing the k largest outputs of the provided function.
///
/// The sorted iterator, if directly collected to a `Vec`, is converted
/// without any extra copying or allocation cost.
///
/// Functionally equivalent to [`k_smallest_by_key`](Itertools::k_smallest_by_key)
/// with a reversed `Ord`.
///
/// ```
/// use itertools::Itertools;
///
/// // A random permutation of 0..15
/// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5];
///
/// let five_largest = numbers
/// .into_iter()
/// .k_largest_by_key(5, |n| (n % 7, *n));
///
/// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]);
/// ```
#[cfg(feature = "use_alloc")]
fn k_largest_by_key<F, K>(self, k: usize, key: F) -> VecIntoIter<Self::Item>
where
Self: Sized,
F: Fn(&Self::Item) -> K,
F: FnMut(&Self::Item) -> K,
K: Ord,
{
self.k_largest_by(k, k_smallest::key_to_cmp(key))
Expand Down
38 changes: 20 additions & 18 deletions tests/test_std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,34 +508,24 @@ qc::quickcheck! {
let num_elements = min(k, m as _);

// Compute the top and bottom k in various combinations
let sorted_smallest = sorted[..num_elements].iter().cloned();
let smallest = v.iter().cloned().k_smallest(k);
let smallest_by = v.iter().cloned().k_smallest_by(k, Ord::cmp);
let smallest_by_key = v.iter().cloned().k_smallest_by_key(k, |&x| x);

let sorted_largest = sorted[sorted.len() - num_elements..].iter().rev().cloned();
let largest = v.iter().cloned().k_largest(k);
let largest_by = v.iter().cloned().k_largest_by(k, Ord::cmp);
let largest_by_key = v.iter().cloned().k_largest_by_key(k, |&x| x);

// Check the variations produce the same answers and that they're right
for (a,b,c,d) in izip!(
sorted[..num_elements].iter().cloned(),
smallest,
smallest_by,
smallest_by_key) {
assert_eq!(a,b);
assert_eq!(a,c);
assert_eq!(a,d);
}
it::assert_equal(smallest, sorted_smallest.clone());
it::assert_equal(smallest_by, sorted_smallest.clone());
it::assert_equal(smallest_by_key, sorted_smallest);

for (a,b,c,d) in izip!(
sorted[sorted.len()-num_elements..].iter().rev().cloned(),
largest,
largest_by,
largest_by_key) {
assert_eq!(a,b);
assert_eq!(a,c);
assert_eq!(a,d);
}
it::assert_equal(largest, sorted_largest.clone());
it::assert_equal(largest_by, sorted_largest.clone());
it::assert_equal(largest_by_key, sorted_largest);
}
}

Expand Down Expand Up @@ -585,6 +575,17 @@ where
it::assert_equal(i.k_smallest(k), j.sorted().take(k))
}

// Counterpart of `k_smallest_sort` that goes through `k_smallest_by` (the
// custom heap implementation): with `Ord::cmp` as the comparison, the result
// must match the first `k` items of the fully sorted input.
fn k_smallest_by_sort<I>(i: I, k: u16)
where
    I: Iterator + Clone,
    I::Item: Ord + Debug,
{
    // Keep an untouched copy of the input to build the reference answer from.
    let reference = i.clone();
    let limit = usize::from(k);
    it::assert_equal(
        i.k_smallest_by(limit, Ord::cmp),
        reference.sorted().take(limit),
    )
}

macro_rules! generic_test {
($f:ident, $($t:ty),+) => {
$(paste::item! {
Expand All @@ -598,6 +599,7 @@ macro_rules! generic_test {
}

generic_test!(k_smallest_sort, u8, u16, u32, u64, i8, i16, i32, i64);
generic_test!(k_smallest_by_sort, u8, u16, u32, u64, i8, i16, i32, i64);

#[test]
fn sorted_by_key() {
Expand Down