Skip to content

Add IndexedRandom::choose_multiple_array, index::sample_array #1453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 4, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.

## [Unreleased]
- Add `rand::distributions::WeightedIndex::{weight, weights, total_weight}` (#1420)
- Add `IndexedRandom::choose_multiple_array`, `index::sample_array` (#1453)
- Bump the MSRV to 1.61.0
- Rename `Rng::gen` to `Rng::random` to avoid conflict with the new `gen` keyword in Rust 2024 (#1435)
- Move all benchmarks to new `benches` crate (#1439)
72 changes: 53 additions & 19 deletions src/seq/index.rs
Original file line number Diff line number Diff line change
@@ -7,35 +7,29 @@
// except according to those terms.

//! Low-level API for sampling indices
use core::{cmp::Ordering, hash::Hash, ops::AddAssign};

#[cfg(feature = "alloc")]
use core::slice;

#[cfg(feature = "alloc")]
use alloc::vec::{self, Vec};
use core::slice;
use core::{hash::Hash, ops::AddAssign};
// BTreeSet is not as fast as HashSet in tests, but better than nothing.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::collections::BTreeSet;
#[cfg(feature = "std")]
use std::collections::HashSet;

#[cfg(feature = "std")]
use super::WeightError;

use crate::distributions::uniform::SampleUniform;
#[cfg(feature = "alloc")]
use crate::{
distributions::{uniform::SampleUniform, Distribution, Uniform},
Rng,
};

use crate::distributions::{Distribution, Uniform};
use crate::Rng;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::collections::BTreeSet;
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "std")]
use std::collections::HashSet;

/// A vector of indices.
///
/// Multiple internal representations are possible.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub enum IndexVec {
#[doc(hidden)]
@@ -44,6 +38,7 @@ pub enum IndexVec {
USize(Vec<usize>),
}

#[cfg(feature = "alloc")]
impl IndexVec {
/// Returns the number of indices
#[inline]
@@ -94,6 +89,7 @@ impl IndexVec {
}
}

#[cfg(feature = "alloc")]
impl IntoIterator for IndexVec {
type IntoIter = IndexVecIntoIter;
type Item = usize;
@@ -108,6 +104,7 @@ impl IntoIterator for IndexVec {
}
}

#[cfg(feature = "alloc")]
impl PartialEq for IndexVec {
fn eq(&self, other: &IndexVec) -> bool {
use self::IndexVec::*;
@@ -124,13 +121,15 @@ impl PartialEq for IndexVec {
}
}

#[cfg(feature = "alloc")]
impl From<Vec<u32>> for IndexVec {
#[inline]
fn from(v: Vec<u32>) -> Self {
IndexVec::U32(v)
}
}

#[cfg(feature = "alloc")]
impl From<Vec<usize>> for IndexVec {
#[inline]
fn from(v: Vec<usize>) -> Self {
@@ -171,6 +170,7 @@ impl<'a> Iterator for IndexVecIter<'a> {
// No methods are overridden, so the trait's provided `len()` is used.
impl ExactSizeIterator for IndexVecIter<'_> {}

/// Return type of `IndexVec::into_iter`.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
pub enum IndexVecIntoIter {
#[doc(hidden)]
@@ -179,6 +179,7 @@ pub enum IndexVecIntoIter {
USize(vec::IntoIter<usize>),
}

#[cfg(feature = "alloc")]
impl Iterator for IndexVecIntoIter {
type Item = usize;

@@ -201,6 +202,7 @@ impl Iterator for IndexVecIntoIter {
}
}

// Empty impl: no methods are overridden, so the trait's provided `len()` is
// used. Only compiled when the `alloc` feature is enabled.
#[cfg(feature = "alloc")]
impl ExactSizeIterator for IndexVecIntoIter {}

/// Randomly sample exactly `amount` distinct indices from `0..length`, and
@@ -225,6 +227,7 @@ impl ExactSizeIterator for IndexVecIntoIter {}
/// to adapt the internal `sample_floyd` implementation.
///
/// Panics if `amount > length`.
#[cfg(feature = "alloc")]
#[track_caller]
pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
where
@@ -267,6 +270,33 @@ where
}
}

/// Draw exactly `N` distinct indices from `0..len` and return them as a
/// fixed-size array in random (fully shuffled) order.
///
/// The implementation is Floyd's sampling algorithm with a linear scan for
/// duplicates, giving `O(N^2)` time and `O(N)` memory.
///
/// The result is `None` if (and only if) `N > len`.
pub fn sample_array<R, const N: usize>(rng: &mut R, len: usize) -> Option<[usize; N]>
where
    R: Rng + ?Sized,
{
    // `checked_sub` fails precisely when `N > len`, which is the `None` case.
    let first = len.checked_sub(N)?;

    let mut picked = [0; N];
    for slot in 0..N {
        // Floyd's algorithm: the inclusive upper bound walks over `len - N..len`.
        let bound = first + slot;
        let draw = rng.gen_range(0..=bound);
        // If `draw` was already chosen in an earlier round, `bound` (which is
        // new this round) takes its place; `draw` is then stored in the
        // current slot either way.
        if let Some(dup) = picked[..slot].iter_mut().find(|held| **held == draw) {
            *dup = bound;
        }
        picked[slot] = draw;
    }
    Some(picked)
}

/// Randomly sample exactly `amount` distinct indices from `0..length`, and
/// return them in an arbitrary order (there is no guarantee of shuffling or
/// ordering). The weights are to be provided by the input function `weights`,
@@ -329,6 +359,8 @@ where
N: UInt,
IndexVec: From<Vec<N>>,
{
use std::cmp::Ordering;

if amount == N::zero() {
return Ok(IndexVec::U32(Vec::new()));
}
@@ -399,6 +431,7 @@ where
/// The output values are fully shuffled. (Overhead is under 50%.)
///
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
#[cfg(feature = "alloc")]
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
@@ -430,6 +463,7 @@ where
/// performance in all cases).
///
/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
#[cfg(feature = "alloc")]
fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
@@ -495,6 +529,7 @@ impl UInt for usize {
///
/// This function is generic over X primarily so that results are value-stable
/// over 32-bit and 64-bit platforms.
#[cfg(feature = "alloc")]
fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec
where
R: Rng + ?Sized,
@@ -519,9 +554,11 @@ where
IndexVec::from(indices)
}

#[cfg(feature = "alloc")]
#[cfg(test)]
mod test {
use super::*;
use alloc::vec;

#[test]
#[cfg(feature = "serde1")]
@@ -542,9 +579,6 @@ mod test {
}
}

#[cfg(feature = "alloc")]
use alloc::vec;

#[test]
fn test_sample_boundaries() {
let mut r = crate::test::rng(404);
Loading