Skip to content

Commit 0e92bb1

Browse files
authored
Merge pull request #23 from nh13/feat/read_record_set_limit
feat: add method to read a maximum # of records
2 parents ffb1b35 + 6401605 commit 0e92bb1

File tree

4 files changed

+89
-0
lines changed

4 files changed

+89
-0
lines changed

src/fasta.rs

+16
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,19 @@ where
254254
// TODO: in next major version, return Result<bool> instead!
255255
#[inline]
256256
pub fn read_record_set(&mut self, rset: &mut RecordSet) -> Option<Result<(), Error>> {
257+
// Same operation as `read_record_set_limited`, with `usize::MAX` passed as the record limit.
self.read_record_set_limited(rset, usize::MAX)
258+
}
259+
260+
/// Updates a [RecordSet](struct.RecordSet.html) with new data. The contents of the internal
261+
/// buffer are just copied over to the record set and the positions of all records are found.
262+
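/// Old data will be erased. Stops adding records once the record count (`rset.npos`) is at least `max_records`; the check runs after each record is added. Returns `None` if the input reached its end.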
263+
// TODO: in next major version, return Result<bool> instead!
264+
#[inline]
265+
pub fn read_record_set_limited(
266+
&mut self,
267+
rset: &mut RecordSet,
268+
max_records: usize,
269+
) -> Option<Result<(), Error>> {
257270
// after read_record_set(), the state is always Positioned, Parsing or Finished
258271
match self.state {
259272
State::New => {
@@ -312,6 +325,9 @@ where
312325
}
313326
rset.npos += 1;
314327
self.increment_record();
328+
if rset.npos >= max_records {
329+
break;
330+
}
315331
}
316332

317333
rset.buffer.clear();

src/fastq.rs

+16
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,19 @@ where
211211
// TODO: in next major version, return Result<bool> instead!
212212
#[inline]
213213
pub fn read_record_set(&mut self, rset: &mut RecordSet) -> Option<Result<(), Error>> {
214+
// Same operation as `read_record_set_limited`, with `usize::MAX` passed as the record limit.
self.read_record_set_limited(rset, usize::MAX)
215+
}
216+
217+
/// Updates a [RecordSet](struct.RecordSet.html) with new data. The contents of the internal
218+
/// buffer are just copied over to the record set and the positions of all records are found.
219+
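/// Old data will be erased. Stops adding records once `rset.buf_positions` holds at least `max_records` entries; the check runs after each record is added. Returns `None` if the input reached its end.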
220+
// TODO: in next major version, return Result<bool> instead!
221+
#[inline]
222+
pub fn read_record_set_limited(
223+
&mut self,
224+
rset: &mut RecordSet,
225+
max_records: usize,
226+
) -> Option<Result<(), Error>> {
214227
// after read_record_set(), the state is always Positioned, Parsing or Finished
215228
match self.state {
216229
State::New => {
@@ -259,6 +272,9 @@ where
259272
}
260273
rset.buf_positions.push(self.buf_pos.clone());
261274
self.increment_record();
275+
if rset.buf_positions.len() >= max_records {
276+
break;
277+
}
262278
}
263279

264280
rset.buffer.clear();

tests/fasta.rs

+28
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,34 @@ fn test_fasta_seq_lines() {
9797
assert_eq!(lines, vec![b"AT", b"GC"]);
9898
}
9999

100+
/// Checks that `read_record_set_limited` stops at `max_records` even though the input
/// holds more records, and that the collected records equal the ones produced by plain
/// record-by-record iteration over the same input.
#[test]
fn test_fasta_read_record_set_limited() {
    use std::io::Write;

    // Records written beyond the limit, so that the limit (and not the end of the
    // input) is what ends the read.
    const EXTRA_RECORDS: usize = 3;

    for max_records in 3..10 {
        let total_records = max_records + EXTRA_RECORDS;
        let mut fasta_vec = Vec::with_capacity(13 * total_records);
        for i in 0..total_records {
            write!(&mut fasta_vec, ">id{i}\nATGC\n").unwrap();
        }

        let mut reader = Reader::new(&fasta_vec[..]);
        let mut rset = RecordSet::default();
        // Do not drop the outcome silently: the input is non-empty and well-formed.
        reader
            .read_record_set_limited(&mut rset, max_records)
            .expect("input is not empty")
            .expect("input is valid FASTA");
        assert_eq!(rset.len(), max_records);

        let mut rset_iter = rset.into_iter();
        let mut reader = Reader::new(&fasta_vec[..]);

        for _ in 0..max_records {
            let r0 = reader.next().unwrap().unwrap();
            let rec = rset_iter.next().unwrap();
            assert_eq!(rec.id(), r0.id());
            assert_eq!(rec.desc(), r0.desc());
            assert_eq!(rec.head(), r0.head());
            assert_eq!(rec.seq(), r0.seq());
        }
        // Nothing past the limit may be exposed by the record set.
        assert!(rset_iter.next().is_none());
    }
}
127+
100128
#[test]
101129
fn test_fasta_full_seq() {
102130
use std::borrow::Cow;

tests/fastq.rs

+29
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,35 @@ fn test_fastq_recset() {
196196
}
197197
}
198198

199+
/// Checks that `read_record_set_limited` collects exactly `max_records` records and that
/// they equal the ones produced by plain record-by-record iteration over the same input.
#[test]
fn test_fastq_read_record_set_limited() {
    for max_records in 3..10 {
        // One copy more than the limit: provided `FASTQ` holds at least one record, the
        // input then contains more records than `max_records`, so the limit (and not the
        // end of the input) is what ends the read.
        let n_copies = max_records + 1;
        let mut fastq_vec: Vec<u8> = Vec::with_capacity(FASTQ.len() * n_copies);
        for _ in 0..n_copies {
            fastq_vec.extend_from_slice(FASTQ);
        }

        let mut reader = Reader::new(&fastq_vec[..]);
        let mut rset = RecordSet::default();
        // Do not drop the outcome silently: the input is non-empty and well-formed.
        reader
            .read_record_set_limited(&mut rset, max_records)
            .expect("input is not empty")
            .expect("input is valid FASTQ");
        assert_eq!(rset.len(), max_records);

        let mut rset_iter = rset.into_iter();
        let mut reader = Reader::new(&fastq_vec[..]);

        for _ in 0..max_records {
            let r0 = reader.next().unwrap().unwrap();
            let rec = rset_iter.next().unwrap();
            assert_eq!(rec.id(), r0.id());
            assert_eq!(rec.desc(), r0.desc());
            assert_eq!(rec.head(), r0.head());
            assert_eq!(rec.seq(), r0.seq());
            assert_eq!(rec.qual(), r0.qual());
        }
        // Nothing past the limit may be exposed by the record set.
        assert!(rset_iter.next().is_none());
    }
}
227+
199228
#[test]
200229
fn test_fastq_parallel() {
201230
for cap in 3..400 {

0 commit comments

Comments
 (0)