Skip to content

test on custom type with std::io::Read impl fails on 1.76.0 #123700

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Closed
bananaturtlesandwich opened this issue Apr 9, 2024 · 8 comments
Closed
Labels
C-discussion Category: Discussion or questions that doesn't represent real issues. T-libs Relevant to the library team, which will review and decide on the PR/issue.

Comments

@bananaturtlesandwich
Copy link

bananaturtlesandwich commented Apr 9, 2024

the data structure

//! Chain for chaining two `Read` + `Seek` implementations

use std::io::{Read, Result, Seek, SeekFrom};

/// Chain for chaining two `Read` + `Seek` implementations
pub struct Chain<C: Read + Seek> {
    first: C,
    second: Option<C>,
    first_len: u64,
    second_len: u64,
    pos: u64,
}

impl<C: Read + Seek> Chain<C> {
    /// Create a new chain
    pub fn new(mut first: C, mut second: Option<C>) -> Self {
        // ignore errors for now
        let first_len = first.seek(SeekFrom::End(0)).unwrap_or_default();
        first.rewind().unwrap_or_default();
        let second_len = match second.as_mut() {
            Some(sec) => {
                let len = sec.seek(SeekFrom::End(0)).unwrap_or_default();
                sec.rewind().unwrap_or_default();
                len
            }
            None => 0,
        };
        Self {
            first,
            second,
            first_len,
            second_len,
            pos: 0,
        }
    }
}

impl<C: Read + Seek> Read for Chain<C> {
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        match self.second.as_mut() {
            Some(sec) => {
                let len_read = match self.pos >= self.first_len {
                    true => sec.read(buf)?,
                    false => {
                        let len = buf.len();
                        let to_end = (self.first_len - self.pos) as usize;
                        match to_end >= len {
                            true => self.first.read(buf)?,
                            false => {
                                let mut first = vec![0; to_end];
                                let mut second = vec![0; len - to_end];
                                self.first.read_exact(&mut first)?;
                                sec.read_exact(&mut second)?;
                                first.append(&mut second);
                                first.as_slice().read(buf)?
                            }
                        }
                    }
                };
                self.pos += len_read as u64;
                Ok(len_read)
            }
            None => self.first.read(buf),
        }
    }
}

impl<C: Read + Seek> Seek for Chain<C> {
    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
        match self.second.as_mut() {
            Some(sec) => match pos {
                SeekFrom::Start(offset) => {
                    self.pos = match offset < self.first_len {
                        true => self.first.seek(pos)?,
                        false => {
                            self.first_len + sec.seek(SeekFrom::Start(offset - self.first_len))?
                        }
                    };
                    Ok(offset)
                }
                SeekFrom::End(offset) => self.seek(SeekFrom::Start(
                    ((self.first_len + self.second_len) as i64 + offset) as u64,
                )),
                SeekFrom::Current(offset) => {
                    self.seek(SeekFrom::Start((self.pos as i64 + offset) as u64))
                }
            },
            None => self.first.seek(pos),
        }
    }
}

the tests:

use std::io::{Cursor, Read, Seek, SeekFrom};

#[test]
fn read() {
    use std::io::Cursor;
    let mut v = Vec::with_capacity(12);
    Chain::new(
        Cursor::new(vec![0, 1, 2, 3, 4, 5, 6, 7]),
        Some(Cursor::new(vec![0, 1, 2, 3])),
    )
    .read_to_end(&mut v)
    .unwrap();
    assert_eq!(v, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3]);
}

#[test]
fn seek() {
    let mut chain = Chain::new(
        Cursor::new(vec![0, 1, 2, 3]),
        Some(Cursor::new(vec![4, 5, 6, 7])),
    );
    let mut read_at = |pos| {
        use byteorder::ReadBytesExt;
        use Seek;
        chain.seek(pos)?;
        chain.read_u8()
    };
    assert_eq!(read_at(SeekFrom::Start(0)).unwrap(), 0);
    assert!(read_at(SeekFrom::Start(8)).is_err());
    assert_eq!(read_at(SeekFrom::Current(-1)).unwrap(), 7);
    assert_eq!(read_at(SeekFrom::Current(-5)).unwrap(), 3);
    assert_eq!(read_at(SeekFrom::End(-4)).unwrap(), 4);
    assert!(read_at(SeekFrom::End(-12)).is_err());
}

I expected to see this happen: the tests should pass

Instead, this happened: the read test failed with an end of file error

Version it worked on

It most recently worked on: Rust 1.75.0

Version with regression

Rust 1.76.0

rustc --version --verbose:

rustc 1.76.0 (07dca489a 2024-02-04)
binary: rustc
commit-hash: 07dca489ac2d933c78d3c5158e3f43beefeb02ce
commit-date: 2024-02-04
host: x86_64-pc-windows-msvc
release: 1.76.0
LLVM version: 17.0.6
@bananaturtlesandwich bananaturtlesandwich added C-bug Category: This is a bug. regression-untriaged Untriaged performance or correctness regression. labels Apr 9, 2024
@rustbot rustbot added I-prioritize Issue: Indicates that prioritization has been requested for this issue. needs-triage This issue may need triage. Remove it if it has been sufficiently triaged. regression-from-stable-to-stable Performance or correctness regression from one stable version to another. and removed regression-untriaged Untriaged performance or correctness regression. labels Apr 9, 2024
@jieyouxu jieyouxu added the E-needs-bisection Call for participation: This issue needs bisection: https://github.com/rust-lang/cargo-bisect-rustc label Apr 9, 2024
@bananaturtlesandwich bananaturtlesandwich changed the title test on custom type with std::io::Read impl fails on 1.77.1 test on custom type with std::io::Read impl fails on 1.76.0 Apr 9, 2024
@jieyouxu
Copy link
Member

jieyouxu commented Apr 9, 2024

searched nightlies: from nightly-2023-11-11 to nightly-2024-02-01
regressed nightly: nightly-2023-11-30
searched commit range: 5facb42...b10cfcd
regressed commit: bbefc98

bisected with cargo-bisect-rustc v0.6.8

Host triple: aarch64-unknown-linux-gnu
Reproduce with:

cargo bisect-rustc --start=1.75.0 --end=1.77.1 -- test

@bananaturtlesandwich
Copy link
Author

bananaturtlesandwich commented Apr 9, 2024

@jieyouxu just tested with 1.76.0 and the regression was there so updated the comment and title

@jieyouxu
Copy link
Member

jieyouxu commented Apr 9, 2024

I'm going to guess it's #118222 cc @the8472. Pinging because not sure if the change in observable behavior here is intended.

@jieyouxu jieyouxu added T-libs Relevant to the library team, which will review and decide on the PR/issue. and removed E-needs-bisection Call for participation: This issue needs bisection: https://github.com/rust-lang/cargo-bisect-rustc labels Apr 9, 2024
@the8472
Copy link
Member

the8472 commented Apr 9, 2024

This is failing with a failed to fill whole buffer message, which points at read_exact, not read_to_end. So it's probably not #118222

The false branch in that match is kind of sus anyway. Turning a single read call into multiple read* calls can lead to bytes from an earlier call getting dropped when a later one returns an error. But I haven't analyzed the code yet, so it might not be relevant here.

Edit: Oh I see it's a bisection result. I'll take another look.

@konsti219
Copy link

konsti219 commented Apr 10, 2024

I did some testing for what types of Vec initialization this code actually panics and found these results:

// 1.75.0 panic, 1.76.0 panic
let mut v = Vec::new();

// 1.75.0 panic, 1.76.0 panic
let mut v = Vec::with_capacity(7);

// 1.75.0 NO panic, 1.76.0 panic
let mut v = Vec::with_capacity(8);

Looking at these results I would guess that the old behavior was actually wrong and that it got fixed.

@bananaturtlesandwich
Copy link
Author

bananaturtlesandwich commented Apr 10, 2024

@konsti219 looks like initialisation is needed now e.g like vec![0; 12] so that is a fix if using read_exact

however read_to_end is still broken

@the8472
Copy link
Member

the8472 commented Apr 10, 2024

Ok, I had a closer look at the code.

The issue is with both the read function and with the test.

A) The test assumes that read_to_end would pass in the Vec's spare capacity to the reader. This is not guaranteed. Both the current and the previous impl used a stack buffer for probing, #118222 just changed when the probing happens.

B) The read function does not handle large buffers getting passed in properly.

                        let len = buf.len();
                        let to_end = (self.first_len - self.pos) as usize;
                        match to_end >= len {
                            true => self.first.read(buf)?,
                            false => {
                                let mut first = vec![0; to_end];
                                let mut second = vec![0; len - to_end];
                                self.first.read_exact(&mut first)?;
                                sec.read_exact(&mut second)?;
                                first.append(&mut second);
                                first.as_slice().read(buf)?

With buf: &mut [u8; 32] then len == 32. In the test to_end = 8 - 0 = 8. Therefore the false case is taken.
Now second = vec![0; 32 - 8], 24 bytes.
But sec is only 4 bytes long.
Therefore read_exact fails.

So the change in the read_to_end impl merely uncovered preexisting bug in your code.

@the8472 the8472 added C-discussion Category: Discussion or questions that doesn't represent real issues. and removed regression-from-stable-to-stable Performance or correctness regression from one stable version to another. C-bug Category: This is a bug. I-prioritize Issue: Indicates that prioritization has been requested for this issue. needs-triage This issue may need triage. Remove it if it has been sufficiently triaged. labels Apr 10, 2024
@bananaturtlesandwich
Copy link
Author

So the change in the read_to_end impl merely uncovered preexisting bug in your code.

thanks so much @the8472 - so it was that buf is now not always guaranteed to match the vec capacity which also exposed bad parity with large buffers

changing to this now allows passing the test :)

let len = buf.len() as u64;
let to_end = self.first_len - self.pos;
match to_end >= len {
    true => self.first.read(buf)?,
    false => {
        let mut first = vec![0; to_end as usize];
        let excess = len - to_end;
        let mut second = vec![
            0;
            match excess > self.second_len {
                true => self.second_len,
                false => excess,
            } as usize
        ];
        self.first.read_exact(&mut first)?;
        sec.read_exact(&mut second)?;
        first.append(&mut second);
        first.as_slice().read(buf)?
    }
}

thank you so much - this turned out to be an improvement rather than a regression

# for free to join this conversation on GitHub. Already have an account? # to comment
Labels
C-discussion Category: Discussion or questions that doesn't represent real issues. T-libs Relevant to the library team, which will review and decide on the PR/issue.
Projects
None yet
Development

No branches or pull requests

5 participants