Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

fix OOB arithmetic in ASCII encoding #54

Merged
merged 2 commits into from
Aug 24, 2020
Merged

Conversation

RalfJung
Copy link
Contributor

Just a few lines down from what #53 fixed, the same issue exists again. Here's the Miri error:

error: Undefined Behavior: inbounds test failed: pointer must be in-bounds at offset 12, but is outside bounds of alloc2066040 which has size 2
    --> /home/r/.rustup/toolchains/miri/lib/rustlib/src/rust/library/core/src/ptr/const_ptr.rs:225:18
     |
225  |         unsafe { intrinsics::offset(self, count) }
     |                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ inbounds test failed: pointer must be in-bounds at offset 12, but is outside bounds of alloc2066040 which has size 2
     |
     = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
     = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
             
     = note: inside `std::ptr::const_ptr::<impl *const u16>::offset` at /home/r/.rustup/toolchains/miri/lib/rustlib/src/rust/library/core/src/ptr/const_ptr.rs:225:18
     = note: inside `std::ptr::const_ptr::<impl *const u16>::add` at /home/r/.rustup/toolchains/miri/lib/rustlib/src/rust/library/core/src/ptr/const_ptr.rs:499:18
note: inside `ascii::basic_latin_to_ascii` at src/ascii.rs:223:29
    --> src/ascii.rs:223:29
     |
223  |                         if (src.add(dst_until_alignment) as usize) & ALU_ALIGNMENT_MASK != 0 {
     |                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...
1476 |         basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_alu);
     |         --------------------------------------------------------------------------------- in this macro invocation
note: inside `handles::Utf16Source::copy_ascii_to_check_space_two` at src/handles.rs:1244:17
    --> src/handles.rs:1244:17
     |
1244 |                 basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
     |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `big5::Big5Encoder::encode_from_utf16_raw` at src/macros.rs:1079:19
    --> src/macros.rs:1079:19
     |
1079 |               match $source.$copy_ascii(&mut dest) {
     |                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     | 
    ::: src/big5.rs:195:5
     |
195  | /     ascii_compatible_encoder_functions!(
196  | |         {
197  | |             // For simplicity, unified ideographs
198  | |             // in the pointer range 11206...11212 are handled
...    |
259  | |         false
260  | |     );
     | |______- in this macro invocation
note: inside `variant::VariantEncoder::encode_from_utf16_raw` at src/variant.rs:311:48
    --> src/variant.rs:311:48
     |
311  |             VariantEncoder::Big5(ref mut v) => v.encode_from_utf16_raw(src, dst, last),
     |                                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `Encoder::encode_from_utf16_without_replacement` at src/lib.rs:4732:9
    --> src/lib.rs:4732:9
     |
4732 |         self.variant.encode_from_utf16_raw(src, dst, last)
     |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `Encoder::encode_from_utf16` at src/lib.rs:4661:43
    --> src/lib.rs:4661:43
     |
4661 |               let (result, read, written) = self.encode_from_utf16_without_replacement(
     |  ___________________________________________^
4662 | |                 &src[total_read..],
4663 | |                 &mut dst[total_written..effective_dst_len],
4664 | |                 last,
4665 | |             );
     | |_____________^
note: inside `testing::encode_from_utf16` at src/testing.rs:219:40
    --> src/testing.rs:219:40
     |
219  |     let (complete, read, written, _) = encoder.encode_from_utf16(string, &mut dest, true);
     |                                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `testing::encode_without_padding` at src/testing.rs:65:5
    --> src/testing.rs:65:5
     |
65   |     encode_from_utf16(encoding, &utf16_from_utf8(string)[..], expect);
     |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `testing::encode` at src/testing.rs:59:9
    --> src/testing.rs:59:9
     |
59   |         encode_without_padding(encoding, &string[..], &vec[..]);
     |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `big5::tests::encode_big5` at src/big5.rs:276:9
    --> src/big5.rs:276:9
     |
276  |         encode(BIG5, string, expect);
     |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `big5::tests::test_big5_encode` at src/big5.rs:363:9
    --> src/big5.rs:363:9
     |
363  |         encode_big5("", b"");
     |         ^^^^^^^^^^^^^^^^^^^^
note: inside closure at src/big5.rs:361:5
    --> src/big5.rs:361:5
     |
361  | /     fn test_big5_encode() {
362  | |         // Empty
363  | |         encode_big5("", b"");
364  | |
...    |
386  | |         encode_big5("\u{2550}", b"\xF9\xF9");
387  | |     }
     | |_____^

@RalfJung
Copy link
Contributor Author

Miri found a similar case elsewhere in that same file, so I changed two additional uses of add to add_wrapping.

This is the error I got; I did not try to also trigger the problem in the dst line but it seems similar enough:

error: Undefined Behavior: inbounds test failed: pointer must be in-bounds at offset 8, but is outside bounds of alloc12913144 which has size 2
    --> /home/r/.rustup/toolchains/miri/lib/rustlib/src/rust/library/core/src/ptr/const_ptr.rs:225:18
     |
225  |         unsafe { intrinsics::offset(self, count) }
     |                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ inbounds test failed: pointer must be in-bounds at offset 8, but is outside bounds of alloc12913144 which has size 2
     |
     = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
     = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
             
     = note: inside `std::ptr::const_ptr::<impl *const u16>::offset` at /home/r/.rustup/toolchains/miri/lib/rustlib/src/rust/library/core/src/ptr/const_ptr.rs:225:18
     = note: inside `std::ptr::const_ptr::<impl *const u16>::add` at /home/r/.rustup/toolchains/miri/lib/rustlib/src/rust/library/core/src/ptr/const_ptr.rs:499:18
note: inside `ascii::pack_latin1` at src/ascii.rs:303:29
    --> src/ascii.rs:303:29
     |
303  |                         if (src.add(dst_until_alignment) as usize) & ALU_ALIGNMENT_MASK != 0 {
     |                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...
1478 |         latin1_alu!(pack_latin1, u16, u8, pack_latin1_stride_alu);
     |         ---------------------------------------------------------- in this macro invocation
note: inside `mem::convert_utf16_to_latin1_lossy` at src/mem.rs:1980:9
    --> src/mem.rs:1980:9
     |
1980 |         pack_latin1(src.as_ptr(), dst.as_mut_ptr(), src.len());
     |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `mem::tests::test_convert_utf16_to_latin1_lossy_panics` at src/mem.rs:2462:17
    --> src/mem.rs:2462:17
     |
2462 |         let _ = convert_utf16_to_latin1_lossy(&[0x0100u16], &mut dst[..]);
     |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside closure at src/mem.rs:2460:5
    --> src/mem.rs:2460:5
     |
2460 | /     fn test_convert_utf16_to_latin1_lossy_panics() {
2461 | |         let mut dst = [0u8; 16];
2462 | |         let _ = convert_utf16_to_latin1_lossy(&[0x0100u16], &mut dst[..]);
2463 | |     }
     | |_____^

There are many more add in this file and quite a few of them look similar in their logic; I'd assume many of those also need the patch but I don't understand any of this code so that might be the wrong guess.

@hsivonen hsivonen merged commit 1676109 into hsivonen:master Aug 24, 2020
@hsivonen
Copy link
Owner

Thank you! I'll audit the other case before pushing another release.

# for free to join this conversation on GitHub. Already have an account? # to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants