-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Add -Zmutable-noalias flag #45012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add -Zmutable-noalias flag #45012
Conversation
r? @arielb1 (rust_highfive has picked a reviewer for you, use r? to override) |
cc @jrmuizel |
Some codegen tests using serde (code):
#[macro_use]
extern crate serde_derive;
extern crate bincode;
use std::io::Write;
use std::{io, ptr};
use bincode::{serialize, deserialize, Infinite};
// Sample record that the codegen test serializes with bincode.
// It has three `f64` fields followed by one `u64` field; the field order below is
// the order in which the derived `Serialize` impl visits them.
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Entity {
x: f64,
y: f64,
z: f64,
// NOTE(review): the meaning of `o` is not stated anywhere in this snippet.
o: u64
}
// Newtype around a list of `Entity` values.
// NOTE(review): nothing in the visible snippet constructs or serializes a `World`;
// `main` works on a single `Entity` instead.
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct World(Vec<Entity>);
/// `Write` adapter that appends bytes to a borrowed `Vec<u8>` without ever growing it.
///
/// The caller must have reserved enough spare capacity in the vector for every byte
/// that will be written through this adapter (e.g. via `Vec::reserve`). Writing more
/// than the spare capacity is undefined behaviour in release builds and trips a
/// `debug_assert!` in debug builds.
struct UnsafeVecWriter<'a>(&'a mut Vec<u8>);

impl<'a> Write for UnsafeVecWriter<'a> {
    /// Copies all of `buf` to the end of the vector and extends its length.
    ///
    /// Always reports the whole buffer as written; it never returns an error.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let old_len = self.0.len();
        // Debug-only check of the precondition; compiled out in release builds so the
        // hot path stays free of capacity checks.
        debug_assert!(
            self.0.capacity() - old_len >= buf.len(),
            "UnsafeVecWriter: caller must reserve space before writing"
        );
        // SAFETY: the caller guarantees `buf.len()` bytes of spare capacity past
        // `old_len`, so the destination range lies inside the vector's allocation.
        // `buf` is a shared borrow and the vector is exclusively borrowed, so the two
        // ranges cannot overlap. The bytes are copied *before* `set_len` so that the
        // vector's length never covers uninitialized memory.
        unsafe {
            ptr::copy_nonoverlapping(
                buf.as_ptr(),
                self.0.as_mut_ptr().offset(old_len as isize),
                buf.len(),
            );
            self.0.set_len(old_len + buf.len());
        }
        Ok(buf.len())
    }

    /// Bytes land directly in the vector, so there is nothing to flush.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
/// `Write` sink that throws the bytes away and only keeps a running total.
///
/// Field `0` is the number of bytes passed to `write` so far.
struct SizeCounter(usize);

impl Write for SizeCounter {
    /// Adds `buf.len()` to the running total and reports the whole buffer as written.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0 += buf.len();
        Ok(buf.len())
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
#[inline(never)]
fn make_bytes(vec: &mut Vec<u8>, e: &Entity) {
let mut size = SizeCounter(0);
bincode::serialize_into(&mut size,e , Infinite).unwrap();
vec.reserve(size.0);
//vec.reserve(bincode::serialized_size(&e) as usize);
bincode::serialize_into(&mut UnsafeVecWriter(vec), e, Infinite).unwrap();
}
/// Baseline for comparison: appends the bincode encoding of `e` to `vec` by handing
/// the vector itself to bincode as the writer. Panics if serialization fails.
///
/// Kept out-of-line so the generated code can be inspected on its own.
#[inline(never)]
fn slow_make_bytes(vec: &mut Vec<u8>, e: &Entity) {
    let outcome = bincode::serialize_into(vec, e, Infinite);
    outcome.unwrap();
}
// Entry point of the codegen test: serializes one `Entity` into the same buffer twice
// (once via `make_bytes`, once via `slow_make_bytes`), then decodes an `Entity` from
// the start of that buffer and checks that it equals the input.
fn main() {
let world = Entity { x: 0.0, y: 4.0, z: 5.0, o: 0 };
let mut encoded = Vec::new();
// Both calls append to `encoded`, so afterwards it holds two encoded copies.
make_bytes(&mut encoded, &world);
slow_make_bytes(&mut encoded, &world);
// NOTE(review): an earlier size check here assumed an 8-byte length prefix plus
// 4-byte floats, which does not match this `Entity` (three f64 + one u64, no Vec).
// Presumably each copy is 32 bytes if bincode writes every field as 8 bytes — TODO confirm.
// Decodes from the front of the buffer; presumably the trailing second copy is ignored.
let decoded: Entity = deserialize(&encoded[..]).unwrap();
assert_eq!(world, decoded);
} make_bytes__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
movl $32, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq (%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
movq 8(%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
movq 16(%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
movq 24(%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc

make_bytes with -Zmutable-noalias:
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
movl $32, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq (%rbx), %rax
movq 16(%rbx), %rcx
movups (%r14), %xmm0
movups %xmm0, (%rax,%rcx)
movups 16(%r14), %xmm0
leaq 32(%rcx), %rdx
movq %rdx, 16(%rbx)
movups %xmm0, 16(%rax,%rcx)
popq %rbx
popq %r14
popq %rbp
retq
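.cfi_endproc

Changing Entity to be [struct definition lost in extraction; judging by the listings below, the variant serializes to 20 bytes: three 4-byte fields x, y, z (presumably f32) followed by the 8-byte o]:

make_bytes:
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE: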
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
movl $20, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movl 8(%r14), %eax
movq 16(%rbx), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movl %eax, (%rdx,%rcx)
movl 12(%r14), %eax
movq 16(%rbx), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movl %eax, (%rdx,%rcx)
movl 16(%r14), %eax
movq 16(%rbx), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movl %eax, (%rdx,%rcx)
movq (%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc

make_bytes with -Zmutable-noalias:
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %rbx
movq %rdi, %r14
movl $20, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movl 8(%rbx), %eax
movq (%r14), %rcx
movq 16(%r14), %rdx
movl %eax, (%rcx,%rdx)
movl 12(%rbx), %eax
movl %eax, 4(%rcx,%rdx)
movl 16(%rbx), %eax
movl %eax, 8(%rcx,%rdx)
movq (%rbx), %rax
leaq 20(%rdx), %rsi
movq %rsi, 16(%r14)
movq %rax, 12(%rcx,%rdx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc

slow_make_bytes (uniform, but generally unaffected by any change):
__ZN10serde_fast15slow_make_bytes17hf60759a7955c0485E:
.cfi_startproc
pushq %rbp
Lcfi76:
.cfi_def_cfa_offset 16
Lcfi77:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi78:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Lcfi79:
.cfi_offset %rbx, -40
Lcfi80:
.cfi_offset %r14, -32
Lcfi81:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %rbx
movq (%r14), %r15
movl $8, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r15, (%rcx,%rax)
movq 8(%r14), %r15
movl $8, %esi
movq %rbx, %rdi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r15, (%rcx,%rax)
movq 16(%r14), %r15
movl $8, %esi
movq %rbx, %rdi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r15, (%rcx,%rax)
movq 24(%r14), %r14
movl $8, %esi
movq %rbx, %rdi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r14, (%rcx,%rax)
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
retq
.cfi_endproc

This significantly improves our codegen. |
This is very interesting. Kudos for running tests 😄 |
Anywhere you see SetLenOnDrop inside the vec code, it's probably a workaround for the missing noalias. |
@bors r+ |
📌 Commit 3647129 has been approved by |
@arielb1 what's the legislative process for promoting this to a -C flag? (e.g. usable on stable) |
Should this become a |
That's basically https://forge.rust-lang.org/stabilization-guide.html - create a tracking issue, nominate it to T-compiler FCP, write docs. But I don't think we want this to be a |
@rkruppe it might be reasonable to default if panic=abort, but idk yet about otherwise. |
Are you talking about lingering misoptimizations? Surely if those exist, end users shouldn't be encouraged to enable it? |
Based on discussion in #45029, I've added a commit to enable this by default if |
Let's have a go at this @bors r+ |
📌 Commit a6dea41 has been approved by |
This is the same mode as Gecko uses and will get better codegen once rust-lang/rust#45012 has landed.
Do we have some benchmarks? Would love to know if this has significant effect on |
Add -Zmutable-noalias flag

We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.

Since then, a few things have happened:
* llvm has cleaned up a bunch of the issues (I'm told)
* we've added a nounwind codegen option

As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).
☀️ Test successful - status-appveyor, status-travis |
There's a "null result" with this option in bluss/arrayvec#74 (but I wouldn't know if it definitely should apply there anyway). I'll run some more benchmarks when I can. |
Use panic=abort instead of panic=unwind This is the same mode as Gecko uses and will get better codegen once rust-lang/rust#45012 has landed. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/webrender/1825) <!-- Reviewable:end -->
We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.
Since then, a few things have happened:
* llvm has cleaned up a bunch of the issues (I'm told)
* we've added a nounwind codegen option
As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).