Skip to content

Add -Zmutable-noalias flag #45012

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Merged
merged 2 commits into from
Oct 8, 2017
Merged

Add -Zmutable-noalias flag #45012

merged 2 commits into from
Oct 8, 2017

Conversation

Gankra
Copy link
Contributor

@Gankra Gankra commented Oct 3, 2017

We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.

Since then, a few things have happened:

  • llvm has cleaned up a bunch of the issues (I'm told)
  • we've added a nounwind codegen option

As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).

@rust-highfive
Copy link
Contributor

r? @arielb1

(rust_highfive has picked a reviewer for you, use r? to override)

@Gankra
Copy link
Contributor Author

Gankra commented Oct 3, 2017

cc @jrmuizel

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

Some codgen tests using serde:

code
#[macro_use]
extern crate serde_derive;
extern crate bincode;

use std::io::Write;
use std::{io, ptr};

use bincode::{serialize, deserialize, Infinite};

#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Entity {
    x: f64,
    y: f64,
    z: f64,
    o: u64
}

#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct World(Vec<Entity>);

struct UnsafeVecWriter<'a>(&'a mut Vec<u8>);

impl<'a> Write for UnsafeVecWriter<'a> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        unsafe {
            let old_len = self.0.len();
            self.0.set_len(old_len + buf.len());
            ptr::copy_nonoverlapping(buf.as_ptr(), self.0.as_mut_ptr().offset(old_len as isize), buf.len());
        }
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> { Ok(()) }
}

struct SizeCounter(usize);

impl<'a> Write for SizeCounter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0 += buf.len();
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> { Ok(()) }
}

#[inline(never)]
fn make_bytes(vec: &mut Vec<u8>, e: &Entity) {
    let mut size = SizeCounter(0);
    bincode::serialize_into(&mut size,e , Infinite).unwrap();
    vec.reserve(size.0);
    //vec.reserve(bincode::serialized_size(&e) as usize);

    bincode::serialize_into(&mut UnsafeVecWriter(vec), e, Infinite).unwrap();
}

#[inline(never)]
fn slow_make_bytes(vec: &mut Vec<u8>, e: &Entity) {
    bincode::serialize_into(vec, e, Infinite).unwrap();
}

fn main() {
    let world = Entity { x: 0.0, y: 4.0, z: 5.0, o: 0 };

    let mut encoded = Vec::new();
    make_bytes(&mut encoded, &world);
    slow_make_bytes(&mut encoded, &world);

    // 8 bytes for the length of the vector, 4 bytes per float.
    //assert_eq!(encoded.len(), 8 + 4 * 4);

    let decoded: Entity = deserialize(&encoded[..]).unwrap();

    assert_eq!(world, decoded);
}

make_bytes
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movl	$32, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	movq	8(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	movq	16(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	movq	24(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc
make_bytes -Zmutable-noalias
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movl	$32, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	(%rbx), %rax
	movq	16(%rbx), %rcx
	movups	(%r14), %xmm0
	movups	%xmm0, (%rax,%rcx)
	movups	16(%r14), %xmm0
	leaq	32(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movups	%xmm0, 16(%rax,%rcx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc

Changing Entity to be (f32, f32, f32, u64) (non-uniform sizes)

make_bytes
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movl	$20, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movl	8(%r14), %eax
	movq	16(%rbx), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	12(%r14), %eax
	movq	16(%rbx), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	16(%r14), %eax
	movq	16(%rbx), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movl	%eax, (%rdx,%rcx)
	movq	(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc
make_bytes -Zmutable-noalias
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %rbx
	movq	%rdi, %r14
	movl	$20, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movl	8(%rbx), %eax
	movq	(%r14), %rcx
	movq	16(%r14), %rdx
	movl	%eax, (%rcx,%rdx)
	movl	12(%rbx), %eax
	movl	%eax, 4(%rcx,%rdx)
	movl	16(%rbx), %eax
	movl	%eax, 8(%rcx,%rdx)
	movq	(%rbx), %rax
	leaq	20(%rdx), %rsi
	movq	%rsi, 16(%r14)
	movq	%rax, 12(%rcx,%rdx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc

slow_make_bytes (uniform, but generally unaffected by any change)
__ZN10serde_fast15slow_make_bytes17hf60759a7955c0485E:
	.cfi_startproc
	pushq	%rbp
Lcfi76:
	.cfi_def_cfa_offset 16
Lcfi77:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi78:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Lcfi79:
	.cfi_offset %rbx, -40
Lcfi80:
	.cfi_offset %r14, -32
Lcfi81:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movq	(%r14), %r15
	movl	$8, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r15, (%rcx,%rax)
	movq	8(%r14), %r15
	movl	$8, %esi
	movq	%rbx, %rdi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r15, (%rcx,%rax)
	movq	16(%r14), %r15
	movl	$8, %esi
	movq	%rbx, %rdi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r15, (%rcx,%rax)
	movq	24(%r14), %r14
	movl	$8, %esi
	movq	%rbx, %rdi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r14, (%rcx,%rax)
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	retq
	.cfi_endproc

This significantly improves our codegen.

@arthurprs
Copy link
Contributor

This is very interesting. Kudos for running tests 😄

@bluss
Copy link
Member

bluss commented Oct 4, 2017

Anywhere you see SetLenOnDrop inside the vec code, it's probably a workaround for the missing noalias.

@arielb1
Copy link
Contributor

arielb1 commented Oct 4, 2017

@bors r+

@bors
Copy link
Collaborator

bors commented Oct 4, 2017

📌 Commit 3647129 has been approved by arielb1

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

@arielb1 what's the legislative process for promoting this to a -C flag? (e.g. usable on stable)

@hanna-kruppe
Copy link
Contributor

Should this become a -C flag? It seems like a temporary hack, almost as much for experimentation as for end users. Hopefully it'll just be the default behavior (again) soon-ish.

@arielb1
Copy link
Contributor

arielb1 commented Oct 4, 2017

@gankro

That's basically https://forge.rust-lang.org/stabilization-guide.html - create a tracking issue, nominate it to T-compiler FCP, write docs.

But I don't think we want this to be a -C feature. I think we should just make this the default behavior soon-ish.

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

@rkruppe it might be reasonable to default if panic=abort, but idk yet about otherwise.

@hanna-kruppe
Copy link
Contributor

Are you talking about lingering misoptimizations? Surely if those exist, end users shouldn't be encouraged to enable it?

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

Based on discussion in #45029, I've added a commit to enable this by default if panic=abort.

@arielb1
Copy link
Contributor

arielb1 commented Oct 5, 2017

Let's have a go at this

@bors r+

@bors
Copy link
Collaborator

bors commented Oct 5, 2017

📌 Commit a6dea41 has been approved by arielb1

jrmuizel added a commit to jrmuizel/webrender that referenced this pull request Oct 6, 2017
This is the same mode as Gecko uses and will get better codegen
once rust-lang/rust#45012 has landed.
@shepmaster shepmaster added the S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion. label Oct 6, 2017
@bstrie
Copy link
Contributor

bstrie commented Oct 7, 2017

Do we have some benchmarks? Would love to know if this has significant effect on panic=abort builds.

@bors
Copy link
Collaborator

bors commented Oct 8, 2017

⌛ Testing commit a6dea41 with merge ff8e264...

bors added a commit that referenced this pull request Oct 8, 2017
Add -Zmutable-noalias flag

We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.

Since then, a few things have happened:

* llvm has cleaned up a bunch of the issues (I'm told)
* we've added a nounwind codegen option

As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).
@bors
Copy link
Collaborator

bors commented Oct 8, 2017

☀️ Test successful - status-appveyor, status-travis
Approved by: arielb1
Pushing ff8e264 to master...

@bors bors merged commit a6dea41 into rust-lang:master Oct 8, 2017
@bluss
Copy link
Member

bluss commented Oct 8, 2017

There's a "null result" with this option in bluss/arrayvec#74 (but I wouldn't know if it definitely should apply there anyway). I'll run some more benchmarks when I can.

bors-servo pushed a commit to servo/webrender that referenced this pull request Oct 8, 2017
Use panic=abort instead of panic=unwind

This is the same mode as Gecko uses and will get better codegen
once rust-lang/rust#45012 has landed.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/webrender/1825)
<!-- Reviewable:end -->
# for free to join this conversation on GitHub. Already have an account? # to comment
Labels
S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion.
Projects
None yet
Development

Successfully merging this pull request may close these issues.

9 participants