Skip to content

Commit 4cb17b4

Browse files
committed
Auto merge of #111803 - scottmcm:simple-swap-alternative, r=Mark-Simulacrum
Tweak the threshold for chunked swapping

Thanks to `@AngelicosPhosphoros` for the tests here, which I copied from #98892. This is an experiment as a simple alternative to that PR: it just tweaks the existing threshold, since that PR showed that types whose size is 3×Align (like `String`) currently don't work as well as they could.
2 parents 038d115 + 60208a0 commit 4cb17b4

File tree

2 files changed

+65
-19
lines changed

2 files changed

+65
-19
lines changed

library/core/src/mem/mod.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
736736
// tends to copy the whole thing to stack rather than doing it one part
737737
// at a time, so instead treat them as one-element slices and piggy-back
738738
// the slice optimizations that will split up the swaps.
739-
if size_of::<T>() / align_of::<T>() > 4 {
739+
if const { size_of::<T>() / align_of::<T>() > 2 } {
740740
// SAFETY: exclusive references always point to one non-overlapping
741741
// element and are non-null and properly aligned.
742742
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };

tests/codegen/swap-small-types.rs

+64 -18
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,15 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
2626
// CHECK-LABEL: @swap_rgb48
2727
#[no_mangle]
2828
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
29-
// FIXME: See #115212 for why this has an alloca again
29+
// CHECK-NOT: alloca
3030

31-
// CHECK: alloca [3 x i16], align 2
32-
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
33-
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
34-
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 6, i1 false)
31+
// Whether `i8` is the best for this is unclear, but
32+
// might as well record what's actually happening right now.
33+
34+
// CHECK: load i8
35+
// CHECK: load i8
36+
// CHECK: store i8
37+
// CHECK: store i8
3538
swap(x, y)
3639
}
3740

@@ -41,10 +44,39 @@ type RGBA64 = [u16; 4];
4144
#[no_mangle]
4245
pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
4346
// CHECK-NOT: alloca
44-
// CHECK-DAG: %[[XVAL:.+]] = load <4 x i16>, ptr %x, align 2
45-
// CHECK-DAG: %[[YVAL:.+]] = load <4 x i16>, ptr %y, align 2
46-
// CHECK-DAG: store <4 x i16> %[[YVAL]], ptr %x, align 2
47-
// CHECK-DAG: store <4 x i16> %[[XVAL]], ptr %y, align 2
47+
// CHECK-DAG: %[[XVAL:.+]] = load i64, ptr %x, align 2
48+
// CHECK-DAG: %[[YVAL:.+]] = load i64, ptr %y, align 2
49+
// CHECK-DAG: store i64 %[[YVAL]], ptr %x, align 2
50+
// CHECK-DAG: store i64 %[[XVAL]], ptr %y, align 2
51+
swap(x, y)
52+
}
53+
54+
// CHECK-LABEL: @swap_vecs
55+
#[no_mangle]
56+
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
57+
// CHECK-NOT: alloca
58+
// There are plenty more loads and stores than just these,
59+
// but at least one sure better be 64-bit (for size or capacity).
60+
// CHECK: load i64
61+
// CHECK: load i64
62+
// CHECK: store i64
63+
// CHECK: store i64
64+
// CHECK: ret void
65+
swap(x, y)
66+
}
67+
68+
// CHECK-LABEL: @swap_slices
69+
#[no_mangle]
70+
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
71+
// CHECK-NOT: alloca
72+
// CHECK: load ptr
73+
// CHECK: load i64
74+
// CHECK: load ptr
75+
// CHECK: load i64
76+
// CHECK: store ptr
77+
// CHECK: store i64
78+
// CHECK: store ptr
79+
// CHECK: store i64
4880
swap(x, y)
4981
}
5082

@@ -55,9 +87,9 @@ type RGB24 = [u8; 3];
5587
// CHECK-LABEL: @swap_rgb24_slices
5688
#[no_mangle]
5789
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
58-
// CHECK-NOT: alloca
59-
// CHECK: load <{{[0-9]+}} x i8>
60-
// CHECK: store <{{[0-9]+}} x i8>
90+
// CHECK-NOT: alloca
91+
// CHECK: load <{{[0-9]+}} x i8>
92+
// CHECK: store <{{[0-9]+}} x i8>
6193
if x.len() == y.len() {
6294
x.swap_with_slice(y);
6395
}
@@ -69,9 +101,9 @@ type RGBA32 = [u8; 4];
69101
// CHECK-LABEL: @swap_rgba32_slices
70102
#[no_mangle]
71103
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
72-
// CHECK-NOT: alloca
73-
// CHECK: load <{{[0-9]+}} x i32>
74-
// CHECK: store <{{[0-9]+}} x i32>
104+
// CHECK-NOT: alloca
105+
// CHECK: load <{{[0-9]+}} x i32>
106+
// CHECK: store <{{[0-9]+}} x i32>
75107
if x.len() == y.len() {
76108
x.swap_with_slice(y);
77109
}
@@ -84,10 +116,24 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
84116
// CHECK-LABEL: @swap_string_slices
85117
#[no_mangle]
86118
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
87-
// CHECK-NOT: alloca
88-
// CHECK: load <{{[0-9]+}} x i64>
89-
// CHECK: store <{{[0-9]+}} x i64>
119+
// CHECK-NOT: alloca
120+
// CHECK: load <{{[0-9]+}} x i64>
121+
// CHECK: store <{{[0-9]+}} x i64>
90122
if x.len() == y.len() {
91123
x.swap_with_slice(y);
92124
}
93125
}
126+
127+
#[repr(C, packed)]
128+
pub struct Packed {
129+
pub first: bool,
130+
pub second: usize,
131+
}
132+
133+
// CHECK-LABEL: @swap_packed_structs
134+
#[no_mangle]
135+
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
136+
// CHECK-NOT: alloca
137+
// CHECK: ret void
138+
swap(x, y)
139+
}

0 commit comments

Comments
 (0)