@@ -329,27 +329,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
329
329
; CHECK-NEXT: .cfi_offset w29, -32
330
330
; CHECK-NEXT: addvl sp, sp, #-18
331
331
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
332
- ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333
- ; CHECK-NEXT: ptrue pn8.b
334
332
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335
- ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
336
- ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
337
333
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338
- ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
339
- ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
340
334
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341
- ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
342
- ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
343
335
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344
- ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
345
336
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346
337
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347
338
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
339
+ ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
348
340
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349
341
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350
342
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351
343
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352
- ; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
344
+ ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
345
+ ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
346
+ ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
347
+ ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
348
+ ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
349
+ ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
350
+ ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
351
+ ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
352
+ ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
353
+ ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
354
+ ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
355
+ ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
356
+ ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
357
+ ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
358
+ ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
359
+ ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
353
360
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
354
361
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
355
362
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -371,16 +378,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
371
378
; CHECK-NEXT: .cfi_restore vg
372
379
; CHECK-NEXT: addvl sp, sp, #1
373
380
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
374
- ; CHECK-NEXT: ptrue pn8.b
381
+ ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
382
+ ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
383
+ ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
384
+ ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
385
+ ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
386
+ ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
387
+ ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
388
+ ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
389
+ ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
390
+ ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
391
+ ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
392
+ ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
393
+ ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
394
+ ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
395
+ ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
396
+ ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
375
397
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
376
- ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
377
- ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
378
- ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
379
- ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
380
- ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
381
- ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
382
- ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
383
- ; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
384
398
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
385
399
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
386
400
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -424,27 +438,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
424
438
; FP-CHECK-NEXT: .cfi_offset w30, -40
425
439
; FP-CHECK-NEXT: .cfi_offset w29, -48
426
440
; FP-CHECK-NEXT: addvl sp, sp, #-18
427
- ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
428
- ; FP-CHECK-NEXT: ptrue pn8.b
429
441
; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
430
- ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
431
- ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
432
442
; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
433
- ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
434
- ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
435
443
; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
436
- ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
437
- ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
438
444
; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
439
- ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
440
445
; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
441
446
; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
442
447
; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
448
+ ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
443
449
; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
444
450
; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
445
451
; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
446
452
; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
447
- ; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
453
+ ; FP-CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
454
+ ; FP-CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
455
+ ; FP-CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
456
+ ; FP-CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
457
+ ; FP-CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
458
+ ; FP-CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
459
+ ; FP-CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
460
+ ; FP-CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
461
+ ; FP-CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
462
+ ; FP-CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
463
+ ; FP-CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
464
+ ; FP-CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
465
+ ; FP-CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
466
+ ; FP-CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
467
+ ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
468
+ ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
448
469
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
449
470
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
450
471
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -464,16 +485,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
464
485
; FP-CHECK-NEXT: smstart sm
465
486
; FP-CHECK-NEXT: .cfi_restore vg
466
487
; FP-CHECK-NEXT: addvl sp, sp, #1
467
- ; FP-CHECK-NEXT: ptrue pn8.b
488
+ ; FP-CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
489
+ ; FP-CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
490
+ ; FP-CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
491
+ ; FP-CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
492
+ ; FP-CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
493
+ ; FP-CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
494
+ ; FP-CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
495
+ ; FP-CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
496
+ ; FP-CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
497
+ ; FP-CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
498
+ ; FP-CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
499
+ ; FP-CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
500
+ ; FP-CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
501
+ ; FP-CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
502
+ ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
503
+ ; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
468
504
; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
469
- ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
470
- ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
471
- ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
472
- ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
473
- ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
474
- ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
475
- ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
476
- ; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
477
505
; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
478
506
; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
479
507
; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
0 commit comments