1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

Revert "[AArch64LoadStoreOptimizer] Recommit: Generate more STPs by renaming registers earlier"

This reverts commit ea011ec5ed53599305de62ca5fcfd31f4b3448c3.

This still causes some miscompiles, I'll follow up in the phabricator
review with a sample of that issue (which is part of the sample of
the previous issue).
This commit is contained in:
Martin Storsjö 2021-06-23 09:54:16 +03:00
parent dc6a85256b
commit 1bc7c6b964
5 changed files with 51 additions and 232 deletions

View File

@ -1515,33 +1515,6 @@ static Optional<MCPhysReg> tryToFindRegisterToRename(
return None;
}
// Returns a boolean that represents whether there exists a register
// from FirstMI to the beginning of the block that can be renamed. If
// one exists, we update Flags with its value.
static bool updateFlagsWithRenameReg(
Optional<bool> MaybeCanRename, LdStPairFlags &Flags, MachineInstr &FirstMI,
MachineInstr &MI, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
const TargetRegisterInfo *TRI) {
if (!DebugCounter::shouldExecute(RegRenamingCounter))
return false;
if (!MaybeCanRename)
MaybeCanRename = {
canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
if (*MaybeCanRename) {
Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI);
if (MaybeRenameReg) {
Flags.setRenameReg(*MaybeRenameReg);
Flags.setMergeForward(true);
return true;
}
}
return false;
}
/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
@ -1693,19 +1666,6 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
continue;
}
}
if (TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg()) &&
ModifiedRegUnits.available(BaseReg) &&
UsedRegUnits.available(BaseReg)) {
bool FlagsHaveRenameReg = updateFlagsWithRenameReg(
MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween,
RequiredClasses, TRI);
if (FlagsHaveRenameReg) {
MBBIWithRenameReg = MBBI;
continue;
}
}
// If the destination register of one load is the same register or a
// sub/super register of the other load, bail and keep looking. A
// load-pair instruction with both destination registers the same is
@ -1755,11 +1715,21 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
return MBBI;
}
bool FlagsHaveRenameReg = updateFlagsWithRenameReg(
MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween,
RequiredClasses, TRI);
if (FlagsHaveRenameReg) {
MBBIWithRenameReg = MBBI;
if (DebugCounter::shouldExecute(RegRenamingCounter)) {
if (!MaybeCanRename)
MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
RequiredClasses, TRI)};
if (*MaybeCanRename) {
Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
TRI);
if (MaybeRenameReg) {
Flags.setRenameReg(*MaybeRenameReg);
Flags.setMergeForward(true);
MBBIWithRenameReg = MBBI;
}
}
}
}
// Unable to combine these instructions due to interference in between.

View File

@ -33,30 +33,38 @@ define void @call_byval_a64i32([64 x i32]* %incoming) {
; CHECK-NEXT: .cfi_offset w28, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [sp]
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: stp q1, q0, [sp]
; CHECK-NEXT: ldr q1, [x0, #32]
; CHECK-NEXT: str q0, [sp, #16]
; CHECK-NEXT: ldr q0, [x0, #32]
; CHECK-NEXT: str q0, [sp, #32]
; CHECK-NEXT: ldr q0, [x0, #48]
; CHECK-NEXT: stp q1, q0, [sp, #32]
; CHECK-NEXT: ldr q1, [x0, #64]
; CHECK-NEXT: str q0, [sp, #48]
; CHECK-NEXT: ldr q0, [x0, #64]
; CHECK-NEXT: str q0, [sp, #64]
; CHECK-NEXT: ldr q0, [x0, #80]
; CHECK-NEXT: stp q1, q0, [sp, #64]
; CHECK-NEXT: ldr q1, [x0, #96]
; CHECK-NEXT: str q0, [sp, #80]
; CHECK-NEXT: ldr q0, [x0, #96]
; CHECK-NEXT: str q0, [sp, #96]
; CHECK-NEXT: ldr q0, [x0, #112]
; CHECK-NEXT: stp q1, q0, [sp, #96]
; CHECK-NEXT: ldr q1, [x0, #128]
; CHECK-NEXT: str q0, [sp, #112]
; CHECK-NEXT: ldr q0, [x0, #128]
; CHECK-NEXT: str q0, [sp, #128]
; CHECK-NEXT: ldr q0, [x0, #144]
; CHECK-NEXT: stp q1, q0, [sp, #128]
; CHECK-NEXT: ldr q1, [x0, #160]
; CHECK-NEXT: str q0, [sp, #144]
; CHECK-NEXT: ldr q0, [x0, #160]
; CHECK-NEXT: str q0, [sp, #160]
; CHECK-NEXT: ldr q0, [x0, #176]
; CHECK-NEXT: stp q1, q0, [sp, #160]
; CHECK-NEXT: ldr q1, [x0, #192]
; CHECK-NEXT: str q0, [sp, #176]
; CHECK-NEXT: ldr q0, [x0, #192]
; CHECK-NEXT: str q0, [sp, #192]
; CHECK-NEXT: ldr q0, [x0, #208]
; CHECK-NEXT: stp q1, q0, [sp, #192]
; CHECK-NEXT: ldr q1, [x0, #224]
; CHECK-NEXT: str q0, [sp, #208]
; CHECK-NEXT: ldr q0, [x0, #224]
; CHECK-NEXT: str q0, [sp, #224]
; CHECK-NEXT: ldr q0, [x0, #240]
; CHECK-NEXT: stp q1, q0, [sp, #224]
; CHECK-NEXT: str q0, [sp, #240]
; CHECK-NEXT: bl byval_a64i32
; CHECK-NEXT: ldr x28, [sp, #272] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload

View File

@ -1,11 +1,9 @@
; RUN: llc -mtriple=aarch64-none-unknown-linuxeabi -consthoist-gep %s -o - | FileCheck %s
; CHECK-NOT: adrp x10, global+332
; CHECK-NOT: add x10, x10, :lo12:global+332
; CHECK: adrp x10, global+528
; CHECK-NEXT: and w12, w8, #0xffffff
; CHECK-NEXT: ldr w8, [x11]
; CHECK-NEXT: add x10, x10, :lo12:global+528
; CHECK-NOT: adrp x10, global+332
; CHECK-NOT: add x10, x10, :lo12:global+332
; CHECK: adrp x10, global+528
; CHECK-NEXT: add x10, x10, :lo12:global+528
%struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot }
%struct.bar = type { i8, i8, %struct.snork }

View File

@ -1117,7 +1117,7 @@ define void @store-pair-post-indexed-double() nounwind {
define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #-4]
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
br label %for.body
for.body:
%phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
@ -1141,7 +1141,7 @@ end:
define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0, #-8]
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
br label %for.body
for.body:
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
@ -1165,7 +1165,7 @@ end:
define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
; CHECK: stp q{{[0-9]+}}, q{{[0-9]+}}, [x0, #-16]
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
br label %for.body
for.body:
%phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
@ -1189,7 +1189,7 @@ end:
define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [x0, #-4]
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
br label %for.body
for.body:
%phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
@ -1213,7 +1213,7 @@ end:
define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [x0, #-8]
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
br label %for.body
for.body:
%phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
@ -1237,7 +1237,7 @@ end:
define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #-256
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
br label %for.body
for.body:
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
@ -1262,7 +1262,8 @@ define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %
; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #256
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
br label %for.body
for.body:

View File

@ -544,161 +544,3 @@ body: |
RET undef $lr
...
# During ISel, the order of load/store pairs can be optimized and changed
# so that only a single register is used. Due to this register reuse, LDP/STPs
# are not generated. These tests check that an STP instruction will be
# generated after register renaming is attempted.
...
---
#
# CHECK-LABEL: name: ldst32
# CHECK: liveins: $x0, $x1
# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
# CHECK-NEXT: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
# CHECK-NEXT: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 32)
# CHECK-NEXT: RET undef $lr
#
name: ldst32
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0', virtual-reg: '' }
- { reg: '$x1', virtual-reg: '' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo:
hasRedZone: false
body: |
bb.0.entry:
liveins: $x0, $x1
renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32)
renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
RET undef $lr
...
---
#
# CHECK-LABEL: name: ldst64
# CHECK: liveins: $x0, $x1
# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
# CHECK-NEXT: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
# CHECK-NEXT: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 64)
# CHECK-NEXT: RET undef $lr
#
name: ldst64
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0', virtual-reg: '' }
- { reg: '$x1', virtual-reg: '' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo:
hasRedZone: false
body: |
bb.0.entry:
liveins: $x0, $x1
renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 64)
renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 64)
RET undef $lr
...
---
#
# CHECK-LABEL: name: ldst128
# CHECK: liveins: $x0, $x1
# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
# CHECK-NEXT: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
# CHECK-NEXT: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 128)
# CHECK-NEXT: RET undef $lr
#
name: ldst128
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0', virtual-reg: '' }
- { reg: '$x1', virtual-reg: '' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo:
hasRedZone: false
body: |
bb.0.entry:
liveins: $x0, $x1
renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 128)
renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 128)
RET undef $lr
...
---
#
# CHECK-LABEL: name: ldst-no-reg-available
# CHECK: liveins: $x0, $x1, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $q8, $q9, $q10
# CHECK: renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
# CHECK-NEXT: STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, align 32)
# CHECK-NEXT: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
# CHECK-NEXT: STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
# CHECK-NEXT: RET undef $lr
#
name: ldst-no-reg-available
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo:
hasRedZone: false
body: |
bb.0.entry:
liveins: $x0, $x1, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $q8, $q9, $q10
renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32)
renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
RET undef $lr
...
---
#
# CHECK-LABEL: name: ldst-basereg-modified
# CHECK: liveins: $x0, $x1
# CHECK: renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
# CHECK-NEXT: STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, align 32)
# CHECK-NEXT: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
# CHECK-NEXT: STRHHui $wzr, renamable $x0, 12 :: (store 2, align 8)
# CHECK-NEXT: STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
# CHECK-NEXT: RET undef $lr
#
name: ldst-basereg-modified
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$x1' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo:
hasRedZone: false
body: |
bb.0.entry:
liveins: $x0, $x1
renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32)
renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
STRHHui $wzr, renamable $x0, 12 :: (store 2, align 8)
STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
RET undef $lr