mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[ARM] Fix inline memcpy trip count sequence
The trip count for a memcpy/memset will be n/16 rounded up to the nearest integer, so (n+15)>>4. The old code was including a BIC too, to clear one of the bits, which does not seem correct. This removes the extra BIC. Note that ideally this would never actually be generated, as in the creation of a tail-predicated loop we will DCE that setup code, letting the WLSTP perform the trip count calculation. So this doesn't usually come up in testing (and apparently the ARMLowOverheadLoops pass does not do any sort of validation on the trip count). Only if the generation of the WLSTP fails will it use the incorrect BIC instructions. Differential Revision: https://reviews.llvm.org/D102629
This commit is contained in:
parent
f5b495a5df
commit
5c433e70b6
@ -11110,7 +11110,7 @@ static Register genTPEntry(MachineBasicBlock *TpEntry,
|
||||
MachineBasicBlock *TpExit, Register OpSizeReg,
|
||||
const TargetInstrInfo *TII, DebugLoc Dl,
|
||||
MachineRegisterInfo &MRI) {
|
||||
// Calculates loop iteration count = ceil(n/16)/16 = ((n + 15)&(-16)) / 16.
|
||||
// Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
|
||||
Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
|
||||
BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
|
||||
.addUse(OpSizeReg)
|
||||
@ -11118,16 +11118,9 @@ static Register genTPEntry(MachineBasicBlock *TpEntry,
|
||||
.add(predOps(ARMCC::AL))
|
||||
.addReg(0);
|
||||
|
||||
Register BicDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
|
||||
BuildMI(TpEntry, Dl, TII->get(ARM::t2BICri), BicDestReg)
|
||||
.addUse(AddDestReg, RegState::Kill)
|
||||
.addImm(16)
|
||||
.add(predOps(ARMCC::AL))
|
||||
.addReg(0);
|
||||
|
||||
Register LsrDestReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
|
||||
Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
|
||||
BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
|
||||
.addUse(BicDestReg, RegState::Kill)
|
||||
.addUse(AddDestReg, RegState::Kill)
|
||||
.addImm(4)
|
||||
.add(predOps(ARMCC::AL))
|
||||
.addReg(0);
|
||||
|
@ -235,7 +235,6 @@ define void @test11(i8* nocapture %x, i8* nocapture %y, i32 %n) {
|
||||
; CHECK-NEXT: .LBB10_1: @ %prehead
|
||||
; CHECK-NEXT: add.w r3, r2, #15
|
||||
; CHECK-NEXT: mov r12, r1
|
||||
; CHECK-NEXT: bic r3, r3, #16
|
||||
; CHECK-NEXT: mov r4, r0
|
||||
; CHECK-NEXT: lsr.w lr, r3, #4
|
||||
; CHECK-NEXT: mov r3, r2
|
||||
@ -326,11 +325,11 @@ define void @twoloops(i32* %X, i32 %n, i32 %m) {
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: mov r3, r2
|
||||
; CHECK-NEXT: mov r1, r0
|
||||
; CHECK-NEXT: wlstp.8 lr, r3, .LBB13_2
|
||||
; CHECK-NEXT: mov r3, r0
|
||||
; CHECK-NEXT: mov r1, r2
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB13_2
|
||||
; CHECK-NEXT: .LBB13_1: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r1], #16
|
||||
; CHECK-NEXT: vstrb.8 q0, [r3], #16
|
||||
; CHECK-NEXT: letp lr, .LBB13_1
|
||||
; CHECK-NEXT: .LBB13_2: @ %entry
|
||||
; CHECK-NEXT: wlstp.8 lr, r2, .LBB13_4
|
||||
|
@ -197,8 +197,8 @@ define dso_local i32 @e() #0 {
|
||||
; CHECK-NEXT: vmov.32 q4[0], r8
|
||||
; CHECK-NEXT: @ implicit-def: $r2
|
||||
; CHECK-NEXT: str.w r8, [sp, #52]
|
||||
; CHECK-NEXT: strh.w r12, [sp, #414]
|
||||
; CHECK-NEXT: vstrw.32 q3, [sp, #68]
|
||||
; CHECK-NEXT: strh.w r12, [sp, #414]
|
||||
; CHECK-NEXT: wlstp.8 lr, r1, .LBB1_2
|
||||
; CHECK-NEXT: .LBB1_1: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
|
||||
|
@ -67,15 +67,14 @@ body: |
|
||||
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri killed [[t2ADDri]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:gprlr = t2LSRri killed [[t2BICri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
||||
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
|
||||
; CHECK: .1:
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.0, %8, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %10, %bb.1
|
||||
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %12, %bb.1
|
||||
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %14, %bb.1
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.0, %7, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %9, %bb.1
|
||||
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %11, %bb.1
|
||||
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %13, %bb.1
|
||||
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg
|
||||
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
||||
@ -108,15 +107,14 @@ body: |
|
||||
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.1.for.body.preheader:
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri killed [[t2ADDri]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:gprlr = t2LSRri killed [[t2BICri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
||||
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
|
||||
; CHECK: bb.3:
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.1, %8, %bb.3
|
||||
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %10, %bb.3
|
||||
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %12, %bb.3
|
||||
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %14, %bb.3
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.1, %7, %bb.3
|
||||
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %9, %bb.3
|
||||
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %11, %bb.3
|
||||
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %13, %bb.3
|
||||
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg
|
||||
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
||||
@ -161,14 +159,13 @@ body: |
|
||||
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri killed [[t2ADDri]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:gprlr = t2LSRri killed [[t2BICri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
||||
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
|
||||
; CHECK: .1:
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %8, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %10, %bb.1
|
||||
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %12, %bb.1
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %7, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %9, %bb.1
|
||||
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %11, %bb.1
|
||||
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
|
||||
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
||||
@ -201,14 +198,13 @@ body: |
|
||||
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.1.for.body.preheader:
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri killed [[t2ADDri]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:gprlr = t2LSRri killed [[t2BICri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
||||
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
|
||||
; CHECK: bb.3:
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %8, %bb.3
|
||||
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %10, %bb.3
|
||||
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %12, %bb.3
|
||||
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %7, %bb.3
|
||||
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %9, %bb.3
|
||||
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %11, %bb.3
|
||||
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
|
||||
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
||||
|
Loading…
Reference in New Issue
Block a user