mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[MachineCopyPropagation] Extend MCP to do trivial copy backward propagation
Summary: This patch mainly performs the following transformation: ``` $R0 = OP ... ... // No read/clobber of $R0 and $R1 $R1 = COPY $R0 // $R0 is killed ``` By replacing $R0 with $R1 and removing the COPY, we get ``` $R1 = OP ... ``` This transformation can also expose more opportunities for the existing copy elimination in MCP. Differential Revision: https://reviews.llvm.org/D67794
This commit is contained in:
parent
f32abf466c
commit
1bcb15e5e0
@ -37,6 +37,15 @@
|
||||
// ... // No clobber of %R0
|
||||
// %R1 = COPY %R0 <<< Removed
|
||||
//
|
||||
// or
|
||||
//
|
||||
// $R0 = OP ...
|
||||
// ... // No read/clobber of $R0 and $R1
|
||||
// $R1 = COPY $R0 // $R0 is killed
|
||||
// Replace $R0 with $R1 and remove the COPY
|
||||
// $R1 = OP ...
|
||||
// ...
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
@ -98,6 +107,28 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove register from copy maps.
|
||||
void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
|
||||
// Since Reg might be a subreg of some registers, only invalidate Reg is not
|
||||
// enough. We have to find the COPY defines Reg or registers defined by Reg
|
||||
// and invalidate all of them.
|
||||
DenseSet<unsigned> RegsToInvalidate{Reg};
|
||||
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
|
||||
auto I = Copies.find(*RUI);
|
||||
if (I != Copies.end()) {
|
||||
if (MachineInstr *MI = I->second.MI) {
|
||||
RegsToInvalidate.insert(MI->getOperand(0).getReg());
|
||||
RegsToInvalidate.insert(MI->getOperand(1).getReg());
|
||||
}
|
||||
RegsToInvalidate.insert(I->second.DefRegs.begin(),
|
||||
I->second.DefRegs.end());
|
||||
}
|
||||
}
|
||||
for (unsigned InvalidReg : RegsToInvalidate)
|
||||
for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
|
||||
Copies.erase(*RUI);
|
||||
}
|
||||
|
||||
/// Clobber a single register, removing it from the tracker's copy maps.
|
||||
void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
|
||||
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
|
||||
@ -151,6 +182,38 @@ public:
|
||||
return CI->second.MI;
|
||||
}
|
||||
|
||||
/// Follow the single DefRegs entry recorded for \p RegUnit and return the
/// COPY found for the first register unit of that register.
///
/// \returns nullptr when \p RegUnit has no entry in the copy map or when its
/// DefRegs list does not contain exactly one register; otherwise whatever
/// findCopyForUnit() yields for the first unit of that register.
MachineInstr *findCopyDefViaUnit(unsigned RegUnit,
                                 const TargetRegisterInfo &TRI) {
  auto Entry = Copies.find(RegUnit);
  if (Entry == Copies.end() || Entry->second.DefRegs.size() != 1)
    return nullptr;

  // Only the first register unit of the sole DefRegs entry is consulted.
  MCRegUnitIterator FirstUnit(Entry->second.DefRegs[0], &TRI);
  return findCopyForUnit(*FirstUnit, TRI, true);
}
|
||||
|
||||
/// Find a tracked COPY whose source covers \p Reg and that can still be used
/// for backward propagation into \p I.
///
/// The candidate is looked up through the first register unit of \p Reg. It is
/// rejected when \p Reg is not equal to, or a sub-register of, the COPY's
/// source, or when any register-mask operand on the instructions walked in
/// reverse from the COPY up to (but excluding) \p I clobbers the COPY's source
/// or destination.
///
/// \returns the usable COPY, or nullptr if there is none.
MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg,
                                    const TargetRegisterInfo &TRI) {
  MCRegUnitIterator FirstUnit(Reg, &TRI);
  MachineInstr *Candidate = findCopyDefViaUnit(*FirstUnit, TRI);
  if (!Candidate)
    return nullptr;
  if (!TRI.isSubRegisterEq(Candidate->getOperand(1).getReg(), Reg))
    return nullptr;

  Register CopySrc = Candidate->getOperand(1).getReg();
  Register CopyDef = Candidate->getOperand(0).getReg();
  for (const MachineInstr &Visited : make_range(
           Candidate->getReverseIterator(), I.getReverseIterator())) {
    for (const MachineOperand &MO : Visited.operands()) {
      if (!MO.isRegMask())
        continue;
      // FIXME: Shall we simultaneously invalidate CopySrc or CopyDef?
      if (MO.clobbersPhysReg(CopySrc) || MO.clobbersPhysReg(CopyDef))
        return nullptr;
    }
  }

  return Candidate;
}
|
||||
|
||||
MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
|
||||
const TargetRegisterInfo &TRI) {
|
||||
// We check the first RegUnit here, since we'll only be interested in the
|
||||
@ -211,11 +274,16 @@ private:
|
||||
void ClobberRegister(unsigned Reg);
|
||||
void ReadRegister(unsigned Reg, MachineInstr &Reader,
|
||||
DebugType DT);
|
||||
void CopyPropagateBlock(MachineBasicBlock &MBB);
|
||||
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
|
||||
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
|
||||
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
|
||||
void forwardUses(MachineInstr &MI);
|
||||
void propagateDefs(MachineInstr &MI);
|
||||
bool isForwardableRegClassCopy(const MachineInstr &Copy,
|
||||
const MachineInstr &UseI, unsigned UseIdx);
|
||||
bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy,
|
||||
const MachineInstr &UseI,
|
||||
unsigned UseIdx);
|
||||
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
|
||||
|
||||
/// Candidates for deletion.
|
||||
@ -313,6 +381,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
|
||||
const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
|
||||
Register Def = Copy.getOperand(0).getReg();
|
||||
|
||||
if (const TargetRegisterClass *URC =
|
||||
UseI.getRegClassConstraint(UseIdx, TII, TRI))
|
||||
return URC->contains(Def);
|
||||
|
||||
// We don't process further if UseI is a COPY, since forward copy propagation
|
||||
// should handle that.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Decide whether we should forward the source of \param Copy to its use in
|
||||
/// \param UseI based on the physical register class constraints of the opcode
|
||||
/// and avoiding introducing more cross-class COPYs.
|
||||
@ -468,8 +549,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
|
||||
}
|
||||
}
|
||||
|
||||
void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
|
||||
void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
|
||||
<< "\n");
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
|
||||
MachineInstr *MI = &*I;
|
||||
@ -647,6 +729,128 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
Tracker.clear();
|
||||
}
|
||||
|
||||
/// Decide whether the COPY \p MI is a candidate for backward propagation.
///
/// A COPY qualifies when neither its destination nor its source register is
/// reserved according to \p MRI, and its source operand is both renamable and
/// killed by the COPY. \p MI must be a COPY (asserted).
static bool isBackwardPropagatableCopy(MachineInstr &MI,
                                       const MachineRegisterInfo &MRI) {
  assert(MI.isCopy() && "MI is expected to be a COPY");
  const MachineOperand &DstOp = MI.getOperand(0);
  const MachineOperand &SrcOp = MI.getOperand(1);

  const bool TouchesReserved =
      MRI.isReserved(DstOp.getReg()) || MRI.isReserved(SrcOp.getReg());
  if (TouchesReserved)
    return false;

  if (!SrcOp.isRenamable())
    return false;
  return SrcOp.isKill();
}
|
||||
|
||||
/// Try to rewrite each eligible register definition of \p MI so that it
/// writes directly to the destination of a tracked COPY whose source is that
/// definition.
///
/// For every rewritten operand the COPY is added to MaybeDeadCopies and
/// Changed is set.
void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
  if (!Tracker.hasAnyCopies())
    return;

  const unsigned NumOps = MI.getNumOperands();
  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
    MachineOperand &DefOp = MI.getOperand(Idx);

    // Only register definitions are of interest.
    if (!DefOp.isReg() || DefOp.isUse())
      continue;

    // Ignore non-trivial cases (tied, undef or implicit defs), and only
    // handle registers that come from a vreg, i.e. renamable ones.
    if (DefOp.isTied() || DefOp.isUndef() || DefOp.isImplicit() ||
        !DefOp.isRenamable())
      continue;

    MachineInstr *Copy =
        Tracker.findAvailBackwardCopy(MI, DefOp.getReg(), *TRI);
    if (!Copy)
      continue;

    Register Def = Copy->getOperand(0).getReg();
    Register Src = Copy->getOperand(1).getReg();

    // The definition must be exactly the COPY's source, the COPY's
    // destination must satisfy MI's register class constraint for this
    // operand, and hasImplicitOverlap() must not report a conflict.
    if (DefOp.getReg() != Src ||
        !isBackwardPropagatableRegClassCopy(*Copy, MI, Idx) ||
        hasImplicitOverlap(MI, DefOp))
      continue;

    LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(DefOp.getReg(), TRI)
                      << "\n with " << printReg(Def, TRI) << "\n in "
                      << MI << " from " << *Copy);

    DefOp.setReg(Def);
    DefOp.setIsRenamable(Copy->getOperand(0).isRenamable());

    LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
    MaybeDeadCopies.insert(Copy);
    Changed = true;
  }
}
|
||||
|
||||
/// Run backward copy propagation over \p MBB.
///
/// The block is walked from its last instruction to its first. Qualifying
/// COPYs are recorded in the tracker; for every other instruction its
/// definitions are offered to propagateDefs() and all registers it defines or
/// reads are invalidated afterwards. COPYs collected in MaybeDeadCopies are
/// erased once the walk is finished, and the per-block state is reset.
void MachineCopyPropagation::BackwardCopyPropagateBlock(
    MachineBasicBlock &MBB) {
  LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
                    << "\n");

  MachineBasicBlock::reverse_iterator It = MBB.rbegin();
  const MachineBasicBlock::reverse_iterator End = MBB.rend();
  while (It != End) {
    MachineInstr *Cur = &*It;
    ++It;

    // Ignore non-trivial COPYs: only plain two-operand COPYs whose operands
    // do not overlap are considered.
    const bool IsTrivialCopy =
        Cur->isCopy() && Cur->getNumOperands() == 2 &&
        !TRI->regsOverlap(Cur->getOperand(0).getReg(),
                          Cur->getOperand(1).getReg());

    // Unlike forward cp, propagateDefs is not invoked on a tracked COPY;
    // COPY-to-COPY propagation is left to forward cp.
    if (IsTrivialCopy && isBackwardPropagatableCopy(*Cur, *MRI)) {
      Register CopyDef = Cur->getOperand(0).getReg();
      Register CopySrc = Cur->getOperand(1).getReg();
      Tracker.invalidateRegister(CopySrc, *TRI);
      Tracker.invalidateRegister(CopyDef, *TRI);
      Tracker.trackCopy(Cur, *TRI);
      continue;
    }

    // Invalidate any earlyclobber regs first.
    for (const MachineOperand &MO : Cur->operands()) {
      if (!MO.isReg() || !MO.isEarlyClobber())
        continue;
      Register Reg = MO.getReg();
      if (Reg)
        Tracker.invalidateRegister(Reg, *TRI);
    }

    propagateDefs(*Cur);

    // Every register this instruction defines or reads is no longer a
    // candidate for instructions that appear earlier in the block.
    for (const MachineOperand &MO : Cur->operands()) {
      if (!MO.isReg() || !MO.getReg())
        continue;

      if (MO.isDef())
        Tracker.invalidateRegister(MO.getReg(), *TRI);

      if (MO.readsReg())
        Tracker.invalidateRegister(MO.getReg(), *TRI);
    }
  }

  for (auto *DeadCopy : MaybeDeadCopies)
    DeadCopy->eraseFromParent();

  MaybeDeadCopies.clear();
  CopyDbgUsers.clear();
  Tracker.clear();
}
|
||||
|
||||
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
@ -657,8 +861,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = MF.getSubtarget().getInstrInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
|
||||
for (MachineBasicBlock &MBB : MF)
|
||||
CopyPropagateBlock(MBB);
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
BackwardCopyPropagateBlock(MBB);
|
||||
ForwardCopyPropagateBlock(MBB);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -11,8 +11,7 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: test0
|
||||
; CHECK: renamable $x4 = LI8 1024
|
||||
; CHECK: $x3 = COPY killed renamable $x4
|
||||
; CHECK: $x3 = LI8 1024
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x4 = LI8 1024
|
||||
$x3 = COPY renamable killed $x4
|
||||
@ -28,8 +27,7 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: test1
|
||||
; CHECK: bb.0.entry:
|
||||
; CHECK: renamable $x5 = LI8 42
|
||||
; CHECK: renamable $x4 = COPY killed renamable $x5
|
||||
; CHECK: renamable $x4 = LI8 42
|
||||
; CHECK: B %bb.1
|
||||
; CHECK: bb.1:
|
||||
; CHECK: liveins: $x4
|
||||
@ -139,8 +137,8 @@ body: |
|
||||
|
||||
; CHECK-LABEL: name: iterative_deletion
|
||||
; CHECK: liveins: $x5
|
||||
; CHECK: renamable $x6 = ADDI8 killed renamable $x5, 1
|
||||
; CHECK: $x3 = COPY $x6
|
||||
; CHECK: renamable $x4 = ADDI8 killed renamable $x5, 1
|
||||
; CHECK: $x3 = COPY $x4
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x6 = ADDI8 renamable killed $x5, 1
|
||||
renamable $x4 = COPY renamable killed $x6
|
||||
@ -160,8 +158,8 @@ body: |
|
||||
; CHECK-LABEL: name: Enter
|
||||
; CHECK: liveins: $x4, $x7
|
||||
; CHECK: renamable $x5 = COPY killed renamable $x7
|
||||
; CHECK: renamable $x6 = ADDI8 killed renamable $x4, 1
|
||||
; CHECK: $x3 = ADD8 killed renamable $x5, $x6
|
||||
; CHECK: renamable $x7 = ADDI8 killed renamable $x4, 1
|
||||
; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x5 = COPY killed renamable $x7
|
||||
renamable $x6 = ADDI8 killed renamable $x4, 1
|
||||
@ -181,10 +179,9 @@ body: |
|
||||
; CHECK-LABEL: name: foo
|
||||
; CHECK: liveins: $x4, $x7
|
||||
; CHECK: renamable $x5 = COPY killed renamable $x7
|
||||
; CHECK: renamable $x6 = ADDI8 renamable $x4, 1
|
||||
; CHECK: renamable $x7 = COPY killed renamable $x6
|
||||
; CHECK: renamable $x8 = ADDI8 killed $x4, 2
|
||||
; CHECK: $x3 = ADD8 killed renamable $x5, $x8
|
||||
; CHECK: renamable $x7 = ADDI8 renamable $x4, 1
|
||||
; CHECK: renamable $x6 = ADDI8 killed $x4, 2
|
||||
; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x6
|
||||
; CHECK: $x3 = ADD8 $x3, killed renamable $x7
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x5 = COPY killed renamable $x7
|
||||
@ -208,10 +205,10 @@ body: |
|
||||
; CHECK-LABEL: name: bar
|
||||
; CHECK: liveins: $x4, $x7
|
||||
; CHECK: renamable $x5 = COPY killed renamable $x7
|
||||
; CHECK: renamable $x6 = ADDI8 renamable $x4, 1
|
||||
; CHECK: renamable $x8 = COPY $x6
|
||||
; CHECK: renamable $x6 = ADDI8 renamable $x5, 2
|
||||
; CHECK: $x3 = ADD8 killed renamable $x5, $x6
|
||||
; CHECK: renamable $x7 = ADDI8 renamable $x4, 1
|
||||
; CHECK: renamable $x8 = COPY killed renamable $x7
|
||||
; CHECK: renamable $x7 = ADDI8 renamable $x5, 2
|
||||
; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7
|
||||
; CHECK: $x3 = ADD8 $x3, killed renamable $x8
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x5 = COPY killed renamable $x7
|
||||
@ -236,10 +233,9 @@ body: |
|
||||
; CHECK-LABEL: name: bogus
|
||||
; CHECK: liveins: $x7
|
||||
; CHECK: renamable $x5 = COPY renamable $x7
|
||||
; CHECK: renamable $x6 = ADDI8 $x7, 1
|
||||
; CHECK: renamable $x7 = COPY $x6
|
||||
; CHECK: renamable $x4 = ADDI8 $x7, 1
|
||||
; CHECK: renamable $x6 = ADDI8 renamable $x5, 2
|
||||
; CHECK: $x3 = ADD8 $x7, killed renamable $x5
|
||||
; CHECK: $x3 = ADD8 killed renamable $x4, killed renamable $x5
|
||||
; CHECK: $x3 = ADD8 $x3, killed renamable $x6
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x5 = COPY killed renamable $x7
|
||||
@ -263,10 +259,10 @@ body: |
|
||||
liveins: $x7
|
||||
; CHECK-LABEL: name: foobar
|
||||
; CHECK: liveins: $x7
|
||||
; CHECK: renamable $x6 = ADDI8 $x7, 1
|
||||
; CHECK: renamable $x8 = COPY $x6
|
||||
; CHECK: renamable $x6 = ADDI8 $x7, 2
|
||||
; CHECK: $x3 = ADD8 $x6, $x7
|
||||
; CHECK: renamable $x4 = ADDI8 $x7, 1
|
||||
; CHECK: renamable $x8 = COPY killed renamable $x4
|
||||
; CHECK: renamable $x4 = ADDI8 $x7, 2
|
||||
; CHECK: $x3 = ADD8 killed renamable $x4, $x7
|
||||
; CHECK: $x3 = ADD8 $x3, killed renamable $x8
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x5 = COPY killed renamable $x7
|
||||
@ -280,3 +276,22 @@ body: |
|
||||
BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: cross_call
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x2, $x3, $x20
|
||||
; CHECK-LABEL: name: cross_call
|
||||
; CHECK: liveins: $x2, $x3, $x20
|
||||
; CHECK: renamable $x20 = LI8 1024
|
||||
; CHECK: BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2
|
||||
; CHECK: $x3 = COPY killed renamable $x20
|
||||
; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
renamable $x20 = LI8 1024
|
||||
BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2
|
||||
$x3 = COPY renamable killed $x20
|
||||
BLR8 implicit $lr8, implicit undef $rm, implicit $x3
|
||||
...
|
||||
|
@ -26,8 +26,7 @@ define dso_local i1 @t(%class.A* %this, i32 %color, i32 %vertex) local_unnamed_a
|
||||
; CHECK-P9-NEXT: cmplwi r3, 2
|
||||
; CHECK-P9-NEXT: bge- cr0, .LBB0_6
|
||||
; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1
|
||||
; CHECK-P9-NEXT: li r5, 0
|
||||
; CHECK-P9-NEXT: mr r3, r5
|
||||
; CHECK-P9-NEXT: li r3, 0
|
||||
; CHECK-P9-NEXT: blr
|
||||
; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false
|
||||
; CHECK-P9-NEXT: cmplwi cr0, r4, 0
|
||||
|
@ -224,8 +224,7 @@ define i64 @sll(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-NEXT: srli a4, a0, 1
|
||||
; RV32I-NEXT: srl a3, a4, a3
|
||||
; RV32I-NEXT: or a1, a1, a3
|
||||
; RV32I-NEXT: sll a2, a0, a2
|
||||
; RV32I-NEXT: mv a0, a2
|
||||
; RV32I-NEXT: sll a0, a0, a2
|
||||
; RV32I-NEXT: ret
|
||||
%1 = shl i64 %a, %b
|
||||
ret i64 %1
|
||||
@ -311,8 +310,7 @@ define i64 @srl(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-NEXT: slli a4, a1, 1
|
||||
; RV32I-NEXT: sll a3, a4, a3
|
||||
; RV32I-NEXT: or a0, a0, a3
|
||||
; RV32I-NEXT: srl a2, a1, a2
|
||||
; RV32I-NEXT: mv a1, a2
|
||||
; RV32I-NEXT: srl a1, a1, a2
|
||||
; RV32I-NEXT: ret
|
||||
%1 = lshr i64 %a, %b
|
||||
ret i64 %1
|
||||
|
@ -23,8 +23,7 @@ define i64 @lshr64(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-NEXT: slli a4, a1, 1
|
||||
; RV32I-NEXT: sll a3, a4, a3
|
||||
; RV32I-NEXT: or a0, a0, a3
|
||||
; RV32I-NEXT: srl a2, a1, a2
|
||||
; RV32I-NEXT: mv a1, a2
|
||||
; RV32I-NEXT: srl a1, a1, a2
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: lshr64:
|
||||
@ -114,8 +113,7 @@ define i64 @shl64(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-NEXT: srli a4, a0, 1
|
||||
; RV32I-NEXT: srl a3, a4, a3
|
||||
; RV32I-NEXT: or a1, a1, a3
|
||||
; RV32I-NEXT: sll a2, a0, a2
|
||||
; RV32I-NEXT: mv a0, a2
|
||||
; RV32I-NEXT: sll a0, a0, a2
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: shl64:
|
||||
@ -191,8 +189,7 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
|
||||
; RV64I-NEXT: slli a4, a1, 1
|
||||
; RV64I-NEXT: sll a3, a4, a3
|
||||
; RV64I-NEXT: or a0, a0, a3
|
||||
; RV64I-NEXT: srl a2, a1, a2
|
||||
; RV64I-NEXT: mv a1, a2
|
||||
; RV64I-NEXT: srl a1, a1, a2
|
||||
; RV64I-NEXT: ret
|
||||
%1 = lshr i128 %a, %b
|
||||
ret i128 %1
|
||||
@ -298,8 +295,7 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
|
||||
; RV64I-NEXT: srli a4, a0, 1
|
||||
; RV64I-NEXT: srl a3, a4, a3
|
||||
; RV64I-NEXT: or a1, a1, a3
|
||||
; RV64I-NEXT: sll a2, a0, a2
|
||||
; RV64I-NEXT: mv a0, a2
|
||||
; RV64I-NEXT: sll a0, a0, a2
|
||||
; RV64I-NEXT: ret
|
||||
%1 = shl i128 %a, %b
|
||||
ret i128 %1
|
||||
|
@ -496,9 +496,8 @@ define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result,
|
||||
; AVX-NEXT: testl %ebp, %ebp
|
||||
; AVX-NEXT: jle .LBB10_1
|
||||
; AVX-NEXT: # %bb.2: # %if.then
|
||||
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm2
|
||||
; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
|
||||
; AVX-NEXT: vmovaps %xmm1, %xmm2
|
||||
; AVX-NEXT: jmp .LBB10_3
|
||||
; AVX-NEXT: .LBB10_1:
|
||||
; AVX-NEXT: vmovaps (%rsp), %xmm2 # 16-byte Reload
|
||||
|
@ -279,8 +279,7 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-SLOW-NEXT: orl %edi, %edx
|
||||
; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
|
||||
; X86-SLOW-NEXT: .LBB4_2:
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-SLOW-NEXT: movl %ecx, %edx
|
||||
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-SLOW-NEXT: movl %ebx, %ecx
|
||||
; X86-SLOW-NEXT: shrl %cl, %edx
|
||||
; X86-SLOW-NEXT: movb %bl, %ah
|
||||
|
@ -88,9 +88,8 @@ define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind
|
||||
; X86-NEXT: movl 4(%eax,%ebp,8), %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl %edx, %edi
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: mull %edi
|
||||
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
|
@ -153,9 +153,8 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
|
||||
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X32-NEXT: adcl $0, %edx
|
||||
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl %esi, %ecx
|
||||
; X32-NEXT: movl 8(%esi), %ebx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl 8(%ecx), %ebx
|
||||
; X32-NEXT: movl %ebx, %eax
|
||||
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||
|
@ -296,8 +296,7 @@ define void @test_shl_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind {
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %ebx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shll %cl, %ebx
|
||||
; X86-NEXT: movl %ebp, %esi
|
||||
@ -534,8 +533,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
|
||||
; X86-NEXT: .LBB6_9: # %entry
|
||||
; X86-NEXT: movl %edi, %esi
|
||||
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: shrl %cl, %ebp
|
||||
; X86-NEXT: testb $32, %cl
|
||||
@ -795,9 +793,8 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
|
||||
; X86-NEXT: # %bb.4: # %entry
|
||||
; X86-NEXT: movl %edi, %ebx
|
||||
; X86-NEXT: .LBB7_5: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %ebp
|
||||
; X86-NEXT: movl %ecx, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl %ebp, %edi
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: sarl %cl, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
@ -835,8 +832,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
|
||||
; X86-NEXT: movl %esi, %edi
|
||||
; X86-NEXT: .LBB7_9: # %entry
|
||||
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: sarl %cl, %esi
|
||||
; X86-NEXT: testb $32, %cl
|
||||
@ -850,8 +846,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocaptur
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movb $64, %cl
|
||||
; X86-NEXT: subb %dl, %cl
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: shldl %cl, %ebx, %ebp
|
||||
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
@ -1059,12 +1054,11 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: subl $72, %esp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: movl %ebx, %ecx
|
||||
; X86-NEXT: shll %cl, %ebp
|
||||
; X86-NEXT: movl %eax, %esi
|
||||
; X86-NEXT: shll %cl, %esi
|
||||
; X86-NEXT: movl %edx, %eax
|
||||
; X86-NEXT: subl $64, %eax
|
||||
@ -1130,9 +1124,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
|
||||
; X86-NEXT: movl %ecx, %ebp
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: shll %cl, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %esi
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: shll %cl, %esi
|
||||
; X86-NEXT: testb $32, %dl
|
||||
; X86-NEXT: movl $0, %edi
|
||||
@ -1210,8 +1202,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture
|
||||
; X86-NEXT: movl %edi, %ecx
|
||||
; X86-NEXT: .LBB8_23: # %entry
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shll %cl, %edi
|
||||
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
|
@ -98,8 +98,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
|
||||
; X86-NEXT: addl %esi, %ecx
|
||||
; X86-NEXT: adcl $0, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: mull %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: addl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
|
Loading…
Reference in New Issue
Block a user