mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[FastISel][AArch64] Fold mul into the address computation of memory operations.
Teach 'computeAddress' to also fold multiplies into the address computation (when possible). This fixes rdar://problem/18369443. llvm-svn: 217977
This commit is contained in:
parent
aa59e21d08
commit
6305202d76
@ -425,6 +425,19 @@ unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
|
||||
return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
|
||||
}
|
||||
|
||||
/// \brief Check if the multiply is by a power-of-2 constant.
|
||||
static bool isMulPowOf2(const Value *I) {
|
||||
if (const auto *MI = dyn_cast<MulOperator>(I)) {
|
||||
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
|
||||
if (C->getValue().isPowerOf2())
|
||||
return true;
|
||||
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
|
||||
if (C->getValue().isPowerOf2())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Computes the address to get to an object.
|
||||
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
|
||||
{
|
||||
@ -589,7 +602,64 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case Instruction::Mul: {
|
||||
if (Addr.getOffsetReg())
|
||||
break;
|
||||
|
||||
if (!isMulPowOf2(U))
|
||||
break;
|
||||
|
||||
const Value *LHS = U->getOperand(0);
|
||||
const Value *RHS = U->getOperand(1);
|
||||
|
||||
// Canonicalize power-of-2 value to the RHS.
|
||||
if (const auto *C = dyn_cast<ConstantInt>(LHS))
|
||||
if (C->getValue().isPowerOf2())
|
||||
std::swap(LHS, RHS);
|
||||
|
||||
assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
|
||||
const auto *C = cast<ConstantInt>(RHS);
|
||||
unsigned Val = C->getValue().logBase2();
|
||||
if (Val < 1 || Val > 3)
|
||||
break;
|
||||
|
||||
uint64_t NumBytes = 0;
|
||||
if (Ty && Ty->isSized()) {
|
||||
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
|
||||
NumBytes = NumBits / 8;
|
||||
if (!isPowerOf2_64(NumBits))
|
||||
NumBytes = 0;
|
||||
}
|
||||
|
||||
if (NumBytes != (1ULL << Val))
|
||||
break;
|
||||
|
||||
Addr.setShift(Val);
|
||||
Addr.setExtendType(AArch64_AM::LSL);
|
||||
|
||||
if (const auto *I = dyn_cast<Instruction>(LHS))
|
||||
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
|
||||
U = I;
|
||||
|
||||
if (const auto *ZE = dyn_cast<ZExtInst>(U))
|
||||
if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
|
||||
Addr.setExtendType(AArch64_AM::UXTW);
|
||||
LHS = U->getOperand(0);
|
||||
}
|
||||
|
||||
if (const auto *SE = dyn_cast<SExtInst>(U))
|
||||
if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
|
||||
Addr.setExtendType(AArch64_AM::SXTW);
|
||||
LHS = U->getOperand(0);
|
||||
}
|
||||
|
||||
unsigned Reg = getRegForValue(LHS);
|
||||
if (!Reg)
|
||||
return false;
|
||||
Addr.setOffsetReg(Reg);
|
||||
return true;
|
||||
}
|
||||
} // end switch
|
||||
|
||||
if (Addr.getReg()) {
|
||||
if (!Addr.getOffsetReg()) {
|
||||
|
@ -339,6 +339,16 @@ define i32 @load_shift_offreg_1(i64 %a) {
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; Loads an i32 from an address that is just %a multiplied by 4 (no base
; register). The expected output is a separate "lsl ..., x0, #2" followed by
; a plain register-indirect ldr from the shifted register.
define i32 @load_mul_offreg_1(i64 %a) {
|
||||
; CHECK-LABEL: load_mul_offreg_1
|
||||
; CHECK: lsl [[REG:x[0-9]+]], x0, #2
|
||||
; CHECK: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
|
||||
%1 = mul i64 %a, 4
|
||||
%2 = inttoptr i64 %1 to i32*
|
||||
%3 = load i32* %2
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; Load Base Register + Scaled Register Offset
|
||||
define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: load_breg_shift_offreg_1
|
||||
@ -405,6 +415,15 @@ define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
|
||||
ret i32 %5
|
||||
}
|
||||
|
||||
; Loads an i32 from the address %b + (%a * 4). The expected output is a single
; ldr using the base + scaled-register form "[x1, x0, lsl #2]", i.e. the
; multiply by 4 appears as the shift amount of the addressing mode.
define i32 @load_breg_mul_offreg_1(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: load_breg_mul_offreg_1
|
||||
; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
|
||||
%1 = mul i64 %a, 4
|
||||
%2 = add i64 %1, %b
|
||||
%3 = inttoptr i64 %2 to i32*
|
||||
%4 = load i32* %3
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
; Load Base Register + Scaled Register Offset + Sign/Zero extension
|
||||
define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
|
||||
@ -429,6 +448,17 @@ define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
|
||||
ret i32 %5
|
||||
}
|
||||
|
||||
; Loads an i32 from the address %b + (zext(%a) * 4), where %a is an i32. The
; expected output is a single ldr of the form "[x1, w0, uxtw #2]", i.e. both
; the zero-extension and the multiply by 4 appear in the addressing mode.
define i32 @load_breg_zext_mul_offreg_1(i32 %a, i64 %b) {
|
||||
; CHECK-LABEL: load_breg_zext_mul_offreg_1
|
||||
; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
|
||||
%1 = zext i32 %a to i64
|
||||
%2 = mul i64 %1, 4
|
||||
%3 = add i64 %2, %b
|
||||
%4 = inttoptr i64 %3 to i32*
|
||||
%5 = load i32* %4
|
||||
ret i32 %5
|
||||
}
|
||||
|
||||
define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
|
||||
; CHECK-LABEL: load_breg_sext_shift_offreg_1
|
||||
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
|
||||
@ -451,6 +481,17 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
|
||||
ret i32 %5
|
||||
}
|
||||
|
||||
; Loads an i32 from the address %b + (sext(%a) * 4), where %a is an i32. The
; expected output is a single ldr of the form "[x1, w0, sxtw #2]", i.e. both
; the sign-extension and the multiply by 4 appear in the addressing mode.
define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
|
||||
; CHECK-LABEL: load_breg_sext_mul_offreg_1
|
||||
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
|
||||
%1 = sext i32 %a to i64
|
||||
%2 = mul i64 %1, 4
|
||||
%3 = add i64 %2, %b
|
||||
%4 = inttoptr i64 %3 to i32*
|
||||
%5 = load i32* %4
|
||||
ret i32 %5
|
||||
}
|
||||
|
||||
; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
|
||||
define i64 @load_sext_shift_offreg_imm1(i32 %a) {
|
||||
; CHECK-LABEL: load_sext_shift_offreg_imm1
|
||||
|
Loading…
x
Reference in New Issue
Block a user