Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-23 11:13:28 +01:00)
[FastISel][AArch64] Fold sign-/zero-extends into the load instruction.
The sign-/zero-extension of the loaded value can be performed by the memory
instruction for free. If the result of the load has only one use and that use
is a sign-/zero-extend, then we emit the matching extending load instruction
instead. The extend then becomes a plain register copy and will be optimized
away later on. Other instructions that consume the sign-/zero-extended value
are also made aware of this fact, so they don't fold the extend a second time.

This fixes rdar://problem/18495928.

llvm-svn: 218653
commit 040a60a3d3
parent d6d5162a97
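The effect in one example (the IR is taken from the new fast-isel-int-ext.ll test added below; the before/after assembly is a sketch of what FastISel emits at -O0, with the exact register numbers assumed):

; The sext is the only user of the load, so the load instruction itself
; can perform the sign-extension and no separate extend is needed.
define i64 @load_sext_i32_to_i64(i32* %a) {
  %1 = load i32* %a
  %2 = sext i32 %1 to i64
  ret i64 %2
}

; Before (sketch): ldr   w0, [x0]     ; plain load
;                  sxtw  x0, w0       ; separate sign-extend
; After:           ldrsw x0, [x0]     ; extend folded into the load

The same holds for zero-extends, since ldrb/ldrh/ldr already zero the upper bits; that is why the uxtb/uxth/sxtb/sxth/sxtw checks disappear from the existing tests further down.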
lib/Target/AArch64/AArch64FastISel.cpp

@@ -177,7 +177,7 @@ private:
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
-  bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
+  bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true,
                 MachineMemOperand *MMO = nullptr);
   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
@@ -255,6 +255,23 @@ public:
 
 #include "AArch64GenCallingConv.inc"
 
+/// \brief Check if the sign-/zero-extend will be a noop.
+static bool isIntExtFree(const Instruction *I) {
+  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+         "Unexpected integer extend instruction.");
+  bool IsZExt = isa<ZExtInst>(I);
+
+  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
+    if (LI->hasOneUse())
+      return true;
+
+  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
+    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
+      return true;
+
+  return false;
+}
+
 /// \brief Determine the implicit scale factor that is applied by a memory
 /// operation for a given value type.
 static unsigned getImplicitScaleFactor(MVT VT) {
@@ -585,72 +602,74 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
     if (Addr.getOffsetReg())
       break;
 
-    if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
-      unsigned Val = CI->getZExtValue();
-      if (Val < 1 || Val > 3)
-        break;
+    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
+    if (!CI)
+      break;
 
-      uint64_t NumBytes = 0;
-      if (Ty && Ty->isSized()) {
-        uint64_t NumBits = DL.getTypeSizeInBits(Ty);
-        NumBytes = NumBits / 8;
-        if (!isPowerOf2_64(NumBits))
-          NumBytes = 0;
-      }
+    unsigned Val = CI->getZExtValue();
+    if (Val < 1 || Val > 3)
+      break;
 
-      if (NumBytes != (1ULL << Val))
-        break;
+    uint64_t NumBytes = 0;
+    if (Ty && Ty->isSized()) {
+      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+      NumBytes = NumBits / 8;
+      if (!isPowerOf2_64(NumBits))
+        NumBytes = 0;
+    }
 
-      Addr.setShift(Val);
-      Addr.setExtendType(AArch64_AM::LSL);
+    if (NumBytes != (1ULL << Val))
+      break;
 
-      const Value *Src = U->getOperand(0);
-      if (const auto *I = dyn_cast<Instruction>(Src))
-        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
-          Src = I;
+    Addr.setShift(Val);
+    Addr.setExtendType(AArch64_AM::LSL);
 
-      if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
-        if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
-          Addr.setExtendType(AArch64_AM::UXTW);
-          Src = ZE->getOperand(0);
-        }
-      } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
-        if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
-          Addr.setExtendType(AArch64_AM::SXTW);
-          Src = SE->getOperand(0);
-        }
-      }
+    const Value *Src = U->getOperand(0);
+    if (const auto *I = dyn_cast<Instruction>(Src))
+      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+        Src = I;
+
+    // Fold the zext or sext when it won't become a noop.
+    if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+        Addr.setExtendType(AArch64_AM::UXTW);
+        Src = ZE->getOperand(0);
+      }
+    } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+        Addr.setExtendType(AArch64_AM::SXTW);
+        Src = SE->getOperand(0);
+      }
+    }
 
-      if (const auto *AI = dyn_cast<BinaryOperator>(Src))
-        if (AI->getOpcode() == Instruction::And) {
-          const Value *LHS = AI->getOperand(0);
-          const Value *RHS = AI->getOperand(1);
-
-          if (const auto *C = dyn_cast<ConstantInt>(LHS))
-            if (C->getValue() == 0xffffffff)
-              std::swap(LHS, RHS);
-
-          if (const auto *C = dyn_cast<ConstantInt>(RHS))
-            if (C->getValue() == 0xffffffff) {
-              Addr.setExtendType(AArch64_AM::UXTW);
-              unsigned Reg = getRegForValue(LHS);
-              if (!Reg)
-                return false;
-              bool RegIsKill = hasTrivialKill(LHS);
-              Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
-                                               AArch64::sub_32);
-              Addr.setOffsetReg(Reg);
-              return true;
-            }
-        }
+    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
+      if (AI->getOpcode() == Instruction::And) {
+        const Value *LHS = AI->getOperand(0);
+        const Value *RHS = AI->getOperand(1);
+
+        if (const auto *C = dyn_cast<ConstantInt>(LHS))
+          if (C->getValue() == 0xffffffff)
+            std::swap(LHS, RHS);
+
+        if (const auto *C = dyn_cast<ConstantInt>(RHS))
+          if (C->getValue() == 0xffffffff) {
+            Addr.setExtendType(AArch64_AM::UXTW);
+            unsigned Reg = getRegForValue(LHS);
+            if (!Reg)
+              return false;
+            bool RegIsKill = hasTrivialKill(LHS);
+            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+                                             AArch64::sub_32);
+            Addr.setOffsetReg(Reg);
+            return true;
+          }
+      }
 
-      unsigned Reg = getRegForValue(Src);
-      if (!Reg)
-        return false;
-      Addr.setOffsetReg(Reg);
-      return true;
-    }
-    break;
+    unsigned Reg = getRegForValue(Src);
+    if (!Reg)
+      return false;
+    Addr.setOffsetReg(Reg);
+    return true;
   }
   case Instruction::Mul: {
     if (Addr.getOffsetReg())
@@ -692,13 +711,15 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
         Src = I;
 
+
+    // Fold the zext or sext when it won't become a noop.
     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
-      if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
         Addr.setExtendType(AArch64_AM::UXTW);
         Src = ZE->getOperand(0);
       }
     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
-      if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
+      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
         Addr.setExtendType(AArch64_AM::SXTW);
         Src = SE->getOperand(0);
       }
@@ -1568,7 +1589,7 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 }
 
 bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                               MachineMemOperand *MMO) {
+                               bool WantZExt, MachineMemOperand *MMO) {
   // Simplify this down to something we can handle.
   if (!simplifyAddress(Addr, VT))
     return false;
@@ -1585,20 +1606,38 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
     ScaleFactor = 1;
   }
 
-  static const unsigned OpcTable[4][6] = {
-    { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
-      AArch64::LDURSi,   AArch64::LDURDi },
-    { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
-      AArch64::LDRSui,   AArch64::LDRDui },
-    { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
-      AArch64::LDRSroX,  AArch64::LDRDroX },
-    { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
-      AArch64::LDRSroW,  AArch64::LDRDroW }
+  static const unsigned GPOpcTable[2][4][4] = {
+    // Sign-extend.
+    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURSWi,
+        AArch64::LDURXi  },
+      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRSWui,
+        AArch64::LDRXui  },
+      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
+        AArch64::LDRXroX },
+      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW,
+        AArch64::LDRXroW },
+    },
+    // Zero-extend.
+    { { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,
+        AArch64::LDURXi  },
+      { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,
+        AArch64::LDRXui  },
+      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+        AArch64::LDRXroX },
+      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+        AArch64::LDRXroW }
+    }
+  };
+
+  static const unsigned FPOpcTable[4][2] = {
+    { AArch64::LDURSi,  AArch64::LDURDi },
+    { AArch64::LDRSui,  AArch64::LDRDui },
+    { AArch64::LDRSroX, AArch64::LDRDroX },
+    { AArch64::LDRSroW, AArch64::LDRDroW }
   };
 
   unsigned Opc;
   const TargetRegisterClass *RC;
-  bool VTIsi1 = false;
   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                       Addr.getOffsetReg();
   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
@@ -1607,14 +1646,33 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
     Idx++;
 
   switch (VT.SimpleTy) {
-  default: llvm_unreachable("Unexpected value type.");
-  case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
-  case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
-  case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
-  case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
-  case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
-  case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
-  case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
+  default:
+    llvm_unreachable("Unexpected value type.");
+  case MVT::i1: // Intentional fall-through.
+  case MVT::i8:
+    Opc = GPOpcTable[WantZExt][Idx][0];
+    RC = &AArch64::GPR32RegClass;
+    break;
+  case MVT::i16:
+    Opc = GPOpcTable[WantZExt][Idx][1];
+    RC = &AArch64::GPR32RegClass;
+    break;
+  case MVT::i32:
+    Opc = GPOpcTable[WantZExt][Idx][2];
+    RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+    break;
+  case MVT::i64:
+    Opc = GPOpcTable[WantZExt][Idx][3];
+    RC = &AArch64::GPR64RegClass;
+    break;
+  case MVT::f32:
+    Opc = FPOpcTable[Idx][0];
+    RC = &AArch64::FPR32RegClass;
+    break;
+  case MVT::f64:
+    Opc = FPOpcTable[Idx][1];
+    RC = &AArch64::FPR64RegClass;
+    break;
   }
 
   // Create the base instruction, then add the operands.
@@ -1623,8 +1681,14 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                           TII.get(Opc), ResultReg);
   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 
+  // For 32bit loads we do sign-extending loads to 64bit and then extract the
+  // subreg. In the end this is just a NOOP.
+  if (VT == MVT::i32 && !WantZExt)
+    ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, /*IsKill=*/true,
+                                           AArch64::sub_32);
+
   // Loading an i1 requires special handling.
-  if (VTIsi1) {
+  if (VT == MVT::i1) {
     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     ResultReg = ANDReg;
@@ -1701,8 +1765,12 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
     return false;
 
+  bool WantZExt = true;
+  if (I->hasOneUse() && isa<SExtInst>(I->use_begin()->getUser()))
+    WantZExt = false;
+
   unsigned ResultReg;
-  if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
+  if (!emitLoad(VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor(I)))
     return false;
 
   updateValueMap(I, ResultReg);
@@ -3776,46 +3844,60 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
 }
 
 bool AArch64FastISel::selectIntExt(const Instruction *I) {
-  // On ARM, in general, integer casts don't involve legal types; this code
-  // handles promotable integers. The high bits for a type smaller than
-  // the register size are assumed to be undefined.
-  Type *DestTy = I->getType();
-  Value *Src = I->getOperand(0);
-  Type *SrcTy = Src->getType();
-
-  unsigned SrcReg = getRegForValue(Src);
-  if (!SrcReg)
-    return false;
-
-  EVT SrcEVT = TLI.getValueType(SrcTy, true);
-  EVT DestEVT = TLI.getValueType(DestTy, true);
-  if (!SrcEVT.isSimple())
-    return false;
-  if (!DestEVT.isSimple())
-    return false;
-
-  MVT SrcVT = SrcEVT.getSimpleVT();
-  MVT DestVT = DestEVT.getSimpleVT();
-  unsigned ResultReg = 0;
-
-  bool IsZExt = isa<ZExtInst>(I);
-  // Check if it is an argument and if it is already zero/sign-extended.
-  if (const auto *Arg = dyn_cast<Argument>(Src)) {
-    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
-      if (DestVT == MVT::i64) {
-        ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
-            .addImm(0)
-            .addReg(SrcReg)
-            .addImm(AArch64::sub_32);
-      } else
-        ResultReg = SrcReg;
-    }
-  }
-
-  if (!ResultReg)
-    ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt);
+  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+         "Unexpected integer extend instruction.");
+  MVT RetVT;
+  MVT SrcVT;
+  if (!isTypeSupported(I->getType(), RetVT))
+    return false;
+
+  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
+    return false;
+
+  if (isIntExtFree(I)) {
+    unsigned SrcReg = getRegForValue(I->getOperand(0));
+    if (!SrcReg)
+      return false;
+    bool SrcIsKill = hasTrivialKill(I->getOperand(0));
+
+    const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
+        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    unsigned ResultReg = createResultReg(RC);
+    if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+          .addImm(0)
+          .addReg(SrcReg, getKillRegState(SrcIsKill))
+          .addImm(AArch64::sub_32);
+    } else {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(SrcReg, getKillRegState(SrcIsKill));
+    }
+    updateValueMap(I, ResultReg);
+    return true;
+  }
+
+  unsigned SrcReg = getRegForValue(I->getOperand(0));
+  if (!SrcReg)
+    return false;
+  bool SrcRegIsKill = hasTrivialKill(I->getOperand(0));
+
+  unsigned ResultReg = 0;
+  if (isIntExtFree(I)) {
+    if (RetVT == MVT::i64) {
+      ResultReg = createResultReg(&AArch64::GPR64RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+          .addImm(0)
+          .addReg(SrcReg, getKillRegState(SrcRegIsKill))
+          .addImm(AArch64::sub_32);
+    } else
+      ResultReg = SrcReg;
+  }
+
+  if (!ResultReg)
+    ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
 
   if (!ResultReg)
     return false;
@@ -3891,18 +3973,22 @@ bool AArch64FastISel::selectMul(const Instruction *I) {
   MVT SrcVT = VT;
   bool IsZExt = true;
   if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
-    MVT VT;
-    if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
-      SrcVT = VT;
-      IsZExt = true;
-      Src0 = ZExt->getOperand(0);
+    if (!isIntExtFree(ZExt)) {
+      MVT VT;
+      if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
+        SrcVT = VT;
+        IsZExt = true;
+        Src0 = ZExt->getOperand(0);
+      }
     }
   } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
-    MVT VT;
-    if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
-      SrcVT = VT;
-      IsZExt = false;
-      Src0 = SExt->getOperand(0);
+    if (!isIntExtFree(SExt)) {
+      MVT VT;
+      if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
+        SrcVT = VT;
+        IsZExt = false;
+        Src0 = SExt->getOperand(0);
+      }
     }
   }
 
@@ -3954,18 +4040,22 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
   bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
   const Value *Op0 = I->getOperand(0);
   if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
-    MVT TmpVT;
-    if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
-      SrcVT = TmpVT;
-      IsZExt = true;
-      Op0 = ZExt->getOperand(0);
+    if (!isIntExtFree(ZExt)) {
+      MVT TmpVT;
+      if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+        SrcVT = TmpVT;
+        IsZExt = true;
+        Op0 = ZExt->getOperand(0);
+      }
     }
   } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
-    MVT TmpVT;
-    if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
-      SrcVT = TmpVT;
-      IsZExt = false;
-      Op0 = SExt->getOperand(0);
+    if (!isIntExtFree(SExt)) {
+      MVT TmpVT;
+      if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+        SrcVT = TmpVT;
+        IsZExt = false;
+        Op0 = SExt->getOperand(0);
+      }
     }
   }
 
@@ -4213,13 +4303,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   case Instruction::FPToUI:
     return selectFPToInt(I, /*Signed=*/false);
   case Instruction::ZExt:
-    if (!selectCast(I, ISD::ZERO_EXTEND))
-      return selectIntExt(I);
-    return true;
   case Instruction::SExt:
-    if (!selectCast(I, ISD::SIGN_EXTEND))
-      return selectIntExt(I);
-    return true;
+    return selectIntExt(I);
   case Instruction::Trunc:
     if (!selectCast(I, ISD::TRUNCATE))
       return selectTrunc(I);
@@ -17,7 +17,6 @@ entry:
 ; CHECK: ldrh w0, [sp, #12]
 ; CHECK: strb w0, [sp, #15]
 ; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
 ; CHECK: add sp, sp, #16
 ; CHECK: ret
   %a.addr = alloca i8, align 1
@@ -51,14 +50,11 @@ entry:
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
 ; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
 ; CHECK: strh w0, [sp, #12]
 ; CHECK: ldrh w0, [sp, #12]
-; CHECK: uxth w0, w0
 ; CHECK: str w0, [sp, #8]
 ; CHECK: ldr w0, [sp, #8]
 ; CHECK: mov x3, x0
-; CHECK: ubfx x3, x3, #0, #32
 ; CHECK: str x3, [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
@@ -109,15 +105,11 @@ entry:
 ; CHECK: strh w1, [sp, #12]
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: sxtb w0, w0
+; CHECK: ldrsb w0, [sp, #15]
 ; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: sxth w0, w0
+; CHECK: ldrsh w0, [sp, #12]
 ; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: mov x3, x0
-; CHECK: sxtw x3, w3
+; CHECK: ldrsw x3, [sp, #8]
 ; CHECK: str x3, [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
test/CodeGen/AArch64/fast-isel-int-ext.ll (new file, 190 lines)

@@ -0,0 +1,190 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+;
+; Test that we only use the sign/zero extend in the address calculation when
+; necessary.
+;
+; SHIFT
+;
+define i64 @load_addr_shift_zext1(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_zext2(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3{{\]}}
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_sext1(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_sext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+;
+; MUL
+;
+define i64 @load_addr_mul_zext1(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_zext2(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_sext1(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext1
+; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_sext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext2
+; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Test folding of the sign-/zero-extend into the load instruction.
+define i32 @load_zext_i8_to_i32(i8* %a) {
+; CHECK-LABEL: load_zext_i8_to_i32
+; CHECK: ldrb w0, [x0]
+; CHECK-NOT: uxtb
+  %1 = load i8* %a
+  %2 = zext i8 %1 to i32
+  ret i32 %2
+}
+
+define i32 @load_zext_i16_to_i32(i16* %a) {
+; CHECK-LABEL: load_zext_i16_to_i32
+; CHECK: ldrh w0, [x0]
+; CHECK-NOT: uxth
+  %1 = load i16* %a
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
+
+define i64 @load_zext_i8_to_i64(i8* %a) {
+; CHECK-LABEL: load_zext_i8_to_i64
+; CHECK: ldrb w0, [x0]
+; CHECK-NOT: uxtb
+  %1 = load i8* %a
+  %2 = zext i8 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_zext_i16_to_i64(i16* %a) {
+; CHECK-LABEL: load_zext_i16_to_i64
+; CHECK: ldrh w0, [x0]
+; CHECK-NOT: uxth
+  %1 = load i16* %a
+  %2 = zext i16 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_zext_i32_to_i64(i32* %a) {
+; CHECK-LABEL: load_zext_i32_to_i64
+; CHECK: ldr w0, [x0]
+; CHECK-NOT: uxtw
+  %1 = load i32* %a
+  %2 = zext i32 %1 to i64
+  ret i64 %2
+}
+
+define i32 @load_sext_i8_to_i32(i8* %a) {
+; CHECK-LABEL: load_sext_i8_to_i32
+; CHECK: ldrsb w0, [x0]
+; CHECK-NOT: sxtb
+  %1 = load i8* %a
+  %2 = sext i8 %1 to i32
+  ret i32 %2
+}
+
+define i32 @load_sext_i16_to_i32(i16* %a) {
+; CHECK-LABEL: load_sext_i16_to_i32
+; CHECK: ldrsh w0, [x0]
+; CHECK-NOT: sxth
+  %1 = load i16* %a
+  %2 = sext i16 %1 to i32
+  ret i32 %2
+}
+
+define i64 @load_sext_i8_to_i64(i8* %a) {
+; CHECK-LABEL: load_sext_i8_to_i64
+; CHECK: ldrsb w0, [x0]
+; CHECK-NOT: sxtb
+  %1 = load i8* %a
+  %2 = sext i8 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_sext_i16_to_i64(i16* %a) {
+; CHECK-LABEL: load_sext_i16_to_i64
+; CHECK: ldrsh w0, [x0]
+; CHECK-NOT: sxth
+  %1 = load i16* %a
+  %2 = sext i16 %1 to i64
+  ret i64 %2
+}
+
+define i64 @load_sext_i32_to_i64(i32* %a) {
+; CHECK-LABEL: load_sext_i32_to_i64
+; CHECK: ldrsw x0, [x0]
+; CHECK-NOT: sxtw
+  %1 = load i32* %a
+  %2 = sext i32 %1 to i64
+  ret i64 %2
+}