mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
Revert r238473, "Thumb2: Modify codegen for memcpy intrinsic to prefer LDM/STM."
as it caused miscompilations and assertion failures (PR23768, http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20150601/280380.html). llvm-svn: 239169
This commit is contained in:
parent
13692231c6
commit
3003aae9ae
@ -1125,7 +1125,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
|
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
|
||||||
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
|
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
|
||||||
case ARMISD::VBSL: return "ARMISD::VBSL";
|
case ARMISD::VBSL: return "ARMISD::VBSL";
|
||||||
case ARMISD::MCOPY: return "ARMISD::MCOPY";
|
|
||||||
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
|
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
|
||||||
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
|
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
|
||||||
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
|
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
|
||||||
@ -7676,59 +7675,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Lowers MCOPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD depending
|
|
||||||
/// on whether the result is used. This is done as a post-isel lowering instead
|
|
||||||
/// of as a custom inserter because we need the use list from the SDNode.
|
|
||||||
static void LowerMCOPY(const ARMSubtarget *Subtarget, MachineInstr *MI,
|
|
||||||
SDNode *Node) {
|
|
||||||
bool isThumb1 = Subtarget->isThumb1Only();
|
|
||||||
bool isThumb2 = Subtarget->isThumb2();
|
|
||||||
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
|
|
||||||
|
|
||||||
DebugLoc dl = MI->getDebugLoc();
|
|
||||||
MachineBasicBlock *BB = MI->getParent();
|
|
||||||
MachineFunction *MF = BB->getParent();
|
|
||||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
|
||||||
|
|
||||||
MachineInstrBuilder LD, ST;
|
|
||||||
if (isThumb1 || Node->hasAnyUseOfValue(1)) {
|
|
||||||
LD = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
|
|
||||||
: isThumb1 ? ARM::tLDMIA_UPD
|
|
||||||
: ARM::LDMIA_UPD))
|
|
||||||
.addOperand(MI->getOperand(1));
|
|
||||||
} else {
|
|
||||||
LD = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isThumb1 || Node->hasAnyUseOfValue(0)) {
|
|
||||||
ST = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
|
|
||||||
: isThumb1 ? ARM::tSTMIA_UPD
|
|
||||||
: ARM::STMIA_UPD))
|
|
||||||
.addOperand(MI->getOperand(0));
|
|
||||||
} else {
|
|
||||||
ST = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
|
|
||||||
}
|
|
||||||
|
|
||||||
LD.addOperand(MI->getOperand(3)).addImm(ARMCC::AL).addReg(0);
|
|
||||||
ST.addOperand(MI->getOperand(2)).addImm(ARMCC::AL).addReg(0);
|
|
||||||
|
|
||||||
for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
|
|
||||||
unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
|
|
||||||
: &ARM::GPRRegClass);
|
|
||||||
LD.addReg(TmpReg, RegState::Define);
|
|
||||||
ST.addReg(TmpReg, RegState::Kill);
|
|
||||||
}
|
|
||||||
|
|
||||||
MI->eraseFromParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
|
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
|
||||||
SDNode *Node) const {
|
SDNode *Node) const {
|
||||||
if (MI->getOpcode() == ARM::MCOPY) {
|
|
||||||
LowerMCOPY(Subtarget, MI, Node);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const MCInstrDesc *MCID = &MI->getDesc();
|
const MCInstrDesc *MCID = &MI->getDesc();
|
||||||
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
|
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
|
||||||
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
|
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
|
||||||
|
@ -189,10 +189,6 @@ namespace llvm {
|
|||||||
// Vector bitwise select
|
// Vector bitwise select
|
||||||
VBSL,
|
VBSL,
|
||||||
|
|
||||||
// Pseudo-instruction representing a memory copy using ldm/stm
|
|
||||||
// instructions.
|
|
||||||
MCOPY,
|
|
||||||
|
|
||||||
// Vector load N-element structure to all lanes:
|
// Vector load N-element structure to all lanes:
|
||||||
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||||
VLD3DUP,
|
VLD3DUP,
|
||||||
|
@ -73,10 +73,6 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
|||||||
def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
|
def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
|
||||||
def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
|
def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
|
||||||
|
|
||||||
def SDT_ARMMCOPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
|
||||||
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
|
|
||||||
SDTCisVT<4, i32>]>;
|
|
||||||
|
|
||||||
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
|
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
|
||||||
[SDTCisSameAs<0, 2>,
|
[SDTCisSameAs<0, 2>,
|
||||||
SDTCisSameAs<0, 3>,
|
SDTCisSameAs<0, 3>,
|
||||||
@ -183,10 +179,6 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
|
|||||||
def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
|
def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
|
||||||
def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
|
def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
|
||||||
|
|
||||||
def ARMmcopy : SDNode<"ARMISD::MCOPY", SDT_ARMMCOPY,
|
|
||||||
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
|
|
||||||
SDNPMayStore, SDNPMayLoad]>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// ARM Instruction Predicate Definitions.
|
// ARM Instruction Predicate Definitions.
|
||||||
//
|
//
|
||||||
@ -4586,13 +4578,6 @@ let usesCustomInserter = 1 in {
|
|||||||
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
|
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let hasPostISelHook = 1 in {
|
|
||||||
def MCOPY : PseudoInst<
|
|
||||||
(outs GPR:$newdst, GPR:$newsrc), (ins GPR:$dst, GPR:$src, i32imm:$nreg),
|
|
||||||
NoItinerary,
|
|
||||||
[(set GPR:$newdst, GPR:$newsrc, (ARMmcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
|
def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
|
||||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||||
}]>;
|
}]>;
|
||||||
|
@ -164,38 +164,41 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
|
|||||||
unsigned VTSize = 4;
|
unsigned VTSize = 4;
|
||||||
unsigned i = 0;
|
unsigned i = 0;
|
||||||
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
|
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
|
||||||
const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
|
const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
|
||||||
SDValue TFOps[6];
|
SDValue TFOps[6];
|
||||||
SDValue Loads[6];
|
SDValue Loads[6];
|
||||||
uint64_t SrcOff = 0, DstOff = 0;
|
uint64_t SrcOff = 0, DstOff = 0;
|
||||||
|
|
||||||
// FIXME: We should invent a VMCOPY pseudo-instruction that lowers to
|
// Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
|
||||||
// VLDM/VSTM and make this code emit it when appropriate. This would reduce
|
// same number of stores. The loads and stores will get combined into
|
||||||
// pressure on the general purpose registers. However this seems harder to map
|
// ldm/stm later on.
|
||||||
// onto the register allocator's view of the world.
|
while (EmittedNumMemOps < NumMemOps) {
|
||||||
|
for (i = 0;
|
||||||
|
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
|
||||||
|
Loads[i] = DAG.getLoad(VT, dl, Chain,
|
||||||
|
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
|
||||||
|
DAG.getConstant(SrcOff, dl, MVT::i32)),
|
||||||
|
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
|
||||||
|
false, false, 0);
|
||||||
|
TFOps[i] = Loads[i].getValue(1);
|
||||||
|
SrcOff += VTSize;
|
||||||
|
}
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
|
makeArrayRef(TFOps, i));
|
||||||
|
|
||||||
// The number of MCOPY pseudo-instructions to emit. We use up to MaxLoadsInLDM
|
for (i = 0;
|
||||||
// registers per mcopy, which will get lowered into ldm/stm later on. This is
|
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
|
||||||
// a lower bound on the number of MCOPY operations we must emit.
|
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
|
||||||
unsigned NumMCOPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
|
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
|
||||||
|
DAG.getConstant(DstOff, dl, MVT::i32)),
|
||||||
|
DstPtrInfo.getWithOffset(DstOff),
|
||||||
|
isVolatile, false, 0);
|
||||||
|
DstOff += VTSize;
|
||||||
|
}
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
|
makeArrayRef(TFOps, i));
|
||||||
|
|
||||||
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
|
EmittedNumMemOps += i;
|
||||||
|
|
||||||
for (unsigned I = 0; I != NumMCOPYs; ++I) {
|
|
||||||
// Evenly distribute registers among MCOPY operations to reduce register
|
|
||||||
// pressure.
|
|
||||||
unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMCOPYs;
|
|
||||||
unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
|
|
||||||
|
|
||||||
Dst = DAG.getNode(ARMISD::MCOPY, dl, VTs, Chain, Dst, Src,
|
|
||||||
DAG.getConstant(NumRegs, dl, MVT::i32));
|
|
||||||
Src = Dst.getValue(1);
|
|
||||||
Chain = Dst.getValue(2);
|
|
||||||
|
|
||||||
DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
|
|
||||||
SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
|
|
||||||
|
|
||||||
EmittedNumMemOps = NextEmittedNumMemOps;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (BytesLeft == 0)
|
if (BytesLeft == 0)
|
||||||
|
@ -744,21 +744,10 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
|
|||||||
const MCSubtargetInfo &STI,
|
const MCSubtargetInfo &STI,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
O << "{";
|
O << "{";
|
||||||
|
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
|
||||||
// The backend may have given us a register list in non-ascending order. Sort
|
if (i != OpNum)
|
||||||
// it now.
|
|
||||||
std::vector<MCOperand> RegOps(MI->size() - OpNum);
|
|
||||||
std::copy(MI->begin() + OpNum, MI->end(), RegOps.begin());
|
|
||||||
std::sort(RegOps.begin(), RegOps.end(),
|
|
||||||
[this](const MCOperand &O1, const MCOperand &O2) -> bool {
|
|
||||||
return MRI.getEncodingValue(O1.getReg()) <
|
|
||||||
MRI.getEncodingValue(O2.getReg());
|
|
||||||
});
|
|
||||||
|
|
||||||
for (unsigned i = 0, e = RegOps.size(); i != e; ++i) {
|
|
||||||
if (i != 0)
|
|
||||||
O << ", ";
|
O << ", ";
|
||||||
printRegName(O, RegOps[i].getReg());
|
printRegName(O, MI->getOperand(i).getReg());
|
||||||
}
|
}
|
||||||
O << "}";
|
O << "}";
|
||||||
}
|
}
|
||||||
|
@ -125,10 +125,7 @@ namespace {
|
|||||||
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
|
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
|
||||||
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
|
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
|
||||||
{ ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
|
{ ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
|
||||||
// ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
|
// ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
|
||||||
// tSTMIA_UPD is a change in semantics which can only be used if the base
|
|
||||||
// register is killed. This difference is correctly handled elsewhere.
|
|
||||||
{ ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
|
|
||||||
{ ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
|
{ ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
|
||||||
{ ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
|
{ ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
|
||||||
};
|
};
|
||||||
@ -435,14 +432,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
|
|||||||
isLdStMul = true;
|
isLdStMul = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ARM::t2STMIA: {
|
|
||||||
// If the base register is killed, we don't care what its value is after the
|
|
||||||
// instruction, so we can use an updating STMIA.
|
|
||||||
if (!MI->getOperand(0).isKill())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ARM::t2LDMIA_RET: {
|
case ARM::t2LDMIA_RET: {
|
||||||
unsigned BaseReg = MI->getOperand(1).getReg();
|
unsigned BaseReg = MI->getOperand(1).getReg();
|
||||||
if (BaseReg != ARM::SP)
|
if (BaseReg != ARM::SP)
|
||||||
@ -500,12 +489,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
|
|||||||
// Add the 16-bit load / store instruction.
|
// Add the 16-bit load / store instruction.
|
||||||
DebugLoc dl = MI->getDebugLoc();
|
DebugLoc dl = MI->getDebugLoc();
|
||||||
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
|
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
|
||||||
|
|
||||||
// tSTMIA_UPD takes a defining register operand. We've already checked that
|
|
||||||
// the register is killed, so mark it as dead here.
|
|
||||||
if (Entry.WideOpc == ARM::t2STMIA)
|
|
||||||
MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
|
|
||||||
|
|
||||||
if (!isLdStMul) {
|
if (!isLdStMul) {
|
||||||
MIB.addOperand(MI->getOperand(0));
|
MIB.addOperand(MI->getOperand(0));
|
||||||
MIB.addOperand(MI->getOperand(1));
|
MIB.addOperand(MI->getOperand(1));
|
||||||
|
@ -6,17 +6,15 @@ target triple = "thumbv6m-none--eabi"
|
|||||||
@b = external global i32*
|
@b = external global i32*
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
; Function Attrs: nounwind
|
||||||
define void @foo24() #0 {
|
define void @foo() #0 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: foo24:
|
; CHECK-LABEL: foo:
|
||||||
|
; CHECK: ldr r[[SB:[0-9]]], .LCPI
|
||||||
; CHECK: ldr r[[LB:[0-9]]], .LCPI
|
; CHECK: ldr r[[LB:[0-9]]], .LCPI
|
||||||
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
|
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
|
||||||
; CHECK: ldr r[[SB:[0-9]]], .LCPI
|
; CHECK-NEXT: ldm r[[NLB]],
|
||||||
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
|
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
|
; CHECK-NEXT: stm r[[NSB]]
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
|
|
||||||
%0 = load i32*, i32** @a, align 4
|
%0 = load i32*, i32** @a, align 4
|
||||||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1
|
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1
|
||||||
%1 = bitcast i32* %arrayidx to i8*
|
%1 = bitcast i32* %arrayidx to i8*
|
||||||
@ -27,70 +25,5 @@ entry:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @foo28() #0 {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: foo28:
|
|
||||||
; CHECK: ldr r[[LB:[0-9]]], .LCPI
|
|
||||||
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
|
|
||||||
; CHECK: ldr r[[SB:[0-9]]], .LCPI
|
|
||||||
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
|
|
||||||
%0 = load i32*, i32** @a, align 4
|
|
||||||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1
|
|
||||||
%1 = bitcast i32* %arrayidx to i8*
|
|
||||||
%2 = load i32*, i32** @b, align 4
|
|
||||||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
|
|
||||||
%3 = bitcast i32* %arrayidx1 to i8*
|
|
||||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @foo32() #0 {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: foo32:
|
|
||||||
; CHECK: ldr r[[LB:[0-9]]], .LCPI
|
|
||||||
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
|
|
||||||
; CHECK: ldr r[[SB:[0-9]]], .LCPI
|
|
||||||
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
|
|
||||||
%0 = load i32*, i32** @a, align 4
|
|
||||||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1
|
|
||||||
%1 = bitcast i32* %arrayidx to i8*
|
|
||||||
%2 = load i32*, i32** @b, align 4
|
|
||||||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
|
|
||||||
%3 = bitcast i32* %arrayidx1 to i8*
|
|
||||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @foo36() #0 {
|
|
||||||
entry:
|
|
||||||
; CHECK-LABEL: foo36:
|
|
||||||
; CHECK: ldr r[[LB:[0-9]]], .LCPI
|
|
||||||
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
|
|
||||||
; CHECK: ldr r[[SB:[0-9]]], .LCPI
|
|
||||||
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
|
|
||||||
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
|
|
||||||
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
|
|
||||||
%0 = load i32*, i32** @a, align 4
|
|
||||||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1
|
|
||||||
%1 = bitcast i32* %arrayidx to i8*
|
|
||||||
%2 = load i32*, i32** @b, align 4
|
|
||||||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
|
|
||||||
%3 = bitcast i32* %arrayidx1 to i8*
|
|
||||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
; Function Attrs: nounwind
|
||||||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
|
||||||
|
@ -7,8 +7,8 @@ define void @t1() #0 {
|
|||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: t1:
|
; CHECK-LABEL: t1:
|
||||||
; CHECK: ldr r[[LB:[0-9]]],
|
; CHECK: ldr r[[LB:[0-9]]],
|
||||||
; CHECK-NEXT: ldr r[[SB:[0-9]]],
|
|
||||||
; CHECK-NEXT: ldm r[[LB]]!,
|
; CHECK-NEXT: ldm r[[LB]]!,
|
||||||
|
; CHECK-NEXT: ldr r[[SB:[0-9]]],
|
||||||
; CHECK-NEXT: stm r[[SB]]!,
|
; CHECK-NEXT: stm r[[SB]]!,
|
||||||
; CHECK-NEXT: ldrb {{.*}}, [r[[LB]]]
|
; CHECK-NEXT: ldrb {{.*}}, [r[[LB]]]
|
||||||
; CHECK-NEXT: strb {{.*}}, [r[[SB]]]
|
; CHECK-NEXT: strb {{.*}}, [r[[SB]]]
|
||||||
@ -21,8 +21,8 @@ define void @t2() #0 {
|
|||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: t2:
|
; CHECK-LABEL: t2:
|
||||||
; CHECK: ldr r[[LB:[0-9]]],
|
; CHECK: ldr r[[LB:[0-9]]],
|
||||||
; CHECK-NEXT: ldr r[[SB:[0-9]]],
|
|
||||||
; CHECK-NEXT: ldm r[[LB]]!,
|
; CHECK-NEXT: ldm r[[LB]]!,
|
||||||
|
; CHECK-NEXT: ldr r[[SB:[0-9]]],
|
||||||
; CHECK-NEXT: stm r[[SB]]!,
|
; CHECK-NEXT: stm r[[SB]]!,
|
||||||
; CHECK-NEXT: ldrh {{.*}}, [r[[LB]]]
|
; CHECK-NEXT: ldrh {{.*}}, [r[[LB]]]
|
||||||
; CHECK-NEXT: ldrb {{.*}}, [r[[LB]], #2]
|
; CHECK-NEXT: ldrb {{.*}}, [r[[LB]], #2]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user