Revert r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses."

Caused a hang and eventually an assertion failure in LTO builds of
7zip-benchmark on aarch64 iOS targets.
http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/

llvm-svn: 330063
commit 82ccce4b49
parent ad72037b93
@@ -743,16 +743,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     if (!GAN)
       return true;
 
-    if (GAN->getOffset() % Size == 0) {
-      const GlobalValue *GV = GAN->getGlobal();
-      unsigned Alignment = GV->getAlignment();
-      Type *Ty = GV->getValueType();
-      if (Alignment == 0 && Ty->isSized())
-        Alignment = DL.getABITypeAlignment(Ty);
+    const GlobalValue *GV = GAN->getGlobal();
+    unsigned Alignment = GV->getAlignment();
+    Type *Ty = GV->getValueType();
+    if (Alignment == 0 && Ty->isSized())
+      Alignment = DL.getABITypeAlignment(Ty);
 
-      if (Alignment >= Size)
-        return true;
-    }
+    if (Alignment >= Size)
+      return true;
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {
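
One detail worth spelling out around the hunk above: the `GAN->getOffset() % Size == 0` guard being deleted only matters while constant offsets can be folded into global address nodes, because the scaled unsigned-immediate form of LDR/STR encodes the byte offset divided by the access size. After this revert, isOffsetFoldingLegal returns false again, so globals reach instruction selection with a zero offset and the alignment check alone suffices. A minimal standalone sketch of the encoding rule (plain C++ for illustration, not LLVM API; the helper name is invented):

    #include <cstdint>

    // Encodability of the scaled unsigned-immediate form of LDR/STR: the
    // instruction stores Offset / Size in a 12-bit field, so the offset must
    // be an exact multiple of the access size. Size is assumed nonzero.
    bool isScaledImmOffset(uint64_t Offset, uint64_t Size) {
      return Offset % Size == 0 && Offset / Size < 4096;
    }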
@@ -577,8 +577,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
 
-  setTargetDAGCombine(ISD::GlobalAddress);
-
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3679,8 +3677,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                              SelectionDAG &DAG,
                                              unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
-                                    N->getOffset(), Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
 }
 
 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3755,9 +3752,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
 
-  if (OpFlags != AArch64II::MO_NO_FLAG)
-    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-           "unexpected offset in global node");
+  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+         "unexpected offset in global node");
 
   // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4995,8 +4991,10 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
 
 bool AArch64TargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
-  // Offsets are folded in the DAG combine rather than here so that we can
-  // intelligently choose an offset based on the uses.
+  DEBUG(dbgs() << "Skipping offset folding global address: ");
+  DEBUG(GA->dump());
+  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
+               "addresses\n");
   return false;
 }
 
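
For context, isOffsetFoldingLegal is the hook the target-independent combiner consults before merging a constant addend into a target global address node. A toy model of the fold it gates, with illustrative types rather than SelectionDAG nodes:

    #include <cstdint>

    struct GlobalAddr {
      const char *Symbol; // stand-in for the GlobalValue
      int64_t Offset;     // addend carried by the node
    };

    // The transform the generic combiner may apply once the target reports
    // offset folding as legal:
    //   (add (globaladdr g, o), c) -> (globaladdr g, o + c)
    GlobalAddr foldConstantAdd(GlobalAddr GA, int64_t C) {
      return {GA.Symbol, GA.Offset + C};
    }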
@@ -10619,53 +10617,6 @@ static SDValue performNVCASTCombine(SDNode *N) {
   return SDValue();
 }
 
-// If all users of the globaladdr are of the form (globaladdr + constant), find
-// the smallest constant, fold it into the globaladdr's offset and rewrite the
-// globaladdr as (globaladdr + constant) - constant.
-static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
-                                           const AArch64Subtarget *Subtarget,
-                                           const TargetMachine &TM) {
-  auto *GN = dyn_cast<GlobalAddressSDNode>(N);
-  if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
-                 AArch64II::MO_NO_FLAG)
-    return SDValue();
-
-  uint64_t MinOffset = -1ull;
-  for (SDNode *N : GN->uses()) {
-    if (N->getOpcode() != ISD::ADD)
-      return SDValue();
-    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
-    if (!C)
-      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
-    if (!C)
-      return SDValue();
-    MinOffset = std::min(MinOffset, C->getZExtValue());
-  }
-  uint64_t Offset = MinOffset + GN->getOffset();
-
-  // Check whether folding this offset is legal. It must not go out of bounds of
-  // the referenced object to avoid violating the code model, and must be
-  // smaller than 2^21 because this is the largest offset expressible in all
-  // object formats.
-  //
-  // This check also prevents us from folding negative offsets, which will end
-  // up being treated in the same way as large positive ones. They could also
-  // cause code model violations, and aren't really common enough to matter.
-  if (Offset >= (1 << 21))
-    return SDValue();
-
-  const GlobalValue *GV = GN->getGlobal();
-  Type *T = GV->getValueType();
-  if (!T->isSized() ||
-      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
-    return SDValue();
-
-  SDLoc DL(GN);
-  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
-  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
-                     DAG.getConstant(MinOffset, DL, MVT::i64));
-}
-
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
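
To make the deleted combine concrete with a worked example: if @g is used at (g + 8) and (g + 12), MinOffset is 8, the node is rewritten as (globaladdr g + 8) - 8, and the two uses reduce to the folded address itself and the folded address plus 4. The same arithmetic outside SelectionDAG (a sketch under the combine's own assumption that every use is an ADD with a constant operand):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // The offset the combine folds into the global address node: the smallest
    // constant among all (globaladdr + c) uses. The node is then rewritten as
    // (globaladdr g + MinOffset) - MinOffset, so the use with c == MinOffset
    // addresses the global directly through the relocation addend. Assumes a
    // non-empty use list, as the combine itself does.
    uint64_t smallestUseOffset(const std::vector<uint64_t> &UseOffsets) {
      return *std::min_element(UseOffsets.begin(), UseOffsets.end());
    }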
@@ -10753,8 +10704,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     default:
       break;
     }
-  case ISD::GlobalAddress:
-    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   }
   return SDValue();
 }
@@ -5,31 +5,32 @@
 
+@object = external hidden global i64, section "__DATA, __objc_ivar", align 8
 ; base + offset (imm9)
 ; CHECK: @t1
-; CHECK: ldr xzr, [x0, #8]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
 ; CHECK: ret
-define void @t1(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
+define void @t1() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + offset (> imm9)
 ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
 ; CHECK: ldr xzr, [
 ; CHECK: [[ADDREG]]]
 ; CHECK: ret
-define void @t2(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
+define void @t2() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
 ; CHECK: @t3
-; CHECK: ldr xzr, [x0, #32760]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
 ; CHECK: ret
-define void @t3(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
+define void @t3() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -37,10 +38,10 @@ define void @t3(i64* %object) {
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x0, x[[NUM]]]
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t4(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
+define void @t4() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -57,12 +58,12 @@ define void @t5(i64 %a) {
 
 ; base + reg + imm
 ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t6(i64 %a, i64* %object) {
-  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
+define void @t6(i64 %a) {
+  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
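
The t1-t6 patterns pin down which A64 addressing form the selector picks for each displacement. Extending the divisibility rule sketched earlier to all three forms, with this file's offsets worked through in comments (an illustrative model, not the actual selection code):

    #include <cstdint>

    enum class AddrForm { Imm12Scaled, Imm9Unscaled, RegisterOffset };

    // Which A64 addressing form can encode a byte displacement for an access
    // of the given (nonzero) size.
    AddrForm classifyOffset(int64_t Offset, uint64_t Size) {
      // Scaled unsigned 12-bit immediate: t1 (#8) and t3 (#32760 = 4095 * 8).
      if (Offset >= 0 && uint64_t(Offset) % Size == 0 &&
          uint64_t(Offset) / Size < 4096)
        return AddrForm::Imm12Scaled;
      // Unscaled signed 9-bit immediate (ldur/stur), range [-256, 255].
      if (Offset >= -256 && Offset <= 255)
        return AddrForm::Imm9Unscaled;
      // Otherwise the offset is materialized: t2 (-264, via sub) and
      // t4 (32768 = 4096 * 8, via orr plus register-offset ldr).
      return AddrForm::RegisterOffset;
    }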
@@ -264,196 +264,149 @@ entry:
 
 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
 ; registers for unscaled vector accesses
+@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
 
-define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
+define <1 x i64> @fct0() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct0:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <1 x i64>*
-  %0 = load <1 x i64>, <1 x i64>* %q, align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
   ret <1 x i64> %0
 }
 
-define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
+define <2 x i32> @fct1() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct1:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i32>*
-  %0 = load <2 x i32>, <2 x i32>* %q, align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
   ret <2 x i32> %0
 }
 
-define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
+define <4 x i16> @fct2() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct2:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i16>*
-  %0 = load <4 x i16>, <4 x i16>* %q, align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
   ret <4 x i16> %0
 }
 
-define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
+define <8 x i8> @fct3() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct3:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i8>*
-  %0 = load <8 x i8>, <8 x i8>* %q, align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
   ret <8 x i8> %0
 }
 
-define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
+define <2 x i64> @fct4() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct4:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i64>*
-  %0 = load <2 x i64>, <2 x i64>* %q, align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
   ret <2 x i64> %0
 }
 
-define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
+define <4 x i32> @fct5() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct5:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i32>*
-  %0 = load <4 x i32>, <4 x i32>* %q, align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
   ret <4 x i32> %0
 }
 
-define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
+define <8 x i16> @fct6() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct6:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i16>*
-  %0 = load <8 x i16>, <8 x i16>* %q, align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
   ret <8 x i16> %0
 }
 
-define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
+define <16 x i8> @fct7() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct7:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <16 x i8>*
-  %0 = load <16 x i8>, <16 x i8>* %q, align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
   ret <16 x i8> %0
 }
 
-define void @fct8(i8* %str) nounwind ssp {
+define void @fct8() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct8:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <1 x i64>*
-  %0 = load <1 x i64>, <1 x i64>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <1 x i64>*
-  store <1 x i64> %0, <1 x i64>* %q2, align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
   ret void
 }
 
-define void @fct9(i8* %str) nounwind ssp {
+define void @fct9() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct9:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i32>*
-  %0 = load <2 x i32>, <2 x i32>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <2 x i32>*
-  store <2 x i32> %0, <2 x i32>* %q2, align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
   ret void
 }
 
-define void @fct10(i8* %str) nounwind ssp {
+define void @fct10() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct10:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i16>*
-  %0 = load <4 x i16>, <4 x i16>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <4 x i16>*
-  store <4 x i16> %0, <4 x i16>* %q2, align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
   ret void
 }
 
-define void @fct11(i8* %str) nounwind ssp {
+define void @fct11() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct11:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i8>*
-  %0 = load <8 x i8>, <8 x i8>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <8 x i8>*
-  store <8 x i8> %0, <8 x i8>* %q2, align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
   ret void
 }
 
-define void @fct12(i8* %str) nounwind ssp {
+define void @fct12() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct12:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i64>*
-  %0 = load <2 x i64>, <2 x i64>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <2 x i64>*
-  store <2 x i64> %0, <2 x i64>* %q2, align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
   ret void
 }
 
-define void @fct13(i8* %str) nounwind ssp {
+define void @fct13() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct13:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i32>*
-  %0 = load <4 x i32>, <4 x i32>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <4 x i32>*
-  store <4 x i32> %0, <4 x i32>* %q2, align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
   ret void
 }
 
-define void @fct14(i8* %str) nounwind ssp {
+define void @fct14() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct14:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i16>*
-  %0 = load <8 x i16>, <8 x i16>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <8 x i16>*
-  store <8 x i16> %0, <8 x i16>* %q2, align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
   ret void
 }
 
-define void @fct15(i8* %str) nounwind ssp {
+define void @fct15() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct15:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <16 x i8>*
-  %0 = load <16 x i8>, <16 x i8>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <16 x i8>*
-  store <16 x i8> %0, <16 x i8>* %q2, align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
   ret void
 }
 
@@ -1,59 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
-
-@x1 = external hidden global [2 x i64]
-@x2 = external hidden global [16777216 x i64]
-
-define i64 @f1() {
-; CHECK: f1:
-; CHECK: adrp x8, x1+16
-; CHECK: ldr x0, [x8, :lo12:x1+16]
-  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
-  ret i64 %l
-}
-
-define i64 @f2() {
-; CHECK: f2:
-; CHECK: adrp x8, x1
-; CHECK: add x8, x8, :lo12:x1
-; CHECK: ldr x0, [x8, #24]
-  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
-  ret i64 %l
-}
-
-define i64 @f3() {
-; CHECK: f3:
-; CHECK: adrp x8, x1+1
-; CHECK: add x8, x8, :lo12:x1+1
-; CHECK: ldr x0, [x8]
-  %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
-  ret i64 %l
-}
-
-define [2 x i64] @f4() {
-; CHECK: f4:
-; CHECK: adrp x8, x2+8
-; CHECK: add x8, x8, :lo12:x2+8
-; CHECK: ldp x0, x1, [x8]
-  %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
-  ret [2 x i64] %l
-}
-
-define i64 @f5() {
-; CHECK: f5:
-; CHECK: adrp x8, x2+2097144
-; CHECK: ldr x0, [x8, :lo12:x2+2097144]
-; CHECK: ret
-  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
-  ret i64 %l
-}
-
-define i64 @f6() {
-; CHECK: f6:
-; CHECK: adrp x8, x2
-; CHECK: add x8, x8, :lo12:x2
-; CHECK: orr w9, wzr, #0x200000
-; CHECK: ldr x0, [x8, x9]
-; CHECK: ret
-  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
-  ret i64 %l
-}
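
The two boundary functions in this deleted file (the global-offset-folding test that r329956 introduced) bracket the limit the combine enforced: f5 loads at byte offset 262143 * 8 = 2097144, just under 1 << 21, so the addend folds into the adrp/:lo12: pair, while f6 loads at 262144 * 8 = 2097152, which no longer fits and is materialized with orr. The bound, restated as a trivial check:

    #include <cstdint>

    // Largest global-plus-offset addend expressible in all the object formats
    // the backend supports, per the deleted combine's comment.
    bool offsetFoldableIntoReloc(uint64_t Offset) {
      return Offset < (1ull << 21); // 2097144 folds, 2097152 does not
    }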
@@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
 ;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE+12
-;CHECK-APPLE-IOS: str w1, [x9, __MergedGlobals_y@PAGEOFF+12]
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
+;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
   %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
   %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
   store i32 %a1, i32* %x3, align 4
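
The churn in these global-merge tests is the same tradeoff: with the combine, the element offset rides in the relocation addend (adrp x9, __MergedGlobals_y@PAGE+12 and a str against @PAGEOFF+12), saving an explicit add; the reverted form recomputes the base with adrp plus add and applies the offset in the memory operand instead. A rough model of the two-part address split, assuming 4 KiB pages (illustration only, invented helper names):

    #include <cstdint>

    // adrp materializes the 4 KiB-aligned page of its target; the :lo12: part
    // supplies the remaining low 12 bits, either through an add or folded
    // into the load/store immediate.
    uint64_t pageOf(uint64_t Addr) { return Addr & ~uint64_t(0xfff); }
    uint64_t lo12Of(uint64_t Addr) { return Addr & uint64_t(0xfff); }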
@@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwind {
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
-; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #8]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
   store i32 %a2, i32* @n3, align 4
@@ -57,9 +57,10 @@ define void @f3(i32 %a1, i32 %a2) minsize nounwind {
 
 ; CHECK-LABEL: f4:
 define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
 ; CHECK-NEXT: adrp x9, _n4@PAGE
-; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
+; CHECK-NEXT: str w0, [x8, #8]
 ; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
@@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
-; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #12]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m2, align 4
   store i32 %a2, i32* @n2, align 4