mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[GISel] Support llvm.memcpy.inline
Differential revision: https://reviews.llvm.org/D105072
This commit is contained in:
parent
bd03d58d2a
commit
b3511ee3cf
@ -715,6 +715,36 @@ G_FENCE
|
|||||||
|
|
||||||
I couldn't find any documentation on this at the time of writing.
|
I couldn't find any documentation on this at the time of writing.
|
||||||
|
|
||||||
|
G_MEMCPY
|
||||||
|
^^^^^^^^
|
||||||
|
|
||||||
|
Generic memcpy. Expects two MachineMemOperands covering the store and load
|
||||||
|
respectively, in addition to explicit operands.
|
||||||
|
|
||||||
|
G_MEMCPY_INLINE
|
||||||
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Generic inlined memcpy. Like G_MEMCPY, but it is guaranteed that this version
|
||||||
|
will not be lowered as a call to an external function. Currently the size
|
||||||
|
operand is required to evaluate as a constant (not an immediate), though that is
|
||||||
|
expected to change when llvm.memcpy.inline is taught to support dynamic sizes.
|
||||||
|
|
||||||
|
G_MEMMOVE
|
||||||
|
^^^^^^^^^
|
||||||
|
|
||||||
|
Generic memmove. Similar to G_MEMCPY, but the source and destination memory
|
||||||
|
ranges are allowed to overlap.
|
||||||
|
|
||||||
|
G_MEMSET
|
||||||
|
^^^^^^^^
|
||||||
|
|
||||||
|
Generic memset. Expects a MachineMemOperand in addition to explicit operands.
|
||||||
|
|
||||||
|
G_BZERO
|
||||||
|
^^^^^^^
|
||||||
|
|
||||||
|
Generic bzero. Expects a MachineMemOperand in addition to explicit operands.
|
||||||
|
|
||||||
Control Flow
|
Control Flow
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
@ -532,16 +532,25 @@ public:
|
|||||||
/// combine functions. Returns true if changed.
|
/// combine functions. Returns true if changed.
|
||||||
bool tryCombine(MachineInstr &MI);
|
bool tryCombine(MachineInstr &MI);
|
||||||
|
|
||||||
|
/// Emit loads and stores that perform the given memcpy.
|
||||||
|
/// Assumes \p MI is a G_MEMCPY_INLINE
|
||||||
|
/// TODO: implement dynamically sized inline memcpy,
|
||||||
|
/// and rename: s/bool tryEmit/void emit/
|
||||||
|
bool tryEmitMemcpyInline(MachineInstr &MI);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Memcpy family optimization helpers.
|
// Memcpy family optimization helpers.
|
||||||
|
bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
|
||||||
|
uint64_t KnownLen, Align DstAlign, Align SrcAlign,
|
||||||
|
bool IsVolatile);
|
||||||
bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
|
bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
|
||||||
unsigned KnownLen, Align DstAlign, Align SrcAlign,
|
uint64_t KnownLen, uint64_t Limit, Align DstAlign,
|
||||||
bool IsVolatile);
|
Align SrcAlign, bool IsVolatile);
|
||||||
bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
|
bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
|
||||||
unsigned KnownLen, Align DstAlign, Align SrcAlign,
|
uint64_t KnownLen, Align DstAlign, Align SrcAlign,
|
||||||
bool IsVolatile);
|
bool IsVolatile);
|
||||||
bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
|
bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
|
||||||
unsigned KnownLen, Align DstAlign, bool IsVolatile);
|
uint64_t KnownLen, Align DstAlign, bool IsVolatile);
|
||||||
|
|
||||||
/// Given a non-indexed load or store instruction \p MI, find an offset that
|
/// Given a non-indexed load or store instruction \p MI, find an offset that
|
||||||
/// can be usefully and legally folded into it as a post-indexing operation.
|
/// can be usefully and legally folded into it as a post-indexing operation.
|
||||||
|
@ -739,6 +739,9 @@ HANDLE_TARGET_OPCODE(G_WRITE_REGISTER)
|
|||||||
/// llvm.memcpy intrinsic
|
/// llvm.memcpy intrinsic
|
||||||
HANDLE_TARGET_OPCODE(G_MEMCPY)
|
HANDLE_TARGET_OPCODE(G_MEMCPY)
|
||||||
|
|
||||||
|
/// llvm.memcpy.inline intrinsic
|
||||||
|
HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE)
|
||||||
|
|
||||||
/// llvm.memmove intrinsic
|
/// llvm.memmove intrinsic
|
||||||
HANDLE_TARGET_OPCODE(G_MEMMOVE)
|
HANDLE_TARGET_OPCODE(G_MEMMOVE)
|
||||||
|
|
||||||
|
@ -1353,6 +1353,14 @@ def G_MEMCPY : GenericInstruction {
|
|||||||
let mayStore = true;
|
let mayStore = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def G_MEMCPY_INLINE : GenericInstruction {
|
||||||
|
let OutOperandList = (outs);
|
||||||
|
let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size);
|
||||||
|
let hasSideEffects = false;
|
||||||
|
let mayLoad = true;
|
||||||
|
let mayStore = true;
|
||||||
|
}
|
||||||
|
|
||||||
def G_MEMMOVE : GenericInstruction {
|
def G_MEMMOVE : GenericInstruction {
|
||||||
let OutOperandList = (outs);
|
let OutOperandList = (outs);
|
||||||
let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
|
let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
|
||||||
|
@ -1218,7 +1218,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
|
bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
|
||||||
Register Val, unsigned KnownLen,
|
Register Val, uint64_t KnownLen,
|
||||||
Align Alignment, bool IsVolatile) {
|
Align Alignment, bool IsVolatile) {
|
||||||
auto &MF = *MI.getParent()->getParent();
|
auto &MF = *MI.getParent()->getParent();
|
||||||
const auto &TLI = *MF.getSubtarget().getTargetLowering();
|
const auto &TLI = *MF.getSubtarget().getTargetLowering();
|
||||||
@ -1330,10 +1330,51 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
|
||||||
|
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
|
||||||
|
|
||||||
|
Register Dst = MI.getOperand(0).getReg();
|
||||||
|
Register Src = MI.getOperand(1).getReg();
|
||||||
|
Register Len = MI.getOperand(2).getReg();
|
||||||
|
|
||||||
|
const auto *MMOIt = MI.memoperands_begin();
|
||||||
|
const MachineMemOperand *MemOp = *MMOIt;
|
||||||
|
bool IsVolatile = MemOp->isVolatile();
|
||||||
|
|
||||||
|
// See if this is a constant length copy
|
||||||
|
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
|
||||||
|
// FIXME: support dynamically sized G_MEMCPY_INLINE
|
||||||
|
assert(LenVRegAndVal.hasValue() &&
|
||||||
|
"inline memcpy with dynamic size is not yet supported");
|
||||||
|
uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
|
||||||
|
if (KnownLen == 0) {
|
||||||
|
MI.eraseFromParent();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto &DstMMO = **MI.memoperands_begin();
|
||||||
|
const auto &SrcMMO = **std::next(MI.memoperands_begin());
|
||||||
|
Align DstAlign = DstMMO.getBaseAlign();
|
||||||
|
Align SrcAlign = SrcMMO.getBaseAlign();
|
||||||
|
|
||||||
|
return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
|
||||||
|
IsVolatile);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
|
||||||
|
Register Src, uint64_t KnownLen,
|
||||||
|
Align DstAlign, Align SrcAlign,
|
||||||
|
bool IsVolatile) {
|
||||||
|
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
|
||||||
|
return optimizeMemcpy(MI, Dst, Src, KnownLen,
|
||||||
|
std::numeric_limits<uint64_t>::max(), DstAlign,
|
||||||
|
SrcAlign, IsVolatile);
|
||||||
|
}
|
||||||
|
|
||||||
bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
|
bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
|
||||||
Register Src, unsigned KnownLen,
|
Register Src, uint64_t KnownLen,
|
||||||
Align DstAlign, Align SrcAlign,
|
uint64_t Limit, Align DstAlign,
|
||||||
bool IsVolatile) {
|
Align SrcAlign, bool IsVolatile) {
|
||||||
auto &MF = *MI.getParent()->getParent();
|
auto &MF = *MI.getParent()->getParent();
|
||||||
const auto &TLI = *MF.getSubtarget().getTargetLowering();
|
const auto &TLI = *MF.getSubtarget().getTargetLowering();
|
||||||
auto &DL = MF.getDataLayout();
|
auto &DL = MF.getDataLayout();
|
||||||
@ -1343,7 +1384,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
|
|||||||
|
|
||||||
bool DstAlignCanChange = false;
|
bool DstAlignCanChange = false;
|
||||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
bool OptSize = shouldLowerMemFuncForSize(MF);
|
|
||||||
Align Alignment = commonAlignment(DstAlign, SrcAlign);
|
Align Alignment = commonAlignment(DstAlign, SrcAlign);
|
||||||
|
|
||||||
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
|
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
|
||||||
@ -1354,7 +1394,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
|
|||||||
// FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
|
// FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
|
||||||
// if the memcpy is in a tail call position.
|
// if the memcpy is in a tail call position.
|
||||||
|
|
||||||
unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
|
|
||||||
std::vector<LLT> MemOps;
|
std::vector<LLT> MemOps;
|
||||||
|
|
||||||
const auto &DstMMO = **MI.memoperands_begin();
|
const auto &DstMMO = **MI.memoperands_begin();
|
||||||
@ -1437,7 +1476,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
|
bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
|
||||||
Register Src, unsigned KnownLen,
|
Register Src, uint64_t KnownLen,
|
||||||
Align DstAlign, Align SrcAlign,
|
Align DstAlign, Align SrcAlign,
|
||||||
bool IsVolatile) {
|
bool IsVolatile) {
|
||||||
auto &MF = *MI.getParent()->getParent();
|
auto &MF = *MI.getParent()->getParent();
|
||||||
@ -1550,10 +1589,6 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
|
|||||||
|
|
||||||
auto MMOIt = MI.memoperands_begin();
|
auto MMOIt = MI.memoperands_begin();
|
||||||
const MachineMemOperand *MemOp = *MMOIt;
|
const MachineMemOperand *MemOp = *MMOIt;
|
||||||
bool IsVolatile = MemOp->isVolatile();
|
|
||||||
// Don't try to optimize volatile.
|
|
||||||
if (IsVolatile)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Align DstAlign = MemOp->getBaseAlign();
|
Align DstAlign = MemOp->getBaseAlign();
|
||||||
Align SrcAlign;
|
Align SrcAlign;
|
||||||
@ -1571,18 +1606,33 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
|
|||||||
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
|
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
|
||||||
if (!LenVRegAndVal)
|
if (!LenVRegAndVal)
|
||||||
return false; // Leave it to the legalizer to lower it to a libcall.
|
return false; // Leave it to the legalizer to lower it to a libcall.
|
||||||
unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
|
uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
|
||||||
|
|
||||||
if (KnownLen == 0) {
|
if (KnownLen == 0) {
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsVolatile = MemOp->isVolatile();
|
||||||
|
if (Opc == TargetOpcode::G_MEMCPY_INLINE)
|
||||||
|
return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
|
||||||
|
IsVolatile);
|
||||||
|
|
||||||
|
// Don't try to optimize volatile.
|
||||||
|
if (IsVolatile)
|
||||||
|
return false;
|
||||||
|
|
||||||
if (MaxLen && KnownLen > MaxLen)
|
if (MaxLen && KnownLen > MaxLen)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (Opc == TargetOpcode::G_MEMCPY)
|
if (Opc == TargetOpcode::G_MEMCPY) {
|
||||||
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
|
auto &MF = *MI.getParent()->getParent();
|
||||||
|
const auto &TLI = *MF.getSubtarget().getTargetLowering();
|
||||||
|
bool OptSize = shouldLowerMemFuncForSize(MF);
|
||||||
|
uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
|
||||||
|
return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
|
||||||
|
IsVolatile);
|
||||||
|
}
|
||||||
if (Opc == TargetOpcode::G_MEMMOVE)
|
if (Opc == TargetOpcode::G_MEMMOVE)
|
||||||
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
|
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
|
||||||
if (Opc == TargetOpcode::G_MEMSET)
|
if (Opc == TargetOpcode::G_MEMSET)
|
||||||
|
@ -1589,6 +1589,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
|
|||||||
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
|
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
|
||||||
DstAlign = MCI->getDestAlign().valueOrOne();
|
DstAlign = MCI->getDestAlign().valueOrOne();
|
||||||
SrcAlign = MCI->getSourceAlign().valueOrOne();
|
SrcAlign = MCI->getSourceAlign().valueOrOne();
|
||||||
|
} else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
|
||||||
|
DstAlign = MCI->getDestAlign().valueOrOne();
|
||||||
|
SrcAlign = MCI->getSourceAlign().valueOrOne();
|
||||||
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
|
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
|
||||||
DstAlign = MMI->getDestAlign().valueOrOne();
|
DstAlign = MMI->getDestAlign().valueOrOne();
|
||||||
SrcAlign = MMI->getSourceAlign().valueOrOne();
|
SrcAlign = MMI->getSourceAlign().valueOrOne();
|
||||||
@ -1597,10 +1600,12 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
|
|||||||
DstAlign = MSI->getDestAlign().valueOrOne();
|
DstAlign = MSI->getDestAlign().valueOrOne();
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to propagate the tail call flag from the IR inst as an argument.
|
if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
|
||||||
// Otherwise, we have to pessimize and assume later that we cannot tail call
|
// We need to propagate the tail call flag from the IR inst as an argument.
|
||||||
// any memory intrinsics.
|
// Otherwise, we have to pessimize and assume later that we cannot tail call
|
||||||
ICall.addImm(CI.isTailCall() ? 1 : 0);
|
// any memory intrinsics.
|
||||||
|
ICall.addImm(CI.isTailCall() ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
// Create mem operands to store the alignment and volatile info.
|
// Create mem operands to store the alignment and volatile info.
|
||||||
auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
|
auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
|
||||||
@ -2033,6 +2038,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
|
|||||||
getOrCreateVReg(*CI.getArgOperand(0)),
|
getOrCreateVReg(*CI.getArgOperand(0)),
|
||||||
MachineInstr::copyFlagsFromInstruction(CI));
|
MachineInstr::copyFlagsFromInstruction(CI));
|
||||||
return true;
|
return true;
|
||||||
|
case Intrinsic::memcpy_inline:
|
||||||
|
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
|
||||||
case Intrinsic::memcpy:
|
case Intrinsic::memcpy:
|
||||||
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
|
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
|
||||||
case Intrinsic::memmove:
|
case Intrinsic::memmove:
|
||||||
|
@ -1477,6 +1477,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case TargetOpcode::G_MEMCPY_INLINE:
|
||||||
case TargetOpcode::G_MEMCPY:
|
case TargetOpcode::G_MEMCPY:
|
||||||
case TargetOpcode::G_MEMMOVE: {
|
case TargetOpcode::G_MEMMOVE: {
|
||||||
ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
|
ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
|
||||||
@ -1507,6 +1508,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
|||||||
if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
|
if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
|
||||||
report("inconsistent load address space", MI);
|
report("inconsistent load address space", MI);
|
||||||
|
|
||||||
|
if (Opc != TargetOpcode::G_MEMCPY_INLINE)
|
||||||
|
if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL))
|
||||||
|
report("'tail' flag (operand 3) must be an immediate 0 or 1", MI);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case TargetOpcode::G_BZERO:
|
case TargetOpcode::G_BZERO:
|
||||||
@ -1532,6 +1537,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
|||||||
if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
|
if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
|
||||||
report("inconsistent " + Twine(Name, " address space"), MI);
|
report("inconsistent " + Twine(Name, " address space"), MI);
|
||||||
|
|
||||||
|
if (!MI->getOperand(MI->getNumOperands() - 1).isImm() ||
|
||||||
|
(MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL))
|
||||||
|
report("'tail' flag (last operand) must be an immediate 0 or 1", MI);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
|
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
|
||||||
|
@ -85,6 +85,8 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
|
|||||||
return Helper.tryCombineConcatVectors(MI);
|
return Helper.tryCombineConcatVectors(MI);
|
||||||
case TargetOpcode::G_SHUFFLE_VECTOR:
|
case TargetOpcode::G_SHUFFLE_VECTOR:
|
||||||
return Helper.tryCombineShuffleVector(MI);
|
return Helper.tryCombineShuffleVector(MI);
|
||||||
|
case TargetOpcode::G_MEMCPY_INLINE:
|
||||||
|
return Helper.tryEmitMemcpyInline(MI);
|
||||||
case TargetOpcode::G_MEMCPY:
|
case TargetOpcode::G_MEMCPY:
|
||||||
case TargetOpcode::G_MEMMOVE:
|
case TargetOpcode::G_MEMMOVE:
|
||||||
case TargetOpcode::G_MEMSET: {
|
case TargetOpcode::G_MEMSET: {
|
||||||
|
@ -272,6 +272,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
|
|||||||
return Helper.tryCombineConcatVectors(MI);
|
return Helper.tryCombineConcatVectors(MI);
|
||||||
case TargetOpcode::G_SHUFFLE_VECTOR:
|
case TargetOpcode::G_SHUFFLE_VECTOR:
|
||||||
return Helper.tryCombineShuffleVector(MI);
|
return Helper.tryCombineShuffleVector(MI);
|
||||||
|
case TargetOpcode::G_MEMCPY_INLINE:
|
||||||
|
return Helper.tryEmitMemcpyInline(MI);
|
||||||
case TargetOpcode::G_MEMCPY:
|
case TargetOpcode::G_MEMCPY:
|
||||||
case TargetOpcode::G_MEMMOVE:
|
case TargetOpcode::G_MEMMOVE:
|
||||||
case TargetOpcode::G_MEMSET: {
|
case TargetOpcode::G_MEMSET: {
|
||||||
|
@ -205,6 +205,8 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
|
|||||||
return true;
|
return true;
|
||||||
|
|
||||||
switch (MI.getOpcode()) {
|
switch (MI.getOpcode()) {
|
||||||
|
case TargetOpcode::G_MEMCPY_INLINE:
|
||||||
|
return Helper.tryEmitMemcpyInline(MI);
|
||||||
case TargetOpcode::G_CONCAT_VECTORS:
|
case TargetOpcode::G_CONCAT_VECTORS:
|
||||||
return Helper.tryCombineConcatVectors(MI);
|
return Helper.tryCombineConcatVectors(MI);
|
||||||
case TargetOpcode::G_SHUFFLE_VECTOR:
|
case TargetOpcode::G_SHUFFLE_VECTOR:
|
||||||
|
@ -42,6 +42,8 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
|
|||||||
switch (MI.getOpcode()) {
|
switch (MI.getOpcode()) {
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
case TargetOpcode::G_MEMCPY_INLINE:
|
||||||
|
return Helper.tryEmitMemcpyInline(MI);
|
||||||
case TargetOpcode::G_LOAD:
|
case TargetOpcode::G_LOAD:
|
||||||
case TargetOpcode::G_SEXTLOAD:
|
case TargetOpcode::G_SEXTLOAD:
|
||||||
case TargetOpcode::G_ZEXTLOAD: {
|
case TargetOpcode::G_ZEXTLOAD: {
|
||||||
|
86
test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir
Normal file
86
test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
--- |
|
||||||
|
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||||
|
target triple = "arm64-apple-darwin"
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0
|
||||||
|
|
||||||
|
define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32* %dst to i8*
|
||||||
|
%1 = bitcast i32* %src to i8*
|
||||||
|
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { argmemonly nounwind }
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_memcpy_inline
|
||||||
|
alignment: 4
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: _ }
|
||||||
|
- { id: 1, class: _ }
|
||||||
|
- { id: 2, class: _ }
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $x0, $x1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_memcpy_inline
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
|
||||||
|
; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||||
|
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
|
||||||
|
; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4)
|
||||||
|
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4)
|
||||||
|
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
|
||||||
|
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
|
||||||
|
; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4)
|
||||||
|
; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4)
|
||||||
|
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
|
||||||
|
; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
|
||||||
|
; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4)
|
||||||
|
; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4)
|
||||||
|
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
|
||||||
|
; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
|
||||||
|
; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load 16 from %ir.1 + 64, align 4)
|
||||||
|
; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store 16 into %ir.0 + 64, align 4)
|
||||||
|
; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
|
||||||
|
; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
|
||||||
|
; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load 16 from %ir.1 + 80, align 4)
|
||||||
|
; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store 16 into %ir.0 + 80, align 4)
|
||||||
|
; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
|
||||||
|
; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64)
|
||||||
|
; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load 16 from %ir.1 + 96, align 4)
|
||||||
|
; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store 16 into %ir.0 + 96, align 4)
|
||||||
|
; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
|
||||||
|
; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64)
|
||||||
|
; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load 16 from %ir.1 + 112, align 4)
|
||||||
|
; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store 16 into %ir.0 + 112, align 4)
|
||||||
|
; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127
|
||||||
|
; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64)
|
||||||
|
; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4)
|
||||||
|
; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
%0:_(p0) = COPY $x0
|
||||||
|
%1:_(p0) = COPY $x1
|
||||||
|
%2:_(s64) = G_CONSTANT i64 143
|
||||||
|
G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
|
||||||
|
RET_ReallyLR
|
||||||
|
|
||||||
|
...
|
125
test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll
Normal file
125
test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=irtranslator %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
define void @copy(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: copy
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @inline_copy(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: inline_copy
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @copy_volatile(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: copy_volatile
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @inline_copy_volatile(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: inline_copy_volatile
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @tail_copy(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: tail_copy
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @tail_inline_copy(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: tail_inline_copy
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @tail_copy_volatile(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: tail_copy_volatile
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @tail_inline_copy_volatile(i8* %dst, i8* %src) {
|
||||||
|
; CHECK-LABEL: name: tail_inline_copy_volatile
|
||||||
|
; CHECK: bb.1.entry:
|
||||||
|
; CHECK: liveins: $x0, $x1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
|
||||||
|
; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
|
||||||
|
; CHECK: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind
|
||||||
|
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind
|
@ -629,6 +629,9 @@
|
|||||||
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
|
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
|
||||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
||||||
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
|
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
|
||||||
|
# DEBUG-NEXT: G_MEMCPY_INLINE (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
|
||||||
|
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
|
||||||
|
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
|
||||||
# DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index
|
# DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index
|
||||||
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
|
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
|
||||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
||||||
|
@ -0,0 +1,81 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
|
||||||
|
--- |
|
||||||
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
|
||||||
|
target triple = "amdgcn-amd-amdhsa"
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0
|
||||||
|
|
||||||
|
define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32* %dst to i8*
|
||||||
|
%1 = bitcast i32* %src to i8*
|
||||||
|
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 13, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx900" }
|
||||||
|
attributes #1 = { "target-cpu"="gfx900" }
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_memcpy_inline
|
||||||
|
alignment: 1
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: _ }
|
||||||
|
- { id: 1, class: _ }
|
||||||
|
- { id: 2, class: sgpr_64 }
|
||||||
|
- { id: 3, class: _ }
|
||||||
|
- { id: 4, class: _ }
|
||||||
|
- { id: 5, class: _ }
|
||||||
|
- { id: 6, class: _ }
|
||||||
|
- { id: 7, class: _ }
|
||||||
|
- { id: 8, class: ccr_sgpr_64 }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$sgpr30_sgpr31', virtual-reg: '%2' }
|
||||||
|
frameInfo:
|
||||||
|
maxAlignment: 1
|
||||||
|
machineFunctionInfo:
|
||||||
|
maxKernArgAlign: 1
|
||||||
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||||||
|
frameOffsetReg: '$sgpr33'
|
||||||
|
stackPtrOffsetReg: '$sgpr32'
|
||||||
|
argumentInfo:
|
||||||
|
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||||
|
occupancy: 10
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_memcpy_inline
|
||||||
|
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||||
|
; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
|
||||||
|
; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||||
|
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[MV1]](p0) :: (load 8 from %ir.1, align 4)
|
||||||
|
; CHECK: G_STORE [[LOAD]](s64), [[MV]](p0) :: (store 8 into %ir.0, align 4)
|
||||||
|
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
|
||||||
|
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV1]], [[C]](s64)
|
||||||
|
; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from %ir.1 + 5, align 1, basealign 4)
|
||||||
|
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV]], [[C]](s64)
|
||||||
|
; CHECK: G_STORE [[LOAD1]](s64), [[PTR_ADD1]](p0) :: (store 8 into %ir.0 + 5, align 1, basealign 4)
|
||||||
|
; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
|
||||||
|
; CHECK: S_SETPC_B64_return [[COPY5]]
|
||||||
|
%3:_(s32) = COPY $vgpr0
|
||||||
|
%4:_(s32) = COPY $vgpr1
|
||||||
|
%0:_(p0) = G_MERGE_VALUES %3(s32), %4(s32)
|
||||||
|
%5:_(s32) = COPY $vgpr2
|
||||||
|
%6:_(s32) = COPY $vgpr3
|
||||||
|
%1:_(p0) = G_MERGE_VALUES %5(s32), %6(s32)
|
||||||
|
%2:sgpr_64 = COPY $sgpr30_sgpr31
|
||||||
|
%7:_(s64) = G_CONSTANT i64 13
|
||||||
|
G_MEMCPY_INLINE %0(p0), %1(p0), %7(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
|
||||||
|
%8:ccr_sgpr_64 = COPY %2
|
||||||
|
S_SETPC_B64_return %8
|
||||||
|
|
||||||
|
...
|
@ -0,0 +1,60 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=mips-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
|
||||||
|
--- |
|
||||||
|
; ModuleID = '../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll'
|
||||||
|
source_filename = "../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll"
|
||||||
|
target datalayout = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
|
||||||
|
target triple = "mipsel-pc-linux-gnu"
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0
|
||||||
|
|
||||||
|
define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast i32* %dst to i8*
|
||||||
|
%1 = bitcast i32* %src to i8*
|
||||||
|
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 2, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { argmemonly nofree nounwind willreturn }
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_memcpy_inline
|
||||||
|
alignment: 4
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: _ }
|
||||||
|
- { id: 1, class: _ }
|
||||||
|
- { id: 2, class: _ }
|
||||||
|
- { id: 3, class: _ }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$a0' }
|
||||||
|
- { reg: '$a1' }
|
||||||
|
frameInfo:
|
||||||
|
maxAlignment: 1
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $a0, $a1
|
||||||
|
|
||||||
|
; MIPS32-LABEL: name: test_memcpy_inline
|
||||||
|
; MIPS32: liveins: $a0, $a1
|
||||||
|
; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
|
||||||
|
; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
|
||||||
|
; MIPS32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.1, align 4)
|
||||||
|
; MIPS32: G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store 1 into %ir.0, align 4)
|
||||||
|
; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||||
|
; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
|
||||||
|
; MIPS32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.1 + 1, basealign 4)
|
||||||
|
; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
|
||||||
|
; MIPS32: G_STORE [[LOAD1]](s8), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 1, basealign 4)
|
||||||
|
; MIPS32: RetRA
|
||||||
|
%0:_(p0) = COPY $a0
|
||||||
|
%1:_(p0) = COPY $a1
|
||||||
|
%2:_(s64) = G_CONSTANT i64 2
|
||||||
|
%3:_(s32) = G_TRUNC %2(s64)
|
||||||
|
G_MEMCPY_INLINE %0(p0), %1(p0), %3(s32) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
|
||||||
|
RetRA
|
||||||
|
|
||||||
|
...
|
@ -27,7 +27,9 @@ body: |
|
|||||||
; CHECK: *** Bad machine code: inconsistent bzero address space ***
|
; CHECK: *** Bad machine code: inconsistent bzero address space ***
|
||||||
G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1)
|
G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1)
|
||||||
|
|
||||||
; CHECK: *** Bad machine code: bzero operand must be a pointer ***
|
; CHECK: *** Bad machine code: bzero operand must be a pointer ***
|
||||||
G_BZERO %cst1, %cst2, 0 :: (store 4)
|
G_BZERO %cst1, %cst2, 0 :: (store 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: 'tail' flag (last operand) must be an immediate 0 or 1 ***
|
||||||
|
G_BZERO %ptr, %cst2, 2 :: (store 4)
|
||||||
...
|
...
|
||||||
|
@ -47,4 +47,9 @@ body: |
|
|||||||
; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
|
; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
|
||||||
G_MEMCPY %0, %2, %2, 0 :: (store 4), (load 4)
|
G_MEMCPY %0, %2, %2, 0 :: (store 4), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
|
||||||
|
G_MEMCPY %0, %0, %2, %0 :: (store 4), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
|
||||||
|
G_MEMCPY %0, %0, %2, 2 :: (store 4), (load 4)
|
||||||
...
|
...
|
||||||
|
49
test/MachineVerifier/test_g_memcpy_inline.mir
Normal file
49
test/MachineVerifier/test_g_memcpy_inline.mir
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
|
||||||
|
# REQUIRES: aarch64-registered-target
|
||||||
|
---
|
||||||
|
name: test_memcpy_inline
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
liveins:
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
%0:_(p0) = G_CONSTANT i64 0
|
||||||
|
%1:_(p0) = G_CONSTANT i64 4
|
||||||
|
%2:_(s64) = G_CONSTANT i64 4
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (store 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: wrong memory operand types ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (load 4), (store 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (store unknown-size), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load unknown-size)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent store address space ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (store 4, addrspace 1), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent load address space ***
|
||||||
|
G_MEMCPY_INLINE %0, %1, %2 :: (store 4), (load 4, addrspace 1)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
|
||||||
|
G_MEMCPY_INLINE %2, %0, %2 :: (store 4), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
|
||||||
|
G_MEMCPY_INLINE %0, %2, %2 :: (store 4), (load 4)
|
||||||
|
...
|
55
test/MachineVerifier/test_g_memmove.mir
Normal file
55
test/MachineVerifier/test_g_memmove.mir
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
|
||||||
|
# REQUIRES: aarch64-registered-target
|
||||||
|
---
|
||||||
|
name: test_memmove
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
liveins:
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
|
||||||
|
%0:_(p0) = G_CONSTANT i64 0
|
||||||
|
%1:_(p0) = G_CONSTANT i64 4
|
||||||
|
%2:_(s64) = G_CONSTANT i64 4
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (store 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: wrong memory operand types ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (load 4), (store 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (store unknown-size), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load unknown-size)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent store address space ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (store 4, addrspace 1), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: inconsistent load address space ***
|
||||||
|
G_MEMMOVE %0, %1, %2, 0 :: (store 4), (load 4, addrspace 1)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
|
||||||
|
G_MEMMOVE %2, %0, %2, 0 :: (store 4), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
|
||||||
|
G_MEMMOVE %0, %2, %2, 0 :: (store 4), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
|
||||||
|
G_MEMMOVE %0, %0, %2, %0 :: (store 4), (load 4)
|
||||||
|
|
||||||
|
; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
|
||||||
|
G_MEMMOVE %0, %0, %2, 2 :: (store 4), (load 4)
|
||||||
|
...
|
Loading…
Reference in New Issue
Block a user