mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
Add one more argument to the prefetch intrinsic to indicate whether it's a data
or instruction cache access. Update the targets to match it and also teach autoupgrade. llvm-svn: 132976
This commit is contained in:
parent
56a82c5ef8
commit
b6afc5168f
@ -6064,7 +6064,7 @@ LLVM</a>.</p>
|
||||
|
||||
<h5>Syntax:</h5>
|
||||
<pre>
|
||||
declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>)
|
||||
declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
|
||||
</pre>
|
||||
|
||||
<h5>Overview:</h5>
|
||||
@ -6077,8 +6077,10 @@ LLVM</a>.</p>
|
||||
<p><tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the
|
||||
specifier determining if the fetch should be for a read (0) or write (1),
|
||||
and <tt>locality</tt> is a temporal locality specifier ranging from (0) - no
|
||||
locality, to (3) - extremely local keep in cache. The <tt>rw</tt>
|
||||
and <tt>locality</tt> arguments must be constant integers.</p>
|
||||
locality, to (3) - extremely local keep in cache. The <tt>cache type</tt>
|
||||
specifies whether the prefetch is performed on the data (1) or instruction (0)
|
||||
cache. The <tt>rw</tt>, <tt>locality</tt> and <tt>cache type</tt> arguments
|
||||
must be constant integers.</p>
|
||||
|
||||
<h5>Semantics:</h5>
|
||||
<p>This intrinsic does not modify the behavior of the program. In particular,
|
||||
|
@ -580,7 +580,8 @@ namespace ISD {
|
||||
|
||||
// PREFETCH - This corresponds to a prefetch intrinsic. It takes a chain as
|
||||
// its first operand. The other operands are the address to prefetch,
|
||||
// read / write specifier, and locality specifier.
|
||||
// read / write specifier, locality specifier and instruction / data cache
|
||||
// specifier.
|
||||
PREFETCH,
|
||||
|
||||
// OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
|
||||
|
@ -211,7 +211,8 @@ def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
|
||||
// however it does conveniently prevent the prefetch from being reordered
|
||||
// with respect to nearby accesses to the same memory.
|
||||
def int_prefetch : Intrinsic<[],
|
||||
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<0>]>;
|
||||
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
|
||||
|
||||
|
@ -197,8 +197,8 @@ def SDTSubVecInsert : SDTypeProfile<1, 3, [ // subvector insert
|
||||
SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3>
|
||||
]>;
|
||||
|
||||
def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch
|
||||
SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
|
||||
def SDTPrefetch : SDTypeProfile<0, 4, [ // prefetch
|
||||
SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisInt<1>
|
||||
]>;
|
||||
|
||||
def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barrier
|
||||
|
@ -4788,15 +4788,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||
return implVisitAluOverflow(I, ISD::SMULO);
|
||||
|
||||
case Intrinsic::prefetch: {
|
||||
SDValue Ops[4];
|
||||
SDValue Ops[5];
|
||||
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
|
||||
Ops[0] = getRoot();
|
||||
Ops[1] = getValue(I.getArgOperand(0));
|
||||
Ops[2] = getValue(I.getArgOperand(1));
|
||||
Ops[3] = getValue(I.getArgOperand(2));
|
||||
Ops[4] = getValue(I.getArgOperand(3));
|
||||
DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
|
||||
DAG.getVTList(MVT::Other),
|
||||
&Ops[0], 4,
|
||||
&Ops[0], 5,
|
||||
EVT::getIntegerVT(*Context, 8),
|
||||
MachinePointerInfo(I.getArgOperand(0)),
|
||||
0, /* align */
|
||||
|
@ -2281,12 +2281,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
|
||||
// ARMv7 with MP extension has PLDW.
|
||||
return Op.getOperand(0);
|
||||
|
||||
if (Subtarget->isThumb())
|
||||
unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
|
||||
if (Subtarget->isThumb()) {
|
||||
// Invert the bits.
|
||||
isRead = ~isRead & 1;
|
||||
unsigned isData = Subtarget->isThumb() ? 0 : 1;
|
||||
isData = ~isData & 1;
|
||||
}
|
||||
|
||||
// Currently there is no intrinsic that matches pli.
|
||||
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
|
||||
Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
|
||||
DAG.getConstant(isData, MVT::i32));
|
||||
|
@ -62,6 +62,9 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
|
||||
SDTCisInt<1>]>;
|
||||
|
||||
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
|
||||
|
||||
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
||||
@ -130,7 +133,7 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
|
||||
[SDNPHasChain]>;
|
||||
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
|
||||
[SDNPHasChain]>;
|
||||
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
|
||||
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
|
||||
|
||||
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
|
||||
|
@ -2006,13 +2006,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||
|
||||
// Prefetch intrinsic.
|
||||
def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
|
||||
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
|
||||
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
|
||||
def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
|
||||
"prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
|
||||
"prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
|
||||
def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
|
||||
"prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
|
||||
"prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
|
||||
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
|
||||
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
|
||||
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
|
||||
|
||||
// Load, store, and memory fence
|
||||
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
|
||||
|
@ -284,6 +284,30 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
break;
|
||||
}
|
||||
|
||||
// This upgrades the llvm.prefetch intrinsic to accept one more parameter,
|
||||
// which is an instruction / data cache identifier. The old version only
|
||||
// implicitly accepted the data version.
|
||||
if (Name.compare(5,8,"prefetch",8) == 0) {
|
||||
// Don't do anything if it has the correct number of arguments already
|
||||
if (FTy->getNumParams() == 4)
|
||||
break;
|
||||
|
||||
assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
|
||||
// We first need to change the name of the old (bad) intrinsic, because
|
||||
// its type is incorrect, but we cannot overload that name. We
|
||||
// arbitrarily unique it here allowing us to construct a correctly named
|
||||
// and typed function below.
|
||||
F->setName("");
|
||||
NewFn = cast<Function>(M->getOrInsertFunction(Name,
|
||||
FTy->getReturnType(),
|
||||
FTy->getParamType(0),
|
||||
FTy->getParamType(1),
|
||||
FTy->getParamType(2),
|
||||
FTy->getParamType(2),
|
||||
(Type*)0));
|
||||
return true;
|
||||
}
|
||||
|
||||
break;
|
||||
case 'x':
|
||||
// This fixes the poorly named crc32 intrinsics
|
||||
@ -1344,6 +1368,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
CI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
case Intrinsic::prefetch: {
|
||||
IRBuilder<> Builder(C);
|
||||
Builder.SetInsertPoint(CI->getParent(), CI);
|
||||
const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
|
||||
|
||||
// Add the extra "data cache" argument
|
||||
Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
CI->getArgOperand(2),
|
||||
llvm::ConstantInt::get(I32Ty, 1) };
|
||||
CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+4,
|
||||
CI->getName(), CI);
|
||||
NewCI->setTailCall(CI->isTailCall());
|
||||
NewCI->setCallingConv(CI->getCallingConv());
|
||||
// Handle any uses of the old CallInst.
|
||||
if (!CI->use_empty())
|
||||
// Replace all uses of the old call with the new call which has the
|
||||
// correct type.
|
||||
CI->replaceAllUsesWith(NewCI);
|
||||
|
||||
// Clean up the old call now that it has been completely upgraded.
|
||||
CI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -109,3 +109,11 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
|
||||
call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.prefetch(i8*, i32, i32) nounwind
|
||||
|
||||
define void @p(i8* %ptr) {
|
||||
; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
|
||||
tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
@ -17,8 +17,8 @@ entry:
|
||||
; THUMB2: t1:
|
||||
; THUMB2-NOT: pldw [r0]
|
||||
; THUMB2: pld [r0]
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -30,7 +30,7 @@ entry:
|
||||
; THUMB2: t2:
|
||||
; THUMB2: pld [r0, #1023]
|
||||
%tmp = getelementptr i8* %ptr, i32 1023
|
||||
tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
|
||||
tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 )
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -45,7 +45,7 @@ entry:
|
||||
%tmp1 = lshr i32 %offset, 2
|
||||
%tmp2 = add i32 %base, %tmp1
|
||||
%tmp3 = inttoptr i32 %tmp2 to i8*
|
||||
tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
|
||||
tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 )
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -59,8 +59,8 @@ entry:
|
||||
%tmp1 = shl i32 %offset, 2
|
||||
%tmp2 = add i32 %base, %tmp1
|
||||
%tmp3 = inttoptr i32 %tmp2 to i8*
|
||||
tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
|
||||
tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.prefetch(i8*, i32, i32) nounwind
|
||||
declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
|
||||
|
@ -6,11 +6,11 @@ entry:
|
||||
; CHECK: prefetcht1
|
||||
; CHECK: prefetcht0
|
||||
; CHECK: prefetchnta
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
|
||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.prefetch(i8*, i32, i32) nounwind
|
||||
declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
|
||||
|
Loading…
Reference in New Issue
Block a user