mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Handle masked loads and stores in MemoryLocation/Dependence
Differential Revision: https://reviews.llvm.org/D87061
This commit is contained in:
parent
f813a8ccef
commit
10332af38a
@ -166,6 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
|
||||
// These intrinsics don't really modify the memory, but returning Mod
|
||||
// will allow them to be handled conservatively.
|
||||
return ModRefInfo::Mod;
|
||||
case Intrinsic::masked_load:
|
||||
Loc = MemoryLocation::getForArgument(II, 0, TLI);
|
||||
return ModRefInfo::Ref;
|
||||
case Intrinsic::masked_store:
|
||||
Loc = MemoryLocation::getForArgument(II, 1, TLI);
|
||||
return ModRefInfo::Mod;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -442,7 +448,9 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
|
||||
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
|
||||
// If we reach a lifetime begin or end marker, then the query ends here
|
||||
// because the value is undefined.
|
||||
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
|
||||
Intrinsic::ID ID = II->getIntrinsicID();
|
||||
switch (ID) {
|
||||
case Intrinsic::lifetime_start:
|
||||
// FIXME: This only considers queries directly on the invariant-tagged
|
||||
// pointer, not on query pointers that are indexed off of them. It'd
|
||||
// be nice to handle that at some point (the right approach is to use
|
||||
@ -450,6 +458,19 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
|
||||
if (BatchAA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
|
||||
return MemDepResult::getDef(II);
|
||||
continue;
|
||||
case Intrinsic::masked_load:
|
||||
case Intrinsic::masked_store: {
|
||||
MemoryLocation Loc;
|
||||
/*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
|
||||
AliasResult R = BatchAA.alias(Loc, MemLoc);
|
||||
if (R == NoAlias)
|
||||
continue;
|
||||
if (R == MustAlias)
|
||||
return MemDepResult::getDef(II);
|
||||
if (ID == Intrinsic::masked_load)
|
||||
continue;
|
||||
return MemDepResult::getClobber(II);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,6 +176,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
|
||||
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
|
||||
AATags);
|
||||
|
||||
case Intrinsic::masked_load:
|
||||
assert(ArgIdx == 0 && "Invalid argument index");
|
||||
return MemoryLocation(
|
||||
Arg,
|
||||
LocationSize::upperBound(DL.getTypeStoreSize(II->getType())),
|
||||
AATags);
|
||||
|
||||
case Intrinsic::masked_store:
|
||||
assert(ArgIdx == 1 && "Invalid argument index");
|
||||
return MemoryLocation(
|
||||
Arg,
|
||||
LocationSize::upperBound(
|
||||
DL.getTypeStoreSize(II->getArgOperand(0)->getType())),
|
||||
AATags);
|
||||
|
||||
case Intrinsic::invariant_end:
|
||||
// The first argument to an invariant.end is a "descriptor" type (e.g. a
|
||||
// pointer to an empty struct) which is never actually dereferenced.
|
||||
|
@ -1,6 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -gvn -S < %s | FileCheck %s
|
||||
|
||||
; Check that in both cases the second load is recognized as redundant
|
||||
; and is removed.
|
||||
|
||||
define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) {
|
||||
; CHECK-LABEL: @f0(
|
||||
; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]]
|
||||
@ -21,8 +24,7 @@ define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) {
|
||||
; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1
|
||||
; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef)
|
||||
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]])
|
||||
; CHECK-NEXT: [[V3:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef)
|
||||
; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V3]]
|
||||
; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]]
|
||||
; CHECK-NEXT: ret <128 x i8> [[V4]]
|
||||
;
|
||||
%v0 = icmp eq <128 x i8> %a1, %a2
|
||||
|
Loading…
x
Reference in New Issue
Block a user