1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

Pointers in Masked Load, Store, Gather, Scatter intrinsics

The masked intrinsics support all integer and floating point data types. I added the pointer type to this list.
Added tests for CodeGen and for Loop Vectorizer.
Updated the Language Reference.

Differential Revision: http://reviews.llvm.org/D14150

llvm-svn: 253544
This commit is contained in:
Elena Demikhovsky 2015-11-19 07:17:16 +00:00
parent b753649d63
commit fea4d52acf
6 changed files with 207 additions and 21 deletions

View File

@ -11428,12 +11428,16 @@ LLVM provides intrinsics for predicated vector load and store operations. The pr
Syntax:
"""""""
This is an overloaded intrinsic. The loaded data is a vector of any integer or floating point data type.
This is an overloaded intrinsic. The loaded data is a vector of any integer, floating point or pointer data type.
::
declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
;; The data is a vector of pointers to double
declare <8 x double*> @llvm.masked.load.v8p0f64 (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
;; The data is a vector of function pointers
declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
Overview:
"""""""""
@ -11469,12 +11473,16 @@ The result of this operation is equivalent to a regular vector load instruction
Syntax:
"""""""
This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type.
This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type.
::
declare void @llvm.masked.store.v8i32 (<8 x i32> <value>, <8 x i32> * <ptr>, i32 <alignment>, <8 x i1> <mask>)
declare void @llvm.masked.store.v16f32(<16 x i32> <value>, <16 x i32>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
declare void @llvm.masked.store.v8i32 (<8 x i32> <value>, <8 x i32>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
declare void @llvm.masked.store.v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
;; The data is a vector of pointers to double
declare void @llvm.masked.store.v8p0f64 (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
;; The data is a vector of function pointers
declare void @llvm.masked.store.v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
Overview:
"""""""""
@ -11515,12 +11523,13 @@ LLVM provides intrinsics for vector gather and scatter operations. They are simi
Syntax:
"""""""
This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer or floating point data type gathered together into one vector.
This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating point or pointer data type gathered together into one vector.
::
declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1> <mask>, <8 x float*> <passthru>)
Overview:
"""""""""
@ -11568,12 +11577,13 @@ The semantics of this operation are equivalent to a sequence of conditional scal
Syntax:
"""""""
This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type. Each vector element is stored in an arbitrary memory addresses. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element.
This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element.
::
declare void @llvm.masked.scatter.v8i32 (<8 x i32> <value>, <8 x i32*> <ptrs>, i32 <alignment>, <8 x i1> <mask>)
declare void @llvm.masked.scatter.v16f32(<16 x i32> <value>, <16 x i32*> <ptrs>, i32 <alignment>, <16 x i1> <mask>)
declare void @llvm.masked.scatter.v8i32 (<8 x i32> <value>, <8 x i32*> <ptrs>, i32 <alignment>, <8 x i1> <mask>)
declare void @llvm.masked.scatter.v16f32 (<16 x float> <value>, <16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>)
declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1> <mask>)
Overview:
"""""""""

View File

@ -492,7 +492,10 @@ static std::string getMangledTypeStr(Type* Ty) {
Result += "vararg";
// Ensure nested function types are distinguishable.
Result += "f";
} else if (Ty)
} else if (isa<VectorType>(Ty))
Result += "v" + utostr(Ty->getVectorNumElements()) +
getMangledTypeStr(Ty->getVectorElementType());
else if (Ty)
Result += EVT::getEVT(Ty).getEVTString();
return Result;
}

View File

@ -1160,10 +1160,8 @@ int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
/// Report whether a masked load of \p DataTy is legal on this subtarget.
/// The decision is based on the bit width of the scalar element type: it must
/// be at least 32 bits and the subtarget must have AVX2. For pointer elements
/// the width is taken from DL.getPointerSizeInBits().
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
Type *ScalarTy = DataTy->getScalarType();
// TODO: Pointers should also be legal,
// but it requires additional support in composing intrinsics name.
// getPrimitiveSizeInBits() returns 0 for PointerType
int DataWidth = ScalarTy->getPrimitiveSizeInBits();
int DataWidth = isa<PointerType>(ScalarTy) ?
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
return (DataWidth >= 32 && ST->hasAVX2());
}
@ -1186,10 +1184,8 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
if (isa<VectorType>(DataTy) && !isPowerOf2_32(DataTy->getVectorNumElements()))
return false;
Type *ScalarTy = DataTy->getScalarType();
// TODO: Pointers should also be legal,
// but it requires additional support in composing intrinsics name.
// getPrimitiveSizeInBits() returns 0 for PointerType
int DataWidth = ScalarTy->getPrimitiveSizeInBits();
int DataWidth = isa<PointerType>(ScalarTy) ?
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
// AVX-512 allows gather and scatter
return DataWidth >= 32 && ST->hasAVX512();

View File

@ -330,3 +330,13 @@ define <3 x i32> @test16(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
ret <3 x i32>%res
}
; Masked gather whose element type is itself a pointer (float*).
declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
; test17 gathers 16 float* values through a vector of float** addresses, using
; an all-true mask and an undef pass-through value. The checks below expect two
; vpgatherqq instructions (presumably one per group of eight 64-bit pointers;
; confirm against the RUN line of this file).
; KNL-LABEL: test17
; KNL: vpgatherqq
; KNL: vpgatherqq
define <16 x float*> @test17(<16 x float**> %ptrs) {
%res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
ret <16 x float*>%res
}

View File

@ -300,3 +300,28 @@ declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
; Masked load whose element type is a pointer (i32*).
declare <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
; test23 loads 16 i32* values from %addr. The mask is computed inside the
; function by comparing each pointer in %trigger with null, and the
; pass-through value is zeroinitializer. The checks below expect two
; zero-masked vmovdqu64 loads, one at offset 64 and one at offset 0 from %rdi.
; AVX512-LABEL: test23
; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
; Lanes are enabled where the corresponding %trigger pointer is null.
%mask = icmp eq <16 x i32*> %trigger, zeroinitializer
%res = call <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
ret <16 x i32*> %res
}
; Named struct type used as the pointee of the loaded pointers in test24.
%mystruct = type { i16, i16, [1 x i8*] }
; Masked load whose element type is a pointer to the named struct above.
declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
; test24 loads 16 %mystruct* values from %addr. The mask arrives directly as a
; function argument and the pass-through value is zeroinitializer. The checks
; below expect a zero-masked vmovdqu64 load at offset 0, a kshiftrw of the mask
; register by 8, and a second zero-masked vmovdqu64 load at offset 64.
; AVX512-LABEL: test24
; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
; AVX512: kshiftrw $8, %k1, %k1
; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
%res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
ret <16 x %mystruct*> %res
}

View File

@ -499,4 +499,146 @@ for.end: ; preds = %for.cond
ret void
}
; void foo7 (double * __restrict__ out, double ** __restrict__ in,
; bool * __restrict__ trigger, unsigned size) {
;
; for (unsigned i=0; i<size; i++)
; if (trigger[i] && (in[i] != 0))
; out[i] = (double) 0.5;
; }
; The function below is the IR for the C loop above, with every variable kept
; in a stack slot. The checks expect the conditional in[i] loads to appear as a
; masked load of <8 x double*> and the conditional out[i] stores to appear as a
; masked store of <8 x double>.
;AVX512-LABEL: @foo7
;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
;AVX512: call void @llvm.masked.store.v8f64
;AVX512: ret void
define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
entry:
; Allocate a stack slot for each argument and for the induction variable i.
%out.addr = alloca double*, align 8
%in.addr = alloca double**, align 8
%trigger.addr = alloca i8*, align 8
%size.addr = alloca i32, align 4
%i = alloca i32, align 4
; Spill the arguments and initialise i to 0.
store double* %out, double** %out.addr, align 8
store double** %in, double*** %in.addr, align 8
store i8* %trigger, i8** %trigger.addr, align 8
store i32 %size, i32* %size.addr, align 4
store i32 0, i32* %i, align 4
br label %for.cond
; Loop header: keep iterating while i < size (unsigned comparison).
for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %i, align 4
%1 = load i32, i32* %size.addr, align 4
%cmp = icmp ult i32 %0, %1
br i1 %cmp, label %for.body, label %for.end
; First half of the condition: load trigger[i] and truncate it to i1.
for.body: ; preds = %for.cond
%2 = load i32, i32* %i, align 4
%idxprom = zext i32 %2 to i64
%3 = load i8*, i8** %trigger.addr, align 8
%arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
%4 = load i8, i8* %arrayidx, align 1
%tobool = trunc i8 %4 to i1
br i1 %tobool, label %land.lhs.true, label %if.end
; Second half of the condition: load the pointer in[i] and compare it with null.
land.lhs.true: ; preds = %for.body
%5 = load i32, i32* %i, align 4
%idxprom1 = zext i32 %5 to i64
%6 = load double**, double*** %in.addr, align 8
%arrayidx2 = getelementptr inbounds double*, double** %6, i64 %idxprom1
%7 = load double*, double** %arrayidx2, align 8
%cmp3 = icmp ne double* %7, null
br i1 %cmp3, label %if.then, label %if.end
; Both conditions hold: store 0.5 to out[i].
if.then: ; preds = %land.lhs.true
%8 = load i32, i32* %i, align 4
%idxprom4 = zext i32 %8 to i64
%9 = load double*, double** %out.addr, align 8
%arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
store double 5.000000e-01, double* %arrayidx5, align 8
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %for.body
br label %for.inc
; Increment i and branch back to the loop header.
for.inc: ; preds = %if.end
%10 = load i32, i32* %i, align 4
%inc = add i32 %10, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
;typedef int (*fp)();
;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
;
; for (unsigned i=0; i<size; i++)
; if (trigger[i] && (in[i] != 0))
; out[i] = (double) 0.5;
;}
; Same loop shape as the C code above, but in[] holds function pointers
; (i32 ()*). The checks expect the conditional in[i] loads to appear as a masked
; load of <8 x i32 ()*> and the conditional out[i] stores to appear as a masked
; store of <8 x double>.
;AVX512-LABEL: @foo8
;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
;AVX512: call void @llvm.masked.store.v8f64
;AVX512: ret void
define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
entry:
; Allocate a stack slot for each argument and for the induction variable i.
%out.addr = alloca double*, align 8
%in.addr = alloca i32 ()**, align 8
%trigger.addr = alloca i8*, align 8
%size.addr = alloca i32, align 4
%i = alloca i32, align 4
; Spill the arguments and initialise i to 0.
store double* %out, double** %out.addr, align 8
store i32 ()** %in, i32 ()*** %in.addr, align 8
store i8* %trigger, i8** %trigger.addr, align 8
store i32 %size, i32* %size.addr, align 4
store i32 0, i32* %i, align 4
br label %for.cond
; Loop header: keep iterating while i < size (unsigned comparison).
for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %i, align 4
%1 = load i32, i32* %size.addr, align 4
%cmp = icmp ult i32 %0, %1
br i1 %cmp, label %for.body, label %for.end
; First half of the condition: load trigger[i] and truncate it to i1.
for.body: ; preds = %for.cond
%2 = load i32, i32* %i, align 4
%idxprom = zext i32 %2 to i64
%3 = load i8*, i8** %trigger.addr, align 8
%arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
%4 = load i8, i8* %arrayidx, align 1
%tobool = trunc i8 %4 to i1
br i1 %tobool, label %land.lhs.true, label %if.end
; Second half of the condition: load the function pointer in[i] and compare it
; with null.
land.lhs.true: ; preds = %for.body
%5 = load i32, i32* %i, align 4
%idxprom1 = zext i32 %5 to i64
%6 = load i32 ()**, i32 ()*** %in.addr, align 8
%arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %6, i64 %idxprom1
%7 = load i32 ()*, i32 ()** %arrayidx2, align 8
%cmp3 = icmp ne i32 ()* %7, null
br i1 %cmp3, label %if.then, label %if.end
; Both conditions hold: store 0.5 to out[i].
if.then: ; preds = %land.lhs.true
%8 = load i32, i32* %i, align 4
%idxprom4 = zext i32 %8 to i64
%9 = load double*, double** %out.addr, align 8
%arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
store double 5.000000e-01, double* %arrayidx5, align 8
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %for.body
br label %for.inc
; Increment i and branch back to the loop header.
for.inc: ; preds = %if.end
%10 = load i32, i32* %i, align 4
%inc = add i32 %10, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
ret void
}