1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[x86, InstCombine] fix masked load pass-through operand to be a zero vector

This bug was introduced with:
http://reviews.llvm.org/rL262269

AVX masked loads are specified to set vector lanes to zero when the high bit of the mask
element for that lane is zero:
"If the mask is 0, the corresponding data element is set to zero in the load form of these
instructions, and unmodified in the store form." --Intel manual

Differential Revision: http://reviews.llvm.org/D19017

llvm-svn: 266148
This commit is contained in:
Sanjay Patel 2016-04-12 23:16:23 +00:00
parent de85c02fba
commit ba900a7bb3
2 changed files with 17 additions and 14 deletions

View File

@@ -834,11 +834,12 @@ static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
   Value *Ptr = II.getOperand(0);
   Value *Mask = II.getOperand(1);
+  Constant *ZeroVec = Constant::getNullValue(II.getType());
 
   // Special case a zero mask since that's not a ConstantDataVector.
-  // This masked load instruction does nothing, so return an undef.
+  // This masked load instruction creates a zero vector.
   if (isa<ConstantAggregateZero>(Mask))
-    return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+    return IC.replaceInstUsesWith(II, ZeroVec);
 
   auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
   if (!ConstMask)
@@ -857,7 +858,9 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
   // on each element's most significant bit (the sign bit).
   Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
 
-  CallInst *NewMaskedLoad = IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask);
+  // The pass-through vector for an x86 masked load is a zero vector.
+  CallInst *NewMaskedLoad =
+      IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
 
   return IC.replaceInstUsesWith(II, NewMaskedLoad);
 }

View File

@@ -13,14 +13,14 @@ define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
 ; CHECK-NEXT:  ret <4 x float> %ld
 }
 
-; Zero mask is a nop.
+; Zero mask returns a zero vector.
 
 define <4 x float> @mload_zeros(i8* %f) {
   %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
   ret <4 x float> %ld
 
 ; CHECK-LABEL: @mload_zeros(
-; CHECK-NEXT:  ret <4 x float> undef
+; CHECK-NEXT:  ret <4 x float> zeroinitializer
 }
 
 ; Only the sign bit matters.
@@ -30,7 +30,7 @@ define <4 x float> @mload_fake_ones(i8* %f) {
   ret <4 x float> %ld
 
 ; CHECK-LABEL: @mload_fake_ones(
-; CHECK-NEXT:  ret <4 x float> undef
+; CHECK-NEXT:  ret <4 x float> zeroinitializer
 }
 
 ; All mask bits are set, so this is just a vector load.
@@ -53,7 +53,7 @@ define <4 x float> @mload_one_one(i8* %f) {
 
 ; CHECK-LABEL: @mload_one_one(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x float>*
-; CHECK-NEXT:  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> undef)
+; CHECK-NEXT:  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> zeroinitializer)
 ; CHECK-NEXT:  ret <4 x float> %1
 }
 
@@ -65,7 +65,7 @@ define <2 x double> @mload_one_one_double(i8* %f) {
 
 ; CHECK-LABEL: @mload_one_one_double(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <2 x double>*
-; CHECK-NEXT:  %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> undef)
+; CHECK-NEXT:  %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> zeroinitializer)
 ; CHECK-NEXT:  ret <2 x double> %1
 }
 
@@ -77,7 +77,7 @@ define <8 x float> @mload_v8f32(i8* %f) {
 
 ; CHECK-LABEL: @mload_v8f32(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <8 x float>*
-; CHECK-NEXT:  %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> undef)
+; CHECK-NEXT:  %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> zeroinitializer)
 ; CHECK-NEXT:  ret <8 x float> %1
 }
 
@@ -87,7 +87,7 @@ define <4 x double> @mload_v4f64(i8* %f) {
 
 ; CHECK-LABEL: @mload_v4f64(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x double>*
-; CHECK-NEXT:  %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> undef)
+; CHECK-NEXT:  %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> zeroinitializer)
 ; CHECK-NEXT:  ret <4 x double> %1
 }
 
@@ -99,7 +99,7 @@ define <4 x i32> @mload_v4i32(i8* %f) {
 
 ; CHECK-LABEL: @mload_v4i32(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x i32>*
-; CHECK-NEXT:  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> undef)
+; CHECK-NEXT:  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
 ; CHECK-NEXT:  ret <4 x i32> %1
 }
 
@@ -109,7 +109,7 @@ define <2 x i64> @mload_v2i64(i8* %f) {
 
 ; CHECK-LABEL: @mload_v2i64(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <2 x i64>*
-; CHECK-NEXT:  %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> undef)
+; CHECK-NEXT:  %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> zeroinitializer)
 ; CHECK-NEXT:  ret <2 x i64> %1
 }
 
@@ -119,7 +119,7 @@ define <8 x i32> @mload_v8i32(i8* %f) {
 
 ; CHECK-LABEL: @mload_v8i32(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <8 x i32>*
-; CHECK-NEXT:  %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> undef)
+; CHECK-NEXT:  %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> zeroinitializer)
 ; CHECK-NEXT:  ret <8 x i32> %1
 }
 
@@ -129,7 +129,7 @@ define <4 x i64> @mload_v4i64(i8* %f) {
 
 ; CHECK-LABEL: @mload_v4i64(
 ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x i64>*
-; CHECK-NEXT:  %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> undef)
+; CHECK-NEXT:  %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> zeroinitializer)
 ; CHECK-NEXT:  ret <4 x i64> %1
 }