mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[x86, InstCombine] delete x86 SSE2 masked store with zero mask
This follows up on the related AVX instruction transforms, but this one is too strange to do anything more with. Intel's behavioral description of this instruction in its Software Developer's Manual is tragi-comic. llvm-svn: 263340
This commit is contained in:
parent
eaa9a4f81a
commit
60bcb48851
@ -876,6 +876,11 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
|
||||
// anything else at this level.
|
||||
if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
|
||||
return false;
|
||||
|
||||
auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
|
||||
if (!ConstMask)
|
||||
return false;
|
||||
@ -1674,6 +1679,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
return I;
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_sse2_maskmov_dqu:
|
||||
case Intrinsic::x86_avx_maskstore_ps:
|
||||
case Intrinsic::x86_avx_maskstore_pd:
|
||||
case Intrinsic::x86_avx_maskstore_ps_256:
|
||||
|
@ -267,6 +267,17 @@ define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
; The original SSE2 masked store variant: with an all-zero mask, the call should be deleted.
|
||||
|
||||
define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
|
||||
tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
||||
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
|
||||
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
|
||||
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
|
||||
@ -287,3 +298,5 @@ declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
|
||||
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
|
||||
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
|
||||
|
||||
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user