[InstCombine][MMX] Extend SimplifyDemandedUseBits MOVMSK support to MMX
Add the MMX implementation to the SimplifyDemandedUseBits SSE/AVX MOVMSK support added in D19614.

Requires a minor tweak as llvm.x86.mmx.pmovmskb takes an x86_mmx argument, so we have to be explicit about the implied v8i8 vector type.

llvm-svn: 271789
commit c995ee7b75 (parent 46f3fd369e)
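MOVMSK packs one sign bit per input element into the low bits of a 32-bit result and zeroes everything above them; for the MMX variant the implied <8 x i8> type means only bits 0..7 can ever be set. A minimal standalone C++ sketch of that behaviour (not part of the patch; movmskb8 is a hypothetical scalar model of llvm.x86.mmx.pmovmskb):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of llvm.x86.mmx.pmovmskb: collect the sign
// bit of each of the 8 bytes into bits [7:0] of a 32-bit result.
// Bits [31:8] are always zero.
static uint32_t movmskb8(const int8_t (&v)[8]) {
  uint32_t mask = 0;
  for (int i = 0; i < 8; ++i)
    mask |= (uint32_t)((uint8_t)v[i] >> 7) << i;
  return mask;
}

int main() {
  int8_t v[8] = {-1, 1, -128, 127, 0, -2, 3, -4};
  uint32_t m = movmskb8(v);
  // (m & 255) == m and (m & ~255u) == 0 hold for every input, which is
  // what the demanded-bits fold exploits.
  printf("movmskb8 = 0x%08x\n", (unsigned)m);
  return 0;
}

Because bits 8..31 are always zero, masking the result with 255 is redundant and masking with -256 folds to 0 - exactly what the new tests below exercise.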
@@ -768,6 +768,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // TODO: Could compute known zero/one bits based on the input.
         break;
       }
+      case Intrinsic::x86_mmx_pmovmskb:
       case Intrinsic::x86_sse_movmsk_ps:
       case Intrinsic::x86_sse2_movmsk_pd:
       case Intrinsic::x86_sse2_pmovmskb_128:
@@ -776,9 +777,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       case Intrinsic::x86_avx2_pmovmskb: {
         // MOVMSK copies the vector elements' sign bits to the low bits
         // and zeros the high bits.
-        auto Arg = II->getArgOperand(0);
-        auto ArgType = cast<VectorType>(Arg->getType());
-        unsigned ArgWidth = ArgType->getNumElements();
+        unsigned ArgWidth;
+        if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
+          ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
+        } else {
+          auto Arg = II->getArgOperand(0);
+          auto ArgType = cast<VectorType>(Arg->getType());
+          ArgWidth = ArgType->getNumElements();
+        }

         // If we don't need any of low bits then return zero,
         // we know that DemandedMask is non-zero already.
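The hunk above stops before the demanded-bits bookkeeping that consumes ArgWidth, so here is a hedged plain-C++ model of that logic (deliberately using uint32_t masks instead of LLVM's APInt; analyzeMovmsk and MovmskFacts are made-up names, not InstCombine API): if none of the low ArgWidth bits are demanded the whole call can be replaced by zero, otherwise the bits above ArgWidth are at least known to be zero.

#include <cassert>
#include <cstdint>

// Made-up names for illustration only; not InstCombine API.
struct MovmskFacts {
  bool FoldsToZero;   // caller only demands bits MOVMSK always zeroes
  uint32_t KnownZero; // bits guaranteed to be zero in the result
};

static MovmskFacts analyzeMovmsk(unsigned ArgWidth, uint32_t DemandedMask) {
  assert(ArgWidth >= 1 && ArgWidth <= 32);
  uint32_t LowBits = ArgWidth == 32 ? ~0u : ((1u << ArgWidth) - 1);
  MovmskFacts F;
  F.FoldsToZero = (DemandedMask & LowBits) == 0; // no demanded low bits
  F.KnownZero = ~LowBits;                        // MOVMSK zeroes the rest
  return F;
}

int main() {
  // x86_mmx pmovmskb: ArgWidth == 8, the implied <8 x i8> type.
  assert(analyzeMovmsk(8, 0xFFFFFF00u).FoldsToZero);  // and %r, -256 -> 0
  assert(!analyzeMovmsk(8, 0x000000FFu).FoldsToZero); // and %r, 255 kept live
  assert(analyzeMovmsk(8, 0x000000FFu).KnownZero == 0xFFFFFF00u);
  return 0;
}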
@@ -7,6 +7,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; DemandedBits - MOVMSK zeros the upper bits of the result.
 ;

+define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+  %2 = and i32 %1, 255
+  ret i32 %2
+}
+
 define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
 ; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
@@ -63,6 +73,15 @@ define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
 ; DemandedBits - If we don't use the lower bits then we just return zero.
 ;

+define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+  %2 = and i32 %1, -256
+  ret i32 %2
+}
+
 define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
 ; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
 ; CHECK-NEXT:    ret i32 0
@@ -110,6 +129,7 @@ define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {

 ; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.

+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
+
 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
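As the comment above notes, llvm.x86.avx2.pmovmskb can set all 32 result bits, so it is the one MOVMSK intrinsic with no known-zero upper bits. A hedged, illustrative C++ summary (not from the patch) of the effective ArgWidth for each MOVMSK-style intrinsic handled here:

#include <cstdio>

// Illustrative table, not from the patch.
struct MovmskEntry { const char *Intrinsic; unsigned ArgWidth; };

static const MovmskEntry MovmskWidths[] = {
    {"llvm.x86.mmx.pmovmskb", 8},        // x86_mmx treated as <8 x i8>
    {"llvm.x86.sse.movmsk.ps", 4},       // <4 x float>
    {"llvm.x86.sse2.movmsk.pd", 2},      // <2 x double>
    {"llvm.x86.sse2.pmovmskb.128", 16},  // <16 x i8>
    {"llvm.x86.avx.movmsk.ps.256", 8},   // <8 x float>
    {"llvm.x86.avx.movmsk.pd.256", 4},   // <4 x double>
    {"llvm.x86.avx2.pmovmskb", 32},      // <32 x i8>: all 32 bits may be set
};

int main() {
  for (const MovmskEntry &E : MovmskWidths)
    printf("%-28s ArgWidth = %2u\n", E.Intrinsic, E.ArgWidth);
  return 0;
}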