From 448790d566daf0179ca3cbf25eb3d0b916147418 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 12 Apr 2012 07:23:00 +0000 Subject: [PATCH] Fix 128-bit ptest intrinsics to take v2i64 instead of v4f32 since these are integer instructions. llvm-svn: 154580 --- include/llvm/IntrinsicsX86.td | 6 +++--- lib/Target/X86/X86InstrSSE.td | 8 ++++---- test/CodeGen/X86/avx-intrinsics-x86.ll | 18 +++++++++--------- test/CodeGen/X86/sse41.ll | 18 +++++++++--------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index f4abba98c08..a6fda4a3afc 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -904,13 +904,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Test instruction with bitwise comparison. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7741f409db0..408ab16778d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6331,11 +6331,11 @@ def : Pat<(f64 (ftrunc FR64:$src)), let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, OpSize, VEX; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize, VEX; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), @@ -6351,11 +6351,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; } diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 616601a9438..b33493252a5 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1078,33 +1078,33 @@ define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone -define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { +define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vptest ; CHECK: sbbl - %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res } -declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone -define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { +define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vptest ; CHECK: seta ; CHECK: movzbl - %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res } -declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone -define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { +define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vptest ; CHECK: sete ; CHECK: movzbl - %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res } -declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 2ac4cb435a7..54264b16aea 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -183,8 +183,8 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: insertps $0, %xmm1, %xmm0 } -define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind { - %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone +define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 ; X32: _ptestz_1: ; X32: ptest %xmm1, %xmm0 @@ -195,8 +195,8 @@ define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: sete %al } -define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind { - %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone +define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 ; X32: _ptestz_2: ; X32: ptest %xmm1, %xmm0 @@ -207,8 +207,8 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: sbbl %eax } -define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind { - %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone +define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 ; X32: _ptestz_3: ; X32: ptest %xmm1, %xmm0 @@ -220,9 +220,9 @@ define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind { } -declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone -declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone -declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone ; This used to compile to insertps $0 + insertps $16. insertps $0 is always ; pointless.