llvm-mirror/test/CodeGen/X86/xop-ifma.ll
Craig Topper 34c96d4546 Revert r363802, r363850, and r363856 "[TargetLowering] SimplifyDemandedBits..."
This reverts the following patches.
"[TargetLowering] SimplifyDemandedBits SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VECTOR_INREG"
"[TargetLowering] SimplifyDemandedBits ZERO_EXTEND_VECTOR_INREG -> ANY_EXTEND_VECTOR_INREG"
"[TargetLowering] SimplifyDemandedBits - add ANY_EXTEND_VECTOR_INREG support"

We can end up with an any_extend_vector_inreg with a 256 bit result type
and a 128 bit source type. This is allowed by the ISD opcode, but the
generic operation legalizer is only able to expand cases where the total
vector width of the source and the result is the same.

The X86 backend creates these mismatched cases for zext_vec_inreg/sext_vec_inreg.
The reverted SimplifyDemandedBits changes allowed those nodes to become
aext_vec_inreg. For the zext/sext cases, the X86 backend has Custom handling
and never lets them reach the generic legalizer. We need to do the same
for aext_vec_inreg.

llvm-svn: 364264
2019-06-25 01:32:42 +00:00
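
To make the missing piece concrete, below is a minimal, hypothetical C++ sketch of the kind of Custom handling the message describes: registering ANY_EXTEND_VECTOR_INREG alongside the sign/zero variants so that mismatched-width nodes are lowered by the X86 backend instead of reaching the generic operation legalizer. The types, the loop, and the placement are illustrative assumptions, not the actual upstream fix.

  // Hypothetical fragment in the spirit of the X86TargetLowering constructor;
  // the exact types and surrounding code are assumptions for illustration.
  // Custom handling keeps mismatched-width *_EXTEND_VECTOR_INREG nodes
  // (e.g. a 256-bit result fed by a 128-bit source) out of the generic
  // operation legalizer, which can only expand same-total-width cases.
  for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); // already handled
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); // already handled
    setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  VT, Custom); // the piece the revert says is needed
  }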

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=XOP --check-prefix=XOP-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=XOP --check-prefix=XOP-AVX2
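; These tests exercise the folding of multiply/add patterns into XOP integer
; multiply-accumulate instructions (vpmacsww, vpmacsdd, vpmacsdql, vpmacsdqh,
; vpmadcswd).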
define <8 x i16> @test_mul_v8i16_add_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; XOP-LABEL: test_mul_v8i16_add_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
%1 = mul <8 x i16> %a0, %a1
%2 = add <8 x i16> %1, %a2
ret <8 x i16> %2
}
define <16 x i16> @test_mul_v16i16_add_v16i16(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) {
; XOP-AVX1-LABEL: test_mul_v16i16_add_v16i16:
; XOP-AVX1: # %bb.0:
; XOP-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; XOP-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOP-AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; XOP-AVX1-NEXT: vpmacsww %xmm5, %xmm3, %xmm4, %xmm3
; XOP-AVX1-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; XOP-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-AVX1-NEXT: retq
;
; XOP-AVX2-LABEL: test_mul_v16i16_add_v16i16:
; XOP-AVX2: # %bb.0:
; XOP-AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; XOP-AVX2-NEXT: vpaddw %ymm0, %ymm2, %ymm0
; XOP-AVX2-NEXT: retq
%1 = mul <16 x i16> %a0, %a1
%2 = add <16 x i16> %a2, %1
ret <16 x i16> %2
}
define <4 x i32> @test_mul_v4i32_add_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; XOP-LABEL: test_mul_v4i32_add_v4i32:
; XOP: # %bb.0:
; XOP-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
%1 = mul <4 x i32> %a0, %a1
%2 = add <4 x i32> %1, %a2
ret <4 x i32> %2
}
define <8 x i32> @test_mul_v8i32_add_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
; XOP-AVX1-LABEL: test_mul_v8i32_add_v8i32:
; XOP-AVX1: # %bb.0:
; XOP-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; XOP-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; XOP-AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; XOP-AVX1-NEXT: vpmacsdd %xmm5, %xmm3, %xmm4, %xmm3
; XOP-AVX1-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; XOP-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-AVX1-NEXT: retq
;
; XOP-AVX2-LABEL: test_mul_v8i32_add_v8i32:
; XOP-AVX2: # %bb.0:
; XOP-AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; XOP-AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; XOP-AVX2-NEXT: retq
%1 = mul <8 x i32> %a0, %a1
%2 = add <8 x i32> %a2, %1
ret <8 x i32> %2
}
define <4 x i64> @test_mulx_v4i32_add_v4i64(<4 x i32> %a0, <4 x i32> %a1, <4 x i64> %a2) {
; XOP-AVX1-LABEL: test_mulx_v4i32_add_v4i64:
; XOP-AVX1: # %bb.0:
; XOP-AVX1-NEXT: vpmovsxdq %xmm0, %xmm3
; XOP-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; XOP-AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; XOP-AVX1-NEXT: vpmovsxdq %xmm1, %xmm4
; XOP-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; XOP-AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; XOP-AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; XOP-AVX1-NEXT: vpmacsdql %xmm5, %xmm1, %xmm0, %xmm0
; XOP-AVX1-NEXT: vpmacsdql %xmm2, %xmm4, %xmm3, %xmm1
; XOP-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOP-AVX1-NEXT: retq
;
; XOP-AVX2-LABEL: test_mulx_v4i32_add_v4i64:
; XOP-AVX2: # %bb.0:
; XOP-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; XOP-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; XOP-AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; XOP-AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; XOP-AVX2-NEXT: retq
%1 = sext <4 x i32> %a0 to <4 x i64>
%2 = sext <4 x i32> %a1 to <4 x i64>
%3 = mul <4 x i64> %1, %2
%4 = add <4 x i64> %3, %a2
ret <4 x i64> %4
}
define <2 x i64> @test_pmuldq_lo_v4i32_add_v2i64(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; XOP-LABEL: test_pmuldq_lo_v4i32_add_v2i64:
; XOP: # %bb.0:
; XOP-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
%2 = add <2 x i64> %1, %a2
ret <2 x i64> %2
}
define <2 x i64> @test_pmuldq_hi_v4i32_add_v2i64(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; XOP-LABEL: test_pmuldq_hi_v4i32_add_v2i64:
; XOP: # %bb.0:
; XOP-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
%2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
%3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2)
%4 = add <2 x i64> %3, %a2
ret <2 x i64> %4
}
define <4 x i32> @test_pmaddwd_v8i16_add_v4i32(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; XOP-LABEL: test_pmaddwd_v8i16_add_v4i32:
; XOP: # %bb.0:
; XOP-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
%2 = add <4 x i32> %1, %a2
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone