llvm-mirror/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
Simon Pilgrim e2f3b489b8 [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ truncating conversions with generic IR.

It turns out that the behaviour of these intrinsics differs enough from generic IR to cause problems: INF/NaN/out-of-range values are guaranteed to produce a 0x80000000 result, whereas constant folding of the generic IR converts them to either zero or UNDEF. This is also an issue with the scalar implementations (which were already using generic IR, and which I was trying to match).
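As an illustrative sketch (not part of the original commit message; the function names are made up), the contrast looks roughly like this in IR, using the packed SSE2 truncating-conversion intrinsic:

  ; Generic IR: fptosi on NaN/INF/out-of-range inputs is undefined, so the
  ; optimizer is free to constant-fold such lanes to zero or undef.
  define <4 x i32> @trunc_generic(<4 x float> %a) {
    %r = fptosi <4 x float> %a to <4 x i32>
    ret <4 x i32> %r
  }

  ; x86 builtin: (V)CVTTPS2DQ guarantees 0x80000000 for NaN/INF/out-of-range
  ; lanes, so the call has to be kept as an opaque target intrinsic.
  define <4 x i32> @trunc_x86(<4 x float> %a) {
    %r = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a)
    ret <4 x i32> %r
  }
  declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone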

This patch changes both scalar and packed versions back to using x86-specific builtins.

It also deals with the other scalar conversion cases that depend on the runtime rounding mode and can have similar issues with constant folding.
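As a hedged example (not text from the original commit), CVTSD2SI is one such case: it rounds according to whatever rounding mode is currently set in MXCSR, so it cannot be modelled with a plain fptosi, which always truncates:

  ; cvtsd2si uses the runtime MXCSR rounding mode, so folding it as if it
  ; truncated would be incorrect.
  define i32 @cvt_scalar(<2 x double> %a) {
    %r = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a)
    ret i32 %r
  }
  declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone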

A companion clang patch is at D22105.

Differential Revision: https://reviews.llvm.org/D22106

llvm-svn: 275981
2016-07-19 15:07:43 +00:00

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
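; This file exercises legacy SSE2 intrinsics that are handled by IR auto-upgrade;
; the CHECK lines verify that the upgraded forms still select the expected
; instructions. Note that the *.dq.bs intrinsics take their shift amount in
; bytes, while psll.dq/psrl.dq take it in bits (so i32 8 below is a one-byte
; shift).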
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq_bs:
; CHECK: ## BB#0:
; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrl_dq_bs:
; CHECK: ## BB#0:
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrl_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: ## BB#0:
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK: ## BB#0:
; CHECK-NEXT: cvtps2pd %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movlps %xmm0, (%eax)
; CHECK-NEXT: retl
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind

define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; The add operation forces the integer execution domain, so the store below is emitted as movdqu rather than movups.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: paddb LCPI7_0, %xmm0
; CHECK-NEXT: movdqu %xmm0, (%eax)
; CHECK-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; The fadd operation forces the floating-point execution domain, so the store below is emitted as movupd.
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: addpd %xmm0, %xmm1
; CHECK-NEXT: movupd %xmm1, (%eax)
; CHECK-NEXT: retl
%a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
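; The pshuf* tests below use imm8 = 27 (0b00011011), which selects the affected
; elements in reverse order [3,2,1,0].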
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; CHECK-LABEL: test_x86_sse2_pshuf_d:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retl
entry:
%res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufl_w:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NEXT: retl
entry:
%res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufh_w:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NEXT: retl
entry:
%res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
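; The legacy unsigned-byte and signed-word min/max intrinsics below are also
; handled by auto-upgrade; the checks verify that the upgraded IR still selects
; the single pmaxub/pminub/pmaxsw/pminsw instruction.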
define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: max_epu8:
; CHECK: ## BB#0:
; CHECK-NEXT: pmaxub %xmm1, %xmm0
; CHECK-NEXT: retl
;
%res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: min_epu8:
; CHECK: ## BB#0:
; CHECK-NEXT: pminub %xmm1, %xmm0
; CHECK-NEXT: retl
;
%res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: max_epi16:
; CHECK: ## BB#0:
; CHECK-NEXT: pmaxsw %xmm1, %xmm0
; CHECK-NEXT: retl
;
%res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: min_epi16:
; CHECK: ## BB#0:
; CHECK-NEXT: pminsw %xmm1, %xmm0
; CHECK-NEXT: retl
;
%res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone