1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-30 07:22:55 +01:00
llvm-mirror/test/CodeGen/X86/avx-cvt.ll
Victor Umansky bf35274368 Fix for the following bug in AVX codegen for double-to-int conversions:
.	"fptosi" and "fptoui" IR instructions are defined with round-to-zero rounding mode.
.	Currently for AVX mode for <4xdouble> and <8xdouble>  the "VCVTPD2DQ.128" and "VCVTPD2DQ.256" instructions are selected (for .fp_to_sint. DAG node operation ) by AVX codegen. However they use round-to-nearest-even rounding mode.
.	Consequently, the conversion produces incorrect numbers.
 
The fix is to replace selection of VCVTPD2DQ instructions with VCVTTPD2DQ instructions. The latter use truncate (i.e. round-to-zero) rounding mode. 
As .fp_to_sint. DAG node operation is used only for lowering of  "fptosi" and "fptoui" IR instructions, the fix in X86InstrSSE.td definition file doesn.t have an impact on other LLVM flows.
 
The patch includes changes in the .td file, LIT test for the changes and a fix in a legacy LIT test (which produced asm code conflicting with LLVN IR spec). 

llvm-svn: 149056
2012-01-26 08:51:39 +00:00

84 lines
2.0 KiB
LLVM

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; CHECK: vcvtdq2ps %ymm
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
%b = sitofp <8 x i32> %a to <8 x float>
ret <8 x float> %b
}
; CHECK: vcvttps2dq %ymm
define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
%b = fptosi <8 x float> %a to <8 x i32>
ret <8 x i32> %b
}
; CHECK: vcvtdq2pd %xmm
define <4 x double> @sitofp01(<4 x i32> %a) {
%b = sitofp <4 x i32> %a to <4 x double>
ret <4 x double> %b
}
; CHECK: vcvttpd2dqy %ymm
define <4 x i32> @fptosi01(<4 x double> %a) {
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
; CHECK: vcvtpd2psy %ymm
; CHECK-NEXT: vcvtpd2psy %ymm
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
%a = fptrunc <8 x double> %b to <8 x float>
ret <8 x float> %a
}
; CHECK: vcvtps2pd %xmm
define <4 x double> @fpext00(<4 x float> %b) nounwind {
%a = fpext <4 x float> %b to <4 x double>
ret <4 x double> %a
}
; CHECK: vcvtsi2sdq (%
define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
entry:
%tmp1 = load i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret double %conv
}
; CHECK: vcvtsi2sd (%
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
entry:
%tmp1 = load i32* %e, align 4
%conv = sitofp i32 %tmp1 to double
ret double %conv
}
; CHECK: vcvtsi2ss (%
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
entry:
%tmp1 = load i32* %e, align 4
%conv = sitofp i32 %tmp1 to float
ret float %conv
}
; CHECK: vcvtsi2ssq (%
define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
entry:
%tmp1 = load i64* %e, align 8
%conv = sitofp i64 %tmp1 to float
ret float %conv
}
; CHECK: vcvtss2sd
define void @fpext() nounwind uwtable {
entry:
%f = alloca float, align 4
%d = alloca double, align 8
%tmp = load float* %f, align 4
%conv = fpext float %tmp to double
store double %conv, double* %d, align 8
ret void
}