1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/X86/avx-cvt.ll
Craig Topper 3321658147 [X86] Add isel patterns for folding loads when creating ROUND instructions from ffloor/fnearbyint/fceil/frint/ftrunc.
We were missing packed isel folding patterns for all of sse41, avx, and avx512.

For some reason avx512 had scalar load folding patterns under optsize (due to partial/undef reg update), but we didn't have the equivalent sse41 and avx patterns.

Sometimes we would get load folding due to the peephole pass anyway, but we're also missing avx512 instructions from the load folding table. I'll try to fix that in another patch.

Some of this was spotted in the review for D47993.

This patch adds all the folds to isel, adds a few spot tests, and disables the peephole pass on a few tests to ensure we're testing some of these patterns.

llvm-svn: 334460
2018-06-12 00:48:57 +00:00

203 lines
5.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; Signed integer to float conversion of a full <8 x i32> vector.
; Both llc invocations share one expectation: a single vcvtdq2ps operating
; on ymm0 in place.
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
; CHECK-LABEL: sitofp00:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retq
%b = sitofp <8 x i32> %a to <8 x float>
ret <8 x float> %b
}
; Float to signed integer conversion of a full <8 x float> vector.
; Both llc invocations are expected to emit the truncating form vcvttps2dq
; on ymm0 in place.
define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
; CHECK-LABEL: fptosi00:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: retq
%b = fptosi <8 x float> %a to <8 x i32>
ret <8 x i32> %b
}
; Widening conversion: <4 x i32> in xmm0 to <4 x double> in ymm0.
; Expected to be a single vcvtdq2pd for both llc invocations.
define <4 x double> @sitofp01(<4 x i32> %a) {
; CHECK-LABEL: sitofp01:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: retq
%b = sitofp <4 x i32> %a to <4 x double>
ret <4 x double> %b
}
; Signed <8 x i16> to <8 x float> conversion. The i16 lanes have to be
; sign-extended to i32 before the conversion, and the two llc invocations
; are expected to do that differently:
;  * with -mattr=+avx: two xmm-sized vpmovsxwd (the second fed by a vpshufd
;    of the input), merged with vinsertf128, then one vcvtdq2ps on ymm0.
;  * with -mattr=+avx512f: one vpmovsxwd from xmm0 straight to ymm0, then
;    the same vcvtdq2ps.
define <8 x float> @sitofp02(<8 x i16> %a) {
; AVX-LABEL: sitofp02:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: sitofp02:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT: retq
%b = sitofp <8 x i16> %a to <8 x float>
ret <8 x float> %b
}
; Narrowing conversion: <4 x double> in ymm0 to <4 x i32> in xmm0.
; Expected to be a single truncating vcvttpd2dq, followed by a vzeroupper
; before the return, for both llc invocations.
define <4 x i32> @fptosi01(<4 x double> %a) {
; CHECK-LABEL: fptosi01:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
; Truncates <8 x double> to <8 x float>. The expected code differs per
; llc invocation:
;  * with -mattr=+avx: the input arrives in ymm0/ymm1; each half is narrowed
;    with its own vcvtpd2ps and the two xmm results are joined with
;    vinsertf128.
;  * with -mattr=+avx512f: a single vcvtpd2ps from zmm0 to ymm0.
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
; AVX-LABEL: fptrunc00:
; AVX: # %bb.0:
; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0
; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: fptrunc00:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2ps %zmm0, %ymm0
; AVX512-NEXT: retq
%a = fptrunc <8 x double> %b to <8 x float>
ret <8 x float> %a
}
; Scalar truncation expressed on vectors: element 0 of %a0 is truncated from
; double to float and inserted into element 0 of %a1.
; The whole extract/convert/insert sequence is expected to collapse into one
; vcvtsd2ss that reads xmm0 (%a0) and xmm1 (%a1) and writes xmm0.
define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fptrunc01:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%ext = extractelement <2 x double> %a0, i32 0
%cvt = fptrunc double %ext to float
%res = insertelement <4 x float> %a1, float %cvt, i32 0
ret <4 x float> %res
}
; Extends <4 x float> in xmm0 to <4 x double> in ymm0.
; Expected to be a single vcvtps2pd for both llc invocations.
define <4 x double> @fpext00(<4 x float> %b) nounwind {
; CHECK-LABEL: fpext00:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: retq
%a = fpext <4 x float> %b to <4 x double>
ret <4 x double> %a
}
; Scalar extension expressed on vectors: element 0 of %a1 is extended from
; float to double and inserted into element 0 of %a0.
; The whole extract/convert/insert sequence is expected to collapse into one
; vcvtss2sd that reads xmm1 (%a1) and xmm0 (%a0) and writes xmm0.
define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fpext01:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}
; Loads an i64 from %e and converts it to double.
; The load is expected to be folded into the conversion, giving a single
; vcvtsi2sdq with a (%rdi) memory operand.
define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcA:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret double %conv
}
; Loads an i32 from %e and converts it to double.
; The load is expected to be folded into the conversion, giving a single
; vcvtsi2sdl with a (%rdi) memory operand.
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcB:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to double
ret double %conv
}
; Loads an i32 from %e and converts it to float.
; The load is expected to be folded into the conversion, giving a single
; vcvtsi2ssl with a (%rdi) memory operand.
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcC:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to float
ret float %conv
}
; Loads an i64 from %e and converts it to float.
; The load is expected to be folded into the conversion, giving a single
; vcvtsi2ssq with a (%rdi) memory operand.
define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcD:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret float %conv
}
; Scalar float-to-double extension between two stack slots: loads a float
; from the alloca %f, extends it, and stores the double to the alloca %d.
; Nothing in this function stores to %f before the load, so only the shape
; of the generated code is meaningful here, not the value.
; Expected code: a separate vmovss load, a register-to-register vcvtss2sd,
; and a vmovsd store to an %rsp-relative slot (the load is not folded into
; the conversion in the expected output).
define void @fpext() nounwind uwtable {
; CHECK-LABEL: fpext:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
%f = alloca float, align 4
%d = alloca double, align 8
%tmp = load float, float* %f, align 4
%conv = fpext float %tmp to double
store double %conv, double* %d, align 8
ret void
}
; llvm.nearbyint on a double passed in a register.
; Expected to be a single register-to-register vroundsd with immediate $12
; for both llc invocations.
define double @nearbyint_f64(double %a) {
; CHECK-LABEL: nearbyint_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call double @llvm.nearbyint.f64(double %a)
ret double %res
}
; Intrinsic declaration; called by @nearbyint_f64 and @nearbyint_f64_load.
declare double @llvm.nearbyint.f64(double %p)
; llvm.floor on a float passed in a register.
; Expected to be a single register-to-register vroundss with immediate $9
; for both llc invocations.
define float @floor_f32(float %a) {
; CHECK-LABEL: floor_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call float @llvm.floor.f32(float %a)
ret float %res
}
; Intrinsic declaration; called by @floor_f32 and @floor_f32_load.
declare float @llvm.floor.f32(float %p)
; llvm.floor on a float loaded from %aptr, in a function marked optsize.
; The load is expected to be folded into the rounding instruction: a single
; vroundss with immediate $9 and a (%rdi) memory operand.
; The llc invocations at the top of this file pass -disable-peephole.
define float @floor_f32_load(float* %aptr) optsize {
; CHECK-LABEL: floor_f32_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a = load float, float* %aptr
%res = call float @llvm.floor.f32(float %a)
ret float %res
}
; llvm.nearbyint on a double loaded from %aptr, in a function marked optsize.
; The load is expected to be folded into the rounding instruction: a single
; vroundsd with immediate $12 and a (%rdi) memory operand.
; The llc invocations at the top of this file pass -disable-peephole.
define double @nearbyint_f64_load(double* %aptr) optsize {
; CHECK-LABEL: nearbyint_f64_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a = load double, double* %aptr
%res = call double @llvm.nearbyint.f64(double %a)
ret double %res
}