; Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-23 11:13:28 +01:00)
; Commit 3321658147:
; We were missing packed isel folding patterns for all of sse41, avx, and
; avx512. For some reason avx512 had scalar load folding patterns under optsize
; (due to partial/undef reg update), but we didn't have the equivalent sse41
; and avx patterns. Sometimes we would get load folding due to the peephole
; pass anyway, but we're also missing avx512 instructions from the load folding
; table. I'll try to fix that in another patch. Some of this was spotted in the
; review for D47993. This patch adds all the folds to isel, adds a few spot
; tests, and disables the peephole pass on a few tests to ensure we're testing
; some of these patterns.
; llvm-svn: 334460
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; Packed i32 -> float conversion: both AVX and AVX512 should select a single
; vcvtdq2ps on the full ymm register.
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
; CHECK-LABEL: sitofp00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %b = sitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %b
}
; Packed float -> i32 truncating conversion: expect a single vcvttps2dq.
define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
; CHECK-LABEL: fptosi00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT:    retq
  %b = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %b
}
; Widening i32 -> double conversion: vcvtdq2pd reads xmm and writes ymm.
define <4 x double> @sitofp01(<4 x i32> %a) {
; CHECK-LABEL: sitofp01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %b = sitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %b
}
; i16 -> float needs a sign-extend first. AVX1 has no 256-bit vpmovsxwd, so it
; widens in two xmm halves; AVX512 (which implies AVX2) extends to ymm directly.
define <8 x float> @sitofp02(<8 x i16> %a) {
; AVX-LABEL: sitofp02:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: sitofp02:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %b = sitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %b
}
; Narrowing double -> i32 conversion; the result is xmm so the upper ymm state
; must be cleared with vzeroupper before returning.
define <4 x i32> @fptosi01(<4 x double> %a) {
; CHECK-LABEL: fptosi01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %b = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}
; 8 x double -> 8 x float truncation: AVX1 splits the 512-bit source into two
; ymm halves and reassembles; AVX512 converts the whole zmm in one instruction.
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
; AVX-LABEL: fptrunc00:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtpd2ps %ymm0, %xmm0
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fptrunc00:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2ps %zmm0, %ymm0
; AVX512-NEXT:    retq
  %a = fptrunc <8 x double> %b to <8 x float>
  ret <8 x float> %a
}
; Scalar double -> float truncation merged into the low lane of %a1: the
; three-operand vcvtsd2ss form supplies the upper lanes from %xmm1.
define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fptrunc01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %ext = extractelement <2 x double> %a0, i32 0
  %cvt = fptrunc double %ext to float
  %res = insertelement <4 x float> %a1, float %cvt, i32 0
  ret <4 x float> %res
}
; Widening float -> double conversion: vcvtps2pd reads xmm and writes ymm.
define <4 x double> @fpext00(<4 x float> %b) nounwind {
; CHECK-LABEL: fpext00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %a = fpext <4 x float> %b to <4 x double>
  ret <4 x double> %a
}
; Scalar float -> double extension merged into the low lane of %a0 via the
; three-operand vcvtss2sd form (upper lane taken from %xmm0).
define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fpext01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}
; i64 load + sitofp to double should fold the load into vcvtsi2sdq.
define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcA:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to double
  ret double %conv
}
; i32 load + sitofp to double should fold the load into vcvtsi2sdl.
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcB:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to double
  ret double %conv
}
; i32 load + sitofp to float should fold the load into vcvtsi2ssl.
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcC:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to float
  ret float %conv
}
; i64 load + sitofp to float should fold the load into vcvtsi2ssq.
define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcD:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to float
  ret float %conv
}
; Scalar fpext through stack slots (the allocas are never initialized; the test
; only checks the selected load/convert/store sequence).
define void @fpext() nounwind uwtable {
; CHECK-LABEL: fpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    retq
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load float, float* %f, align 4
  %conv = fpext float %tmp to double
  store double %conv, double* %d, align 8
  ret void
}
; llvm.nearbyint -> vroundsd with imm 12 (round using MXCSR mode, suppress
; precision exceptions).
define double @nearbyint_f64(double %a) {
; CHECK-LABEL: nearbyint_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call double @llvm.nearbyint.f64(double %a)
  ret double %res
}
declare double @llvm.nearbyint.f64(double %p)
; llvm.floor -> vroundss with imm 9 (round toward -inf, suppress precision
; exceptions).
define float @floor_f32(float %a) {
; CHECK-LABEL: floor_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call float @llvm.floor.f32(float %a)
  ret float %res
}
declare float @llvm.floor.f32(float %p)
; Under optsize the scalar load should fold into vroundss (avoids a separate
; load and the partial-register update it would imply).
define float @floor_f32_load(float* %aptr) optsize {
; CHECK-LABEL: floor_f32_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a = load float, float* %aptr
  %res = call float @llvm.floor.f32(float %a)
  ret float %res
}
; Under optsize the scalar load should fold into vroundsd.
define double @nearbyint_f64_load(double* %aptr) optsize {
; CHECK-LABEL: nearbyint_f64_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a = load double, double* %aptr
  %res = call double @llvm.nearbyint.f64(double %a)
  ret double %res
}