mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
d4e15c2b06
This is based on this llvm-dev thread: http://lists.llvm.org/pipermail/llvm-dev/2019-December/137521.html The current strategy for f16 is to promote the type to float everywhere except where the specific width is required, like loads, stores, and bitcasts. This results in rounding occurring in odd places instead of immediately after arithmetic operations. This interacts in weird ways with the __fp16 type in clang, which is a storage-only type where arithmetic is always promoted to float. InstCombine can remove some fpext/fptruncs around such arithmetic and turn it into arithmetic on half. This wouldn't be so bad if SelectionDAG were able to put those fpext/fpround back in when it promotes. It is also not obvious how to make the existing strategy work with STRICT fp. We need to use STRICT versions of the conversions, which require chain operands. But if the conversions are created for a bitcast, there is no place to get an appropriate chain from. This patch implements a different strategy where conversions are emitted directly around arithmetic operations. Otherwise the value is passed around as an i16, including in arguments and return values. This can result in more conversions between arithmetic operations, but is closer to matching the IR the frontend generates for __fp16. It will also allow us to use the chain from constrained arithmetic nodes to link the STRICT_FP_TO_FP16/STRICT_FP16_TO_FP nodes that will need to be added. I've set it up so that each target can opt into the new behavior. Converting all the targets myself was more than I was able to handle. Differential Revision: https://reviews.llvm.org/D73749
52 lines
2.2 KiB
LLVM
52 lines
2.2 KiB
LLVM
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+mmx,+fma,+f16c,+avx512f -stop-after finalize-isel -o - %s | FileCheck %s
|
|
; This test ensures that MXCSR is recorded as an implicit use on the selected MMX FP-conversion, F16C conversion, and FMA (including AVX-512) instructions.
|
|
|
|
; Chains five MMX<->SSE conversion intrinsics (each result feeds the next) so
; that every one of them is selected and shows up in the MIR printed after
; finalize-isel. The label directive pins the patterns below to this
; function's MIR body so they cannot be satisfied by another function.
; CHECK-LABEL: name: mxcsr_mmx
define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
; CHECK: MMX_CVTPS2PIirr %{{[0-9]}}, implicit $mxcsr
; CHECK: MMX_CVTPI2PSirr %{{[0-9]}}, killed %{{[0-9]}}, implicit $mxcsr
; CHECK: MMX_CVTTPS2PIirr killed %{{[0-9]}}, implicit $mxcsr
; The `$` inside the next regex anchors the match to end of line, so the
; pattern requires that nothing (in particular no implicit $mxcsr operand)
; follows the source register.
; NOTE(review): presumably because an int32 -> double conversion is always
; exact and so does not depend on MXCSR - confirm.
; CHECK: MMX_CVTPI2PDirr killed %{{[0-9]$}}
; CHECK: MMX_CVTPD2PIirr killed %{{[0-9]}}, implicit $mxcsr
  %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
  %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %1)
  %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2)
  %4 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %3)
  %5 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %4)
  ret x86_mmx %5
}
|
|
|
|
; Truncates a float to half. The pattern below expects the selected
; VCVTPS2PH instruction to mention mxcsr later on the same MIR line.
; The label directive pins that pattern to this function's MIR body.
; CHECK-LABEL: name: mxcsr_f16c
define half @mxcsr_f16c(float %a) {
; CHECK: VCVTPS2PH{{.*}}mxcsr
  %res = fptrunc float %a to half
  ret half %res
}
|
|
|
|
; Calls the scalar-single FMA intrinsic with operands (%b, %a, %a). The
; pattern below expects the selected VFMADD* instruction to mention mxcsr on
; the same MIR line. Because the same pattern is reused by other functions in
; this file, the label directive pins it to this function's MIR body.
; CHECK-LABEL: name: mxcsr_fma_ss
define <4 x float> @mxcsr_fma_ss(<4 x float> %a, <4 x float> %b) {
; CHECK: VFMADD{{.*}}mxcsr
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a)
  ret <4 x float> %res
}
|
|
|
|
; Calls the packed-single FMA intrinsic with operands (%b, %a, %a). The
; pattern below expects the selected VFMADD* instruction to mention mxcsr on
; the same MIR line. Because the same pattern is reused by other functions in
; this file, the label directive pins it to this function's MIR body.
; CHECK-LABEL: name: mxcsr_fma_ps
define <4 x float> @mxcsr_fma_ps(<4 x float> %a, <4 x float> %b) {
; CHECK: VFMADD{{.*}}mxcsr
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a)
  ret <4 x float> %res
}
|
|
|
|
; Calls the 512-bit masked packed-double FMA intrinsic with an i8 argument of
; -1 (all bits set) and a trailing i32 argument of 10. The pattern below
; expects the selected VFMADD* instruction to mention mxcsr on the same MIR
; line. Because the same pattern is reused by other functions in this file,
; the label directive pins it to this function's MIR body.
; NOTE(review): the i32 10 is presumably the embedded rounding/SAE control
; that the function name refers to - confirm against the intrinsic definition.
; CHECK-LABEL: name: mxcsr_fma_sae
define <8 x double> @mxcsr_fma_sae(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
; CHECK: VFMADD{{.*}}mxcsr
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 -1, i32 10)
  ret <8 x double> %res
}
|
|
|
|
; External declarations for the target intrinsics called by the functions in
; this file.

; MMX <-> SSE conversion intrinsics.
declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>)
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>)
declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>)

; 128-bit FMA intrinsics (scalar-single and packed-single forms).
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

; 512-bit masked packed-double FMA intrinsic; takes extra i8 and i32 operands.
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|