mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR.
Officially, we don't acknowledge non-default configurations of MXCSR, as getting there would require usage of the FENV_ACCESS pragma (at least insofar as rounding mode is concerned). We don't support the pragma, so we can assume that the default rounding mode - round to nearest, ties to even - is always used. However, hard-coding that rounding mode in the instruction is inconsistent with the rest of the instruction set, where MXCSR is always effective (unless otherwise specified). It's also an unnecessary obstacle to the few brave souls that use fenv.h with LLVM. Avoid the hard-coded rounding mode for fp_to_f16; use MXCSR instead. llvm-svn: 259448
This commit is contained in:
parent
b310adabbe
commit
d732a878e7
@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {
|
||||
|
||||
// Patterns for matching conversions from float to half-float and vice versa.
|
||||
let Predicates = [HasF16C] in {
|
||||
// Use MXCSR.RC for rounding instead of explicitly specifying the default
|
||||
// rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
|
||||
// configurations we support (the default). However, falling back to MXCSR is
|
||||
// more consistent with other instructions, which are always controlled by it.
|
||||
// Selecting MXCSR.RC is encoded as an immediate of 0b100 (i.e. 4).
|
||||
def : Pat<(fp_to_f16 FR32:$src),
|
||||
(i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
|
||||
(COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
|
||||
(COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;
|
||||
|
||||
def : Pat<(f16_to_fp GR16:$src),
|
||||
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
|
||||
@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {
|
||||
|
||||
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
|
||||
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
|
||||
(VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
|
||||
(VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 {
|
||||
; ALL-LABEL: test1_fast:
|
||||
; F16C-NOT: callq {{_+}}truncdfhf2
|
||||
; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; AVX: callq {{_+}}truncdfhf2
|
||||
; ALL: ret
|
||||
entry:
|
||||
@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
|
||||
; F16C: fldt
|
||||
; F16C-NEXT: fstps
|
||||
; F16C-NEXT: vmovss
|
||||
; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; AVX: callq {{_+}}truncxfhf2
|
||||
; ALL: ret
|
||||
entry:
|
||||
|
@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
|
||||
; CHECK_LIBCALL-NEXT: retq
|
||||
|
||||
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
|
||||
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
|
||||
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
|
||||
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
|
||||
; CHECK-F16C-NEXT: movw %ax, (%rsi)
|
||||
; CHECK-F16C-NEXT: retq
|
||||
@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
|
||||
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
|
||||
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
|
||||
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
|
||||
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
|
||||
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
|
||||
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
|
||||
; CHECK-F16C-NEXT: movw %ax, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
|
Loading…
x
Reference in New Issue
Block a user