1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR.

Officially, we don't acknowledge non-default configurations of MXCSR,
as getting there would require usage of the FENV_ACCESS pragma (at
least insofar as rounding mode is concerned).

We don't support the pragma, so we can assume that the default
rounding mode - round to nearest, ties to even - is always used.

However, hard-coding the rounding mode is inconsistent with the rest of
the instruction set, where MXCSR is always effective (unless otherwise
specified).
Also, it's an unnecessary obstacle to the few brave souls that use
fenv.h with LLVM.

Avoid the hard-coded rounding mode for fp_to_f16; use MXCSR instead.

llvm-svn: 259448
This commit is contained in:
Ahmed Bougacha 2016-02-02 01:32:50 +00:00
parent b310adabbe
commit d732a878e7
3 changed files with 11 additions and 6 deletions

View File

@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C] in {
// Use MXCSR.RC for rounding instead of explicitly specifying the default
// rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
// configurations we support (the default). However, falling back to MXCSR is
// more consistent with other instructions, which are always controlled by it.
// Using MXCSR.RC is encoded as 0b100 in the immediate operand.
def : Pat<(fp_to_f16 FR32:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
(COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
(COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
(VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
(VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
}
//===----------------------------------------------------------------------===//

View File

@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 {
; ALL-LABEL: test1_fast:
; F16C-NOT: callq {{_+}}truncdfhf2
; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX: callq {{_+}}truncdfhf2
; ALL: ret
entry:
@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
; F16C: fldt
; F16C-NEXT: fstps
; F16C-NEXT: vmovss
; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX: callq {{_+}}truncxfhf2
; ALL: ret
entry:

View File

@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK_LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq