[X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR.

Officially, we don't acknowledge non-default configurations of MXCSR, as getting there would require usage of the FENV_ACCESS pragma (at least insofar as rounding mode is concerned). We don't support the pragma, so we can assume that the default rounding mode - round to nearest, ties to even - is always used. However, it's inconsistent with the rest of the instruction set, where MXCSR is always effective (unless otherwise specified). Also, it's an unnecessary obstacle to the few brave souls that use fenv.h with LLVM. Avoid the hard-coded rounding mode for fp_to_f16; use MXCSR instead. llvm-svn: 259448
2025-01-31 20:51:52 +01:00 · 2016-02-02 01:32:50 +00:00 · 2016-02-02 01:32:50 +00:00 · d732a878e7
commit d732a878e7
parent b310adabbe
3 changed files with 11 additions and 6 deletions
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {

 // Patterns for  matching conversions from float to half-float and vice versa.
 let Predicates = [HasF16C] in {
+  // Use MXCSR.RC for rounding instead of explicitly specifying the default
+  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
+  // configurations we support (the default). However, falling back to MXCSR is
+  // more consistent with other instructions, which are always controlled by it.
+  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
-              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+              (COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
-              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
 }

 //===----------------------------------------------------------------------===//
--- a/test/CodeGen/X86/fastmath-float-half-conversion.ll
+++ b/test/CodeGen/X86/fastmath-float-half-conversion.ll
@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 {
 ; ALL-LABEL: test1_fast:
 ; F16C-NOT: callq {{_+}}truncdfhf2
 ; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX: callq {{_+}}truncdfhf2
 ; ALL: ret
 entry:
@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
 ; F16C: fldt
 ; F16C-NEXT: fstps
 ; F16C-NEXT: vmovss
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX: callq {{_+}}truncxfhf2
 ; ALL: ret
 entry:
--- a/test/CodeGen/X86/half.ll
+++ b/test/CodeGen/X86/half.ll
@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
 ; CHECK_LIBCALL-NEXT: retq

 ; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
 ; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
 ; CHECK-F16C-NEXT: movw %ax, (%rsi)
 ; CHECK-F16C-NEXT: retq
@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
 ; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
 ; CHECK-LIBCALL-NEXT: popq [[ADDR]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
 ; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
 ; CHECK-F16C-NEXT: movw %ax, (%rsi)
 ; CHECK-NEXT: retq