mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
18b312e6d9
The default legalization strategy is PromoteFloat, which keeps half in single-precision format through multiple floating-point operations. Conversion to/from float is done at loads, stores, bitcasts, and other places that care about the exact size being 16 bits. This patch switches to the alternative method, softPromoteHalf, which aims to keep the type in 16-bit format between every operation: we promote to float and immediately round for any arithmetic operation. This should be closer to the IR semantics, since we round after each operation and do not accumulate extra precision across multiple operations. X86 is the only other target that enables this today. See https://reviews.llvm.org/D73749 I had to update getRegisterTypeForCallingConv to force f16 to use f32 when the F extension is enabled. This way we can still pass it in the lower bits of an FPR for the ilp32f and lp64f ABIs; softPromoteHalf would otherwise always give i16 as the argument type. Reviewed By: asb, frasercrmck Differential Revision: https://reviews.llvm.org/D99148
80 lines
2.9 KiB
LLVM
80 lines
2.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
|
|
; RUN: | FileCheck -check-prefix=RV32I %s
|
|
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
|
|
; RUN: | FileCheck -check-prefix=RV64I %s
|
|
|
|
; This file provides a simple sanity check of half operations for
|
|
; RV32I and RV64I. This is primarily intended to ensure that custom
|
|
; legalisation or DAG combines aren't incorrectly triggered when the Zfh
|
|
; extension isn't enabled.
|
|
|
|
; half_test: returns (%a + %b) / %b, with both operations performed on half.
;
; The checked output for both targets shows every half operation lowered to
; libcalls: a half value is masked to its low 16 bits (0xffff, built with
; lui 16 / addi -1) and widened with __gnu_h2f_ieee, the arithmetic is done by
; __addsf3 and __divsf3, and each result is narrowed with __gnu_f2h_ieee.
; Note the f2h -> h2f round trip between the add and the divide: the sum is
; rounded back to half before it is used again.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define half @half_test(half %a, half %b) nounwind {
|
|
; RV32I-LABEL: half_test:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: addi sp, sp, -16
|
|
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: mv s0, a1
|
|
; RV32I-NEXT: lui a1, 16
|
|
; RV32I-NEXT: addi s1, a1, -1
|
|
; RV32I-NEXT: and a0, a0, s1
|
|
; RV32I-NEXT: call __gnu_h2f_ieee@plt
|
|
; RV32I-NEXT: mv s2, a0
|
|
; RV32I-NEXT: and a0, s0, s1
|
|
; RV32I-NEXT: call __gnu_h2f_ieee@plt
|
|
; RV32I-NEXT: mv s0, a0
|
|
; RV32I-NEXT: mv a0, s2
|
|
; RV32I-NEXT: mv a1, s0
|
|
; RV32I-NEXT: call __addsf3@plt
|
|
; RV32I-NEXT: call __gnu_f2h_ieee@plt
|
|
; RV32I-NEXT: and a0, a0, s1
|
|
; RV32I-NEXT: call __gnu_h2f_ieee@plt
|
|
; RV32I-NEXT: mv a1, s0
|
|
; RV32I-NEXT: call __divsf3@plt
|
|
; RV32I-NEXT: call __gnu_f2h_ieee@plt
|
|
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: addi sp, sp, 16
|
|
; RV32I-NEXT: ret
|
|
;
|
|
; RV64I-LABEL: half_test:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: addi sp, sp, -32
|
|
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: mv s0, a1
|
|
; RV64I-NEXT: lui a1, 16
|
|
; RV64I-NEXT: addiw s1, a1, -1
|
|
; RV64I-NEXT: and a0, a0, s1
|
|
; RV64I-NEXT: call __gnu_h2f_ieee@plt
|
|
; RV64I-NEXT: mv s2, a0
|
|
; RV64I-NEXT: and a0, s0, s1
|
|
; RV64I-NEXT: call __gnu_h2f_ieee@plt
|
|
; RV64I-NEXT: mv s0, a0
|
|
; RV64I-NEXT: mv a0, s2
|
|
; RV64I-NEXT: mv a1, s0
|
|
; RV64I-NEXT: call __addsf3@plt
|
|
; RV64I-NEXT: call __gnu_f2h_ieee@plt
|
|
; RV64I-NEXT: and a0, a0, s1
|
|
; RV64I-NEXT: call __gnu_h2f_ieee@plt
|
|
; RV64I-NEXT: mv a1, s0
|
|
; RV64I-NEXT: call __divsf3@plt
|
|
; RV64I-NEXT: call __gnu_f2h_ieee@plt
|
|
; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: addi sp, sp, 32
|
|
; RV64I-NEXT: ret
|
|
; Sum of the two half arguments.
%1 = fadd half %a, %b
|
|
; Divide the sum by %b. %b is used by both operations; in the checked output
; its widened value is kept in s0 and passed again as the divisor.
%2 = fdiv half %1, %b
|
|
ret half %2
|
|
}
|