mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
d4e15c2b06
This is based on this llvm-dev thread: http://lists.llvm.org/pipermail/llvm-dev/2019-December/137521.html The current strategy for f16 is to promote the type to float everywhere except where the specific width is required, like loads, stores, and bitcasts. This results in rounding occurring in odd places instead of immediately after arithmetic operations. This interacts in weird ways with the __fp16 type in clang, which is a storage-only type where arithmetic is always promoted to float. InstCombine can remove some fpext/fptruncs around such arithmetic and turn it into arithmetic on half. This wouldn't be so bad if SelectionDAG was able to put those fpext/fpround back in when it promotes. It is also not obvious how to make the existing strategy work with STRICT fp. We need to use STRICT versions of the conversions, which require chain operands. But if the conversions are created for a bitcast, there is no place to get an appropriate chain from. This patch implements a different strategy where conversions are emitted directly around arithmetic operations. Otherwise, the value is passed around as an i16, including in arguments and return values. This can result in more conversions between arithmetic operations, but is closer to matching the IR the frontend generates for __fp16. And it will allow us to use the chain from constrained arithmetic nodes to link the STRICT_FP_TO_FP16/STRICT_FP16_TO_FP that will need to be added. I've set it up so that each target can opt into the new behavior. Converting all the targets myself was more than I was able to handle. Differential Revision: https://reviews.llvm.org/D73749
36 lines
1.4 KiB
LLVM
36 lines
1.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE
|
|
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
|
|
|
|
; This test makes sure that a vector which needs to be promoted, and which is bitcast to fp16, is legalized correctly without causing a width mismatch.
|
|
; Stores a half that is produced by a constant-expression bitcast from a
; <4 x i4> constant. The CHECK lines below expect the function to consist of
; exactly one 16-bit immediate store (movw of 0x4000) followed by the return.
define void @constant_fold_vector_to_half() {
|
|
; CHECK-LABEL: constant_fold_vector_to_half:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: movw $16384, (%rax) # imm = 0x4000
|
|
; CHECK-NEXT: retq
|
|
; Volatile store through an undef pointer. Only the last vector element (i4 4)
; is non-zero; presumably it lands in the top nibble of the 16-bit value, which
; would give the 0x4000 immediate expected above -- TODO confirm lane order.
store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), half* undef
|
|
ret void
|
|
}
|
|
|
|
; Similarly, this makes sure that the reverse of the above bitcast (fp16 to a vector that needs to be promoted) is also legalized without crashing.
|
|
; Takes a half argument, bitcasts it to <4 x i4>, and stores the vector.
; The CHECK lines below expect nothing but a 16-bit store of %di followed by
; the return, i.e. no conversion instructions between entry and the store.
define void @pr38533_2(half %x) {
|
|
; CHECK-LABEL: pr38533_2:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: movw %di, (%rax)
|
|
; CHECK-NEXT: retq
|
|
; Reinterpret the 16 bits of %x as four 4-bit lanes.
%a = bitcast half %x to <4 x i4>
|
|
; Volatile store of the vector through an undef pointer.
store volatile <4 x i4> %a, <4 x i4>* undef
|
|
ret void
|
|
}
|
|
|
|
; This case is a bitcast from fp16 to a 16-bit wide legal vector type. In this case the result type is legal when the bitcast gets type legalized.
|
|
; Takes a half argument, bitcasts it to <16 x i1>, and stores the vector.
; The CHECK lines below expect nothing but a 16-bit store of %di followed by
; the return, the same expected output as for @pr38533_2.
define void @pr38533_3(half %x) {
|
|
; CHECK-LABEL: pr38533_3:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: movw %di, (%rax)
|
|
; CHECK-NEXT: retq
|
|
; Reinterpret the 16 bits of %x as sixteen 1-bit lanes.
%a = bitcast half %x to <16 x i1>
|
|
; Volatile store of the vector through an undef pointer.
store volatile <16 x i1> %a, <16 x i1>* undef
|
|
ret void
|
|
}
|