1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 12:02:58 +02:00

[RegisterBankInfo] Relax the assert of having matching type sizes on default mappings

Instead of asserting that the type sizes are exactly equal, we check
that the new size is big enough to contain the original type.
We have to relax this constraint because, right now, we sometimes
specify that things that are smaller than a storage type are legal
instead of widening everything to the size of a storage type.
E.g., we say that G_AND s16 is legal and we map that on GPR32.

This is something we may revisit in the future (either by changing
the legalization process or keeping track separately of the storage
size and the size of the type), but let us reflect the reality of
the situation for now.

llvm-svn: 318587
This commit is contained in:
Quentin Colombet 2017-11-18 04:28:58 +00:00
parent d94543f0d4
commit 6546e8fabd
2 changed files with 87 additions and 1 deletions

View File

@ -441,7 +441,11 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
LLT OrigTy = MRI.getType(OrigReg);
LLT NewTy = MRI.getType(NewReg);
if (OrigTy != NewTy) {
assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
// The default mapping is not supposed to change the size of
// the storage. However, right now we don't necessarily bump all
// the types to storage size. For instance, we can consider
// s16 G_AND legal whereas the storage size is going to be 32.
assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
"Types with difference size cannot be handled by the default "
"mapping");
DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "

View File

@ -97,6 +97,19 @@
define void @fp16Ext32() { ret void }
define void @fp16Ext64() { ret void }
define void @fp32Ext64() { ret void }
; Returns its half-precision argument %p unchanged.
define half @passFp16(half %p) {
entry:
ret half %p
}
; Stores the half argument %p into a 2-byte-aligned stack slot (%p.addr),
; loads it back, and returns the loaded value.
define half @passFp16ViaAllocas(half %p) {
entry:
%p.addr = alloca half, align 2
store half %p, half* %p.addr, align 2
%0 = load half, half* %p.addr, align 2
ret half %0
}
...
---
@ -875,3 +888,72 @@ body: |
RET_ReallyLR implicit %d0
...
---
# Make sure we map FP16 ABI on FPR register bank.
# CHECK-LABEL: name: passFp16
# CHECK: registers:
# CHECK: - { id: 0, class: fpr, preferred-register: '' }
# CHECK: %0:fpr(s16) = COPY %h0
# CHECK-NEXT: %h0 = COPY %0(s16)
name: passFp16
alignment: 2
legalized: true
registers:
- { id: 0, class: _ }
body: |
bb.1.entry:
liveins: %h0
%0(s16) = COPY %h0
%h0 = COPY %0(s16)
RET_ReallyLR implicit %h0
...
---
# This test tries to mix 16-bit types on fpr with 32-bit types on gpr.
# The problem when doing that is that switching from fpr to gpr requires
# more than just a plain COPY.
# In this specific case, currently we map the ABI copy from h0 to fpr,
# then, the fast mapping takes GPR for store and the size of the storage
# gets bumped to 32-bit.
# CHECK-LABEL: name: passFp16ViaAllocas
# CHECK: registers:
# CHECK: - { id: 0, class: fpr, preferred-register: '' }
# CHECK: - { id: 1, class: gpr, preferred-register: '' }
# CHECK: - { id: 2, class: gpr, preferred-register: '' }
# CHECK: - { id: 3, class: gpr, preferred-register: '' }
#
# CHECK: %0:fpr(s16) = COPY %h0
# CHECK-NEXT: %1:gpr(p0) = G_FRAME_INDEX %stack.0.p.addr
# Currently the default mapping we provide for store does not
# consider fpr for s16, unless it is produced by a floating point
# operation. Thus, we have to repair the assignment.
# CHECK-NEXT: %3:gpr(s16) = COPY %0(s16)
# CHECK-NEXT: G_STORE %3(s16), %1(p0) :: (store 2 into %ir.p.addr)
# CHECK-NEXT: %2:gpr(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr)
# CHECK-NEXT: %h0 = COPY %2(s16)
name: passFp16ViaAllocas
alignment: 2
legalized: true
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
frameInfo:
maxAlignment: 2
stack:
- { id: 0, name: p.addr, size: 2, alignment: 2, stack-id: 0 }
body: |
bb.1.entry:
liveins: %h0
%0(s16) = COPY %h0
%1(p0) = G_FRAME_INDEX %stack.0.p.addr
G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p.addr)
%2(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr)
%h0 = COPY %2(s16)
RET_ReallyLR implicit %h0
...