1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
Matt Arsenault 59de807f62 AMDGPU: Start adding MODE register uses to instructions
This is the groundwork required to implement strictfp. For now, this
should be NFC for regular instructoins (many instructions just gain an
extra use of a reserved register). Regalloc won't rematerialize
instructions with reads of physical registers, but we were suffering
from that anyway with the exec reads.

Should add it for all the related FP uses (possibly with some
extras). I did not add it to either the gpr index mode instructions
(or every single VALU instruction) since it's a ridiculous feature
already modeled as an arbitrary side effect.

Also work towards marking instructions with FP exceptions. This
doesn't actually set the bit yet since this would start to change
codegen. It seems nofpexcept is currently not implied from the regular
IR FP operations. Add it to some MIR tests where I think it might
matter.
2020-05-27 14:47:00 -04:00

710 lines
26 KiB
YAML

# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
--- |
define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile float, float addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH3C00
%f32.add = fadd float %f32.val, 1.000000e+00
store volatile half %f16.add0, half addrspace(1)* undef
store volatile float %f32.add, float addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile float, float addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH3C00
%f32.add = fadd float %f32.val, 1.000000e+00
store volatile half %f16.add0, half addrspace(1)* undef
store volatile float %f32.add, float addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile float, float addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH3C00
%f32.add = fadd float %f32.val, 1.000000e+00
store volatile half %f16.add0, half addrspace(1)* undef
store volatile float %f32.add, float addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile float, float addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH3C00
%f16.add1 = fadd half %f16.val1, 0xH3C00
%f32.add = fadd float %f32.val, 1.000000e+00
store volatile half %f16.add0, half addrspace(1)* undef
store volatile half %f16.add1, half addrspace(1)* undef
store volatile float %f32.add, float addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH0001
%f16.add1 = fadd half %f16.val1, 0xH0001
store volatile half %f16.add0, half addrspace(1)* undef
store volatile half %f16.add1,half addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile float, float addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xHFFFE
%f16.add1 = fadd half %f16.val1, 0xHFFFE
%f32.add = fadd float %f32.val, 0xffffffffc0000000
store volatile half %f16.add0, half addrspace(1)* undef
store volatile half %f16.add1, half addrspace(1)* undef
store volatile float %f32.add, float addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
%f32.val0 = load volatile float, float addrspace(1)* undef
%f32.val1 = load volatile float, float addrspace(1)* undef
%f32.val = load volatile float, float addrspace(1)* undef
%f32.add0 = fadd float %f32.val0, 1.0
%f32.add1 = fadd float %f32.val1, 1.0
store volatile float %f32.add0, float addrspace(1)* undef
store volatile float %f32.add1, float addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile half, half addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH3C00
%f32.add = fadd half %f32.val, 1.000000e+00
store volatile half %f16.add0, half addrspace(1)* undef
store volatile half %f32.add, half addrspace(1)* undef
ret void
}
define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
%f16.val0 = load volatile half, half addrspace(1)* undef
%f16.val1 = load volatile half, half addrspace(1)* undef
%f32.val = load volatile half, half addrspace(1)* undef
%f16.add0 = fadd half %f16.val0, 0xH3C00
%f32.add = fadd half %f32.val, 1.000000e+00
store volatile half %f16.add0, half addrspace(1)* undef
store volatile half %f32.add, half addrspace(1)* undef
ret void
}
attributes #0 = { nounwind }
...
---
# f32 1.0 with a single use should be folded as the low 32-bits of a
# literal constant.
# CHECK-LABEL: name: add_f32_1.0_one_f16_use
# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec
name: add_f32_1.0_one_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = V_MOV_B32_e32 1065353216, implicit $exec
%13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
---
# Materialized f32 inline immediate should not be folded into the f16
# operands
# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $mode, implicit $exec
name: add_f32_1.0_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1065353216, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
---
# f32 1.0 should be folded into the single f32 use as an inline
# immediate, and folded into the single f16 use as a literal constant
# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec
name: add_f32_1.0_one_f32_use_one_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
- { id: 16, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
%16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
---
# f32 1.0 should be folded for the single f32 use as an inline
# constant, and not folded as a multi-use literal for the f16 cases
# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit $mode, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec
name: add_f32_1.0_one_f32_use_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
- { id: 16, class: vgpr_32 }
- { id: 17, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 1065353216, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
---
# CHECK-LABEL: name: add_i32_1_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec
name: add_i32_1_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 1, implicit $exec
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
---
# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec
name: add_i32_m2_one_f32_use_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
- { id: 16, class: vgpr_32 }
- { id: 17, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%14 = V_MOV_B32_e32 -2, implicit $exec
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
---
# f32 1.0 should be folded for the single f32 use as an inline
# constant, and not folded as a multi-use literal for the f16 cases
# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $mode, implicit $exec
name: add_f16_1.0_multi_f32_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%13 = V_MOV_B32_e32 15360, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
S_ENDPGM 0
...
---
# The low 16-bits are an inline immediate, but the high bits are junk
# FIXME: Should be able to fold this
# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $mode, implicit $exec
name: add_f16_1.0_other_high_bits_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 80886784, implicit $exec
%14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...
---
# FIXME: Should fold inline immediate into f16 and literal use into
# f32 instruction.
# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $mode, implicit $exec
name: add_f16_1.0_other_high_bits_use_f16_f32
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_32 }
- { id: 2, class: sgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_32 }
- { id: 6, class: sreg_64 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: sreg_32 }
- { id: 10, class: sgpr_128 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0 (%ir-block.0):
%4 = IMPLICIT_DEF
%5 = COPY %4.sub1
%6 = IMPLICIT_DEF
%7 = COPY %6.sub0
%8 = S_MOV_B32 61440
%9 = S_MOV_B32 -1
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
%13 = V_MOV_B32_e32 305413120, implicit $exec
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
S_ENDPGM 0
...