mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
59de807f62
This is the groundwork required to implement strictfp. For now, this should be NFC for regular instructions (many instructions just gain an extra use of a reserved register). Regalloc won't rematerialize instructions with reads of physical registers, but we were suffering from that anyway with the exec reads. Should add it for all the related FP uses (possibly with some extras). I did not add it to either the gpr index mode instructions (or every single VALU instruction) since it's a ridiculous feature already modeled as an arbitrary side effect. Also work towards marking instructions with FP exceptions. This doesn't actually set the bit yet since this would start to change codegen. It seems nofpexcept is currently not implied from the regular IR FP operations. Add it to some MIR tests where I think it might matter.
710 lines
26 KiB
YAML
710 lines
26 KiB
YAML
# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
|
|
--- |
|
|
; IR counterpart of the MIR function with the same name. Loads two halves and
; one float (all volatile, from undef addrspace(1) pointers), adds a constant
; to the first half and to the float, and stores both sums back volatile.
define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %f.in = load volatile float, float addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH3C00
  %f.sum = fadd float %f.in, 1.000000e+00
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile float %f.sum, float addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads two halves and
; one float (all volatile, from undef addrspace(1) pointers), adds a constant
; to the first half and to the float, and stores both sums back volatile.
define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %f.in = load volatile float, float addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH3C00
  %f.sum = fadd float %f.in, 1.000000e+00
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile float %f.sum, float addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads two halves and
; one float (all volatile, from undef addrspace(1) pointers), adds a constant
; to the first half and to the float, and stores both sums back volatile.
define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %f.in = load volatile float, float addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH3C00
  %f.sum = fadd float %f.in, 1.000000e+00
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile float %f.sum, float addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads two halves and
; one float (all volatile, from undef addrspace(1) pointers), adds a constant
; to each of the three values, and stores the three sums back volatile.
define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %f.in = load volatile float, float addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH3C00
  %h.sum1 = fadd half %h.in1, 0xH3C00
  %f.sum = fadd float %f.in, 1.000000e+00
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile half %h.sum1, half addrspace(1)* undef
  store volatile float %f.sum, float addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads two halves
; (volatile, from undef addrspace(1) pointers), adds the half constant with
; bit pattern 0xH0001 to each, and stores both sums back volatile.
define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH0001
  %h.sum1 = fadd half %h.in1, 0xH0001
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile half %h.sum1, half addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads two halves and
; one float (all volatile, from undef addrspace(1) pointers), adds the half
; constant 0xHFFFE to each half and the hex float constant to the float, and
; stores the three sums back volatile.
define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %f.in = load volatile float, float addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xHFFFE
  %h.sum1 = fadd half %h.in1, 0xHFFFE
  %f.sum = fadd float %f.in, 0xffffffffc0000000
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile half %h.sum1, half addrspace(1)* undef
  store volatile float %f.sum, float addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads three floats
; (volatile, from undef addrspace(1) pointers), adds 1.0 to the first two, and
; stores those two sums back volatile. The third loaded value is not used.
define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
  %f.in0 = load volatile float, float addrspace(1)* undef
  %f.in1 = load volatile float, float addrspace(1)* undef
  %f.in2 = load volatile float, float addrspace(1)* undef
  %f.sum0 = fadd float %f.in0, 1.0
  %f.sum1 = fadd float %f.in1, 1.0
  store volatile float %f.sum0, float addrspace(1)* undef
  store volatile float %f.sum1, float addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads three halves
; (volatile, from undef addrspace(1) pointers), adds a constant to the first
; and the third, and stores those two sums back volatile. The second loaded
; value is not used.
define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %h.in2 = load volatile half, half addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH3C00
  %h.sum2 = fadd half %h.in2, 1.000000e+00
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile half %h.sum2, half addrspace(1)* undef
  ret void
}
|
|
|
|
; IR counterpart of the MIR function with the same name. Loads three halves
; (volatile, from undef addrspace(1) pointers), adds a constant to the first
; and the third, and stores those two sums back volatile. The second loaded
; value is not used.
define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
  %h.in0 = load volatile half, half addrspace(1)* undef
  %h.in1 = load volatile half, half addrspace(1)* undef
  %h.in2 = load volatile half, half addrspace(1)* undef
  %h.sum0 = fadd half %h.in0, 0xH3C00
  %h.sum2 = fadd half %h.in2, 1.000000e+00
  store volatile half %h.sum0, half addrspace(1)* undef
  store volatile half %h.sum2, half addrspace(1)* undef
  ret void
}
|
|
|
|
attributes #0 = { nounwind }
|
|
|
|
...
|
|
---

# Case: the bit pattern of f32 1.0 (1065353216 = 0x3F800000) is moved into a
# VGPR that has exactly one use, an f16 add. The constant is expected to be
# folded into that add as the low 32 bits of a literal constant.

# CHECK-LABEL: name: add_f32_1.0_one_f16_use
# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec

name: add_f32_1.0_one_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    ; Constant under test and its single f16 use.
    %12 = V_MOV_B32_e32 1065353216, implicit $exec
    %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---
# Case: the bit pattern of f32 1.0 (1065353216 = 0x3F800000) is materialized
# once and read by two f16 adds. A materialized f32 inline immediate should
# not be folded into the f16 operands, so both adds are expected to keep
# reading %13.

# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $mode, implicit $exec


name: add_f32_1.0_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    ; Constant under test and its two f16 uses.
    %13 = V_MOV_B32_e32 1065353216, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---

# Case: the bit pattern of f32 1.0 (1065353216 = 0x3F800000) has one f32 use
# and one f16 use. It should be folded into the single f32 use as an inline
# immediate, and into the single f16 use as a literal constant.

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec

name: add_f32_1.0_one_f32_use_one_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    ; Constant under test, followed by its f16 use and its f32 use.
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
    %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---

# Case: the bit pattern of f32 1.0 (1065353216 = 0x3F800000) has one f32 use
# and two f16 uses. It should be folded into the single f32 use as an inline
# constant, and not folded as a multi-use literal into the f16 adds, which
# are expected to keep reading %14.

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit $mode, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec

name: add_f32_1.0_one_f32_use_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    ; Constant under test, followed by its two f16 uses and its one f32 use.
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---
# Case: the integer 1 is materialized once and read by two f16 adds. The
# CHECK lines expect the 1 to appear as an immediate operand of both adds,
# while the V_MOV_B32 itself is still present in the output.
# NOTE(review): presumably this folds despite the multiple uses because 1 is
# an inline constant -- confirm against the AMDGPU operand rules.

# CHECK-LABEL: name: add_i32_1_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec


name: add_i32_1_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    ; Constant under test and its two f16 uses.
    %13 = V_MOV_B32_e32 1, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---

# Case: the integer -2 is materialized once and read by two f16 adds and one
# f32 add. The CHECK lines expect -2 to appear as an immediate operand of all
# three adds, while the V_MOV_B32 itself is still present in the output.
# NOTE(review): presumably this folds despite the multiple uses because -2 is
# an inline constant -- confirm against the AMDGPU operand rules.

# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec

name: add_i32_m2_one_f32_use_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    ; Constant under test, followed by its two f16 uses and its one f32 use.
    %14 = V_MOV_B32_e32 -2, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---

# Case: the bit pattern of f16 1.0 (15360 = 0x3C00) is materialized once and
# read by two f32 adds. The CHECK lines expect it not to be folded: the
# V_MOV_B32 stays and both f32 adds keep reading %13.

# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $mode, implicit $exec

name: add_f16_1.0_multi_f32_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    ; Constant under test and its two f32 uses.
    %13 = V_MOV_B32_e32 15360, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---

# Case: 80886784 = 0x04D23C00 is materialized once and read by two f16 adds.
# The low 16 bits (0x3C00) are an inline immediate, but the high bits are
# junk. The CHECK lines record the current behaviour: nothing is folded.
# FIXME: Should be able to fold this

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $mode, implicit $exec

name: add_f16_1.0_other_high_bits_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    ; Constant under test and its two f16 uses.
    %13 = V_MOV_B32_e32 80886784, implicit $exec
    %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM 0

...
|
|
---

# Case: 305413120 = 0x12343C00 (low 16 bits 0x3C00) is materialized once and
# read by one f32 add and one f16 add. The CHECK lines record the current
# behaviour: nothing is folded and both adds keep reading %13.
# FIXME: Should fold inline immediate into f16 and literal use into
# f32 instruction.

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $mode, implicit $exec
name: add_f16_1.0_other_high_bits_use_f16_f32
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
# Ids 0-3 are declared but never referenced by the body; the CHECK lines pin
# the register numbers, so the list must not be renumbered.
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    ; Build the 128-bit register %10 that every buffer load/store below takes.
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    ; Constant under test, followed by its f32 use and its f16 use.
    %13 = V_MOV_B32_e32 305413120, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM 0

...
|