mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AMDGPU: Check NSZ MI flag when folding omod
I'm not sure of the exact nsz flag combination that is OK. I think as long as it's on either instruction (the source instruction or the omod multiply), this is OK. For now, just check it on the omod multiply. llvm-svn: 339513
This commit is contained in:
parent
cc2045f6d7
commit
9bf7a399aa
@ -994,9 +994,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
|
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
|
||||||
// correctly handle signed zeros.
|
// correctly handle signed zeros.
|
||||||
//
|
//
|
||||||
// TODO: Check nsz on instructions when fast math flags are preserved to MI
|
bool IsIEEEMode = ST->enableIEEEBit(MF);
|
||||||
// level.
|
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
|
||||||
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
|
|
||||||
|
|
||||||
for (MachineBasicBlock *MBB : depth_first(&MF)) {
|
for (MachineBasicBlock *MBB : depth_first(&MF)) {
|
||||||
MachineBasicBlock::iterator I, Next;
|
MachineBasicBlock::iterator I, Next;
|
||||||
@ -1007,7 +1006,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
tryFoldInst(TII, &MI);
|
tryFoldInst(TII, &MI);
|
||||||
|
|
||||||
if (!TII->isFoldableCopy(MI)) {
|
if (!TII->isFoldableCopy(MI)) {
|
||||||
if (IsIEEEMode || !tryFoldOMod(MI))
|
// TODO: Omod might be OK if there is NSZ only on the source
|
||||||
|
// instruction, and not the omod multiply.
|
||||||
|
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
|
||||||
|
!tryFoldOMod(MI))
|
||||||
tryFoldClamp(MI);
|
tryFoldClamp(MI);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
71
test/CodeGen/AMDGPU/omod-nsz-flag.mir
Normal file
71
test/CodeGen/AMDGPU/omod-nsz-flag.mir
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define amdgpu_ps void @omod_inst_flag_nsz_src() {
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps void @omod_inst_flag_nsz_result() {
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps void @omod_inst_flag_nsz_both() {
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# FIXME: Is it OK to fold omod for this?
|
||||||
|
# GCN-LABEL: name: omod_inst_flag_nsz_src
|
||||||
|
# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||||
|
# GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||||
|
# GCN-NEXT: S_ENDPGM implicit %1
|
||||||
|
name: omod_inst_flag_nsz_src
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
|
||||||
|
%0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||||
|
%1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||||
|
S_ENDPGM implicit %1
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
|
||||||
|
# GCN-LABEL: name: omod_inst_flag_nsz_result
|
||||||
|
# GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
|
||||||
|
# GCN-NEXT: S_ENDPGM implicit %0
|
||||||
|
|
||||||
|
name: omod_inst_flag_nsz_result
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
|
||||||
|
%0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||||
|
%1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||||
|
S_ENDPGM implicit %1
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# GCN-LABEL: name: omod_inst_flag_nsz_both
|
||||||
|
# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
|
||||||
|
# GCN-NEXT: S_ENDPGM implicit %0
|
||||||
|
|
||||||
|
name: omod_inst_flag_nsz_both
|
||||||
|
tracksRegLiveness: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
|
||||||
|
%0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
|
||||||
|
%1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
|
||||||
|
S_ENDPGM implicit %1
|
||||||
|
...
|
Loading…
Reference in New Issue
Block a user