1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

AMDGPU: Check NSZ MI flag when folding omod

I'm not sure exactly which nsz flag combination is OK.
I think it is fine as long as the flag is on either instruction.
For now, just check it on the omod multiply.

llvm-svn: 339513
This commit is contained in:
Matt Arsenault 2018-08-12 08:44:25 +00:00
parent cc2045f6d7
commit 9bf7a399aa
2 changed files with 77 additions and 4 deletions

View File

@ -994,9 +994,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// omod is ignored by hardware if IEEE bit is enabled. omod also does not // omod is ignored by hardware if IEEE bit is enabled. omod also does not
// correctly handle signed zeros. // correctly handle signed zeros.
// //
// TODO: Check nsz on instructions when fast math flags are preserved to MI bool IsIEEEMode = ST->enableIEEEBit(MF);
// level. bool HasNSZ = MFI->hasNoSignedZerosFPMath();
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
for (MachineBasicBlock *MBB : depth_first(&MF)) { for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next; MachineBasicBlock::iterator I, Next;
@ -1007,7 +1006,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
tryFoldInst(TII, &MI); tryFoldInst(TII, &MI);
if (!TII->isFoldableCopy(MI)) { if (!TII->isFoldableCopy(MI)) {
if (IsIEEEMode || !tryFoldOMod(MI)) // TODO: Omod might be OK if there is NSZ only on the source
// instruction, and not the omod multiply.
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
!tryFoldOMod(MI))
tryFoldClamp(MI); tryFoldClamp(MI);
continue; continue;
} }

View File

@ -0,0 +1,71 @@
# Runs only the si-fold-operands pass over the MIR functions in this file and
# verifies the printed result with FileCheck using the GCN check prefix.
# The embedded IR module provides one stub function (body: unreachable) for
# each MIR function name used further down.
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
--- |
define amdgpu_ps void @omod_inst_flag_nsz_src() {
unreachable
}
define amdgpu_ps void @omod_inst_flag_nsz_result() {
unreachable
}
define amdgpu_ps void @omod_inst_flag_nsz_both() {
unreachable
}
...
---
# Case: nsz is present only on the V_ADD_F32_e64 that defines %0; the
# V_MUL_F32_e64 that consumes it carries no flag. The multiplier immediate
# 1073741824 is 0x40000000, the IEEE-754 single-precision encoding of 2.0.
# The check lines match the input instructions unchanged, i.e. the multiply
# is expected to survive the pass and nothing is folded into the add.
# FIXME: Is it OK to fold omod for this?
# GCN-LABEL: name: omod_inst_flag_nsz_src
# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
# GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
# GCN-NEXT: S_ENDPGM implicit %1
name: omod_inst_flag_nsz_src
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
%1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
S_ENDPGM implicit %1
...
---
# Case: nsz is present only on the V_MUL_F32_e64 (multiply by 1073741824,
# i.e. 0x40000000, the IEEE-754 single-precision encoding of 2.0); the
# V_ADD_F32_e64 that defines %0 carries no flag.
# Expected result: the multiply is gone, S_ENDPGM reads %0 directly, and the
# last immediate operand of the add changes from 0 to 1 (presumably the omod
# field -- confirm against the V_ADD_F32_e64 operand layout). The add is still
# printed without nsz.
# GCN-LABEL: name: omod_inst_flag_nsz_result
# GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
# GCN-NEXT: S_ENDPGM implicit %0
name: omod_inst_flag_nsz_result
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
%1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
S_ENDPGM implicit %1
...
---
# Case: nsz is present on both the V_ADD_F32_e64 that defines %0 and the
# V_MUL_F32_e64 that multiplies it by 1073741824 (0x40000000, the IEEE-754
# single-precision encoding of 2.0).
# Expected result: the multiply is gone, S_ENDPGM reads %0 directly, and the
# last immediate operand of the add changes from 0 to 1 (presumably the omod
# field -- confirm against the V_ADD_F32_e64 operand layout). The add keeps
# its nsz flag.
# GCN-LABEL: name: omod_inst_flag_nsz_both
# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
# GCN-NEXT: S_ENDPGM implicit %0
name: omod_inst_flag_nsz_both
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
%1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
S_ENDPGM implicit %1
...