mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AMDGPU: Be explicit about denormal mode in MIR tests
Start checking the machine function in GlobalISel instead of the target directly. This temporarily breaks fcanonicalize selection in GlobalISel.
This commit is contained in:
parent
78c8c056b5
commit
907a06be36
@ -1720,13 +1720,15 @@ bool AMDGPULegalizerInfo::legalizeFMad(
|
||||
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
|
||||
assert(Ty.isScalar());
|
||||
|
||||
MachineFunction &MF = B.getMF();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// TODO: Always legal with future ftz flag.
|
||||
if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals())
|
||||
if (Ty == LLT::scalar(32) && !MFI->getMode().FP32Denormals)
|
||||
return true;
|
||||
if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals())
|
||||
if (Ty == LLT::scalar(16) && !MFI->getMode().FP64FP16Denormals)
|
||||
return true;
|
||||
|
||||
MachineFunction &MF = B.getMF();
|
||||
|
||||
MachineIRBuilder HelperBuilder(MI);
|
||||
GISelObserverWrapper DummyObserver;
|
||||
@ -1897,7 +1899,8 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
|
||||
if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64)
|
||||
return false;
|
||||
|
||||
if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals())
|
||||
if (!Unsafe && ResTy == S32 &&
|
||||
MF.getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals)
|
||||
return false;
|
||||
|
||||
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
|
||||
@ -1973,15 +1976,16 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
|
||||
// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
|
||||
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
|
||||
static void toggleSPDenormMode(bool Enable,
|
||||
MachineIRBuilder &B,
|
||||
const GCNSubtarget &ST,
|
||||
MachineIRBuilder &B) {
|
||||
AMDGPU::SIModeRegisterDefaults Mode) {
|
||||
// Set SP denorm mode to this value.
|
||||
unsigned SPDenormMode =
|
||||
Enable ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||
|
||||
if (ST.hasDenormModeInst()) {
|
||||
// Preserve default FP64FP16 denorm mode while updating FP32 mode.
|
||||
unsigned DPDenormModeDefault = ST.hasFP64Denormals()
|
||||
unsigned DPDenormModeDefault = Mode.FP64FP16Denormals
|
||||
? FP_DENORM_FLUSH_NONE
|
||||
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
|
||||
|
||||
@ -2008,6 +2012,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||
Register Res = MI.getOperand(0).getReg();
|
||||
Register LHS = MI.getOperand(1).getReg();
|
||||
Register RHS = MI.getOperand(2).getReg();
|
||||
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
|
||||
AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
|
||||
|
||||
uint16_t Flags = MI.getFlags();
|
||||
|
||||
@ -2036,8 +2042,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||
|
||||
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
|
||||
// aren't modeled as reading it.
|
||||
if (!ST.hasFP32Denormals())
|
||||
toggleSPDenormMode(true, ST, B);
|
||||
if (!Mode.FP32Denormals)
|
||||
toggleSPDenormMode(true, B, ST, Mode);
|
||||
|
||||
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
|
||||
auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
|
||||
@ -2046,8 +2052,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
|
||||
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
|
||||
|
||||
if (!ST.hasFP32Denormals())
|
||||
toggleSPDenormMode(false, ST, B);
|
||||
if (!Mode.FP32Denormals)
|
||||
toggleSPDenormMode(false, B, ST, Mode);
|
||||
|
||||
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
|
||||
.addUse(Fma4.getReg(0))
|
||||
|
@ -1,24 +1,22 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals,+fp64-fp16-denormals -run-pass=instruction-select -global-isel-abort=2 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9-DENORM %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-fp64-fp16-denormals -run-pass=instruction-select -global-isel-abort=2 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9-FLUSH %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_f16
|
||||
name: fcanonicalize_f16_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_f16
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DENORM: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_f16
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-FLUSH: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]]
|
||||
; GFX9-LABEL: name: fcanonicalize_f16_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s16) = G_FCANONICALIZE %1
|
||||
@ -27,21 +25,43 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_f32
|
||||
name: fcanonicalize_f16_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_f32
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_f32
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
; GFX9-LABEL: name: fcanonicalize_f16_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:vgpr(s16) = G_FCANONICALIZE %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_f32_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_f32_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
@ -49,21 +69,43 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_v2f16
|
||||
name: fcanonicalize_f32_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_v2f16
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9-DENORM: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_v2f16
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9-FLUSH: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_f32_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_v2f16_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_v2f16_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
@ -71,21 +113,43 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_f64
|
||||
name: fcanonicalize_v2f16_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_v2f16_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[FCANONICALIZE:%[0-9]+]]:vgpr(<2 x s16>) = G_FCANONICALIZE [[COPY]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[FCANONICALIZE]](<2 x s16>)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_f64_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_f64
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9-DENORM: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F64_]]
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_f64
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9-FLUSH: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F64_]]
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_f64_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MAX_F64_]]
|
||||
%0:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
@ -93,21 +157,66 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fabs_f32
|
||||
name: fcanonicalize_f64_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; GFX9-LABEL: name: fcanonicalize_f64_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MAX_F64_]]
|
||||
%0:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_FCANONICALIZE %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fabs_f32_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_fabs_f32
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_fabs_f32
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FABS %0
|
||||
%2:vgpr(s32) = G_FCANONICALIZE %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fabs_f32_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FABS %0
|
||||
%2:vgpr(s32) = G_FCANONICALIZE %1
|
||||
@ -116,21 +225,20 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fneg_f32
|
||||
name: fcanonicalize_fneg_f32_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_fneg_f32
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_fneg_f32
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FNEG %0
|
||||
%2:vgpr(s32) = G_FCANONICALIZE %1
|
||||
@ -139,25 +247,69 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fneg_fabs_f32
|
||||
name: fcanonicalize_fneg_f32_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-DENORM-LABEL: name: fcanonicalize_fneg_fabs_f32
|
||||
; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DENORM: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GFX9-DENORM: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec
|
||||
; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]]
|
||||
; GFX9-FLUSH-LABEL: name: fcanonicalize_fneg_fabs_f32
|
||||
; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-FLUSH: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GFX9-FLUSH: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec
|
||||
; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FNEG %0
|
||||
%2:vgpr(s32) = G_FCANONICALIZE %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fneg_fabs_f32_denorm
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FNEG %0
|
||||
%2:vgpr(s32) = G_FABS %1
|
||||
%3:vgpr(s32) = G_FCANONICALIZE %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: fcanonicalize_fneg_fabs_f32_flush
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_flush
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec
|
||||
; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FNEG %0
|
||||
%2:vgpr(s32) = G_FABS %1
|
||||
|
@ -7,6 +7,11 @@
|
||||
|
||||
---
|
||||
name: test_fdiv_s16
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
@ -23,14 +28,12 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
|
||||
@ -94,12 +97,108 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fdiv_s32
|
||||
name: test_fdiv_s32_denorms_on
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; SI-LABEL: name: test_fdiv_s32
|
||||
; SI-LABEL: name: test_fdiv_s32_denorms_on
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; SI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; VI-LABEL: name: test_fdiv_s32_denorms_on
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; VI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-LABEL: name: test_fdiv_s32_denorms_on
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_on
|
||||
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]]
|
||||
; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32)
|
||||
; GFX10-LABEL: name: test_fdiv_s32_denorms_on
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX10: $vgpr0 = COPY [[INT6]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = G_FDIV %0, %1
|
||||
$vgpr0 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fdiv_s32_denorms_off
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; SI-LABEL: name: test_fdiv_s32_denorms_off
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
@ -118,7 +217,7 @@ body: |
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; SI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; VI-LABEL: name: test_fdiv_s32
|
||||
; VI-LABEL: name: test_fdiv_s32_denorms_off
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
@ -137,7 +236,7 @@ body: |
|
||||
; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; VI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-LABEL: name: test_fdiv_s32
|
||||
; GFX9-LABEL: name: test_fdiv_s32_denorms_off
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
@ -156,13 +255,13 @@ body: |
|
||||
; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off
|
||||
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]]
|
||||
; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32)
|
||||
; GFX10-LABEL: name: test_fdiv_s32
|
||||
; GFX10-LABEL: name: test_fdiv_s32_denorms_off
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
@ -188,36 +287,41 @@ body: |
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fdiv_s32_arcp
|
||||
name: test_fdiv_s32_denorms_off_arcp
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; SI-LABEL: name: test_fdiv_s32_arcp
|
||||
; SI-LABEL: name: test_fdiv_s32_denorms_off_arcp
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; SI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
|
||||
; SI: $vgpr0 = COPY [[FMUL]](s32)
|
||||
; VI-LABEL: name: test_fdiv_s32_arcp
|
||||
; VI-LABEL: name: test_fdiv_s32_denorms_off_arcp
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; VI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
; VI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
|
||||
; VI: $vgpr0 = COPY [[FMUL]](s32)
|
||||
; GFX9-LABEL: name: test_fdiv_s32_arcp
|
||||
; GFX9-LABEL: name: test_fdiv_s32_denorms_off_arcp
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
|
||||
; GFX9: $vgpr0 = COPY [[FMUL]](s32)
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_arcp
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off_arcp
|
||||
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
|
||||
; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32)
|
||||
; GFX10-LABEL: name: test_fdiv_s32_arcp
|
||||
; GFX10-LABEL: name: test_fdiv_s32_denorms_off_arcp
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
|
||||
@ -231,6 +335,11 @@ body: |
|
||||
|
||||
---
|
||||
name: test_fdiv_s64
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
@ -269,6 +378,11 @@ body: |
|
||||
|
||||
---
|
||||
name: test_fdiv_v2s32
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
@ -450,28 +564,24 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; SI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; SI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; SI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; SI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]]
|
||||
; SI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; SI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; SI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; SI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32)
|
||||
@ -486,28 +596,24 @@ body: |
|
||||
; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; VI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; VI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]]
|
||||
; VI: S_SETREG_IMM32_B32 3, 2305
|
||||
; VI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; VI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; VI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]]
|
||||
; VI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; VI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; VI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; VI: S_SETREG_IMM32_B32 0, 2305
|
||||
; VI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; VI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; VI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; VI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]]
|
||||
; VI: S_SETREG_IMM32_B32 3, 2305
|
||||
; VI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; VI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; VI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]]
|
||||
; VI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; VI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; VI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; VI: S_SETREG_IMM32_B32 0, 2305
|
||||
; VI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; VI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32)
|
||||
@ -522,28 +628,24 @@ body: |
|
||||
; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; GFX9: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX9: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]]
|
||||
; GFX9: S_SETREG_IMM32_B32 3, 2305
|
||||
; GFX9: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX9: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX9: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX9: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX9: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX9: S_SETREG_IMM32_B32 0, 2305
|
||||
; GFX9: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX9: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; GFX9: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]]
|
||||
; GFX9: S_SETREG_IMM32_B32 3, 2305
|
||||
; GFX9: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; GFX9: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]]
|
||||
; GFX9: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; GFX9: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; GFX9: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; GFX9: S_SETREG_IMM32_B32 0, 2305
|
||||
; GFX9: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; GFX9: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32)
|
||||
@ -569,28 +671,24 @@ body: |
|
||||
; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; GFX10: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX10: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]]
|
||||
; GFX10: S_DENORM_MODE 15
|
||||
; GFX10: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX10: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX10: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX10: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX10: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX10: S_DENORM_MODE 12
|
||||
; GFX10: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX10: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32)
|
||||
; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; GFX10: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]]
|
||||
; GFX10: S_DENORM_MODE 15
|
||||
; GFX10: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; GFX10: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]]
|
||||
; GFX10: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; GFX10: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; GFX10: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; GFX10: S_DENORM_MODE 12
|
||||
; GFX10: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; GFX10: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32)
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32)
|
||||
@ -617,42 +715,36 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
|
||||
; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]]
|
||||
; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
|
||||
; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
|
||||
; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
|
||||
; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]]
|
||||
; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
|
||||
; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32)
|
||||
@ -667,42 +759,36 @@ body: |
|
||||
; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; VI: S_SETREG_IMM32_B32 3, 2305
|
||||
; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; VI: S_SETREG_IMM32_B32 0, 2305
|
||||
; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; VI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; VI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; VI: S_SETREG_IMM32_B32 3, 2305
|
||||
; VI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; VI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; VI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; VI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; VI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; VI: S_SETREG_IMM32_B32 0, 2305
|
||||
; VI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; VI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; VI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; VI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; VI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
|
||||
; VI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
|
||||
; VI: S_SETREG_IMM32_B32 3, 2305
|
||||
; VI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]]
|
||||
; VI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
|
||||
; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
|
||||
; VI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
|
||||
; VI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]]
|
||||
; VI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]]
|
||||
; VI: S_SETREG_IMM32_B32 0, 2305
|
||||
; VI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
|
||||
; VI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32)
|
||||
@ -717,42 +803,36 @@ body: |
|
||||
; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX9: S_SETREG_IMM32_B32 3, 2305
|
||||
; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX9: S_SETREG_IMM32_B32 0, 2305
|
||||
; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; GFX9: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; GFX9: S_SETREG_IMM32_B32 3, 2305
|
||||
; GFX9: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; GFX9: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; GFX9: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; GFX9: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; GFX9: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; GFX9: S_SETREG_IMM32_B32 0, 2305
|
||||
; GFX9: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; GFX9: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; GFX9: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; GFX9: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; GFX9: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
|
||||
; GFX9: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
|
||||
; GFX9: S_SETREG_IMM32_B32 3, 2305
|
||||
; GFX9: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]]
|
||||
; GFX9: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
|
||||
; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
|
||||
; GFX9: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
|
||||
; GFX9: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]]
|
||||
; GFX9: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]]
|
||||
; GFX9: S_SETREG_IMM32_B32 0, 2305
|
||||
; GFX9: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
|
||||
; GFX9: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32)
|
||||
@ -780,42 +860,36 @@ body: |
|
||||
; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX10: S_DENORM_MODE 15
|
||||
; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX10: S_DENORM_MODE 12
|
||||
; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32)
|
||||
; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; GFX10: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; GFX10: S_DENORM_MODE 15
|
||||
; GFX10: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]]
|
||||
; GFX10: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; GFX10: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; GFX10: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; GFX10: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; GFX10: S_DENORM_MODE 12
|
||||
; GFX10: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; GFX10: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32)
|
||||
; GFX10: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; GFX10: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; GFX10: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
|
||||
; GFX10: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
|
||||
; GFX10: S_DENORM_MODE 15
|
||||
; GFX10: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]]
|
||||
; GFX10: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
|
||||
; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
|
||||
; GFX10: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
|
||||
; GFX10: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]]
|
||||
; GFX10: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]]
|
||||
; GFX10: S_DENORM_MODE 12
|
||||
; GFX10: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
|
||||
; GFX10: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32)
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32)
|
||||
@ -910,14 +984,12 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
|
||||
@ -927,14 +999,12 @@ body: |
|
||||
; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
|
||||
; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]]
|
||||
; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
|
||||
; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32)
|
||||
@ -1081,14 +1151,12 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
|
||||
@ -1098,14 +1166,12 @@ body: |
|
||||
; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
|
||||
; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]]
|
||||
; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
|
||||
; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32)
|
||||
@ -1115,14 +1181,12 @@ body: |
|
||||
; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32)
|
||||
; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
|
||||
; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]]
|
||||
; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
|
||||
; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
|
||||
; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
|
||||
; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]]
|
||||
; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
|
||||
; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32)
|
||||
; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32)
|
||||
@ -1361,14 +1425,12 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
|
||||
@ -1378,14 +1440,12 @@ body: |
|
||||
; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
|
||||
; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
|
||||
; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]]
|
||||
; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
|
||||
; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
|
||||
; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
|
||||
; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]]
|
||||
; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
|
||||
; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
|
||||
; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32)
|
||||
@ -1395,14 +1455,12 @@ body: |
|
||||
; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32)
|
||||
; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
|
||||
; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]]
|
||||
; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
|
||||
; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
|
||||
; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
|
||||
; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]]
|
||||
; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
|
||||
; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32)
|
||||
; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32)
|
||||
@ -1412,14 +1470,12 @@ body: |
|
||||
; SI: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32)
|
||||
; SI: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32)
|
||||
; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]]
|
||||
; SI: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]]
|
||||
; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]]
|
||||
; SI: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]]
|
||||
; SI: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]]
|
||||
; SI: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1)
|
||||
; SI: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32)
|
||||
; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32)
|
||||
@ -1634,14 +1690,12 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
|
||||
@ -1696,14 +1750,12 @@ body: |
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: S_SETREG_IMM32_B32 3, 2305
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: S_SETREG_IMM32_B32 0, 2305
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
|
||||
; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
|
||||
@ -1752,25 +1804,73 @@ body: |
|
||||
liveins: $vgpr0
|
||||
|
||||
; SI-LABEL: name: test_fdiv_s32_constant_one_rcp
|
||||
; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
|
||||
; SI: $vgpr0 = COPY [[INT]](s32)
|
||||
; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; SI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; VI-LABEL: name: test_fdiv_s32_constant_one_rcp
|
||||
; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
|
||||
; VI: $vgpr0 = COPY [[INT]](s32)
|
||||
; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; VI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-LABEL: name: test_fdiv_s32_constant_one_rcp
|
||||
; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[INT]](s32)
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_one_rcp
|
||||
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
|
||||
; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32)
|
||||
; GFX10-LABEL: name: test_fdiv_s32_constant_one_rcp
|
||||
; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
|
||||
; GFX10: $vgpr0 = COPY [[INT]](s32)
|
||||
; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
|
||||
; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX10: $vgpr0 = COPY [[INT6]](s32)
|
||||
%0:_(s32) = G_FCONSTANT float 1.0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = G_FDIV %0, %1
|
||||
@ -1784,30 +1884,78 @@ body: |
|
||||
liveins: $vgpr0
|
||||
|
||||
; SI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp
|
||||
; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
|
||||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
|
||||
; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
|
||||
; SI: $vgpr0 = COPY [[INT]](s32)
|
||||
; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; SI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; VI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp
|
||||
; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
|
||||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
|
||||
; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
|
||||
; VI: $vgpr0 = COPY [[INT]](s32)
|
||||
; VI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; VI: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-LABEL: name: test_fdiv_s32_constant_negative_one_rcp
|
||||
; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[INT]](s32)
|
||||
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[INT6]](s32)
|
||||
; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_negative_one_rcp
|
||||
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
|
||||
; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
|
||||
; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32)
|
||||
; GFX10-LABEL: name: test_fdiv_s32_constant_negative_one_rcp
|
||||
; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
|
||||
; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
|
||||
; GFX10: $vgpr0 = COPY [[INT]](s32)
|
||||
; GFX10: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
|
||||
; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
|
||||
; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
|
||||
; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
|
||||
; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
|
||||
; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
|
||||
; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]]
|
||||
; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]]
|
||||
; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
|
||||
; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32)
|
||||
; GFX10: $vgpr0 = COPY [[INT6]](s32)
|
||||
%0:_(s32) = G_FCONSTANT float -1.0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = G_FDIV %0, %1
|
||||
@ -1816,6 +1964,11 @@ body: |
|
||||
|
||||
---
|
||||
name: test_fdiv_s64_constant_one_rcp
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
@ -1852,6 +2005,11 @@ body: |
|
||||
|
||||
---
|
||||
name: test_fdiv_s64_constant_negative_one_rcp
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
@ -1,181 +0,0 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx10 -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx10 -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s
|
||||
|
||||
---
|
||||
name: test_fmad_s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; F32DENORM-LABEL: name: test_fmad_s32
|
||||
; F32DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; F32DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; F32DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; F32DENORM: $vgpr0 = COPY %3(s32)
|
||||
; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; F32FLUSH-LABEL: name: test_fmad_s32
|
||||
; F32FLUSH: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; F32FLUSH: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; F32FLUSH: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; F32FLUSH: $vgpr0 = COPY [[FMAD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = G_FMAD %0, %1, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_s32_flags
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; F32DENORM-LABEL: name: test_fmad_s32_flags
|
||||
; F32DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; F32DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; F32DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; F32DENORM: $vgpr0 = COPY %3(s32)
|
||||
; F32DENORM: %4:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; F32DENORM: %3:_(s32) = nnan G_FADD %4, [[COPY2]]
|
||||
; F32FLUSH-LABEL: name: test_fmad_s32_flags
|
||||
; F32FLUSH: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; F32FLUSH: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; F32FLUSH: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; F32FLUSH: %3:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; F32FLUSH: $vgpr0 = COPY %3(s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = nnan G_FMAD %0, %1, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v2s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
|
||||
; F32DENORM-LABEL: name: test_fmad_v2s32
|
||||
; F32DENORM: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; F32DENORM: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; F32DENORM: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; F32DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; F32DENORM: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; F32DENORM: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; F32DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32)
|
||||
; F32DENORM: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]]
|
||||
; F32DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; F32DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]]
|
||||
; F32FLUSH-LABEL: name: test_fmad_v2s32
|
||||
; F32FLUSH: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; F32FLUSH: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; F32FLUSH: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; F32FLUSH: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; F32FLUSH: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; F32FLUSH: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
|
||||
; F32FLUSH: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; F32FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; F32FLUSH: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
%3:_(<2 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v3s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
|
||||
|
||||
; F32DENORM-LABEL: name: test_fmad_v3s32
|
||||
; F32DENORM: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; F32DENORM: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; F32DENORM: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; F32DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; F32DENORM: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; F32DENORM: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; F32DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32)
|
||||
; F32DENORM: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; F32DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; F32DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; F32DENORM: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; F32DENORM: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]]
|
||||
; F32FLUSH-LABEL: name: test_fmad_v3s32
|
||||
; F32FLUSH: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; F32FLUSH: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; F32FLUSH: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; F32FLUSH: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; F32FLUSH: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; F32FLUSH: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
|
||||
; F32FLUSH: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
|
||||
; F32FLUSH: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; F32FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; F32FLUSH: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
%2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
%3:_(<3 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v4s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
|
||||
; F32DENORM-LABEL: name: test_fmad_v4s32
|
||||
; F32DENORM: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; F32DENORM: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; F32DENORM: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; F32DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; F32DENORM: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; F32DENORM: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; F32DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32)
|
||||
; F32DENORM: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]]
|
||||
; F32DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; F32DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]]
|
||||
; F32DENORM: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; F32DENORM: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]]
|
||||
; F32DENORM: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; F32DENORM: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]]
|
||||
; F32FLUSH-LABEL: name: test_fmad_v4s32
|
||||
; F32FLUSH: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; F32FLUSH: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; F32FLUSH: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; F32FLUSH: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; F32FLUSH: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; F32FLUSH: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
|
||||
; F32FLUSH: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
|
||||
; F32FLUSH: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
|
||||
; F32FLUSH: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; F32FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; F32FLUSH: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
%3:_(<4 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
File diff suppressed because it is too large
Load Diff
495
test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir
Normal file
495
test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir
Normal file
@ -0,0 +1,495 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
---
|
||||
name: test_fmad_s32_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_s32_flush
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX6: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX7-LABEL: name: test_fmad_s32_flush
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX7: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX10-LABEL: name: test_fmad_s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX10: $vgpr0 = COPY [[FMAD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = G_FMAD %0, %1, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_s32_flags_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_s32_flags_flush
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX6: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX6: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX7-LABEL: name: test_fmad_s32_flags_flush
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX7: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX10-LABEL: name: test_fmad_s32_flags_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX10: $vgpr0 = COPY [[FMAD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = nnan G_FMAD %0, %1, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v2s32_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_v2s32_flush
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
|
||||
; GFX6: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX7-LABEL: name: test_fmad_v2s32_flush
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
|
||||
; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v2s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
|
||||
; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
%3:_(<2 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v3s32_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_v3s32_flush
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
|
||||
; GFX6: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
|
||||
; GFX6: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX7-LABEL: name: test_fmad_v3s32_flush
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX7: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
|
||||
; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
|
||||
; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v3s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
|
||||
; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
|
||||
; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
%2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
%3:_(<3 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v4s32_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_v4s32_flush
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
|
||||
; GFX6: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
|
||||
; GFX6: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
|
||||
; GFX6: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX7-LABEL: name: test_fmad_v4s32_flush
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX7: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
|
||||
; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
|
||||
; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
|
||||
; GFX7: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v4s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
|
||||
; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
|
||||
; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
|
||||
; GFX10: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
%3:_(<4 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_s32_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_s32_denorm
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX6: $vgpr0 = COPY %3(s32)
|
||||
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX7-LABEL: name: test_fmad_s32_denorm
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: $vgpr0 = COPY %3(s32)
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX10-LABEL: name: test_fmad_s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: $vgpr0 = COPY %3(s32)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = G_FMAD %0, %1, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_s32_flags_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_s32_flags_denorm
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX6: $vgpr0 = COPY %3(s32)
|
||||
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX6: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX7-LABEL: name: test_fmad_s32_flags_denorm
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: $vgpr0 = COPY %3(s32)
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX10-LABEL: name: test_fmad_s32_flags_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: $vgpr0 = COPY %3(s32)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = nnan G_FMAD %0, %1, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v2s32_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_v2s32_denorm
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]]
|
||||
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]]
|
||||
; GFX7-LABEL: name: test_fmad_v2s32_denorm
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]]
|
||||
; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]]
|
||||
; GFX10-LABEL: name: test_fmad_v2s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32)
|
||||
; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]]
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
%3:_(<2 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v3s32_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_v3s32_denorm
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]]
|
||||
; GFX7-LABEL: name: test_fmad_v3s32_denorm
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX7: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]]
|
||||
; GFX10-LABEL: name: test_fmad_v3s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]]
|
||||
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
%2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
%3:_(<3 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v4s32_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp32-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
|
||||
; GFX6-LABEL: name: test_fmad_v4s32_denorm
|
||||
; GFX6: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]]
|
||||
; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]]
|
||||
; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]]
|
||||
; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]]
|
||||
; GFX7-LABEL: name: test_fmad_v4s32_denorm
|
||||
; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX7: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]]
|
||||
; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]]
|
||||
; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]]
|
||||
; GFX7: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]]
|
||||
; GFX10-LABEL: name: test_fmad_v4s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]]
|
||||
; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]]
|
||||
; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]]
|
||||
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
%3:_(<4 x s32>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
112
test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir
Normal file
112
test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir
Normal file
@ -0,0 +1,112 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: test_fmad_s64_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
|
||||
; CHECK-LABEL: name: test_fmad_s64_flush
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
|
||||
; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(s64) = COPY $vgpr4_vgpr5
|
||||
%3:_(s64) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v2s64_flush
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
|
||||
; CHECK-LABEL: name: test_fmad_v2s64_flush
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>)
|
||||
; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]]
|
||||
; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]]
|
||||
; CHECK: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]]
|
||||
; CHECK: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64)
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
|
||||
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
%3:_(<2 x s64>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_s64_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
|
||||
; CHECK-LABEL: name: test_fmad_s64_denorm
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
|
||||
; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(s64) = COPY $vgpr4_vgpr5
|
||||
%3:_(s64) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fmad_v2s64_denorm
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
fp64-fp16-denormals: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
|
||||
; CHECK-LABEL: name: test_fmad_v2s64_denorm
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>)
|
||||
; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]]
|
||||
; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]]
|
||||
; CHECK: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]]
|
||||
; CHECK: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64)
|
||||
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
|
||||
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
%3:_(<2 x s64>) = G_FMAD %0, %1, %2
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
@ -12,6 +12,7 @@ tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
ieee: false
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
@ -33,6 +34,7 @@ tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
ieee: false
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
@ -54,6 +56,7 @@ tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
mode:
|
||||
ieee: false
|
||||
fp32-denormals: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
Loading…
Reference in New Issue
Block a user