mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
AMDGPU: Run SIFoldOperands after PeepholeOptimizer
PeepholeOptimizer cleans up redundant copies, which makes the operand folding more effective. shader-db stats: Totals: SGPRS: 34200 -> 34336 (0.40 %) VGPRS: 22118 -> 21655 (-2.09 %) Code Size: 632144 -> 633460 (0.21 %) bytes LDS: 11 -> 11 (0.00 %) blocks Scratch: 10240 -> 11264 (10.00 %) bytes per wave Max Waves: 8822 -> 8918 (1.09 %) Wait states: 0 -> 0 (0.00 %) Totals from affected shaders: SGPRS: 7704 -> 7840 (1.77 %) VGPRS: 5169 -> 4706 (-8.96 %) Code Size: 234444 -> 235760 (0.56 %) bytes LDS: 2 -> 2 (0.00 %) blocks Scratch: 0 -> 1024 (0.00 %) bytes per wave Max Waves: 1188 -> 1284 (8.08 %) Wait states: 0 -> 0 (0.00 %) Increases: SGPRS: 35 (0.01 %) VGPRS: 1 (0.00 %) Code Size: 59 (0.02 %) LDS: 0 (0.00 %) Scratch: 1 (0.00 %) Max Waves: 48 (0.02 %) Wait states: 0 (0.00 %) Decreases: SGPRS: 26 (0.01 %) VGPRS: 54 (0.02 %) Code Size: 68 (0.03 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) Max Waves: 4 (0.00 %) Wait states: 0 (0.00 %) llvm-svn: 266378
This commit is contained in:
parent
61abb9daf9
commit
2dfb6d03c5
@ -202,6 +202,7 @@ public:
|
||||
GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
|
||||
: AMDGPUPassConfig(TM, PM) { }
|
||||
bool addPreISel() override;
|
||||
void addMachineSSAOptimization() override;
|
||||
bool addInstSelector() override;
|
||||
#ifdef LLVM_BUILD_GLOBAL_ISEL
|
||||
bool addIRTranslator() override;
|
||||
@ -323,11 +324,24 @@ bool GCNPassConfig::addPreISel() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extends the generic machine-SSA optimization pipeline for GCN targets: the
// base TargetPassConfig hook runs first, then SIFoldOperands is added,
// followed by DeadMachineInstructionElim.
void GCNPassConfig::addMachineSSAOptimization() {
|
||||
TargetPassConfig::addMachineSSAOptimization();
|
||||
|
||||
// We want to fold operands after PeepholeOptimizer has run (or as part of
|
||||
// it), because it will eliminate extra copies making it easier to fold the
|
||||
// real source operand. We want to eliminate dead instructions after, so that
|
||||
// we see fewer uses of the copies. We then need to clean up the dead
|
||||
// instructions left over after the operands are folded as well.
|
||||
//
|
||||
// XXX - Can we get away without running DeadMachineInstructionElim again?
|
||||
addPass(&SIFoldOperandsID);
|
||||
addPass(&DeadMachineInstructionElimID);
|
||||
}
|
||||
|
||||
// Adds the GCN instruction-selection passes: the AMDGPUPassConfig selector
// first, then SILowerI1Copies and SIFixSGPRCopies. Always returns false.
// NOTE(review): this listing is a rendered diff without +/- markers; given
// that the commit adds SIFoldOperands to addMachineSSAOptimization(), the
// createSIFoldOperandsPass() line here is presumably the one being removed --
// confirm against the actual patch.
bool GCNPassConfig::addInstSelector() {
|
||||
AMDGPUPassConfig::addInstSelector();
|
||||
addPass(createSILowerI1CopiesPass());
|
||||
addPass(&SIFixSGPRCopiesID);
|
||||
addPass(createSIFoldOperandsPass());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1058,6 +1058,8 @@ static void removeModOperands(MachineInstr &MI) {
|
||||
MI.RemoveOperand(Src0ModIdx);
|
||||
}
|
||||
|
||||
// TODO: Maybe this should be removed, and we should custom fold everything in
|
||||
// SIFoldOperands?
|
||||
bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
|
||||
unsigned Reg, MachineRegisterInfo *MRI) const {
|
||||
if (!MRI->hasOneNonDBGUse(Reg))
|
||||
@ -1073,6 +1075,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
|
||||
return false;
|
||||
}
|
||||
|
||||
const MachineOperand &ImmOp = DefMI->getOperand(1);
|
||||
|
||||
// If this is a free constant, there's no reason to do this.
|
||||
// TODO: We could fold this here instead of letting SIFoldOperands do it
|
||||
// later.
|
||||
if (isInlineConstant(ImmOp, 4))
|
||||
return false;
|
||||
|
||||
MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
|
||||
MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
|
||||
MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
|
||||
|
@ -159,7 +159,7 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float
|
||||
; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32
|
||||
; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, |[[R2]]|
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
@ -61,7 +61,7 @@ define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)*
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
|
||||
; SI: v_fma_f32 {{v[0-9]+}}, 2.0, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
|
||||
define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
|
@ -96,8 +96,8 @@ define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float ad
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
|
||||
; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, 2.0, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, 4.0, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
|
||||
; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
|
||||
define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
|
||||
|
@ -39,7 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
|
||||
; unless the target returns true for isNegFree()
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_free_f64:
|
||||
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}}
|
||||
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
|
||||
%bc = bitcast i64 %in to double
|
||||
%fsub = fsub double 0.0, %bc
|
||||
|
@ -47,7 +47,7 @@ define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}s_fsub_imm_f64:
|
||||
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}}
|
||||
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}, 4.0
|
||||
define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
|
||||
%sub = fsub double 4.0, %a
|
||||
store double %sub, double addrspace(1)* %out
|
||||
@ -55,7 +55,7 @@ define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
|
||||
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}}
|
||||
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}}, -4.0
|
||||
define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
|
||||
%sub = fsub double %a, 4.0
|
||||
store double %sub, double addrspace(1)* %out
|
||||
|
@ -322,7 +322,7 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0.0
|
||||
@ -333,7 +333,7 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0.5
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0.5
|
||||
@ -344,7 +344,7 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -0.5
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, -0.5
|
||||
@ -355,7 +355,7 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1.0
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 1.0
|
||||
@ -366,7 +366,7 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1.0
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, -1.0
|
||||
@ -377,7 +377,7 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2.0
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 2.0
|
||||
@ -388,7 +388,7 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2.0
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, -2.0
|
||||
@ -399,7 +399,7 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 4.0
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 4.0
|
||||
@ -410,7 +410,7 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -4.0
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, -4.0
|
||||
@ -422,7 +422,7 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1{{$}}
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0x0000000000000001
|
||||
@ -433,7 +433,7 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2{{$}}
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0x0000000000000002
|
||||
@ -444,7 +444,7 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_16_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 16
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0x0000000000000010
|
||||
@ -455,7 +455,7 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0xffffffffffffffff
|
||||
@ -466,7 +466,7 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0xfffffffffffffffe
|
||||
@ -477,7 +477,7 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -16
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0xfffffffffffffff0
|
||||
@ -488,7 +488,7 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_63_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 63
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0x000000000000003F
|
||||
@ -499,7 +499,7 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_64_f64:
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 64
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = fadd double %x, 0x0000000000000040
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
|
||||
; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; FIXME: Enable for VI.
|
||||
@ -45,7 +45,7 @@ define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a,
|
||||
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
|
||||
; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VA]], [[VC]]
|
||||
; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], 1.0, [[VC]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
|
||||
@ -146,7 +146,7 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
|
||||
|
||||
; SI: BB9_2:
|
||||
; SI: s_or_b64 exec, exec, [[SAVE]]
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v0
|
||||
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: buffer_store_dword
|
||||
; SI: s_endpgm
|
||||
|
@ -1,13 +1,17 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; FIXME: None of these trigger madmk emission anymore. It is still
|
||||
; possible, but requires the correct registers to be used which is
|
||||
; hard to trigger.
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
|
||||
; GCN-LABEL: {{^}}madmk_f32:
|
||||
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
|
||||
; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
|
||||
define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -182,7 +186,7 @@ define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float ad
|
||||
|
||||
; SI-LABEL: {{^}}kill_madmk_verifier_error:
|
||||
; SI: s_xor_b64
|
||||
; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
|
||||
; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
|
||||
; SI: s_or_b64
|
||||
define void @kill_madmk_verifier_error() nounwind {
|
||||
bb:
|
||||
|
@ -96,8 +96,8 @@ define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
|
||||
; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
|
||||
; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
|
||||
; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9
|
||||
; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9
|
||||
; SI: s_endpgm
|
||||
define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -3,7 +3,7 @@
|
||||
; register operands in the correct order when modifying the opcode of an
|
||||
; instruction to V_ADD_I32_e32.
|
||||
|
||||
; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
|
||||
; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 killed %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
|
||||
|
||||
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
entry:
|
||||
|
@ -10,14 +10,14 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
|
||||
; SI: v_cmp_eq_i32_e64 vcc,
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
|
||||
; uses an SGPR (implicit vcc).
|
||||
|
||||
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
|
||||
; SI-DAG: v_cmp_eq_i32_e64 vcc,
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
|
||||
|
||||
; SI: s_endpgm
|
||||
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
|
||||
%cmp = icmp eq i32 %in, 0
|
||||
|
@ -230,9 +230,8 @@ define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
; GCN-LABEL: {{^}}s_ashr_63_i64:
|
||||
; GCN-DAG: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
|
||||
; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31
|
||||
; GCN-DAG: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]]
|
||||
; GCN-DAG: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
|
||||
; GCN: s_addc_u32 {{s[0-9]+}}, s[[COPYSHIFT]], {{s[0-9]+}}
|
||||
; GCN: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
|
||||
; GCN: s_addc_u32 {{s[0-9]+}}, s[[SHIFT]], {{s[0-9]+}}
|
||||
define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
%result = ashr i64 %a, 63
|
||||
%add = add i64 %result, %b
|
||||
|
@ -70,10 +70,11 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
|
||||
; SI: v_cmp_eq_i32_e64 vcc
|
||||
; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
|
||||
; uses an SGPR (implicit vcc).
|
||||
|
||||
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
|
||||
; SI-DAG: v_cmp_eq_i32_e64 vcc
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float) #1
|
||||
@ -107,7 +107,7 @@ define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, fl
|
||||
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
|
||||
; GCN: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
|
||||
@ -227,7 +227,7 @@ define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out
|
||||
; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
|
||||
; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
|
||||
|
||||
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
|
||||
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS1]], [[SGPR0]], [[VK0]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
|
||||
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
|
||||
|
||||
@ -254,7 +254,7 @@ define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||
|
||||
; Same zero component is re-used for half of each immediate.
|
||||
; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
|
||||
; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
|
||||
; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
|
||||
|
||||
; GCN: buffer_store_dwordx2 [[RESULT0]]
|
||||
; GCN: buffer_store_dwordx2 [[RESULT1]]
|
||||
|
@ -24,7 +24,7 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
|
||||
; GCN-NOT: v_mac_f32
|
||||
; GCN: v_mad_f32 v{{[0-9]}}, 0.5, s{{[0-9]+}}, 0.5
|
||||
; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5
|
||||
define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%tmp0 = fmul float 0.5, %in
|
||||
|
Loading…
x
Reference in New Issue
Block a user