mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Fix immediate folding logic when shrinking instructions
When a literal is folded into src0, it does not matter whether src0 was originally an SGPR, because that register operand is replaced by the literal. This also fixes the initial selection of the 32-bit (e32) versions of some instructions, which had confused commuting. llvm-svn: 281117
This commit is contained in:
parent
5e10e55c23
commit
33e593fd71
@ -1852,13 +1852,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
|
||||
case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
|
||||
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
|
||||
case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
|
||||
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
|
||||
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
|
||||
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
|
||||
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
|
||||
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
|
||||
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
|
||||
case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
|
||||
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
|
||||
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
|
||||
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
|
||||
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
|
||||
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
|
||||
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
|
||||
case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
|
||||
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
|
||||
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
|
||||
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
|
||||
|
@ -1871,7 +1871,7 @@ def : Pat <
|
||||
|
||||
def : Pat <
|
||||
(fneg (fabs f32:$src)),
|
||||
(S_OR_B32 $src, 0x80000000) // Set sign bit
|
||||
(S_OR_B32 $src, (S_MOV_B32 0x80000000)) // Set sign bit
|
||||
>;
|
||||
|
||||
// FIXME: Should use S_OR_B32
|
||||
|
@ -134,7 +134,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
|
||||
|
||||
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
|
||||
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
|
||||
MachineOperand &Src0 = MI.getOperand(Src0Idx);
|
||||
|
||||
@ -144,12 +143,6 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
|
||||
TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
|
||||
return;
|
||||
|
||||
// Literal constants and SGPRs can only be used in Src0, so if Src0 is an
|
||||
// SGPR, we cannot commute the instruction, so we can't fold any literal
|
||||
// constants.
|
||||
if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
|
||||
return;
|
||||
|
||||
// Try to fold Src0
|
||||
if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
|
||||
unsigned Reg = Src0.getReg();
|
||||
@ -158,7 +151,8 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
|
||||
MachineOperand &MovSrc = Def->getOperand(1);
|
||||
bool ConstantFolded = false;
|
||||
|
||||
if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
|
||||
if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
|
||||
isUInt<32>(MovSrc.getImm()))) {
|
||||
Src0.ChangeToImmediate(MovSrc.getImm());
|
||||
ConstantFolded = true;
|
||||
}
|
||||
|
@ -143,7 +143,7 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
|
||||
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
|
||||
; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
|
||||
; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
|
||||
; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
|
||||
; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
|
||||
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
|
||||
; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
|
||||
; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
|
||||
|
@ -82,8 +82,10 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
|
||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600: -PV
|
||||
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; FIXME: In this case two uses of the constant should be folded
|
||||
; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
|
||||
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
|
||||
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
|
||||
@ -92,10 +94,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
||||
; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
|
||||
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
|
||||
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
|
||||
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
|
||||
|
@ -16,7 +16,7 @@ define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
|
||||
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
|
||||
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
|
||||
; GCN: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
|
||||
; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
|
||||
; GCN: v_or_b32_e32 [[PACKED:v[0-9]+]], [[HI]], [[V0]]
|
||||
; GCN: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN: s_endpgm
|
||||
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
|
||||
@ -440,7 +440,7 @@ define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspa
|
||||
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
|
||||
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
|
||||
; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[CVT1]]
|
||||
; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[CVT0]], [[SHL]]
|
||||
; GCN-DAG: v_or_b32_e32 [[PACKED:v[0-9]+]], [[SHL]], [[CVT0]]
|
||||
; GCN-DAG: buffer_store_dword [[PACKED]]
|
||||
; GCN: s_endpgm
|
||||
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
|
||||
|
@ -10,9 +10,7 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
|
||||
; VI: s_load_dword [[SRC:s[0-9]+]]
|
||||
; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
|
||||
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
|
||||
; TODO: this constant should be folded:
|
||||
; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
|
||||
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]]
|
||||
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xff7fffff, [[MIN]]
|
||||
; VI: buffer_store_dword [[RESULT]]
|
||||
define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
|
||||
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
|
||||
|
@ -113,11 +113,9 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
|
||||
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
|
||||
; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
|
||||
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
|
||||
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
|
||||
; SI: s_endpgm
|
||||
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 8
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k0:
|
||||
@ -11,6 +11,7 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 4295032831)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -24,6 +25,7 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 4295000063)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -37,6 +39,7 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 274877939711)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -50,6 +53,7 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 4295000064)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -63,6 +67,7 @@ define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 4295098368)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -77,6 +82,7 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 18374967954648334319)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -90,6 +96,7 @@ define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 270582939713)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -104,10 +111,10 @@ define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 70368744185856)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; SI-LABEL: {{^}}s_movk_i32_k8:
|
||||
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
|
||||
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
|
||||
@ -119,6 +126,7 @@ define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 1229782942255906816)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -133,6 +141,7 @@ define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 1229782942255906817)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -147,6 +156,7 @@ define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 1229782942255909000)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -161,6 +171,7 @@ define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 1229782942255910911)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -175,5 +186,6 @@ define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
|
||||
%loada = load i64, i64 addrspace(1)* %a, align 4
|
||||
%or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
|
||||
store i64 %or, i64 addrspace(1)* %out
|
||||
call void asm sideeffect "; use $0", "s"(i64 1229782942255902721)
|
||||
ret void
|
||||
}
|
||||
|
@ -1,9 +1,8 @@
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
|
||||
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
|
||||
define amdgpu_vs void @main(float) {
|
||||
main_body:
|
||||
%1 = fmul float %0, 0x3FE86A7F00000000
|
||||
|
Loading…
Reference in New Issue
Block a user