mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
AMDGPU: Generalize shl combine
Reduce 64-bit shl with a constant >= 32. We already special-cased the == 32 case, but the same reduction works for any constant >= 32. llvm-svn: 258092
This commit is contained in:
parent
5bbdf4402d
commit
97aeb607e4
@ -2544,14 +2544,17 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
|
||||
if (N->getValueType(0) != MVT::i64)
|
||||
return SDValue();
|
||||
|
||||
// i64 (shl x, 32) -> (build_pair 0, x)
|
||||
// i64 (shl x, C) -> (build_pair 0, (shl x, C - 32))
|
||||
|
||||
// Doing this with moves theoretically helps MI optimizations that understand
|
||||
// copies. 2 v_mov_b32_e32 will have the same code size / cycle count as
|
||||
// v_lshl_b64. In the SALU case, I think this is slightly worse since it
|
||||
// doubles the code size and I'm unsure about cycle count.
|
||||
// On some subtargets, 64-bit shift is a quarter rate instruction. In the
|
||||
// common case, splitting this into a move and a 32-bit shift is faster and
|
||||
// the same code size.
|
||||
const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
if (!RHS || RHS->getZExtValue() != 32)
|
||||
if (!RHS)
|
||||
return SDValue();
|
||||
|
||||
unsigned RHSVal = RHS->getZExtValue();
|
||||
if (RHSVal < 32)
|
||||
return SDValue();
|
||||
|
||||
SDValue LHS = N->getOperand(0);
|
||||
@ -2559,12 +2562,15 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
|
||||
SDLoc SL(N);
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
|
||||
// Extract low 32-bits.
|
||||
SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
|
||||
|
||||
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
|
||||
SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
|
||||
|
||||
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
|
||||
|
||||
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo);
|
||||
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Zero, NewShift);
|
||||
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
|
||||
|
@ -62,3 +62,50 @@ define void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
store i64 %shl, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; lshl (i64 x), c: c >= 32 => reg_sequence lshl 0, (i32 lo_32(x)), (c - 32)
|
||||
|
||||
; GCN-LABEL: {{^}}shl_i64_const_35:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 3, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
%shl = shl i64 %val, 35
|
||||
store i64 %shl, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}shl_i64_const_32:
|
||||
; GCN: buffer_load_dword v[[HI:[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
%shl = shl i64 %val, 32
|
||||
store i64 %shl, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}shl_i64_const_63:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 31, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
%shl = shl i64 %val, 63
|
||||
store i64 %shl, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; ashr (i64 x), 63 => (ashr lo(x), 31), lo(x)
|
||||
|
||||
; GCN-LABEL: {{^}}ashr_i64_const_gt_32:
|
||||
define void @ashr_i64_const_gt_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
%shl = ashr i64 %val, 35
|
||||
store i64 %shl, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user