mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[PowerPC] allow D-form VSX load/store when accessing FrameIndex without offset
VSX D-form load/store instructions of POWER9 require the offset to be a multiple of 16, and a helper, `isOffsetMultipleOf`, is used to check this. So far, the helper handles the FrameIndex + offset case but does not handle the FrameIndex-without-offset case. Because of this, we miss opportunities to use D-form instructions when accessing an object or array allocated on the stack. For example, an X-form store (stxvx) is used for int a[4] = {0}; instead of a D-form store (stxv). For larger arrays, a D-form instruction is not used when accessing the first 16 bytes. Using D-form instructions reduces register pressure as well as the number of instructions. Differential Revision: https://reviews.llvm.org/D45079 llvm-svn: 329377
This commit is contained in:
parent
7c14afae06
commit
f85880f8af
@ -3937,20 +3937,28 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
|
||||
else if (STN)
|
||||
AddrOp = STN->getOperand(2);
|
||||
|
||||
// If the address points to a frame object or a frame object with an offset,
|
||||
// we need to check the object alignment.
|
||||
short Imm = 0;
|
||||
if (AddrOp.getOpcode() == ISD::ADD) {
|
||||
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
|
||||
AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
|
||||
AddrOp)) {
|
||||
// If op0 is a frame index that is under aligned, we can't do it either,
|
||||
// because it is translated to r31 or r1 + slot + offset. We won't know the
|
||||
// slot number until the stack frame is finalized.
|
||||
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddrOp.getOperand(0))) {
|
||||
const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
|
||||
unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
|
||||
if ((SlotAlign % Val) != 0)
|
||||
return false;
|
||||
}
|
||||
return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
|
||||
const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
|
||||
unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
|
||||
if ((SlotAlign % Val) != 0)
|
||||
return false;
|
||||
|
||||
// If we have an offset, we need further check on the offset.
|
||||
if (AddrOp.getOpcode() != ISD::ADD)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (AddrOp.getOpcode() == ISD::ADD)
|
||||
return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
|
||||
|
||||
// If the address comes from the outside, the offset will be zero.
|
||||
return AddrOp.getOpcode() == ISD::CopyFromReg;
|
||||
}
|
||||
|
@ -411,3 +411,41 @@ entry:
|
||||
}
|
||||
|
||||
declare void @sink(...)
|
||||
|
||||
; stack object should be accessed using D-form load/store instead of X-form
|
||||
define signext i32 @func1() {
|
||||
; CHECK-LABEL: @func1
|
||||
; CHECK-NOT: stxvx
|
||||
; CHECK: stxv {{[0-9]+}}, {{[0-9]+}}(1)
|
||||
; CHECK-NOT: stxvx
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%a = alloca [4 x i32], align 4
|
||||
%0 = bitcast [4 x i32]* %a to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 16, i1 false)
|
||||
%arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 0
|
||||
%call = call signext i32 @callee(i32* nonnull %arraydecay) #3
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; stack object should be accessed using D-form load/store instead of X-form
|
||||
define signext i32 @func2() {
|
||||
; CHECK-LABEL: @func2
|
||||
; CHECK-NOT: stxvx
|
||||
; CHECK: stxv [[ZEROREG:[0-9]+]], {{[0-9]+}}(1)
|
||||
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
|
||||
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
|
||||
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
|
||||
; CHECK-NOT: stxvx
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%a = alloca [16 x i32], align 4
|
||||
%0 = bitcast [16 x i32]* %a to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 64, i1 false)
|
||||
%arraydecay = getelementptr inbounds [16 x i32], [16 x i32]* %a, i64 0, i64 0
|
||||
%call = call signext i32 @callee(i32* nonnull %arraydecay) #3
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
|
||||
declare signext i32 @callee(i32*) local_unnamed_addr #2
|
||||
|
Loading…
Reference in New Issue
Block a user