mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[SelectionDAGBuilder] Stop setting alignment to one for hidden sret values
We allocated a suitably aligned frame index so we know that all the values have ABI alignment. For MIPS this avoids using a pair of lwl + lwr instructions instead of a single lw. I found this when compiling CHERI pure capability code where we can't use the lwl/lwr unaligned loads/stores and were falling back to a byte load + shift + or sequence. This should save a few instructions for MIPS and possibly other backends that don't have fast unaligned loads/stores. It also improves code generation for CodeGen/X86/pr34653.ll and CodeGen/WebAssembly/offset.ll since they can now use aligned loads. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D78999
This commit is contained in:
parent
283e2844aa
commit
ac2e4676eb
@ -1833,6 +1833,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
|
||||
unsigned NumValues = ValueVTs.size();
|
||||
|
||||
SmallVector<SDValue, 4> Chains(NumValues);
|
||||
Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
|
||||
for (unsigned i = 0; i != NumValues; ++i) {
|
||||
// An aggregate return value cannot wrap around the address space, so
|
||||
// offsets to its parts don't wrap either.
|
||||
@ -1841,9 +1842,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
|
||||
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
|
||||
if (MemVTs[i] != ValueVTs[i])
|
||||
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
|
||||
Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
|
||||
Chains[i] = DAG.getStore(
|
||||
Chain, getCurSDLoc(), Val,
|
||||
// FIXME: better loc info would be nice.
|
||||
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
|
||||
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
|
||||
commonAlignment(BaseAlign, Offsets[i]));
|
||||
}
|
||||
|
||||
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
|
||||
@ -9271,6 +9274,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoUnsignedWrap(true);
|
||||
|
||||
MachineFunction &MF = CLI.DAG.getMachineFunction();
|
||||
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
|
||||
for (unsigned i = 0; i < NumValues; ++i) {
|
||||
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
|
||||
CLI.DAG.getConstant(Offsets[i], CLI.DL,
|
||||
@ -9279,7 +9284,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
||||
RetTys[i], CLI.DL, CLI.Chain, Add,
|
||||
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
|
||||
DemoteStackIdx, Offsets[i]),
|
||||
/* Alignment = */ 1);
|
||||
HiddenSRetAlign);
|
||||
ReturnValues[i] = L;
|
||||
Chains[i] = L.getValue(1);
|
||||
}
|
||||
|
@ -16,23 +16,13 @@ define internal void @test() unnamed_addr nounwind {
|
||||
; CHECK-NEXT: daddiu $4, $sp, 8
|
||||
; CHECK-NEXT: jal implicit_sret_decl
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # implicit-def: $at_64
|
||||
; CHECK-NEXT: ldl $1, 24($sp)
|
||||
; CHECK-NEXT: ldr $1, 31($sp)
|
||||
; CHECK-NEXT: ld $6, 24($sp)
|
||||
; CHECK-NEXT: ld $5, 16($sp)
|
||||
; CHECK-NEXT: ld $7, 32($sp)
|
||||
; CHECK-NEXT: lw $1, 8($sp)
|
||||
; CHECK-NEXT: # implicit-def: $v0_64
|
||||
; CHECK-NEXT: ldl $2, 16($sp)
|
||||
; CHECK-NEXT: ldr $2, 23($sp)
|
||||
; CHECK-NEXT: # implicit-def: $v1_64
|
||||
; CHECK-NEXT: ldl $3, 32($sp)
|
||||
; CHECK-NEXT: ldr $3, 39($sp)
|
||||
; CHECK-NEXT: # implicit-def: $a1
|
||||
; CHECK-NEXT: lwl $5, 8($sp)
|
||||
; CHECK-NEXT: lwr $5, 11($sp)
|
||||
; CHECK-NEXT: # implicit-def: $a0_64
|
||||
; CHECK-NEXT: move $4, $5
|
||||
; CHECK-NEXT: move $5, $2
|
||||
; CHECK-NEXT: move $6, $1
|
||||
; CHECK-NEXT: move $7, $3
|
||||
; CHECK-NEXT: move $2, $1
|
||||
; CHECK-NEXT: move $4, $2
|
||||
; CHECK-NEXT: jal use_sret
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
|
||||
@ -75,15 +65,9 @@ define internal void @test2() unnamed_addr nounwind {
|
||||
; CHECK-NEXT: daddiu $4, $sp, 0
|
||||
; CHECK-NEXT: jal implicit_sret_decl2
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: # implicit-def: $at
|
||||
; CHECK-NEXT: lwl $1, 20($sp)
|
||||
; CHECK-NEXT: lwr $1, 23($sp)
|
||||
; CHECK-NEXT: # implicit-def: $v0
|
||||
; CHECK-NEXT: lwl $2, 12($sp)
|
||||
; CHECK-NEXT: lwr $2, 15($sp)
|
||||
; CHECK-NEXT: # implicit-def: $v1
|
||||
; CHECK-NEXT: lwl $3, 4($sp)
|
||||
; CHECK-NEXT: lwr $3, 7($sp)
|
||||
; CHECK-NEXT: lw $1, 20($sp)
|
||||
; CHECK-NEXT: lw $2, 12($sp)
|
||||
; CHECK-NEXT: lw $3, 4($sp)
|
||||
; CHECK-NEXT: # implicit-def: $a0_64
|
||||
; CHECK-NEXT: move $4, $3
|
||||
; CHECK-NEXT: # implicit-def: $a1_64
|
||||
|
@ -645,9 +645,9 @@ define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q)
|
||||
|
||||
; CHECK-LABEL: aggregate_return:
|
||||
; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}}
|
||||
; CHECK: i64.store 8($0):p2align=2, $pop[[L0]]{{$}}
|
||||
; CHECK: i64.store 8($0), $pop[[L0]]{{$}}
|
||||
; CHECK: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
|
||||
; CHECK: i64.store 0($0):p2align=2, $pop[[L1]]{{$}}
|
||||
; CHECK: i64.store 0($0), $pop[[L1]]{{$}}
|
||||
define {i32,i32,i32,i32} @aggregate_return() {
|
||||
ret {i32,i32,i32,i32} zeroinitializer
|
||||
}
|
||||
|
@ -15,28 +15,22 @@ define void @pr34653() {
|
||||
; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
|
||||
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
|
||||
; CHECK-NEXT: callq test
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm1
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm2
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm3
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm4
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm5
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm6
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm7
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm8
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm9
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm10
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm11
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm12
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm13
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm14
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm15
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
|
||||
@ -60,17 +54,11 @@ define void @pr34653() {
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
|
||||
; CHECK-NEXT: # xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
|
||||
; CHECK-NEXT: # xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
|
||||
; CHECK-NEXT: # xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
|
||||
; CHECK-NEXT: # xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: movq %rbp, %rsp
|
||||
; CHECK-NEXT: popq %rbp
|
||||
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
|
Loading…
x
Reference in New Issue
Block a user