mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
f704804dd2
allocas in LLVM IR have a specified alignment. When that alignment is specified, the alloca has at least that alignment at runtime. If the specified type of the alloca has a higher preferred alignment, SelectionDAG currently ignores that specified alignment, and increases the alignment. It does this even if it would trigger stack realignment. I don't think this makes sense, so this patch changes that. I was looking into this for SVE in particular: for SVE, overaligning vscale'ed types is extra expensive because it requires realigning the stack multiple times, or using dynamic allocation. (This currently isn't implemented.) I updated the expected assembly for a couple tests; in particular, for arg-copy-elide.ll, the optimization in question does not increase the alignment the way SelectionDAG normally would. For the rest, I just increased the specified alignment on the allocas to match what SelectionDAG was inferring. Differential Revision: https://reviews.llvm.org/D79532
29 lines
1019 B
LLVM
29 lines
1019 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; Codegen test for x86-64: load a <3 x i129> vector from a stack slot, insert
; an i129 constant into element 1, and store the vector back to the same slot.
;
; The slot is declared with an explicit `align 4`; the load and the store carry
; no alignment annotation of their own. The slot is read before anything has
; been written to it.
;
; The assertion lines inside the function are generated by the script named in
; the NOTE line above; regenerate them with that script instead of editing them
; by hand.
define void @_start() {
; CHECK-LABEL: _start:
; CHECK: # %bb.0: # %Entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: shrdq $2, %rcx, %rax
; CHECK-NEXT: shrq $2, %rcx
; CHECK-NEXT: leaq 1(,%rax,4), %rdx
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: shrdq $62, %rcx, %rax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: orq $-2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Entry:
  ; Stack slot for the vector, with an explicitly requested 4-byte alignment.
  %y = alloca <3 x i129>, align 4
  %L = load <3 x i129>, <3 x i129>* %y
  ; The inserted constant is 2^128 - 1: the low 128 bits are set and the top
  ; bit (bit 128) of the i129 element is clear.
  ; NOTE(review): the shift/or sequence expected above presumably comes from
  ; the 129-bit elements not falling on byte boundaries -- confirm against the
  ; legalizer before relying on this.
  %I1 = insertelement <3 x i129> %L, i129 340282366920938463463374607431768211455, i32 1
  store <3 x i129> %I1, <3 x i129>* %y
  ret void
}