llvm-mirror/test/CodeGen/X86/pr34653.ll
Alex Richardson ac2e4676eb [SelectionDAGBuilder] Stop setting alignment to one for hidden sret values
We allocated a suitably aligned frame index so we know that all the values
have ABI alignment.
For MIPS this avoids using a pair of lwl + lwr instructions instead of a
single lw. I found this when compiling CHERI pure-capability code, where
we can't use the lwl/lwr unaligned loads/stores and were falling back to
a byte load + shift + or sequence.

This should save a few instructions for MIPS and possibly other backends
that don't have fast unaligned loads/stores.
It also improves code generation for CodeGen/X86/pr34653.ll and
CodeGen/WebAssembly/offset.ll since they can now use aligned loads.

Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D78999
2020-05-04 14:44:39 +01:00
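
For context, a minimal sketch of what the change means at the IR level. In the test below the IR simply returns <38 x double>; because that value cannot be returned in registers, SelectionDAG demotes the return to a hidden sret pointer into a caller-allocated stack slot. Before the patch the loads the caller then performs from that slot were given align 1; after it they carry the slot's ABI alignment, as if the call had been written with an explicit sret argument. The @caller, @callee and %agg names below are illustrative, not taken from the commit or the test:

; Illustrative IR only (not from the commit or the test): an explicit sret
; call written the way the backend now treats the hidden one.
%agg = type { [38 x double] }

declare void @callee(ptr sret(%agg) align 8)

define double @caller() {
entry:
  ; The caller creates the return slot with the type's ABI alignment.
  %slot = alloca %agg, align 8
  call void @callee(ptr sret(%agg) align 8 %slot)
  ; With the alignment known, this is a single aligned 8-byte load rather
  ; than an lwl/lwr pair (MIPS) or a byte-load + shift + or sequence.
  %p = getelementptr inbounds %agg, ptr %slot, i64 0, i32 0, i64 0
  %first = load double, ptr %p, align 8
  ret double %first
}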


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+avx512f -o - | FileCheck %s
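; A <38 x double> return value does not fit in registers, so the call is
; lowered through a hidden sret stack slot; the CHECK lines below expect the
; caller to reload elements with ordinary aligned vmovsd loads from that slot.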
declare fastcc <38 x double> @test()
define void @pr34653() {
; CHECK-LABEL: pr34653:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-512, %rsp # imm = 0xFE00
; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: callq test
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm19 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm20 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm21 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm22 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm23 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm24 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm25 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm26 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm27 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm28 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm29 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm30 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm31 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
entry:
%v = call fastcc <38 x double> @test()
%v.0 = extractelement <38 x double> %v, i32 0
ret void
}