Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-23 11:13:28 +01:00
ac2e4676eb
We allocated a suitably aligned frame index, so we know that all the values have ABI alignment. For MIPS this avoids using a pair of lwl + lwr instructions instead of a single lw. I found this when compiling CHERI pure-capability code, where we can't use the lwl/lwr unaligned loads/stores and were falling back to a byte load + shift + or sequence.

This should save a few instructions for MIPS and possibly other backends that don't have fast unaligned loads/stores. It also improves code generation for CodeGen/X86/pr34653.ll and CodeGen/WebAssembly/offset.ll since they can now use aligned loads.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D78999
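As a rough illustration of the idea (this snippet is not part of the commit or of the test below, and the function name is made up): when a frame object carries its natural ABI alignment, the backend can select a single aligned load, e.g. one lw on MIPS, instead of an lwl/lwr pair or a byte load + shift + or expansion.

; Hypothetical sketch only: an alloca created with its ABI alignment, so the
; load from the resulting frame index can be lowered as a plain aligned load.
define i32 @aligned_slot_load() {
entry:
  %slot = alloca i32, align 4            ; frame index with 4-byte ABI alignment
  store i32 0, i32* %slot, align 4
  %val = load i32, i32* %slot, align 4   ; single lw on MIPS; no unaligned expansion
  ret i32 %val
}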
72 lines
3.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+avx512f -o - | FileCheck %s

declare fastcc <38 x double> @test()

define void @pr34653() {
; CHECK-LABEL: pr34653:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-512, %rsp # imm = 0xFE00
; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: callq test
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm19 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm20 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm21 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm22 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm23 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm24 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm25 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm26 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm27 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm28 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm29 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm30 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm31 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
entry:
  %v = call fastcc <38 x double> @test()
  %v.0 = extractelement <38 x double> %v, i32 0
  ret void
}