mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
5fe7f3e187
Trace through multiple COPYs when looking for a physreg source. Add hinting for vregs that will be copied into physregs (we only hinted for vregs getting copied to a physreg previously). Give a hinted register a bonus when deciding which value to spill. This is part of my rewrite regallocfast series. In fact this one doesn't even have an effect unless you also flip the allocation to happen from back to front of a basic block. Nonetheless it helps to split this up to ease review of D52010. Patch by Matthias Braun. llvm-svn: 360887
64 lines
3.8 KiB
LLVM
64 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s
|
|
|
|
define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) {
|
|
; CHECK-LABEL: pluto:
|
|
; CHECK: # %bb.0: # %bb
|
|
; CHECK-NEXT: pushq %rbp
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
; CHECK-NEXT: movq %rsp, %rbp
|
|
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
|
; CHECK-NEXT: andq $-32, %rsp
|
|
; CHECK-NEXT: subq $128, %rsp
|
|
; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
|
|
; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
|
|
; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
|
|
; CHECK-NEXT: vmovaps 144(%rbp), %ymm11
|
|
; CHECK-NEXT: vmovaps 112(%rbp), %ymm12
|
|
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
|
|
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
|
|
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
|
|
; CHECK-NEXT: vmovaps %xmm9, %xmm6
|
|
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: # implicit-def: $ymm0
|
|
; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
|
|
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
|
|
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
|
|
; CHECK-NEXT: # kill: def $xmm2 killed $xmm2 killed $ymm2
|
|
; CHECK-NEXT: # implicit-def: $ymm11
|
|
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm11, %ymm11
|
|
; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm2
|
|
; CHECK-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
|
|
; CHECK-NEXT: # implicit-def: $ymm6
|
|
; CHECK-NEXT: vmovaps %xmm2, %xmm6
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm11[4,5,6,7]
|
|
; CHECK-NEXT: vmovaps %xmm7, %xmm6
|
|
; CHECK-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2,3,4,5,6,7]
|
|
; CHECK-NEXT: # implicit-def: $ymm11
|
|
; CHECK-NEXT: vmovaps %xmm6, %xmm11
|
|
; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm9[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm9[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
|
|
; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3]
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm11[0,1,2,3],ymm9[4,5,6,7]
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
|
|
; CHECK-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
|
|
; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
|
|
; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
|
; CHECK-NEXT: vmovaps %ymm5, %ymm1
|
|
; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill
|
|
; CHECK-NEXT: vmovaps %ymm9, %ymm3
|
|
; CHECK-NEXT: movq %rbp, %rsp
|
|
; CHECK-NEXT: popq %rbp
|
|
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
|
; CHECK-NEXT: retq
|
|
bb:
|
|
%tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
|
|
%tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
|
|
%tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
|
|
%tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>
|
|
ret <16 x i64> %tmp7
|
|
}
|