; Provenance: mirror of https://github.com/RPCS3/llvm-mirror.git
; (synced 2024-11-23, commit 1ad05318c7)
; X86 has some calling conventions where bits 127:0 of a vector register are
; callee saved, but the upper bits aren't. Previously we could detect that the
; full ymm register was clobbered when the xmm portion was really preserved.
; This patch checks the subregisters to make sure they aren't preserved.
; Fixes PR44140
; Differential Revision: https://reviews.llvm.org/D70699
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 | FileCheck %s
; Opaque callee with the Win64 calling convention: xmm6-xmm15 (bits 127:0)
; are callee-saved under win64cc, but the ymm upper halves are not.
define win64cc void @opaque() {
; CHECK-LABEL: opaque:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  ret void
}
; We need xmm6 to be live from the loop header across all iterations of the loop.
; We shouldn't clobber ymm6 inside the loop.
; Regression test for PR44140: the win64cc call preserves xmm6, so the
; register allocator may keep the <2 x i64> constant in xmm6 across the call
; without a spill/reload, while the ymm-using memcpy expansions inside the
; loop must not be allowed to clobber it (only the upper half of ymm6 is
; volatile). The autogenerated CHECK lines pin the expected codegen.
define i32 @main() {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: subq $584, %rsp # imm = 0x248
; CHECK-NEXT: .cfi_def_cfa_offset 592
; CHECK-NEXT: vmovaps {{.*#+}} xmm6 = [1010101010101010101,2020202020202020202]
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_1: # %fake-loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3
; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm4
; CHECK-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq opaque
; CHECK-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: testb %sil, %sil
; CHECK-NEXT: jne .LBB1_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: sete %al
; CHECK-NEXT: decl %eax
; CHECK-NEXT: addq $584, %rsp # imm = 0x248
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
start:
  ; Large stack objects whose copies get lowered to ymm load/store pairs,
  ; creating register pressure inside the loop.
  %dummy0 = alloca [22 x i64], align 8
  %dummy1 = alloca [22 x i64], align 8
  %dummy2 = alloca [22 x i64], align 8

  %data = alloca <2 x i64>, align 8

  br label %fake-loop

fake-loop:                                        ; preds = %fake-loop, %start
  %dummy0.cast = bitcast [22 x i64]* %dummy0 to i8*
  %dummy1.cast = bitcast [22 x i64]* %dummy1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy1.cast, i8* nonnull align 8 %dummy0.cast, i64 176, i1 false)

  %dummy1.cast.copy = bitcast [22 x i64]* %dummy1 to i8*
  %dummy2.cast = bitcast [22 x i64]* %dummy2 to i8*
  call void @llvm.lifetime.start.p0i8(i64 176, i8* nonnull %dummy2.cast)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy2.cast, i8* nonnull align 8 %dummy1.cast.copy, i64 176, i1 false)

  ; win64cc call: xmm6 (low 128 bits) is callee-saved across this call.
  call win64cc void @opaque()

  ; The constant lives in xmm6 across the call; storing it after the call
  ; must see the preserved value.
  store <2 x i64> <i64 1010101010101010101, i64 2020202020202020202>, <2 x i64>* %data, align 8

  ; Branch condition is opaquely false so the loop body runs exactly once
  ; but still looks like a loop to the compiler.
  %opaque-false = icmp eq i8 0, 1
  br i1 %opaque-false, label %fake-loop, label %exit

exit:                                             ; preds = %fake-loop
  %data.cast = bitcast <2 x i64>* %data to i64*
  %0 = load i64, i64* %data.cast, align 8
  %1 = icmp eq i64 %0, 1010101010101010101
  %2 = select i1 %1, i32 0, i32 -1
  ret i32 %2
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0

attributes #0 = { argmemonly nounwind }