llvm-mirror/test/CodeGen/X86/pr44140.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 | FileCheck %s

; Empty callee used by the loop in @main. The body does nothing; what matters
; is the win64cc calling convention on the definition, which the call site in
; @main matches.
; NOTE(review): under the Win64 convention xmm6-xmm15 are callee-saved, but only
; their low 128 bits -- presumably that is why this convention was picked for
; the test; confirm against the PR.
define win64cc void @opaque() {
; CHECK-LABEL: opaque:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  ret void
}

; Regression test for PR44140.
;
; In the expected assembly the <2 x i64> constant is materialized into xmm6
; once, before the loop, and is stored to the stack after every call to the
; win64cc function @opaque. The value therefore has to survive both the call
; and the ymm copy sequence that precedes it.
;
; We need xmm6 to be live from the loop header across all iterations of the loop.
; We shouldn't clobber ymm6 inside the loop: writing ymm6 would overwrite the
; xmm6 half that holds the constant. The expected assembly uses ymm0-ymm5 and
; ymm7 for the copies and never writes ymm6 in the loop body.
;
; Returns 0 when the first i64 of the stored vector equals 1010101010101010101,
; and -1 otherwise.
define i32 @main() {
; CHECK-LABEL: main:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    subq $584, %rsp # imm = 0x248
; CHECK-NEXT:    .cfi_def_cfa_offset 592
; CHECK-NEXT:    vmovaps {{.*#+}} xmm6 = [1010101010101010101,2020202020202020202]
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB1_1: # %fake-loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm7
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm2
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm3
; CHECK-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT:    vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm5
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm4
; CHECK-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq opaque
; CHECK-NEXT:    vmovaps %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    testb %sil, %sil
; CHECK-NEXT:    jne .LBB1_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    decl %eax
; CHECK-NEXT:    addq $584, %rsp # imm = 0x248
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
start:
  ; Three 176-byte (22 x i64) scratch buffers. They are only ever copied into
  ; one another; the copies are what produce the ymm load/store traffic in the
  ; expected assembly above.
  %dummy0 = alloca [22 x i64], align 8
  %dummy1 = alloca [22 x i64], align 8
  %dummy2 = alloca [22 x i64], align 8

  ; Stack slot that receives the constant vector after each call.
  %data = alloca <2 x i64>, align 8

  br label %fake-loop

fake-loop:                                        ; preds = %fake-loop, %start
  ; Copy %dummy0 -> %dummy1 (176 bytes).
  %dummy0.cast = bitcast [22 x i64]* %dummy0 to i8*
  %dummy1.cast = bitcast [22 x i64]* %dummy1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy1.cast, i8* nonnull align 8 %dummy0.cast, i64 176, i1 false)

  ; Copy %dummy1 -> %dummy2 (176 bytes). %dummy2's lifetime is started here and
  ; no matching lifetime end appears in this function.
  %dummy1.cast.copy = bitcast [22 x i64]* %dummy1 to i8*
  %dummy2.cast = bitcast [22 x i64]* %dummy2 to i8*
  call void @llvm.lifetime.start.p0i8(i64 176, i8* nonnull %dummy2.cast)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy2.cast, i8* nonnull align 8 %dummy1.cast.copy, i64 176, i1 false)

  ; The call uses the same win64cc convention as @opaque's definition. In the
  ; expected assembly the constant in xmm6 is live across this call.
  call win64cc void @opaque()

  ; Store the constant vector; the expected assembly takes it from xmm6, which
  ; was loaded before the loop.
  store <2 x i64> <i64 1010101010101010101, i64 2020202020202020202>, <2 x i64>* %data, align 8

  ; 0 == 1 is always false, so the back edge is never taken at run time. The
  ; expected assembly still contains the test and conditional jump, so the
  ; block remains a loop in the generated code.
  %opaque-false = icmp eq i8 0, 1
  br i1 %opaque-false, label %fake-loop, label %exit

exit:                                             ; preds = %fake-loop
  ; Read back the first i64 of %data and compare it with the first element of
  ; the stored constant: result is 0 on a match, -1 on a mismatch.
  %data.cast = bitcast <2 x i64>* %data to i64*
  %0 = load i64, i64* %data.cast, align 8
  %1 = icmp eq i64 %0, 1010101010101010101
  %2 = select i1 %1, i32 0, i32 -1
  ret i32 %2
}

; Intrinsic used by @main for the two 176-byte buffer copies.
; Operands: destination, source, byte count, volatile flag.
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0

; Intrinsic used by @main to mark the start of %dummy2's lifetime.
; Operands: object size in bytes, pointer to the object.
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0

; Attribute group shared by both intrinsic declarations above.
attributes #0 = { argmemonly nounwind }
[X86] Add test case for pr44140. NFC 2019-11-25 23:49:21 +01:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 \| FileCheck %s`

			`define win64cc void @opaque() {`
			`; CHECK-LABEL: opaque:`
			`; CHECK: # %bb.0:`
			`; CHECK-NEXT: retq`
			`ret void`
			`}`

			`; We need xmm6 to be live from the loop header across all iterations of the loop.`
			`; We shouldn't clobber ymm6 inside the loop.`
			`define i32 @main() {`
			`; CHECK-LABEL: main:`
			`; CHECK: # %bb.0: # %start`
			`; CHECK-NEXT: subq $584, %rsp # imm = 0x248`
			`; CHECK-NEXT: .cfi_def_cfa_offset 592`
			`; CHECK-NEXT: vmovaps {{.*#+}} xmm6 = [1010101010101010101,2020202020202020202]`
			`; CHECK-NEXT: xorl %esi, %esi`
			`; CHECK-NEXT: .p2align 4, 0x90`
			`; CHECK-NEXT: .LBB1_1: # %fake-loop`
			`; CHECK-NEXT: # =>This Inner Loop Header: Depth=1`
			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0`
			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1`
[CriticalAntiDepBreaker] Teach the regmask clobber check to check if any subregister is preserved before considering the super register clobbered X86 has some calling conventions where bits 127:0 of a vector register are callee saved, but the upper bits aren't. Previously we could detect that the full ymm register was clobbered when the xmm portion was really preserved. This patch checks the subregisters to make sure they aren't preserved. Fixes PR44140 Differential Revision: https://reviews.llvm.org/D70699 2019-11-27 20:11:41 +01:00			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7`
[X86] Add test case for pr44140. NFC 2019-11-25 23:49:21 +01:00			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2`
			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3`
			`; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1`
			`; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)`
[CriticalAntiDepBreaker] Teach the regmask clobber check to check if any subregister is preserved before considering the super register clobbered X86 has some calling conventions where bits 127:0 of a vector register are callee saved, but the upper bits aren't. Previously we could detect that the full ymm register was clobbered when the xmm portion was really preserved. This patch checks the subregisters to make sure they aren't preserved. Fixes PR44140 Differential Revision: https://reviews.llvm.org/D70699 2019-11-27 20:11:41 +01:00			`; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)`
[X86] Add test case for pr44140. NFC 2019-11-25 23:49:21 +01:00			`; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)`
[CriticalAntiDepBreaker] Teach the regmask clobber check to check if any subregister is preserved before considering the super register clobbered X86 has some calling conventions where bits 127:0 of a vector register are callee saved, but the upper bits aren't. Previously we could detect that the full ymm register was clobbered when the xmm portion was really preserved. This patch checks the subregisters to make sure they aren't preserved. Fixes PR44140 Differential Revision: https://reviews.llvm.org/D70699 2019-11-27 20:11:41 +01:00			`; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)`
[X86] Add test case for pr44140. NFC 2019-11-25 23:49:21 +01:00			`; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5`
			`; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm4`
			`; CHECK-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: vzeroupper`
			`; CHECK-NEXT: callq opaque`
			`; CHECK-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: testb %sil, %sil`
			`; CHECK-NEXT: jne .LBB1_1`
			`; CHECK-NEXT: # %bb.2: # %exit`
			`; CHECK-NEXT: movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5`
			`; CHECK-NEXT: xorl %eax, %eax`
			`; CHECK-NEXT: cmpq %rcx, {{[0-9]+}}(%rsp)`
			`; CHECK-NEXT: sete %al`
			`; CHECK-NEXT: decl %eax`
			`; CHECK-NEXT: addq $584, %rsp # imm = 0x248`
			`; CHECK-NEXT: .cfi_def_cfa_offset 8`
			`; CHECK-NEXT: retq`
			`start:`
			`%dummy0 = alloca [22 x i64], align 8`
			`%dummy1 = alloca [22 x i64], align 8`
			`%dummy2 = alloca [22 x i64], align 8`

			`%data = alloca <2 x i64>, align 8`

			`br label %fake-loop`

			`fake-loop: ; preds = %fake-loop, %start`
			`%dummy0.cast = bitcast [22 x i64]* %dummy0 to i8*`
			`%dummy1.cast = bitcast [22 x i64]* %dummy1 to i8*`
			`call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy1.cast, i8* nonnull align 8 %dummy0.cast, i64 176, i1 false)`

			`%dummy1.cast.copy = bitcast [22 x i64]* %dummy1 to i8*`
			`%dummy2.cast = bitcast [22 x i64]* %dummy2 to i8*`
			`call void @llvm.lifetime.start.p0i8(i64 176, i8* nonnull %dummy2.cast)`
			`call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy2.cast, i8* nonnull align 8 %dummy1.cast.copy, i64 176, i1 false)`

			`call win64cc void @opaque()`

			`store <2 x i64> <i64 1010101010101010101, i64 2020202020202020202>, <2 x i64>* %data, align 8`

			`%opaque-false = icmp eq i8 0, 1`
			`br i1 %opaque-false, label %fake-loop, label %exit`

			`exit: ; preds = %fake-loop`
			`%data.cast = bitcast <2 x i64>* %data to i64*`
			`%0 = load i64, i64* %data.cast, align 8`
			`%1 = icmp eq i64 %0, 1010101010101010101`
			`%2 = select i1 %1, i32 0, i32 -1`
			`ret i32 %2`
			`}`

			`; Function Attrs: argmemonly nounwind`
			`declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0`

			`; Function Attrs: argmemonly nounwind`
			`declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0`

			`attributes #0 = { argmemonly nounwind }`