llvm-mirror/test/CodeGen/PowerPC/pr42492.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s

; Regression test for PR42492. The loop in @f has two exit conditions that are
; combined with an `or i1`, so the loop's conditional branch is fed by a
; logical operation rather than directly by a single icmp.
define void @f(i8*, i8*, i64*) {
; Check we don't assert and this is not a Hardware Loop
; (the expected loop below is built from ordinary compare + bc branches; no
; mtctr/bdnz sequence appears anywhere in the expected output).
; CHECK-LABEL: f:
; CHECK:       # %bb.0:
; CHECK-NEXT:    cmpld 3, 4
; CHECK-NEXT:    beqlr 0
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    ld 6, 8(5)
; CHECK-NEXT:    not 3, 3
; CHECK-NEXT:    add 3, 3, 4
; CHECK-NEXT:    li 4, 0
; CHECK-NEXT:    .p2align 5
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    addi 7, 4, 1
; CHECK-NEXT:    sldi 6, 6, 4
; CHECK-NEXT:    cmplwi 4, 14
; CHECK-NEXT:    bc 12, 1, .LBB0_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    cmpd 3, 4
; CHECK-NEXT:    mr 4, 7
; CHECK-NEXT:    bc 4, 2, .LBB0_2
; CHECK-NEXT:  .LBB0_4:
; CHECK-NEXT:    std 6, 8(5)
; CHECK-NEXT:    blr

  ; Entry block (%3): if the two pointer arguments are equal, skip the loop
  ; entirely and return without touching memory.
  %4 = icmp eq i8* %0, %1
  br i1 %4, label %9, label %5

5:                                                ; preds = %3
  ; Loop preheader: %6 addresses element 1 of the i64 array %2; its current
  ; value (%7) seeds the value that the loop shifts.
  %6 = getelementptr inbounds i64, i64* %2, i64 1
  %7 = load i64, i64* %6, align 8
  br label %10

8:                                                ; preds = %10
  ; Loop exit: write the last shifted value back to the slot it was loaded from.
  store i64 %14, i64* %6, align 8
  br label %9

9:                                                ; preds = %8, %3
  ret void

10:                                               ; preds = %5, %10
  ; Loop body. Loop-carried values:
  ;   %11 - running i64 value (starts as the loaded %7)
  ;   %12 - i32 iteration counter (starts at 0, +1 per iteration)
  ;   %13 - byte pointer (starts at %0, advanced by one byte per iteration)
  %11 = phi i64 [ %7, %5 ], [ %14, %10 ]
  %12 = phi i32 [ 0, %5 ], [ %15, %10 ]
  %13 = phi i8* [ %0, %5 ], [ %16, %10 ]
  ; Shift the running value left by 4 bits each iteration.
  %14 = shl nsw i64 %11, 4
  %15 = add nuw nsw i32 %12, 1
  %16 = getelementptr inbounds i8, i8* %13, i64 1
  ; Two independent exit tests: the pre-increment counter exceeds 14
  ; (unsigned), or the advanced pointer has reached %1.
  %17 = icmp ugt i32 %12, 14
  %18 = icmp eq i8* %16, %1
  ; The branch condition is the `or` of both compares, i.e. not itself an icmp;
  ; this is the shape the test exists to exercise.
  %19 = or i1 %18, %17
  br i1 %19, label %8, label %10
}
[PowerPC] Hardware Loop branch instruction's condition may not be icmp. This fixes pr42492. Differential Revision: https://reviews.llvm.org/D64124 llvm-svn: 365104 2019-07-04 03:51:47 +02:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s`

			`define void @f(i8*, i8*, i64*) {`
			`; Check we don't assert and this is not a Hardware Loop`
			`; CHECK-LABEL: f:`
[PowerPC] Regenerate test for D52431 llvm-svn: 375435 2019-10-21 19:45:51 +02:00			`; CHECK: # %bb.0:`
			`; CHECK-NEXT: cmpld 3, 4`
			`; CHECK-NEXT: beqlr 0`
			`; CHECK-NEXT: # %bb.1:`
			`; CHECK-NEXT: ld 6, 8(5)`
			`; CHECK-NEXT: not 3, 3`
			`; CHECK-NEXT: add 3, 3, 4`
			`; CHECK-NEXT: li 4, 0`
			`; CHECK-NEXT: .p2align 5`
[Power9] Add addi post-ra scheduling heuristic The instruction addi is usually used to post increase the loop indvar, which looks like this: label_X: load x, base(i) ... y = op x ... i = addi i, 1 goto label_X However, for PowerPC, if there are too many vsx instructions that between y = op x and i = addi i, 1, it will use all the hw resource that block the execution of i = addi, i, 1, which result in the stall of the load instruction in next iteration. So, a heuristic is added to move the addi as early as possible to have the load hide the latency of vsx instructions, if other heuristic didn't apply to avoid the starve. Reviewed By: jji Differential Revision: https://reviews.llvm.org/D80269 2020-06-08 03:31:07 +02:00			`; CHECK-NEXT: .LBB0_2:`
			`; CHECK-NEXT: addi 7, 4, 1`
[PowerPC] Regenerate test for D52431 llvm-svn: 375435 2019-10-21 19:45:51 +02:00			`; CHECK-NEXT: sldi 6, 6, 4`
[PowerPC] Turn on CR-Logical reducer pass This re-commits r375152 which was pulled in r375233 because it broke the EXPENSIVE_CHECKS bot on Windows. The reason for the failure was a bug in the pass that the commit turned on by default. This patch fixes that bug and turns the pass back on. This patch has been verified on the buildbot that originally failed thanks to Simon Pilgrim. Differential revision: https://reviews.llvm.org/D52431 llvm-svn: 375497 2019-10-22 14:20:38 +02:00			`; CHECK-NEXT: cmplwi 4, 14`
			`; CHECK-NEXT: bc 12, 1, .LBB0_4`
[Power9] Add addi post-ra scheduling heuristic The instruction addi is usually used to post increase the loop indvar, which looks like this: label_X: load x, base(i) ... y = op x ... i = addi i, 1 goto label_X However, for PowerPC, if there are too many vsx instructions that between y = op x and i = addi i, 1, it will use all the hw resource that block the execution of i = addi, i, 1, which result in the stall of the load instruction in next iteration. So, a heuristic is added to move the addi as early as possible to have the load hide the latency of vsx instructions, if other heuristic didn't apply to avoid the starve. Reviewed By: jji Differential Revision: https://reviews.llvm.org/D80269 2020-06-08 03:31:07 +02:00			`; CHECK-NEXT: # %bb.3:`
[PowerPC] Turn on CR-Logical reducer pass This re-commits r375152 which was pulled in r375233 because it broke the EXPENSIVE_CHECKS bot on Windows. The reason for the failure was a bug in the pass that the commit turned on by default. This patch fixes that bug and turns the pass back on. This patch has been verified on the buildbot that originally failed thanks to Simon Pilgrim. Differential revision: https://reviews.llvm.org/D52431 llvm-svn: 375497 2019-10-22 14:20:38 +02:00			`; CHECK-NEXT: cmpd 3, 4`
			`; CHECK-NEXT: mr 4, 7`
			`; CHECK-NEXT: bc 4, 2, .LBB0_2`
			`; CHECK-NEXT: .LBB0_4:`
[PowerPC] Regenerate test for D52431 llvm-svn: 375435 2019-10-21 19:45:51 +02:00			`; CHECK-NEXT: std 6, 8(5)`
			`; CHECK-NEXT: blr`
[PowerPC] Hardware Loop branch instruction's condition may not be icmp. This fixes pr42492. Differential Revision: https://reviews.llvm.org/D64124 llvm-svn: 365104 2019-07-04 03:51:47 +02:00
			`%4 = icmp eq i8* %0, %1`
			`br i1 %4, label %9, label %5`

			`5: ; preds = %3`
			`%6 = getelementptr inbounds i64, i64* %2, i64 1`
			`%7 = load i64, i64* %6, align 8`
			`br label %10`

			`8: ; preds = %10`
			`store i64 %14, i64* %6, align 8`
			`br label %9`

			`9: ; preds = %8, %3`
			`ret void`

			`10: ; preds = %5, %10`
			`%11 = phi i64 [ %7, %5 ], [ %14, %10 ]`
			`%12 = phi i32 [ 0, %5 ], [ %15, %10 ]`
			`%13 = phi i8* [ %0, %5 ], [ %16, %10 ]`
			`%14 = shl nsw i64 %11, 4`
			`%15 = add nuw nsw i32 %12, 1`
			`%16 = getelementptr inbounds i8, i8* %13, i64 1`
			`%17 = icmp ugt i32 %12, 14`
			`%18 = icmp eq i8* %16, %1`
			`%19 = or i1 %18, %17`
			`br i1 %19, label %8, label %10`
			`}`