From 95717fc94447c0130136126385edd763f8dcd3d4 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 5 Feb 2021 13:54:16 +0700 Subject: [PATCH] [Test] Add more tests demonstrating oddities in behavior of LSR These tests demonstrate that LSR does not insert the IV increment into the latch block (as it is supposed to) when it can use an existing Phi as IV rather than creating a new LSR IV. --- .../X86/2020_12_02_decrementing_loop.ll | 90 +++++++++++- .../post-increment-insertion.ll | 138 ++++++++++++++++++ 2 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/LoopStrengthReduce/post-increment-insertion.ll diff --git a/test/CodeGen/X86/2020_12_02_decrementing_loop.ll b/test/CodeGen/X86/2020_12_02_decrementing_loop.ll index 510301d9b3b..5d860ea6e98 100644 --- a/test/CodeGen/X86/2020_12_02_decrementing_loop.ll +++ b/test/CodeGen/X86/2020_12_02_decrementing_loop.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s ; TODO: We can get rid of movq here by using different offset and %rax. 
-define i32 @test(i32* %p, i64 %len, i32 %x) { -; CHECK-LABEL: test: +define i32 @test_01(i32* %p, i64 %len, i32 %x) { +; CHECK-LABEL: test_01: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: .p2align 4, 0x90 @@ -42,3 +42,89 @@ exit: ; preds = %loop failure: ; preds = %backedge unreachable } + +define i32 @test_02(i32* %p, i64 %len, i32 %x) { +; CHECK-LABEL: test_02: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_1: ## %loop +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB1_4 +; CHECK-NEXT: ## %bb.2: ## %backedge +; CHECK-NEXT: ## in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4) +; CHECK-NEXT: leaq -1(%rsi), %rsi +; CHECK-NEXT: jne LBB1_1 +; CHECK-NEXT: ## %bb.3: ## %failure +; CHECK-NEXT: ud2 +; CHECK-NEXT: LBB1_4: ## %exit +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retq +entry: + %start = add i64 %len, -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ] + %iv.next = add nsw i64 %iv, -1 + %iv.offset = add i64 %iv, 1 + %iv.next.offset = add i64 %iv.next, 1 + %cond_1 = icmp eq i64 %iv.offset, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset + %loaded = load atomic i32, i32* %addr unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: ; preds = %backedge + unreachable +} + +define i32 @test_03(i32* %p, i64 %len, i32 %x) { +; CHECK-LABEL: test_03: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB2_1: ## %loop +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB2_4 +; CHECK-NEXT: ## %bb.2: ## %backedge +; CHECK-NEXT: ## in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4) +; 
CHECK-NEXT: leaq -1(%rsi), %rsi +; CHECK-NEXT: jne LBB2_1 +; CHECK-NEXT: ## %bb.3: ## %failure +; CHECK-NEXT: ud2 +; CHECK-NEXT: LBB2_4: ## %exit +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retq +entry: + %start = add i64 %len, -100 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ] + %iv.next = add nsw i64 %iv, -1 + %iv.offset = add i64 %iv, 100 + %iv.next.offset = add i64 %iv.next, 100 + %cond_1 = icmp eq i64 %iv.offset, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset + %loaded = load atomic i32, i32* %addr unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: ; preds = %backedge + unreachable +} diff --git a/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll b/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll new file mode 100644 index 00000000000..d7fa20803f8 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +; FIXME: iv.next is supposed to be inserted in the backedge. 
+define i32 @test_01(i32* %p, i64 %len, i32 %x) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 -1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[IV]] +; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4 +; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]] +; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; CHECK: failure: +; CHECK-NEXT: unreachable +; +entry: + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] + %iv.next = add nsw i64 %iv, -1 + %cond_1 = icmp eq i64 %iv, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next + %loaded = load atomic i32, i32* %addr unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: ; preds = %backedge + unreachable +} + +define i32 @test_02(i32* %p, i64 %len, i32 %x) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 -1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[LSR_IV]], 0 +; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[SCEVGEP1:%.*]] = 
getelementptr i32, i32* [[SCEVGEP]], i64 [[LSR_IV]] +; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4 +; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 +; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; CHECK: failure: +; CHECK-NEXT: unreachable +; +entry: + %start = add i64 %len, -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ] + %iv.next = add nsw i64 %iv, -1 + %iv.offset = add i64 %iv, 1 + %iv.next.offset = add i64 %iv.next, 1 + %cond_1 = icmp eq i64 %iv.offset, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset + %loaded = load atomic i32, i32* %addr unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: ; preds = %backedge + unreachable +} + +define i32 @test_03(i32* %p, i64 %len, i32 %x) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 -1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[LSR_IV]], 0 +; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[LSR_IV]] +; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4 +; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 +; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; CHECK: failure: 
+; CHECK-NEXT: unreachable +; +entry: + %start = add i64 %len, -100 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ] + %iv.next = add nsw i64 %iv, -1 + %iv.offset = add i64 %iv, 100 + %iv.next.offset = add i64 %iv.next, 100 + %cond_1 = icmp eq i64 %iv.offset, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset + %loaded = load atomic i32, i32* %addr unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: ; preds = %backedge + unreachable +}