From d0d4c1578a27592173e5e64a480c30a2cdbb390e Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Sun, 25 Jul 2021 00:47:03 -0700
Subject: [PATCH] [AArch64][GlobalISel] Enable some select combines after
 legalization.

The legalizer generates selects for some operations. Those selects can have
constant condition values, leaving lots of dead code if they are not folded away.

Differential Revision: https://reviews.llvm.org/D106762
---
 lib/Target/AArch64/AArch64Combine.td          |  3 +-
 .../AArch64/GlobalISel/arm64-atomic-128.ll    | 54 ++++-----------
 .../postlegalizercombiner-select.mir          | 67 +++++++++++++++++++
 test/CodeGen/AArch64/fold-global-offsets.ll   | 10 +--
 4 files changed, 85 insertions(+), 49 deletions(-)
 create mode 100644 test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir

diff --git a/lib/Target/AArch64/AArch64Combine.td b/lib/Target/AArch64/AArch64Combine.td
index 5e2b5b66a95..6af11af8b21 100644
--- a/lib/Target/AArch64/AArch64Combine.td
+++ b/lib/Target/AArch64/AArch64Combine.td
@@ -203,6 +203,7 @@ def AArch64PostLegalizerCombinerHelper
                         extractvecelt_pairwise_add, redundant_or,
                         mul_const, redundant_sext_inreg,
                         form_bitfield_extract, rotate_out_of_range,
-                        icmp_to_true_false_known_bits, merge_unmerge]> {
+                        icmp_to_true_false_known_bits, merge_unmerge,
+                        select_combines]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
index b5b49e86f15..7830ad05e1a 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -351,27 +351,14 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) {
 ; CHECK-LLSC-O1-NEXT:    sub x9, x8, #64 // =64
 ; CHECK-LLSC-O1-NEXT:  .LBB4_1: // %atomicrmw.start
 ; CHECK-LLSC-O1-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-LLSC-O1-NEXT:    ldxp x11, x10, [x2]
-; CHECK-LLSC-O1-NEXT:    sub x12, x8, #64 // =64
-; CHECK-LLSC-O1-NEXT:    tst wzr, #0x1
-; CHECK-LLSC-O1-NEXT:    lsl x13, x10, x8
-; CHECK-LLSC-O1-NEXT:    lsr x14, x10, x9
-; CHECK-LLSC-O1-NEXT:    lsl x10, x10, x12
-; CHECK-LLSC-O1-NEXT:    csel x10, x14, x10, ne
-; CHECK-LLSC-O1-NEXT:    csel x13, x13, xzr, ne
-; CHECK-LLSC-O1-NEXT:    csel x10, xzr, x10, ne
-; CHECK-LLSC-O1-NEXT:    orr x11, x11, x13
-; CHECK-LLSC-O1-NEXT:    lsl x13, x10, x9
-; CHECK-LLSC-O1-NEXT:    lsr x12, x10, x12
-; CHECK-LLSC-O1-NEXT:    orr x13, x13, x11, lsr #0
-; CHECK-LLSC-O1-NEXT:    tst wzr, #0x1
-; CHECK-LLSC-O1-NEXT:    csel x12, x13, x12, ne
-; CHECK-LLSC-O1-NEXT:    csel x12, x11, x12, ne
-; CHECK-LLSC-O1-NEXT:    stxp w13, x11, x12, [x2]
-; CHECK-LLSC-O1-NEXT:    cbnz w13, .LBB4_1
+; CHECK-LLSC-O1-NEXT:    ldxp x10, x8, [x2]
+; CHECK-LLSC-O1-NEXT:    lsl x8, x8, x9
+; CHECK-LLSC-O1-NEXT:    lsr x11, x8, x9
+; CHECK-LLSC-O1-NEXT:    stxp w12, x10, x11, [x2]
+; CHECK-LLSC-O1-NEXT:    cbnz w12, .LBB4_1
 ; CHECK-LLSC-O1-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-LLSC-O1-NEXT:    mov v0.d[0], x11
-; CHECK-LLSC-O1-NEXT:    mov v0.d[1], x10
+; CHECK-LLSC-O1-NEXT:    mov v0.d[0], x10
+; CHECK-LLSC-O1-NEXT:    mov v0.d[1], x8
 ; CHECK-LLSC-O1-NEXT:    str q0, [x3]
 ; CHECK-LLSC-O1-NEXT:    ret
 ;
@@ -381,27 +368,14 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) {
 ; CHECK-CAS-O1-NEXT:    sub x9, x8, #64 // =64
 ; CHECK-CAS-O1-NEXT:  .LBB4_1: // %atomicrmw.start
 ; CHECK-CAS-O1-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-CAS-O1-NEXT:    ldxp x11, x10, [x2]
-; CHECK-CAS-O1-NEXT:    sub x12, x8, #64 // =64
-; CHECK-CAS-O1-NEXT:    lsl x13, x10, x8
-; CHECK-CAS-O1-NEXT:    lsr x14, x10, x9
-; CHECK-CAS-O1-NEXT:    lsl x10, x10, x12
-; CHECK-CAS-O1-NEXT:    tst wzr, #0x1
-; CHECK-CAS-O1-NEXT:    csel x13, x13, xzr, ne
-; CHECK-CAS-O1-NEXT:    csel x10, x14, x10, ne
-; CHECK-CAS-O1-NEXT:    csel x10, xzr, x10, ne
-; CHECK-CAS-O1-NEXT:    orr x11, x11, x13
-; CHECK-CAS-O1-NEXT:    lsl x13, x10, x9
-; CHECK-CAS-O1-NEXT:    orr x13, x13, x11, lsr #0
-; CHECK-CAS-O1-NEXT:    lsr x12, x10, x12
-; CHECK-CAS-O1-NEXT:    tst wzr, #0x1
-; CHECK-CAS-O1-NEXT:    csel x12, x13, x12, ne
-; CHECK-CAS-O1-NEXT:    csel x12, x11, x12, ne
-; CHECK-CAS-O1-NEXT:    stxp w13, x11, x12, [x2]
-; CHECK-CAS-O1-NEXT:    cbnz w13, .LBB4_1
+; CHECK-CAS-O1-NEXT:    ldxp x10, x8, [x2]
+; CHECK-CAS-O1-NEXT:    lsl x8, x8, x9
+; CHECK-CAS-O1-NEXT:    lsr x11, x8, x9
+; CHECK-CAS-O1-NEXT:    stxp w12, x10, x11, [x2]
+; CHECK-CAS-O1-NEXT:    cbnz w12, .LBB4_1
 ; CHECK-CAS-O1-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-CAS-O1-NEXT:    mov v0.d[0], x11
-; CHECK-CAS-O1-NEXT:    mov v0.d[1], x10
+; CHECK-CAS-O1-NEXT:    mov v0.d[0], x10
+; CHECK-CAS-O1-NEXT:    mov v0.d[1], x8
 ; CHECK-CAS-O1-NEXT:    str q0, [x3]
 ; CHECK-CAS-O1-NEXT:    ret
 ;
diff --git a/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir b/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir
new file mode 100644
index 00000000000..32e2fe0fcf4
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir
@@ -0,0 +1,67 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+# select (c, x, x): both value operands are %0, yet the checks expect the G_SELECT to remain (NOTE(review): presumably the same-value fold is not in select_combines - confirm)
+name:            test_combine_select_same_res
+legalized:       true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_combine_select_same_res
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[COPY]], [[COPY]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s1) = G_TRUNC %0
+    %2:_(s64) = G_SELECT %1, %0, %0
+    $x0 = COPY %2(s64)
+...
+---
+# select (undef, x, y) -> x (the checks expect the first value operand, %0 = COPY $x0, to be forwarded and the G_SELECT to be gone)
+name:            test_combine_select_undef_res0_res1
+legalized:       true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_combine_select_undef_res0_res1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: $x0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s1) = G_IMPLICIT_DEF
+    %3:_(s64) = G_SELECT %2, %0, %1
+    $x0 = COPY %3(s64)
+...
+---
+# select (false, x, y) -> y (a constant-false condition forwards the second value operand, %1 = COPY $x1; no G_SELECT is expected to remain)
+name:            test_combine_select_false_res0_res1
+legalized:       true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_combine_select_false_res0_res1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: $x0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_SELECT %2, %0, %1
+    $x0 = COPY %3(s64)
+...
+---
+# select (true, x, y) -> x (a constant-true condition forwards the first value operand, %0 = COPY $x0; no G_SELECT is expected to remain)
+name:            test_combine_select_true_res0_res1
+legalized:       true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_combine_select_true_res0_res1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: $x0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_SELECT %2, %0, %1
+    $x0 = COPY %3(s64)
+...
diff --git a/test/CodeGen/AArch64/fold-global-offsets.ll b/test/CodeGen/AArch64/fold-global-offsets.ll
index 1cb891fea76..1871cc3caf3 100644
--- a/test/CodeGen/AArch64/fold-global-offsets.ll
+++ b/test/CodeGen/AArch64/fold-global-offsets.ll
@@ -132,16 +132,10 @@ define i32 @f7() {
 ; GISEL-NEXT:    add x8, x8, :lo12:x3+88
 ; GISEL-NEXT:    mov v0.d[1], x8
 ; GISEL-NEXT:    mov w9, #64
-; GISEL-NEXT:    mov d1, v0.d[1]
+; GISEL-NEXT:    mov d0, v0.d[1]
 ; GISEL-NEXT:    sub x8, x9, #64 // =64
-; GISEL-NEXT:    fmov x10, d1
 ; GISEL-NEXT:    fmov x9, d0
-; GISEL-NEXT:    lsl x11, x10, x8
-; GISEL-NEXT:    lsr x8, x10, x8
-; GISEL-NEXT:    orr x10, x11, x9, lsr #0
-; GISEL-NEXT:    tst wzr, #0x1
-; GISEL-NEXT:    csel x8, x10, x8, ne
-; GISEL-NEXT:    csel x8, x9, x8, ne
+; GISEL-NEXT:    lsr x8, x9, x8
 ; GISEL-NEXT:    ldr w0, [x8, #20]
 ; GISEL-NEXT:    ret