1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[AArch64][GlobalISel] Enable some select combines after legalization.

The legalizer generates selects for some operations, and these selects can have
constant condition values, resulting in lots of dead code if they are not folded away.

Differential Revision: https://reviews.llvm.org/D106762
This commit is contained in:
Amara Emerson 2021-07-25 00:47:03 -07:00
parent b09f2e63d9
commit d0d4c1578a
4 changed files with 85 additions and 49 deletions

View File

@ -203,6 +203,7 @@ def AArch64PostLegalizerCombinerHelper
extractvecelt_pairwise_add, redundant_or,
mul_const, redundant_sext_inreg,
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge]> {
icmp_to_true_false_known_bits, merge_unmerge,
select_combines]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}

View File

@ -351,27 +351,14 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) {
; CHECK-LLSC-O1-NEXT: sub x9, x8, #64 // =64
; CHECK-LLSC-O1-NEXT: .LBB4_1: // %atomicrmw.start
; CHECK-LLSC-O1-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-LLSC-O1-NEXT: ldxp x11, x10, [x2]
; CHECK-LLSC-O1-NEXT: sub x12, x8, #64 // =64
; CHECK-LLSC-O1-NEXT: tst wzr, #0x1
; CHECK-LLSC-O1-NEXT: lsl x13, x10, x8
; CHECK-LLSC-O1-NEXT: lsr x14, x10, x9
; CHECK-LLSC-O1-NEXT: lsl x10, x10, x12
; CHECK-LLSC-O1-NEXT: csel x10, x14, x10, ne
; CHECK-LLSC-O1-NEXT: csel x13, x13, xzr, ne
; CHECK-LLSC-O1-NEXT: csel x10, xzr, x10, ne
; CHECK-LLSC-O1-NEXT: orr x11, x11, x13
; CHECK-LLSC-O1-NEXT: lsl x13, x10, x9
; CHECK-LLSC-O1-NEXT: lsr x12, x10, x12
; CHECK-LLSC-O1-NEXT: orr x13, x13, x11, lsr #0
; CHECK-LLSC-O1-NEXT: tst wzr, #0x1
; CHECK-LLSC-O1-NEXT: csel x12, x13, x12, ne
; CHECK-LLSC-O1-NEXT: csel x12, x11, x12, ne
; CHECK-LLSC-O1-NEXT: stxp w13, x11, x12, [x2]
; CHECK-LLSC-O1-NEXT: cbnz w13, .LBB4_1
; CHECK-LLSC-O1-NEXT: ldxp x10, x8, [x2]
; CHECK-LLSC-O1-NEXT: lsl x8, x8, x9
; CHECK-LLSC-O1-NEXT: lsr x11, x8, x9
; CHECK-LLSC-O1-NEXT: stxp w12, x10, x11, [x2]
; CHECK-LLSC-O1-NEXT: cbnz w12, .LBB4_1
; CHECK-LLSC-O1-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-LLSC-O1-NEXT: mov v0.d[0], x11
; CHECK-LLSC-O1-NEXT: mov v0.d[1], x10
; CHECK-LLSC-O1-NEXT: mov v0.d[0], x10
; CHECK-LLSC-O1-NEXT: mov v0.d[1], x8
; CHECK-LLSC-O1-NEXT: str q0, [x3]
; CHECK-LLSC-O1-NEXT: ret
;
@ -381,27 +368,14 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) {
; CHECK-CAS-O1-NEXT: sub x9, x8, #64 // =64
; CHECK-CAS-O1-NEXT: .LBB4_1: // %atomicrmw.start
; CHECK-CAS-O1-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-CAS-O1-NEXT: ldxp x11, x10, [x2]
; CHECK-CAS-O1-NEXT: sub x12, x8, #64 // =64
; CHECK-CAS-O1-NEXT: lsl x13, x10, x8
; CHECK-CAS-O1-NEXT: lsr x14, x10, x9
; CHECK-CAS-O1-NEXT: lsl x10, x10, x12
; CHECK-CAS-O1-NEXT: tst wzr, #0x1
; CHECK-CAS-O1-NEXT: csel x13, x13, xzr, ne
; CHECK-CAS-O1-NEXT: csel x10, x14, x10, ne
; CHECK-CAS-O1-NEXT: csel x10, xzr, x10, ne
; CHECK-CAS-O1-NEXT: orr x11, x11, x13
; CHECK-CAS-O1-NEXT: lsl x13, x10, x9
; CHECK-CAS-O1-NEXT: orr x13, x13, x11, lsr #0
; CHECK-CAS-O1-NEXT: lsr x12, x10, x12
; CHECK-CAS-O1-NEXT: tst wzr, #0x1
; CHECK-CAS-O1-NEXT: csel x12, x13, x12, ne
; CHECK-CAS-O1-NEXT: csel x12, x11, x12, ne
; CHECK-CAS-O1-NEXT: stxp w13, x11, x12, [x2]
; CHECK-CAS-O1-NEXT: cbnz w13, .LBB4_1
; CHECK-CAS-O1-NEXT: ldxp x10, x8, [x2]
; CHECK-CAS-O1-NEXT: lsl x8, x8, x9
; CHECK-CAS-O1-NEXT: lsr x11, x8, x9
; CHECK-CAS-O1-NEXT: stxp w12, x10, x11, [x2]
; CHECK-CAS-O1-NEXT: cbnz w12, .LBB4_1
; CHECK-CAS-O1-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-CAS-O1-NEXT: mov v0.d[0], x11
; CHECK-CAS-O1-NEXT: mov v0.d[1], x10
; CHECK-CAS-O1-NEXT: mov v0.d[0], x10
; CHECK-CAS-O1-NEXT: mov v0.d[1], x8
; CHECK-CAS-O1-NEXT: str q0, [x3]
; CHECK-CAS-O1-NEXT: ret
;

View File

@ -0,0 +1,67 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
---
# Input pattern: select (c, x, x), i.e. a G_SELECT whose two value operands
# are the same register (%0), with a non-constant condition (%1).
# NOTE(review): the autogenerated assertions below still expect both the
# G_TRUNC and the G_SELECT in the output, so this test records that the
# select (c, x, x) -> x fold does NOT fire in this pass. Presumably that fold
# is not part of the rule group enabled here - confirm against the combiner
# rule list before relying on it.
name: test_combine_select_same_res
legalized: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: test_combine_select_same_res
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[COPY]], [[COPY]]
; CHECK: $x0 = COPY [[SELECT]](s64)
%0:_(s64) = COPY $x0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_SELECT %1, %0, %0
$x0 = COPY %2(s64)
...
---
# select (undef, x, y) -> x
# The condition (%2) is a G_IMPLICIT_DEF. The expected output copies $x0
# straight through to $x0, i.e. the select is replaced by its first value
# operand (%0, "x"), not by the second one (%1, "y").
name: test_combine_select_undef_res0_res1
legalized: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: test_combine_select_undef_res0_res1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: $x0 = COPY [[COPY]](s64)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s1) = G_IMPLICIT_DEF
%3:_(s64) = G_SELECT %2, %0, %1
$x0 = COPY %3(s64)
...
---
# select (false, x, y) -> y
# The condition (%2) is the constant i1 false. The expected output copies $x1
# directly into $x0, i.e. the select is replaced by its second value operand
# (%1, "y").
name: test_combine_select_false_res0_res1
legalized: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: test_combine_select_false_res0_res1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
; CHECK: $x0 = COPY [[COPY]](s64)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s1) = G_CONSTANT i1 false
%3:_(s64) = G_SELECT %2, %0, %1
$x0 = COPY %3(s64)
...
---
# select (true, x, y) -> x
# The condition (%2) is the constant i1 true. The expected output copies $x0
# directly into $x0, i.e. the select is replaced by its first value operand
# (%0, "x").
name: test_combine_select_true_res0_res1
legalized: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: test_combine_select_true_res0_res1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: $x0 = COPY [[COPY]](s64)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s1) = G_CONSTANT i1 true
%3:_(s64) = G_SELECT %2, %0, %1
$x0 = COPY %3(s64)
...

View File

@ -132,16 +132,10 @@ define i32 @f7() {
; GISEL-NEXT: add x8, x8, :lo12:x3+88
; GISEL-NEXT: mov v0.d[1], x8
; GISEL-NEXT: mov w9, #64
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: mov d0, v0.d[1]
; GISEL-NEXT: sub x8, x9, #64 // =64
; GISEL-NEXT: fmov x10, d1
; GISEL-NEXT: fmov x9, d0
; GISEL-NEXT: lsl x11, x10, x8
; GISEL-NEXT: lsr x8, x10, x8
; GISEL-NEXT: orr x10, x11, x9, lsr #0
; GISEL-NEXT: tst wzr, #0x1
; GISEL-NEXT: csel x8, x10, x8, ne
; GISEL-NEXT: csel x8, x9, x8, ne
; GISEL-NEXT: lsr x8, x9, x8
; GISEL-NEXT: ldr w0, [x8, #20]
; GISEL-NEXT: ret