diff --git a/lib/Target/AArch64/AArch64Combine.td b/lib/Target/AArch64/AArch64Combine.td index 5e2b5b66a95..6af11af8b21 100644 --- a/lib/Target/AArch64/AArch64Combine.td +++ b/lib/Target/AArch64/AArch64Combine.td @@ -203,6 +203,7 @@ def AArch64PostLegalizerCombinerHelper extractvecelt_pairwise_add, redundant_or, mul_const, redundant_sext_inreg, form_bitfield_extract, rotate_out_of_range, - icmp_to_true_false_known_bits, merge_unmerge]> { + icmp_to_true_false_known_bits, merge_unmerge, + select_combines]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll index b5b49e86f15..7830ad05e1a 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -351,27 +351,14 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) { ; CHECK-LLSC-O1-NEXT: sub x9, x8, #64 // =64 ; CHECK-LLSC-O1-NEXT: .LBB4_1: // %atomicrmw.start ; CHECK-LLSC-O1-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-LLSC-O1-NEXT: ldxp x11, x10, [x2] -; CHECK-LLSC-O1-NEXT: sub x12, x8, #64 // =64 -; CHECK-LLSC-O1-NEXT: tst wzr, #0x1 -; CHECK-LLSC-O1-NEXT: lsl x13, x10, x8 -; CHECK-LLSC-O1-NEXT: lsr x14, x10, x9 -; CHECK-LLSC-O1-NEXT: lsl x10, x10, x12 -; CHECK-LLSC-O1-NEXT: csel x10, x14, x10, ne -; CHECK-LLSC-O1-NEXT: csel x13, x13, xzr, ne -; CHECK-LLSC-O1-NEXT: csel x10, xzr, x10, ne -; CHECK-LLSC-O1-NEXT: orr x11, x11, x13 -; CHECK-LLSC-O1-NEXT: lsl x13, x10, x9 -; CHECK-LLSC-O1-NEXT: lsr x12, x10, x12 -; CHECK-LLSC-O1-NEXT: orr x13, x13, x11, lsr #0 -; CHECK-LLSC-O1-NEXT: tst wzr, #0x1 -; CHECK-LLSC-O1-NEXT: csel x12, x13, x12, ne -; CHECK-LLSC-O1-NEXT: csel x12, x11, x12, ne -; CHECK-LLSC-O1-NEXT: stxp w13, x11, x12, [x2] -; CHECK-LLSC-O1-NEXT: cbnz w13, .LBB4_1 +; CHECK-LLSC-O1-NEXT: ldxp x10, x8, [x2] +; CHECK-LLSC-O1-NEXT: lsl x8, x8, x9 +; CHECK-LLSC-O1-NEXT: lsr x11, x8, x9 +; CHECK-LLSC-O1-NEXT: stxp w12, x10, x11, [x2] +; CHECK-LLSC-O1-NEXT: cbnz w12, .LBB4_1 ; CHECK-LLSC-O1-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-LLSC-O1-NEXT: mov v0.d[0], x11 -; CHECK-LLSC-O1-NEXT: mov v0.d[1], x10 +; CHECK-LLSC-O1-NEXT: mov v0.d[0], x10 +; CHECK-LLSC-O1-NEXT: mov v0.d[1], x8 ; CHECK-LLSC-O1-NEXT: str q0, [x3] ; CHECK-LLSC-O1-NEXT: ret ; @@ -381,27 +368,14 @@ define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) { ; CHECK-CAS-O1-NEXT: sub x9, x8, #64 // =64 ; CHECK-CAS-O1-NEXT: .LBB4_1: // %atomicrmw.start ; CHECK-CAS-O1-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-CAS-O1-NEXT: ldxp x11, x10, [x2] -; CHECK-CAS-O1-NEXT: sub x12, x8, #64 // =64 -; CHECK-CAS-O1-NEXT: lsl x13, x10, x8 -; CHECK-CAS-O1-NEXT: lsr x14, x10, x9 -; CHECK-CAS-O1-NEXT: lsl x10, x10, x12 -; CHECK-CAS-O1-NEXT: tst wzr, #0x1 -; CHECK-CAS-O1-NEXT: csel x13, x13, xzr, ne -; CHECK-CAS-O1-NEXT: csel x10, x14, x10, ne -; CHECK-CAS-O1-NEXT: csel x10, xzr, x10, ne -; CHECK-CAS-O1-NEXT: orr x11, x11, x13 -; CHECK-CAS-O1-NEXT: lsl x13, x10, x9 -; CHECK-CAS-O1-NEXT: orr x13, x13, x11, lsr #0 -; CHECK-CAS-O1-NEXT: lsr x12, x10, x12 -; CHECK-CAS-O1-NEXT: tst wzr, #0x1 -; CHECK-CAS-O1-NEXT: csel x12, x13, x12, ne -; CHECK-CAS-O1-NEXT: csel x12, x11, x12, ne -; CHECK-CAS-O1-NEXT: stxp w13, x11, x12, [x2] -; CHECK-CAS-O1-NEXT: cbnz w13, .LBB4_1 +; CHECK-CAS-O1-NEXT: ldxp x10, x8, [x2] +; CHECK-CAS-O1-NEXT: lsl x8, x8, x9 +; CHECK-CAS-O1-NEXT: lsr x11, x8, x9 +; CHECK-CAS-O1-NEXT: stxp w12, x10, x11, [x2] +; CHECK-CAS-O1-NEXT: cbnz w12, .LBB4_1 ; CHECK-CAS-O1-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-CAS-O1-NEXT: mov v0.d[0], x11 -; CHECK-CAS-O1-NEXT: mov v0.d[1], x10 +; CHECK-CAS-O1-NEXT: mov v0.d[0], x10 +; CHECK-CAS-O1-NEXT: mov v0.d[1], x8 ; CHECK-CAS-O1-NEXT: str q0, [x3] ; CHECK-CAS-O1-NEXT: ret ; diff --git a/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir b/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir new file mode 100644 index 00000000000..32e2fe0fcf4 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-select.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +--- +# select (c, x, x) -> x +name: test_combine_select_same_res +legalized: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_combine_select_same_res + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[COPY]], [[COPY]] + ; CHECK: $x0 = COPY [[SELECT]](s64) + %0:_(s64) = COPY $x0 + %1:_(s1) = G_TRUNC %0 + %2:_(s64) = G_SELECT %1, %0, %0 + $x0 = COPY %2(s64) +... +--- +# select (undef, x, y) -> y +name: test_combine_select_undef_res0_res1 +legalized: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_combine_select_undef_res0_res1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s1) = G_IMPLICIT_DEF + %3:_(s64) = G_SELECT %2, %0, %1 + $x0 = COPY %3(s64) +... +--- +# select (false, x, y) -> y +name: test_combine_select_false_res0_res1 +legalized: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_combine_select_false_res0_res1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: $x0 = COPY [[COPY]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s1) = G_CONSTANT i1 false + %3:_(s64) = G_SELECT %2, %0, %1 + $x0 = COPY %3(s64) +... +--- +# select (true, x, y) -> x +name: test_combine_select_true_res0_res1 +legalized: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_combine_select_true_res0_res1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: $x0 = COPY [[COPY]](s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s1) = G_CONSTANT i1 true + %3:_(s64) = G_SELECT %2, %0, %1 + $x0 = COPY %3(s64) +... diff --git a/test/CodeGen/AArch64/fold-global-offsets.ll b/test/CodeGen/AArch64/fold-global-offsets.ll index 1cb891fea76..1871cc3caf3 100644 --- a/test/CodeGen/AArch64/fold-global-offsets.ll +++ b/test/CodeGen/AArch64/fold-global-offsets.ll @@ -132,16 +132,10 @@ define i32 @f7() { ; GISEL-NEXT: add x8, x8, :lo12:x3+88 ; GISEL-NEXT: mov v0.d[1], x8 ; GISEL-NEXT: mov w9, #64 -; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: mov d0, v0.d[1] ; GISEL-NEXT: sub x8, x9, #64 // =64 -; GISEL-NEXT: fmov x10, d1 ; GISEL-NEXT: fmov x9, d0 -; GISEL-NEXT: lsl x11, x10, x8 -; GISEL-NEXT: lsr x8, x10, x8 -; GISEL-NEXT: orr x10, x11, x9, lsr #0 -; GISEL-NEXT: tst wzr, #0x1 -; GISEL-NEXT: csel x8, x10, x8, ne -; GISEL-NEXT: csel x8, x9, x8, ne +; GISEL-NEXT: lsr x8, x9, x8 ; GISEL-NEXT: ldr w0, [x8, #20] ; GISEL-NEXT: ret