mirror of https://github.com/RPCS3/llvm-mirror.git
[ARM] Mark MVE_VMOV_to_lane_32 as isInsertSubregLike
This allows the peephole optimizer to know that an MVE_VMOV_to_lane_32 is the same as an insert subreg, letting it optimize away some redundant lane moves.

Differential Revision: https://reviews.llvm.org/D95433
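As a rough illustration of what "insert-subreg-like" buys the peephole optimizer, the sketch below models the decomposition that ARMBaseInstrInfo::getInsertSubregLikeInputs performs for `qX = MVE_VMOV_to_lane_32 qY, rZ, imm`: the result is treated as the base register qY with lane `imm` replaced by rZ, and the lane maps to sub-register index `ssub_0 + imm`, mirroring the patched line in the diff below. This is a minimal standalone C++ sketch with made-up types, not LLVM's actual MachineInstr/TargetInstrInfo API.

```cpp
// Standalone sketch (hypothetical types, not LLVM's API) of viewing an MVE
// lane move "vmov.32 qX[imm], rZ" as an insert-subreg:
//     qX = INSERT_SUBREG qY, rZ, ssub_<imm>
#include <cassert>
#include <cstdio>

// ARM models each 32-bit lane of a 128-bit q register as an s sub-register.
enum SubRegIndex : unsigned { ssub_0 = 1, ssub_1, ssub_2, ssub_3 };

struct RegisterPart {
  unsigned Reg;    // register number
  unsigned SubIdx; // sub-register index inside the q register (0 = whole reg)
};

// Decompose "Dst = MVE_VMOV_to_lane_32 Base, Inserted, Lane" into the base
// register and the inserted part, the way an insert-subreg-like peephole
// would consume it.
static void decomposeLaneMove(unsigned Base, unsigned Inserted, int Lane,
                              RegisterPart &BaseReg, RegisterPart &InsReg) {
  assert(Lane >= 0 && Lane <= 3 && "a q register has four i32 lanes");
  BaseReg = {Base, 0};
  // Mirrors the patched line: InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
  InsReg = {Inserted, ssub_0 + static_cast<unsigned>(Lane)};
}

int main() {
  RegisterPart BaseReg, InsReg;
  decomposeLaneMove(/*Base=*/1, /*Inserted=*/2, /*Lane=*/2, BaseReg, InsReg);
  std::printf("base reg %u, inserted reg %u at ssub_%u\n", BaseReg.Reg,
              InsReg.Reg, InsReg.SubIdx - ssub_0);
}
```

With this decomposition available, when the inserted GPR is immediately copied back out of the same lane, the optimizer can forward the original value and drop the pair of moves, which is what the test updates further down reflect.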
parent e79a30d836
commit 91ae3d7f3f
@@ -5391,7 +5391,9 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
  switch (MI.getOpcode()) {
  case ARM::VSETLNi32:
  case ARM::MVE_VMOV_to_lane_32:
    // dX = VSETLNi32 dY, rZ, imm
    // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
    const MachineOperand &MOBaseReg = MI.getOperand(1);
    const MachineOperand &MOInsertedReg = MI.getOperand(2);
    if (MOInsertedReg.isUndef())
@@ -5402,7 +5404,7 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
    InsertedReg.Reg = MOInsertedReg.getReg();
    InsertedReg.SubReg = MOInsertedReg.getSubReg();
    InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
    InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
@@ -1825,12 +1825,13 @@ class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
}

def MVE_VMOV_from_lane_32  : MVE_VMOV_lane_32<            MVE_VMOV_from_lane>;
def MVE_VMOV_to_lane_32    : MVE_VMOV_lane_32<            MVE_VMOV_to_lane>;
def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
def MVE_VMOV_to_lane_16    : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
def MVE_VMOV_from_lane_s8  : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
def MVE_VMOV_from_lane_u8  : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
let isInsertSubreg = 1 in
def MVE_VMOV_to_lane_32    : MVE_VMOV_lane_32<            MVE_VMOV_to_lane>;
def MVE_VMOV_to_lane_16    : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
def MVE_VMOV_to_lane_8     : MVE_VMOV_lane_8 < "8",  0b0, MVE_VMOV_to_lane>;

// This is the same as insertelt but allows the inserted value to be an i32 as
@@ -274,11 +274,8 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x) {
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmov.32 q1[0], r0
; CHECK-NEXT:    asrs r1, r0, #31
; CHECK-NEXT:    vmov.32 q1[1], r1
; CHECK-NEXT:    sxth r2, r2
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adc.w r1, r1, r2, asr #31
; CHECK-NEXT:    bx lr
@@ -798,11 +795,8 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x) {
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmov.32 q1[0], r0
; CHECK-NEXT:    asrs r1, r0, #31
; CHECK-NEXT:    vmov.32 q1[1], r1
; CHECK-NEXT:    sxtb r2, r2
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adc.w r1, r1, r2, asr #31
; CHECK-NEXT:    bx lr
@@ -1131,15 +1125,12 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) {
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    sxth r2, r2
; CHECK-NEXT:    vmov.32 q1[0], r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    vmov.32 q1[1], r2
; CHECK-NEXT:    asr.w r12, r2, #31
; CHECK-NEXT:    sxth r3, r3
; CHECK-NEXT:    vmov r12, s4
; CHECK-NEXT:    adds.w r12, r12, r3
; CHECK-NEXT:    adc.w r2, r2, r3, asr #31
; CHECK-NEXT:    adds.w r0, r0, r12
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    adds r2, r2, r3
; CHECK-NEXT:    adc.w r3, r12, r3, asr #31
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %xx = sext <2 x i16> %x to <2 x i64>
@@ -1694,15 +1685,12 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) {
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    sxtb r2, r2
; CHECK-NEXT:    vmov.32 q1[0], r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    vmov.32 q1[1], r2
; CHECK-NEXT:    asr.w r12, r2, #31
; CHECK-NEXT:    sxtb r3, r3
; CHECK-NEXT:    vmov r12, s4
; CHECK-NEXT:    adds.w r12, r12, r3
; CHECK-NEXT:    adc.w r2, r2, r3, asr #31
; CHECK-NEXT:    adds.w r0, r0, r12
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    adds r2, r2, r3
; CHECK-NEXT:    adc.w r3, r12, r3, asr #31
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %xx = sext <2 x i8> %x to <2 x i64>
@@ -970,23 +970,20 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) {
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s5
; CHECK-NEXT:    vmov r3, s5
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    umull r12, r3, r1, r0
; CHECK-NEXT:    mla r1, r1, r2, r3
; CHECK-NEXT:    umull r12, r2, r1, r0
; CHECK-NEXT:    mla r1, r1, r3, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov.32 q2[0], r12
; CHECK-NEXT:    mla lr, r2, r0, r1
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    umull r3, r1, r2, r0
; CHECK-NEXT:    mla r1, r2, r4, r1
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    mla r1, r2, r0, r1
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vmov.32 q2[1], r1
; CHECK-NEXT:    vmov r12, s8
; CHECK-NEXT:    umull lr, r0, r3, r2
; CHECK-NEXT:    mla r0, r3, r4, r0
; CHECK-NEXT:    vmov r3, s3
; CHECK-NEXT:    mla r2, r3, r2, r0
; CHECK-NEXT:    adds.w r0, r12, lr
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    adds.w r0, r12, r3
; CHECK-NEXT:    adc.w r1, r1, lr
; CHECK-NEXT:    pop {r4, pc}
entry:
  %m = mul <2 x i64> %x, %y
@@ -1843,23 +1840,20 @@ define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %y, i64
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r4, s5
; CHECK-NEXT:    vmov r6, s7
; CHECK-NEXT:    umull r12, lr, r3, r2
; CHECK-NEXT:    mla r3, r3, r4, lr
; CHECK-NEXT:    umull lr, r12, r3, r2
; CHECK-NEXT:    mla r3, r3, r4, r12
; CHECK-NEXT:    vmov r4, s1
; CHECK-NEXT:    vmov.32 q2[0], r12
; CHECK-NEXT:    mla r2, r4, r2, r3
; CHECK-NEXT:    vmov r4, s6
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov.32 q2[1], r2
; CHECK-NEXT:    vmov r12, s8
; CHECK-NEXT:    umull lr, r5, r3, r4
; CHECK-NEXT:    mla r3, r3, r6, r5
; CHECK-NEXT:    mla r12, r4, r2, r3
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    vmov r4, s2
; CHECK-NEXT:    umull r2, r5, r4, r3
; CHECK-NEXT:    mla r4, r4, r6, r5
; CHECK-NEXT:    vmov r5, s3
; CHECK-NEXT:    adds.w r6, r12, lr
; CHECK-NEXT:    mla r3, r5, r4, r3
; CHECK-NEXT:    adcs r2, r3
; CHECK-NEXT:    adds r0, r0, r6
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    adds.w r2, r2, lr
; CHECK-NEXT:    mla r3, r5, r3, r4
; CHECK-NEXT:    adc.w r3, r3, r12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %m = mul <2 x i64> %x, %y
@@ -18,7 +18,6 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
; CHECK-NEXT:    vmov.f32 s8, s7
; CHECK-NEXT:    vmov.f32 s10, s1
; CHECK-NEXT:    vmov r2, s8
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    vmov.f64 d4, d2
; CHECK-NEXT:    vmov.f32 s9, s6
; CHECK-NEXT:    vmov.f32 s10, s0