1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[AArch64][GlobalISel] Merge selection of vector-vector G_ASHR/G_LSHR and support more cases.

The vector-immediate cases are handled separately, by an earlier commit.
This commit is contained in:
Amara Emerson 2020-09-21 15:28:04 -07:00
parent eaf34219d1
commit 56e14355d6
2 changed files with 126 additions and 20 deletions

View File

@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@ -108,7 +109,7 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
@ -1511,9 +1512,10 @@ bool AArch64InstructionSelector::selectVectorSHL(
return true;
}
bool AArch64InstructionSelector::selectVectorASHR(
bool AArch64InstructionSelector::selectVectorAshrLshr(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_ASHR);
assert(I.getOpcode() == TargetOpcode::G_ASHR ||
I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
@ -1522,25 +1524,34 @@ bool AArch64InstructionSelector::selectVectorASHR(
if (!Ty.isVector())
return false;
bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
// We expect the immediate case to be lowered in the PostLegalCombiner to
// AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
const TargetRegisterClass *RC = nullptr;
const TargetRegisterClass *RC =
getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
if (Ty == LLT::vector(2, 64)) {
Opc = AArch64::SSHLv2i64;
Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(4, 32)) {
Opc = AArch64::SSHLv4i32;
Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(2, 32)) {
Opc = AArch64::SSHLv2i32;
Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
RC = &AArch64::FPR64RegClass;
} else if (Ty == LLT::vector(4, 16)) {
Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
NegOpc = AArch64::NEGv4i16;
} else if (Ty == LLT::vector(8, 16)) {
Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
NegOpc = AArch64::NEGv8i16;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
@ -2452,22 +2463,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorASHR(I, MRI);
return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_OR:
case TargetOpcode::G_LSHR: {
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;

View File

@ -322,7 +322,7 @@ body: |
...
---
name: ashr_v4i64
name: ashr_v2i64
alignment: 4
legalized: true
regBankSelected: true
@ -336,7 +336,7 @@ body: |
bb.1:
liveins: $q0, $q1
; CHECK-LABEL: name: ashr_v4i64
; CHECK-LABEL: name: ashr_v2i64
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
@ -373,6 +373,102 @@ body: |
RET_ReallyLR implicit $d0
...
---
# Selection test: vector-vector G_LSHR on <4 x s16> operands arriving in $d0/$d1.
# The CHECK lines expect the shift-amount vector to be negated with NEGv4i16
# and the result fed to USHLv4i16, with all values in fpr64.
name: lshr_v4i16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1
; CHECK-LABEL: name: lshr_v4i16
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[NEGv4i16_:%[0-9]+]]:fpr64 = NEGv4i16 [[COPY1]]
; CHECK: [[USHLv4i16_:%[0-9]+]]:fpr64 = USHLv4i16 [[COPY]], [[NEGv4i16_]]
; CHECK: $d0 = COPY [[USHLv4i16_]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
%1:fpr(<4 x s16>) = COPY $d1
%2:fpr(<4 x s16>) = G_LSHR %0, %1(<4 x s16>)
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
---
# Selection test: vector-vector G_LSHR on <4 x s32> operands arriving in $q0/$q1.
# The CHECK lines expect the shift-amount vector to be negated with NEGv4i32
# and the result fed to USHLv4i32, with all values in fpr128.
name: lshr_v4i32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
- { id: 2, class: fpr }
machineFunctionInfo: {}
body: |
bb.1:
liveins: $q0, $q1
; CHECK-LABEL: name: lshr_v4i32
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: [[NEGv4i32_:%[0-9]+]]:fpr128 = NEGv4i32 [[COPY1]]
; CHECK: [[USHLv4i32_:%[0-9]+]]:fpr128 = USHLv4i32 [[COPY]], [[NEGv4i32_]]
; CHECK: $q0 = COPY [[USHLv4i32_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<4 x s32>) = COPY $q0
%1:fpr(<4 x s32>) = COPY $q1
%2:fpr(<4 x s32>) = G_LSHR %0, %1(<4 x s32>)
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
# Selection test: vector-vector G_LSHR on <8 x s16> operands arriving in $q0/$q1.
# The CHECK lines expect the shift-amount vector to be negated with NEGv8i16
# and the result fed to USHLv8i16, with all values in fpr128.
name: lshr_v8i16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0, $q1
; CHECK-LABEL: name: lshr_v8i16
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: [[NEGv8i16_:%[0-9]+]]:fpr128 = NEGv8i16 [[COPY1]]
; CHECK: [[USHLv8i16_:%[0-9]+]]:fpr128 = USHLv8i16 [[COPY]], [[NEGv8i16_]]
; CHECK: $q0 = COPY [[USHLv8i16_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<8 x s16>) = COPY $q0
%1:fpr(<8 x s16>) = COPY $q1
%2:fpr(<8 x s16>) = G_LSHR %0, %1(<8 x s16>)
$q0 = COPY %2(<8 x s16>)
RET_ReallyLR implicit $q0
...
---
# Selection test: vector-vector G_ASHR on <4 x s16> operands arriving in $d0/$d1.
# The CHECK lines expect the shift-amount vector to be negated with NEGv4i16
# and the result fed to SSHLv4i16 (the signed counterpart of the USHL used by
# the G_LSHR tests), with all values in fpr64.
name: ashr_v4i16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1
; CHECK-LABEL: name: ashr_v4i16
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[NEGv4i16_:%[0-9]+]]:fpr64 = NEGv4i16 [[COPY1]]
; CHECK: [[SSHLv4i16_:%[0-9]+]]:fpr64 = SSHLv4i16 [[COPY]], [[NEGv4i16_]]
; CHECK: $d0 = COPY [[SSHLv4i16_]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
%1:fpr(<4 x s16>) = COPY $d1
%2:fpr(<4 x s16>) = G_ASHR %0, %1(<4 x s16>)
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
---
name: vashr_v4i16_imm
legalized: true
regBankSelected: true