1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00

[Hexagon] Adding word combine dot-new form and replacing old combine opcode.

llvm-svn: 223815
This commit is contained in:
Colin LeMahieu 2014-12-09 19:23:45 +00:00
parent 2100e2da37
commit 56e9b3ffa1
6 changed files with 57 additions and 80 deletions

View File

@ -563,7 +563,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
// Handle globals.
if (HiOperand.isGlobal()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg)
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
HiOperand.getTargetFlags())
.addImm(LoOperand.getImm());
@ -580,7 +580,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
// Handle constant extended immediates.
if (!isInt<8>(HiOperand.getImm())) {
assert(isInt<8>(LoOperand.getImm()));
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg)
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
return;
@ -596,7 +596,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
// Insert new combine instruction.
// DoubleRegDest = combine #HiImm, #LoImm
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg)
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
}
@ -666,7 +666,7 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt,
// Insert new combine instruction.
// DoubleRegDest = combine HiReg, LoReg
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg)
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg)
.addReg(HiReg, HiRegKillFlag)
.addReg(LoReg, LoRegKillFlag);
}

View File

@ -523,7 +523,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
TargetConstVal, Chain);
SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
TargetConst0);
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2,0),
SDValue(Result_1,0));
@ -550,7 +550,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
Base, TargetConst0, Chain);
SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
TargetConst0);
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2,0),
SDValue(Result_1,0));
@ -1183,7 +1183,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
MVT::i32,
TargetConst0);
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2, 0),
SDValue(Result_1, 0));

View File

@ -739,10 +739,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
switch(Opc) {
default: llvm_unreachable("Unexpected predicated instruction");
case Hexagon::COMBINE_rr_cPt:
return Hexagon::COMBINE_rr_cNotPt;
case Hexagon::COMBINE_rr_cNotPt:
return Hexagon::COMBINE_rr_cPt;
case Hexagon::C2_ccombinewt:
return Hexagon::C2_ccombinewf;
case Hexagon::C2_ccombinewf:
return Hexagon::C2_ccombinewt;
// Dealloc_return.
case Hexagon::DEALLOC_RET_cPt_V4:
@ -780,9 +780,9 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
case Hexagon::TFRI_f:
return !invertPredicate ? Hexagon::TFRI_cPt_f :
Hexagon::TFRI_cNotPt_f;
case Hexagon::COMBINE_rr:
return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
Hexagon::COMBINE_rr_cNotPt;
case Hexagon::A2_combinew:
return !invertPredicate ? Hexagon::C2_ccombinewt :
Hexagon::C2_ccombinewf;
// Word.
case Hexagon::STriw_f:
@ -1340,8 +1340,8 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
case Hexagon::A4_pzxthtnew:
case Hexagon::ADD_ri_cPt:
case Hexagon::ADD_ri_cNotPt:
case Hexagon::COMBINE_rr_cPt:
case Hexagon::COMBINE_rr_cNotPt:
case Hexagon::C2_ccombinewt:
case Hexagon::C2_ccombinewf:
return true;
}
}
@ -1627,10 +1627,10 @@ int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI,
// Conditional combine
case Hexagon::COMBINE_rr_cPt :
return Hexagon::COMBINE_rr_cdnPt;
case Hexagon::COMBINE_rr_cNotPt :
return Hexagon::COMBINE_rr_cdnNotPt;
case Hexagon::C2_ccombinewt:
return Hexagon::C2_ccombinewnewt;
case Hexagon::C2_ccombinewf:
return Hexagon::C2_ccombinewnewf;
}
}

View File

@ -217,6 +217,8 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0,
// Conditional combinew uses "newt/f" instead of "t/fnew".
def C2_ccombinewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
def C2_ccombinewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
@ -493,37 +495,6 @@ multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
}
}
// Combines the two integer registers SRC1 and SRC2 into a double register.
let isPredicable = 1 in
class T_Combine : ALU32_rr<(outs DoubleRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = combine($src1, $src2)",
[(set (i64 DoubleRegs:$dst),
(i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
(i32 IntRegs:$src2))))]>;
multiclass Combine_base {
let BaseOpcode = "combine" in {
def NAME : T_Combine;
let hasSideEffects = 0, isPredicated = 1 in {
defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>;
defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>;
}
}
}
defm COMBINE_rr : Combine_base, PredNewRel;
// Combines the two immediates SRC1 and SRC2 into a double register.
class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
"$dst = combine(#$src1, #$src2)",
[(set (i64 DoubleRegs:$dst),
(i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs).
// Pattern definition for 'neg' was not necessary.
@ -2797,7 +2768,7 @@ def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
let AddedComplexity = 100 in
def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
(i64 (COMBINE_rr (TFRI 0),
(i64 (A2_combinew (TFRI 0),
(LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>,
Requires<[NoV4T]>;
@ -2867,7 +2838,7 @@ def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
// Hexagon does not support 64-bit MUXes; so emulate with combines.
def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
(i64 DoubleRegs:$src3)),
(i64 (COMBINE_rr (i32 (C2_mux (i1 PredRegs:$src1),
(i64 (A2_combinew (i32 (C2_mux (i1 PredRegs:$src1),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
subreg_hireg)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
@ -3052,7 +3023,7 @@ def : Pat <(i32 (sext (i1 PredRegs:$src1))),
// i1 -> i64
def : Pat <(i64 (sext (i1 PredRegs:$src1))),
(i64 (COMBINE_rr (TFRI -1), (C2_muxii (i1 PredRegs:$src1), -1, 0)))>;
(i64 (A2_combinew (TFRI -1), (C2_muxii (i1 PredRegs:$src1), -1, 0)))>;
// Convert sign-extended load back to load and sign extend.
// i8 -> i64
@ -3082,58 +3053,58 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
// i1 -> i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
(i64 (COMBINE_rr (TFRI 0), (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
(i64 (A2_combinew (TFRI 0), (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
Requires<[NoV4T]>;
// i32 -> i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
(i64 (A2_combinew (TFRI 0), (i32 IntRegs:$src1)))>,
Requires<[NoV4T]>;
// i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
(i64 (A2_combinew (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
(i64 (A2_combinew (TFRI 0), (LDriub_indexed IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i1 -> i64
def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
(i64 (A2_combinew (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
(i64 (A2_combinew (TFRI 0), (LDriub_indexed IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
(i64 (A2_combinew (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
(i64 (A2_combinew (TFRI 0), (LDriuh_indexed IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i32 -> i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
(i64 (A2_combinew (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
let AddedComplexity = 100 in
def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
(i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
(i64 (A2_combinew (TFRI 0), (LDriw_indexed IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[NoV4T]>;
@ -3159,20 +3130,20 @@ def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 (i32 (add IntRegs:$src2,
s11_2ExtPred:$offset2)))))),
(i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(LDriw_indexed IntRegs:$src2,
s11_2ExtPred:$offset2)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
(i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(LDriw ADDRriS11_2:$srcLow)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zext (i32 IntRegs:$srcLow))))),
(i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
IntRegs:$srcLow))>;
let AddedComplexity = 100 in
@ -3180,26 +3151,26 @@ def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 (i32 (add IntRegs:$src2,
s11_2ExtPred:$offset2)))))),
(i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(LDriw_indexed IntRegs:$src2,
s11_2ExtPred:$offset2)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
(i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(LDriw ADDRriS11_2:$srcLow)))>;
def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
(i32 32))),
(i64 (zext (i32 IntRegs:$srcLow))))),
(i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
(i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
IntRegs:$srcLow))>;
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
(i64 (A2_combinew (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
// When there is an offset we should prefer the pattern below over the pattern above.
@ -3214,25 +3185,25 @@ def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
// ********************************************
let AddedComplexity = 100 in
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
(i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
(i64 (A2_combinew (TFRI 0), (LDriw_indexed IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[NoV4T]>;
// anyext i16 -> i64.
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
(i64 (A2_combinew (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
let AddedComplexity = 20 in
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
(i64 (A2_combinew (TFRI 0), (LDrih_indexed IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[NoV4T]>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
(i64 (A2_combinew (TFRI 0), (i32 IntRegs:$src1)))>,
Requires<[NoV4T]>;
// Multiply 64-bit unsigned and use upper result.
@ -3240,7 +3211,7 @@ def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(i64
(MPYU64_acc
(i64
(COMBINE_rr
(A2_combinew
(TFRI 0),
(i32
(EXTRACT_SUBREG
@ -3251,7 +3222,7 @@ def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(i64
(MPYU64_acc
(i64
(COMBINE_rr (TFRI 0),
(A2_combinew (TFRI 0),
(i32
(EXTRACT_SUBREG
(i64
@ -3275,7 +3246,7 @@ def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(i64
(MPY64_acc
(i64
(COMBINE_rr (TFRI 0),
(A2_combinew (TFRI 0),
(i32
(EXTRACT_SUBREG
(i64
@ -3285,7 +3256,7 @@ def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(i64
(MPY64_acc
(i64
(COMBINE_rr (TFRI 0),
(A2_combinew (TFRI 0),
(i32
(EXTRACT_SUBREG
(i64

View File

@ -13,7 +13,7 @@
//
def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
(i64
(COMBINE_rr
(A2_combinew
(HEXAGON_M2_maci
(HEXAGON_M2_maci
(i32

View File

@ -10,6 +10,12 @@
# CHECK: if (p3) r17:16 = combine(r21, r31)
0xf0 0xdf 0x15 0xfd
# CHECK: if (!p3) r17:16 = combine(r21, r31)
0x03 0x40 0x45 0x85 0x70 0xff 0x15 0xfd
# CHECK: p3 = r5
# CHECK-NEXT: if (p3.new) r17:16 = combine(r21, r31)
0x03 0x40 0x45 0x85 0xf0 0xff 0x15 0xfd
# CHECK: p3 = r5
# CHECK-NEXT: if (!p3.new) r17:16 = combine(r21, r31)
0x71 0xdf 0x15 0xf9
# CHECK: if (p3) r17 = and(r21, r31)
0x71 0xdf 0x35 0xf9