1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

Add some more Thumb2 multiplication instructions.

llvm-svn: 74889
This commit is contained in:
Evan Cheng 2009-07-07 01:17:28 +00:00
parent 3d69bea38e
commit 46b98516f6
6 changed files with 187 additions and 11 deletions

View File

@ -923,7 +923,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
break;
}
case ISD::MUL:
if (Subtarget->isThumb())
if (Subtarget->isThumb1Only())
break;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned RHSV = C->getZExtValue();
@ -953,20 +953,37 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
Op.getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
return CurDAG->getTargetNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4);
} else {
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
}
}
case ISD::SMUL_LOHI: {
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getTargetNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4);
} else {
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
}
}
case ISD::LOAD: {
SDNode *ResNode = 0;
if (Subtarget->isThumb2())
if (Subtarget->isThumb() && Subtarget->hasThumb2())
ResNode = SelectT2IndexedLoad(Op);
else
ResNode = SelectARMIndexedLoad(Op);

View File

@ -246,7 +246,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// i64 operation support.
if (Subtarget->isThumb()) {
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
@ -255,7 +255,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
} else {
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (!Subtarget->hasV6Ops())
if (!Subtarget->isThumb() && !Subtarget->hasV6Ops())
setOperationAction(ISD::MULHS, MVT::i32, Expand);
}
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@ -3034,7 +3034,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
if (Subtarget->isThumb() && Subtarget->hasThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
else
@ -3071,7 +3071,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
if (Subtarget->isThumb() && Subtarget->hasThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
else

View File

@ -838,7 +838,113 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
"mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
// FIXME: SMULL, etc.
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b),
"smull", " $ldst, $hdst, $a, $b", []>;
def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b),
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b),
"smlal", " $ldst, $hdst, $a, $b", []>;
def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b),
"umlal", " $ldst, $hdst, $a, $b", []>;
def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b),
"umaal", " $ldst, $hdst, $a, $b", []>;
} // neverHasSideEffects
// Most significant word multiply
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
"smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
"smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
"smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
!strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
!strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
!strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
!strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
!strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
!strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>;
}
multiclass T2I_smla<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
!strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
!strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
!strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
!strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
!strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
!strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>;
}
defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// TODO: Halfword multiple accumulate long: SMLAL<x><y>
// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.

View File

@ -0,0 +1,20 @@
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep smmul | count 1
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep umull | count 1
define i32 @smulhi(i32 %x, i32 %y) {
%tmp = sext i32 %x to i64 ; <i64> [#uses=1]
%tmp1 = sext i32 %y to i64 ; <i64> [#uses=1]
%tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
%tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
%tmp3.upgrd.1 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
ret i32 %tmp3.upgrd.1
}
define i32 @umulhi(i32 %x, i32 %y) {
%tmp = zext i32 %x to i64 ; <i64> [#uses=1]
%tmp1 = zext i32 %y to i64 ; <i64> [#uses=1]
%tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
%tmp3 = lshr i64 %tmp2, 32 ; <i64> [#uses=1]
%tmp3.upgrd.2 = trunc i64 %tmp3 to i32 ; <i32> [#uses=1]
ret i32 %tmp3.upgrd.2
}

View File

@ -0,0 +1,10 @@
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
; RUN: grep smlabt | count 1
define i32 @f3(i32 %a, i16 %x, i32 %y) {
%tmp = sext i16 %x to i32 ; <i32> [#uses=1]
%tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
%tmp5 = add i32 %tmp3, %a ; <i32> [#uses=1]
ret i32 %tmp5
}

View File

@ -0,0 +1,23 @@
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
; RUN: grep smulbt | count 1
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | \
; RUN: grep smultt | count 1
@x = weak global i16 0 ; <i16*> [#uses=1]
@y = weak global i16 0 ; <i16*> [#uses=0]
define i32 @f1(i32 %y) {
%tmp = load i16* @x ; <i16> [#uses=1]
%tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
%tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
%tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp4 = mul i32 %tmp2, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp4
}
define i32 @f2(i32 %x, i32 %y) {
%tmp1 = ashr i32 %x, 16 ; <i32> [#uses=1]
%tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp4 = mul i32 %tmp3, %tmp1 ; <i32> [#uses=1]
ret i32 %tmp4
}