mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Don't emit MULX by default with BMI2
MULX has somewhat improved register allocation constraints compared to the legacy MUL instruction. Both output registers are encoded instead of fixed to EAX/EDX, but EDX is used as input. It also doesn't touch flags. Unfortunately, the encoding is longer. Preferring it whenever BMI2 is enabled is probably not optimal. Choosing it should somehow be a function of register allocation constraints, like converting adds to three-address form. gcc and icc definitely don't pick MULX by default. It is not clear what, if any, rules they have for using it. Differential Revision: https://reviews.llvm.org/D55565 llvm-svn: 348975
This commit is contained in:
parent
df0f46f125
commit
43dcc4cc5f
@ -3410,14 +3410,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||||||
|
|
||||||
unsigned Opc, MOpc;
|
unsigned Opc, MOpc;
|
||||||
bool isSigned = Opcode == ISD::SMUL_LOHI;
|
bool isSigned = Opcode == ISD::SMUL_LOHI;
|
||||||
bool hasBMI2 = Subtarget->hasBMI2();
|
|
||||||
if (!isSigned) {
|
if (!isSigned) {
|
||||||
switch (NVT.SimpleTy) {
|
switch (NVT.SimpleTy) {
|
||||||
default: llvm_unreachable("Unsupported VT!");
|
default: llvm_unreachable("Unsupported VT!");
|
||||||
case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
|
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
|
||||||
MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
|
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
|
||||||
case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
|
|
||||||
MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
switch (NVT.SimpleTy) {
|
switch (NVT.SimpleTy) {
|
||||||
@ -3438,12 +3435,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||||||
case X86::MUL64r:
|
case X86::MUL64r:
|
||||||
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
|
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
|
||||||
break;
|
break;
|
||||||
case X86::MULX32rr:
|
|
||||||
SrcReg = X86::EDX; LoReg = HiReg = 0;
|
|
||||||
break;
|
|
||||||
case X86::MULX64rr:
|
|
||||||
SrcReg = X86::RDX; LoReg = HiReg = 0;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
|
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
|
||||||
@ -3457,26 +3448,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||||||
|
|
||||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
|
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
|
||||||
N0, SDValue()).getValue(1);
|
N0, SDValue()).getValue(1);
|
||||||
SDValue ResHi, ResLo;
|
|
||||||
|
|
||||||
if (foldedLoad) {
|
if (foldedLoad) {
|
||||||
SDValue Chain;
|
SDValue Chain;
|
||||||
MachineSDNode *CNode = nullptr;
|
MachineSDNode *CNode = nullptr;
|
||||||
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
|
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
|
||||||
InFlag };
|
InFlag };
|
||||||
if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
|
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
|
||||||
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
|
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
||||||
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
Chain = SDValue(CNode, 0);
|
||||||
ResHi = SDValue(CNode, 0);
|
InFlag = SDValue(CNode, 1);
|
||||||
ResLo = SDValue(CNode, 1);
|
|
||||||
Chain = SDValue(CNode, 2);
|
|
||||||
InFlag = SDValue(CNode, 3);
|
|
||||||
} else {
|
|
||||||
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
|
|
||||||
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
|
||||||
Chain = SDValue(CNode, 0);
|
|
||||||
InFlag = SDValue(CNode, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the chain.
|
// Update the chain.
|
||||||
ReplaceUses(N1.getValue(1), Chain);
|
ReplaceUses(N1.getValue(1), Chain);
|
||||||
@ -3484,39 +3464,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||||||
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
|
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
|
||||||
} else {
|
} else {
|
||||||
SDValue Ops[] = { N1, InFlag };
|
SDValue Ops[] = { N1, InFlag };
|
||||||
if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
|
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
|
||||||
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
|
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
||||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
InFlag = SDValue(CNode, 0);
|
||||||
ResHi = SDValue(CNode, 0);
|
|
||||||
ResLo = SDValue(CNode, 1);
|
|
||||||
InFlag = SDValue(CNode, 2);
|
|
||||||
} else {
|
|
||||||
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
|
|
||||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
|
||||||
InFlag = SDValue(CNode, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy the low half of the result, if it is needed.
|
// Copy the low half of the result, if it is needed.
|
||||||
if (!SDValue(Node, 0).use_empty()) {
|
if (!SDValue(Node, 0).use_empty()) {
|
||||||
if (!ResLo.getNode()) {
|
assert(LoReg && "Register for low half is not defined!");
|
||||||
assert(LoReg && "Register for low half is not defined!");
|
SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||||
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
|
NVT, InFlag);
|
||||||
InFlag);
|
InFlag = ResLo.getValue(2);
|
||||||
InFlag = ResLo.getValue(2);
|
|
||||||
}
|
|
||||||
ReplaceUses(SDValue(Node, 0), ResLo);
|
ReplaceUses(SDValue(Node, 0), ResLo);
|
||||||
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
|
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
|
||||||
dbgs() << '\n');
|
dbgs() << '\n');
|
||||||
}
|
}
|
||||||
// Copy the high half of the result, if it is needed.
|
// Copy the high half of the result, if it is needed.
|
||||||
if (!SDValue(Node, 1).use_empty()) {
|
if (!SDValue(Node, 1).use_empty()) {
|
||||||
if (!ResHi.getNode()) {
|
assert(HiReg && "Register for high half is not defined!");
|
||||||
assert(HiReg && "Register for high half is not defined!");
|
SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
|
||||||
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
|
NVT, InFlag);
|
||||||
InFlag);
|
InFlag = ResHi.getValue(2);
|
||||||
InFlag = ResHi.getValue(2);
|
|
||||||
}
|
|
||||||
ReplaceUses(SDValue(Node, 1), ResHi);
|
ReplaceUses(SDValue(Node, 1), ResHi);
|
||||||
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
|
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
|
||||||
dbgs() << '\n');
|
dbgs() << '\n');
|
||||||
|
@ -68,8 +68,8 @@ define i64 @mulx64(i64 %x, i64 %y, i64* %p) {
|
|||||||
; CHECK-LABEL: mulx64:
|
; CHECK-LABEL: mulx64:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movq %rdx, %rcx
|
; CHECK-NEXT: movq %rdx, %rcx
|
||||||
; CHECK-NEXT: movq %rdi, %rdx
|
; CHECK-NEXT: movq %rdi, %rax
|
||||||
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
|
; CHECK-NEXT: mulq %rsi
|
||||||
; CHECK-NEXT: movq %rdx, (%rcx)
|
; CHECK-NEXT: movq %rdx, (%rcx)
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%x1 = zext i64 %x to i128
|
%x1 = zext i64 %x to i128
|
||||||
@ -86,8 +86,8 @@ define i64 @mulx64_load(i64 %x, i64* %y, i64* %p) {
|
|||||||
; CHECK-LABEL: mulx64_load:
|
; CHECK-LABEL: mulx64_load:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movq %rdx, %rcx
|
; CHECK-NEXT: movq %rdx, %rcx
|
||||||
; CHECK-NEXT: movq %rdi, %rdx
|
; CHECK-NEXT: movq %rdi, %rax
|
||||||
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
|
; CHECK-NEXT: mulq (%rsi)
|
||||||
; CHECK-NEXT: movq %rdx, (%rcx)
|
; CHECK-NEXT: movq %rdx, (%rcx)
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%y1 = load i64, i64* %y
|
%y1 = load i64, i64* %y
|
||||||
|
@ -120,11 +120,11 @@ define i32 @mulx32(i32 %x, i32 %y, i32* %p) {
|
|||||||
; X86-LABEL: mulx32:
|
; X86-LABEL: mulx32:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
; X86-NEXT: addl %edx, %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: addl %eax, %eax
|
; X86-NEXT: addl %eax, %eax
|
||||||
; X86-NEXT: mulxl %eax, %eax, %edx
|
; X86-NEXT: addl %edx, %edx
|
||||||
|
; X86-NEXT: mull %edx
|
||||||
; X86-NEXT: movl %edx, (%ecx)
|
; X86-NEXT: movl %edx, (%ecx)
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
@ -156,10 +156,10 @@ define i32 @mulx32_load(i32 %x, i32* %y, i32* %p) {
|
|||||||
; X86-LABEL: mulx32_load:
|
; X86-LABEL: mulx32_load:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
; X86-NEXT: addl %edx, %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: mulxl (%eax), %eax, %edx
|
; X86-NEXT: addl %eax, %eax
|
||||||
|
; X86-NEXT: mull (%edx)
|
||||||
; X86-NEXT: movl %edx, (%ecx)
|
; X86-NEXT: movl %edx, (%ecx)
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
|
@ -7,108 +7,61 @@
|
|||||||
; PR1198
|
; PR1198
|
||||||
|
|
||||||
define i64 @foo(i64 %x, i64 %y) nounwind {
|
define i64 @foo(i64 %x, i64 %y) nounwind {
|
||||||
; X86-NOBMI-LABEL: foo:
|
; X86-LABEL: foo:
|
||||||
; X86-NOBMI: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NOBMI-NEXT: pushl %ebp
|
; X86-NEXT: pushl %ebp
|
||||||
; X86-NOBMI-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NOBMI-NEXT: pushl %edi
|
; X86-NEXT: pushl %edi
|
||||||
; X86-NOBMI-NEXT: pushl %esi
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||||
; X86-NOBMI-NEXT: movl %ecx, %eax
|
; X86-NEXT: movl %ecx, %eax
|
||||||
; X86-NOBMI-NEXT: mull %ebp
|
; X86-NEXT: mull %ebp
|
||||||
; X86-NOBMI-NEXT: movl %edx, %ebx
|
; X86-NEXT: movl %edx, %ebx
|
||||||
; X86-NOBMI-NEXT: movl %esi, %eax
|
; X86-NEXT: movl %esi, %eax
|
||||||
; X86-NOBMI-NEXT: mull %ebp
|
; X86-NEXT: mull %ebp
|
||||||
; X86-NOBMI-NEXT: movl %edx, %ebp
|
; X86-NEXT: movl %edx, %ebp
|
||||||
; X86-NOBMI-NEXT: movl %eax, %esi
|
; X86-NEXT: movl %eax, %esi
|
||||||
; X86-NOBMI-NEXT: addl %ebx, %esi
|
; X86-NEXT: addl %ebx, %esi
|
||||||
; X86-NOBMI-NEXT: adcl $0, %ebp
|
; X86-NEXT: adcl $0, %ebp
|
||||||
; X86-NOBMI-NEXT: movl %ecx, %eax
|
; X86-NEXT: movl %ecx, %eax
|
||||||
; X86-NOBMI-NEXT: mull %edi
|
; X86-NEXT: mull %edi
|
||||||
; X86-NOBMI-NEXT: movl %edx, %ebx
|
; X86-NEXT: movl %edx, %ebx
|
||||||
; X86-NOBMI-NEXT: addl %esi, %eax
|
; X86-NEXT: addl %esi, %eax
|
||||||
; X86-NOBMI-NEXT: adcl %ebp, %ebx
|
; X86-NEXT: adcl %ebp, %ebx
|
||||||
; X86-NOBMI-NEXT: setb %al
|
; X86-NEXT: setb %al
|
||||||
; X86-NOBMI-NEXT: movzbl %al, %ecx
|
; X86-NEXT: movzbl %al, %ecx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NOBMI-NEXT: mull %edi
|
; X86-NEXT: mull %edi
|
||||||
; X86-NOBMI-NEXT: movl %edx, %esi
|
; X86-NEXT: movl %edx, %esi
|
||||||
; X86-NOBMI-NEXT: movl %eax, %ebp
|
; X86-NEXT: movl %eax, %ebp
|
||||||
; X86-NOBMI-NEXT: addl %ebx, %ebp
|
; X86-NEXT: addl %ebx, %ebp
|
||||||
; X86-NOBMI-NEXT: adcl %ecx, %esi
|
; X86-NEXT: adcl %ecx, %esi
|
||||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: xorl %ecx, %ecx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NOBMI-NEXT: mull %ecx
|
; X86-NEXT: mull %ecx
|
||||||
; X86-NOBMI-NEXT: movl %edx, %edi
|
; X86-NEXT: movl %edx, %edi
|
||||||
; X86-NOBMI-NEXT: movl %eax, %ebx
|
; X86-NEXT: movl %eax, %ebx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NOBMI-NEXT: mull %ecx
|
; X86-NEXT: mull %ecx
|
||||||
; X86-NOBMI-NEXT: addl %ebx, %eax
|
; X86-NEXT: addl %ebx, %eax
|
||||||
; X86-NOBMI-NEXT: adcl %edi, %edx
|
; X86-NEXT: adcl %edi, %edx
|
||||||
; X86-NOBMI-NEXT: addl %ebp, %eax
|
; X86-NEXT: addl %ebp, %eax
|
||||||
; X86-NOBMI-NEXT: adcl %esi, %edx
|
; X86-NEXT: adcl %esi, %edx
|
||||||
; X86-NOBMI-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NOBMI-NEXT: popl %edi
|
; X86-NEXT: popl %edi
|
||||||
; X86-NOBMI-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NOBMI-NEXT: popl %ebp
|
; X86-NEXT: popl %ebp
|
||||||
; X86-NOBMI-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X86-BMI-LABEL: foo:
|
; X64-LABEL: foo:
|
||||||
; X86-BMI: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X86-BMI-NEXT: pushl %ebp
|
; X64-NEXT: movq %rdi, %rax
|
||||||
; X86-BMI-NEXT: pushl %ebx
|
; X64-NEXT: mulq %rsi
|
||||||
; X86-BMI-NEXT: pushl %edi
|
; X64-NEXT: movq %rdx, %rax
|
||||||
; X86-BMI-NEXT: pushl %esi
|
; X64-NEXT: retq
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
||||||
; X86-BMI-NEXT: movl %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %esi, %edx, %ebx
|
|
||||||
; X86-BMI-NEXT: movl %eax, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %esi, %ebp, %eax
|
|
||||||
; X86-BMI-NEXT: addl %ebx, %ebp
|
|
||||||
; X86-BMI-NEXT: adcl $0, %eax
|
|
||||||
; X86-BMI-NEXT: movl %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %edi, %edx, %ebx
|
|
||||||
; X86-BMI-NEXT: addl %ebp, %edx
|
|
||||||
; X86-BMI-NEXT: adcl %eax, %ebx
|
|
||||||
; X86-BMI-NEXT: setb %al
|
|
||||||
; X86-BMI-NEXT: movzbl %al, %eax
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %edi, %edi, %ebp
|
|
||||||
; X86-BMI-NEXT: addl %ebx, %edi
|
|
||||||
; X86-BMI-NEXT: adcl %eax, %ebp
|
|
||||||
; X86-BMI-NEXT: xorl %eax, %eax
|
|
||||||
; X86-BMI-NEXT: movl %esi, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %eax, %ebx, %esi
|
|
||||||
; X86-BMI-NEXT: movl %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %eax, %eax, %edx
|
|
||||||
; X86-BMI-NEXT: addl %ebx, %eax
|
|
||||||
; X86-BMI-NEXT: adcl %esi, %edx
|
|
||||||
; X86-BMI-NEXT: addl %edi, %eax
|
|
||||||
; X86-BMI-NEXT: adcl %ebp, %edx
|
|
||||||
; X86-BMI-NEXT: popl %esi
|
|
||||||
; X86-BMI-NEXT: popl %edi
|
|
||||||
; X86-BMI-NEXT: popl %ebx
|
|
||||||
; X86-BMI-NEXT: popl %ebp
|
|
||||||
; X86-BMI-NEXT: retl
|
|
||||||
;
|
|
||||||
; X64-NOBMI-LABEL: foo:
|
|
||||||
; X64-NOBMI: # %bb.0:
|
|
||||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
|
||||||
; X64-NOBMI-NEXT: mulq %rsi
|
|
||||||
; X64-NOBMI-NEXT: movq %rdx, %rax
|
|
||||||
; X64-NOBMI-NEXT: retq
|
|
||||||
;
|
|
||||||
; X64-BMI-LABEL: foo:
|
|
||||||
; X64-BMI: # %bb.0:
|
|
||||||
; X64-BMI-NEXT: movq %rdi, %rdx
|
|
||||||
; X64-BMI-NEXT: mulxq %rsi, %rcx, %rax
|
|
||||||
; X64-BMI-NEXT: retq
|
|
||||||
%tmp0 = zext i64 %x to i128
|
%tmp0 = zext i64 %x to i128
|
||||||
%tmp1 = zext i64 %y to i128
|
%tmp1 = zext i64 %y to i128
|
||||||
%tmp2 = mul i128 %tmp0, %tmp1
|
%tmp2 = mul i128 %tmp0, %tmp1
|
||||||
@ -122,236 +75,125 @@ define i64 @foo(i64 %x, i64 %y) nounwind {
|
|||||||
; zero-extended value.
|
; zero-extended value.
|
||||||
|
|
||||||
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind {
|
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind {
|
||||||
; X86-NOBMI-LABEL: mul1:
|
; X86-LABEL: mul1:
|
||||||
; X86-NOBMI: # %bb.0: # %entry
|
; X86: # %bb.0: # %entry
|
||||||
; X86-NOBMI-NEXT: pushl %ebp
|
; X86-NEXT: pushl %ebp
|
||||||
; X86-NOBMI-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NOBMI-NEXT: pushl %edi
|
; X86-NEXT: pushl %edi
|
||||||
; X86-NOBMI-NEXT: pushl %esi
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NOBMI-NEXT: subl $28, %esp
|
; X86-NEXT: subl $28, %esp
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NOBMI-NEXT: orl %ecx, %eax
|
; X86-NEXT: orl %ecx, %eax
|
||||||
; X86-NOBMI-NEXT: je .LBB1_3
|
; X86-NEXT: je .LBB1_3
|
||||||
; X86-NOBMI-NEXT: # %bb.1: # %for.body.preheader
|
; X86-NEXT: # %bb.1: # %for.body.preheader
|
||||||
; X86-NOBMI-NEXT: xorl %eax, %eax
|
; X86-NEXT: xorl %eax, %eax
|
||||||
; X86-NOBMI-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl %edx, %edx
|
||||||
; X86-NOBMI-NEXT: xorl %ebx, %ebx
|
; X86-NEXT: xorl %ebx, %ebx
|
||||||
; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
|
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||||
; X86-NOBMI-NEXT: .p2align 4, 0x90
|
; X86-NEXT: .p2align 4, 0x90
|
||||||
; X86-NOBMI-NEXT: .LBB1_2: # %for.body
|
; X86-NEXT: .LBB1_2: # %for.body
|
||||||
; X86-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1
|
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; X86-NOBMI-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NOBMI-NEXT: movl %eax, %ecx
|
; X86-NEXT: movl %eax, %ecx
|
||||||
; X86-NOBMI-NEXT: movl (%eax,%ebx,8), %ebp
|
; X86-NEXT: movl (%eax,%ebx,8), %ebp
|
||||||
; X86-NOBMI-NEXT: movl 4(%eax,%ebx,8), %esi
|
; X86-NEXT: movl 4(%eax,%ebx,8), %esi
|
||||||
; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: movl %ebp, %eax
|
; X86-NEXT: movl %ebp, %eax
|
||||||
; X86-NOBMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NOBMI-NEXT: mull %ecx
|
; X86-NEXT: mull %ecx
|
||||||
; X86-NOBMI-NEXT: movl %edx, %edi
|
; X86-NEXT: movl %edx, %edi
|
||||||
; X86-NOBMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: movl %esi, %eax
|
; X86-NEXT: movl %esi, %eax
|
||||||
; X86-NOBMI-NEXT: mull %ecx
|
; X86-NEXT: mull %ecx
|
||||||
; X86-NOBMI-NEXT: movl %edx, %ecx
|
; X86-NEXT: movl %edx, %ecx
|
||||||
; X86-NOBMI-NEXT: movl %eax, %esi
|
; X86-NEXT: movl %eax, %esi
|
||||||
; X86-NOBMI-NEXT: addl %edi, %esi
|
; X86-NEXT: addl %edi, %esi
|
||||||
; X86-NOBMI-NEXT: adcl $0, %ecx
|
; X86-NEXT: adcl $0, %ecx
|
||||||
; X86-NOBMI-NEXT: movl %ebp, %eax
|
; X86-NEXT: movl %ebp, %eax
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
; X86-NOBMI-NEXT: mull %edx
|
; X86-NEXT: mull %edx
|
||||||
; X86-NOBMI-NEXT: movl %edx, %ebp
|
; X86-NEXT: movl %edx, %ebp
|
||||||
; X86-NOBMI-NEXT: movl %eax, %edi
|
; X86-NEXT: movl %eax, %edi
|
||||||
; X86-NOBMI-NEXT: addl %esi, %edi
|
; X86-NEXT: addl %esi, %edi
|
||||||
; X86-NOBMI-NEXT: adcl %ecx, %ebp
|
; X86-NEXT: adcl %ecx, %ebp
|
||||||
; X86-NOBMI-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
|
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
|
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||||
; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp)
|
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||||
; X86-NOBMI-NEXT: movl %edx, %ecx
|
; X86-NEXT: movl %edx, %ecx
|
||||||
; X86-NOBMI-NEXT: movl %eax, %esi
|
; X86-NEXT: movl %eax, %esi
|
||||||
; X86-NOBMI-NEXT: addl %ebp, %esi
|
; X86-NEXT: addl %ebp, %esi
|
||||||
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
|
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
|
||||||
; X86-NOBMI-NEXT: adcl %eax, %ecx
|
; X86-NEXT: adcl %eax, %ecx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NOBMI-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl %edx, %edx
|
||||||
; X86-NOBMI-NEXT: mull %edx
|
; X86-NEXT: mull %edx
|
||||||
; X86-NOBMI-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: movl %eax, %ebp
|
; X86-NEXT: movl %eax, %ebp
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
|
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||||
; X86-NOBMI-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl %edx, %edx
|
||||||
; X86-NOBMI-NEXT: mull %edx
|
; X86-NEXT: mull %edx
|
||||||
; X86-NOBMI-NEXT: addl %ebp, %eax
|
; X86-NEXT: addl %ebp, %eax
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||||
; X86-NOBMI-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
|
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
|
||||||
; X86-NOBMI-NEXT: addl %esi, %eax
|
; X86-NEXT: addl %esi, %eax
|
||||||
; X86-NOBMI-NEXT: adcl %ecx, %edx
|
; X86-NEXT: adcl %ecx, %edx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
|
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||||
; X86-NOBMI-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
|
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
|
||||||
; X86-NOBMI-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
|
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
|
||||||
; X86-NOBMI-NEXT: adcl $0, %eax
|
; X86-NEXT: adcl $0, %eax
|
||||||
; X86-NOBMI-NEXT: adcl $0, %edx
|
; X86-NEXT: adcl $0, %edx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NOBMI-NEXT: movl %esi, (%ecx,%ebx,8)
|
; X86-NEXT: movl %esi, (%ecx,%ebx,8)
|
||||||
; X86-NOBMI-NEXT: movl %edi, 4(%ecx,%ebx,8)
|
; X86-NEXT: movl %edi, 4(%ecx,%ebx,8)
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NOBMI-NEXT: movl %ecx, %edi
|
; X86-NEXT: movl %ecx, %edi
|
||||||
; X86-NOBMI-NEXT: addl $1, %ebx
|
; X86-NEXT: addl $1, %ebx
|
||||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
|
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||||
; X86-NOBMI-NEXT: adcl $0, %esi
|
; X86-NEXT: adcl $0, %esi
|
||||||
; X86-NOBMI-NEXT: movl %ebx, %ecx
|
; X86-NEXT: movl %ebx, %ecx
|
||||||
; X86-NOBMI-NEXT: xorl %ebp, %ecx
|
; X86-NEXT: xorl %ebp, %ecx
|
||||||
; X86-NOBMI-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NOBMI-NEXT: xorl %edi, %esi
|
; X86-NEXT: xorl %edi, %esi
|
||||||
; X86-NOBMI-NEXT: orl %ecx, %esi
|
; X86-NEXT: orl %ecx, %esi
|
||||||
; X86-NOBMI-NEXT: jne .LBB1_2
|
; X86-NEXT: jne .LBB1_2
|
||||||
; X86-NOBMI-NEXT: .LBB1_3: # %for.end
|
; X86-NEXT: .LBB1_3: # %for.end
|
||||||
; X86-NOBMI-NEXT: xorl %eax, %eax
|
; X86-NEXT: xorl %eax, %eax
|
||||||
; X86-NOBMI-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl %edx, %edx
|
||||||
; X86-NOBMI-NEXT: addl $28, %esp
|
; X86-NEXT: addl $28, %esp
|
||||||
; X86-NOBMI-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NOBMI-NEXT: popl %edi
|
; X86-NEXT: popl %edi
|
||||||
; X86-NOBMI-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NOBMI-NEXT: popl %ebp
|
; X86-NEXT: popl %ebp
|
||||||
; X86-NOBMI-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X86-BMI-LABEL: mul1:
|
; X64-LABEL: mul1:
|
||||||
; X86-BMI: # %bb.0: # %entry
|
; X64: # %bb.0: # %entry
|
||||||
; X86-BMI-NEXT: pushl %ebp
|
; X64-NEXT: testq %rdi, %rdi
|
||||||
; X86-BMI-NEXT: pushl %ebx
|
; X64-NEXT: je .LBB1_3
|
||||||
; X86-BMI-NEXT: pushl %edi
|
; X64-NEXT: # %bb.1: # %for.body.preheader
|
||||||
; X86-BMI-NEXT: pushl %esi
|
; X64-NEXT: movq %rcx, %r8
|
||||||
; X86-BMI-NEXT: subl $20, %esp
|
; X64-NEXT: movq %rdx, %r9
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X64-NEXT: xorl %r10d, %r10d
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X64-NEXT: xorl %ecx, %ecx
|
||||||
; X86-BMI-NEXT: orl %ecx, %eax
|
; X64-NEXT: .p2align 4, 0x90
|
||||||
; X86-BMI-NEXT: je .LBB1_3
|
; X64-NEXT: .LBB1_2: # %for.body
|
||||||
; X86-BMI-NEXT: # %bb.1: # %for.body.preheader
|
; X64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; X86-BMI-NEXT: xorl %ecx, %ecx
|
; X64-NEXT: movq %r8, %rax
|
||||||
; X86-BMI-NEXT: xorl %edx, %edx
|
; X64-NEXT: mulq (%r9,%rcx,8)
|
||||||
; X86-BMI-NEXT: xorl %edi, %edi
|
; X64-NEXT: addq %r10, %rax
|
||||||
; X86-BMI-NEXT: movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
|
; X64-NEXT: adcq $0, %rdx
|
||||||
; X86-BMI-NEXT: .p2align 4, 0x90
|
; X64-NEXT: movq %rax, (%rsi,%rcx,8)
|
||||||
; X86-BMI-NEXT: .LBB1_2: # %for.body
|
; X64-NEXT: incq %rcx
|
||||||
; X86-BMI-NEXT: # =>This Inner Loop Header: Depth=1
|
; X64-NEXT: cmpq %rcx, %rdi
|
||||||
; X86-BMI-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X64-NEXT: movq %rdx, %r10
|
||||||
; X86-BMI-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
|
; X64-NEXT: jne .LBB1_2
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X64-NEXT: .LBB1_3: # %for.end
|
||||||
; X86-BMI-NEXT: movl (%eax,%edi,8), %ecx
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X86-BMI-NEXT: movl 4(%eax,%edi,8), %ebx
|
; X64-NEXT: retq
|
||||||
; X86-BMI-NEXT: movl %ebx, (%esp) # 4-byte Spill
|
|
||||||
; X86-BMI-NEXT: movl %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X86-BMI-NEXT: movl %eax, %esi
|
|
||||||
; X86-BMI-NEXT: mulxl %eax, %eax, %ebp
|
|
||||||
; X86-BMI-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
|
|
||||||
; X86-BMI-NEXT: movl %ebx, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl %esi, %eax, %esi
|
|
||||||
; X86-BMI-NEXT: addl %ebp, %eax
|
|
||||||
; X86-BMI-NEXT: adcl $0, %esi
|
|
||||||
; X86-BMI-NEXT: movl %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %ebp, %ebx
|
|
||||||
; X86-BMI-NEXT: addl %eax, %ebp
|
|
||||||
; X86-BMI-NEXT: adcl %esi, %ebx
|
|
||||||
; X86-BMI-NEXT: movl (%esp), %edx # 4-byte Reload
|
|
||||||
; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %esi
|
|
||||||
; X86-BMI-NEXT: setb %dl
|
|
||||||
; X86-BMI-NEXT: addl %ebx, %eax
|
|
||||||
; X86-BMI-NEXT: movzbl %dl, %edx
|
|
||||||
; X86-BMI-NEXT: adcl %edx, %esi
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
|
|
||||||
; X86-BMI-NEXT: xorl %ebx, %ebx
|
|
||||||
; X86-BMI-NEXT: mulxl %ebx, %ebx, %edx
|
|
||||||
; X86-BMI-NEXT: movl %edx, (%esp) # 4-byte Spill
|
|
||||||
; X86-BMI-NEXT: movl %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: xorl %ecx, %ecx
|
|
||||||
; X86-BMI-NEXT: mulxl %ecx, %ecx, %edx
|
|
||||||
; X86-BMI-NEXT: addl %ebx, %ecx
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
|
||||||
; X86-BMI-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
|
|
||||||
; X86-BMI-NEXT: addl %eax, %ecx
|
|
||||||
; X86-BMI-NEXT: adcl %esi, %edx
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
|
|
||||||
; X86-BMI-NEXT: addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
|
|
||||||
; X86-BMI-NEXT: adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
|
|
||||||
; X86-BMI-NEXT: adcl $0, %ecx
|
|
||||||
; X86-BMI-NEXT: adcl $0, %edx
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X86-BMI-NEXT: movl %esi, (%eax,%edi,8)
|
|
||||||
; X86-BMI-NEXT: movl %ebp, 4(%eax,%edi,8)
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X86-BMI-NEXT: movl %eax, %esi
|
|
||||||
; X86-BMI-NEXT: addl $1, %edi
|
|
||||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
|
|
||||||
; X86-BMI-NEXT: adcl $0, %ebp
|
|
||||||
; X86-BMI-NEXT: movl %edi, %eax
|
|
||||||
; X86-BMI-NEXT: xorl %esi, %eax
|
|
||||||
; X86-BMI-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
|
|
||||||
; X86-BMI-NEXT: movl %ebp, %esi
|
|
||||||
; X86-BMI-NEXT: xorl %ebx, %esi
|
|
||||||
; X86-BMI-NEXT: orl %eax, %esi
|
|
||||||
; X86-BMI-NEXT: jne .LBB1_2
|
|
||||||
; X86-BMI-NEXT: .LBB1_3: # %for.end
|
|
||||||
; X86-BMI-NEXT: xorl %eax, %eax
|
|
||||||
; X86-BMI-NEXT: xorl %edx, %edx
|
|
||||||
; X86-BMI-NEXT: addl $20, %esp
|
|
||||||
; X86-BMI-NEXT: popl %esi
|
|
||||||
; X86-BMI-NEXT: popl %edi
|
|
||||||
; X86-BMI-NEXT: popl %ebx
|
|
||||||
; X86-BMI-NEXT: popl %ebp
|
|
||||||
; X86-BMI-NEXT: retl
|
|
||||||
;
|
|
||||||
; X64-NOBMI-LABEL: mul1:
|
|
||||||
; X64-NOBMI: # %bb.0: # %entry
|
|
||||||
; X64-NOBMI-NEXT: testq %rdi, %rdi
|
|
||||||
; X64-NOBMI-NEXT: je .LBB1_3
|
|
||||||
; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader
|
|
||||||
; X64-NOBMI-NEXT: movq %rcx, %r8
|
|
||||||
; X64-NOBMI-NEXT: movq %rdx, %r9
|
|
||||||
; X64-NOBMI-NEXT: xorl %r10d, %r10d
|
|
||||||
; X64-NOBMI-NEXT: xorl %ecx, %ecx
|
|
||||||
; X64-NOBMI-NEXT: .p2align 4, 0x90
|
|
||||||
; X64-NOBMI-NEXT: .LBB1_2: # %for.body
|
|
||||||
; X64-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1
|
|
||||||
; X64-NOBMI-NEXT: movq %r8, %rax
|
|
||||||
; X64-NOBMI-NEXT: mulq (%r9,%rcx,8)
|
|
||||||
; X64-NOBMI-NEXT: addq %r10, %rax
|
|
||||||
; X64-NOBMI-NEXT: adcq $0, %rdx
|
|
||||||
; X64-NOBMI-NEXT: movq %rax, (%rsi,%rcx,8)
|
|
||||||
; X64-NOBMI-NEXT: incq %rcx
|
|
||||||
; X64-NOBMI-NEXT: cmpq %rcx, %rdi
|
|
||||||
; X64-NOBMI-NEXT: movq %rdx, %r10
|
|
||||||
; X64-NOBMI-NEXT: jne .LBB1_2
|
|
||||||
; X64-NOBMI-NEXT: .LBB1_3: # %for.end
|
|
||||||
; X64-NOBMI-NEXT: xorl %eax, %eax
|
|
||||||
; X64-NOBMI-NEXT: retq
|
|
||||||
;
|
|
||||||
; X64-BMI-LABEL: mul1:
|
|
||||||
; X64-BMI: # %bb.0: # %entry
|
|
||||||
; X64-BMI-NEXT: testq %rdi, %rdi
|
|
||||||
; X64-BMI-NEXT: je .LBB1_3
|
|
||||||
; X64-BMI-NEXT: # %bb.1: # %for.body.preheader
|
|
||||||
; X64-BMI-NEXT: movq %rcx, %r8
|
|
||||||
; X64-BMI-NEXT: movq %rdx, %r9
|
|
||||||
; X64-BMI-NEXT: xorl %r10d, %r10d
|
|
||||||
; X64-BMI-NEXT: xorl %ecx, %ecx
|
|
||||||
; X64-BMI-NEXT: .p2align 4, 0x90
|
|
||||||
; X64-BMI-NEXT: .LBB1_2: # %for.body
|
|
||||||
; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1
|
|
||||||
; X64-BMI-NEXT: movq %r8, %rdx
|
|
||||||
; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx
|
|
||||||
; X64-BMI-NEXT: addq %r10, %rax
|
|
||||||
; X64-BMI-NEXT: adcq $0, %rdx
|
|
||||||
; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8)
|
|
||||||
; X64-BMI-NEXT: incq %rcx
|
|
||||||
; X64-BMI-NEXT: cmpq %rcx, %rdi
|
|
||||||
; X64-BMI-NEXT: movq %rdx, %r10
|
|
||||||
; X64-BMI-NEXT: jne .LBB1_2
|
|
||||||
; X64-BMI-NEXT: .LBB1_3: # %for.end
|
|
||||||
; X64-BMI-NEXT: xorl %eax, %eax
|
|
||||||
; X64-BMI-NEXT: retq
|
|
||||||
entry:
|
entry:
|
||||||
%conv = zext i64 %y to i128
|
%conv = zext i64 %y to i128
|
||||||
%cmp11 = icmp eq i64 %n, 0
|
%cmp11 = icmp eq i64 %n, 0
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
define i64 @f1(i32 %a, i32 %b) {
|
define i64 @f1(i32 %a, i32 %b) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %edx
|
; CHECK-NEXT: mull {{[0-9]+}}(%esp)
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%x = zext i32 %a to i64
|
%x = zext i32 %a to i64
|
||||||
%y = zext i32 %b to i64
|
%y = zext i32 %b to i64
|
||||||
@ -17,9 +17,9 @@ define i64 @f1(i32 %a, i32 %b) {
|
|||||||
define i64 @f2(i32 %a, i32* %p) {
|
define i64 @f2(i32 %a, i32* %p) {
|
||||||
; CHECK-LABEL: f2:
|
; CHECK-LABEL: f2:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: mulxl (%eax), %eax, %edx
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
|
; CHECK-NEXT: mull (%ecx)
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%b = load i32, i32* %p
|
%b = load i32, i32* %p
|
||||||
%x = zext i32 %a to i64
|
%x = zext i32 %a to i64
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
define i128 @f1(i64 %a, i64 %b) {
|
define i128 @f1(i64 %a, i64 %b) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movq %rdi, %rdx
|
; CHECK-NEXT: movq %rdi, %rax
|
||||||
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
|
; CHECK-NEXT: mulq %rsi
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%x = zext i64 %a to i128
|
%x = zext i64 %a to i128
|
||||||
%y = zext i64 %b to i128
|
%y = zext i64 %b to i128
|
||||||
@ -17,8 +17,8 @@ define i128 @f1(i64 %a, i64 %b) {
|
|||||||
define i128 @f2(i64 %a, i64* %p) {
|
define i128 @f2(i64 %a, i64* %p) {
|
||||||
; CHECK-LABEL: f2:
|
; CHECK-LABEL: f2:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movq %rdi, %rdx
|
; CHECK-NEXT: movq %rdi, %rax
|
||||||
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
|
; CHECK-NEXT: mulq (%rsi)
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%b = load i64, i64* %p
|
%b = load i64, i64* %p
|
||||||
%x = zext i64 %a to i128
|
%x = zext i64 %a to i128
|
||||||
|
@ -5,11 +5,11 @@
|
|||||||
define void @_Z15uint64_to_asciimPc(i64 %arg) {
|
define void @_Z15uint64_to_asciimPc(i64 %arg) {
|
||||||
; HSW-LABEL: _Z15uint64_to_asciimPc:
|
; HSW-LABEL: _Z15uint64_to_asciimPc:
|
||||||
; HSW: # %bb.0: # %bb
|
; HSW: # %bb.0: # %bb
|
||||||
; HSW-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
|
; HSW-NEXT: movq %rdi, %rax
|
||||||
; HSW-NEXT: movq %rdi, %rdx
|
; HSW-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
|
||||||
; HSW-NEXT: mulxq %rax, %rax, %rcx
|
; HSW-NEXT: mulq %rcx
|
||||||
; HSW-NEXT: shrq $42, %rcx
|
; HSW-NEXT: shrq $42, %rdx
|
||||||
; HSW-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
|
; HSW-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
|
||||||
; HSW-NEXT: shrq $20, %rax
|
; HSW-NEXT: shrq $20, %rax
|
||||||
; HSW-NEXT: leal (%rax,%rax,4), %eax
|
; HSW-NEXT: leal (%rax,%rax,4), %eax
|
||||||
; HSW-NEXT: addl $5, %eax
|
; HSW-NEXT: addl $5, %eax
|
||||||
@ -22,11 +22,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) {
|
|||||||
;
|
;
|
||||||
; ZN-LABEL: _Z15uint64_to_asciimPc:
|
; ZN-LABEL: _Z15uint64_to_asciimPc:
|
||||||
; ZN: # %bb.0: # %bb
|
; ZN: # %bb.0: # %bb
|
||||||
; ZN-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
|
; ZN-NEXT: movq %rdi, %rax
|
||||||
; ZN-NEXT: movq %rdi, %rdx
|
; ZN-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
|
||||||
; ZN-NEXT: mulxq %rax, %rax, %rcx
|
; ZN-NEXT: mulq %rcx
|
||||||
; ZN-NEXT: shrq $42, %rcx
|
; ZN-NEXT: shrq $42, %rdx
|
||||||
; ZN-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
|
; ZN-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
|
||||||
; ZN-NEXT: shrq $20, %rax
|
; ZN-NEXT: shrq $20, %rax
|
||||||
; ZN-NEXT: leal 5(%rax,%rax,4), %eax
|
; ZN-NEXT: leal 5(%rax,%rax,4), %eax
|
||||||
; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
|
; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
|
||||||
|
Loading…
x
Reference in New Issue
Block a user