mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Restore selection of MULX on BMI2 targets.
Looking back over gcc and icc behavior, it looks like icc does use mulx32 on 32-bit targets and mulx64 on 64-bit targets. icc also uses it when dividing i32 by a constant on 32-bit targets and i64 by a constant on 64-bit targets. gcc uses it for multiplies producing a 64-bit result on 32-bit targets and a 128-bit result on 64-bit targets. gcc does not appear to use it for division by constant. After this patch clang is closer to the icc behavior. This basically reverts d1c61861ddc94457b08a5a653d3908b7b38ebb22, but there were no strong feelings at the time. Fixes PR45518. Differential Revision: https://reviews.llvm.org/D80498
This commit is contained in:
parent
1e018e0462
commit
7e64cc155b
@ -4758,17 +4758,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
unsigned Opc, MOpc;
|
||||
unsigned LoReg, HiReg;
|
||||
bool IsSigned = Opcode == ISD::SMUL_LOHI;
|
||||
bool UseMULX = !IsSigned && Subtarget->hasBMI2();
|
||||
switch (NVT.SimpleTy) {
|
||||
default: llvm_unreachable("Unsupported VT!");
|
||||
case MVT::i32:
|
||||
Opc = IsSigned ? X86::IMUL32r : X86::MUL32r;
|
||||
MOpc = IsSigned ? X86::IMUL32m : X86::MUL32m;
|
||||
LoReg = X86::EAX; HiReg = X86::EDX;
|
||||
Opc = UseMULX ? X86::MULX32rr :
|
||||
IsSigned ? X86::IMUL32r : X86::MUL32r;
|
||||
MOpc = UseMULX ? X86::MULX32rm :
|
||||
IsSigned ? X86::IMUL32m : X86::MUL32m;
|
||||
LoReg = UseMULX ? X86::EDX : X86::EAX;
|
||||
HiReg = X86::EDX;
|
||||
break;
|
||||
case MVT::i64:
|
||||
Opc = IsSigned ? X86::IMUL64r : X86::MUL64r;
|
||||
MOpc = IsSigned ? X86::IMUL64m : X86::MUL64m;
|
||||
LoReg = X86::RAX; HiReg = X86::RDX;
|
||||
Opc = UseMULX ? X86::MULX64rr :
|
||||
IsSigned ? X86::IMUL64r : X86::MUL64r;
|
||||
MOpc = UseMULX ? X86::MULX64rm :
|
||||
IsSigned ? X86::IMUL64m : X86::MUL64m;
|
||||
LoReg = UseMULX ? X86::RDX : X86::RAX;
|
||||
HiReg = X86::RDX;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -4783,15 +4790,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
|
||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||
N0, SDValue()).getValue(1);
|
||||
SDValue ResHi, ResLo;
|
||||
if (foldedLoad) {
|
||||
SDValue Chain;
|
||||
MachineSDNode *CNode = nullptr;
|
||||
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
|
||||
InFlag };
|
||||
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
|
||||
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
||||
Chain = SDValue(CNode, 0);
|
||||
InFlag = SDValue(CNode, 1);
|
||||
if (UseMULX) {
|
||||
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
|
||||
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
||||
ResHi = SDValue(CNode, 0);
|
||||
ResLo = SDValue(CNode, 1);
|
||||
Chain = SDValue(CNode, 2);
|
||||
} else {
|
||||
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
|
||||
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
||||
Chain = SDValue(CNode, 0);
|
||||
InFlag = SDValue(CNode, 1);
|
||||
}
|
||||
|
||||
// Update the chain.
|
||||
ReplaceUses(N1.getValue(1), Chain);
|
||||
@ -4799,27 +4815,38 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
|
||||
} else {
|
||||
SDValue Ops[] = { N1, InFlag };
|
||||
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
||||
InFlag = SDValue(CNode, 0);
|
||||
if (UseMULX) {
|
||||
SDVTList VTs = CurDAG->getVTList(NVT, NVT);
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
||||
ResHi = SDValue(CNode, 0);
|
||||
ResLo = SDValue(CNode, 1);
|
||||
} else {
|
||||
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
||||
InFlag = SDValue(CNode, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the low half of the result, if it is needed.
|
||||
if (!SDValue(Node, 0).use_empty()) {
|
||||
assert(LoReg && "Register for low half is not defined!");
|
||||
SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||
NVT, InFlag);
|
||||
InFlag = ResLo.getValue(2);
|
||||
if (!ResLo) {
|
||||
assert(LoReg && "Register for low half is not defined!");
|
||||
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||
NVT, InFlag);
|
||||
InFlag = ResLo.getValue(2);
|
||||
}
|
||||
ReplaceUses(SDValue(Node, 0), ResLo);
|
||||
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
|
||||
dbgs() << '\n');
|
||||
}
|
||||
// Copy the high half of the result, if it is needed.
|
||||
if (!SDValue(Node, 1).use_empty()) {
|
||||
assert(HiReg && "Register for high half is not defined!");
|
||||
SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
|
||||
NVT, InFlag);
|
||||
InFlag = ResHi.getValue(2);
|
||||
if (!ResHi) {
|
||||
assert(HiReg && "Register for high half is not defined!");
|
||||
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
|
||||
NVT, InFlag);
|
||||
InFlag = ResHi.getValue(2);
|
||||
}
|
||||
ReplaceUses(SDValue(Node, 1), ResHi);
|
||||
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
|
||||
dbgs() << '\n');
|
||||
|
@ -837,18 +837,16 @@ define i64 @load_fold_udiv1(i64* %p) {
|
||||
;
|
||||
; CHECK-O3-CUR-LABEL: load_fold_udiv1:
|
||||
; CHECK-O3-CUR: # %bb.0:
|
||||
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
|
||||
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
|
||||
; CHECK-O3-CUR-NEXT: mulq %rcx
|
||||
; CHECK-O3-CUR-NEXT: movq %rdx, %rax
|
||||
; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
|
||||
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
|
||||
; CHECK-O3-CUR-NEXT: mulxq %rax, %rcx, %rax
|
||||
; CHECK-O3-CUR-NEXT: shrq $3, %rax
|
||||
; CHECK-O3-CUR-NEXT: retq
|
||||
;
|
||||
; CHECK-O3-EX-LABEL: load_fold_udiv1:
|
||||
; CHECK-O3-EX: # %bb.0:
|
||||
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
|
||||
; CHECK-O3-EX-NEXT: mulq (%rdi)
|
||||
; CHECK-O3-EX-NEXT: movq %rdx, %rax
|
||||
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
|
||||
; CHECK-O3-EX-NEXT: mulxq (%rdi), %rcx, %rax
|
||||
; CHECK-O3-EX-NEXT: shrq $3, %rax
|
||||
; CHECK-O3-EX-NEXT: retq
|
||||
%v = load atomic i64, i64* %p unordered, align 8
|
||||
@ -1033,15 +1031,14 @@ define i64 @load_fold_urem1(i64* %p) {
|
||||
;
|
||||
; CHECK-O3-LABEL: load_fold_urem1:
|
||||
; CHECK-O3: # %bb.0:
|
||||
; CHECK-O3-NEXT: movq (%rdi), %rcx
|
||||
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
|
||||
; CHECK-O3-NEXT: movq %rcx, %rax
|
||||
; CHECK-O3-NEXT: mulq %rdx
|
||||
; CHECK-O3-NEXT: movq (%rdi), %rax
|
||||
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
|
||||
; CHECK-O3-NEXT: movq %rax, %rdx
|
||||
; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rdx
|
||||
; CHECK-O3-NEXT: shrq $3, %rdx
|
||||
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax
|
||||
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
|
||||
; CHECK-O3-NEXT: subq %rax, %rcx
|
||||
; CHECK-O3-NEXT: movq %rcx, %rax
|
||||
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rcx
|
||||
; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx
|
||||
; CHECK-O3-NEXT: subq %rcx, %rax
|
||||
; CHECK-O3-NEXT: retq
|
||||
%v = load atomic i64, i64* %p unordered, align 8
|
||||
%ret = urem i64 %v, 15
|
||||
@ -1694,28 +1691,28 @@ define void @rmw_fold_sdiv2(i64* %p, i64 %v) {
|
||||
define void @rmw_fold_udiv1(i64* %p, i64 %v) {
|
||||
; CHECK-O0-LABEL: rmw_fold_udiv1:
|
||||
; CHECK-O0: # %bb.0:
|
||||
; CHECK-O0-NEXT: movq (%rdi), %rax
|
||||
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
|
||||
; CHECK-O0-NEXT: mulq %rcx
|
||||
; CHECK-O0-NEXT: shrq $3, %rdx
|
||||
; CHECK-O0-NEXT: movq %rdx, (%rdi)
|
||||
; CHECK-O0-NEXT: movq (%rdi), %rdx
|
||||
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
|
||||
; CHECK-O0-NEXT: mulxq %rax, %rcx, %rax
|
||||
; CHECK-O0-NEXT: shrq $3, %rax
|
||||
; CHECK-O0-NEXT: movq %rax, (%rdi)
|
||||
; CHECK-O0-NEXT: retq
|
||||
;
|
||||
; CHECK-O3-CUR-LABEL: rmw_fold_udiv1:
|
||||
; CHECK-O3-CUR: # %bb.0:
|
||||
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
|
||||
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
|
||||
; CHECK-O3-CUR-NEXT: mulq %rcx
|
||||
; CHECK-O3-CUR-NEXT: shrq $3, %rdx
|
||||
; CHECK-O3-CUR-NEXT: movq %rdx, (%rdi)
|
||||
; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
|
||||
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
|
||||
; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rcx
|
||||
; CHECK-O3-CUR-NEXT: shrq $3, %rcx
|
||||
; CHECK-O3-CUR-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-O3-CUR-NEXT: retq
|
||||
;
|
||||
; CHECK-O3-EX-LABEL: rmw_fold_udiv1:
|
||||
; CHECK-O3-EX: # %bb.0:
|
||||
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
|
||||
; CHECK-O3-EX-NEXT: mulq (%rdi)
|
||||
; CHECK-O3-EX-NEXT: shrq $3, %rdx
|
||||
; CHECK-O3-EX-NEXT: movq %rdx, (%rdi)
|
||||
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
|
||||
; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rcx
|
||||
; CHECK-O3-EX-NEXT: shrq $3, %rcx
|
||||
; CHECK-O3-EX-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-O3-EX-NEXT: retq
|
||||
%prev = load atomic i64, i64* %p unordered, align 8
|
||||
%val = udiv i64 %prev, 15
|
||||
@ -1842,27 +1839,25 @@ define void @rmw_fold_urem1(i64* %p, i64 %v) {
|
||||
; CHECK-O0: # %bb.0:
|
||||
; CHECK-O0-NEXT: movq (%rdi), %rax
|
||||
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
|
||||
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; CHECK-O0-NEXT: mulq %rcx
|
||||
; CHECK-O0-NEXT: shrq $3, %rdx
|
||||
; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rax
|
||||
; CHECK-O0-NEXT: leaq (%rax,%rax,2), %rax
|
||||
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
|
||||
; CHECK-O0-NEXT: subq %rax, %rcx
|
||||
; CHECK-O0-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-O0-NEXT: movq %rax, %rdx
|
||||
; CHECK-O0-NEXT: mulxq %rcx, %rdx, %rcx
|
||||
; CHECK-O0-NEXT: shrq $3, %rcx
|
||||
; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx
|
||||
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
|
||||
; CHECK-O0-NEXT: subq %rcx, %rax
|
||||
; CHECK-O0-NEXT: movq %rax, (%rdi)
|
||||
; CHECK-O0-NEXT: retq
|
||||
;
|
||||
; CHECK-O3-LABEL: rmw_fold_urem1:
|
||||
; CHECK-O3: # %bb.0:
|
||||
; CHECK-O3-NEXT: movq (%rdi), %rcx
|
||||
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
|
||||
; CHECK-O3-NEXT: movq %rcx, %rax
|
||||
; CHECK-O3-NEXT: mulq %rdx
|
||||
; CHECK-O3-NEXT: shrq $3, %rdx
|
||||
; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax
|
||||
; CHECK-O3-NEXT: movq (%rdi), %rdx
|
||||
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
|
||||
; CHECK-O3-NEXT: mulxq %rax, %rax, %rcx
|
||||
; CHECK-O3-NEXT: shrq $3, %rcx
|
||||
; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rax
|
||||
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
|
||||
; CHECK-O3-NEXT: subq %rax, %rcx
|
||||
; CHECK-O3-NEXT: movq %rcx, (%rdi)
|
||||
; CHECK-O3-NEXT: subq %rax, %rdx
|
||||
; CHECK-O3-NEXT: movq %rdx, (%rdi)
|
||||
; CHECK-O3-NEXT: retq
|
||||
%prev = load atomic i64, i64* %p unordered, align 8
|
||||
%val = urem i64 %prev, 15
|
||||
|
@ -68,8 +68,8 @@ define i64 @mulx64(i64 %x, i64 %y, i64* %p) {
|
||||
; CHECK-LABEL: mulx64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdx, %rcx
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: mulq %rsi
|
||||
; CHECK-NEXT: movq %rdi, %rdx
|
||||
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
|
||||
; CHECK-NEXT: movq %rdx, (%rcx)
|
||||
; CHECK-NEXT: retq
|
||||
%x1 = zext i64 %x to i128
|
||||
@ -86,8 +86,8 @@ define i64 @mulx64_load(i64 %x, i64* %y, i64* %p) {
|
||||
; CHECK-LABEL: mulx64_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdx, %rcx
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: mulq (%rsi)
|
||||
; CHECK-NEXT: movq %rdi, %rdx
|
||||
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
|
||||
; CHECK-NEXT: movq %rdx, (%rcx)
|
||||
; CHECK-NEXT: retq
|
||||
%y1 = load i64, i64* %y
|
||||
|
@ -120,11 +120,11 @@ define i32 @mulx32(i32 %x, i32 %y, i32* %p) {
|
||||
; X86-LABEL: mulx32:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: addl %eax, %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: addl %edx, %edx
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: addl %eax, %eax
|
||||
; X86-NEXT: mulxl %eax, %eax, %edx
|
||||
; X86-NEXT: movl %edx, (%ecx)
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
@ -156,10 +156,10 @@ define i32 @mulx32_load(i32 %x, i32* %y, i32* %p) {
|
||||
; X86-LABEL: mulx32_load:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: addl %eax, %eax
|
||||
; X86-NEXT: mull (%edx)
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: addl %edx, %edx
|
||||
; X86-NEXT: mulxl (%eax), %eax, %edx
|
||||
; X86-NEXT: movl %edx, (%ecx)
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
|
@ -215,22 +215,21 @@ declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
|
||||
define void @test_multi_def(i64* dereferenceable(8) %x1,
|
||||
; CHECK-LABEL: test_multi_def:
|
||||
; CHECK: ## %bb.0: ## %entry
|
||||
; CHECK-NEXT: movq %rdx, %r8
|
||||
; CHECK-NEXT: xorl %r9d, %r9d
|
||||
; CHECK-NEXT: movq (%rdi), %rdi
|
||||
; CHECK-NEXT: movq (%rsi), %rsi
|
||||
; CHECK-NEXT: movq %rdx, %rax
|
||||
; CHECK-NEXT: xorl %r8d, %r8d
|
||||
; CHECK-NEXT: movq (%rdi), %rdx
|
||||
; CHECK-NEXT: movq (%rsi), %r9
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB4_2: ## %for.body
|
||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: mulq %rsi
|
||||
; CHECK-NEXT: addq %rax, (%r8)
|
||||
; CHECK-NEXT: adcq %rdx, 8(%r8)
|
||||
; CHECK-NEXT: mulxq %r9, %rsi, %rdi
|
||||
; CHECK-NEXT: addq %rsi, (%rax)
|
||||
; CHECK-NEXT: adcq %rdi, 8(%rax)
|
||||
; CHECK-NEXT: ## %bb.1: ## %for.check
|
||||
; CHECK-NEXT: ## in Loop: Header=BB4_2 Depth=1
|
||||
; CHECK-NEXT: incq %r9
|
||||
; CHECK-NEXT: addq $16, %r8
|
||||
; CHECK-NEXT: cmpq %rcx, %r9
|
||||
; CHECK-NEXT: incq %r8
|
||||
; CHECK-NEXT: addq $16, %rax
|
||||
; CHECK-NEXT: cmpq %rcx, %r8
|
||||
; CHECK-NEXT: jl LBB4_2
|
||||
; CHECK-NEXT: ## %bb.3: ## %exit
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -7,48 +7,86 @@
|
||||
; PR1198
|
||||
|
||||
define i64 @foo(i64 %x, i64 %y) nounwind {
|
||||
; X86-LABEL: foo:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %ebx
|
||||
; X86-NEXT: movl %edx, %edi
|
||||
; X86-NEXT: movl %ebp, %eax
|
||||
; X86-NEXT: mull %ebx
|
||||
; X86-NEXT: movl %edx, %ebx
|
||||
; X86-NEXT: movl %eax, %ebp
|
||||
; X86-NEXT: addl %edi, %ebp
|
||||
; X86-NEXT: adcl $0, %ebx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %esi
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: addl %ebp, %eax
|
||||
; X86-NEXT: adcl %ebx, %ecx
|
||||
; X86-NEXT: setb %al
|
||||
; X86-NEXT: movzbl %al, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: mull %esi
|
||||
; X86-NEXT: addl %ecx, %eax
|
||||
; X86-NEXT: adcl %edi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
; X86-NOBMI-LABEL: foo:
|
||||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: pushl %ebp
|
||||
; X86-NOBMI-NEXT: pushl %ebx
|
||||
; X86-NOBMI-NEXT: pushl %edi
|
||||
; X86-NOBMI-NEXT: pushl %esi
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NOBMI-NEXT: movl %ecx, %eax
|
||||
; X86-NOBMI-NEXT: mull %ebx
|
||||
; X86-NOBMI-NEXT: movl %edx, %edi
|
||||
; X86-NOBMI-NEXT: movl %ebp, %eax
|
||||
; X86-NOBMI-NEXT: mull %ebx
|
||||
; X86-NOBMI-NEXT: movl %edx, %ebx
|
||||
; X86-NOBMI-NEXT: movl %eax, %ebp
|
||||
; X86-NOBMI-NEXT: addl %edi, %ebp
|
||||
; X86-NOBMI-NEXT: adcl $0, %ebx
|
||||
; X86-NOBMI-NEXT: movl %ecx, %eax
|
||||
; X86-NOBMI-NEXT: mull %esi
|
||||
; X86-NOBMI-NEXT: movl %edx, %ecx
|
||||
; X86-NOBMI-NEXT: addl %ebp, %eax
|
||||
; X86-NOBMI-NEXT: adcl %ebx, %ecx
|
||||
; X86-NOBMI-NEXT: setb %al
|
||||
; X86-NOBMI-NEXT: movzbl %al, %edi
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: mull %esi
|
||||
; X86-NOBMI-NEXT: addl %ecx, %eax
|
||||
; X86-NOBMI-NEXT: adcl %edi, %edx
|
||||
; X86-NOBMI-NEXT: popl %esi
|
||||
; X86-NOBMI-NEXT: popl %edi
|
||||
; X86-NOBMI-NEXT: popl %ebx
|
||||
; X86-NOBMI-NEXT: popl %ebp
|
||||
; X86-NOBMI-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: foo:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: mulq %rsi
|
||||
; X64-NEXT: movq %rdx, %rax
|
||||
; X64-NEXT: retq
|
||||
; X86-BMI-LABEL: foo:
|
||||
; X86-BMI: # %bb.0:
|
||||
; X86-BMI-NEXT: pushl %ebp
|
||||
; X86-BMI-NEXT: pushl %ebx
|
||||
; X86-BMI-NEXT: pushl %edi
|
||||
; X86-BMI-NEXT: pushl %esi
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-BMI-NEXT: movl %eax, %edx
|
||||
; X86-BMI-NEXT: mulxl %esi, %edx, %ebx
|
||||
; X86-BMI-NEXT: movl %ecx, %edx
|
||||
; X86-BMI-NEXT: mulxl %esi, %esi, %ebp
|
||||
; X86-BMI-NEXT: addl %ebx, %esi
|
||||
; X86-BMI-NEXT: adcl $0, %ebp
|
||||
; X86-BMI-NEXT: movl %eax, %edx
|
||||
; X86-BMI-NEXT: mulxl %edi, %eax, %ebx
|
||||
; X86-BMI-NEXT: addl %esi, %eax
|
||||
; X86-BMI-NEXT: adcl %ebp, %ebx
|
||||
; X86-BMI-NEXT: setb %al
|
||||
; X86-BMI-NEXT: movzbl %al, %esi
|
||||
; X86-BMI-NEXT: movl %ecx, %edx
|
||||
; X86-BMI-NEXT: mulxl %edi, %eax, %edx
|
||||
; X86-BMI-NEXT: addl %ebx, %eax
|
||||
; X86-BMI-NEXT: adcl %esi, %edx
|
||||
; X86-BMI-NEXT: popl %esi
|
||||
; X86-BMI-NEXT: popl %edi
|
||||
; X86-BMI-NEXT: popl %ebx
|
||||
; X86-BMI-NEXT: popl %ebp
|
||||
; X86-BMI-NEXT: retl
|
||||
;
|
||||
; X64-NOBMI-LABEL: foo:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI-NEXT: mulq %rsi
|
||||
; X64-NOBMI-NEXT: movq %rdx, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
; X64-BMI-LABEL: foo:
|
||||
; X64-BMI: # %bb.0:
|
||||
; X64-BMI-NEXT: movq %rdi, %rdx
|
||||
; X64-BMI-NEXT: mulxq %rsi, %rcx, %rax
|
||||
; X64-BMI-NEXT: retq
|
||||
%tmp0 = zext i64 %x to i128
|
||||
%tmp1 = zext i64 %y to i128
|
||||
%tmp2 = mul i128 %tmp0, %tmp1
|
||||
@ -62,107 +100,202 @@ define i64 @foo(i64 %x, i64 %y) nounwind {
|
||||
; zero-extended value.
|
||||
|
||||
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind {
|
||||
; X86-LABEL: mul1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: subl $24, %esp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: je .LBB1_3
|
||||
; X86-NEXT: # %bb.1: # %for.body.preheader
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: xorl %ebp, %ebp
|
||||
; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
|
||||
; X86-NEXT: .p2align 4, 0x90
|
||||
; X86-NEXT: .LBB1_2: # %for.body
|
||||
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl (%eax,%ebp,8), %esi
|
||||
; X86-NEXT: movl 4(%eax,%ebp,8), %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: mull %edi
|
||||
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edi
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
|
||||
; X86-NEXT: adcl $0, %ecx
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: movl %eax, %edi
|
||||
; X86-NEXT: addl %ebx, %edi
|
||||
; X86-NEXT: adcl %ecx, %esi
|
||||
; X86-NEXT: setb %bl
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: addl %esi, %eax
|
||||
; X86-NEXT: movzbl %bl, %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: adcl %esi, %edx
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
|
||||
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
|
||||
; X86-NEXT: adcl $0, %eax
|
||||
; X86-NEXT: adcl $0, %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl %ecx, (%esi,%ebp,8)
|
||||
; X86-NEXT: movl %edi, 4(%esi,%ebp,8)
|
||||
; X86-NEXT: addl $1, %ebp
|
||||
; X86-NEXT: movl (%esp), %edi # 4-byte Reload
|
||||
; X86-NEXT: adcl $0, %edi
|
||||
; X86-NEXT: movl %ebp, %esi
|
||||
; X86-NEXT: xorl %ebx, %esi
|
||||
; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
|
||||
; X86-NEXT: xorl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: orl %esi, %edi
|
||||
; X86-NEXT: jne .LBB1_2
|
||||
; X86-NEXT: .LBB1_3: # %for.end
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: addl $24, %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
; X86-NOBMI-LABEL: mul1:
|
||||
; X86-NOBMI: # %bb.0: # %entry
|
||||
; X86-NOBMI-NEXT: pushl %ebp
|
||||
; X86-NOBMI-NEXT: pushl %ebx
|
||||
; X86-NOBMI-NEXT: pushl %edi
|
||||
; X86-NOBMI-NEXT: pushl %esi
|
||||
; X86-NOBMI-NEXT: subl $24, %esp
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: orl %ecx, %eax
|
||||
; X86-NOBMI-NEXT: je .LBB1_3
|
||||
; X86-NOBMI-NEXT: # %bb.1: # %for.body.preheader
|
||||
; X86-NOBMI-NEXT: xorl %eax, %eax
|
||||
; X86-NOBMI-NEXT: xorl %edx, %edx
|
||||
; X86-NOBMI-NEXT: xorl %ebp, %ebp
|
||||
; X86-NOBMI-NEXT: movl $0, (%esp) # 4-byte Folded Spill
|
||||
; X86-NOBMI-NEXT: .p2align 4, 0x90
|
||||
; X86-NOBMI-NEXT: .LBB1_2: # %for.body
|
||||
; X86-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-NOBMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl (%eax,%ebp,8), %esi
|
||||
; X86-NOBMI-NEXT: movl 4(%eax,%ebp,8), %ecx
|
||||
; X86-NOBMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NOBMI-NEXT: movl %esi, %eax
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NOBMI-NEXT: mull %edi
|
||||
; X86-NOBMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NOBMI-NEXT: movl %ecx, %eax
|
||||
; X86-NOBMI-NEXT: mull %edi
|
||||
; X86-NOBMI-NEXT: movl %edx, %ecx
|
||||
; X86-NOBMI-NEXT: movl %eax, %ebx
|
||||
; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
|
||||
; X86-NOBMI-NEXT: adcl $0, %ecx
|
||||
; X86-NOBMI-NEXT: movl %esi, %eax
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NOBMI-NEXT: mull %edx
|
||||
; X86-NOBMI-NEXT: movl %edx, %esi
|
||||
; X86-NOBMI-NEXT: movl %eax, %edi
|
||||
; X86-NOBMI-NEXT: addl %ebx, %edi
|
||||
; X86-NOBMI-NEXT: adcl %ecx, %esi
|
||||
; X86-NOBMI-NEXT: setb %bl
|
||||
; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NOBMI-NEXT: addl %esi, %eax
|
||||
; X86-NOBMI-NEXT: movzbl %bl, %esi
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NOBMI-NEXT: adcl %esi, %edx
|
||||
; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
|
||||
; X86-NOBMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
|
||||
; X86-NOBMI-NEXT: adcl $0, %eax
|
||||
; X86-NOBMI-NEXT: adcl $0, %edx
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NOBMI-NEXT: movl %ecx, (%esi,%ebp,8)
|
||||
; X86-NOBMI-NEXT: movl %edi, 4(%esi,%ebp,8)
|
||||
; X86-NOBMI-NEXT: addl $1, %ebp
|
||||
; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload
|
||||
; X86-NOBMI-NEXT: adcl $0, %edi
|
||||
; X86-NOBMI-NEXT: movl %ebp, %esi
|
||||
; X86-NOBMI-NEXT: xorl %ebx, %esi
|
||||
; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill
|
||||
; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NOBMI-NEXT: orl %esi, %edi
|
||||
; X86-NOBMI-NEXT: jne .LBB1_2
|
||||
; X86-NOBMI-NEXT: .LBB1_3: # %for.end
|
||||
; X86-NOBMI-NEXT: xorl %eax, %eax
|
||||
; X86-NOBMI-NEXT: xorl %edx, %edx
|
||||
; X86-NOBMI-NEXT: addl $24, %esp
|
||||
; X86-NOBMI-NEXT: popl %esi
|
||||
; X86-NOBMI-NEXT: popl %edi
|
||||
; X86-NOBMI-NEXT: popl %ebx
|
||||
; X86-NOBMI-NEXT: popl %ebp
|
||||
; X86-NOBMI-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: mul1:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: testq %rdi, %rdi
|
||||
; X64-NEXT: je .LBB1_3
|
||||
; X64-NEXT: # %bb.1: # %for.body.preheader
|
||||
; X64-NEXT: movq %rcx, %r8
|
||||
; X64-NEXT: movq %rdx, %r9
|
||||
; X64-NEXT: xorl %r10d, %r10d
|
||||
; X64-NEXT: xorl %ecx, %ecx
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB1_2: # %for.body
|
||||
; X64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X64-NEXT: movq %r8, %rax
|
||||
; X64-NEXT: mulq (%r9,%rcx,8)
|
||||
; X64-NEXT: addq %r10, %rax
|
||||
; X64-NEXT: adcq $0, %rdx
|
||||
; X64-NEXT: movq %rax, (%rsi,%rcx,8)
|
||||
; X64-NEXT: incq %rcx
|
||||
; X64-NEXT: cmpq %rcx, %rdi
|
||||
; X64-NEXT: movq %rdx, %r10
|
||||
; X64-NEXT: jne .LBB1_2
|
||||
; X64-NEXT: .LBB1_3: # %for.end
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: retq
|
||||
; X86-BMI-LABEL: mul1:
|
||||
; X86-BMI: # %bb.0: # %entry
|
||||
; X86-BMI-NEXT: pushl %ebp
|
||||
; X86-BMI-NEXT: pushl %ebx
|
||||
; X86-BMI-NEXT: pushl %edi
|
||||
; X86-BMI-NEXT: pushl %esi
|
||||
; X86-BMI-NEXT: subl $16, %esp
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI-NEXT: orl %ecx, %eax
|
||||
; X86-BMI-NEXT: je .LBB1_3
|
||||
; X86-BMI-NEXT: # %bb.1: # %for.body.preheader
|
||||
; X86-BMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-BMI-NEXT: xorl %edx, %edx
|
||||
; X86-BMI-NEXT: xorl %ebx, %ebx
|
||||
; X86-BMI-NEXT: xorl %ebp, %ebp
|
||||
; X86-BMI-NEXT: .p2align 4, 0x90
|
||||
; X86-BMI-NEXT: .LBB1_2: # %for.body
|
||||
; X86-BMI-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI-NEXT: movl (%ecx,%ebx,8), %eax
|
||||
; X86-BMI-NEXT: movl 4(%ecx,%ebx,8), %esi
|
||||
; X86-BMI-NEXT: movl %esi, (%esp) # 4-byte Spill
|
||||
; X86-BMI-NEXT: movl %eax, %edx
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI-NEXT: mulxl %ecx, %edx, %edi
|
||||
; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-BMI-NEXT: movl %esi, %edx
|
||||
; X86-BMI-NEXT: mulxl %ecx, %esi, %ecx
|
||||
; X86-BMI-NEXT: addl %edi, %esi
|
||||
; X86-BMI-NEXT: adcl $0, %ecx
|
||||
; X86-BMI-NEXT: movl %eax, %edx
|
||||
; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %edi, %eax
|
||||
; X86-BMI-NEXT: addl %esi, %edi
|
||||
; X86-BMI-NEXT: adcl %ecx, %eax
|
||||
; X86-BMI-NEXT: movl (%esp), %edx # 4-byte Reload
|
||||
; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %ecx, %edx
|
||||
; X86-BMI-NEXT: setb (%esp) # 1-byte Folded Spill
|
||||
; X86-BMI-NEXT: addl %eax, %ecx
|
||||
; X86-BMI-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
|
||||
; X86-BMI-NEXT: adcl %eax, %edx
|
||||
; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||
; X86-BMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
|
||||
; X86-BMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
|
||||
; X86-BMI-NEXT: adcl $0, %ecx
|
||||
; X86-BMI-NEXT: adcl $0, %edx
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI-NEXT: movl %esi, (%eax,%ebx,8)
|
||||
; X86-BMI-NEXT: movl %edi, 4(%eax,%ebx,8)
|
||||
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-BMI-NEXT: addl $1, %ebx
|
||||
; X86-BMI-NEXT: adcl $0, %ebp
|
||||
; X86-BMI-NEXT: movl %ebx, %eax
|
||||
; X86-BMI-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI-NEXT: movl %ebp, %esi
|
||||
; X86-BMI-NEXT: xorl %edi, %esi
|
||||
; X86-BMI-NEXT: orl %eax, %esi
|
||||
; X86-BMI-NEXT: jne .LBB1_2
|
||||
; X86-BMI-NEXT: .LBB1_3: # %for.end
|
||||
; X86-BMI-NEXT: xorl %eax, %eax
|
||||
; X86-BMI-NEXT: xorl %edx, %edx
|
||||
; X86-BMI-NEXT: addl $16, %esp
|
||||
; X86-BMI-NEXT: popl %esi
|
||||
; X86-BMI-NEXT: popl %edi
|
||||
; X86-BMI-NEXT: popl %ebx
|
||||
; X86-BMI-NEXT: popl %ebp
|
||||
; X86-BMI-NEXT: retl
|
||||
;
|
||||
; X64-NOBMI-LABEL: mul1:
|
||||
; X64-NOBMI: # %bb.0: # %entry
|
||||
; X64-NOBMI-NEXT: testq %rdi, %rdi
|
||||
; X64-NOBMI-NEXT: je .LBB1_3
|
||||
; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader
|
||||
; X64-NOBMI-NEXT: movq %rcx, %r8
|
||||
; X64-NOBMI-NEXT: movq %rdx, %r9
|
||||
; X64-NOBMI-NEXT: xorl %r10d, %r10d
|
||||
; X64-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X64-NOBMI-NEXT: .p2align 4, 0x90
|
||||
; X64-NOBMI-NEXT: .LBB1_2: # %for.body
|
||||
; X64-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X64-NOBMI-NEXT: movq %r8, %rax
|
||||
; X64-NOBMI-NEXT: mulq (%r9,%rcx,8)
|
||||
; X64-NOBMI-NEXT: addq %r10, %rax
|
||||
; X64-NOBMI-NEXT: adcq $0, %rdx
|
||||
; X64-NOBMI-NEXT: movq %rax, (%rsi,%rcx,8)
|
||||
; X64-NOBMI-NEXT: incq %rcx
|
||||
; X64-NOBMI-NEXT: cmpq %rcx, %rdi
|
||||
; X64-NOBMI-NEXT: movq %rdx, %r10
|
||||
; X64-NOBMI-NEXT: jne .LBB1_2
|
||||
; X64-NOBMI-NEXT: .LBB1_3: # %for.end
|
||||
; X64-NOBMI-NEXT: xorl %eax, %eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
; X64-BMI-LABEL: mul1:
|
||||
; X64-BMI: # %bb.0: # %entry
|
||||
; X64-BMI-NEXT: testq %rdi, %rdi
|
||||
; X64-BMI-NEXT: je .LBB1_3
|
||||
; X64-BMI-NEXT: # %bb.1: # %for.body.preheader
|
||||
; X64-BMI-NEXT: movq %rcx, %r8
|
||||
; X64-BMI-NEXT: movq %rdx, %r9
|
||||
; X64-BMI-NEXT: xorl %r10d, %r10d
|
||||
; X64-BMI-NEXT: xorl %ecx, %ecx
|
||||
; X64-BMI-NEXT: .p2align 4, 0x90
|
||||
; X64-BMI-NEXT: .LBB1_2: # %for.body
|
||||
; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; X64-BMI-NEXT: movq %r8, %rdx
|
||||
; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx
|
||||
; X64-BMI-NEXT: addq %r10, %rax
|
||||
; X64-BMI-NEXT: adcq $0, %rdx
|
||||
; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8)
|
||||
; X64-BMI-NEXT: incq %rcx
|
||||
; X64-BMI-NEXT: cmpq %rcx, %rdi
|
||||
; X64-BMI-NEXT: movq %rdx, %r10
|
||||
; X64-BMI-NEXT: jne .LBB1_2
|
||||
; X64-BMI-NEXT: .LBB1_3: # %for.end
|
||||
; X64-BMI-NEXT: xorl %eax, %eax
|
||||
; X64-BMI-NEXT: retq
|
||||
entry:
|
||||
%conv = zext i64 %y to i128
|
||||
%cmp11 = icmp eq i64 %n, 0
|
||||
|
@ -5,8 +5,8 @@
|
||||
define i64 @f1(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %edx
|
||||
; CHECK-NEXT: retl
|
||||
%x = zext i32 %a to i64
|
||||
%y = zext i32 %b to i64
|
||||
@ -17,9 +17,9 @@ define i64 @f1(i32 %a, i32 %b) {
|
||||
define i64 @f2(i32 %a, i32* %p) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: mull (%ecx)
|
||||
; CHECK-NEXT: mulxl (%eax), %eax, %edx
|
||||
; CHECK-NEXT: retl
|
||||
%b = load i32, i32* %p
|
||||
%x = zext i32 %a to i64
|
||||
|
@ -5,8 +5,8 @@
|
||||
define i128 @f1(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: mulq %rsi
|
||||
; CHECK-NEXT: movq %rdi, %rdx
|
||||
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
|
||||
; CHECK-NEXT: retq
|
||||
%x = zext i64 %a to i128
|
||||
%y = zext i64 %b to i128
|
||||
@ -17,8 +17,8 @@ define i128 @f1(i64 %a, i64 %b) {
|
||||
define i128 @f2(i64 %a, i64* %p) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: mulq (%rsi)
|
||||
; CHECK-NEXT: movq %rdi, %rdx
|
||||
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
|
||||
; CHECK-NEXT: retq
|
||||
%b = load i64, i64* %p
|
||||
%x = zext i64 %a to i128
|
||||
|
@ -5,11 +5,11 @@
|
||||
define void @_Z15uint64_to_asciimPc(i64 %arg) {
|
||||
; HSW-LABEL: _Z15uint64_to_asciimPc:
|
||||
; HSW: # %bb.0: # %bb
|
||||
; HSW-NEXT: movq %rdi, %rax
|
||||
; HSW-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
|
||||
; HSW-NEXT: mulq %rcx
|
||||
; HSW-NEXT: shrq $42, %rdx
|
||||
; HSW-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
|
||||
; HSW-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
|
||||
; HSW-NEXT: movq %rdi, %rdx
|
||||
; HSW-NEXT: mulxq %rax, %rax, %rcx
|
||||
; HSW-NEXT: shrq $42, %rcx
|
||||
; HSW-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
|
||||
; HSW-NEXT: shrq $20, %rax
|
||||
; HSW-NEXT: leal (%rax,%rax,4), %eax
|
||||
; HSW-NEXT: addl $5, %eax
|
||||
@ -22,11 +22,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) {
|
||||
;
|
||||
; ZN-LABEL: _Z15uint64_to_asciimPc:
|
||||
; ZN: # %bb.0: # %bb
|
||||
; ZN-NEXT: movq %rdi, %rax
|
||||
; ZN-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
|
||||
; ZN-NEXT: mulq %rcx
|
||||
; ZN-NEXT: shrq $42, %rdx
|
||||
; ZN-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
|
||||
; ZN-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
|
||||
; ZN-NEXT: movq %rdi, %rdx
|
||||
; ZN-NEXT: mulxq %rax, %rax, %rcx
|
||||
; ZN-NEXT: shrq $42, %rcx
|
||||
; ZN-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
|
||||
; ZN-NEXT: shrq $20, %rax
|
||||
; ZN-NEXT: leal 5(%rax,%rax,4), %eax
|
||||
; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
|
||||
|
Loading…
x
Reference in New Issue
Block a user