
[X86] Don't emit MULX by default with BMI2

MULX has somewhat improved register allocation constraints compared to the legacy MUL instruction: both output registers are encoded instead of being fixed to EAX/EDX, though EDX is still used as an implicit input. It also doesn't touch flags. Unfortunately, the encoding is longer.
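
To make the constraint difference concrete, here is a small illustration of my own (not part of the commit), using GCC/Clang inline asm so the compiler itself enforces the register classes: MUL pins its operands to the "a"/"d" registers and clobbers EFLAGS, while MULX only pins its implicit RDX source and needs no "cc" clobber. Compile with -mbmi2.

#include <stdint.h>

/* Legacy MUL: RDX:RAX = RAX * src; inputs/outputs pinned, EFLAGS clobbered. */
static inline uint64_t mulhi_mul(uint64_t a, uint64_t b) {
  uint64_t lo, hi;
  __asm__("mulq %3" : "=a"(lo), "=d"(hi) : "a"(a), "r"(b) : "cc");
  return hi;
}

/* BMI2 MULX: hi:lo = RDX * src; both outputs freely allocatable, flags kept. */
static inline uint64_t mulhi_mulx(uint64_t a, uint64_t b) {
  uint64_t lo, hi;
  __asm__("mulxq %3, %0, %1" : "=r"(lo), "=r"(hi) : "d"(a), "r"(b));
  return hi;
}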

Preferring it whenever BMI2 is enabled is probably not optimal. Choosing it should somehow be a function of register allocation constraints, much like the decision to convert adds to three-address form. gcc and icc definitely don't pick MULX by default; it's not clear what rules, if any, they use for it.
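
To sketch what such a gate could look like (purely hypothetical; the names below are invented and this is not LLVM API, nor anything this patch implements): a profitability predicate consulted at selection time, in the spirit of the checks used when converting adds to LEA.

#include <stdbool.h>

/* Hypothetical sketch only. 'FlagsLive' and 'PressureHigh' stand in for
   whatever liveness/pressure analysis a real heuristic would consult. */
static bool shouldPreferMULX(bool HasBMI2, bool FlagsLive, bool PressureHigh) {
  if (!HasBMI2)
    return false;
  /* MULX pays off when EFLAGS must survive the multiply or when freeing
     EAX/EDX (RAX/RDX) relieves register pressure; otherwise the shorter
     legacy MUL encoding is the better default. */
  return FlagsLive || PressureHigh;
}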

Differential Revision: https://reviews.llvm.org/D55565

llvm-svn: 348975
Craig Topper 2018-12-12 21:21:31 +00:00
parent df0f46f125
commit 43dcc4cc5f
7 changed files with 217 additions and 407 deletions


@@ -3410,14 +3410,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     unsigned Opc, MOpc;
     bool isSigned = Opcode == ISD::SMUL_LOHI;
-    bool hasBMI2 = Subtarget->hasBMI2();
     if (!isSigned) {
       switch (NVT.SimpleTy) {
       default: llvm_unreachable("Unsupported VT!");
-      case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
-                     MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
-      case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
-                     MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
+      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
       }
     } else {
       switch (NVT.SimpleTy) {
@@ -3438,12 +3435,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     case X86::MUL64r:
       SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
       break;
-    case X86::MULX32rr:
-      SrcReg = X86::EDX; LoReg = HiReg = 0;
-      break;
-    case X86::MULX64rr:
-      SrcReg = X86::RDX; LoReg = HiReg = 0;
-      break;
     }

     SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -3457,26 +3448,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
                                           N0, SDValue()).getValue(1);
-    SDValue ResHi, ResLo;

     if (foldedLoad) {
       SDValue Chain;
       MachineSDNode *CNode = nullptr;
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
-      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
-        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
-        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
-        ResHi = SDValue(CNode, 0);
-        ResLo = SDValue(CNode, 1);
-        Chain = SDValue(CNode, 2);
-        InFlag = SDValue(CNode, 3);
-      } else {
-        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
-        Chain = SDValue(CNode, 0);
-        InFlag = SDValue(CNode, 1);
-      }
+      SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+      CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+      Chain = SDValue(CNode, 0);
+      InFlag = SDValue(CNode, 1);

       // Update the chain.
       ReplaceUses(N1.getValue(1), Chain);
@@ -3484,39 +3464,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
     } else {
       SDValue Ops[] = { N1, InFlag };
-      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
-        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-        ResHi = SDValue(CNode, 0);
-        ResLo = SDValue(CNode, 1);
-        InFlag = SDValue(CNode, 2);
-      } else {
-        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-        InFlag = SDValue(CNode, 0);
-      }
+      SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+      SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+      InFlag = SDValue(CNode, 0);
     }

     // Copy the low half of the result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
-      if (!ResLo.getNode()) {
-        assert(LoReg && "Register for low half is not defined!");
-        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
-                                       InFlag);
-        InFlag = ResLo.getValue(2);
-      }
+      assert(LoReg && "Register for low half is not defined!");
+      SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
+                                             NVT, InFlag);
+      InFlag = ResLo.getValue(2);
       ReplaceUses(SDValue(Node, 0), ResLo);
       LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
                  dbgs() << '\n');
     }
     // Copy the high half of the result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      if (!ResHi.getNode()) {
-        assert(HiReg && "Register for high half is not defined!");
-        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
-                                       InFlag);
-        InFlag = ResHi.getValue(2);
-      }
+      assert(HiReg && "Register for high half is not defined!");
+      SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
+                                             NVT, InFlag);
+      InFlag = ResHi.getValue(2);
       ReplaceUses(SDValue(Node, 1), ResHi);
       LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
                  dbgs() << '\n');


@@ -68,8 +68,8 @@ define i64 @mulx64(i64 %x, i64 %y, i64* %p) {
 ; CHECK-LABEL: mulx64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq %rsi, %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq %rsi
 ; CHECK-NEXT:    movq %rdx, (%rcx)
 ; CHECK-NEXT:    retq
   %x1 = zext i64 %x to i128
@@ -86,8 +86,8 @@ define i64 @mulx64_load(i64 %x, i64* %y, i64* %p) {
 ; CHECK-LABEL: mulx64_load:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq (%rsi), %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq (%rsi)
 ; CHECK-NEXT:    movq %rdx, (%rcx)
 ; CHECK-NEXT:    retq
   %y1 = load i64, i64* %y
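
For context, the pattern these checks exercise is the usual widening-multiply idiom from the IR above (zext to i128, mul, use both halves). A standalone C equivalent of my own (the names are mine, not from the test file), runnable on any 64-bit target with __int128 support:

#include <stdint.h>
#include <stdio.h>

/* 64x64 -> 128-bit widening multiply: return the low half and store the
   high half through p -- the exact shape MUL (and formerly MULX) lowers. */
static uint64_t widening_mul(uint64_t x, uint64_t y, uint64_t *p) {
  unsigned __int128 wide = (unsigned __int128)x * y;
  *p = (uint64_t)(wide >> 64); /* high half: RDX after mulq */
  return (uint64_t)wide;       /* low half:  RAX after mulq */
}

int main(void) {
  uint64_t hi;
  uint64_t lo = widening_mul(0xDEADBEEFCAFEF00DULL, 3, &hi);
  printf("lo=%016llx hi=%016llx\n", (unsigned long long)lo,
         (unsigned long long)hi);
  return 0;
}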


@@ -120,11 +120,11 @@ define i32 @mulx32(i32 %x, i32 %y, i32* %p) {
 ; X86-LABEL: mulx32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    mulxl %eax, %eax, %edx
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    mull %edx
 ; X86-NEXT:    movl %edx, (%ecx)
 ; X86-NEXT:    retl
 ;
@@ -156,10 +156,10 @@ define i32 @mulx32_load(i32 %x, i32* %y, i32* %p) {
 ; X86-LABEL: mulx32_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    addl %edx, %edx
-; X86-NEXT:    mulxl (%eax), %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    mull (%edx)
 ; X86-NEXT:    movl %edx, (%ecx)
 ; X86-NEXT:    retl
 ;


@@ -7,108 +7,61 @@
 ; PR1198
 define i64 @foo(i64 %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: foo:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    pushl %ebp
-; X86-NOBMI-NEXT:    pushl %ebx
-; X86-NOBMI-NEXT:    pushl %edi
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NOBMI-NEXT:    movl %ecx, %eax
-; X86-NOBMI-NEXT:    mull %ebp
-; X86-NOBMI-NEXT:    movl %edx, %ebx
-; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    mull %ebp
-; X86-NOBMI-NEXT:    movl %edx, %ebp
-; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:    addl %ebx, %esi
-; X86-NOBMI-NEXT:    adcl $0, %ebp
-; X86-NOBMI-NEXT:    movl %ecx, %eax
-; X86-NOBMI-NEXT:    mull %edi
-; X86-NOBMI-NEXT:    movl %edx, %ebx
-; X86-NOBMI-NEXT:    addl %esi, %eax
-; X86-NOBMI-NEXT:    adcl %ebp, %ebx
-; X86-NOBMI-NEXT:    setb %al
-; X86-NOBMI-NEXT:    movzbl %al, %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    mull %edi
-; X86-NOBMI-NEXT:    movl %edx, %esi
-; X86-NOBMI-NEXT:    movl %eax, %ebp
-; X86-NOBMI-NEXT:    addl %ebx, %ebp
-; X86-NOBMI-NEXT:    adcl %ecx, %esi
-; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    movl %edx, %edi
-; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    addl %ebx, %eax
-; X86-NOBMI-NEXT:    adcl %edi, %edx
-; X86-NOBMI-NEXT:    addl %ebp, %eax
-; X86-NOBMI-NEXT:    adcl %esi, %edx
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    popl %edi
-; X86-NOBMI-NEXT:    popl %ebx
-; X86-NOBMI-NEXT:    popl %ebp
-; X86-NOBMI-NEXT:    retl
+; X86-LABEL: foo:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebx, %esi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    adcl %ecx, %esi
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
 ;
-; X86-BMI-LABEL: foo:
-; X86-BMI:       # %bb.0:
-; X86-BMI-NEXT:    pushl %ebp
-; X86-BMI-NEXT:    pushl %ebx
-; X86-BMI-NEXT:    pushl %edi
-; X86-BMI-NEXT:    pushl %esi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl %esi, %edx, %ebx
-; X86-BMI-NEXT:    movl %eax, %edx
-; X86-BMI-NEXT:    mulxl %esi, %ebp, %eax
-; X86-BMI-NEXT:    addl %ebx, %ebp
-; X86-BMI-NEXT:    adcl $0, %eax
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl %edi, %edx, %ebx
-; X86-BMI-NEXT:    addl %ebp, %edx
-; X86-BMI-NEXT:    adcl %eax, %ebx
-; X86-BMI-NEXT:    setb %al
-; X86-BMI-NEXT:    movzbl %al, %eax
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI-NEXT:    mulxl %edi, %edi, %ebp
-; X86-BMI-NEXT:    addl %ebx, %edi
-; X86-BMI-NEXT:    adcl %eax, %ebp
-; X86-BMI-NEXT:    xorl %eax, %eax
-; X86-BMI-NEXT:    movl %esi, %edx
-; X86-BMI-NEXT:    mulxl %eax, %ebx, %esi
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl %eax, %eax, %edx
-; X86-BMI-NEXT:    addl %ebx, %eax
-; X86-BMI-NEXT:    adcl %esi, %edx
-; X86-BMI-NEXT:    addl %edi, %eax
-; X86-BMI-NEXT:    adcl %ebp, %edx
-; X86-BMI-NEXT:    popl %esi
-; X86-BMI-NEXT:    popl %edi
-; X86-BMI-NEXT:    popl %ebx
-; X86-BMI-NEXT:    popl %ebp
-; X86-BMI-NEXT:    retl
-;
-; X64-NOBMI-LABEL: foo:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rdi, %rax
-; X64-NOBMI-NEXT:    mulq %rsi
-; X64-NOBMI-NEXT:    movq %rdx, %rax
-; X64-NOBMI-NEXT:    retq
-;
-; X64-BMI-LABEL: foo:
-; X64-BMI:       # %bb.0:
-; X64-BMI-NEXT:    movq %rdi, %rdx
-; X64-BMI-NEXT:    mulxq %rsi, %rcx, %rax
-; X64-BMI-NEXT:    retq
+; X64-LABEL: foo:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    retq
   %tmp0 = zext i64 %x to i128
   %tmp1 = zext i64 %y to i128
   %tmp2 = mul i128 %tmp0, %tmp1
@@ -122,236 +75,125 @@ define i64 @foo(i64 %x, i64 %y) nounwind {
 ; zero-extended value.
 define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: mul1:
-; X86-NOBMI:       # %bb.0: # %entry
-; X86-NOBMI-NEXT:    pushl %ebp
-; X86-NOBMI-NEXT:    pushl %ebx
-; X86-NOBMI-NEXT:    pushl %edi
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    subl $28, %esp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    orl %ecx, %eax
-; X86-NOBMI-NEXT:    je .LBB1_3
-; X86-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
-; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X86-NOBMI-NEXT:    .p2align 4, 0x90
-; X86-NOBMI-NEXT:  .LBB1_2: # %for.body
-; X86-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOBMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    movl (%eax,%ebx,8), %ebp
-; X86-NOBMI-NEXT:    movl 4(%eax,%ebx,8), %esi
-; X86-NOBMI-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %ebp, %eax
-; X86-NOBMI-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    movl %edx, %edi
-; X86-NOBMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    movl %edx, %ecx
-; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:    addl %edi, %esi
-; X86-NOBMI-NEXT:    adcl $0, %ecx
-; X86-NOBMI-NEXT:    movl %ebp, %eax
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    mull %edx
-; X86-NOBMI-NEXT:    movl %edx, %ebp
-; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:    addl %esi, %edi
-; X86-NOBMI-NEXT:    adcl %ecx, %ebp
-; X86-NOBMI-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOBMI-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    movl %edx, %ecx
-; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:    addl %ebp, %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
-; X86-NOBMI-NEXT:    adcl %eax, %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    mull %edx
-; X86-NOBMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %eax, %ebp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    mull %edx
-; X86-NOBMI-NEXT:    addl %ebp, %eax
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NOBMI-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X86-NOBMI-NEXT:    addl %esi, %eax
-; X86-NOBMI-NEXT:    adcl %ecx, %edx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X86-NOBMI-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X86-NOBMI-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X86-NOBMI-NEXT:    adcl $0, %eax
-; X86-NOBMI-NEXT:    adcl $0, %edx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl %esi, (%ecx,%ebx,8)
-; X86-NOBMI-NEXT:    movl %edi, 4(%ecx,%ebx,8)
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl %ecx, %edi
-; X86-NOBMI-NEXT:    addl $1, %ebx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X86-NOBMI-NEXT:    adcl $0, %esi
-; X86-NOBMI-NEXT:    movl %ebx, %ecx
-; X86-NOBMI-NEXT:    xorl %ebp, %ecx
-; X86-NOBMI-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    xorl %edi, %esi
-; X86-NOBMI-NEXT:    orl %ecx, %esi
-; X86-NOBMI-NEXT:    jne .LBB1_2
-; X86-NOBMI-NEXT:  .LBB1_3: # %for.end
-; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    addl $28, %esp
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    popl %edi
-; X86-NOBMI-NEXT:    popl %ebx
-; X86-NOBMI-NEXT:    popl %ebp
-; X86-NOBMI-NEXT:    retl
+; X86-LABEL: mul1:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    je .LBB1_3
+; X86-NEXT:  # %bb.1: # %for.body.preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB1_2: # %for.body
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl (%eax,%ebx,8), %ebp
+; X86-NEXT:    movl 4(%eax,%ebx,8), %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %edi, %esi
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl %ecx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebp, %esi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %esi, (%ecx,%ebx,8)
+; X86-NEXT:    movl %edi, 4(%ecx,%ebx,8)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    addl $1, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    xorl %ebp, %ecx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    jne .LBB1_2
+; X86-NEXT:  .LBB1_3: # %for.end
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
 ;
-; X86-BMI-LABEL: mul1:
-; X86-BMI:       # %bb.0: # %entry
-; X86-BMI-NEXT:    pushl %ebp
-; X86-BMI-NEXT:    pushl %ebx
-; X86-BMI-NEXT:    pushl %edi
-; X86-BMI-NEXT:    pushl %esi
-; X86-BMI-NEXT:    subl $20, %esp
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    orl %ecx, %eax
-; X86-BMI-NEXT:    je .LBB1_3
-; X86-BMI-NEXT:  # %bb.1: # %for.body.preheader
-; X86-BMI-NEXT:    xorl %ecx, %ecx
-; X86-BMI-NEXT:    xorl %edx, %edx
-; X86-BMI-NEXT:    xorl %edi, %edi
-; X86-BMI-NEXT:    movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X86-BMI-NEXT:    .p2align 4, 0x90
-; X86-BMI-NEXT:  .LBB1_2: # %for.body
-; X86-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-BMI-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl (%eax,%edi,8), %ecx
-; X86-BMI-NEXT:    movl 4(%eax,%edi,8), %ebx
-; X86-BMI-NEXT:    movl %ebx, (%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl %eax, %esi
-; X86-BMI-NEXT:    mulxl %eax, %eax, %ebp
-; X86-BMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ebx, %edx
-; X86-BMI-NEXT:    mulxl %esi, %eax, %esi
-; X86-BMI-NEXT:    addl %ebp, %eax
-; X86-BMI-NEXT:    adcl $0, %esi
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl {{[0-9]+}}(%esp), %ebp, %ebx
-; X86-BMI-NEXT:    addl %eax, %ebp
-; X86-BMI-NEXT:    adcl %esi, %ebx
-; X86-BMI-NEXT:    movl (%esp), %edx # 4-byte Reload
-; X86-BMI-NEXT:    mulxl {{[0-9]+}}(%esp), %eax, %esi
-; X86-BMI-NEXT:    setb %dl
-; X86-BMI-NEXT:    addl %ebx, %eax
-; X86-BMI-NEXT:    movzbl %dl, %edx
-; X86-BMI-NEXT:    adcl %edx, %esi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI-NEXT:    xorl %ebx, %ebx
-; X86-BMI-NEXT:    mulxl %ebx, %ebx, %edx
-; X86-BMI-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    xorl %ecx, %ecx
-; X86-BMI-NEXT:    mulxl %ecx, %ecx, %edx
-; X86-BMI-NEXT:    addl %ebx, %ecx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
-; X86-BMI-NEXT:    addl %eax, %ecx
-; X86-BMI-NEXT:    adcl %esi, %edx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X86-BMI-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X86-BMI-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X86-BMI-NEXT:    adcl $0, %ecx
-; X86-BMI-NEXT:    adcl $0, %edx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl %esi, (%eax,%edi,8)
-; X86-BMI-NEXT:    movl %ebp, 4(%eax,%edi,8)
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl %eax, %esi
-; X86-BMI-NEXT:    addl $1, %edi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X86-BMI-NEXT:    adcl $0, %ebp
-; X86-BMI-NEXT:    movl %edi, %eax
-; X86-BMI-NEXT:    xorl %esi, %eax
-; X86-BMI-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ebp, %esi
-; X86-BMI-NEXT:    xorl %ebx, %esi
-; X86-BMI-NEXT:    orl %eax, %esi
-; X86-BMI-NEXT:    jne .LBB1_2
-; X86-BMI-NEXT:  .LBB1_3: # %for.end
-; X86-BMI-NEXT:    xorl %eax, %eax
-; X86-BMI-NEXT:    xorl %edx, %edx
-; X86-BMI-NEXT:    addl $20, %esp
-; X86-BMI-NEXT:    popl %esi
-; X86-BMI-NEXT:    popl %edi
-; X86-BMI-NEXT:    popl %ebx
-; X86-BMI-NEXT:    popl %ebp
-; X86-BMI-NEXT:    retl
-;
-; X64-NOBMI-LABEL: mul1:
-; X64-NOBMI:       # %bb.0: # %entry
-; X64-NOBMI-NEXT:    testq %rdi, %rdi
-; X64-NOBMI-NEXT:    je .LBB1_3
-; X64-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
-; X64-NOBMI-NEXT:    movq %rcx, %r8
-; X64-NOBMI-NEXT:    movq %rdx, %r9
-; X64-NOBMI-NEXT:    xorl %r10d, %r10d
-; X64-NOBMI-NEXT:    xorl %ecx, %ecx
-; X64-NOBMI-NEXT:    .p2align 4, 0x90
-; X64-NOBMI-NEXT:  .LBB1_2: # %for.body
-; X64-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NOBMI-NEXT:    movq %r8, %rax
-; X64-NOBMI-NEXT:    mulq (%r9,%rcx,8)
-; X64-NOBMI-NEXT:    addq %r10, %rax
-; X64-NOBMI-NEXT:    adcq $0, %rdx
-; X64-NOBMI-NEXT:    movq %rax, (%rsi,%rcx,8)
-; X64-NOBMI-NEXT:    incq %rcx
-; X64-NOBMI-NEXT:    cmpq %rcx, %rdi
-; X64-NOBMI-NEXT:    movq %rdx, %r10
-; X64-NOBMI-NEXT:    jne .LBB1_2
-; X64-NOBMI-NEXT:  .LBB1_3: # %for.end
-; X64-NOBMI-NEXT:    xorl %eax, %eax
-; X64-NOBMI-NEXT:    retq
-;
-; X64-BMI-LABEL: mul1:
-; X64-BMI:       # %bb.0: # %entry
-; X64-BMI-NEXT:    testq %rdi, %rdi
-; X64-BMI-NEXT:    je .LBB1_3
-; X64-BMI-NEXT:  # %bb.1: # %for.body.preheader
-; X64-BMI-NEXT:    movq %rcx, %r8
-; X64-BMI-NEXT:    movq %rdx, %r9
-; X64-BMI-NEXT:    xorl %r10d, %r10d
-; X64-BMI-NEXT:    xorl %ecx, %ecx
-; X64-BMI-NEXT:    .p2align 4, 0x90
-; X64-BMI-NEXT:  .LBB1_2: # %for.body
-; X64-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-BMI-NEXT:    movq %r8, %rdx
-; X64-BMI-NEXT:    mulxq (%r9,%rcx,8), %rax, %rdx
-; X64-BMI-NEXT:    addq %r10, %rax
-; X64-BMI-NEXT:    adcq $0, %rdx
-; X64-BMI-NEXT:    movq %rax, (%rsi,%rcx,8)
-; X64-BMI-NEXT:    incq %rcx
-; X64-BMI-NEXT:    cmpq %rcx, %rdi
-; X64-BMI-NEXT:    movq %rdx, %r10
-; X64-BMI-NEXT:    jne .LBB1_2
-; X64-BMI-NEXT:  .LBB1_3: # %for.end
-; X64-BMI-NEXT:    xorl %eax, %eax
-; X64-BMI-NEXT:    retq
+; X64-LABEL: mul1:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    testq %rdi, %rdi
+; X64-NEXT:    je .LBB1_3
+; X64-NEXT:  # %bb.1: # %for.body.preheader
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB1_2: # %for.body
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq (%r9,%rcx,8)
+; X64-NEXT:    addq %r10, %rax
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq %rax, (%rsi,%rcx,8)
+; X64-NEXT:    incq %rcx
+; X64-NEXT:    cmpq %rcx, %rdi
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    jne .LBB1_2
+; X64-NEXT:  .LBB1_3: # %for.end
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
 entry:
   %conv = zext i64 %y to i128
   %cmp11 = icmp eq i64 %n, 0


@@ -5,8 +5,8 @@
 define i64 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    mulxl {{[0-9]+}}(%esp), %eax, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    retl
   %x = zext i32 %a to i64
   %y = zext i32 %b to i64
@@ -17,9 +17,9 @@ define i64 @f1(i32 %a, i32 %b) {
 define i64 @f2(i32 %a, i32* %p) {
 ; CHECK-LABEL: f2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    mulxl (%eax), %eax, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    mull (%ecx)
 ; CHECK-NEXT:    retl
   %b = load i32, i32* %p
   %x = zext i32 %a to i64


@@ -5,8 +5,8 @@
 define i128 @f1(i64 %a, i64 %b) {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq %rsi, %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq %rsi
 ; CHECK-NEXT:    retq
   %x = zext i64 %a to i128
   %y = zext i64 %b to i128
@@ -17,8 +17,8 @@ define i128 @f1(i64 %a, i64 %b) {
 define i128 @f2(i64 %a, i64* %p) {
 ; CHECK-LABEL: f2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq (%rsi), %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq (%rsi)
 ; CHECK-NEXT:    retq
   %b = load i64, i64* %p
   %x = zext i64 %a to i128


@@ -5,11 +5,11 @@
 define void @_Z15uint64_to_asciimPc(i64 %arg) {
 ; HSW-LABEL: _Z15uint64_to_asciimPc:
 ; HSW:       # %bb.0: # %bb
-; HSW-NEXT:    movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
-; HSW-NEXT:    movq %rdi, %rdx
-; HSW-NEXT:    mulxq %rax, %rax, %rcx
-; HSW-NEXT:    shrq $42, %rcx
-; HSW-NEXT:    imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
+; HSW-NEXT:    movq %rdi, %rax
+; HSW-NEXT:    movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
+; HSW-NEXT:    mulq %rcx
+; HSW-NEXT:    shrq $42, %rdx
+; HSW-NEXT:    imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
 ; HSW-NEXT:    shrq $20, %rax
 ; HSW-NEXT:    leal (%rax,%rax,4), %eax
 ; HSW-NEXT:    addl $5, %eax
@@ -22,11 +22,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) {
 ;
 ; ZN-LABEL: _Z15uint64_to_asciimPc:
 ; ZN:       # %bb.0: # %bb
-; ZN-NEXT:    movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
-; ZN-NEXT:    movq %rdi, %rdx
-; ZN-NEXT:    mulxq %rax, %rax, %rcx
-; ZN-NEXT:    shrq $42, %rcx
-; ZN-NEXT:    imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
+; ZN-NEXT:    movq %rdi, %rax
+; ZN-NEXT:    movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
+; ZN-NEXT:    mulq %rcx
+; ZN-NEXT:    shrq $42, %rdx
+; ZN-NEXT:    imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
 ; ZN-NEXT:    shrq $20, %rax
 ; ZN-NEXT:    leal 5(%rax,%rax,4), %eax
 ; ZN-NEXT:    andl $134217727, %eax # imm = 0x7FFFFFF