mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
ba1a2a9135
instruction introduced in AVX, which can operate on 128 and 256-bit vectors. It considers a 256-bit vector as two independent 128-bit lanes. It can permute any 32 or 64 elements inside a lane, and restricts the second lane to have the same permutation of the first one. With the improved splat support introduced early today, adding codegen for this instruction enable more efficient 256-bit code: Instead of: vextractf128 $0, %ymm0, %xmm0 punpcklbw %xmm0, %xmm0 punpckhbw %xmm0, %xmm0 vinsertf128 $0, %xmm0, %ymm0, %ymm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vextractf128 $1, %ymm0, %xmm1 shufps $1, %xmm1, %xmm1 movss %xmm1, 28(%rsp) movss %xmm1, 24(%rsp) movss %xmm1, 20(%rsp) movss %xmm1, 16(%rsp) vextractf128 $0, %ymm0, %xmm0 shufps $1, %xmm0, %xmm0 movss %xmm0, 12(%rsp) movss %xmm0, 8(%rsp) movss %xmm0, 4(%rsp) movss %xmm0, (%rsp) vmovaps (%rsp), %ymm0 We get: vextractf128 $0, %ymm0, %xmm0 punpcklbw %xmm0, %xmm0 punpckhbw %xmm0, %xmm0 vinsertf128 $0, %xmm0, %ymm0, %ymm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vpermilps $85, %ymm0, %ymm0 llvm-svn: 135662
271 lines
9.0 KiB
C++
271 lines
9.0 KiB
C++
//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This defines functionality used to emit comments about X86 instructions to
|
|
// an output stream for -fverbose-asm.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "X86InstComments.h"
|
|
#include "MCTargetDesc/X86MCTargetDesc.h"
|
|
#include "llvm/MC/MCInst.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "../Utils/X86ShuffleDecode.h"
|
|
using namespace llvm;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Top Level Entrypoint
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
|
|
/// newline terminated strings to the specified string if desired. This
|
|
/// information is shown in disassembly dumps when verbose assembly is enabled.
|
|
void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
|
const char *(*getRegName)(unsigned)) {
|
|
// If this is a shuffle operation, the switch should fill in this state.
|
|
SmallVector<unsigned, 8> ShuffleMask;
|
|
const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
|
|
|
|
switch (MI->getOpcode()) {
|
|
case X86::INSERTPSrr:
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
|
|
break;
|
|
|
|
case X86::MOVLHPSrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodeMOVLHPSMask(2, ShuffleMask);
|
|
break;
|
|
|
|
case X86::MOVHLPSrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodeMOVHLPSMask(2, ShuffleMask);
|
|
break;
|
|
|
|
case X86::PSHUFDri:
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PSHUFDmi:
|
|
DestName = getRegName(MI->getOperand(0).getReg());
|
|
DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
|
ShuffleMask);
|
|
break;
|
|
|
|
case X86::PSHUFHWri:
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PSHUFHWmi:
|
|
DestName = getRegName(MI->getOperand(0).getReg());
|
|
DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
|
|
ShuffleMask);
|
|
break;
|
|
case X86::PSHUFLWri:
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PSHUFLWmi:
|
|
DestName = getRegName(MI->getOperand(0).getReg());
|
|
DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
|
|
ShuffleMask);
|
|
break;
|
|
|
|
case X86::PUNPCKHBWrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKHBWrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKHMask(16, ShuffleMask);
|
|
break;
|
|
case X86::PUNPCKHWDrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKHWDrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKHMask(8, ShuffleMask);
|
|
break;
|
|
case X86::PUNPCKHDQrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKHDQrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKHMask(4, ShuffleMask);
|
|
break;
|
|
case X86::PUNPCKHQDQrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKHQDQrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKHMask(2, ShuffleMask);
|
|
break;
|
|
|
|
case X86::PUNPCKLBWrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKLBWrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKLBWMask(16, ShuffleMask);
|
|
break;
|
|
case X86::PUNPCKLWDrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKLWDrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKLWDMask(8, ShuffleMask);
|
|
break;
|
|
case X86::PUNPCKLDQrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKLDQrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKLDQMask(4, ShuffleMask);
|
|
break;
|
|
case X86::PUNPCKLQDQrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::PUNPCKLQDQrm:
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
DecodePUNPCKLQDQMask(2, ShuffleMask);
|
|
break;
|
|
|
|
case X86::SHUFPDrri:
|
|
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
break;
|
|
|
|
case X86::SHUFPSrri:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::SHUFPSrmi:
|
|
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
|
|
case X86::UNPCKLPDrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::UNPCKLPDrm:
|
|
DecodeUNPCKLPDMask(2, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
case X86::VUNPCKLPDrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::VUNPCKLPDrm:
|
|
DecodeUNPCKLPDMask(2, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
break;
|
|
case X86::VUNPCKLPDYrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::VUNPCKLPDYrm:
|
|
DecodeUNPCKLPDMask(4, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
break;
|
|
case X86::UNPCKLPSrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::UNPCKLPSrm:
|
|
DecodeUNPCKLPSMask(4, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
case X86::VUNPCKLPSrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::VUNPCKLPSrm:
|
|
DecodeUNPCKLPSMask(4, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
break;
|
|
case X86::VUNPCKLPSYrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::VUNPCKLPSYrm:
|
|
DecodeUNPCKLPSMask(8, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
break;
|
|
case X86::UNPCKHPDrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::UNPCKHPDrm:
|
|
DecodeUNPCKHPMask(2, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
case X86::UNPCKHPSrr:
|
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
|
// FALL THROUGH.
|
|
case X86::UNPCKHPSrm:
|
|
DecodeUNPCKHPMask(4, ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
case X86::VPERMILPSYri:
|
|
DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
|
|
ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
case X86::VPERMILPDYri:
|
|
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
|
|
ShuffleMask);
|
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
|
break;
|
|
}
|
|
|
|
|
|
// If this was a shuffle operation, print the shuffle mask.
|
|
if (!ShuffleMask.empty()) {
|
|
if (DestName == 0) DestName = Src1Name;
|
|
OS << (DestName ? DestName : "mem") << " = ";
|
|
|
|
// If the two sources are the same, canonicalize the input elements to be
|
|
// from the first src so that we get larger element spans.
|
|
if (Src1Name == Src2Name) {
|
|
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
|
|
if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
|
|
ShuffleMask[i] >= e) // From second mask.
|
|
ShuffleMask[i] -= e;
|
|
}
|
|
}
|
|
|
|
// The shuffle mask specifies which elements of the src1/src2 fill in the
|
|
// destination, with a few sentinel values. Loop through and print them
|
|
// out.
|
|
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
|
|
if (i != 0)
|
|
OS << ',';
|
|
if (ShuffleMask[i] == SM_SentinelZero) {
|
|
OS << "zero";
|
|
continue;
|
|
}
|
|
|
|
// Otherwise, it must come from src1 or src2. Print the span of elements
|
|
// that comes from this src.
|
|
bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
|
|
const char *SrcName = isSrc1 ? Src1Name : Src2Name;
|
|
OS << (SrcName ? SrcName : "mem") << '[';
|
|
bool IsFirst = true;
|
|
while (i != e &&
|
|
(int)ShuffleMask[i] >= 0 &&
|
|
(ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
|
|
if (!IsFirst)
|
|
OS << ',';
|
|
else
|
|
IsFirst = false;
|
|
OS << ShuffleMask[i] % ShuffleMask.size();
|
|
++i;
|
|
}
|
|
OS << ']';
|
|
--i; // For loop increments element #.
|
|
}
|
|
//MI->print(OS, 0);
|
|
OS << "\n";
|
|
}
|
|
|
|
}
|