[DAG, X86] Improve Dependency analysis when doing multi-node Instruction Selection

Cleanup cycle/validity checks in ISel (IsLegalToFold, HandleMergeInputChains)
and X86 (isFusableLoadOpStore). Now do a full search for cycles / dependencies,
pruning the search when the topological property of NodeId allows. As part of
this, propagate the NodeId-based cutoffs to narrow hasPredecessorHelper
searches.

Reviewers: craig.topper, bogner

Subscribers: llvm-commits, hiraditya

Differential Revision: https://reviews.llvm.org/D41293

llvm-svn: 324359
parent 4eaf495316
commit 8f7ea6d7d0
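The idea behind the pruning, sketched outside of LLVM for illustration: a node's positive NodeId is strictly greater than the NodeIds of all of its operands, so a predecessor search looking for N can defer any node whose positive id is already below N's. The following is a minimal, self-contained sketch under that assumption; the names Node and hasPredecessor are invented for the example, and the real implementation is SDNode::hasPredecessorHelper in the first hunk below.

    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    // Simplified stand-in for SDNode: Id > 0 means topologically ordered,
    // 0 means reset during legalization, -1 means newly created.
    // TokenFactor-like nodes are excluded from pruning since their
    // topological order is not guaranteed.
    struct Node {
      int Id = -1;
      bool IsTokenFactor = false;
      std::vector<const Node *> Operands;
    };

    // Returns true if N is reachable by walking operands from the worklist.
    // Visited/Worklist persist across calls so queries can be incremental.
    bool hasPredecessor(const Node *N,
                        std::unordered_set<const Node *> &Visited,
                        std::vector<const Node *> &Worklist,
                        std::size_t MaxSteps = 0,
                        bool TopologicalPrune = false) {
      std::vector<const Node *> Deferred;
      if (Visited.count(N))
        return true;
      bool Found = false;
      while (!Worklist.empty()) {
        const Node *M = Worklist.back();
        Worklist.pop_back();
        if (TopologicalPrune && !M->IsTokenFactor && N->Id > 0 && M->Id > 0 &&
            M->Id < N->Id) {
          // M's operands have even smaller positive ids, so N cannot be among
          // them. Defer M so a later query for a different N can still see it.
          Deferred.push_back(M);
          continue;
        }
        for (const Node *Op : M->Operands) {
          if (Visited.insert(Op).second)
            Worklist.push_back(Op);
          if (Op == N)
            Found = true;
        }
        if (Found)
          break;
        if (MaxSteps != 0 && Visited.size() >= MaxSteps)
          break; // Search truncated; callers must treat this conservatively.
      }
      // Push deferred nodes back so the worklist stays usable across calls.
      Worklist.insert(Worklist.end(), Deferred.begin(), Deferred.end());
      return Found;
    }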
@@ -796,16 +796,38 @@ public:
  /// searches to be performed in parallel, caching of results across
  /// queries and incremental addition to Worklist. Stops early if N is
  /// found but will resume. Remember to clear Visited and Worklists
  /// if DAG changes.
  /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
  /// giving up. The TopologicalPrune flag signals that positive NodeIds are
  /// topologically ordered (Operands have strictly smaller node id) and search
  /// can be pruned leveraging this.
  static bool hasPredecessorHelper(const SDNode *N,
                                   SmallPtrSetImpl<const SDNode *> &Visited,
                                   SmallVectorImpl<const SDNode *> &Worklist,
                                   unsigned int MaxSteps = 0) {
                                   unsigned int MaxSteps = 0,
                                   bool TopologicalPrune = false) {
    SmallVector<const SDNode *, 8> DeferredNodes;
    if (Visited.count(N))
      return true;

    // Node Ids are assigned in three places: as a topological
    // ordering (> 0), during legalization (results in values set to
    // 0), and new nodes (set to -1). If N has a topological id then we
    // know that all nodes with ids smaller than it cannot be
    // successors and we need not check them. Filter out all nodes
    // that can't be matched. We add them to the worklist before exit
    // in case of multiple calls.

    int NId = N->getNodeId();

    bool Found = false;
    while (!Worklist.empty()) {
      const SDNode *M = Worklist.pop_back_val();
      bool Found = false;
      int MId = M->getNodeId();
      if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
          (MId > 0) && (MId < NId)) {
        DeferredNodes.push_back(M);
        continue;
      }
      for (const SDValue &OpV : M->op_values()) {
        SDNode *Op = OpV.getNode();
        if (Visited.insert(Op).second)
@@ -814,11 +836,13 @@ public:
          Found = true;
      }
      if (Found)
        return true;
        break;
      if (MaxSteps != 0 && Visited.size() >= MaxSteps)
        return false;
        break;
    }
    return false;
    // Push deferred nodes back on worklist.
    Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
    return Found;
  }

  /// Return true if all the users of N are contained in Nodes.
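Both new call sites added in this patch (HandleMergeInputChains and the X86 isFusableLoadOpStorePattern further down) use the bounded form of this helper with the same idiom: if the search is cut off by MaxSteps, a negative answer is meaningless and the caller must fail conservatively. A condensed sketch of the calling pattern follows; Candidates and N stand in for the caller's chain nodes and this is not a literal excerpt from the patch.

    // Seed the worklist with the chain nodes to scan from, then ask whether N
    // is a predecessor, pruning on topological NodeIds.
    const unsigned int Max = 8192;
    SmallPtrSet<const SDNode *, 16> Visited;
    SmallVector<const SDNode *, 16> Worklist(Candidates.begin(),
                                             Candidates.end());
    if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max,
                                     /*TopologicalPrune=*/true))
      return false; // N is a predecessor: folding would create a cycle.
    // Fail conservatively if the search stopped early.
    if (Visited.size() >= Max)
      return false;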
@@ -2137,54 +2137,44 @@ static SDNode *findGlueUse(SDNode *N) {
  return nullptr;
}

/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
/// This function iteratively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
/// beyond "ImmedUse". We may ignore chains as they are checked separately.
static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
                          bool IgnoreChains) {
  // The NodeIds are given unique IDs where a node's ID is guaranteed to be
  // greater than all of its (recursive) operands. If we scan to a point where
  // 'use' is smaller than the node we're scanning for, then we know we will
  // never find it.
  //
  // The Use may be -1 (unassigned) if it is a newly allocated node. This can
  // happen because we scan down to newly selected nodes in the case of glue
  // uses.
  std::vector<SDNode *> WorkList;
  WorkList.push_back(Use);
  SmallPtrSet<const SDNode *, 16> Visited;
  SmallVector<const SDNode *, 16> WorkList;
  // Only check if we have non-immediate uses of Def.
  if (ImmedUse->isOnlyUserOf(Def))
    return false;

  while (!WorkList.empty()) {
    Use = WorkList.back();
    WorkList.pop_back();
    // NodeId topological order of TokenFactors is not guaranteed. Do not skip.
    if (Use->getOpcode() != ISD::TokenFactor &&
        Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
  // We don't care about paths to Def that go through ImmedUse so mark it
  // visited and mark non-def operands as used.
  Visited.insert(ImmedUse);
  for (const SDValue &Op : ImmedUse->op_values()) {
    SDNode *N = Op.getNode();
    // Ignore chain deps (they are validated by
    // HandleMergeInputChains) and immediate uses
    if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
      continue;

    // Don't revisit a node if we already scanned it and didn't fail; we know
    // we won't fail if we scan it again.
    if (!Visited.insert(Use).second)
    if (!Visited.insert(N).second)
      continue;
    WorkList.push_back(N);
  }

    for (const SDValue &Op : Use->op_values()) {
      // Ignore chain uses, they are validated by HandleMergeInputChains.
      if (Op.getValueType() == MVT::Other && IgnoreChains)
        continue;

  // Initialize worklist to operands of Root.
  if (Root != ImmedUse) {
    for (const SDValue &Op : Root->op_values()) {
      SDNode *N = Op.getNode();
      if (N == Def) {
        if (Use == ImmedUse || Use == Root)
          continue; // We are not looking for immediate use.
        assert(N != Root);
        return true;
      }

      // Traverse up the operand chain.
      // Ignore chains (they are validated by HandleMergeInputChains)
      if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
        continue;
      if (!Visited.insert(N).second)
        continue;
      WorkList.push_back(N);
    }
  }
  return false;

  return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
}

/// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -2256,13 +2246,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,

    // If our query node has a glue result with a use, we've walked up it. If
    // the user (which has already been selected) has a chain or indirectly uses
    // the chain, our WalkChainUsers predicate will not consider it. Because of
    // the chain, HandleMergeInputChains will not consider it. Because of
    // this, we cannot ignore chains in this predicate.
    IgnoreChains = false;
  }

  SmallPtrSet<SDNode*, 16> Visited;
  return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
  return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}

void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2381,143 +2370,6 @@ void SelectionDAGISel::UpdateChains(
  DEBUG(dbgs() << "ISEL: Match complete!\n");
}

enum ChainResult {
  CR_Simple,
  CR_InducesCycle,
  CR_LeadsToInteriorNode
};

/// WalkChainUsers - Walk down the users of the specified chained node that is
/// part of the pattern we're matching, looking at all of the users we find.
/// This determines whether something is an interior node, whether we have a
/// non-pattern node in between two pattern nodes (which prevents folding
/// because it would induce a cycle) and whether we have a TokenFactor node
/// sandwiched between pattern nodes (in which case the TF becomes part of the
/// pattern).
///
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
static ChainResult
WalkChainUsers(const SDNode *ChainedNode,
               SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
               DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
               SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
  ChainResult Result = CR_Simple;

  for (SDNode::use_iterator UI = ChainedNode->use_begin(),
       E = ChainedNode->use_end(); UI != E; ++UI) {
    // Make sure the use is of the chain, not some other value we produce.
    if (UI.getUse().getValueType() != MVT::Other) continue;

    SDNode *User = *UI;

    if (User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
      continue;

    // If we see an already-selected machine node, then we've gone beyond the
    // pattern that we're selecting down into the already selected chunk of the
    // DAG.
    unsigned UserOpcode = User->getOpcode();
    if (User->isMachineOpcode() ||
        UserOpcode == ISD::CopyToReg ||
        UserOpcode == ISD::CopyFromReg ||
        UserOpcode == ISD::INLINEASM ||
        UserOpcode == ISD::EH_LABEL ||
        UserOpcode == ISD::LIFETIME_START ||
        UserOpcode == ISD::LIFETIME_END) {
      // If their node ID got reset to -1 then they've already been selected.
      // Treat them like a MachineOpcode.
      if (User->getNodeId() == -1)
        continue;
    }

    // If we have a TokenFactor, we handle it specially.
    if (User->getOpcode() != ISD::TokenFactor) {
      // If the node isn't a token factor and isn't part of our pattern, then it
      // must be a random chained node in between two nodes we're selecting.
      // This happens when we have something like:
      //   x = load ptr
      //   call
      //   y = x+4
      //   store y -> ptr
      // Because we structurally match the load/store as a read/modify/write,
      // but the call is chained between them. We cannot fold in this case
      // because it would induce a cycle in the graph.
      if (!std::count(ChainedNodesInPattern.begin(),
                      ChainedNodesInPattern.end(), User))
        return CR_InducesCycle;

      // Otherwise we found a node that is part of our pattern. For example in:
      //   x = load ptr
      //   y = x+4
      //   store y -> ptr
      // This would happen when we're scanning down from the load and see the
      // store as a user. Record that there is a use of ChainedNode that is
      // part of the pattern and keep scanning uses.
      Result = CR_LeadsToInteriorNode;
      InteriorChainedNodes.push_back(User);
      continue;
    }

    // If we found a TokenFactor, there are two cases to consider: first if the
    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
    // uses of the TF are in our pattern) we just want to ignore it. Second,
    // the TokenFactor can be sandwiched in between two chained nodes, like so:
    //     [Load chain]
    //         ^
    //         |
    //       [Load]
    //       ^    ^
    //       |    \                    DAG's like cheese
    //      /       \                       do you?
    //     /          |
    // [TokenFactor] [Op]
    //     ^          ^
    //     |          |
    //      \        /
    //       \      /
    //       [Store]
    //
    // In this case, the TokenFactor becomes part of our match and we rewrite it
    // as a new TokenFactor.
    //
    // To distinguish these two cases, do a recursive walk down the uses.
    auto MemoizeResult = TokenFactorResult.find(User);
    bool Visited = MemoizeResult != TokenFactorResult.end();
    // Recursively walk chain users only if the result is not memoized.
    if (!Visited) {
      auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
                                InteriorChainedNodes);
      MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
    }
    switch (MemoizeResult->second) {
    case CR_Simple:
      // If the uses of the TokenFactor are just already-selected nodes, ignore
      // it, it is "below" our pattern.
      continue;
    case CR_InducesCycle:
      // If the uses of the TokenFactor lead to nodes that are not part of our
      // pattern that are not selected, folding would turn this into a cycle,
      // bail out now.
      return CR_InducesCycle;
    case CR_LeadsToInteriorNode:
      break;  // Otherwise, keep processing.
    }

    // Okay, we know we're in the interesting interior case. The TokenFactor
    // is now going to be considered part of the pattern so that we rewrite its
    // uses (it may have uses that are not part of the pattern) with the
    // ultimate chain result of the generated code. We will also add its chain
    // inputs as inputs to the ultimate TokenFactor we create.
    Result = CR_LeadsToInteriorNode;
    if (!Visited) {
      ChainedNodesInPattern.push_back(User);
      InteriorChainedNodes.push_back(User);
    }
  }

  return Result;
}

/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
/// operation for when the pattern matched at least one node with a chain. The
/// input vector contains a list of all of the chained nodes that we match. We
@@ -2527,47 +2379,60 @@ WalkChainUsers(const SDNode *ChainedNode,
static SDValue
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
                       SelectionDAG *CurDAG) {
  // Used for memoization. Without it WalkChainUsers could take exponential
  // time to run.
  DenseMap<const SDNode *, ChainResult> TokenFactorResult;
  // Walk all of the chained nodes we've matched, recursively scanning down the
  // users of the chain result. This adds any TokenFactor nodes that are caught
  // in between chained nodes to the chained and interior nodes list.
  SmallVector<SDNode*, 3> InteriorChainedNodes;
  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
    if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
                       TokenFactorResult,
                       InteriorChainedNodes) == CR_InducesCycle)
      return SDValue(); // Would induce a cycle.
  }

  // Okay, we have walked all the matched nodes and collected TokenFactor nodes
  // that we are interested in. Form our input TokenFactor node.
  SmallPtrSet<const SDNode *, 16> Visited;
  SmallVector<const SDNode *, 8> Worklist;
  SmallVector<SDValue, 3> InputChains;
  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
    // Add the input chain of this node to the InputChains list (which will be
    // the operands of the generated TokenFactor) if it's not an interior node.
    SDNode *N = ChainNodesMatched[i];
    if (N->getOpcode() != ISD::TokenFactor) {
      if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
        continue;
  unsigned int Max = 8192;

      // Otherwise, add the input chain.
      SDValue InChain = ChainNodesMatched[i]->getOperand(0);
      assert(InChain.getValueType() == MVT::Other && "Not a chain");
      InputChains.push_back(InChain);
      continue;
    }
  // Quick exit on trivial merge.
  if (ChainNodesMatched.size() == 1)
    return ChainNodesMatched[0]->getOperand(0);

    // If we have a token factor, we want to add all inputs of the token factor
    // that are not part of the pattern we're matching.
    for (const SDValue &Op : N->op_values()) {
      if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
                      Op.getNode()))
        InputChains.push_back(Op);
    }
  // Add chains that aren't already added (internal). Peek through
  // token factors.
  std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
    if (V.getValueType() != MVT::Other)
      return;
    if (V->getOpcode() == ISD::EntryToken)
      return;
    // Newly selected nodes (-1) are always added directly.
    if (V->getNodeId() == -1)
      InputChains.push_back(V);
    else if (V->getOpcode() == ISD::TokenFactor) {
      for (int i = 0, e = V->getNumOperands(); i != e; ++i)
        AddChains(V->getOperand(i));
    } else if (!Visited.count(V.getNode()))
      InputChains.push_back(V);
  };

  for (auto *N : ChainNodesMatched) {
    Worklist.push_back(N);
    Visited.insert(N);
  }

  while (!Worklist.empty())
    AddChains(Worklist.pop_back_val()->getOperand(0));

  // Skip the search if there are no chain dependencies.
  if (InputChains.size() == 0)
    return CurDAG->getEntryNode();

  // If one of these chains is a successor of input, we must have a
  // node that is both the predecessor and successor of the
  // to-be-merged nodes. Fail.
  Visited.clear();
  for (SDValue V : InputChains)
    Worklist.push_back(V.getNode());

  for (auto *N : ChainNodesMatched)
    if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
      return SDValue();
  // Fail conservatively if we stopped searching early.
  if (Visited.size() >= Max)
    return SDValue();

  // Return merged chain.
  if (InputChains.size() == 1)
    return InputChains[0];
  return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
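The AddChains helper above uses std::function (rather than auto) so the lambda can name itself and recurse through nested TokenFactors, flattening them into a list of leaf input chains. A stripped-down illustration of the same pattern, using a hypothetical Node type rather than the LLVM API:

    #include <functional>
    #include <unordered_set>
    #include <vector>

    struct Node {
      bool IsTokenFactor = false;
      std::vector<const Node *> Operands;
    };

    // Collect the leaf (non-TokenFactor) chains reachable from V, skipping
    // anything in Matched, visiting each node at most once.
    std::vector<const Node *>
    collectLeafChains(const Node *V,
                      const std::unordered_set<const Node *> &Matched) {
      std::vector<const Node *> Leaves;
      std::unordered_set<const Node *> Seen;
      // std::function lets the lambda refer to itself recursively.
      std::function<void(const Node *)> Add = [&](const Node *N) {
        if (!Seen.insert(N).second || Matched.count(N))
          return;
        if (N->IsTokenFactor) {
          for (const Node *Op : N->Operands)
            Add(Op); // Peek through nested TokenFactors.
          return;
        }
        Leaves.push_back(N);
      };
      Add(V);
      return Leaves;
    }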
@@ -2104,47 +2104,58 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
  // the load output chain as an operand. Return InputChain by reference.
  SDValue Chain = StoreNode->getChain();

  bool ChainCheck = false;
  if (Chain == Load.getValue(1)) {
    ChainCheck = true;
    InputChain = LoadNode->getChain();
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    return true;
  }

  if (Chain.getOpcode() == ISD::TokenFactor) {
    // Fusing Load-Op-Store requires that predecessors of the store also
    // be predecessors of the load. This addition may cause a loop. We
    // can check this by doing a search for Load in the new
    // dependencies. As this can be expensive, heuristically prune
    // this search by visiting the uses and making sure they all have
    // smaller node ids than the load.

    bool FoundLoad = false;
    SmallVector<SDValue, 4> ChainOps;
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
      SDValue Op = Chain.getOperand(i);
      if (Op == Load.getValue(1)) {
        ChainCheck = true;
        FoundLoad = true;
        // Drop Load, but keep its chain. No cycle check necessary.
        ChainOps.push_back(Load.getOperand(0));
        continue;
      }

      // Make sure using Op as part of the chain would not cause a cycle here.
      // In theory, we could check whether the chain node is a predecessor of
      // the load. But that can be very expensive. Instead visit the uses and
      // make sure they all have smaller node ids than the load.
      int LoadId = LoadNode->getNodeId();
      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
             UE = UI->use_end(); UI != UE; ++UI) {
        if (UI.getUse().getResNo() != 0)
          continue;
        if (UI->getNodeId() > LoadId)
          return false;
      }

      LoopWorklist.push_back(Op.getNode());
      ChainOps.push_back(Op);
    }

    if (ChainCheck)
      // Make a new TokenFactor with all the other input chains except
      // for the load.
      InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
                                   MVT::Other, ChainOps);
  }
  if (!ChainCheck)
    return false;
    if (!FoundLoad)
      return false;

  return true;
    // If LoopWorklist is not empty, check whether we would create a loop.
    if (!LoopWorklist.empty()) {
      const unsigned int Max = 8192;
      // Fail if Load is a predecessor of the potentially loop-inducing chain
      // dependencies.
      if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist,
                                       Max, true))
        return false;
      // Fail conservatively if we ended the loop search early.
      if (Visited.size() >= Max)
        return false;
    }

    // Make a new TokenFactor with all the other input chains except
    // for the load.
    InputChain =
        CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
    return true;
  }
  return false;
}

// Change a chain of {load; op; store} of the same value into a simple op
@@ -2374,6 +2385,8 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
  MemOp[1] = LoadNode->getMemOperand();
  Result->setMemRefs(MemOp, MemOp + 2);

  // Update Load Chain uses as well.
  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
  CurDAG->RemoveDeadNode(Node);
@@ -90,12 +90,12 @@ define void @avg_v16i8(<16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind {
; SSE2-LABEL: avg_v32i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-NEXT: movdqa (%rsi), %xmm1
; SSE2-NEXT: pavgb (%rdi), %xmm1
; SSE2-NEXT: pavgb 16(%rsi), %xmm0
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: movdqa (%rsi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: pavgb (%rdi), %xmm0
; SSE2-NEXT: pavgb 16(%rdi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v32i8:
@@ -545,18 +545,18 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
; SSE2-LABEL: avg_v64i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa 32(%rdi), %xmm0
; SSE2-NEXT: movdqa (%rsi), %xmm1
; SSE2-NEXT: movdqa 16(%rsi), %xmm2
; SSE2-NEXT: movdqa (%rsi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: movdqa 32(%rsi), %xmm2
; SSE2-NEXT: movdqa 48(%rsi), %xmm3
; SSE2-NEXT: pavgb (%rdi), %xmm1
; SSE2-NEXT: pavgb 16(%rdi), %xmm2
; SSE2-NEXT: pavgb 32(%rsi), %xmm0
; SSE2-NEXT: pavgb (%rdi), %xmm0
; SSE2-NEXT: pavgb 16(%rdi), %xmm1
; SSE2-NEXT: pavgb 32(%rdi), %xmm2
; SSE2-NEXT: pavgb 48(%rdi), %xmm3
; SSE2-NEXT: movdqu %xmm3, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v64i8:
@@ -582,23 +582,23 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
;
; AVX2-LABEL: avg_v64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
; AVX2-NEXT: vpavgb (%rdi), %ymm1, %ymm1
; AVX2-NEXT: vpavgb 32(%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vmovdqa (%rsi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpavgb 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqu %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: avg_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
; AVX512F-NEXT: vpavgb (%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vpavgb 32(%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
; AVX512F-NEXT: vpavgb (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vpavgb 32(%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -678,12 +678,12 @@ define void @avg_v8i16(<8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
; SSE2-LABEL: avg_v16i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-NEXT: movdqa (%rsi), %xmm1
; SSE2-NEXT: pavgw (%rdi), %xmm1
; SSE2-NEXT: pavgw 16(%rsi), %xmm0
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: movdqa (%rsi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: pavgw (%rdi), %xmm0
; SSE2-NEXT: pavgw 16(%rdi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v16i16:
@@ -729,18 +729,18 @@ define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
; SSE2-LABEL: avg_v32i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa 32(%rdi), %xmm0
; SSE2-NEXT: movdqa (%rsi), %xmm1
; SSE2-NEXT: movdqa 16(%rsi), %xmm2
; SSE2-NEXT: movdqa (%rsi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: movdqa 32(%rsi), %xmm2
; SSE2-NEXT: movdqa 48(%rsi), %xmm3
; SSE2-NEXT: pavgw (%rdi), %xmm1
; SSE2-NEXT: pavgw 16(%rdi), %xmm2
; SSE2-NEXT: pavgw 32(%rsi), %xmm0
; SSE2-NEXT: pavgw (%rdi), %xmm0
; SSE2-NEXT: pavgw 16(%rdi), %xmm1
; SSE2-NEXT: pavgw 32(%rdi), %xmm2
; SSE2-NEXT: pavgw 48(%rdi), %xmm3
; SSE2-NEXT: movdqu %xmm3, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v32i16:
@@ -766,23 +766,23 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
;
; AVX2-LABEL: avg_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
; AVX2-NEXT: vpavgw (%rdi), %ymm1, %ymm1
; AVX2-NEXT: vpavgw 32(%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vmovdqa (%rsi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
; AVX2-NEXT: vpavgw (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqu %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: avg_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
; AVX512F-NEXT: vpavgw (%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vpavgw 32(%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
; AVX512F-NEXT: vpavgw (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -891,9 +891,9 @@ define void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) nounwind {
; SSE2-LABEL: avg_v32i8_2:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: pavgb (%rsi), %xmm0
; SSE2-NEXT: pavgb 16(%rdi), %xmm1
; SSE2-NEXT: pavgb 16(%rsi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
@@ -1072,9 +1072,9 @@ define void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) nounwind {
; SSE2-LABEL: avg_v16i16_2:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: pavgw (%rsi), %xmm0
; SSE2-NEXT: pavgw 16(%rdi), %xmm1
; SSE2-NEXT: pavgw 16(%rsi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
@@ -1124,14 +1124,14 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: movdqa 48(%rdi), %xmm2
; SSE2-NEXT: movdqa 32(%rsi), %xmm3
; SSE2-NEXT: movdqa 32(%rdi), %xmm2
; SSE2-NEXT: movdqa 48(%rdi), %xmm3
; SSE2-NEXT: pavgw (%rsi), %xmm0
; SSE2-NEXT: pavgw 16(%rsi), %xmm1
; SSE2-NEXT: pavgw 32(%rdi), %xmm3
; SSE2-NEXT: pavgw 48(%rsi), %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: pavgw 32(%rsi), %xmm2
; SSE2-NEXT: pavgw 48(%rsi), %xmm3
; SSE2-NEXT: movdqu %xmm3, (%rax)
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
@@ -1160,9 +1160,9 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
; AVX2-LABEL: avg_v32i16_2:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-NEXT: vpavgw (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vpavgw 32(%rsi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqu %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
@@ -1171,9 +1171,9 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
; AVX512F-LABEL: avg_v32i16_2:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpavgw (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vpavgw 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vzeroupper
@@ -235,18 +235,16 @@ define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovaps (%ecx), %xmm0
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT: vmovaps %ymm1, (%eax)
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: PR29088:
; X64: # %bb.0:
; X64-NEXT: vmovaps (%rdi), %xmm0
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT: vmovaps %ymm1, (%rsi)
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
@@ -1065,9 +1065,7 @@ define void @isel_crash_16b(i8* %cV_R.addr) {
; X64: ## %bb.0: ## %eintry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpbroadcastb %xmm1, %xmm1
; X64-NEXT: vpbroadcastb (%rdi), %xmm1
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
@@ -1118,9 +1116,7 @@ define void @isel_crash_32b(i8* %cV_R.addr) {
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %ymm0, (%rsp)
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpbroadcastb %xmm1, %ymm1
; X64-NEXT: vpbroadcastb (%rdi), %ymm1
; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rbp, %rsp
@@ -1160,9 +1156,7 @@ define void @isel_crash_8w(i16* %cV_R.addr) {
; X64: ## %bb.0: ## %entry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpbroadcastw %xmm1, %xmm1
; X64-NEXT: vpbroadcastw (%rdi), %xmm1
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
@@ -1213,9 +1207,7 @@ define void @isel_crash_16w(i16* %cV_R.addr) {
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %ymm0, (%rsp)
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpbroadcastw %xmm1, %ymm1
; X64-NEXT: vpbroadcastw (%rdi), %ymm1
; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rbp, %rsp
@@ -1251,26 +1243,14 @@ define void @isel_crash_4d(i32* %cV_R.addr) {
; X32-NEXT: addl $60, %esp
; X32-NEXT: retl
;
; X64-AVX2-LABEL: isel_crash_4d:
; X64-AVX2: ## %bb.0: ## %entry
; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: movl (%rdi), %eax
; X64-AVX2-NEXT: vmovd %eax, %xmm1
; X64-AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: isel_crash_4d:
; X64-AVX512VL: ## %bb.0: ## %entry
; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: movl (%rdi), %eax
; X64-AVX512VL-NEXT: vpbroadcastd %eax, %xmm1
; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: retq
; X64-LABEL: isel_crash_4d:
; X64: ## %bb.0: ## %entry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vbroadcastss (%rdi), %xmm1
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
entry:
%__a.addr.i = alloca <2 x i64>, align 16
%__b.addr.i = alloca <2 x i64>, align 16
@@ -1307,46 +1287,24 @@ define void @isel_crash_8d(i32* %cV_R.addr) {
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-AVX2-LABEL: isel_crash_8d:
; X64-AVX2: ## %bb.0: ## %eintry
; X64-AVX2-NEXT: pushq %rbp
; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
; X64-AVX2-NEXT: .cfi_offset %rbp, -16
; X64-AVX2-NEXT: movq %rsp, %rbp
; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
; X64-AVX2-NEXT: andq $-32, %rsp
; X64-AVX2-NEXT: subq $128, %rsp
; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT: movl (%rdi), %eax
; X64-AVX2-NEXT: vmovd %eax, %xmm1
; X64-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: movq %rbp, %rsp
; X64-AVX2-NEXT: popq %rbp
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: isel_crash_8d:
; X64-AVX512VL: ## %bb.0: ## %eintry
; X64-AVX512VL-NEXT: pushq %rbp
; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
; X64-AVX512VL-NEXT: movq %rsp, %rbp
; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
; X64-AVX512VL-NEXT: andq $-32, %rsp
; X64-AVX512VL-NEXT: subq $128, %rsp
; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX512VL-NEXT: movl (%rdi), %eax
; X64-AVX512VL-NEXT: vpbroadcastd %eax, %ymm1
; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: movq %rbp, %rsp
; X64-AVX512VL-NEXT: popq %rbp
; X64-AVX512VL-NEXT: vzeroupper
; X64-AVX512VL-NEXT: retq
; X64-LABEL: isel_crash_8d:
; X64: ## %bb.0: ## %eintry
; X64-NEXT: pushq %rbp
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: .cfi_def_cfa_register %rbp
; X64-NEXT: andq $-32, %rsp
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %ymm0, (%rsp)
; X64-NEXT: vbroadcastss (%rdi), %ymm1
; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rbp, %rsp
; X64-NEXT: popq %rbp
; X64-NEXT: vzeroupper
; X64-NEXT: retq
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
%__b.addr.i = alloca <4 x i64>, align 16
@@ -1370,33 +1328,20 @@ define void @isel_crash_2q(i64* %cV_R.addr) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vmovaps %xmm0, (%esp)
; X32-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: vpbroadcastq %xmm1, %xmm1
; X32-NEXT: vpbroadcastq (%eax), %xmm1
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
; X32-NEXT: retl
;
; X64-AVX2-LABEL: isel_crash_2q:
; X64-AVX2: ## %bb.0: ## %entry
; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: movq (%rdi), %rax
; X64-AVX2-NEXT: vmovq %rax, %xmm1
; X64-AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: isel_crash_2q:
; X64-AVX512VL: ## %bb.0: ## %entry
; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: movq (%rdi), %rax
; X64-AVX512VL-NEXT: vpbroadcastq %rax, %xmm1
; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: retq
; X64-LABEL: isel_crash_2q:
; X64: ## %bb.0: ## %entry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vpbroadcastq (%rdi), %xmm1
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
entry:
%__a.addr.i = alloca <2 x i64>, align 16
%__b.addr.i = alloca <2 x i64>, align 16
@@ -1433,46 +1378,24 @@ define void @isel_crash_4q(i64* %cV_R.addr) {
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-AVX2-LABEL: isel_crash_4q:
; X64-AVX2: ## %bb.0: ## %eintry
; X64-AVX2-NEXT: pushq %rbp
; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
; X64-AVX2-NEXT: .cfi_offset %rbp, -16
; X64-AVX2-NEXT: movq %rsp, %rbp
; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
; X64-AVX2-NEXT: andq $-32, %rsp
; X64-AVX2-NEXT: subq $128, %rsp
; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT: movq (%rdi), %rax
; X64-AVX2-NEXT: vmovq %rax, %xmm1
; X64-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX2-NEXT: movq %rbp, %rsp
; X64-AVX2-NEXT: popq %rbp
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: isel_crash_4q:
; X64-AVX512VL: ## %bb.0: ## %eintry
; X64-AVX512VL-NEXT: pushq %rbp
; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
; X64-AVX512VL-NEXT: movq %rsp, %rbp
; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
; X64-AVX512VL-NEXT: andq $-32, %rsp
; X64-AVX512VL-NEXT: subq $128, %rsp
; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX512VL-NEXT: movq (%rdi), %rax
; X64-AVX512VL-NEXT: vpbroadcastq %rax, %ymm1
; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-AVX512VL-NEXT: movq %rbp, %rsp
; X64-AVX512VL-NEXT: popq %rbp
; X64-AVX512VL-NEXT: vzeroupper
; X64-AVX512VL-NEXT: retq
; X64-LABEL: isel_crash_4q:
; X64: ## %bb.0: ## %eintry
; X64-NEXT: pushq %rbp
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: .cfi_def_cfa_register %rbp
; X64-NEXT: andq $-32, %rsp
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %ymm0, (%rsp)
; X64-NEXT: vbroadcastsd (%rdi), %ymm1
; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rbp, %rsp
; X64-NEXT: popq %rbp
; X64-NEXT: vzeroupper
; X64-NEXT: retq
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
%__b.addr.i = alloca <4 x i64>, align 16
@@ -271,18 +271,16 @@ define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovaps (%ecx), %xmm0
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT: vmovaps %ymm1, (%eax)
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: PR29088:
; X64: # %bb.0:
; X64-NEXT: vmovaps (%rdi), %xmm0
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT: vmovaps %ymm1, (%rsi)
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
@@ -186,26 +186,23 @@ define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X64-AVX512VL-LABEL: PR29088:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
; X64-AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: PR29088:
; X64-AVX512BWVL: ## %bb.0:
; X64-AVX512BWVL-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512BWVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
; X64-AVX512BWVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: PR29088:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512DQVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
; X64-AVX512DQVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
test/CodeGen/X86/foldmem_cycle.ll (new file, 34 lines)
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=X64

; The load should not be merged with the 'and' as it causes a cycle in the DAG.

define void @foo() {
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl (%rax), %ebx
; X64-NEXT: callq bar
; X64-NEXT: testl %ebx, %eax
; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: .LBB0_2: # %if.end
entry:
  %0 = load i32, i32* undef
  %call = tail call i32 @bar()
  %and = and i32 %call, %0
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.then, label %if.end

if.then:
  ret void

if.end:
  unreachable
}

declare i32 @bar()
@@ -9,40 +9,30 @@ define void @add(i256* %p, i256* %q) nounwind {
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $12, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 8(%ecx), %edi
; X32-NEXT: movl (%ecx), %edx
; X32-NEXT: movl 4(%ecx), %ebx
; X32-NEXT: movl 28(%eax), %esi
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: movl 24(%eax), %ebp
; X32-NEXT: addl (%eax), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: adcl 4(%eax), %ebx
; X32-NEXT: adcl 8(%eax), %edi
; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movl 20(%eax), %edi
; X32-NEXT: movl 12(%eax), %edx
; X32-NEXT: movl 16(%eax), %esi
; X32-NEXT: adcl 12(%ecx), %edx
; X32-NEXT: adcl 16(%ecx), %esi
; X32-NEXT: adcl 20(%ecx), %edi
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: adcl 24(%ecx), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
; X32-NEXT: adcl %ebp, 28(%ecx)
; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, 8(%ecx)
; X32-NEXT: movl %ebx, 4(%ecx)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, (%ecx)
; X32-NEXT: movl %edx, 12(%ecx)
; X32-NEXT: movl %esi, 16(%ecx)
; X32-NEXT: movl %edi, 20(%ecx)
; X32-NEXT: movl %eax, 24(%ecx)
; X32-NEXT: addl $12, %esp
; X32-NEXT: movl 28(%eax), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: movl 24(%eax), %ecx
; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: movl 20(%eax), %esi
; X32-NEXT: movl 16(%eax), %edi
; X32-NEXT: movl 12(%eax), %ebx
; X32-NEXT: movl 8(%eax), %ebp
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl 4(%eax), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addl %ecx, (%eax)
; X32-NEXT: adcl %edx, 4(%eax)
; X32-NEXT: adcl %ebp, 8(%eax)
; X32-NEXT: adcl %ebx, 12(%eax)
; X32-NEXT: adcl %edi, 16(%eax)
; X32-NEXT: adcl %esi, 20(%eax)
; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, 24(%eax)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, 28(%eax)
; X32-NEXT: addl $8, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -51,17 +41,14 @@ define void @add(i256* %p, i256* %q) nounwind {
;
; X64-LABEL: add:
; X64: # %bb.0:
; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq 8(%rdi), %rdx
; X64-NEXT: movq 24(%rsi), %r8
; X64-NEXT: addq (%rsi), %rcx
; X64-NEXT: adcq 8(%rsi), %rdx
; X64-NEXT: adcq 16(%rsi), %rax
; X64-NEXT: adcq %r8, 24(%rdi)
; X64-NEXT: movq %rax, 16(%rdi)
; X64-NEXT: movq %rdx, 8(%rdi)
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: movq 24(%rsi), %rax
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: movq 8(%rsi), %rsi
; X64-NEXT: addq %rdx, (%rdi)
; X64-NEXT: adcq %rsi, 8(%rdi)
; X64-NEXT: adcq %rcx, 16(%rdi)
; X64-NEXT: adcq %rax, 24(%rdi)
; X64-NEXT: retq
%a = load i256, i256* %p
%b = load i256, i256* %q
@@ -77,35 +64,28 @@ define void @sub(i256* %p, i256* %q) nounwind {
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $8, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 16(%ecx), %eax
; X32-NEXT: movl 12(%ecx), %edx
; X32-NEXT: movl 8(%ecx), %edi
; X32-NEXT: movl (%ecx), %ebx
; X32-NEXT: movl 4(%ecx), %ebp
; X32-NEXT: subl (%esi), %ebx
; X32-NEXT: sbbl 4(%esi), %ebp
; X32-NEXT: sbbl 8(%esi), %edi
; X32-NEXT: sbbl 12(%esi), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: sbbl 16(%esi), %eax
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl 20(%ecx), %edx
; X32-NEXT: sbbl 20(%esi), %edx
; X32-NEXT: movl 24(%ecx), %eax
; X32-NEXT: sbbl 24(%esi), %eax
; X32-NEXT: movl 28(%esi), %esi
; X32-NEXT: sbbl %esi, 28(%ecx)
; X32-NEXT: movl %edi, 8(%ecx)
; X32-NEXT: movl %ebp, 4(%ecx)
; X32-NEXT: movl %ebx, (%ecx)
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
; X32-NEXT: movl %esi, 12(%ecx)
; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: movl %esi, 16(%ecx)
; X32-NEXT: movl %edx, 20(%ecx)
; X32-NEXT: movl %eax, 24(%ecx)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 28(%eax), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT: movl 24(%eax), %ecx
; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: movl 20(%eax), %esi
; X32-NEXT: movl 16(%eax), %edi
; X32-NEXT: movl 12(%eax), %ebx
; X32-NEXT: movl 8(%eax), %ebp
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl 4(%eax), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: subl %ecx, (%eax)
; X32-NEXT: sbbl %edx, 4(%eax)
; X32-NEXT: sbbl %ebp, 8(%eax)
; X32-NEXT: sbbl %ebx, 12(%eax)
; X32-NEXT: sbbl %edi, 16(%eax)
; X32-NEXT: sbbl %esi, 20(%eax)
; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: sbbl %ecx, 24(%eax)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
; X32-NEXT: sbbl %ecx, 28(%eax)
; X32-NEXT: addl $8, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
@@ -115,17 +95,14 @@ define void @sub(i256* %p, i256* %q) nounwind {
;
; X64-LABEL: sub:
; X64: # %bb.0:
; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq 8(%rdi), %rdx
; X64-NEXT: movq 24(%rsi), %r8
; X64-NEXT: subq (%rsi), %rcx
; X64-NEXT: sbbq 8(%rsi), %rdx
; X64-NEXT: sbbq 16(%rsi), %rax
; X64-NEXT: sbbq %r8, 24(%rdi)
; X64-NEXT: movq %rax, 16(%rdi)
; X64-NEXT: movq %rdx, 8(%rdi)
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: movq 24(%rsi), %rax
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: movq 8(%rsi), %rsi
; X64-NEXT: subq %rdx, (%rdi)
; X64-NEXT: sbbq %rsi, 8(%rdi)
; X64-NEXT: sbbq %rcx, 16(%rdi)
; X64-NEXT: sbbq %rax, 24(%rdi)
; X64-NEXT: retq
%a = load i256, i256* %p
%b = load i256, i256* %q
test/CodeGen/X86/load-op-store-fusion.ll (new file, 32 lines)
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64

; This test makes sure we do not merge both load-op-store pairs here as it causes a cycle.

define i8* @fn(i32 %i.015.i, [64 x i64]* %data.i) {
; X32-LABEL: fn:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx,%eax,8), %edx
; X32-NEXT: addl $1, %edx
; X32-NEXT: adcl $0, 4(%ecx,%eax,8)
; X32-NEXT: movl %edx, (%ecx,%eax,8)
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: fn:
; X64: # %bb.0: # %entry
; X64-NEXT: movslq %edi, %rax
; X64-NEXT: incq (%rsi,%rax,8)
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
  %arrayidx.i6 = getelementptr inbounds [64 x i64], [64 x i64]* %data.i, i32 0, i32 %i.015.i
  %x8 = load volatile i64, i64* %arrayidx.i6, align 8
  %inc.i7 = add i64 %x8, 1
  store volatile i64 %inc.i7, i64* %arrayidx.i6, align 8
  ret i8* null
}
@@ -1264,8 +1264,7 @@ define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %v
; AVX-LABEL: load_one_mask_bit_set5:
; AVX: ## %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: retq
;
@@ -10,12 +10,11 @@ define i32 @foo (i64* %so) nounwind uwtable ssp {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $0, 28(%eax)
; CHECK-NEXT: movl $0, 24(%eax)
; CHECK-NEXT: movl 20(%eax), %ecx
; CHECK-NEXT: movl $0, 20(%eax)
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: cmpl 16(%eax), %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl 16(%eax), %ecx
; CHECK-NEXT: movl $0, 16(%eax)
; CHECK-NEXT: sbbl %ecx, %edx
; CHECK-NEXT: sbbl 20(%eax), %ecx
; CHECK-NEXT: movl $0, 20(%eax)
; CHECK-NEXT: setl %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: negl %eax
@@ -13,36 +13,35 @@ define i32 @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
; X32-SSE-NEXT: andl $-16, %esp
; X32-SSE-NEXT: subl $16, %esp
; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movl 12(%ebp), %ecx
; X32-SSE-NEXT: movdqa 56(%ebp), %xmm4
; X32-SSE-NEXT: movdqa 40(%ebp), %xmm5
; X32-SSE-NEXT: movdqa 24(%ebp), %xmm6
; X32-SSE-NEXT: movl 8(%ebp), %edx
; X32-SSE-NEXT: movl 80(%ebp), %ecx
; X32-SSE-NEXT: movl (%ecx), %esi
; X32-SSE-NEXT: movl 8(%ebp), %esi
; X32-SSE-NEXT: movl 80(%ebp), %edx
; X32-SSE-NEXT: movl (%edx), %eax
; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT: movntps %xmm0, (%edx)
; X32-SSE-NEXT: movntps %xmm0, (%esi)
; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: addl (%ecx), %esi
; X32-SSE-NEXT: movntdq %xmm2, (%edx)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movntdq %xmm2, (%esi)
; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: addl (%ecx), %esi
; X32-SSE-NEXT: movntpd %xmm1, (%edx)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movntpd %xmm1, (%esi)
; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm6
; X32-SSE-NEXT: addl (%ecx), %esi
; X32-SSE-NEXT: movntdq %xmm6, (%edx)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movntdq %xmm6, (%esi)
; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm5
; X32-SSE-NEXT: addl (%ecx), %esi
; X32-SSE-NEXT: movntdq %xmm5, (%edx)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movntdq %xmm5, (%esi)
; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm4
; X32-SSE-NEXT: addl (%ecx), %esi
; X32-SSE-NEXT: movntdq %xmm4, (%edx)
; X32-SSE-NEXT: addl (%ecx), %esi
; X32-SSE-NEXT: movntil %eax, (%edx)
; X32-SSE-NEXT: movl (%ecx), %eax
; X32-SSE-NEXT: addl %esi, %eax
; X32-SSE-NEXT: movsd %xmm3, (%edx)
; X32-SSE-NEXT: addl (%ecx), %eax
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movntdq %xmm4, (%esi)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movntil %ecx, (%esi)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: movsd %xmm3, (%esi)
; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: leal -4(%ebp), %esp
; X32-SSE-NEXT: popl %esi
; X32-SSE-NEXT: popl %ebp
@ -56,36 +55,35 @@ define i32 @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
|
||||
; X32-AVX-NEXT: andl $-16, %esp
|
||||
; X32-AVX-NEXT: subl $16, %esp
|
||||
; X32-AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; X32-AVX-NEXT: movl 12(%ebp), %eax
|
||||
; X32-AVX-NEXT: movl 12(%ebp), %ecx
|
||||
; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm4
|
||||
; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm5
|
||||
; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm6
|
||||
; X32-AVX-NEXT: movl 8(%ebp), %ecx
|
||||
; X32-AVX-NEXT: movl 80(%ebp), %edx
|
||||
; X32-AVX-NEXT: movl (%edx), %esi
|
||||
; X32-AVX-NEXT: movl 8(%ebp), %edx
|
||||
; X32-AVX-NEXT: movl 80(%ebp), %esi
|
||||
; X32-AVX-NEXT: movl (%esi), %eax
|
||||
; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
|
||||
; X32-AVX-NEXT: vmovntps %xmm0, (%ecx)
|
||||
; X32-AVX-NEXT: vmovntps %xmm0, (%edx)
|
||||
; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
|
||||
; X32-AVX-NEXT: addl (%edx), %esi
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
|
||||
; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
|
||||
; X32-AVX-NEXT: addl (%edx), %esi
|
||||
; X32-AVX-NEXT: vmovntpd %xmm0, (%ecx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: vmovntpd %xmm0, (%edx)
|
||||
; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm6, %xmm0
|
||||
; X32-AVX-NEXT: addl (%edx), %esi
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
|
||||
; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm5, %xmm0
|
||||
; X32-AVX-NEXT: addl (%edx), %esi
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
|
||||
; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm4, %xmm0
|
||||
; X32-AVX-NEXT: addl (%edx), %esi
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
|
||||
; X32-AVX-NEXT: addl (%edx), %esi
|
||||
; X32-AVX-NEXT: movntil %eax, (%ecx)
|
||||
; X32-AVX-NEXT: movl (%edx), %eax
|
||||
; X32-AVX-NEXT: addl %esi, %eax
|
||||
; X32-AVX-NEXT: vmovsd %xmm3, (%ecx)
|
||||
; X32-AVX-NEXT: addl (%edx), %eax
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: movntil %ecx, (%edx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: vmovsd %xmm3, (%edx)
|
||||
; X32-AVX-NEXT: addl (%esi), %eax
|
||||
; X32-AVX-NEXT: leal -4(%ebp), %esp
|
||||
; X32-AVX-NEXT: popl %esi
|
||||
; X32-AVX-NEXT: popl %ebp
|
||||
|
@ -17,14 +17,14 @@ cond_true2732.preheader: ; preds = %entry
  store i64 %tmp2676.us.us, i64* %tmp2666
  ret i32 0

; INTEL: and {{e..}}, dword ptr [360]
; INTEL: and dword ptr [356], {{e..}}
; FIXME: mov dword ptr [360], {{e..}}
; INTEL: and {{e..}}, dword ptr [356]
; INTEL: and dword ptr [360], {{e..}}
; FIXME: mov dword ptr [356], {{e..}}
; The above line comes out as 'mov 360, eax', but when the register is ecx it works?

; ATT: andl 360, %{{e..}}
; ATT: andl %{{e..}}, 356
; ATT: movl %{{e..}}, 360
; ATT: andl 356, %{{e..}}
; ATT: andl %{{e..}}, 360
; ATT: movl %{{e..}}, 356

}
@ -751,72 +751,64 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX-NEXT: retl
;
; X32-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
; X32-AVX512BW: # %bb.0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
; X32-AVX512DQ: # %bb.0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  store <4 x float> zeroinitializer, <4 x float>* %p1
@ -829,10 +821,9 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>*
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
@ -840,63 +831,56 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>*
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
; X32-AVX512BW: # %bb.0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
; X32-AVX512DQ: # %bb.0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX512DQ-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512F: # %bb.0:
; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512BW: # %bb.0:
; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX512DQ-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512DQ-NEXT: retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  store <4 x float> zeroinitializer, <4 x float>* %p1
@ -685,64 +685,49 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; XOP-NEXT: vmovd %eax, %xmm0
; XOP-NEXT: vpextrb $1, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $2, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $3, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $4, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $5, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $6, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $7, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $8, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $9, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $10, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $11, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $12, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $13, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $14, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $15, %xmm2, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $0, %xmm1, %eax
; XOP-NEXT: andl $31, %eax
; XOP-NEXT: movzbl (%rsp,%rax), %eax
@ -812,64 +797,49 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrb $1, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $2, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $3, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $4, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $5, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $6, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $7, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $8, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $9, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $10, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $11, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $12, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $13, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $14, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $15, %xmm2, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
; AVX1-NEXT: andl $31, %eax
; AVX1-NEXT: movzbl (%rsp,%rax), %eax
@ -939,64 +909,49 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrb $1, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $2, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $3, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $4, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $5, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $6, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $7, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $8, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $9, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $10, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $11, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $12, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $13, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $14, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $15, %xmm2, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
; AVX2-NEXT: andl $31, %eax
; AVX2-NEXT: movzbl (%rsp,%rax), %eax
@ -1066,64 +1021,49 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vpextrb $1, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $2, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $3, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $4, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $5, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $6, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $7, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $8, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $9, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $10, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $11, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $12, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $13, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $14, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $15, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
@ -1193,64 +1133,49 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; AVX512DQ-NEXT: vmovd %eax, %xmm0
; AVX512DQ-NEXT: vpextrb $1, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $2, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $3, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $4, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $5, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $6, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $7, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $8, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $9, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $10, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $11, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $12, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $13, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $14, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $15, %xmm2, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $0, %xmm1, %eax
; AVX512DQ-NEXT: andl $31, %eax
; AVX512DQ-NEXT: movzbl (%rsp,%rax), %eax
@ -1320,64 +1245,49 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
; AVX512VL-NEXT: vmovd %eax, %xmm0
; AVX512VL-NEXT: vpextrb $1, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $2, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $3, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $4, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $5, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $6, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $7, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $8, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $9, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $10, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $11, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $12, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $13, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $14, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $15, %xmm2, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm1, %eax
; AVX512VL-NEXT: andl $31, %eax
; AVX512VL-NEXT: movzbl (%rsp,%rax), %eax
@ -2383,64 +2293,49 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; XOP-NEXT: vmovd %eax, %xmm0
; XOP-NEXT: vpextrb $1, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $2, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $3, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $4, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $5, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $6, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $7, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $8, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $9, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $10, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $11, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $12, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $13, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $14, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $15, %xmm2, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
; XOP-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; XOP-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; XOP-NEXT: vpextrb $0, %xmm1, %eax
; XOP-NEXT: andl $15, %eax
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
@ -2504,64 +2399,49 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpextrb $1, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $2, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $3, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $4, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $5, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $6, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $7, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $8, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $9, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $10, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $11, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $12, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $13, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $14, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $15, %xmm2, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzbl -24(%rsp,%rax), %eax
@ -2625,64 +2505,49 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpextrb $1, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $2, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $3, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $4, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $5, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $6, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $7, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $8, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $9, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $10, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $11, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $12, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $13, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $14, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $15, %xmm2, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzbl -24(%rsp,%rax), %eax
@ -2746,64 +2611,49 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vpextrb $1, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $2, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $3, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $4, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $5, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $6, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $7, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $8, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $9, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $10, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $11, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $12, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $13, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $14, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $15, %xmm2, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
; AVX512F-NEXT: andl $15, %eax
; AVX512F-NEXT: movzbl -24(%rsp,%rax), %eax
@ -2867,64 +2717,49 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; AVX512DQ-NEXT: vmovd %eax, %xmm0
; AVX512DQ-NEXT: vpextrb $1, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $2, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $3, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $4, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $5, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $6, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $7, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $8, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $9, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $10, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $11, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $12, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $13, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $14, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $15, %xmm2, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512DQ-NEXT: vpextrb $0, %xmm1, %eax
; AVX512DQ-NEXT: andl $15, %eax
; AVX512DQ-NEXT: movzbl -24(%rsp,%rax), %eax
@ -2988,64 +2823,49 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
; AVX512VL-NEXT: vmovd %eax, %xmm0
; AVX512VL-NEXT: vpextrb $1, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $2, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $3, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $4, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $5, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $6, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $7, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $8, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $9, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $10, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $11, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $12, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $13, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $14, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $15, %xmm2, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
; AVX512VL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $15, -24(%rsp,%rax), %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm1, %eax
; AVX512VL-NEXT: andl $15, %eax
; AVX512VL-NEXT: movzbl -24(%rsp,%rax), %eax
@ -47,8 +47,7 @@ define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0,
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; ALL-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: movq %rbp, %rsp
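All of these hunks show the same effect: the separate stack reload (movzbl into %eax, or vmovsd into %xmm0) disappears into the consuming instruction's memory operand (vpinsrb $n, -24(%rsp,%rax) and vmovddup ... = mem[0,0]). Such a fold is legal only when merging the load into its user cannot create a cycle in the DAG, which is what the full-but-pruned search introduced by this commit establishes. Below is a minimal C++ sketch of that legality check on a toy DAG. The Node type and foldWouldCreateCycle are hypothetical, not the SelectionDAG API, and the sketch assumes every node already carries a valid positive topological id (the in-tree code must additionally cope with the 0 and -1 ids left behind by legalization and node creation).

#include <unordered_set>
#include <vector>

// Toy DAG node. 'Id' is a topological id: every operand has a strictly
// smaller id than its user. (Hypothetical type, not llvm::SDNode.)
struct Node {
  int Id;
  std::vector<Node *> Operands;
};

// Would folding load L into its user U create a cycle?  After folding,
// the merged node keeps U's remaining operands, so a cycle appears iff
// some *other* operand of U transitively depends on L.
bool foldWouldCreateCycle(const Node *L, const Node *U) {
  std::vector<const Node *> Worklist;
  std::unordered_set<const Node *> Visited;
  for (const Node *Op : U->Operands)
    if (Op != L)
      Worklist.push_back(Op);
  while (!Worklist.empty()) {
    const Node *M = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(M).second)
      continue; // already explored via another path
    if (M == L)
      return true; // a second U -> L path exists: folding is illegal
    if (M->Id < L->Id)
      continue; // prune: ids only shrink along operand edges,
                // so M can never reach the larger id of L
    for (const Node *Op : M->Operands)
      Worklist.push_back(Op);
  }
  return false; // no other path from U to L: the fold is safe
}

int main() {
  // Diamond: U uses both L and A, and A also uses L, so the fold is illegal.
  Node L{1, {}}, A{2, {&L}}, U{3, {&L, &A}};
  return foldWouldCreateCycle(&L, &U) ? 0 : 1; // exits 0: cycle detected
}

The topological prune is what keeps this otherwise worst-case-quadratic search cheap: any node whose id is already below the load's can be abandoned without visiting its operands.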