1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[Hexagon] Add heuristic to exclude critical path cost for scheduling

Patch by Brendon Cahoon.

llvm-svn: 328022
This commit is contained in:
Krzysztof Parzyszek 2018-03-20 19:26:27 +00:00
parent 42078ae08c
commit 9bbcab4539
6 changed files with 88 additions and 184 deletions

View File

@ -48,21 +48,12 @@ using namespace llvm;
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
cl::Hidden, cl::ZeroOrMore, cl::init(1));
static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
static cl::opt<bool> DisableTCTie("disable-tc-tie",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
static cl::opt<bool> UseNewerCandidate("use-newer-candidate",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
cl::Hidden, cl::ZeroOrMore, cl::init(1));
// Check if the scheduler should penalize instructions that are available to
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
@ -139,7 +130,6 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
if (hasDependence(SU, Packet[i], QII))
return false;
}
return true;
}
@ -206,6 +196,9 @@ void VLIWMachineScheduler::schedule() {
Topo.InitDAGTopologicalSorting();
// Postprocess the DAG to add platform-specific artificial dependencies.
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
@ -554,62 +547,6 @@ static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
return true;
}
/// Return true if there is a maximum of 1 dependence that remains to be
/// scheduled. This function is used to determine if an instruction is
/// almost ready to be scheduled.
static bool isReady(SmallVector<SDep, 4> &Deps) {
if (Deps.size() == 0)
return true;
unsigned NotScheduled = 0;
for (const auto &D : Deps)
if (D.isAssignedRegDep())
if (!D.getSUnit()->isScheduled)
++NotScheduled;
return (NotScheduled <= 1);
}
/// Return true if the successors of the instruction are ready to be
/// scheduled once this instruction is scheduled.
static bool isSuccessorReady(const SUnit *SU) {
if (SU->Succs.size() == 0)
return true;
bool ValidSuccessor = false;
for (const auto &S : SU->Succs) {
if (S.isAssignedRegDep()) {
// If the successor has been scheduled, that means it was added to the
// bottom up schedule. In this case, the successor will not be close.
if (S.getSUnit()->isScheduled)
return false;
ValidSuccessor = true;
if (SU->getDepth() + S.getLatency() >= S.getSUnit()->getDepth() &&
isReady(S.getSUnit()->Preds))
return true;
}
}
return !ValidSuccessor;
}
/// Return true if the predecessors of the instruction are ready to be
/// scheduled once this instruction is scheduled.
static bool isPredecessorReady(const SUnit *SU) {
if (SU->Preds.size() == 0)
return true;
bool ValidPredecessor = false;
for (const auto &S : SU->Preds) {
if (S.isAssignedRegDep()) {
// If the predecessor has been scheduled, that means it was added to the
// bottom up schedule. In this case, the predecessor will not be close.
if (S.getSUnit()->isScheduled)
return false;
ValidPredecessor = true;
if (SU->getHeight() + S.getLatency() >= S.getSUnit()->getHeight() ||
isReady(S.getSUnit()->Succs))
return true;
}
}
return !ValidPredecessor;
}
/// Check if the instruction changes the register pressure of a register in the
/// high pressure set. The function returns a negative value if the pressure
/// decreases and a positive value is the pressure increases. If the instruction
@ -659,7 +596,10 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
unsigned IsAvailableAmt = 0;
// Critical path first.
if (Q.getID() == TopQID) {
ResCount += (SU->getHeight() * ScaleTwo);
if (Top.isLatencyBound(SU)) {
DEBUG(if (verbose) dbgs() << "LB|");
ResCount += (SU->getHeight() * ScaleTwo);
}
DEBUG(if (verbose) {
std::stringstream dbgstr;
@ -670,27 +610,16 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// If resources are available for it, multiply the
// chance of scheduling.
if (Top.ResourceModel->isResourceAvailable(SU, true)) {
if (!IgnoreBBRegPressure && pressureChange(SU, false) > 0) {
if (isSuccessorReady(SU)) {
IsAvailableAmt = (PriorityTwo + PriorityThree);
ResCount += IsAvailableAmt;
DEBUG(if (verbose) dbgs() << "HA|");
} else {
ResCount -= PriorityTwo;
DEBUG(if (verbose) dbgs() << "F|");
}
} else if (!IgnoreBBRegPressure && pressureChange(SU, false) < 0) {
ResCount += (PriorityTwo + PriorityThree);
DEBUG(if (verbose) dbgs() << "LA|");
} else {
IsAvailableAmt = (PriorityTwo + PriorityThree);
ResCount += IsAvailableAmt;
DEBUG(if (verbose) dbgs() << "A|");
}
IsAvailableAmt = (PriorityTwo + PriorityThree);
ResCount += IsAvailableAmt;
DEBUG(if (verbose) dbgs() << "A|");
} else
DEBUG(if (verbose) dbgs() << " |");
} else {
ResCount += (SU->getDepth() * ScaleTwo);
if (Bot.isLatencyBound(SU)) {
DEBUG(if (verbose) dbgs() << "LB|");
ResCount += (SU->getDepth() * ScaleTwo);
}
DEBUG(if (verbose) {
std::stringstream dbgstr;
@ -701,23 +630,9 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// If resources are available for it, multiply the
// chance of scheduling.
if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
if (!IgnoreBBRegPressure && pressureChange(SU, true) > 0) {
if (isPredecessorReady(SU)) {
IsAvailableAmt = (PriorityTwo + PriorityThree);
ResCount += IsAvailableAmt;
DEBUG(if (verbose) dbgs() << "HA|");
} else {
ResCount -= PriorityTwo;
DEBUG(if (verbose) dbgs() << "F|");
}
} else if (!IgnoreBBRegPressure && pressureChange(SU, true) < 0) {
ResCount += (PriorityTwo + PriorityThree);
DEBUG(if (verbose) dbgs() << "LA|");
} else {
IsAvailableAmt = (PriorityTwo + PriorityThree);
ResCount += IsAvailableAmt;
DEBUG(if (verbose) dbgs() << "A|");
}
IsAvailableAmt = (PriorityTwo + PriorityThree);
ResCount += IsAvailableAmt;
DEBUG(if (verbose) dbgs() << "A|");
} else
DEBUG(if (verbose) dbgs() << " |");
}
@ -728,14 +643,16 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// Look at all of the successors of this node.
// Count the number of nodes that
// this node is the sole unscheduled node for.
for (const SDep &SI : SU->Succs)
if (isSingleUnscheduledPred(SI.getSUnit(), SU))
++NumNodesBlocking;
if (Top.isLatencyBound(SU))
for (const SDep &SI : SU->Succs)
if (isSingleUnscheduledPred(SI.getSUnit(), SU))
++NumNodesBlocking;
} else {
// How many unscheduled predecessors block this node?
for (const SDep &PI : SU->Preds)
if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
++NumNodesBlocking;
if (Bot.isLatencyBound(SU))
for (const SDep &PI : SU->Preds)
if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
++NumNodesBlocking;
}
ResCount += (NumNodesBlocking * ScaleTwo);
@ -846,8 +763,9 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker,
SchedCandidate &Candidate) {
ReadyQueue &Q = Zone.Available;
DEBUG(if (SchedDebugVerboseLevel > 1)
readyQueueVerboseDump(RPTracker, Candidate, Q);
else Q.dump(););
@ -875,9 +793,19 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
continue;
}
// Don't choose an instruction with a negative scheduling cost.
if (CurrentCost < 0)
// Choose node order for negative cost candidates. There is no good
// candidate in this case.
if (CurrentCost < 0 && Candidate.SCost < 0) {
if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
|| (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = NodeOrder;
}
continue;
}
// Best cost.
if (CurrentCost > Candidate.SCost) {
@ -889,67 +817,40 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
continue;
}
// Tie breaker using Timing Class.
if (!DisableTCTie) {
auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
auto &QII = *QST.getInstrInfo();
const MachineInstr *MI = (*I)->getInstr();
const MachineInstr *CandI = Candidate.SU->getInstr();
const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
DEBUG(dbgs() << "TC Tie Breaker Cand: "
<< CandLatency << " Instr:" << InstrLatency << "\n"
<< *MI << *CandI << "\n");
if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
if (InstrLatency < CandLatency && TopUseShorterTie) {
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
DEBUG(dbgs() << "Used top shorter tie breaker\n");
continue;
} else if (InstrLatency > CandLatency && !TopUseShorterTie) {
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
DEBUG(dbgs() << "Used top longer tie breaker\n");
continue;
}
} else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
if (InstrLatency < CandLatency && BotUseShorterTie) {
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
continue;
} else if (InstrLatency > CandLatency && !BotUseShorterTie) {
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
DEBUG(dbgs() << "Used Bot longer tie breaker\n");
continue;
}
// Choose an instruction that does not depend on an artificial edge.
unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
if (CurrWeak != CandWeak) {
if (CurrWeak < CandWeak) {
DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = Weak;
}
continue;
}
if (CurrentCost == Candidate.SCost) {
if ((Q.getID() == TopQID &&
(*I)->Succs.size() > Candidate.SU->Succs.size()) ||
(Q.getID() == BotQID &&
(*I)->Preds.size() < Candidate.SU->Preds.size())) {
if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
unsigned CurrSize, CandSize;
if (Q.getID() == TopQID) {
CurrSize = (*I)->Succs.size();
CandSize = Candidate.SU->Succs.size();
} else {
CurrSize = (*I)->Preds.size();
CandSize = Candidate.SU->Preds.size();
}
if (CurrSize > CandSize) {
DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
continue;
}
// Keep the old candidate if it's a better candidate. That is, don't use
// the subsequent tie breaker.
if (CurrSize != CandSize)
continue;
}
// Tie breaker.
@ -962,7 +863,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
FoundCandidate = NodeOrder;
continue;
}
}
@ -991,7 +892,7 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
}
SchedCandidate BotCand;
// Prefer bottom scheduling when heuristics are silent.
CandResult BotResult = pickNodeFromQueue(Bot.Available,
CandResult BotResult = pickNodeFromQueue(Bot,
DAG->getBotRPTracker(), BotCand);
assert(BotResult != NoCand && "failed to find the first candidate");
@ -1009,7 +910,7 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
}
// Check if the top Q has a better candidate.
SchedCandidate TopCand;
CandResult TopResult = pickNodeFromQueue(Top.Available,
CandResult TopResult = pickNodeFromQueue(Top,
DAG->getTopRPTracker(), TopCand);
assert(TopResult != NoCand && "failed to find the first candidate");
@ -1054,7 +955,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
if (!SU) {
SchedCandidate TopCand;
CandResult TopResult =
pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
assert(TopResult != NoCand && "failed to find the first candidate");
(void)TopResult;
SU = TopCand.SU;
@ -1065,7 +966,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
if (!SU) {
SchedCandidate BotCand;
CandResult BotResult =
pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
assert(BotResult != NoCand && "failed to find the first candidate");
(void)BotResult;
SU = BotCand.SU;
@ -1080,8 +981,9 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
Bot.removeReady(SU);
DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
<< " Scheduling Instruction in cycle "
<< (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
<< " Scheduling instruction in cycle "
<< (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" <<
reportPackets() << ")\n";
SU->dump(DAG));
return SU;
}

View File

@ -126,7 +126,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
/// Represent the type of SchedCandidate found within a single queue.
enum CandResult {
NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
BestCost};
BestCost, Weak};
/// Each Scheduling boundary is associated with ready queues. It tracks the
/// current cycle in whichever direction at has moved, and maintains the state
@ -206,7 +206,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
void removeReady(SUnit *SU);
SUnit *pickOnlyChoice();
bool isLatencyBound(SUnit *SU) {
if (CurrCycle >= CriticalPathLength)
return true;
@ -245,7 +245,7 @@ public:
void releaseBottomNode(SUnit *SU) override;
unsigned ReportPackets() {
unsigned reportPackets() {
return Top.ResourceModel->getTotalPackets() +
Bot.ResourceModel->getTotalPackets();
}
@ -259,7 +259,7 @@ protected:
SUnit *SU, SchedCandidate &Candidate,
RegPressureDelta &Delta, bool verbose);
CandResult pickNodeFromQueue(ReadyQueue &Q,
CandResult pickNodeFromQueue(VLIWSchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
#ifndef NDEBUG

View File

@ -13,10 +13,9 @@ define void @test_00(<64 x i8>* %p, <64 x i8>* %q) #0 {
; CHECK-LABEL: test_01:
; CHECK-DAG: v[[V10:[0-9]+]] = vmem(r[[B01:[0-9]+]]+#0)
; CHECK-DAG: v[[V11:[0-9]+]] = vmem(r[[B01]]+#1)
; CHECK: }
; CHECK-DAG: valign(v[[V11]],v[[V10]],r[[B01]])
; CHECK-DAG: v[[V12:[0-9]+]] = vmem(r[[B01]]+#2)
; CHECK: }
; CHECK-DAG: valign(v[[V11]],v[[V10]],r[[B01]])
; CHECK-DAG: valign(v[[V12]],v[[V11]],r[[B01]])
define void @test_01(<128 x i8>* %p, <128 x i8>* %q) #0 {
%v0 = load <128 x i8>, <128 x i8>* %p, align 1

View File

@ -1,5 +1,7 @@
; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
; Broken after r326208.
; XFAIL: *
; CHECK: allocframe{{.*}}
; CHECK-NEXT: }
; CHECK-NEXT:{{.*}}tmp{{[0-9]+}}:

View File

@ -1,4 +1,6 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; XFAIL: *
; LSR changes required.
; This version of the conv3x3 test has both loops. This test checks that the
; inner loop has 13 packets.

View File

@ -1,7 +1,6 @@
; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -disable-post-ra < %s | FileCheck %s
; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
; CHECK: q{{[0-3]}} = vsetq(r{{[0-9]*}})
; CHECK: q{{[0-3]}} |= vand(v{{[0-9]*}},r{{[0-9]*}})
; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
; CHECK: q{{[0-3]}} = vcmp.eq(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
@ -108,7 +107,7 @@
; CHECK: q{{[0-3]}} = xor{{[0-9]*}}(q{{[0-3]}},q{{[0-3]}})
; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
; CHECK: v{{[0-9]*}} = v
; CHECK: v{{[0-9]*}} = valign(v{{[0-9]*}},v{{[0-9]*}},#0)
; CHECK: v{{[0-9]*}} = valign(v{{[0-9]*}},v{{[0-9]*}},#1)
; CHECK: v{{[0-9]*}} = valign(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
@ -116,7 +115,7 @@
; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
; CHECK: v{{[0-9]*}} |= vand(q{{[0-3]}},r{{[0-9]*}})
; CHECK: v{{[0-9]*}} = vdelta(v{{[0-9]*}},v{{[0-9]*}})
; CHECK: v{{[0-9]*}} = vlalign(v{{[0-9]*}},v{{[0-9]*}},#0)
; CHECK: v{{[0-9]*}} = vlalign(v{{[0-9]*}},v{{[0-9]*}},#1)
; CHECK: v{{[0-9]*}} = vlalign(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
; CHECK: v{{[0-9]*}} = vmux(q{{[0-3]}},v{{[0-9]*}},v{{[0-9]*}})
@ -670,7 +669,7 @@ entry:
store volatile <16 x i32> %247, <16 x i32>* @VectorResult, align 64
%248 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%249 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%250 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %248, <16 x i32> %249, i32 0)
%250 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %248, <16 x i32> %249, i32 1)
store volatile <16 x i32> %250, <16 x i32>* @VectorResult, align 64
%251 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%252 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@ -695,7 +694,7 @@ entry:
store volatile <16 x i32> %266, <16 x i32>* @VectorResult, align 64
%267 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%268 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%269 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %267, <16 x i32> %268, i32 0)
%269 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %267, <16 x i32> %268, i32 1)
store volatile <16 x i32> %269, <16 x i32>* @VectorResult, align 64
%270 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%271 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64