1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[CodeGen] Don't require AA in SDAGISel at -O0.

Before r247167, the pass manager builder controlled which AA
implementations were used, exporting them all in the AliasAnalysis
analysis group.

Now, AAResultsWrapperPass always uses BasicAA, but still uses other AA
implementations if made available in the pass pipeline.

But regardless, SDAGISel is required at O0, and really doesn't need to
be doing fancy optimizations based on useful AA results.

Don't require AA at CodeGenOpt::None, and only use it otherwise.

This does have a functional impact (and one testcase is pessimized
because we can't reuse a load).  But I think that's desirable no matter
what.

Note that this alone doesn't result in fewer DT computations: TwoAddress
was previously able to reuse the DT we computed for SDAG.  That will be
fixed separately.

Differential Revision: https://reviews.llvm.org/D32766

llvm-svn: 302611
This commit is contained in:
Ahmed Bougacha 2017-05-10 00:39:30 +00:00
parent 8c31c46df1
commit d74b69039b
7 changed files with 75 additions and 68 deletions

View File

@ -406,7 +406,7 @@ public:
/// certain types of nodes together, or eliminating superfluous nodes. The /// certain types of nodes together, or eliminating superfluous nodes. The
/// Level argument controls whether Combine is allowed to produce nodes and /// Level argument controls whether Combine is allowed to produce nodes and
/// types that are illegal on the target. /// types that are illegal on the target.
void Combine(CombineLevel Level, AliasAnalysis &AA, void Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel); CodeGenOpt::Level OptLevel);
/// This transforms the SelectionDAG into a SelectionDAG that /// This transforms the SelectionDAG into a SelectionDAG that

View File

@ -114,7 +114,7 @@ namespace {
SmallPtrSet<SDNode *, 32> CombinedNodes; SmallPtrSet<SDNode *, 32> CombinedNodes;
// AA - Used for DAG load/store alias analysis. // AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA; AliasAnalysis *AA;
/// When an instruction is simplified, add all users of the instruction to /// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now. /// the work lists because they might get more simplified now.
@ -496,9 +496,9 @@ namespace {
SDValue distributeTruncateThroughAnd(SDNode *N); SDValue distributeTruncateThroughAnd(SDNode *N);
public: public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
MaximumLegalStoreInBits = 0; MaximumLegalStoreInBits = 0;
@ -16435,17 +16435,17 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
UseAA = false; UseAA = false;
#endif #endif
if (UseAA && if (UseAA && AA &&
Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
// Use alias analysis information. // Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
AliasResult AAResult = AliasResult AAResult =
AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
UseTBAA ? Op0->getAAInfo() : AAMDNodes()), UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
UseTBAA ? Op1->getAAInfo() : AAMDNodes())); UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
if (AAResult == NoAlias) if (AAResult == NoAlias)
return false; return false;
} }
@ -16659,7 +16659,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
} }
/// This is the entry point for the file. /// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel) { CodeGenOpt::Level OptLevel) {
/// This is the main entry point to this class. /// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level); DAGCombiner(*this, AA, OptLevel).Run(Level);

View File

@ -811,9 +811,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
} }
} }
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) { const TargetLibraryInfo *li) {
AA = &aa; AA = aa;
GFI = gfi; GFI = gfi;
LibInfo = li; LibInfo = li;
DL = &DAG.getDataLayout(); DL = &DAG.getDataLayout();
@ -3423,7 +3423,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains) if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects. // Serialize volatile loads with other side effects.
Root = getRoot(); Root = getRoot();
else if (AA->pointsToConstantMemory(MemoryLocation( else if (AA && AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything. // Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode(); Root = DAG.getEntryNode();
@ -3535,8 +3535,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType(); Type *Ty = I.getType();
AAMDNodes AAInfo; AAMDNodes AAInfo;
I.getAAMetadata(AAInfo); I.getAAMetadata(AAInfo);
assert(!AA->pointsToConstantMemory(MemoryLocation( assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
"load_from_swift_error should not be constant memory"); "load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs; SmallVector<EVT, 4> ValueVTs;
@ -3817,7 +3817,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything. // Do not serialize masked loads of constant memory with anything.
bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
@ -3861,7 +3861,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false; bool ConstantMemory = false;
if (UniformBase && if (UniformBase &&
AA->pointsToConstantMemory(MemoryLocation( AA && AA->pointsToConstantMemory(MemoryLocation(
BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
AAInfo))) { AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything. // Do not serialize (non-volatile) loads of constant memory with anything.
@ -5994,7 +5994,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
bool ConstantMemory = false; bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything. // Do not serialize (non-volatile) loads of constant memory with anything.
if (Builder.AA->pointsToConstantMemory(PtrVal)) { if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode(); Root = Builder.DAG.getEntryNode();
ConstantMemory = true; ConstantMemory = true;
} else { } else {

View File

@ -604,11 +604,11 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol) CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
DAG(dag), FuncInfo(funcinfo), DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
HasTailCall(false) { HasTailCall(false) {
} }
void init(GCFunctionInfo *gfi, AliasAnalysis &aa, void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li); const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare /// Clear out the current SelectionDAG and the associated state and prepare

View File

@ -300,7 +300,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
FuncInfo(new FunctionLoweringInfo()), FuncInfo(new FunctionLoweringInfo()),
CurDAG(new SelectionDAG(tm, OL)), CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(), AA(), GFI(),
OptLevel(OL), OptLevel(OL),
DAGSize(0) { DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
@ -318,7 +318,8 @@ SelectionDAGISel::~SelectionDAGISel() {
} }
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>(); if (OptLevel != CodeGenOpt::None)
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>(); AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>(); AU.addRequired<StackProtector>();
AU.addPreserved<StackProtector>(); AU.addPreserved<StackProtector>();
@ -395,7 +396,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo(); TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering(); TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo(); RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = make_unique<OptimizationRemarkEmitter>(&Fn); ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
@ -407,12 +407,22 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF, *ORE); CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG); FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
// into account). That's unfortunate but OK because it just means we won't
// ask for passes that have been required anyway.
if (UseMBPI && OptLevel != CodeGenOpt::None) if (UseMBPI && OptLevel != CodeGenOpt::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else else
FuncInfo->BPI = nullptr; FuncInfo->BPI = nullptr;
SDB->init(GFI, *AA, LibInfo); if (OptLevel != CodeGenOpt::None)
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
else
AA = nullptr;
SDB->init(GFI, AA, LibInfo);
MF->setHasInlineAsm(false); MF->setHasInlineAsm(false);
@ -716,7 +726,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{ {
NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
GroupDescription, TimePassesIsEnabled); GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
} }
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@ -748,7 +758,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{ {
NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
GroupName, GroupDescription, TimePassesIsEnabled); GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
} }
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@ -782,7 +792,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{ {
NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
GroupName, GroupDescription, TimePassesIsEnabled); GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
} }
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@ -808,7 +818,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{ {
NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
GroupDescription, TimePassesIsEnabled); GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
} }
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber

View File

@ -30,15 +30,15 @@
; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Safe Stack instrumentation pass
; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Insert stack protectors
; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: X86 DAG->DAG Instruction Selection ; CHECK-NEXT: X86 DAG->DAG Instruction Selection
; CHECK-NEXT: X86 PIC Global Base Reg Initialization ; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions ; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation ; CHECK-NEXT: Local Stack Slot Allocation
; CHECK-NEXT: X86 WinAlloca Expander ; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator ; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Bundle Machine CFG Edges ; CHECK-NEXT: Bundle Machine CFG Edges

View File

@ -30,25 +30,24 @@ define void @foo() {
; X86-O0-NEXT: subl $12, %esp ; X86-O0-NEXT: subl $12, %esp
; X86-O0-NEXT: .Lcfi0: ; X86-O0-NEXT: .Lcfi0:
; X86-O0-NEXT: .cfi_def_cfa_offset 16 ; X86-O0-NEXT: .cfi_def_cfa_offset 16
; X86-O0-NEXT: movzbl c, %eax ; X86-O0-NEXT: movb c, %al
; X86-O0-NEXT: testl %eax, %eax ; X86-O0-NEXT: testb %al, %al
; X86-O0-NEXT: setne %cl
; X86-O0-NEXT: movl %eax, %edx
; X86-O0-NEXT: movb %dl, %ch
; X86-O0-NEXT: testb %ch, %ch
; X86-O0-NEXT: setne {{[0-9]+}}(%esp) ; X86-O0-NEXT: setne {{[0-9]+}}(%esp)
; X86-O0-NEXT: movzbl %cl, %edx ; X86-O0-NEXT: movzbl c, %ecx
; X86-O0-NEXT: subl %eax, %edx ; X86-O0-NEXT: testl %ecx, %ecx
; X86-O0-NEXT: setle %cl ; X86-O0-NEXT: setne %al
; X86-O0-NEXT: # implicit-def: %EAX ; X86-O0-NEXT: movzbl %al, %edx
; X86-O0-NEXT: movb %cl, %al ; X86-O0-NEXT: subl %ecx, %edx
; X86-O0-NEXT: andl $1, %eax ; X86-O0-NEXT: setle %al
; X86-O0-NEXT: kmovd %eax, %k0 ; X86-O0-NEXT: # implicit-def: %ECX
; X86-O0-NEXT: kmovd %k0, %eax
; X86-O0-NEXT: movb %al, %cl ; X86-O0-NEXT: movb %al, %cl
; X86-O0-NEXT: andb $1, %cl ; X86-O0-NEXT: andl $1, %ecx
; X86-O0-NEXT: movzbl %cl, %eax ; X86-O0-NEXT: kmovd %ecx, %k0
; X86-O0-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-O0-NEXT: kmovd %k0, %ecx
; X86-O0-NEXT: movb %cl, %al
; X86-O0-NEXT: andb $1, %al
; X86-O0-NEXT: movzbl %al, %ecx
; X86-O0-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-O0-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-O0-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-O0-NEXT: addl $12, %esp ; X86-O0-NEXT: addl $12, %esp
; X86-O0-NEXT: retl ; X86-O0-NEXT: retl
@ -69,27 +68,25 @@ define void @foo() {
; ;
; X64-O0-LABEL: foo: ; X64-O0-LABEL: foo:
; X64-O0: # BB#0: # %entry ; X64-O0: # BB#0: # %entry
; X64-O0-NEXT: movzbl {{.*}}(%rip), %eax ; X64-O0-NEXT: movb {{.*}}(%rip), %al
; X64-O0-NEXT: movl %eax, %ecx ; X64-O0-NEXT: testb %al, %al
; X64-O0-NEXT: movb %cl, %dl
; X64-O0-NEXT: movl %ecx, %eax
; X64-O0-NEXT: testq %rcx, %rcx
; X64-O0-NEXT: setne %sil
; X64-O0-NEXT: testb %dl, %dl
; X64-O0-NEXT: setne -{{[0-9]+}}(%rsp) ; X64-O0-NEXT: setne -{{[0-9]+}}(%rsp)
; X64-O0-NEXT: movzbl %sil, %edi ; X64-O0-NEXT: movzbl {{.*}}(%rip), %ecx
; X64-O0-NEXT: subl %eax, %edi ; X64-O0-NEXT: testl %ecx, %ecx
; X64-O0-NEXT: setle %dl ; X64-O0-NEXT: setne %al
; X64-O0-NEXT: # implicit-def: %EAX ; X64-O0-NEXT: movzbl %al, %edx
; X64-O0-NEXT: movb %dl, %al ; X64-O0-NEXT: subl %ecx, %edx
; X64-O0-NEXT: andl $1, %eax ; X64-O0-NEXT: setle %al
; X64-O0-NEXT: kmovd %eax, %k0 ; X64-O0-NEXT: # implicit-def: %ECX
; X64-O0-NEXT: kmovd %k0, %eax ; X64-O0-NEXT: movb %al, %cl
; X64-O0-NEXT: movb %al, %dl ; X64-O0-NEXT: andl $1, %ecx
; X64-O0-NEXT: andb $1, %dl ; X64-O0-NEXT: kmovd %ecx, %k0
; X64-O0-NEXT: movzbl %dl, %eax ; X64-O0-NEXT: kmovd %k0, %ecx
; X64-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X64-O0-NEXT: movb %cl, %al
; X64-O0-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill ; X64-O0-NEXT: andb $1, %al
; X64-O0-NEXT: movzbl %al, %ecx
; X64-O0-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-O0-NEXT: movl %edx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; X64-O0-NEXT: retq ; X64-O0-NEXT: retq
entry: entry:
%a = alloca i8, align 1 %a = alloca i8, align 1