diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e585ca1434c..17c6375c4a1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -868,6 +868,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
   case X86::MOVSSrm:
   case X86::MOVSDrm:
   case X86::MOVAPSrm:
+  case X86::MOVUPSrm:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MMX_MOVD64rm:
@@ -1966,8 +1967,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                   MachineInstr::mmo_iterator MMOBegin,
                                   MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (RI.getStackAlignment() >= 16) ||
-                   RI.needsStackRealignment(MF);
+  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
   DebugLoc DL = DebugLoc::getUnknownLoc();
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2060,8 +2060,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                    MachineInstr::mmo_iterator MMOBegin,
                                    MachineInstr::mmo_iterator MMOEnd,
                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (RI.getStackAlignment() >= 16) ||
-                   RI.needsStackRealignment(MF);
+  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL = DebugLoc::getUnknownLoc();
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2638,17 +2637,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   MachineFunction &MF = DAG.getMachineFunction();
   if (FoldedLoad) {
     EVT VT = *RC->vt_begin();
-    bool isAligned = (RI.getStackAlignment() >= 16) ||
-                     RI.needsStackRealignment(MF);
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                            cast<MachineSDNode>(N)->memoperands_end());
+    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
     Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
                               VT, MVT::Other, &AddrOps[0], AddrOps.size());
     NewNodes.push_back(Load);
 
     // Preserve memory reference information.
-    std::pair<MachineInstr::mmo_iterator,
-              MachineInstr::mmo_iterator> MMOs =
-      MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
-                            cast<MachineSDNode>(N)->memoperands_end());
     cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
   }
 
@@ -2676,8 +2674,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
     AddrOps.pop_back();
     AddrOps.push_back(SDValue(NewNode, 0));
     AddrOps.push_back(Chain);
-    bool isAligned = (RI.getStackAlignment() >= 16) ||
-                     RI.needsStackRealignment(MF);
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                             cast<MachineSDNode>(N)->memoperands_end());
+    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
     SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                          isAligned, TM),
                                        dl, MVT::Other,
@@ -2685,10 +2686,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
     NewNodes.push_back(Store);
 
     // Preserve memory reference information.
-    std::pair<MachineInstr::mmo_iterator,
-              MachineInstr::mmo_iterator> MMOs =
-      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
-                             cast<MachineSDNode>(N)->memoperands_end());
     cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
   }
 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7d60507476a..83e7088f37a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
 let neverHasSideEffects = 1 in
 def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
 def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    "movups\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (loadv4f32 addr:$src))]>;
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
new file mode 100644
index 00000000000..3ce9edb5db4
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7396984
+
+@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
+
+define void @t(i32 %count) ssp nounwind {
+entry:
+; CHECK: t:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
+; CHECK: movups L_str(%rip), %xmm0
+  %tmp0 = alloca [60 x i8], align 1
+  %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0
+  br label %bb1
+
+bb1:
+; CHECK: LBB1_1:
+; CHECK: movaps %xmm0, (%rsp)
+  %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
+  call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
+  %tmp3 = add i32 %tmp2, 1
+  %tmp4 = icmp eq i32 %tmp3, %count
+  br i1 %tmp4, label %bb2, label %bb1
+
+bb2:
+  ret void
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
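For context, the rule the X86InstrInfo.cpp hunks converge on is to decide aligned vs. unaligned SSE opcodes from the load/store's own MachineMemOperand rather than from the function's stack alignment. A minimal sketch of that predicate follows, assuming the era's MachineMemOperand::getAlignment() returning the alignment in bytes (as used in the hunks above); the helper name is illustrative and not part of the patch.

    // Illustrative sketch only -- not part of the patch. Assumes
    // MachineMemOperand::getAlignment() returns the alignment in bytes.
    #include "llvm/CodeGen/MachineMemOperand.h"

    // The aligned SSE moves (MOVAPS/MOVAPD/MOVDQA) may be selected only when
    // the memory operand itself guarantees 16-byte alignment; otherwise the
    // unaligned MOVUPS-style forms must be used.
    static bool useAlignedSSEMove(const llvm::MachineMemOperand &MMO) {
      return MMO.getAlignment() >= 16;
    }

This is why the patch also marks MOVUPSrm as rematerializable: once alignment is tracked per memory operand, the unaligned load can safely be re-issued at its use instead of being spilled and reloaded through a possibly under-aligned stack slot.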