diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cd07c2c729..4a17dc0d2c3 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2427,6 +2427,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (and x, 0) -> 0, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+
+    // fold (and x, -1) -> x, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N0;
   }
 
   // fold (and x, undef) -> 0
@@ -3025,6 +3037,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (or x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+
+    // fold (or x, -1) -> -1, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N1;
   }
 
   // fold (or x, undef) -> -1
@@ -3334,6 +3358,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (xor x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }
 
   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 0a949eb29b8..f66248bc5a7 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -11,12 +11,12 @@ target triple = "x86_64-apple-macosx10.6.6"
 define void @select_func() {
 entry:
   %c.lobit.i.i.i = ashr <8 x i16> ,
-  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
   %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i,
   %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
-  %neg.i.i.i.i = xor <2 x i64> %a35,
-  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
-  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i,
+  %and.i.i.i = and <8 x i16> %neg.i.i.i.i,
+  %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+  %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
   %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
   store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
   ret void
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9cf4607cf5b..6cd27618de1 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,21 +43,21 @@ forbody:   ; preds = %forcond
   %mul171.i = fmul <4 x float> %add167.i, %sub140.i  ; <<4 x float>> [#uses=1]
   %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >  ; <<4 x float>> [#uses=1]
   %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>  ; <<4 x i32>> [#uses=1]
-  %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer  ; <<4 x i32>> [#uses=1]
+  %andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer  ; <<4 x i32>> [#uses=1]
   %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>  ; <<4 x float>> [#uses=1]
   %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer  ; <<4 x float>> [#uses=1]
   %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>  ; <<4 x i32>> [#uses=1]
-  %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer  ; <<4 x i32>> [#uses=1]
+  %andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer  ; <<4 x i32>> [#uses=1]
   %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >  ; <<4 x i32>> [#uses=1]
-  %orps203.i = or <4 x i32> %andnps192.i, %xorps.i  ; <<4 x i32>> [#uses=1]
+  %orps203.i = add <4 x i32> %andnps192.i, %xorps.i  ; <<4 x i32>> [#uses=1]
   %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>  ; <<4 x float>> [#uses=1]
   %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer  ; <<4 x float>> [#uses=2]
   %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer  ; <<4 x float>> [#uses=1]
   %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind  ; <<4 x float>> [#uses=1]
   %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>  ; <<4 x i32>> [#uses=2]
-  %andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13  ; <<4 x i32>> [#uses=1]
+  %andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13  ; <<4 x i32>> [#uses=1]
   %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >  ; <<4 x i32>> [#uses=1]
-  %andnps.i17 = and <4 x i32> zeroinitializer, %not.i16  ; <<4 x i32>> [#uses=1]
+  %andnps.i17 = add <4 x i32> zeroinitializer, %not.i16  ; <<4 x i32>> [#uses=1]
   %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14  ; <<4 x i32>> [#uses=1]
   %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>  ; <<4 x float>> [#uses=1]
   %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind  ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317bf294..67ce1be1352 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,33 +28,31 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
-  %A = load <4 x i64>* %v1
-  %B = load <4 x i64>* %v2
-  %vsel = select <4 x i1> , <4 x i64> %A, <4 x i64> %B
-  store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+  %A = load <2 x i64>* %v1
+  %B = load <2 x i64>* %v2
+  %vsel = select <2 x i1> , <2 x i64> %A, <2 x i64> %B
+  store <2 x i64 > %vsel, <2 x i64>* %v1
   ret void
 }
 
; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
-  %A = load <4 x double>* %v1
-  %B = load <4 x double>* %v2
-  %vsel = select <4 x i1> , <4 x double> %A, <4 x double> %B
-  store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+  %A = load <2 x double>* %v1
+  %B = load <2 x double>* %v2
+  %vsel = select <2 x i1> , <2 x double> %A, <2 x double> %B
+  store <2 x double > %vsel, <2 x double>* %v1
   ret void
 }
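
Not part of the patch, just a minimal sketch of the identities the new combines implement. The function below is hypothetical (the @fold_demo name and operand choices are made up for illustration); fed straight to llc, the way the regression tests above are, each logical op against a constant all-zeros or all-ones vector should now be folded away by the DAG combiner before instruction selection, leaving a function that simply returns %x.

define <4 x i32> @fold_demo(<4 x i32> %x) {
  ; (and x, 0) -> 0 and (xor x, 0) -> x, vector edition
  %a = and <4 x i32> %x, zeroinitializer
  %z = xor <4 x i32> %x, zeroinitializer
  ; (or x, -1) -> -1, vector edition
  %o = or <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; (and x, -1) -> x and (or x, 0) -> x, vector edition
  %t = and <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
  %u = or <4 x i32> %t, %a
  %r = and <4 x i32> %u, %o
  ret <4 x i32> %r
}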