mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 03:23:01 +02:00
Teach DAG combine to fold vector logical operations (AND, OR, XOR) in which one operand is a vector of all 1s or all 0s. These cases can show up when vectors are split during legalization. Fix some tests that depended on these cases not being combined.
llvm-svn: 169684
This commit is contained in:
parent
329a5c1e03
commit
a6f44fb06b
@ -2427,6 +2427,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
||||
if (VT.isVector()) {
|
||||
SDValue FoldedVOp = SimplifyVBinOp(N);
|
||||
if (FoldedVOp.getNode()) return FoldedVOp;
|
||||
|
||||
// fold (and x, 0) -> 0, vector edition
|
||||
if (ISD::isBuildVectorAllZeros(N0.getNode()))
|
||||
return N0;
|
||||
if (ISD::isBuildVectorAllZeros(N1.getNode()))
|
||||
return N1;
|
||||
|
||||
// fold (and x, -1) -> x, vector edition
|
||||
if (ISD::isBuildVectorAllOnes(N0.getNode()))
|
||||
return N1;
|
||||
if (ISD::isBuildVectorAllOnes(N1.getNode()))
|
||||
return N0;
|
||||
}
|
||||
|
||||
// fold (and x, undef) -> 0
|
||||
@ -3025,6 +3037,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
|
||||
if (VT.isVector()) {
|
||||
SDValue FoldedVOp = SimplifyVBinOp(N);
|
||||
if (FoldedVOp.getNode()) return FoldedVOp;
|
||||
|
||||
// fold (or x, 0) -> x, vector edition
|
||||
if (ISD::isBuildVectorAllZeros(N0.getNode()))
|
||||
return N1;
|
||||
if (ISD::isBuildVectorAllZeros(N1.getNode()))
|
||||
return N0;
|
||||
|
||||
// fold (or x, -1) -> -1, vector edition
|
||||
if (ISD::isBuildVectorAllOnes(N0.getNode()))
|
||||
return N0;
|
||||
if (ISD::isBuildVectorAllOnes(N1.getNode()))
|
||||
return N1;
|
||||
}
|
||||
|
||||
// fold (or x, undef) -> -1
|
||||
@ -3334,6 +3358,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
|
||||
if (VT.isVector()) {
|
||||
SDValue FoldedVOp = SimplifyVBinOp(N);
|
||||
if (FoldedVOp.getNode()) return FoldedVOp;
|
||||
|
||||
// fold (xor x, 0) -> x, vector edition
|
||||
if (ISD::isBuildVectorAllZeros(N0.getNode()))
|
||||
return N1;
|
||||
if (ISD::isBuildVectorAllZeros(N1.getNode()))
|
||||
return N0;
|
||||
}
|
||||
|
||||
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
|
||||
|
@ -11,12 +11,12 @@ target triple = "x86_64-apple-macosx10.6.6"
|
||||
define void @select_func() {
|
||||
entry:
|
||||
%c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
|
||||
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
|
||||
%and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
|
||||
%neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
|
||||
%and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
|
||||
%or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
|
||||
%neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25>
|
||||
%and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
|
||||
%or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
|
||||
%a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
|
||||
store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
|
||||
ret void
|
||||
|
@ -43,21 +43,21 @@ forbody: ; preds = %forcond
|
||||
%mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1]
|
||||
%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
|
||||
%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1]
|
||||
%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
|
||||
%andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
|
||||
%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1]
|
||||
%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1]
|
||||
%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
|
||||
%andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
|
||||
%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
|
||||
%orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
|
||||
%orps203.i = add <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
|
||||
%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1]
|
||||
%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
|
||||
%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
|
||||
%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
|
||||
%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
|
||||
%andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
|
||||
%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
|
||||
%andnps.i17 = and <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1]
|
||||
%andnps.i17 = add <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1]
|
||||
%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1]
|
||||
%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1]
|
||||
%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
|
||||
|
@ -28,33 +28,31 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
|
||||
|
||||
; Without forcing instructions, fall back to the preferred PS domain.
|
||||
; CHECK: vsel_i64
|
||||
; CHECK: xorps
|
||||
; CHECK: andps
|
||||
; CHECK: andnps
|
||||
; CHECK: andps
|
||||
; CHECK: orps
|
||||
; CHECK: ret
|
||||
|
||||
define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
|
||||
%A = load <4 x i64>* %v1
|
||||
%B = load <4 x i64>* %v2
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
|
||||
store <4 x i64 > %vsel, <4 x i64>* %v1
|
||||
define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
|
||||
%A = load <2 x i64>* %v1
|
||||
%B = load <2 x i64>* %v2
|
||||
%vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
|
||||
store <2 x i64 > %vsel, <2 x i64>* %v1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Without forcing instructions, fall back to the preferred PS domain.
|
||||
; CHECK: vsel_double
|
||||
; CHECK: xorps
|
||||
; CHECK: andps
|
||||
; CHECK: andnps
|
||||
; CHECK: andps
|
||||
; CHECK: orps
|
||||
; CHECK: ret
|
||||
|
||||
define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
|
||||
%A = load <4 x double>* %v1
|
||||
%B = load <4 x double>* %v2
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
|
||||
store <4 x double > %vsel, <4 x double>* %v1
|
||||
define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
|
||||
%A = load <2 x double>* %v1
|
||||
%B = load <2 x double>* %v2
|
||||
%vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
|
||||
store <2 x double > %vsel, <2 x double>* %v1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user