mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[SelectionDAG] rot(x, y) --> x iff ComputeNumSignBits(x) == BitWidth(x)
Rotating a 0/-1 value by any amount always results in the same 0/-1 value.
This commit is contained in:
parent
c46fcc1c0b
commit
c336c5c2b0
@ -7414,6 +7414,10 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Simplify the operands using demanded-bits information.
|
||||||
|
if (SimplifyDemandedBits(SDValue(N, 0)))
|
||||||
|
return SDValue(N, 0);
|
||||||
|
|
||||||
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
|
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
|
||||||
if (N1.getOpcode() == ISD::TRUNCATE &&
|
if (N1.getOpcode() == ISD::TRUNCATE &&
|
||||||
N1.getOperand(0).getOpcode() == ISD::AND) {
|
N1.getOperand(0).getOpcode() == ISD::AND) {
|
||||||
|
@ -3743,6 +3743,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
|
|||||||
}
|
}
|
||||||
case ISD::ROTL:
|
case ISD::ROTL:
|
||||||
case ISD::ROTR:
|
case ISD::ROTR:
|
||||||
|
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
|
||||||
|
|
||||||
|
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
|
||||||
|
if (Tmp == VTBits)
|
||||||
|
return VTBits;
|
||||||
|
|
||||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
||||||
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
|
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
|
||||||
|
|
||||||
@ -3752,7 +3758,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
|
|||||||
|
|
||||||
// If we aren't rotating out all of the known-in sign bits, return the
|
// If we aren't rotating out all of the known-in sign bits, return the
|
||||||
// number that are left. This handles rotl(sext(x), 1) for example.
|
// number that are left. This handles rotl(sext(x), 1) for example.
|
||||||
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
|
|
||||||
if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
|
if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -1586,6 +1586,15 @@ bool TargetLowering::SimplifyDemandedBits(
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case ISD::ROTL:
|
||||||
|
case ISD::ROTR: {
|
||||||
|
SDValue Op0 = Op.getOperand(0);
|
||||||
|
|
||||||
|
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
|
||||||
|
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
|
||||||
|
return TLO.CombineTo(Op, Op0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case ISD::BITREVERSE: {
|
case ISD::BITREVERSE: {
|
||||||
SDValue Src = Op.getOperand(0);
|
SDValue Src = Op.getOperand(0);
|
||||||
APInt DemandedSrcBits = DemandedBits.reverseBits();
|
APInt DemandedSrcBits = DemandedBits.reverseBits();
|
||||||
|
@ -87,17 +87,10 @@ define <4 x i32> @rot_v4i32_zero_non_splat(<4 x i32> %x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) {
|
define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) {
|
||||||
; XOP-LABEL: rot_v4i32_allsignbits:
|
; CHECK-LABEL: rot_v4i32_allsignbits:
|
||||||
; XOP: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; XOP-NEXT: vpsrad $31, %xmm0, %xmm0
|
; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||||
; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
|
; CHECK-NEXT: retq
|
||||||
; XOP-NEXT: retq
|
|
||||||
;
|
|
||||||
; AVX512-LABEL: rot_v4i32_allsignbits:
|
|
||||||
; AVX512: # %bb.0:
|
|
||||||
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
|
|
||||||
; AVX512-NEXT: vprolvd %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX512-NEXT: retq
|
|
||||||
%1 = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
|
%1 = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
|
||||||
%2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> %y)
|
%2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> %y)
|
||||||
ret <4 x i32> %2
|
ret <4 x i32> %2
|
||||||
|
@ -314,52 +314,52 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
|
|||||||
; X86-NEXT: andl $-8, %esp
|
; X86-NEXT: andl $-8, %esp
|
||||||
; X86-NEXT: subl $72, %esp
|
; X86-NEXT: subl $72, %esp
|
||||||
; X86-NEXT: movl 8(%ebp), %ecx
|
; X86-NEXT: movl 8(%ebp), %ecx
|
||||||
; X86-NEXT: movl 12(%ebp), %edx
|
; X86-NEXT: movl 12(%ebp), %eax
|
||||||
; X86-NEXT: movl 20(%ebp), %ebx
|
; X86-NEXT: movl 20(%ebp), %edx
|
||||||
; X86-NEXT: sarl $31, %ebx
|
; X86-NEXT: movl %edx, %esi
|
||||||
; X86-NEXT: movl %edx, %eax
|
; X86-NEXT: sarl $31, %esi
|
||||||
|
; X86-NEXT: movl %eax, %edi
|
||||||
|
; X86-NEXT: sarl $31, %edi
|
||||||
|
; X86-NEXT: movl %edi, %ebx
|
||||||
|
; X86-NEXT: shldl $31, %eax, %ebx
|
||||||
|
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NEXT: shldl $31, %ecx, %eax
|
; X86-NEXT: shldl $31, %ecx, %eax
|
||||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NEXT: shll $31, %ecx
|
; X86-NEXT: shll $31, %ecx
|
||||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NEXT: movl %edx, %esi
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NEXT: sarl $31, %esi
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NEXT: movl %esi, %edi
|
; X86-NEXT: pushl %edx
|
||||||
; X86-NEXT: shldl $31, %edx, %esi
|
|
||||||
; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
|
|
||||||
; X86-NEXT: rorl %edi
|
|
||||||
; X86-NEXT: pushl %ebx
|
|
||||||
; X86-NEXT: pushl %ebx
|
|
||||||
; X86-NEXT: pushl 20(%ebp)
|
|
||||||
; X86-NEXT: pushl 16(%ebp)
|
; X86-NEXT: pushl 16(%ebp)
|
||||||
; X86-NEXT: pushl %edi
|
; X86-NEXT: pushl %edi
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NEXT: pushl %eax
|
; X86-NEXT: pushl %eax
|
||||||
; X86-NEXT: pushl %ecx
|
; X86-NEXT: pushl %ecx
|
||||||
; X86-NEXT: pushl %edx
|
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||||
|
; X86-NEXT: pushl %eax
|
||||||
; X86-NEXT: calll __divti3
|
; X86-NEXT: calll __divti3
|
||||||
; X86-NEXT: addl $32, %esp
|
; X86-NEXT: addl $32, %esp
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||||
|
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
|
; X86-NEXT: movl %ecx, %eax
|
||||||
|
; X86-NEXT: subl $1, %eax
|
||||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||||
; X86-NEXT: subl $1, %ecx
|
; X86-NEXT: sbbl $0, %ebx
|
||||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
; X86-NEXT: testl %esi, %esi
|
||||||
; X86-NEXT: sbbl $0, %eax
|
|
||||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
|
||||||
; X86-NEXT: testl %ebx, %ebx
|
|
||||||
; X86-NEXT: sets %al
|
; X86-NEXT: sets %al
|
||||||
; X86-NEXT: testl %edi, %edi
|
; X86-NEXT: testl %edi, %edi
|
||||||
; X86-NEXT: sets %cl
|
; X86-NEXT: sets %cl
|
||||||
; X86-NEXT: xorb %al, %cl
|
; X86-NEXT: xorb %al, %cl
|
||||||
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
|
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
|
||||||
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NEXT: pushl 20(%ebp)
|
; X86-NEXT: pushl 20(%ebp)
|
||||||
; X86-NEXT: pushl 16(%ebp)
|
; X86-NEXT: pushl 16(%ebp)
|
||||||
; X86-NEXT: pushl %edi
|
; X86-NEXT: pushl %edi
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
|
||||||
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
|
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
|
||||||
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
|
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
|
||||||
; X86-NEXT: pushl %eax
|
; X86-NEXT: pushl %eax
|
||||||
@ -374,8 +374,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
|
|||||||
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
|
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
|
||||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||||
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
|
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
|
||||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
|
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
|
||||||
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
|
; X86-NEXT: movl %ebx, %edx
|
||||||
; X86-NEXT: leal -12(%ebp), %esp
|
; X86-NEXT: leal -12(%ebp), %esp
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: popl %edi
|
; X86-NEXT: popl %edi
|
||||||
|
Loading…
Reference in New Issue
Block a user