1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[DAGCombiner] widen zext of popcount based on target support

zext (ctpop X) --> ctpop (zext X)

This is a prerequisite step for canonicalizing in the other direction (narrow the popcount) in IR - PR43688:
https://bugs.llvm.org/show_bug.cgi?id=43688

I'm not sure if any other targets are affected, but I found a missing fold for PPC, so I added tests based on that.
We widen all the way to 64-bit in these tests because the initial DAG looks something like this:

  t5: i8 = ctpop t4
  t6: i32 = zero_extend t5  <-- created based on IR, but unused node?
    t7: i64 = zero_extend t5

Differential Revision: https://reviews.llvm.org/D69127
This commit is contained in:
Sanjay Patel 2019-10-25 13:34:40 -04:00
parent 2b4fed1026
commit e414386896
2 changed files with 18 additions and 9 deletions

View File

@ -9921,6 +9921,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
// If the target does not support a pop-count in the narrow source type but
// does support it in the destination type, widen the pop-count to this type:
// zext (ctpop X) --> ctpop (zext X)
// TODO: Generalize this to handle starting from anyext.
if (N0.getOpcode() == ISD::CTPOP && N0.hasOneUse() &&
!TLI.isOperationLegalOrCustom(ISD::CTPOP, N0.getValueType()) &&
TLI.isOperationLegalOrCustom(ISD::CTPOP, VT)) {
SDLoc DL(N);
SDValue NewZext = DAG.getZExtOrTrunc(N0.getOperand(0), DL, VT);
return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}
return SDValue();
}

View File

@ -41,9 +41,8 @@ define i16 @zpop_i8_i16(i8 %x) {
define i16 @popz_i8_i16(i8 %x) {
; FAST-LABEL: popz_i8_i16:
; FAST: # %bb.0:
; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
; FAST-NEXT: popcntw 3, 3
; FAST-NEXT: clrldi 3, 3, 32
; FAST-NEXT: clrldi 3, 3, 56
; FAST-NEXT: popcntd 3, 3
; FAST-NEXT: blr
;
; SLOW-LABEL: popz_i8_i16:
@ -114,9 +113,8 @@ define i32 @zpop_i8_i32(i8 %x) {
define i32 @popz_i8_32(i8 %x) {
; FAST-LABEL: popz_i8_32:
; FAST: # %bb.0:
; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
; FAST-NEXT: popcntw 3, 3
; FAST-NEXT: clrldi 3, 3, 32
; FAST-NEXT: clrldi 3, 3, 56
; FAST-NEXT: popcntd 3, 3
; FAST-NEXT: blr
;
; SLOW-LABEL: popz_i8_32:
@ -187,9 +185,8 @@ define i32 @zpop_i16_i32(i16 %x) {
define i32 @popz_i16_32(i16 %x) {
; FAST-LABEL: popz_i16_32:
; FAST: # %bb.0:
; FAST-NEXT: rlwinm 3, 3, 0, 16, 31
; FAST-NEXT: popcntw 3, 3
; FAST-NEXT: clrldi 3, 3, 32
; FAST-NEXT: clrldi 3, 3, 48
; FAST-NEXT: popcntd 3, 3
; FAST-NEXT: blr
;
; SLOW-LABEL: popz_i16_32: