mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[DAGCombine] Combine pattern for REV16
This adds another pattern to the combiner for a case that we were not handling to generate the REV16 instruction for ARM/Thumb2 and a bswap+ror on X86. Differential Revision: https://reviews.llvm.org/D74032
This commit is contained in:
parent
5038318cdb
commit
e6b39abdfb
@ -5648,6 +5648,48 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Match this pattern:
|
||||
// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
|
||||
// And rewrite this to:
|
||||
// (rotr (bswap A), 16)
|
||||
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
|
||||
SelectionDAG &DAG, SDNode *N, SDValue N0,
|
||||
SDValue N1, EVT VT, EVT ShiftAmountTy) {
|
||||
assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
|
||||
"MatchBSwapHWordOrAndAnd: expecting i32");
|
||||
if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
|
||||
return SDValue();
|
||||
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
// TODO: this is too restrictive; lifting this restriction requires more tests
|
||||
if (!N0->hasOneUse() || !N1->hasOneUse())
|
||||
return SDValue();
|
||||
ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
|
||||
ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
|
||||
if (!Mask0 || !Mask1)
|
||||
return SDValue();
|
||||
if (Mask0->getAPIntValue() != 0xff00ff00 ||
|
||||
Mask1->getAPIntValue() != 0x00ff00ff)
|
||||
return SDValue();
|
||||
SDValue Shift0 = N0.getOperand(0);
|
||||
SDValue Shift1 = N1.getOperand(0);
|
||||
if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
|
||||
return SDValue();
|
||||
ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
|
||||
ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
|
||||
if (!ShiftAmt0 || !ShiftAmt1)
|
||||
return SDValue();
|
||||
if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
|
||||
return SDValue();
|
||||
if (Shift0.getOperand(0) != Shift1.getOperand(0))
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
|
||||
SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
|
||||
return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
|
||||
}
|
||||
|
||||
/// Match a 32-bit packed halfword bswap. That is
|
||||
/// ((x & 0x000000ff) << 8) |
|
||||
/// ((x & 0x0000ff00) >> 8) |
|
||||
@ -5664,6 +5706,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
|
||||
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
|
||||
return SDValue();
|
||||
|
||||
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
|
||||
getShiftAmountTy(VT)))
|
||||
return BSwap;
|
||||
|
||||
// Try again with commuted operands.
|
||||
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
|
||||
getShiftAmountTy(VT)))
|
||||
return BSwap;
|
||||
|
||||
|
||||
// Look for either
|
||||
// (or (bswaphpair), (bswaphpair))
|
||||
// (or (or (bswaphpair), (and)), (and))
|
||||
|
@ -1,11 +1,13 @@
|
||||
; XFAIL: *
|
||||
; fixme rev16 pattern is not matching
|
||||
|
||||
; RUN: llc < %s -mtriple=thumb-- -mcpu=arm1156t2-s -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=thumbv7m-none-eabi -o - | FileCheck %s
|
||||
|
||||
; 0xff00ff00 = 4278255360
|
||||
; 0x00ff00ff = 16711935
|
||||
define i32 @f1(i32 %a) {
|
||||
define i32 @rev16(i32 %a) {
|
||||
; CHECK-LABEL: rev16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: rev16 r0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_l8 = and i32 %l8, 4278255360
|
||||
@ -14,11 +16,142 @@ define i32 @f1(i32 %a) {
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; Negative test: the masks are swapped relative to the rev16 idiom (0xff00ff00 is applied to the lshr result and 0x00ff00ff to the shl result), so the CHECK lines expect no rev16.
define i32 @not_rev16(i32 %a) {
|
||||
; CHECK-LABEL: not_rev16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r1, #65280
|
||||
; CHECK-NEXT: and.w r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: and r0, r0, #65280
|
||||
; CHECK-NEXT: orr.w r0, r1, r0, lsl #8
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_r8 = and i32 %r8, 4278255360
|
||||
%mask_l8 = and i32 %l8, 16711935
|
||||
%tmp = or i32 %mask_r8, %mask_l8
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; Negative test: both masked values are also consumed by %mul, so each has more than one use; the CHECK lines expect the plain shift/mask sequence and no rev16.
define i32 @extra_maskop_uses2(i32 %a) {
|
||||
; CHECK-LABEL: extra_maskop_uses2:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r1, #-16711936
|
||||
; CHECK-NEXT: mov.w r2, #16711935
|
||||
; CHECK-NEXT: and.w r1, r1, r0, lsl #8
|
||||
; CHECK-NEXT: and.w r0, r2, r0, lsr #8
|
||||
; CHECK-NEXT: adds r2, r0, r1
|
||||
; CHECK-NEXT: muls r0, r1, r0
|
||||
; CHECK-NEXT: muls r0, r2, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_l8 = and i32 %l8, 4278255360
|
||||
%mask_r8 = and i32 %r8, 16711935
|
||||
%or = or i32 %mask_r8, %mask_l8
|
||||
%mul = mul i32 %mask_r8, %mask_l8 ; another use of the mask ops
|
||||
%r = mul i32 %mul, %or ; and use that result
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
|
||||
; Positive test with the `or` operands commuted: the lshr-masked value is the first operand and the shl-masked value the second. The CHECK lines still expect a single rev16.
define i32 @bswap_ror_commuted(i32 %a) {
|
||||
; CHECK-LABEL: bswap_ror_commuted:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: rev16 r0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_l8 = and i32 %l8, 4278255360
|
||||
%mask_r8 = and i32 %r8, 16711935
|
||||
%tmp = or i32 %mask_r8, %mask_l8
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; Negative test: the left shift is by 9 rather than 8, so the CHECK lines expect the shift/mask sequence and no rev16.
define i32 @different_shift_amount(i32 %a) {
|
||||
; CHECK-LABEL: different_shift_amount:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r1, #16711935
|
||||
; CHECK-NEXT: movw r2, #65024
|
||||
; CHECK-NEXT: and.w r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: movt r2, #65280
|
||||
; CHECK-NEXT: and.w r0, r2, r0, lsl #9
|
||||
; CHECK-NEXT: add r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 9
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_l8 = and i32 %l8, 4278255360
|
||||
%mask_r8 = and i32 %r8, 16711935
|
||||
%tmp = or i32 %mask_l8, %mask_r8
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; Negative test: the shl result is masked with 42 instead of 0xff00ff00. The CHECK lines expect only the masked lshr and no rev16.
define i32 @different_constant(i32 %a) {
|
||||
; CHECK-LABEL: different_constant:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r1, #16711935
|
||||
; CHECK-NEXT: and.w r0, r1, r0, lsr #8
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_l8 = and i32 %l8, 42
|
||||
%mask_r8 = and i32 %r8, 16711935
|
||||
%tmp = or i32 %mask_l8, %mask_r8
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; Negative test: the shl result is combined with the constant by `sub` instead of `and`, so the CHECK lines expect no rev16.
define i32 @different_op(i32 %a) {
|
||||
; CHECK-LABEL: different_op:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r1, #16711935
|
||||
; CHECK-NEXT: movw r2, #256
|
||||
; CHECK-NEXT: and.w r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: movt r2, #255
|
||||
; CHECK-NEXT: add.w r0, r2, r0, lsl #8
|
||||
; CHECK-NEXT: orrs r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%mask_l8 = sub i32 %l8, 4278255360
|
||||
%mask_r8 = and i32 %r8, 16711935
|
||||
%tmp = or i32 %mask_l8, %mask_r8
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; Negative test: the two shifts read different values (shl of %a, lshr of %b), so the CHECK lines expect the shift/mask sequence and no rev16.
define i32 @different_vars(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: different_vars:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r2, #16711935
|
||||
; CHECK-NEXT: and.w r1, r2, r1, lsr #8
|
||||
; CHECK-NEXT: mov.w r2, #-16711936
|
||||
; CHECK-NEXT: and.w r0, r2, r0, lsl #8
|
||||
; CHECK-NEXT: add r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %b, 8
|
||||
%mask_l8 = and i32 %l8, 4278255360
|
||||
%mask_r8 = and i32 %r8, 16711935
|
||||
%tmp = or i32 %mask_l8, %mask_r8
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
|
||||
; FIXME: this rev16 pattern is not matching
|
||||
|
||||
; 0xff000000 = 4278190080
|
||||
; 0x00ff0000 = 16711680
|
||||
; 0x0000ff00 = 65280
|
||||
; 0x000000ff = 255
|
||||
define i32 @f2(i32 %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov.w r1, #16711680
|
||||
; CHECK-NEXT: and r2, r0, #16711680
|
||||
; CHECK-NEXT: and.w r1, r1, r0, lsr #8
|
||||
; CHECK-NEXT: orr.w r1, r1, r2, lsl #8
|
||||
; CHECK-NEXT: ubfx r2, r0, #8, #8
|
||||
; CHECK-NEXT: bfi r2, r0, #8, #8
|
||||
; CHECK-NEXT: adds r0, r2, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
%masklo_l8 = and i32 %l8, 65280
|
||||
|
@ -8,23 +8,15 @@ define i32 @rev16(i32 %a) {
|
||||
; X86-LABEL: rev16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shll $8, %ecx
|
||||
; X86-NEXT: shrl $8, %eax
|
||||
; X86-NEXT: andl $-16711936, %ecx # imm = 0xFF00FF00
|
||||
; X86-NEXT: andl $16711935, %eax # imm = 0xFF00FF
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: rorl $16, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: rev16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shll $8, %eax
|
||||
; X64-NEXT: shrl $8, %edi
|
||||
; X64-NEXT: andl $-16711936, %eax # imm = 0xFF00FF00
|
||||
; X64-NEXT: andl $16711935, %edi # imm = 0xFF00FF
|
||||
; X64-NEXT: addl %edi, %eax
|
||||
; X64-NEXT: bswapl %eax
|
||||
; X64-NEXT: rorl $16, %eax
|
||||
; X64-NEXT: retq
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
@ -104,23 +96,15 @@ define i32 @bswap_ror_commuted(i32 %a) {
|
||||
; X86-LABEL: bswap_ror_commuted:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shll $8, %ecx
|
||||
; X86-NEXT: shrl $8, %eax
|
||||
; X86-NEXT: andl $-16711936, %ecx # imm = 0xFF00FF00
|
||||
; X86-NEXT: andl $16711935, %eax # imm = 0xFF00FF
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: rorl $16, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: bswap_ror_commuted:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shll $8, %eax
|
||||
; X64-NEXT: shrl $8, %edi
|
||||
; X64-NEXT: andl $-16711936, %eax # imm = 0xFF00FF00
|
||||
; X64-NEXT: andl $16711935, %edi # imm = 0xFF00FF
|
||||
; X64-NEXT: addl %edi, %eax
|
||||
; X64-NEXT: bswapl %eax
|
||||
; X64-NEXT: rorl $16, %eax
|
||||
; X64-NEXT: retq
|
||||
%l8 = shl i32 %a, 8
|
||||
%r8 = lshr i32 %a, 8
|
||||
@ -241,8 +225,6 @@ define i32 @different_vars(i32 %a, i32 %b) {
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
||||
; TODO: another pattern that we are currently not matching
|
||||
;
|
||||
; 0xff000000 = 4278190080
|
||||
; 0x00ff0000 = 16711680
|
||||
; 0x0000ff00 = 65280
|
||||
|
Loading…
Reference in New Issue
Block a user