mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[llvm][AArch64] Simplify (and (sign_extend..) #bitmask).
Fold VT = (and (sign_extend NarrowVT to VT) #bitmask) into VT = (zero_extend NarrowVT).

With this combine, the test replaces a sign-extended load + an unsigned extension with a zero-extended load to render one of the operands of the last multiplication.

BEFORE                 |  AFTER
f_i16_i32:             |  f_i16_i32:
    .fnstart           |      .fnstart
    ldrsh   r0, [r0]   |      ldrh    r1, [r1]
    ldrsh   r1, [r1]   |      ldrsh   r0, [r0]
    smulbb  r0, r1, r0 |      smulbb  r0, r0, r1
    uxth    r1, r1     |      mul     r0, r0, r1
    mul     r0, r0, r1 |      bx      lr
    bx      lr         |

Reviewed By: resistor

Differential Revision: https://reviews.llvm.org/D90605
This commit is contained in:
parent
a9b449ce40
commit
f5046224cf
@ -5728,6 +5728,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
||||
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
|
||||
return V;
|
||||
|
||||
// Recognize the following pattern:
|
||||
//
|
||||
// AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
|
||||
//
|
||||
// where bitmask is a mask that clears the upper bits of AndVT. The
|
||||
// number of set (low) bits in bitmask must equal the bit width of NarrowVT.
|
||||
auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
|
||||
if (LHS->getOpcode() != ISD::SIGN_EXTEND)
|
||||
return false;
|
||||
|
||||
auto *C = dyn_cast<ConstantSDNode>(RHS);
|
||||
if (!C)
|
||||
return false;
|
||||
|
||||
if (!C->getAPIntValue().isMask(
|
||||
LHS.getOperand(0).getValueType().getFixedSizeInBits()))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
|
||||
if (IsAndZeroExtMask(N0, N1))
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -343,8 +343,8 @@ define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea
|
||||
;
|
||||
; CHECK-BE-LABEL: and_user:
|
||||
; CHECK-BE: @ %bb.0: @ %entry
|
||||
; CHECK-BE-NEXT: .save {r4, r5, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-BE-NEXT: .save {r4, r5, r6, lr}
|
||||
; CHECK-BE-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-BE-NEXT: cmp r0, #1
|
||||
; CHECK-BE-NEXT: blt .LBB3_4
|
||||
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
@ -355,24 +355,23 @@ define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea
|
||||
; CHECK-BE-NEXT: .p2align 2
|
||||
; CHECK-BE-NEXT: .LBB3_2: @ %for.body
|
||||
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
|
||||
; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
|
||||
; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
|
||||
; CHECK-BE-NEXT: ldrsh.w r7, [r2, #2]
|
||||
; CHECK-BE-NEXT: uxth.w r6, lr
|
||||
; CHECK-BE-NEXT: smlabb r5, r5, lr, r12
|
||||
; CHECK-BE-NEXT: smlabb r12, r7, r4, r5
|
||||
; CHECK-BE-NEXT: ldrh lr, [r3, #2]!
|
||||
; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
|
||||
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
|
||||
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
|
||||
; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
|
||||
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
|
||||
; CHECK-BE-NEXT: subs r0, #1
|
||||
; CHECK-BE-NEXT: mul r1, r6, r1
|
||||
; CHECK-BE-NEXT: mul r1, lr, r1
|
||||
; CHECK-BE-NEXT: bne .LBB3_2
|
||||
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
|
||||
; CHECK-BE-NEXT: add.w r0, r12, r1
|
||||
; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-BE-NEXT: .LBB3_4:
|
||||
; CHECK-BE-NEXT: mov.w r12, #0
|
||||
; CHECK-BE-NEXT: movs r1, #0
|
||||
; CHECK-BE-NEXT: add.w r0, r12, r1
|
||||
; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
|
||||
entry:
|
||||
%cmp24 = icmp sgt i32 %arg, 0
|
||||
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
29
test/CodeGen/ARM/and-sext-combine.ll
Normal file
29
test/CodeGen/ARM/and-sext-combine.ll
Normal file
@ -0,0 +1,29 @@
|
||||
; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - -O3 \
|
||||
; RUN: -asm-verbose=0 | FileCheck %s
|
||||
|
||||
; This test exercises the folding of `VT = (and (sign_extend NarrowVT to
|
||||
; VT) #bitmask)` into `VT = (zero_extend NarrowVT to VT)` when
|
||||
; #bitmask value is the mask made by all ones that selects the value
|
||||
; of type NarrowVT inside the value of type VT. The folding is
|
||||
; implemented in `DAGCombiner::visitAND`.
|
||||
|
||||
; With this folding, the `and` of the "sign-extended load" of
|
||||
; `%b` in `f_i16_i32` is rendered as a zero extended load.
|
||||
|
||||
; CHECK-LABEL: f_i16_i32:
|
||||
; CHECK-NEXT: .fnstart
|
||||
; CHECK-NEXT: ldrh r1, [r1]
|
||||
; CHECK-NEXT: ldrsh r0, [r0]
|
||||
; CHECK-NEXT: smulbb r0, r0, r1
|
||||
; CHECK-NEXT: mul r0, r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
; Loads two i16 values, sign extends both to i32, and returns
; (sext(*%a) * sext(*%b)) * (sext(*%b) & 65535).
define i32 @f_i16_i32(i16* %a, i16* %b) {
|
||||
%1 = load i16, i16* %a, align 2
|
||||
%sext.1 = sext i16 %1 to i32
|
||||
%2 = load i16, i16* %b, align 2
|
||||
%sext.2 = sext i16 %2 to i32
|
||||
; Mask the sign-extended value of %b down to its low 16 bits (65535 = 0xFFFF).
; This is the `and (sign_extend ...), #bitmask` shape the test is about.
%masked = and i32 %sext.2, 65535
|
||||
; The unmasked sign-extended value of %b is still used here, alongside %a.
%mul = mul nsw i32 %sext.2, %sext.1
|
||||
; The final product consumes the masked value.
%count.next = mul i32 %mul, %masked
|
||||
ret i32 %count.next
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user