mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[Hexagon] Restrict compound instructions with constant value.
Having a constant value operand in the compound instruction is not always profitable. This patch improves coremark by ~4% on Hexagon. Differential Revision: https://reviews.llvm.org/D53152 llvm-svn: 344284
This commit is contained in:
parent
5658ce69fb
commit
dbfd952aff
@ -257,6 +257,23 @@ class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
|
||||
// Wraps the two-operand fragment P so that its second operand is
// complemented: the resulting fragment matches (P $A, (not $B)).
class Not2<PatFrag P>
|
||||
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
|
||||
|
||||
// If there is a constant operand that feeds the and/or instruction,
|
||||
// do not generate the compound instructions.
|
||||
// It is not always profitable, as sometimes we end up with a transfer.
|
||||
// Check the below example.
|
||||
// ra = #65280; rb = lsr(rb, #8); rc ^= and (rb, ra)
|
||||
// Instead this is preferable.
|
||||
// ra = and (#65280, lsr(ra, #8)); rb = xor(rb, ra)
|
||||
// Single-use fragment whose operand 1 is not an immediate.
// Reuses Op's operands, first fragment and operand transform, and adds a
// predicate that accepts the node only when hasOneUse(N) holds and operand 1
// is not a ConstantSDNode. A constant operand would first have to be
// transferred into a register before it could feed a compound instruction.
class Su_ni1<PatFrag Op>
  : PatFrag<Op.Operands, !head(Op.Fragments), [{
      // isa<> rather than dyn_cast<>: only the node kind is tested, the
      // casted pointer itself is never used.
      return hasOneUse(N) && !isa<ConstantSDNode>(N->getOperand(1));
    }],
    Op.OperandTransform>;
|
||||
|
||||
// Same operands, first fragment and operand transform as Op, with the added
// predicate that the matched node must satisfy hasOneUse(N).
class Su<PatFrag Op>
|
||||
: PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
|
||||
Op.OperandTransform>;
|
||||
@ -1336,16 +1353,16 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
|
||||
def: Pat<(add Sext64:$Rs, I64:$Rt),
|
||||
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
|
||||
|
||||
def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
|
||||
def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
|
||||
def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
|
||||
|
||||
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
|
||||
// one argument matches the patterns below, and with the other argument
|
||||
|
52
test/CodeGen/Hexagon/constant_compound.ll
Normal file
52
test/CodeGen/Hexagon/constant_compound.ll
Normal file
@ -0,0 +1,52 @@
|
||||
; RUN: llc -march=hexagon < %s 2>&1 | FileCheck %s
|
||||
|
||||
; Generating a compound instruction with a constant is not profitable.
|
||||
; The constant needs to be kept in a register before it is fed to compound
|
||||
; instruction.
|
||||
; Before, we are generating
|
||||
; ra = #65280;
|
||||
; rb = lsr(rb, #8);
|
||||
; rc ^= and (rb, ra)
|
||||
; Now, we are generating
|
||||
; ra = and (#65280, lsr(ra, #8));
|
||||
; rb = xor(rb, ra)
|
||||
|
||||
; CHECK: and(##65280,lsr(r
|
||||
; CHECK-NOT: ^= and
|
||||
|
||||
define dso_local zeroext i16 @test_compound(i16 zeroext %varA, i16 zeroext %varB) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%tmp = zext i16 %varB to i32
|
||||
%tmp1 = and i16 %varA, 255
|
||||
%tmp2 = zext i16 %tmp1 to i32
|
||||
%.masked.i = and i32 %tmp, 255
|
||||
%tmp3 = xor i32 %.masked.i, %tmp2
|
||||
%tmp4 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp3, i32 255) #2
|
||||
%tmp5 = trunc i64 %tmp4 to i32
|
||||
%tmp6 = and i32 %tmp5, 255
|
||||
%tmp7 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp6, i32 81922) #2
|
||||
%tmp8 = trunc i64 %tmp7 to i32
|
||||
%tmp9 = xor i32 %tmp8, %tmp
|
||||
%tmp10 = lshr i32 %tmp9, 8
|
||||
%tmp11 = lshr i16 %varA, 8
|
||||
%conv2 = zext i16 %tmp11 to i32
|
||||
%tmp12 = and i32 %tmp10, 65280
|
||||
%.masked.i7 = and i32 %tmp10, 255
|
||||
%tmp13 = xor i32 %.masked.i7, %conv2
|
||||
%tmp14 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp13, i32 255) #2
|
||||
%tmp15 = trunc i64 %tmp14 to i32
|
||||
%tmp16 = and i32 %tmp15, 255
|
||||
%tmp17 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %tmp16, i32 81922) #2
|
||||
%tmp18 = trunc i64 %tmp17 to i32
|
||||
%tmp19 = xor i32 %tmp12, %tmp18
|
||||
%tmp20 = lshr i32 %tmp19, 8
|
||||
%tmp21 = trunc i32 %tmp20 to i16
|
||||
ret i16 %tmp21
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i64 @llvm.hexagon.M4.pmpyw(i32, i32) #1
|
||||
|
||||
attributes #0 = { nounwind readnone "target-cpu"="hexagonv65" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind }
|
Loading…
x
Reference in New Issue
Block a user