[x86] improve CMOV codegen by pushing add into operands, part 2
This is a minimal extension of D106607 to allow folding for 2 non-zero constants that can be materialized as immediates. In the reduced test examples, we save 1 instruction by rolling the constants into LEA/ADD. In the motivating test from the bullet benchmark, we absorb both of the constant moves into add ops via LEA magic, so we reduce by 2 instructions.

Differential Revision: https://reviews.llvm.org/D106684
parent 697f5408f6
commit f3223b51e0
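To make the intent concrete, here is a minimal C++ illustration (hypothetical source, not taken from the commit; function and parameter names are invented) of the kind of code that lowers to the add-of-select-of-constants pattern targeted here:

// Roughly the shape exercised by select_20_43_i32 in the test diff below:
// a select between two non-zero constants that both fit in 32-bit immediates,
// feeding an add.
//
// Before this patch: materialize 20 and 43 with movs, cmov between them, then add.
// After this patch: the add is folded into each arm (LEA/ADD with an immediate),
// and the cmov picks between the two already-added values.
int pick(int offset, long long x) {
  return offset + (x > 41 ? 20 : 43);
}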
@@ -49873,14 +49873,20 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
 /// count. We do this with CMOV rather the generic 'select' because there are
 /// earlier folds that may be used to turn select-of-constants into logic hacks.
 static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
-  // This checks for a zero operand because add-of-0 gets simplified away.
-  // TODO: Allow generating an extra add?
+  // If an operand is zero, add-of-0 gets simplified away, so that's clearly
+  // better because we eliminate 1-2 instructions. This transform is still
+  // an improvement without zero operands because we trade 2 move constants and
+  // 1 add for 2 adds (LEA) as long as the constants can be represented as
+  // immediate asm operands (fit in 32-bits).
   auto isSuitableCmov = [](SDValue V) {
     if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
       return false;
-    return isa<ConstantSDNode>(V.getOperand(0)) &&
-           isa<ConstantSDNode>(V.getOperand(1)) &&
-           (isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)));
+    if (!isa<ConstantSDNode>(V.getOperand(0)) ||
+        !isa<ConstantSDNode>(V.getOperand(1)))
+      return false;
+    return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) ||
+           (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
+            V.getConstantOperandAPInt(1).isSignedIntN(32));
   };

   // Match an appropriate CMOV as the first operand of the add.
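The hunk above only shows the relaxed isSuitableCmov predicate; the rest of pushAddIntoCmovOfConsts is not part of this diff. As a rough sketch of the overall transform (assumed shape, not verbatim LLVM source; the helper name is invented), the combine distributes the add into both CMOV value operands:

// Sketch only: add (cmov Cond, C0, C1), OtherOp
//          --> cmov Cond, (add OtherOp, C0), (add OtherOp, C1)
// The zero-constant case disappears entirely because add-of-0 folds away; with
// two non-zero 32-bit immediates, the new adds become LEA/ADD-immediate ops.
static SDValue pushAddIntoCmovOfConstsSketch(SDNode *N, SelectionDAG &DAG) {
  // Predicate as added in the hunk above.
  auto isSuitableCmov = [](SDValue V) {
    if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
      return false;
    if (!isa<ConstantSDNode>(V.getOperand(0)) ||
        !isa<ConstantSDNode>(V.getOperand(1)))
      return false;
    return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) ||
           (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
            V.getConstantOperandAPInt(1).isSignedIntN(32));
  };

  // Either add operand may be the CMOV-of-constants.
  SDValue Cmov = N->getOperand(0);
  SDValue OtherOp = N->getOperand(1);
  if (!isSuitableCmov(Cmov))
    std::swap(Cmov, OtherOp);
  if (!isSuitableCmov(Cmov))
    return SDValue();

  // Fold the add into both constant arms and keep the condition/flags operands.
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue Sum0 = DAG.getNode(ISD::ADD, DL, VT, OtherOp, Cmov.getOperand(0));
  SDValue Sum1 = DAG.getNode(ISD::ADD, DL, VT, OtherOp, Cmov.getOperand(1));
  return DAG.getNode(X86ISD::CMOV, DL, VT, Sum0, Sum1, Cmov.getOperand(2),
                     Cmov.getOperand(3));
}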
@@ -88,6 +88,8 @@ define i32 @select_consts_use_i32(i32 %offset, i64 %x, i32* %p) {
   ret i32 %r
 }

+; Special-case LEA hacks are done before we try to push the add into a CMOV.
+
 define i32 @select_40_43_i32(i32 %offset, i64 %x) {
 ; CHECK-LABEL: select_40_43_i32:
 ; CHECK: # %bb.0:
@@ -133,11 +135,10 @@ define i32 @select_1_0_i32(i32 %offset, i64 %x) {
 define i64 @select_max32_2_i64(i64 %offset, i64 %x) {
 ; CHECK-LABEL: select_max32_2_i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: leaq 2(%rdi), %rax
+; CHECK-NEXT: addq $2147483647, %rdi # imm = 0x7FFFFFFF
 ; CHECK-NEXT: cmpq $41, %rsi
-; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
-; CHECK-NEXT: movl $2, %eax
-; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: cmovneq %rdi, %rax
 ; CHECK-NEXT: retq
   %b = icmp ne i64 %x, 41
   %s = select i1 %b, i64 2147483647, i64 2
@@ -207,11 +208,11 @@ define i64 @select_big_bigger_i64(i64 %offset, i64 %x) {
 define i32 @select_20_43_i32(i32 %offset, i64 %x) {
 ; CHECK-LABEL: select_20_43_i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal 43(%rdi), %eax
+; CHECK-NEXT: addl $20, %edi
 ; CHECK-NEXT: cmpq $42, %rsi
-; CHECK-NEXT: movl $20, %ecx
-; CHECK-NEXT: movl $43, %eax
-; CHECK-NEXT: cmovgel %ecx, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: cmovgel %edi, %eax
 ; CHECK-NEXT: retq
   %b = icmp sgt i64 %x, 41
   %s = select i1 %b, i32 20, i32 43
@@ -222,11 +223,11 @@ define i32 @select_20_43_i32(i32 %offset, i64 %x) {
 define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
 ; CHECK-LABEL: select_n2_17_i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal 17(%rdi), %eax
+; CHECK-NEXT: addl $65534, %edi # imm = 0xFFFE
 ; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: movl $65534, %ecx # imm = 0xFFFE
-; CHECK-NEXT: movl $17, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: cmovnel %edi, %eax
 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: retq
   %s = select i1 %b, i16 -2, i16 17
@@ -241,14 +242,12 @@ define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
 define i16* @bullet(i1 %b, %class.btAxis* readnone %ptr, i64 %idx) {
 ; CHECK-LABEL: bullet:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: leaq (%rdx,%rdx,4), %rcx
-; CHECK-NEXT: shlq $4, %rcx
-; CHECK-NEXT: addq %rsi, %rcx
+; CHECK-NEXT: leaq (%rdx,%rdx,4), %rax
+; CHECK-NEXT: shlq $4, %rax
+; CHECK-NEXT: leaq 60(%rsi,%rax), %rcx
+; CHECK-NEXT: leaq 66(%rsi,%rax), %rax
 ; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: movl $60, %edx
-; CHECK-NEXT: movl $66, %eax
-; CHECK-NEXT: cmovneq %rdx, %rax
-; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: cmovneq %rcx, %rax
 ; CHECK-NEXT: retq
   %gep2 = getelementptr inbounds %class.btAxis, %class.btAxis* %ptr, i64 %idx, i32 2, i64 0
   %gep1 = getelementptr inbounds %class.btAxis, %class.btAxis* %ptr, i64 %idx, i32 1, i64 0