1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[SelectionDAG] don't split branch on logic-of-vector-compares

SelectionDAGBuilder converts logic-of-compares into multiple branches based
on a boolean TLI setting in isJumpExpensive(). But that probably never
considered the pattern of extracted bools from a vector compare - it seems
unlikely that we would want to turn vector logic into control-flow.

The motivating x86 reduction case is shown in PR44565:
https://bugs.llvm.org/show_bug.cgi?id=44565
...and that test shows the expected improvement from using pmovmsk codegen.

For AArch64, I modified the test to include an extra op because the simpler
test gets transformed by a codegen invocation of SimplifyCFG.

Differential Revision: https://reviews.llvm.org/D82602
This commit is contained in:
Sanjay Patel 2020-07-02 16:48:09 -04:00
parent 48d2183b14
commit 6e12757f99
3 changed files with 16 additions and 15 deletions

View File

@@ -2303,7 +2303,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
// As long as jumps are not expensive, this should improve performance.
// As long as jumps are not expensive (exceptions for multi-use logic ops,
// unpredictable branches, and vector extracts because those jumps are likely
// expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -2318,9 +2320,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
!I.hasMetadata(LLVMContext::MD_unpredictable) &&
(Opcode == Instruction::And || Opcode == Instruction::Or)) {
(Opcode == Instruction::And || Opcode == Instruction::Or) &&
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),

View File

@@ -6,16 +6,15 @@ define i32 @vec_extract_branch(<2 x double> %x, i32 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: fcmgt v0.2d, v0.2d, #0.0
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tbz w8, #0, .LBB0_3
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: tbz w8, #0, .LBB0_3
; CHECK-NEXT: // %bb.2: // %true
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: tbz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1: // %true
; CHECK-NEXT: mov w8, #42
; CHECK-NEXT: sdiv w0, w8, w0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %false
; CHECK-NEXT: .LBB0_2: // %false
; CHECK-NEXT: mov w0, #88
; CHECK-NEXT: ret
%t1 = fcmp ogt <2 x double> %x, zeroinitializer

View File

@@ -323,15 +323,12 @@ define i32 @vec_extract_branch(<2 x double> %x) {
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: cmpltpd %xmm0, %xmm1
; CHECK-NEXT: movmskpd %xmm1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB16_3
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: shrb %al
; CHECK-NEXT: je .LBB16_3
; CHECK-NEXT: # %bb.2: # %true
; CHECK-NEXT: cmpb $3, %al
; CHECK-NEXT: jne .LBB16_2
; CHECK-NEXT: # %bb.1: # %true
; CHECK-NEXT: movl $42, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB16_3: # %false
; CHECK-NEXT: .LBB16_2: # %false
; CHECK-NEXT: movl $88, %eax
; CHECK-NEXT: retq
%t1 = fcmp ogt <2 x double> %x, zeroinitializer