mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AArch64] Change order of candidate FMLS patterns
r319980 added new patterns to the machine combiner for transforming (fsub (fmul x y) z) into (fmla (fneg z) x y). That is, fsubs whose first source operand is an fmul are now transformed. We previously only matched the case where the second source operand of an fsub was an fmul, transforming (fsub z (fmul x y)) into (fmls z x y). Now, if we have an fsub where both source operands are fmuls, both of the above patterns are applicable. However, the order in which we add the patterns to the list of candidates determines which transformation takes place, since only the first pattern that matches will be used. This patch changes the order in which these two patterns are added to the list of candidates so that we prefer the case where the second source operand is an fmul (the fmls case) over the other one (the fmla/fneg case). When both source operands are fmuls, this ordering results in fewer instructions. Differential Revision: https://reviews.llvm.org/D41587 llvm-svn: 321491
This commit is contained in:
parent
3f0563fa89
commit
440296ff6f
@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv2f32:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
|
||||
Found = true;
|
||||
}
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(2),
|
||||
AArch64::FMULv2i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
|
||||
@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv2f64:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2i64_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
|
||||
AArch64::FMULv2i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2f64)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
|
||||
AArch64::FMULv2f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv2f64:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(2),
|
||||
AArch64::FMULv2i64_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
|
||||
@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv4f32:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
|
||||
AArch64::FMULv2i64_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
|
||||
AArch64::FMULv2f64)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv4f32:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(2),
|
||||
AArch64::FMULv4i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
|
||||
@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
|
||||
Found = true;
|
||||
}
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return Found;
|
||||
@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
|
||||
It = MBB.insert(It, LDRXpost);
|
||||
|
||||
return It;
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefix=UNPROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
|
||||
#
|
||||
name: f1_2s
|
||||
registers:
|
||||
@ -80,3 +80,82 @@ body: |
|
||||
# PROFITABLE-LABEL: name: f1_2d
|
||||
# PROFITABLE: %5:fpr128 = FNEGv2f64 %2
|
||||
# PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1
|
||||
---
|
||||
name: f1_both_fmul_2s
|
||||
registers:
|
||||
- { id: 0, class: fpr64 }
|
||||
- { id: 1, class: fpr64 }
|
||||
- { id: 2, class: fpr64 }
|
||||
- { id: 3, class: fpr64 }
|
||||
- { id: 4, class: fpr64 }
|
||||
- { id: 5, class: fpr64 }
|
||||
- { id: 6, class: fpr64 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%3:fpr64 = COPY %q3
|
||||
%2:fpr64 = COPY %q2
|
||||
%1:fpr64 = COPY %q1
|
||||
%0:fpr64 = COPY %q0
|
||||
%4:fpr64 = FMULv2f32 %0, %1
|
||||
%5:fpr64 = FMULv2f32 %2, %3
|
||||
%6:fpr64 = FSUBv2f32 killed %4, %5
|
||||
%q0 = COPY %6
|
||||
RET_ReallyLR implicit %q0
|
||||
|
||||
...
|
||||
# ALL-LABEL: name: f1_both_fmul_2s
|
||||
# ALL: %4:fpr64 = FMULv2f32 %0, %1
|
||||
# ALL-NEXT: FMLSv2f32 killed %4, %2, %3
|
||||
---
|
||||
name: f1_both_fmul_4s
|
||||
registers:
|
||||
- { id: 0, class: fpr128 }
|
||||
- { id: 1, class: fpr128 }
|
||||
- { id: 2, class: fpr128 }
|
||||
- { id: 3, class: fpr128 }
|
||||
- { id: 4, class: fpr128 }
|
||||
- { id: 5, class: fpr128 }
|
||||
- { id: 6, class: fpr128 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%3:fpr128 = COPY %q3
|
||||
%2:fpr128 = COPY %q2
|
||||
%1:fpr128 = COPY %q1
|
||||
%0:fpr128 = COPY %q0
|
||||
%4:fpr128 = FMULv4f32 %0, %1
|
||||
%5:fpr128 = FMULv4f32 %2, %3
|
||||
%6:fpr128 = FSUBv4f32 killed %4, %5
|
||||
%q0 = COPY %6
|
||||
RET_ReallyLR implicit %q0
|
||||
|
||||
...
|
||||
# ALL-LABEL: name: f1_both_fmul_4s
|
||||
# ALL: %4:fpr128 = FMULv4f32 %0, %1
|
||||
# ALL-NEXT: FMLSv4f32 killed %4, %2, %3
|
||||
---
|
||||
name: f1_both_fmul_2d
|
||||
registers:
|
||||
- { id: 0, class: fpr128 }
|
||||
- { id: 1, class: fpr128 }
|
||||
- { id: 2, class: fpr128 }
|
||||
- { id: 3, class: fpr128 }
|
||||
- { id: 4, class: fpr128 }
|
||||
- { id: 5, class: fpr128 }
|
||||
- { id: 6, class: fpr128 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%3:fpr128 = COPY %q3
|
||||
%2:fpr128 = COPY %q2
|
||||
%1:fpr128 = COPY %q1
|
||||
%0:fpr128 = COPY %q0
|
||||
%4:fpr128 = FMULv2f64 %0, %1
|
||||
%5:fpr128 = FMULv2f64 %2, %3
|
||||
%6:fpr128 = FSUBv2f64 killed %4, %5
|
||||
%q0 = COPY %6
|
||||
RET_ReallyLR implicit %q0
|
||||
|
||||
...
|
||||
# ALL-LABEL: name: f1_both_fmul_2d
|
||||
# ALL: %4:fpr128 = FMULv2f64 %0, %1
|
||||
# ALL-NEXT: FMLSv2f64 killed %4, %2, %3
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user