mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[VectorCombine] fix cost calc for extract-cmp
getOperationCost() does not return the cost we want here: it is not the throughput value that the rest of the calculation uses. We may want to switch everything in this code to use the getInstructionThroughput() wrapper to avoid this kind of problem, but I'll look at that as a follow-up, because using its optional parameters can introduce other logical differences (we'd need to speculatively create the vector instruction to make a fair(er) comparison).
This commit is contained in:
parent
e1da7dc606
commit
ace163be2a
@ -58,8 +58,9 @@ static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) {
|
||||
// ((2 * extract) + scalar cmp) < (vector cmp + extract) ?
|
||||
int ExtractCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
|
||||
VecTy, C->getZExtValue());
|
||||
int ScalarCmpCost = TTI.getOperationCost(CmpOpcode, ScalarTy);
|
||||
int VecCmpCost = TTI.getOperationCost(CmpOpcode, VecTy);
|
||||
int ScalarCmpCost = TTI.getCmpSelInstrCost(CmpOpcode, ScalarTy, I.getType());
|
||||
int VecCmpCost = TTI.getCmpSelInstrCost(CmpOpcode, VecTy,
|
||||
CmpInst::makeCmpResultType(VecTy));
|
||||
|
||||
int ScalarCost = 2 * ExtractCost + ScalarCmpCost;
|
||||
int VecCost = VecCmpCost + ExtractCost +
|
||||
|
@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- | FileCheck %s
|
||||
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
|
||||
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
|
||||
|
||||
define i1 @cmp_v4i32(<4 x float> %arg, <4 x float> %arg1) {
|
||||
; CHECK-LABEL: @cmp_v4i32(
|
||||
@ -57,18 +58,32 @@ bb18:
|
||||
}
|
||||
|
||||
define i32 @cmp_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
|
||||
; CHECK-LABEL: @cmp_v2f64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]]
|
||||
; CHECK: t:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99
|
||||
; CHECK-NEXT: ret i32 [[E]]
|
||||
; CHECK: f:
|
||||
; CHECK-NEXT: ret i32 0
|
||||
; SSE-LABEL: @cmp_v2f64(
|
||||
; SSE-NEXT: entry:
|
||||
; SSE-NEXT: [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
|
||||
; SSE-NEXT: [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
|
||||
; SSE-NEXT: [[CMP1:%.*]] = fcmp oeq double [[X1]], [[Y1]]
|
||||
; SSE-NEXT: br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
|
||||
; SSE: t:
|
||||
; SSE-NEXT: [[Z1:%.*]] = extractelement <2 x double> [[Z:%.*]], i32 1
|
||||
; SSE-NEXT: [[CMP2:%.*]] = fcmp ogt double [[Y1]], [[Z1]]
|
||||
; SSE-NEXT: [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
|
||||
; SSE-NEXT: ret i32 [[E]]
|
||||
; SSE: f:
|
||||
; SSE-NEXT: ret i32 0
|
||||
;
|
||||
; AVX-LABEL: @cmp_v2f64(
|
||||
; AVX-NEXT: entry:
|
||||
; AVX-NEXT: [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
|
||||
; AVX-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
|
||||
; AVX-NEXT: br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]]
|
||||
; AVX: t:
|
||||
; AVX-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
|
||||
; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
|
||||
; AVX-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99
|
||||
; AVX-NEXT: ret i32 [[E]]
|
||||
; AVX: f:
|
||||
; AVX-NEXT: ret i32 0
|
||||
;
|
||||
entry:
|
||||
%x1 = extractelement <2 x double> %x, i32 1
|
||||
|
Loading…
Reference in New Issue
Block a user