mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[CostModel][X86] Add explicit fcmp costs for pre-SSE42 targets
Typical throughputs: cmpss/cmpps = 1cy and cmpsd/cmppd = 2cy before the Core2 era.

llvm-svn: 351684
This commit is contained in:
parent
41ffe33de4
commit
b7bbc260af
@ -1686,12 +1686,19 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE2CostTbl[] = {
|
||||
{ ISD::SETCC, MVT::v2f64, 2 },
|
||||
{ ISD::SETCC, MVT::f64, 1 },
|
||||
{ ISD::SETCC, MVT::v2i64, 8 },
|
||||
{ ISD::SETCC, MVT::v4i32, 1 },
|
||||
{ ISD::SETCC, MVT::v8i16, 1 },
|
||||
{ ISD::SETCC, MVT::v16i8, 1 },
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE1CostTbl[] = {
|
||||
{ ISD::SETCC, MVT::v4f32, 2 },
|
||||
{ ISD::SETCC, MVT::f32, 1 },
|
||||
};
|
||||
|
||||
if (ST->hasBWI())
|
||||
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
@ -1716,6 +1723,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE1())
|
||||
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -341,114 +341,33 @@ define i32 @maxi32(i32) {
|
||||
}
|
||||
|
||||
define float @maxf8(float) {
|
||||
; SSE-LABEL: @maxf8(
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
|
||||
; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
|
||||
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
|
||||
; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
|
||||
; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
|
||||
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
|
||||
; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
|
||||
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
|
||||
; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
|
||||
; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
|
||||
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
|
||||
; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
|
||||
; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
|
||||
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
|
||||
; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
|
||||
; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
|
||||
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
|
||||
; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
|
||||
; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
|
||||
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
|
||||
; SSE-NEXT: ret float [[TMP23]]
|
||||
;
|
||||
; AVX-LABEL: @maxf8(
|
||||
; AVX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
|
||||
; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
|
||||
; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; AVX-NEXT: ret float [[TMP16]]
|
||||
;
|
||||
; AVX2-LABEL: @maxf8(
|
||||
; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
|
||||
; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
|
||||
; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]]
|
||||
; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
|
||||
; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; AVX2-NEXT: ret float [[TMP16]]
|
||||
;
|
||||
; SKX-LABEL: @maxf8(
|
||||
; SKX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
|
||||
; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
|
||||
; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]]
|
||||
; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
|
||||
; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; SKX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; SKX-NEXT: ret float [[TMP16]]
|
||||
; CHECK-LABEL: @maxf8(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; CHECK-NEXT: ret float [[TMP16]]
|
||||
;
|
||||
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
|
||||
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
|
||||
@ -476,195 +395,52 @@ define float @maxf8(float) {
|
||||
}
|
||||
|
||||
define float @maxf16(float) {
|
||||
; SSE-LABEL: @maxf16(
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
|
||||
; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
|
||||
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
|
||||
; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
|
||||
; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
|
||||
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
|
||||
; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
|
||||
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
|
||||
; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
|
||||
; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
|
||||
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
|
||||
; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
|
||||
; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
|
||||
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
|
||||
; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
|
||||
; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
|
||||
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
|
||||
; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
|
||||
; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
|
||||
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
|
||||
; SSE-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
|
||||
; SSE-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
|
||||
; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
|
||||
; SSE-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
|
||||
; SSE-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
|
||||
; SSE-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
|
||||
; SSE-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
|
||||
; SSE-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
|
||||
; SSE-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
|
||||
; SSE-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
|
||||
; SSE-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
|
||||
; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
|
||||
; SSE-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
|
||||
; SSE-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
|
||||
; SSE-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
|
||||
; SSE-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
|
||||
; SSE-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
|
||||
; SSE-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
|
||||
; SSE-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
|
||||
; SSE-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
|
||||
; SSE-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
|
||||
; SSE-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
|
||||
; SSE-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
|
||||
; SSE-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
|
||||
; SSE-NEXT: ret float [[TMP47]]
|
||||
;
|
||||
; AVX-LABEL: @maxf16(
|
||||
; AVX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
|
||||
; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
|
||||
; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
|
||||
; AVX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
|
||||
; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
|
||||
; AVX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
|
||||
; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
|
||||
; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
|
||||
; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
|
||||
; AVX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
|
||||
; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
|
||||
; AVX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
|
||||
; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
|
||||
; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
|
||||
; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
|
||||
; AVX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
|
||||
; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
|
||||
; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]]
|
||||
; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]]
|
||||
; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
|
||||
; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
|
||||
; AVX-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
|
||||
; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
|
||||
; AVX-NEXT: ret float [[TMP32]]
|
||||
;
|
||||
; AVX2-LABEL: @maxf16(
|
||||
; AVX2-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
|
||||
; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
|
||||
; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
|
||||
; AVX2-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
|
||||
; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
|
||||
; AVX2-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
|
||||
; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
|
||||
; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
|
||||
; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
|
||||
; AVX2-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
|
||||
; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
|
||||
; AVX2-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
|
||||
; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
|
||||
; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
|
||||
; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
|
||||
; AVX2-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
|
||||
; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
|
||||
; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]]
|
||||
; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]]
|
||||
; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
|
||||
; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
|
||||
; AVX2-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
|
||||
; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
|
||||
; AVX2-NEXT: ret float [[TMP32]]
|
||||
;
|
||||
; SKX-LABEL: @maxf16(
|
||||
; SKX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
|
||||
; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
|
||||
; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
|
||||
; SKX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
|
||||
; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
|
||||
; SKX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
|
||||
; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
|
||||
; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
|
||||
; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
|
||||
; SKX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
|
||||
; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
|
||||
; SKX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
|
||||
; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
|
||||
; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
|
||||
; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
|
||||
; SKX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
|
||||
; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
|
||||
; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]]
|
||||
; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]]
|
||||
; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
|
||||
; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
|
||||
; SKX-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
|
||||
; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
|
||||
; SKX-NEXT: ret float [[TMP32]]
|
||||
; CHECK-LABEL: @maxf16(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x float> [[RDX_MINMAX_SELECT]], <16 x float> [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x float> [[RDX_MINMAX_SELECT3]], <16 x float> [[RDX_SHUF4]]
|
||||
; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
|
||||
; CHECK-NEXT: ret float [[TMP32]]
|
||||
;
|
||||
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
|
||||
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
|
||||
|
Loading…
Reference in New Issue
Block a user