mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[VectorCombine] scalarize compares with insertelement operand(s)
Generalize scalarization (recently enhanced with D80885) to allow compares as well as binops. Similar to binops, we are avoiding scalarization of a loaded value because that could avoid a register transfer in codegen. This requires 1 extra predicate that I am aware of: we do not want to scalarize the condition value of a vector select. That might also invert a transform that we do in instcombine that prefers a vector condition operand for a vector select. I think this is the final step in solving PR37463: https://bugs.llvm.org/show_bug.cgi?id=37463 Differential Revision: https://reviews.llvm.org/D81661
This commit is contained in:
parent
b8d56677b3
commit
fe400c6e8c
@ -37,6 +37,7 @@ STATISTIC(NumVecCmp, "Number of vector compares formed");
|
||||
STATISTIC(NumVecBO, "Number of vector binops formed");
|
||||
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
|
||||
STATISTIC(NumScalarBO, "Number of scalar binops formed");
|
||||
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
|
||||
|
||||
static cl::opt<bool> DisableVectorCombine(
|
||||
"disable-vector-combine", cl::init(false), cl::Hidden,
|
||||
@ -312,18 +313,31 @@ static bool foldBitcastShuf(Instruction &I, const TargetTransformInfo &TTI) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Match a vector binop instruction with inserted scalar operands and convert
|
||||
/// to scalar binop followed by insertelement.
|
||||
static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
|
||||
/// Match a vector binop or compare instruction with at least one inserted
|
||||
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
|
||||
static bool scalarizeBinopOrCmp(Instruction &I,
|
||||
const TargetTransformInfo &TTI) {
|
||||
CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
|
||||
Value *Ins0, *Ins1;
|
||||
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))))
|
||||
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
|
||||
!match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
|
||||
return false;
|
||||
|
||||
// Do not convert the vector condition of a vector select into a scalar
|
||||
// condition. That may cause problems for codegen because of differences in
|
||||
// boolean formats and register-file transfers.
|
||||
// TODO: Can we account for that in the cost model?
|
||||
bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
|
||||
if (IsCmp)
|
||||
for (User *U : I.users())
|
||||
if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
|
||||
return false;
|
||||
|
||||
// Match against one or both scalar values being inserted into constant
|
||||
// vectors:
|
||||
// vec_bo VecC0, (inselt VecC1, V1, Index)
|
||||
// vec_bo (inselt VecC0, V0, Index), VecC1
|
||||
// vec_bo (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
|
||||
// vec_op VecC0, (inselt VecC1, V1, Index)
|
||||
// vec_op (inselt VecC0, V0, Index), VecC1
|
||||
// vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
|
||||
// TODO: Deal with mismatched index constants and variable indexes?
|
||||
Constant *VecC0 = nullptr, *VecC1 = nullptr;
|
||||
Value *V0 = nullptr, *V1 = nullptr;
|
||||
@ -360,9 +374,15 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
|
||||
(ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy()) &&
|
||||
"Unexpected types for insert into binop");
|
||||
|
||||
Instruction::BinaryOps Opcode = cast<BinaryOperator>(&I)->getOpcode();
|
||||
int ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
|
||||
int VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
|
||||
unsigned Opcode = I.getOpcode();
|
||||
int ScalarOpCost, VectorOpCost;
|
||||
if (IsCmp) {
|
||||
ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
|
||||
VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
|
||||
} else {
|
||||
ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
|
||||
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
|
||||
}
|
||||
|
||||
// Get cost estimate for the insert element. This cost will factor into
|
||||
// both sequences.
|
||||
@ -378,18 +398,26 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
|
||||
if (OldCost < NewCost)
|
||||
return false;
|
||||
|
||||
// vec_bo (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
|
||||
// inselt NewVecC, (scalar_bo V0, V1), Index
|
||||
++NumScalarBO;
|
||||
IRBuilder<> Builder(&I);
|
||||
// vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
|
||||
// inselt NewVecC, (scalar_op V0, V1), Index
|
||||
if (IsCmp)
|
||||
++NumScalarCmp;
|
||||
else
|
||||
++NumScalarBO;
|
||||
|
||||
// For constant cases, extract the scalar element, this should constant fold.
|
||||
IRBuilder<> Builder(&I);
|
||||
if (IsConst0)
|
||||
V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
|
||||
if (IsConst1)
|
||||
V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
|
||||
|
||||
Value *Scalar = Builder.CreateBinOp(Opcode, V0, V1, I.getName() + ".scalar");
|
||||
Value *Scalar =
|
||||
IsCmp ? Opcode == Instruction::FCmp ? Builder.CreateFCmp(Pred, V0, V1)
|
||||
: Builder.CreateICmp(Pred, V0, V1)
|
||||
: Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
|
||||
|
||||
Scalar->setName(I.getName() + ".scalar");
|
||||
|
||||
// All IR flags are safe to back-propagate. There is no potential for extra
|
||||
// poison to be created by the scalar instruction.
|
||||
@ -397,7 +425,8 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
|
||||
ScalarInst->copyIRFlags(&I);
|
||||
|
||||
// Fold the vector constants in the original vectors into a new base vector.
|
||||
Constant *NewVecC = ConstantExpr::get(Opcode, VecC0, VecC1);
|
||||
Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
|
||||
: ConstantExpr::get(Opcode, VecC0, VecC1);
|
||||
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
|
||||
I.replaceAllUsesWith(Insert);
|
||||
Insert->takeName(&I);
|
||||
@ -425,7 +454,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
|
||||
continue;
|
||||
MadeChange |= foldExtractExtract(I, TTI);
|
||||
MadeChange |= foldBitcastShuf(I, TTI);
|
||||
MadeChange |= scalarizeBinop(I, TTI);
|
||||
MadeChange |= scalarizeBinopOrCmp(I, TTI);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,9 +9,8 @@ declare void @usef(<4 x float>)
|
||||
|
||||
define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
|
||||
; CHECK-LABEL: @ins0_ins0_i8(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq <16 x i8> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <16 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
|
||||
@ -24,9 +23,8 @@ define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
|
||||
|
||||
define <8 x i1> @ins5_ins5_i16(i16 %x, i16 %y) {
|
||||
; CHECK-LABEL: @ins5_ins5_i16(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i8 5
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <8 x i16> undef, i16 [[Y:%.*]], i32 5
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp sgt <8 x i16> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i1> undef, i1 [[R_SCALAR]], i64 5
|
||||
; CHECK-NEXT: ret <8 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <8 x i16> undef, i16 %x, i8 5
|
||||
@ -39,9 +37,8 @@ define <8 x i1> @ins5_ins5_i16(i16 %x, i16 %y) {
|
||||
|
||||
define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) {
|
||||
; CHECK-LABEL: @ins1_ins1_i64(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[X:%.*]], i64 1
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> <i64 1, i64 -1>, i64 [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp sle <2 x i64> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sle i64 [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 false>, i1 [[R_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <2 x i64> zeroinitializer, i64 %x, i64 1
|
||||
@ -54,9 +51,8 @@ define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) {
|
||||
|
||||
define <2 x i1> @ins0_ins0_f64(double %x, double %y) {
|
||||
; CHECK-LABEL: @ins0_ins0_f64(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x double> undef, double [[Y:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = fcmp nnan ninf uge <2 x double> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ninf uge double [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <2 x double> undef, double %x, i32 0
|
||||
@ -84,9 +80,8 @@ define <16 x i1> @ins1_ins0_i8(i8 %x, i8 %y) {
|
||||
|
||||
define <4 x i1> @ins0_ins0_i32(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: @ins0_ins0_i32(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne <4 x i32> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <4 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
@ -101,8 +96,8 @@ define <4 x i1> @ins0_ins0_i32_use(i32 %x, i32 %y) {
|
||||
; CHECK-LABEL: @ins0_ins0_i32_use(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ugt <4 x i32> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <4 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
@ -116,10 +111,10 @@ define <4 x i1> @ins0_ins0_i32_use(i32 %x, i32 %y) {
|
||||
|
||||
define <4 x i1> @ins1_ins1_f32_use(float %x, float %y) {
|
||||
; CHECK-LABEL: @ins1_ins1_f32_use(
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 1
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
|
||||
; CHECK-NEXT: [[R:%.*]] = fcmp ogt <4 x float> [[I0]], [[I1]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp ogt float [[X:%.*]], [[Y]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <4 x i1> [[R]]
|
||||
;
|
||||
%i0 = insertelement <4 x float> undef, float %x, i32 1
|
||||
@ -150,8 +145,8 @@ define <4 x i1> @ins2_ins2_f32_uses(float %x, float %y) {
|
||||
|
||||
define <2 x i1> @constant_op1_i64(i64 %x) {
|
||||
; CHECK-LABEL: @constant_op1_i64(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[INS]], <i64 42, i64 undef>
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ne i64 [[X:%.*]], 42
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> undef, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
@ -161,8 +156,8 @@ define <2 x i1> @constant_op1_i64(i64 %x) {
|
||||
|
||||
define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
|
||||
; CHECK-LABEL: @constant_op1_i64_not_undef_lane(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp sge <2 x i64> [[INS]], <i64 42, i64 -42>
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sge i64 [[X:%.*]], 42
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
@ -170,6 +165,8 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
|
||||
ret <2 x i1> %r
|
||||
}
|
||||
|
||||
; negative test - load prevents the transform
|
||||
|
||||
define <2 x i1> @constant_op1_i64_load(i64* %p) {
|
||||
; CHECK-LABEL: @constant_op1_i64_load(
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4
|
||||
@ -185,8 +182,8 @@ define <2 x i1> @constant_op1_i64_load(i64* %p) {
|
||||
|
||||
define <4 x i1> @constant_op0_i32(i32 %x) {
|
||||
; CHECK-LABEL: @constant_op0_i32(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> <i32 undef, i32 -42, i32 undef, i32 undef>, [[INS]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ult i32 -42, [[X:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <4 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <4 x i32> undef, i32 %x, i32 1
|
||||
@ -196,8 +193,8 @@ define <4 x i1> @constant_op0_i32(i32 %x) {
|
||||
|
||||
define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) {
|
||||
; CHECK-LABEL: @constant_op0_i32_not_undef_lane(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ule <4 x i32> <i32 1, i32 42, i32 42, i32 -42>, [[INS]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ule i32 42, [[X:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i1 [[R_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <4 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <4 x i32> undef, i32 %x, i32 1
|
||||
@ -207,8 +204,8 @@ define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) {
|
||||
|
||||
define <2 x i1> @constant_op0_f64(double %x) {
|
||||
; CHECK-LABEL: @constant_op0_f64(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = fcmp fast olt <2 x double> <double 4.200000e+01, double undef>, [[INS]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp fast olt double 4.200000e+01, [[X:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <2 x double> undef, double %x, i32 0
|
||||
@ -218,8 +215,8 @@ define <2 x i1> @constant_op0_f64(double %x) {
|
||||
|
||||
define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) {
|
||||
; CHECK-LABEL: @constant_op0_f64_not_undef_lane(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1
|
||||
; CHECK-NEXT: [[R:%.*]] = fcmp nnan ueq <2 x double> <double 4.200000e+01, double -4.200000e+01>, [[INS]]
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ueq double -4.200000e+01, [[X:%.*]]
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <2 x double> undef, double %x, i32 1
|
||||
@ -229,8 +226,8 @@ define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) {
|
||||
|
||||
define <2 x i1> @constant_op1_f64(double %x) {
|
||||
; CHECK-LABEL: @constant_op1_f64(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1
|
||||
; CHECK-NEXT: [[R:%.*]] = fcmp one <2 x double> [[INS]], <double undef, double 4.200000e+01>
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp one double [[X:%.*]], 4.200000e+01
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <2 x double> undef, double %x, i32 1
|
||||
@ -240,8 +237,8 @@ define <2 x i1> @constant_op1_f64(double %x) {
|
||||
|
||||
define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
|
||||
; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
|
||||
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[R:%.*]] = fcmp uge <4 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01, float 0.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
|
||||
; CHECK-NEXT: ret <4 x i1> [[R]]
|
||||
;
|
||||
%ins = insertelement <4 x float> undef, float %x, i32 0
|
||||
@ -249,6 +246,8 @@ define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
|
||||
ret <4 x i1> %r
|
||||
}
|
||||
|
||||
; negative test - select prevents the transform
|
||||
|
||||
define <4 x float> @vec_select_use1(<4 x float> %x, <4 x float> %y, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @vec_select_use1(
|
||||
; CHECK-NEXT: [[VECA:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i8 0
|
||||
@ -264,6 +263,8 @@ define <4 x float> @vec_select_use1(<4 x float> %x, <4 x float> %y, i32 %a, i32
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; negative test - select prevents the transform
|
||||
|
||||
define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
|
||||
; CHECK-LABEL: @vec_select_use2(
|
||||
; CHECK-NEXT: [[VECA:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i8 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user