[InstCombine] try to reduce x86 addcarry to generic uaddo intrinsic
If we can reduce the x86-specific intrinsic to the generic op, it allows existing
simplifications and value tracking folds. AFAICT, this always results in identical
x86 codegen in the non-reduced case...which should be true because we
semi-generically (too aggressively IMO) convert to llvm.uadd.with.overflow in CGP,
so the DAG/isel must already combine/lower this intrinsic as expected.

This isn't quite what was requested in:
https://bugs.llvm.org/show_bug.cgi?id=40486
...but we want to have these kinds of folds early for efficiency and to enable
greater simplifications. For the case in the bug report where we have:
_addcarry_u64(0, ahi, 0, &ahi)
...this gets completely simplified away in IR.

Differential Revision: https://reviews.llvm.org/D57453

llvm-svn: 352870
parent 817d11d6b8
commit 167721d054
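To make the bug-report example concrete, here is a rough IR sketch (hypothetical
value names, not taken from the patch) of why that call can vanish:

  ; _addcarry_u64(0, ahi, 0, &ahi) arrives in IR as roughly:
  %s = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %ahi, i64 0)
  ; the new fold reduces it to the generic op:
  %u = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %ahi, i64 0)
  ; existing simplifications know x + 0 cannot overflow, so %u folds
  ; to { %ahi, false } and the whole call disappears.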
@@ -751,6 +751,33 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
   return nullptr;
 }
 
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  Value *CarryIn = II.getArgOperand(0);
+  Value *Op1 = II.getArgOperand(1);
+  Value *Op2 = II.getArgOperand(2);
+  Type *RetTy = II.getType();
+  Type *OpTy = Op1->getType();
+  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+         "Unexpected types for x86 addcarry");
+
+  // If carry-in is zero, this is just an unsigned add with overflow.
+  if (match(CarryIn, m_ZeroInt())) {
+    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+                                          { Op1, Op2 });
+    // The types have to be adjusted to match the x86 call types.
+    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+                                       Builder.getInt8Ty());
+    Value *Res = UndefValue::get(II.getType());
+    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+    return Builder.CreateInsertValue(Res, UAddResult, 1);
+  }
+
+  return nullptr;
+}
+
 static Value *simplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
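An aside on the repacking in simplifyX86addcarry (explanatory, not part of the
patch): the two intrinsics disagree on result layout, which is why the overflow
flag is zext'ed from i1 to i8 and inserted at index 0 while the sum goes to
index 1:

  declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)      ; carry-out first, as i8
  declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)   ; sum first, overflow as i1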
@@ -3109,6 +3136,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return nullptr;
     break;
 
+  case Intrinsic::x86_addcarry_32:
+  case Intrinsic::x86_addcarry_64:
+    if (Value *V = simplifyX86addcarry(*II, Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -6,11 +6,12 @@ declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)
 
 define i32 @no_carryin_i32(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @no_carryin_i32(
-; CHECK-NEXT:    [[S:%.*]] = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 [[X:%.*]], i32 [[Y:%.*]])
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i32 } [[S]], 0
-; CHECK-NEXT:    store i8 [[OV]], i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i32 } [[S]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT:    store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %s = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %x, i32 %y)
   %ov = extractvalue { i8, i32 } %s, 0
@@ -21,11 +22,12 @@ define i32 @no_carryin_i32(i32 %x, i32 %y, i8* %p) {
 
 define i64 @no_carryin_i64(i64 %x, i64 %y, i8* %p) {
 ; CHECK-LABEL: @no_carryin_i64(
-; CHECK-NEXT:    [[S:%.*]] = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i64 } [[S]], 0
-; CHECK-NEXT:    store i8 [[OV]], i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i64 } [[S]], 1
-; CHECK-NEXT:    ret i64 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT:    store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %s = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %x, i64 %y)
   %ov = extractvalue { i8, i64 } %s, 0
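Note that both tests exercise only the constant-zero carry-in path; an input
like the following hypothetical one (not in the test file) fails the
m_ZeroInt() match, so simplifyX86addcarry returns nullptr and the intrinsic is
left untouched:

  ; carry-in %c is not a constant zero -> no reduction
  %s = call { i8, i64 } @llvm.x86.addcarry.64(i8 %c, i64 %x, i64 %y)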