mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[AArch64] Enable the codegenprepare optimization that promotes operation to form
extended loads. Implement the related target lowering hook so that the optimization has a better estimation of the cost of an extension. rdar://problem/19267165 llvm-svn: 233753
This commit is contained in:
parent
9d251da5b0
commit
574df40140
@ -481,6 +481,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
|
||||
// Enable TBZ/TBNZ
|
||||
MaskAndBranchFoldingIsLegal = true;
|
||||
EnableExtLdPromotion = true;
|
||||
|
||||
setMinFunctionAlignment(2);
|
||||
|
||||
@ -6554,6 +6555,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
|
||||
VT1.getSizeInBits() <= 32);
|
||||
}
|
||||
|
||||
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
|
||||
if (isa<FPExtInst>(Ext))
|
||||
return false;
|
||||
|
||||
// Vector types are next free.
|
||||
if (Ext->getType()->isVectorTy())
|
||||
return false;
|
||||
|
||||
for (const Use &U : Ext->uses()) {
|
||||
// The extension is free if we can fold it with a left shift in an
|
||||
// addressing mode or an arithmetic operation: add, sub, and cmp.
|
||||
|
||||
// Is there a shift?
|
||||
const Instruction *Instr = cast<Instruction>(U.getUser());
|
||||
|
||||
// Is this a constant shift?
|
||||
switch (Instr->getOpcode()) {
|
||||
case Instruction::Shl:
|
||||
if (!isa<ConstantInt>(Instr->getOperand(1)))
|
||||
return false;
|
||||
break;
|
||||
case Instruction::GetElementPtr: {
|
||||
gep_type_iterator GTI = gep_type_begin(Instr);
|
||||
std::advance(GTI, U.getOperandNo());
|
||||
Type *IdxTy = *GTI;
|
||||
// This extension will end up with a shift because of the scaling factor.
|
||||
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
|
||||
// Get the shift amount based on the scaling factor:
|
||||
// log2(sizeof(IdxTy)) - log2(8).
|
||||
uint64_t ShiftAmt =
|
||||
countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
|
||||
// Is the constant foldable in the shift of the addressing mode?
|
||||
// I.e., shift amount is between 1 and 4 inclusive.
|
||||
if (ShiftAmt == 0 || ShiftAmt > 4)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
case Instruction::Trunc:
|
||||
// Check if this is a noop.
|
||||
// trunc(sext ty1 to ty2) to ty1.
|
||||
if (Instr->getType() == Ext->getOperand(0)->getType())
|
||||
continue;
|
||||
// FALL THROUGH.
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
// At this point we can use the bfm family, so this extension is free
|
||||
// for that use.
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
|
||||
unsigned &RequiredAligment) const {
|
||||
if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
|
||||
|
@ -355,6 +355,8 @@ public:
|
||||
getPreferredVectorAction(EVT VT) const override;
|
||||
|
||||
private:
|
||||
/// Return true if the extension \p Ext is free on this target, i.e. it is
/// neither a floating-point nor a vector extension and each of its uses can
/// fold it (constant left shift, suitably scaled getelementptr index, or a
/// truncation back to the source type).
bool isExtFreeImpl(const Instruction *Ext) const override;
|
||||
|
||||
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
const AArch64Subtarget *Subtarget;
|
||||
|
638
test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
Normal file
638
test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
Normal file
@ -0,0 +1,638 @@
|
||||
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
|
||||
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
|
||||
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
|
||||
|
||||
; CodeGenPrepare should move the zext into the block with the load
|
||||
; so that SelectionDAG can select it with the load.
|
||||
;
|
||||
; OPTALL-LABEL: @foo
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; OPTALL: store i32 [[ZEXT]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @foo(i8* %p, i32* %q) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = zext i8 %t to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to form a zextload if an operation with only one
|
||||
; argument to explicitly extend is in the way.
|
||||
; OPTALL-LABEL: @promoteOneArg
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
|
||||
; Make sure the operation is not promoted when the promotion pass is disabled.
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteOneArg(i8* %p, i32* %q) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%add = add nuw i8 %t, 2
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = zext i8 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to form a sextload if an operation with only one
|
||||
; argument to explicitly extend is in the way.
|
||||
; Version with sext.
|
||||
; OPTALL-LABEL: @promoteOneArgSExt
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
|
||||
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteOneArgSExt(i8* %p, i32* %q) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%add = add nsw i8 %t, 2
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = sext i8 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to form a zextload if an operation with two
|
||||
; arguments to explicitly extend is in the way.
|
||||
; Extending %add will create two extensions:
|
||||
; 1. One for %b.
|
||||
; 2. One for %t.
|
||||
; #1 will not be removed as we do not know anything about %b.
|
||||
; #2 may not be merged with the load because %t is used in a comparison.
|
||||
; Since two extensions may be emitted in the end instead of one before the
|
||||
; transformation, the regular heuristic does not apply the optimization.
|
||||
;
|
||||
; OPTALL-LABEL: @promoteTwoArgZext
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
|
||||
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
|
||||
;
|
||||
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
|
||||
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
|
||||
;
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
|
||||
;
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%add = add nuw i8 %t, %b
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = zext i8 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to form a sextload if an operation with two
|
||||
; arguments to explicitly extend is in the way.
|
||||
; Version with sext.
|
||||
; OPTALL-LABEL: @promoteTwoArgSExt
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
|
||||
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
|
||||
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
|
||||
;
|
||||
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
|
||||
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
|
||||
;
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%add = add nsw i8 %t, %b
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = sext i8 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we do not form a zextload if we need to introduce more than
|
||||
; one additional extension.
|
||||
; OPTALL-LABEL: @promoteThreeArgZext
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
|
||||
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
|
||||
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
|
||||
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
|
||||
;
|
||||
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
|
||||
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
|
||||
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
|
||||
;
|
||||
; DISABLE: add nuw i8
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
|
||||
;
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%tmp = add nuw i8 %t, %b
|
||||
%add = add nuw i8 %tmp, %c
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = zext i8 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to form a zextload after promoting and merging
|
||||
; two extensions.
|
||||
; OPTALL-LABEL: @promoteMergeExtArgZExt
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
|
||||
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
|
||||
;
|
||||
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
|
||||
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
|
||||
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
|
||||
;
|
||||
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
|
||||
;
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%ext = zext i8 %t to i16
|
||||
%add = add nuw i16 %ext, %b
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = zext i16 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to form a sextload after promoting and merging
|
||||
; two extensions.
|
||||
; Version with sext.
|
||||
; OPTALL-LABEL: @promoteMergeExtArgSExt
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
|
||||
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
|
||||
;
|
||||
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
|
||||
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
|
||||
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
|
||||
;
|
||||
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
|
||||
; OPTALL: store i32 [[RES]], i32* %q
|
||||
; OPTALL: ret
|
||||
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%ext = zext i8 %t to i16
|
||||
%add = add nsw i16 %ext, %b
|
||||
%a = icmp slt i8 %t, 20
|
||||
br i1 %a, label %true, label %false
|
||||
true:
|
||||
%s = sext i16 %add to i32
|
||||
store i32 %s, i32* %q
|
||||
ret void
|
||||
false:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we manage to catch all the extload opportunities that are exposed
|
||||
; by the different iterations of codegen prepare.
|
||||
; Moreover, check that we do not promote more than we need to.
|
||||
; Here is what is happening in this test (not necessarily in this order):
|
||||
; 1. We try to promote the operand of %sextadd.
|
||||
; a. This creates one sext of %ld2 and one of %zextld
|
||||
; b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
|
||||
; introduced one. This is fine with the current heuristic: neutral.
|
||||
; => We have one zext of %zextld left and we created one sext of %ld2.
|
||||
; 2. We try to promote the operand of %sextaddza.
|
||||
; a. This creates one sext of %zexta and one of %zextld
|
||||
; b. The sext of %zexta does not lead to any load, it stays here, even if it
|
||||
; could have been combined with the zext of %a.
|
||||
; c. The sext of %zextld leads to %ld and can be combined with it. This is
|
||||
; done by promoting %zextld. This is fine with the current heuristic:
|
||||
; neutral.
|
||||
; => We have created a new zext of %ld and we created one sext of %zexta.
|
||||
; 3. We try to promote the operand of %sextaddb.
|
||||
; a. This creates one sext of %b and one of %zextld
|
||||
; b. The sext of %b is a dead-end, nothing to be done.
|
||||
; c. Same thing as 2.c. happens.
|
||||
; => We have created a new zext of %ld and we created one sext of %b.
|
||||
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
|
||||
; a. Same thing as 2.c. happens.
|
||||
; b. %zextld does not have any other uses. It is dead coded.
|
||||
; => We have created a new zext of %ld and we removed a zext of %zextld and
|
||||
; a zext of %ld.
|
||||
; Currently we do not try to reuse existing extensions, so in the end we have
|
||||
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
|
||||
;
|
||||
; OPTALL-LABEL: @severalPromotions
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
|
||||
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
|
||||
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
|
||||
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
|
||||
; We do not combine this one: see 2.b.
|
||||
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
|
||||
; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
|
||||
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
|
||||
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
|
||||
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
|
||||
;
|
||||
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
|
||||
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
|
||||
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
|
||||
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
|
||||
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
|
||||
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
|
||||
;
|
||||
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
|
||||
; OPTALL: ret
|
||||
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
|
||||
%ld = load i8, i8* %addr1
|
||||
%zextld = zext i8 %ld to i32
|
||||
%ld2 = load i32, i32* %addr2
|
||||
%add = add nsw i32 %ld2, %zextld
|
||||
%sextadd = sext i32 %add to i64
|
||||
%zexta = zext i8 %a to i32
|
||||
%addza = add nsw i32 %zexta, %zextld
|
||||
%sextaddza = sext i32 %addza to i64
|
||||
%addb = add nsw i32 %b, %zextld
|
||||
%sextaddb = sext i32 %addb to i64
|
||||
call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @dummy(i64, i64, i64)
|
||||
|
||||
; Make sure we do not try to promote vector types since the type promotion
|
||||
; helper does not support them for now.
|
||||
; OPTALL-LABEL: @vectorPromotion
|
||||
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
|
||||
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
|
||||
; OPTALL: ret
|
||||
define void @vectorPromotion() {
|
||||
entry:
|
||||
%a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
|
||||
%b = zext <2 x i32> %a to <2 x i64>
|
||||
ret void
|
||||
}
|
||||
|
||||
@a = common global i32 0, align 4
|
||||
@c = common global [2 x i32] zeroinitializer, align 4
|
||||
|
||||
; Make sure we support promotion of operands that produces a Value as opposed
|
||||
; to an instruction.
|
||||
; This used to cause a crash.
|
||||
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
|
||||
;
|
||||
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
|
||||
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
|
||||
;
|
||||
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
|
||||
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
|
||||
;
|
||||
; OPTALL-NEXT: ret i32 [[RES]]
|
||||
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
|
||||
entry:
|
||||
%val = load i16, i16* %addr
|
||||
%add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
|
||||
%conv3 = sext i16 %add to i32
|
||||
ret i32 %conv3
|
||||
}
|
||||
|
||||
; Check that we see that one zext can be derived from the other for free.
|
||||
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
|
||||
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
|
||||
; OPT-NEXT: store i32 [[RES32]], i32* %addr
|
||||
; OPT-NEXT: store i64 [[RES64]], i64* %q
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
|
||||
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
|
||||
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
|
||||
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
|
||||
;
|
||||
; OPTALL-NEXT: ret void
|
||||
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nuw i32 %zextt, %b
|
||||
%add2 = add nuw i32 %zextt, 12
|
||||
store i32 %add, i32 *%addr
|
||||
%s = zext i32 %add2 to i64
|
||||
store i64 %s, i64* %q
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we do not increase the cost of the code.
|
||||
; The input has one free zext and one free sext. If we would have promoted
|
||||
; all the way through the load we would end up with a free zext and a
|
||||
; non-free sext (of %b).
|
||||
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
|
||||
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
|
||||
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
|
||||
;
|
||||
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
|
||||
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
|
||||
; OPTALL-NEXT: ret void
|
||||
define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nsw i32 %zextt, %b
|
||||
%idx64 = sext i32 %add to i64
|
||||
%staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
|
||||
store i32 %add, i32 *%staddr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we do not increase the cost of the code.
|
||||
; The input has one free zext and one free sext. If we would have promoted
|
||||
; all the way through the load we would end up with a free zext and a
|
||||
; non-free sext (of %b).
|
||||
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
|
||||
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
|
||||
;
|
||||
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
|
||||
; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
|
||||
; OPTALL-NEXT: ret void
|
||||
define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nsw i32 %zextt, %b
|
||||
%idx64 = sext i32 %add to i64
|
||||
%staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
|
||||
store i64 %stuff, i64 *%staddr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we do not increase the cost of the code.
|
||||
; The input has one free zext and one free sext. If we would have promoted
|
||||
; all the way through the load we would end up with a free zext and a
|
||||
; non-free sext (of %b).
|
||||
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
|
||||
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
|
||||
;
|
||||
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
|
||||
; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
|
||||
; OPTALL-NEXT: ret void
|
||||
define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nsw i32 %zextt, %b
|
||||
%idx64 = sext i32 %add to i64
|
||||
%staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
|
||||
store i128 %stuff, i128 *%staddr
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Check that we do not increase the cost of the code.
|
||||
; The input has one free zext and one free sext. If we would have promoted
|
||||
; all the way through the load we would end up with a free zext and a
|
||||
; non-free sext (of %b).
|
||||
; OPTALL-LABEL: @promoteSExtFromAddrMode256
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
|
||||
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
|
||||
; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
|
||||
; OPTALL-NEXT: ret void
|
||||
define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nsw i32 %zextt, %b
|
||||
%idx64 = sext i32 %add to i64
|
||||
%staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
|
||||
store i256 %stuff, i256 *%staddr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we do not increase the cost of the code.
|
||||
; The input has one free zext and one free zext.
|
||||
; When we promote all the way through the load, we end up with
|
||||
; a free zext and a non-free zext (of %b).
|
||||
; However, the current target lowering says zext i32 to i64 is free
|
||||
; so the promotion happens because the cost did not change and may
|
||||
; expose more opportunities.
|
||||
; This would need to be fixed at some point.
|
||||
; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; This transformation should really happen only for stress mode.
|
||||
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
|
||||
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
|
||||
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
|
||||
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
|
||||
; OPTALL-NEXT: ret void
|
||||
define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nuw i32 %zextt, %b
|
||||
%idx64 = zext i32 %add to i64
|
||||
%staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
|
||||
store i32 %add, i32 *%staddr
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
|
||||
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
|
||||
;
|
||||
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
|
||||
; OPTALL-NEXT: ret i64 %staddr
|
||||
define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nsw i32 %zextt, %b
|
||||
%idx64 = sext i32 %add to i64
|
||||
%staddr = shl i64 %idx64, 12
|
||||
ret i64 %staddr
|
||||
}
|
||||
|
||||
; Same comment as doNotPromoteFreeZExtFromAddrMode.
|
||||
; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
|
||||
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
|
||||
;
|
||||
; This transformation should really happen only for stress mode.
|
||||
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
|
||||
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
|
||||
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
|
||||
;
|
||||
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
|
||||
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
|
||||
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
|
||||
; OPTALL-NEXT: ret i64 %staddr
|
||||
define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
|
||||
entry:
|
||||
%t = load i8, i8* %p
|
||||
%zextt = zext i8 %t to i32
|
||||
%add = add nuw i32 %zextt, %b
|
||||
%idx64 = zext i32 %add to i64
|
||||
%staddr = shl i64 %idx64, 12
|
||||
ret i64 %staddr
|
||||
}
|
||||
|
||||
; The input has one free zext and one non-free sext.
|
||||
; When we promote all the way through to the load, we end up with
|
||||
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
|
||||
; However, when we generate a load pair, the free sext(%ld1) becomes
|
||||
; non-free. So technically, we trade a non-free sext for two non-free
|
||||
; sext.
|
||||
; This would need to be fixed at some point.
|
||||
; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
|
||||
; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
|
||||
; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
|
||||
; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
|
||||
;
|
||||
; This transformation should really happen only for stress mode.
|
||||
; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
|
||||
; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
|
||||
; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
|
||||
;
|
||||
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
|
||||
; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
|
||||
;
|
||||
; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
|
||||
; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
|
||||
; OPTALL-NEXT: ret i64 [[FINAL]]
|
||||
define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
|
||||
%ld0 = load i32, i32* %p
|
||||
%idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
|
||||
%ld1 = load i32, i32* %idxLd1
|
||||
%res = add nsw i32 %ld1, %cst
|
||||
%sextres = sext i32 %res to i64
|
||||
%zextLd0 = zext i32 %ld0 to i64
|
||||
%final = add i64 %sextres, %zextLd0
|
||||
ret i64 %final
|
||||
}
|
Loading…
Reference in New Issue
Block a user