From ea9fed10ed4b945118e4fc13fc1f3362ffa52e3c Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 28 Nov 2018 08:58:27 +0000 Subject: [PATCH] [SystemZ::TTI] Improve cost for compare of i64 with extended i32 load CGF/CLGF compares an i64 register with a sign/zero extended loaded i32 value in memory. This patch makes such a load considered foldable and so gets a 0 cost. Review: Ulrich Weigand https://reviews.llvm.org/D54944 llvm-svn: 347735 --- .../SystemZ/SystemZTargetTransformInfo.cpp | 16 +++++++++------- .../SystemZ/memop-folding-int-arith.ll | 18 +++++++++++++++++- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 362d2d3c79d..32883efbdab 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -915,16 +915,19 @@ isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) { switch (UserI->getOpcode()) { case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64 case Instruction::Sub: + case Instruction::ICmp: if (LoadedBits == 32 && ZExtBits == 64) return true; LLVM_FALLTHROUGH; case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64 - if (LoadedBits == 16 && - (SExtBits == 32 || - (SExtBits == 64 && ST->hasMiscellaneousExtensions2()))) - return true; - if (LoadOrTruncBits == 16) - return true; + if (UserI->getOpcode() != Instruction::ICmp) { + if (LoadedBits == 16 && + (SExtBits == 32 || + (SExtBits == 64 && ST->hasMiscellaneousExtensions2()))) + return true; + if (LoadOrTruncBits == 16) + return true; + } LLVM_FALLTHROUGH; case Instruction::SDiv:// SE: 32->64 if (LoadedBits == 32 && SExtBits == 64) @@ -934,7 +937,6 @@ isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) { case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::ICmp: // This also makes sense for float operations, but disabled for now due // to regressions. // case Instruction::FCmp: diff --git a/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll b/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll index afd923a9f3a..74d95ae2ea7 100644 --- a/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll +++ b/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll @@ -726,6 +726,16 @@ define void @icmp() { %tr_0 = trunc i64 %li64_2 to i32 icmp eq i32 %tr_0, undef + ; Sign-extended load + %li32_2 = load i32, i32* undef + %sext = sext i32 %li32_2 to i64 + icmp eq i64 %sext, undef + + ; Zero-extended load + %li32_3 = load i32, i32* undef + %zext = zext i32 %li32_3 to i64 + icmp eq i64 %zext, undef + ; Loads with multiple uses are *not* folded %li64_3 = load i64, i64* undef %tr_1 = trunc i64 %li64_3 to i32 @@ -745,7 +755,13 @@ define void @icmp() { ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = icmp eq i32 %tr_0, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i32 %li32_2 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = icmp eq i64 %sext, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext = zext i32 %li32_3 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = icmp eq i64 %zext, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32 -; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = icmp eq i64 %li64_3, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = icmp eq i64 %li64_3, undef }