
Merge clang's isRepeatedBytePattern with LLVM's isBytewiseValue

Summary:
This code was in CGDecl.cpp and really belongs in LLVM's isBytewiseValue. Teach isBytewiseValue the tricks clang's isRepeatedBytePattern had, including merging undef properly and recursing on more types.

clang part of this patch: D51752

Subscribers: dexonsmith, llvm-commits

Differential Revision: https://reviews.llvm.org/D51751

llvm-svn: 342709
Author: JF Bastien, 2018-09-21 05:17:42 +00:00
Commit: 4bd103b312 (parent 9d6986a7f5)
6 changed files with 185 additions and 55 deletions
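To make the new behavior concrete before the diff, here is a minimal standalone sketch (not part of this commit; the driver, the global names, and the expected output are illustrative) that feeds a few constants through the updated isBytewiseValue. It assumes a checkout around this revision, where the function still takes only a Value* and no DataLayout.

// Sketch: query isBytewiseValue on constants the old code gave up on.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  // @a has an undef element, @b is a struct whose bytes are all 0xAA, and @c
  // is not a repeated byte pattern at all.
  std::unique_ptr<Module> M = parseAssemblyString(
      "@a = constant [3 x i32] [i32 -1, i32 undef, i32 -1]\n"
      "@b = constant { i8, [3 x i8] } { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }\n"
      "@c = constant i16 4660\n", // 0x1234
      Err, Ctx);
  if (!M)
    return 1;
  for (const char *Name : {"a", "b", "c"}) {
    Constant *Init = M->getGlobalVariable(Name)->getInitializer();
    outs() << "@" << Name << ": ";
    if (Value *Byte = isBytewiseValue(Init))
      Byte->print(outs()); // expect i8 -1 for @a, i8 -86 for @b
    else
      outs() << "not a repeated byte"; // expected for @c
    outs() << "\n";
  }
  return 0;
}

With the old implementation, @a (a ConstantArray because of the undef element) and @b (a ConstantStruct) were rejected outright; after this patch only @c should still come back as null.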

========== changed file ==========

@@ -221,7 +221,8 @@ class Value;
 /// return the i8 value that it is represented with. This is true for all i8
 /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
 /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
-/// i16 0x1234), return null.
+/// i16 0x1234), return null. If the value is entirely undef and padding,
+/// return undef.
 Value *isBytewiseValue(Value *V);
 
 /// Given an aggregrate and an sequence of indices, see if the scalar value

========== changed file ==========

@@ -3042,62 +3042,92 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
   return true;
 }
 
-/// If the specified value can be set by repeating the same byte in memory,
-/// return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
 Value *llvm::isBytewiseValue(Value *V) {
+
   // All byte-wide stores are splatable, even of arbitrary variables.
-  if (V->getType()->isIntegerTy(8)) return V;
+  if (V->getType()->isIntegerTy(8))
+    return V;
+
+  LLVMContext &Ctx = V->getContext();
+
+  // Undef don't care.
+  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
+  if (isa<UndefValue>(V))
+    return UndefInt8;
+
+  Constant *C = dyn_cast<Constant>(V);
+  if (!C) {
+    // Conceptually, we could handle things like:
+    //   %a = zext i8 %X to i16
+    //   %b = shl i16 %a, 8
+    //   %c = or i16 %a, %b
+    // but until there is an example that actually needs this, it doesn't seem
+    // worth worrying about.
+    return nullptr;
+  }
 
   // Handle 'null' ConstantArrayZero etc.
-  if (Constant *C = dyn_cast<Constant>(V))
-    if (C->isNullValue())
-      return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+  if (C->isNullValue())
+    return Constant::getNullValue(Type::getInt8Ty(Ctx));
 
-  // Constant float and double values can be handled as integer values if the
+  // Constant floating-point values can be handled as integer values if the
   // corresponding integer value is "byteable". An important case is 0.0.
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
-    if (CFP->getType()->isFloatTy())
-      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
-    if (CFP->getType()->isDoubleTy())
-      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+    Type *Ty = nullptr;
+    if (CFP->getType()->isHalfTy())
+      Ty = Type::getInt16Ty(Ctx);
+    else if (CFP->getType()->isFloatTy())
+      Ty = Type::getInt32Ty(Ctx);
+    else if (CFP->getType()->isDoubleTy())
+      Ty = Type::getInt64Ty(Ctx);
     // Don't handle long double formats, which have strange constraints.
+    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
   }
 
   // We can handle constant integers that are multiple of 8 bits.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
     if (CI->getBitWidth() % 8 == 0) {
       assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
       if (!CI->getValue().isSplat(8))
         return nullptr;
-      return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
+      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
     }
   }
 
-  // A ConstantDataArray/Vector is splatable if all its members are equal and
-  // also splatable.
-  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
-    Value *Elt = CA->getElementAsConstant(0);
-    Value *Val = isBytewiseValue(Elt);
-    if (!Val)
-      return nullptr;
-
-    for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
-      if (CA->getElementAsConstant(I) != Elt)
-        return nullptr;
-
+  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
+    if (LHS == RHS)
+      return LHS;
+    if (!LHS || !RHS)
+      return nullptr;
+    if (LHS == UndefInt8)
+      return RHS;
+    if (RHS == UndefInt8)
+      return LHS;
+    return nullptr;
+  };
+
+  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
+    Value *Val = UndefInt8;
+    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
+      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
+        return nullptr;
     return Val;
   }
 
-  // Conceptually, we could handle things like:
-  //   %a = zext i8 %X to i16
-  //   %b = shl i16 %a, 8
-  //   %c = or i16 %a, %b
-  // but until there is an example that actually needs this, it doesn't seem
-  // worth worrying about.
+  if (isa<ConstantVector>(C)) {
+    Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
+    return Splat ? isBytewiseValue(Splat) : nullptr;
+  }
+
+  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+    Value *Val = UndefInt8;
+    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
+      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+        return nullptr;
+    return Val;
+  }
+
+  // Don't try to handle the handful of other constants.
   return nullptr;
 }

========== changed file ==========

@@ -348,6 +348,9 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
 /// just replicate their input array and then pass on to memset_pattern16.
 static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+  // FIXME: This could check for UndefValue because it can be merged into any
+  // other valid pattern.
+
   // If the value isn't a constant, we can't promote it to being in a constant
   // array. We could theoretically do a store to an alloca or something, but
   // that doesn't seem worthwhile.
@@ -645,9 +648,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
       if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
         if (For == ForMemset::Yes) {
+          if (isa<UndefValue>(FirstSplatValue))
+            FirstSplatValue = SecondSplatValue;
           if (FirstSplatValue != SecondSplatValue)
             continue;
         } else {
+          if (isa<UndefValue>(FirstPatternValue))
+            FirstPatternValue = SecondPatternValue;
           if (FirstPatternValue != SecondPatternValue)
             continue;
         }

========== changed file ==========

@@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       if (!NextStore->isSimple()) break;
 
       // Check to see if this stored value is of the same byte-splattable value.
-      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+      Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+      if (isa<UndefValue>(ByteVal) && StoredByte)
+        ByteVal = StoredByte;
+      if (ByteVal != StoredByte)
         break;
 
       // Check to see if this store is to a constant offset from the start ptr.
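For the tryMergingIntoMemset change above, a rough end-to-end illustration (a hypothetical driver, not part of this commit): the first store only splats to an undef byte and the later stores splat to zero, so with the fix the whole run of consecutive stores remains a candidate for a single memset. Whether the memset is actually formed also depends on the pass's own profitability checks, so treat the expected output as a hope rather than a guarantee.

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

int main() {
  // Legacy passes must be registered before the pass manager can schedule
  // their required analyses.
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCore(Registry);
  initializeAnalysis(Registry);
  initializeTransformUtils(Registry);
  initializeScalarOpts(Registry);

  LLVMContext Ctx;
  SMDiagnostic Err;
  std::unique_ptr<Module> M = parseAssemblyString(
      "define void @f(i32* %p) {\n"
      "  store i32 undef, i32* %p\n" // splats only to an undef byte
      "  %p1 = getelementptr i32, i32* %p, i64 1\n"
      "  store i32 0, i32* %p1\n" // consecutive, splats to 0
      "  %p2 = getelementptr i32, i32* %p, i64 2\n"
      "  store i32 0, i32* %p2\n"
      "  %p3 = getelementptr i32, i32* %p, i64 3\n"
      "  store i32 0, i32* %p3\n"
      "  ret void\n"
      "}\n",
      Err, Ctx);
  if (!M)
    return 1;

  legacy::FunctionPassManager FPM(M.get());
  FPM.add(createMemCpyOptPass()); // exercises tryMergingIntoMemset
  FPM.doInitialization();
  FPM.run(*M->getFunction("f"));
  FPM.doFinalization();

  // Hoped-for result: one llvm.memset.p0i8.i64(..., i8 0, i64 16, ...) call
  // instead of four scalar stores.
  M->print(outs(), nullptr);
  return 0;
}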

========== changed file ==========

@@ -73,13 +73,16 @@ define void @copyalias(%S* %src, %S* %dst) {
   ret void
 }
 
-; If the store address is computed ina complex manner, make
+; If the store address is computed in a complex manner, make
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(%S* %src, %S* %dst) {
-; CHECK-LABEL: addrproducer
-; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1
-; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
-; CHECK-NEXT: store %S undef, %S* %dst
+; CHECK-LABEL: addrproducer(
+; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1
+; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
+; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT: ret void
   %1 = load %S, %S* %src
   store %S undef, %S* %dst
@@ -89,7 +92,14 @@ define void @addrproducer(%S* %src, %S* %dst) {
 }
 
 define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
-; CHECK-LABEL: aliasaddrproducer
+; CHECK-LABEL: aliasaddrproducer(
+; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src
+; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
+; CHECK-NEXT: store %S %[[SRC]], %S* %dst2
+; CHECK-NEXT: ret void
   %1 = load %S, %S* %src
   store %S undef, %S* %dst
   %dstindex = load i32, i32* %dstidptr
@@ -99,7 +109,16 @@ define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
 }
 
 define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
-; CHECK-LABEL: noaliasaddrproducer
+; CHECK-LABEL: noaliasaddrproducer(
+; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr
+; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
+; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
+; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: ret void
   %1 = load %S, %S* %src
   store %S undef, %S* %src
   %2 = load i32, i32* %dstidptr

========== changed file ==========

@@ -1,19 +1,89 @@
 ; RUN: opt -memcpyopt -S < %s | FileCheck %s
 
-@cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
-
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @foo(i32*) nounwind
 
-define void @test1() nounwind {
-  %arr = alloca [3 x i32], align 4
-  %arr_i8 = bitcast [3 x i32]* %arr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %arr_i8, i8* align 4 bitcast ([3 x i32]* @cst to i8*), i64 12, i1 false)
-  %arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0
-  call void @foo(i32* %arraydecay) nounwind
+@undef = internal constant i32 undef, align 4
+define void @test_undef() nounwind {
+  %a = alloca i32, align 4
+  %i8 = bitcast i32* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false)
   ret void
-; CHECK-LABEL: @test1(
+; CHECK-LABEL: @test_undef(
 ; CHECK: call void @llvm.memset
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
+}
+
+@i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
+define void @test_i32x3() nounwind {
+  %a = alloca [3 x i32], align 4
+  %i8 = bitcast [3 x i32]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false)
+  ret void
+; CHECK-LABEL: @test_i32x3(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+@i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4
+define void @test_i32x3_undef() nounwind {
+  %a = alloca [3 x i32], align 4
+  %i8 = bitcast [3 x i32]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false)
+  ret void
+; CHECK-LABEL: @test_i32x3_undef(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+%struct.bitfield = type { i8, [3 x i8] }
+@bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4
+define void @test_bitfield() nounwind {
+  %a = alloca %struct.bitfield, align 4
+  %i8 = bitcast %struct.bitfield* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false)
+  ret void
+; CHECK-LABEL: @test_bitfield(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+@i1x16_zero = internal constant <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, align 4
+define void @test_i1x16_zero() nounwind {
+  %a = alloca <16 x i1>, align 4
+  %i8 = bitcast <16 x i1>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false)
+  ret void
+; CHECK-LABEL: @test_i1x16_zero(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+; i1 isn't currently handled. Should it?
+@i1x16_one = internal constant <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, align 4
+define void @test_i1x16_one() nounwind {
+  %a = alloca <16 x i1>, align 4
+  %i8 = bitcast <16 x i1>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false)
+  ret void
+; CHECK-LABEL: @test_i1x16_one(
+; CHECK-NOT: call void @llvm.memset
+; CHECK: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+@half = internal constant half 0xH0000, align 4
+define void @test_half() nounwind {
+  %a = alloca half, align 4
+  %i8 = bitcast half* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false)
+  ret void
+; CHECK-LABEL: @test_half(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
 }