mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[ARM] Implement TTI::getMemcpyCost
This implements the TargetTransformInfo method getMemcpyCost, which estimates the number of instructions that a memcpy instruction expands to. Differential Revision: https://reviews.llvm.org/D59787 llvm-svn: 359547
This commit is contained in:
parent
af2892e778
commit
ee665fe73b
@ -584,6 +584,12 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp,
|
||||
return Cost;
|
||||
}
|
||||
|
||||
int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
|
||||
int Cost = TTIImpl->getMemcpyCost(I);
|
||||
assert(Cost >= 0 && "TTI should not produce negative costs!");
|
||||
return Cost;
|
||||
}
|
||||
|
||||
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
|
||||
bool IsPairwiseForm) const {
|
||||
int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
@ -401,6 +402,40 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
|
||||
const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
|
||||
assert(MI && "MemcpyInst expected");
|
||||
ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
|
||||
|
||||
// To model the cost of a library call, we assume 1 for the call, and
|
||||
// 3 for the argument setup.
|
||||
const unsigned LibCallCost = 4;
|
||||
|
||||
// If 'size' is not a constant, a library call will be generated.
|
||||
if (!C)
|
||||
return LibCallCost;
|
||||
|
||||
const unsigned Size = C->getValue().getZExtValue();
|
||||
const unsigned DstAlign = MI->getDestAlignment();
|
||||
const unsigned SrcAlign = MI->getSourceAlignment();
|
||||
const Function *F = I->getParent()->getParent();
|
||||
const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
|
||||
std::vector<EVT> MemOps;
|
||||
|
||||
// MemOps will be poplulated with a list of data types that needs to be
|
||||
// loaded and stored. That's why we multiply the number of elements by 2 to
|
||||
// get the cost for this memcpy.
|
||||
if (getTLI()->findOptimalMemOpLowering(
|
||||
MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
|
||||
false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
|
||||
MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
|
||||
F->getAttributes()))
|
||||
return MemOps.size() * 2;
|
||||
|
||||
// If we can't find an optimal memop lowering, return the default cost
|
||||
return LibCallCost;
|
||||
}
|
||||
|
||||
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
Type *SubTp) {
|
||||
if (Kind == TTI::SK_Broadcast) {
|
||||
|
@ -148,6 +148,8 @@ public:
|
||||
return ST->getMaxInterleaveFactor();
|
||||
}
|
||||
|
||||
int getMemcpyCost(const Instruction *I);
|
||||
|
||||
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
|
||||
|
||||
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
|
@ -1,12 +1,670 @@
|
||||
; RUN: opt < %s -cost-model -analyze -cost-kind=code-size | FileCheck %s
|
||||
; RUN: opt < %s -cost-model -analyze -cost-kind=code-size | \
|
||||
; RUN: FileCheck %s --check-prefixes=COMMON,CHECK-NO-SA
|
||||
; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mattr=+strict-align | \
|
||||
; RUN: FileCheck %s --check-prefixes=COMMON,CHECK-SA
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv7m-arm-unknown-eabi"
|
||||
|
||||
define void @memcpy(i8* %d, i8* %s, i32 %N) {
|
||||
;;;;;;;;;;;;
|
||||
; Align 1, 1
|
||||
;;;;;;;;;;;;
|
||||
|
||||
define void @memcpy_1(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r1, [r1]
|
||||
; strb r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_1'
|
||||
; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
; CHECK: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 36, i1 false)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_2(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrb r2, [r1]
|
||||
; ldrb r1, [r1, #1]
|
||||
; strb r1, [r0, #1]
|
||||
; strb r2, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_2'
|
||||
; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_3(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldrb r2, [r1, #2]
|
||||
; strb r2, [r0, #2]
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrb r2, [r1]
|
||||
; ldrb r3, [r1, #1]
|
||||
; ldrb r1, [r1, #2]
|
||||
; strb r1, [r0, #2]
|
||||
; strb r3, [r0, #1]
|
||||
; strb r2, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_3'
|
||||
; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_4(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr r1, [r1]
|
||||
; str r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrb.w r12, [r1]
|
||||
; ldrb r3, [r1, #1]
|
||||
; ldrb r2, [r1, #2]
|
||||
; ldrb r1, [r1, #3]
|
||||
; strb r1, [r0, #3]
|
||||
; strb r2, [r0, #2]
|
||||
; strb r3, [r0, #1]
|
||||
; strb.w r12, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_4'
|
||||
; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_8(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr r2, [r1]
|
||||
; ldr r1, [r1, #4]
|
||||
; str r1, [r0, #4]
|
||||
; str r2, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; push {r7, lr}
|
||||
; movs r2, #8
|
||||
; bl __aeabi_memcpy
|
||||
; pop {r7, pc}
|
||||
;
|
||||
; COMMON: function 'memcpy_8'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_16(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr.w r12, [r1]
|
||||
; ldr r3, [r1, #4]
|
||||
; ldr r2, [r1, #8]
|
||||
; ldr r1, [r1, #12]
|
||||
; str r1, [r0, #12]
|
||||
; str r2, [r0, #8]
|
||||
; str r3, [r0, #4]
|
||||
; str.w r12, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; push {r7, lr}
|
||||
; movs r2, #16
|
||||
; bl __aeabi_memcpy
|
||||
; pop {r7, pc}
|
||||
;
|
||||
; COMMON: function 'memcpy_16'
|
||||
; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_32(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; movs r2, #32
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_32'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_N(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_N'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;
|
||||
; Align 2, 2
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
define void @memcpy_1_al2(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r1, [r1]
|
||||
; strb r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_1_al2'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_2_al2(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_2_al2'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_3_al2(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r2, [r1, #2]
|
||||
; strb r2, [r0, #2]
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_3_al2'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_4_al2(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr r1, [r1]
|
||||
; str r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrh r2, [r1, #2]
|
||||
; strh r2, [r0, #2]
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_4_al2'
|
||||
; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_8_al2(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr r2, [r1]
|
||||
; ldr r1, [r1, #4]
|
||||
; str r1, [r0, #4]
|
||||
; str r2, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrh r2, [r1, #6]
|
||||
; strh r2, [r0, #6]
|
||||
; ldrh r2, [r1, #4]
|
||||
; strh r2, [r0, #4]
|
||||
; ldrh r2, [r1, #2]
|
||||
; strh r2, [r0, #2]
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_8_al2'
|
||||
; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_16_al2(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr.w r12, [r1]
|
||||
; ldr r3, [r1, #4]
|
||||
; ldr r2, [r1, #8]
|
||||
; ldr r1, [r1, #12]
|
||||
; str r1, [r0, #12]
|
||||
; str r2, [r0, #8]
|
||||
; str r3, [r0, #4]
|
||||
; str.w r12, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; movs r2, #16
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_16_al2'
|
||||
; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_32_al2(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; movs r2, #32
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_32_al2'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_N_al2(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_N_al2'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;
|
||||
; Align 4, 4
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
define void @memcpy_1_al4(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r1, [r1]
|
||||
; strb r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_1_al4'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_2_al4(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_2_al4'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_3_al4(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r2, [r1, #2]
|
||||
; strb r2, [r0, #2]
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_3_al4'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_4_al4(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldr r1, [r1]
|
||||
; str r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_4_al4'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_8_al4(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrd r2, r1, [r1]
|
||||
; strd r2, r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_8_al4'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_16_al4(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldm.w r1, {r2, r3, r12}
|
||||
; ldr r1, [r1, #12]
|
||||
; stm.w r0, {r2, r3, r12}
|
||||
; str r1, [r0, #12]
|
||||
;
|
||||
; COMMON: function 'memcpy_16_al4'
|
||||
; COMMON-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_32_al4(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldm.w r1!, {r2, r3, r12, lr}
|
||||
; stm.w r0!, {r2, r3, r12, lr}
|
||||
; ldm.w r1, {r2, r3, r12, lr}
|
||||
; stm.w r0, {r2, r3, r12, lr}
|
||||
;
|
||||
; COMMON: function 'memcpy_32_al4'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_N_al4(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; bl __aeabi_memcpy4
|
||||
;
|
||||
; COMMON: function 'memcpy_N_al4'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;
|
||||
; Align 1, 4
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
define void @memcpy_1_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r1, [r1]
|
||||
; strb r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_1_al14'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_2_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrb r2, [r1]
|
||||
; ldrb r1, [r1, #1]
|
||||
; strb r1, [r0, #1]
|
||||
; strb r2, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_2_al14'
|
||||
; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_3_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldrb r2, [r1, #2]
|
||||
; strb r2, [r0, #2]
|
||||
; ldrh r1, [r1]
|
||||
; strh r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrb r2, [r1]
|
||||
; ldrb r3, [r1, #1]
|
||||
; ldrb r1, [r1, #2]
|
||||
; strb r1, [r0, #2]
|
||||
; strb r3, [r0, #1]
|
||||
; strb r2, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_3_al14'
|
||||
; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_4_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr r1, [r1]
|
||||
; str r1, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; ldrb.w r12, [r1]
|
||||
; ldrb r3, [r1, #1]
|
||||
; ldrb r2, [r1, #2]
|
||||
; ldrb r1, [r1, #3]
|
||||
; strb r1, [r0, #3]
|
||||
; strb r2, [r0, #2]
|
||||
; strb r3, [r0, #1]
|
||||
; strb.w r12, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_4_al14'
|
||||
; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_8_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr r2, [r1]
|
||||
; ldr r1, [r1, #4]
|
||||
; str r1, [r0, #4]
|
||||
; str r2, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; push {r7, lr}
|
||||
; movs r2, #8
|
||||
; bl __aeabi_memcpy
|
||||
; pop {r7, pc}
|
||||
;
|
||||
; COMMON: function 'memcpy_8_al14'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_16_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; no strict-align:
|
||||
;
|
||||
; ldr.w r12, [r1]
|
||||
; ldr r3, [r1, #4]
|
||||
; ldr r2, [r1, #8]
|
||||
; ldr r1, [r1, #12]
|
||||
; str r1, [r0, #12]
|
||||
; str r2, [r0, #8]
|
||||
; str r3, [r0, #4]
|
||||
; str.w r12, [r0]
|
||||
;
|
||||
; strict-align:
|
||||
;
|
||||
; movs r2, #16
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_16_al14'
|
||||
; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_32_al14(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; movs r2, #32
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_32_al14'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @memcpy_N_al14(i8* %d, i8* %s, i32 %N) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; bl __aeabi_memcpy
|
||||
;
|
||||
; COMMON: function 'memcpy_N_al14'
|
||||
; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;
|
||||
; Align 4, 1
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
define void @memcpy_1_al41(i8* %d, i8* %s) {
|
||||
;
|
||||
; with/without strict-align:
|
||||
;
|
||||
; ldrb r1, [r1]
|
||||
; strb r1, [r0]
|
||||
;
|
||||
; COMMON: function 'memcpy_1_al41'
|
||||
; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
|
||||
;
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user