mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[Hexagon] Enable interleaving in loop vectorizer
llvm-svn: 340447
This commit is contained in:
parent
c4518513f4
commit
a83db8d2cb
@ -207,8 +207,10 @@ unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
|
||||
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
|
||||
Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
|
||||
unsigned Alignment, unsigned AddressSpace) {
|
||||
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
||||
Alignment, AddressSpace);
|
||||
if (Indices.size() != Factor)
|
||||
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
||||
Alignment, AddressSpace);
|
||||
return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
|
||||
}
|
||||
|
||||
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
|
@ -98,6 +98,9 @@ public:
|
||||
bool prefersVectorizedAddressing() {
|
||||
return false;
|
||||
}
|
||||
bool enableInterleavedAccessVectorization() {
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
|
||||
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
|
||||
|
52
test/CodeGen/Hexagon/autohvx/interleave.ll
Normal file
52
test/CodeGen/Hexagon/autohvx/interleave.ll
Normal file
@ -0,0 +1,52 @@
|
||||
; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s | FileCheck %s
|
||||
; Check that the loop has been interleaved.
|
||||
; CHECK: store <64 x i32> %interleaved.vec
|
||||
|
||||
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
target triple = "hexagon"
|
||||
|
||||
define void @f0(i32* noalias nocapture %a0, i32* noalias nocapture readonly %a1, i32 %a2) #0 {
|
||||
b0:
|
||||
%v0 = icmp eq i32 %a2, 0
|
||||
br i1 %v0, label %b3, label %b1
|
||||
|
||||
b1: ; preds = %b0
|
||||
br label %b4
|
||||
|
||||
b2: ; preds = %b4
|
||||
br label %b3
|
||||
|
||||
b3: ; preds = %b2, %b0
|
||||
ret void
|
||||
|
||||
b4: ; preds = %b4, %b1
|
||||
%v1 = phi i32 [ %v13, %b4 ], [ 0, %b1 ]
|
||||
%v2 = getelementptr inbounds i32, i32* %a1, i32 %v1
|
||||
%v3 = load i32, i32* %v2, align 4, !tbaa !1
|
||||
%v4 = getelementptr inbounds i32, i32* %a0, i32 %v1
|
||||
%v5 = load i32, i32* %v4, align 4, !tbaa !1
|
||||
%v6 = add nsw i32 %v5, %v3
|
||||
store i32 %v6, i32* %v4, align 4, !tbaa !1
|
||||
%v7 = or i32 %v1, 1
|
||||
%v8 = getelementptr inbounds i32, i32* %a1, i32 %v7
|
||||
%v9 = load i32, i32* %v8, align 4, !tbaa !1
|
||||
%v10 = getelementptr inbounds i32, i32* %a0, i32 %v7
|
||||
%v11 = load i32, i32* %v10, align 4, !tbaa !1
|
||||
%v12 = add nsw i32 %v11, %v9
|
||||
store i32 %v12, i32* %v10, align 4, !tbaa !1
|
||||
%v13 = add nuw nsw i32 %v1, 2
|
||||
%v14 = icmp eq i32 %v13, %a2
|
||||
br i1 %v14, label %b2, label %b4, !llvm.loop !5
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length128b,+hvxv60" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{!2, !2, i64 0}
|
||||
!2 = !{!"int", !3, i64 0}
|
||||
!3 = !{!"omnipotent char", !4, i64 0}
|
||||
!4 = !{!"Simple C/C++ TBAA"}
|
||||
!5 = distinct !{!5, !6}
|
||||
!6 = !{!"llvm.loop.unroll.disable"}
|
Loading…
Reference in New Issue
Block a user