mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[Hexagon] Realign HVX vectors wherever possible
Introduce HexagonVectorCombine as a helper class for vector-related optimizations.
This commit is contained in:
parent
4ae2c1c200
commit
deb082d99d
@ -60,6 +60,7 @@ add_llvm_target(HexagonCodeGen
|
||||
HexagonTargetMachine.cpp
|
||||
HexagonTargetObjectFile.cpp
|
||||
HexagonTargetTransformInfo.cpp
|
||||
HexagonVectorCombine.cpp
|
||||
HexagonVectorLoopCarriedReuse.cpp
|
||||
HexagonVectorPrint.cpp
|
||||
HexagonVExtract.cpp
|
||||
|
@ -100,6 +100,9 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
|
||||
static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
|
||||
cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
|
||||
|
||||
static cl::opt<bool> EnableVectorCombine("hexagon-vector-combine", cl::Hidden,
|
||||
cl::ZeroOrMore, cl::init(true), cl::desc("Enable HVX vector combining"));
|
||||
|
||||
static cl::opt<bool> EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup",
|
||||
cl::Hidden, cl::ZeroOrMore, cl::init(true),
|
||||
cl::desc("Simplify the CFG after atomic expansion pass"));
|
||||
@ -140,12 +143,13 @@ namespace llvm {
|
||||
void initializeHexagonGenMuxPass(PassRegistry&);
|
||||
void initializeHexagonHardwareLoopsPass(PassRegistry&);
|
||||
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
|
||||
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
|
||||
void initializeHexagonNewValueJumpPass(PassRegistry&);
|
||||
void initializeHexagonOptAddrModePass(PassRegistry&);
|
||||
void initializeHexagonPacketizerPass(PassRegistry&);
|
||||
void initializeHexagonRDFOptPass(PassRegistry&);
|
||||
void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
|
||||
void initializeHexagonVectorCombineLegacyPass(PassRegistry&);
|
||||
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
|
||||
void initializeHexagonVExtractPass(PassRegistry&);
|
||||
Pass *createHexagonLoopIdiomPass();
|
||||
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
|
||||
@ -169,14 +173,15 @@ namespace llvm {
|
||||
CodeGenOpt::Level OptLevel);
|
||||
FunctionPass *createHexagonLoopRescheduling();
|
||||
FunctionPass *createHexagonNewValueJump();
|
||||
FunctionPass *createHexagonOptimizeSZextends();
|
||||
FunctionPass *createHexagonOptAddrMode();
|
||||
FunctionPass *createHexagonOptimizeSZextends();
|
||||
FunctionPass *createHexagonPacketizer(bool Minimal);
|
||||
FunctionPass *createHexagonPeephole();
|
||||
FunctionPass *createHexagonRDFOpt();
|
||||
FunctionPass *createHexagonSplitConst32AndConst64();
|
||||
FunctionPass *createHexagonSplitDoubleRegs();
|
||||
FunctionPass *createHexagonStoreWidening();
|
||||
FunctionPass *createHexagonVectorCombineLegacyPass();
|
||||
FunctionPass *createHexagonVectorPrint();
|
||||
FunctionPass *createHexagonVExtract();
|
||||
} // end namespace llvm;
|
||||
@ -199,12 +204,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
|
||||
initializeHexagonGenMuxPass(PR);
|
||||
initializeHexagonHardwareLoopsPass(PR);
|
||||
initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
|
||||
initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PR);
|
||||
initializeHexagonNewValueJumpPass(PR);
|
||||
initializeHexagonOptAddrModePass(PR);
|
||||
initializeHexagonPacketizerPass(PR);
|
||||
initializeHexagonRDFOptPass(PR);
|
||||
initializeHexagonSplitDoubleRegsPass(PR);
|
||||
initializeHexagonVectorCombineLegacyPass(PR);
|
||||
initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PR);
|
||||
initializeHexagonVExtractPass(PR);
|
||||
}
|
||||
|
||||
@ -350,6 +356,8 @@ void HexagonPassConfig::addIRPasses() {
|
||||
.sinkCommonInsts(true)));
|
||||
if (EnableLoopPrefetch)
|
||||
addPass(createLoopDataPrefetchPass());
|
||||
if (EnableVectorCombine)
|
||||
addPass(createHexagonVectorCombineLegacyPass());
|
||||
if (EnableCommGEP)
|
||||
addPass(createHexagonCommonGEP());
|
||||
// Replace certain combinations of shifts and ands with extracts.
|
||||
|
@ -36,7 +36,7 @@ static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
|
||||
cl::desc("Control lookup table emission on Hexagon target"));
|
||||
|
||||
static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true),
|
||||
cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
|
||||
cl::Hidden, cl::desc("Enable masked loads/stores for HVX"));
|
||||
|
||||
// Constant "cost factor" to make floating point operations more expensive
|
||||
// in terms of vectorization cost. This isn't the best way, but it should
|
||||
|
1474
lib/Target/Hexagon/HexagonVectorCombine.cpp
Normal file
1474
lib/Target/Hexagon/HexagonVectorCombine.cpp
Normal file
File diff suppressed because it is too large
Load Diff
69
test/CodeGen/Hexagon/autohvx/masked-vector-align.ll
Normal file
69
test/CodeGen/Hexagon/autohvx/masked-vector-align.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define <32 x i32> @f0(i8* %a0, i32 %a1) #0 {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: r0 = add(r1,r0)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: r7 = #8
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: r4 = ##.LCPI0_0
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: r2 = #-1
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0 = vmem(r0+#1)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v1 = vmem(r0+#2)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: r0 = add(r0,#128)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v1 = valign(v1,v0,r7)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v2 = vmem(r4+#0)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: q0 = vand(v2,r2)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.w = vadd(v0.w,v1.w)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: if (q0) vmem(r0+#0) = v0
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = add i32 %a1, 128
|
||||
%v1 = getelementptr i8, i8* %a0, i32 %v0
|
||||
%v2 = bitcast i8* %v1 to <32 x i32>*
|
||||
%v3 = tail call <32 x i32> @llvm.masked.load.v32i32.p0v32i32(<32 x i32>* %v2, i32 128, <32 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i32> undef)
|
||||
%v4 = add i32 %a1, 136
|
||||
%v5 = getelementptr i8, i8* %a0, i32 %v4
|
||||
%v6 = bitcast i8* %v5 to <32 x i32>*
|
||||
%v7 = tail call <32 x i32> @llvm.masked.load.v32i32.p0v32i32(<32 x i32>* %v6, i32 8, <32 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i32> undef)
|
||||
%v8 = add <32 x i32> %v3, %v7
|
||||
tail call void @llvm.masked.store.v32i32.p0v32i32(<32 x i32> %v8, <32 x i32>* %v2, i32 128, <32 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
|
||||
ret <32 x i32> %v8
|
||||
}
|
||||
|
||||
; Function Attrs: argmemonly nounwind readonly willreturn
|
||||
declare <32 x i32> @llvm.masked.load.v32i32.p0v32i32(<32 x i32>*, i32 immarg, <32 x i1>, <32 x i32>) #1
|
||||
|
||||
; Function Attrs: argmemonly nounwind willreturn
|
||||
declare void @llvm.masked.store.v32i32.p0v32i32(<32 x i32>, <32 x i32>*, i32 immarg, <32 x i1>) #2
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b,-packets" }
|
||||
attributes #1 = { argmemonly nounwind readonly willreturn }
|
||||
attributes #2 = { argmemonly nounwind willreturn }
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=hexagon -enable-pipeliner=false < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -enable-pipeliner=false -hexagon-vector-combine=false < %s | FileCheck %s
|
||||
|
||||
; Test that the vsplat and vmemu are not all serialized due to chain edges
|
||||
; caused by the hasSideEffects flag. The exact code generation may change
|
||||
|
Loading…
Reference in New Issue
Block a user