mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Teach instcombine to set the alignment arguments for NEON load/store intrinsics.
llvm-svn: 117154
This commit is contained in:
parent
20b70697bb
commit
0290dbe7d4
@ -702,6 +702,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
}
|
||||
break;
|
||||
|
||||
// NEON structured load/store intrinsics (vld1-4, vld2-4lane, vst1-4,
// vst2-4lane) all share one handler: raise the intrinsic's alignment
// argument when a larger alignment is known for the address operand.
case Intrinsic::arm_neon_vld1:
|
||||
case Intrinsic::arm_neon_vld2:
|
||||
case Intrinsic::arm_neon_vld3:
|
||||
case Intrinsic::arm_neon_vld4:
|
||||
case Intrinsic::arm_neon_vld2lane:
|
||||
case Intrinsic::arm_neon_vld3lane:
|
||||
case Intrinsic::arm_neon_vld4lane:
|
||||
case Intrinsic::arm_neon_vst1:
|
||||
case Intrinsic::arm_neon_vst2:
|
||||
case Intrinsic::arm_neon_vst3:
|
||||
case Intrinsic::arm_neon_vst4:
|
||||
case Intrinsic::arm_neon_vst2lane:
|
||||
case Intrinsic::arm_neon_vst3lane:
|
||||
case Intrinsic::arm_neon_vst4lane: {
|
||||
// Operand 0 is the address for every intrinsic in this group.
// NOTE(review): judging by its name, GetOrEnforceKnownAlignment may also
// raise the alignment of the underlying object, not just query it -- confirm
// against the helper's definition.
unsigned MemAlign = GetOrEnforceKnownAlignment(II->getArgOperand(0));
|
||||
// The alignment is always passed as the last call argument.
unsigned AlignArg = II->getNumArgOperands() - 1;
|
||||
// Null when the alignment argument is not a compile-time constant; in that
// case the call is left untouched.
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
|
||||
// Only ever increase the recorded alignment; an equal or larger existing
// value is kept as is.
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
|
||||
II->setArgOperand(AlignArg,
|
||||
ConstantInt::get(Type::getInt32Ty(II->getContext()),
|
||||
MemAlign, false));
|
||||
// The call was modified in place; return it rather than falling out of the
// switch (presumably this tells the caller the instruction changed -- confirm
// against the visitCallInst return convention).
return II;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::stackrestore: {
|
||||
// If the save is right next to the restore, remove the restore. This can
|
||||
// happen when variable allocas are DCE'd.
|
||||
|
25
test/Transforms/InstCombine/neon-intrinsics.ll
Normal file
25
test/Transforms/InstCombine/neon-intrinsics.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
; The alignment arguments for NEON load/store intrinsics can be increased
|
||||
; by instcombine. Check for this.
|
||||
|
||||
; CHECK: vld4.v2i32({{.*}}, i32 32)
|
||||
; CHECK: vst4.v2i32({{.*}}, i32 16)
|
||||
|
||||
@x = common global [8 x i32] zeroinitializer, align 32
|
||||
@y = common global [8 x i32] zeroinitializer, align 16
|
||||
|
||||
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
|
||||
|
||||
; Test body: load four <2 x i32> vectors from @x with vld4, then store them to
; @y with vst4. Both calls pass an alignment argument of 1; the CHECK lines
; above expect instcombine to rewrite these to 32 and 16, matching the
; declared alignments of @x and @y.
define void @test() nounwind ssp {
|
||||
; Structured load from @x; the trailing i32 is the alignment argument.
%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
|
||||
; Unpack the four vectors of the returned aggregate.
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
|
||||
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
|
||||
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
|
||||
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
|
||||
; Structured store to @y; again the trailing i32 is the alignment argument.
call void @llvm.arm.neon.vst4.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
|
||||
declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
|
Loading…
x
Reference in New Issue
Block a user