1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

Teach instcombine to set the alignment arguments for NEON load/store intrinsics.

llvm-svn: 117154
This commit is contained in:
Bob Wilson 2010-10-22 21:41:48 +00:00
parent 20b70697bb
commit 0290dbe7d4
2 changed files with 51 additions and 0 deletions

View File

@ -702,6 +702,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
// NEON load/store intrinsics: raise the call's alignment argument when the
// alignment obtained for its pointer operand is larger than the value the
// call currently states.
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
// Alignment reported by GetOrEnforceKnownAlignment for argument operand 0
// (the pointer argument in the vld4/vst4 calls of the accompanying test).
unsigned MemAlign = GetOrEnforceKnownAlignment(II->getArgOperand(0));
// The alignment is always read from the last argument operand of the call.
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
// Only rewrite when the current alignment is a constant integer that is
// strictly smaller than MemAlign; a non-constant, equal or larger value is
// left untouched, so the argument is never decreased here.
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
// Replace the argument with an i32 constant holding MemAlign; the trailing
// `false` is ConstantInt::get's isSigned flag.
II->setArgOperand(AlignArg,
ConstantInt::get(Type::getInt32Ty(II->getContext()),
MemAlign, false));
// NOTE(review): returning II presumably signals to the caller that the call
// was modified in place -- confirm against visitCallInst's return contract.
return II;
}
break;
}
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.

View File

@ -0,0 +1,25 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; The alignment arguments for NEON load/store intrinsics can be increased
; by instcombine. Check for this.
; CHECK: vld4.v2i32({{.*}}, i32 32)
; CHECK: vst4.v2i32({{.*}}, i32 16)
; Source (@x) and destination (@y) buffers for the test. They are declared
; with different alignments (32 and 16), matching the two alignment values
; that the check patterns above expect on the vld4 and vst4 calls.
@x = common global [8 x i32] zeroinitializer, align 32
@y = common global [8 x i32] zeroinitializer, align 16
; Aggregate of four <2 x i32> vectors; this is the return type of the vld4
; intrinsic declared at the bottom of the file.
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
; Loads four <2 x i32> vectors from @x with vld4 and stores them to @y with
; vst4. Both calls pass an alignment argument of 1 on input; after
; instcombine the expected values are 32 (alignment of @x) and 16 (alignment
; of @y) respectively.
define void @test() nounwind ssp {
; Load with the alignment argument (last operand) deliberately set to 1.
%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
; Unpack the four vectors of the returned aggregate.
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
; Store the four vectors, again with the alignment argument set to 1.
call void @llvm.arm.neon.vst4.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
ret void
}
; Declarations of the two NEON intrinsics called by @test. In both, the
; pointer is the first parameter and the trailing i32 is the alignment
; argument that the check patterns above inspect.
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind