1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
Sebastian Neubauer 8980610845 [InstCombine] Move target-specific inst combining
For a long time, the InstCombine pass handled target specific
intrinsics. Having target specific code in general passes was noted as
an area for improvement for a long time.

D81728 moves most target specific code out of the InstCombine pass.
Applying the target specific combinations in an extra pass would
probably result in inferior optimizations compared to the current
fixed-point iteration, therefore the InstCombine pass resorts to newly
introduced functions in the TargetTransformInfo when it encounters
unknown intrinsics.
The patch should not have any effect on generated code (under the
assumption that code never uses intrinsics from a foreign target).

This introduces three new functions:
TargetTransformInfo::instCombineIntrinsic
TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic

A few target specific parts are left in the InstCombine folder, where
it makes sense to share code. The largest left-over part in
InstCombineCalls.cpp is the code shared between arm and aarch64.

This allows to move about 3000 lines out from InstCombine to the targets.

Differential Revision: https://reviews.llvm.org/D81728
2020-07-22 15:59:49 +02:00

26 lines
1.2 KiB
LLVM

; RUN: opt < %s -instcombine -mtriple=arm -S | FileCheck %s
; The alignment arguments for NEON load/store intrinsics can be increased
; by instcombine. Check for this.
; CHECK: vld4.v2i32.p0i8({{.*}}, i32 32)
; CHECK: vst4.p0i8.v2i32({{.*}}, i32 16)
@x = common global [8 x i32] zeroinitializer, align 32
@y = common global [8 x i32] zeroinitializer, align 16
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
define void @test() nounwind ssp {
%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
ret void
}
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind