mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
3863a9d4f9
Significantly reduces performance (~30%) of gipfeli (https://github.com/google/gipfeli). I have not yet managed to reproduce this regression with the open-source version of the benchmark on GitHub, but will work with others to get a reproducer to you later today. llvm-svn: 315680
18 lines
997 B
LLVM
18 lines
997 B
LLVM
; Test driver: feeds this file to opt's cost-model analysis for an x86-64 Linux
; triple with the avx512f feature enabled, and pipes the printed analysis into
; FileCheck, which matches it against the directives embedded in this file.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-linux-unknown-unknown -mattr=+avx512f | FileCheck %s
|
|
|
|
; Aggregate with a single member: an array of 1000 i32 values. It is the
; pointee type of the vector-of-pointers argument taken by @foov.
%struct.S = type { [1000 x i32] }
|
|
|
|
|
|
; Masked gather intrinsic producing <4 x i32>. Operand types, in order: a vector
; of four i32 pointers, an i32, a <4 x i1> vector, and a <4 x i32> vector. Per
; the LLVM LangRef these are the addresses, the alignment, the lane mask, and
; the pass-through value respectively.
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
|
|
|
|
; @foov: takes four %struct.S pointers (%s) and a scalar index (%base), builds
; four element addresses with two vector GEPs, and gathers one i32 through each.
; The two FileCheck directives in the body expect the cost model to print a
; cost of 0 for each of the two getelementptr instructions.
define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
|
|
; Put %base into lane 0 of an otherwise undef <4 x i64> vector.
%temp = insertelement <4 x i64> undef, i64 %base, i32 0
|
|
; An all-zero shuffle mask selects lane 0 of %temp for every result lane,
; i.e. %vector is %base replicated into all four lanes.
%vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer
|
|
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S
|
|
; Vector GEP with all-zero indices: per lane, the address of the
; [1000 x i32] member (field 0) of the %struct.S that lane points to.
%B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
|
|
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
|
|
; Vector GEP indexing each array from %B with the replicated %base value,
; so every lane addresses element %base of its own array.
%arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
|
|
; Gather through the four addresses with an all-true mask, an i32 operand of 4
; (the alignment, per the LangRef), and an undef pass-through vector.
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
|
|
ret <4 x i32> %res
|
|
}
|