mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
0b47d4e59e
Summary: Currently we determine if macro fusion is supported based on the AVX flag as a proxy for "the processor being Sandy Bridge". This is really strange as AMD now supports AVX. It also means that if the user explicitly disables AVX, we disable macro fusion. This patch adds an explicit macro fusion feature. I've also enabled it for the generic 64-bit CPU (which doesn't have AVX). This is probably another candidate for being in the MI layer, but for now I at least wanted to correct the overloading of the AVX feature. Reviewers: spatel, chandlerc, RKSimon, zvi Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D37280 llvm-svn: 312097
174 lines
6.9 KiB
LLVM
174 lines
6.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
|
|
|
; funcA: splat byte lane 5 of a <32 x i8> vector across all 32 lanes.
; Every shuffle-mask index is 5, so only the low 128-bit half of %a is read.
; The assertions below expect an in-lane vpshufb on xmm0 followed by a
; vinsertf128 that copies the splatted low half into the upper half of ymm0.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}
|
|
|
|
; funcB: splat 16-bit lane 5 of a <16 x i16> vector across all 16 lanes.
; Lane 5 lives in the low 128-bit half of %a. The assertions below expect the
; word to be replicated within xmm0 in two steps (vpshufhw over the high four
; words, then vpshufd over dwords) before vinsertf128 duplicates the low half
; into the upper half of ymm0.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}
|
|
|
|
; funcC: build a <4 x i64> splat of the scalar argument %q by inserting it
; into each of the four lanes of an undef vector in turn.
; The assertions below expect the scalar to be moved from %rdi into xmm0,
; duplicated across the low 128 bits with vpshufd, and then copied into the
; upper half of ymm0 with vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
|
|
|
|
; funcD: build a <4 x double> splat of the scalar argument %q by inserting it
; into each of the four lanes of an undef vector in turn.
; The assertions below expect vmovddup to duplicate the scalar within xmm0 and
; vinsertf128 to copy the low half into the upper half of ymm0.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
|
|
|
|
; Test this turns into a broadcast:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; funcE: an i32 is loaded from element [1][1] of a 32-byte-aligned
; [18 x [18 x float]] stack array and inserted into lanes 6 and 7 of an
; otherwise undef <8 x i32>, which is then bitcast to <8 x float>. All other
; lanes stay undef, and the value reaching the return is undef on the path
; that skips the load.
; Both conditional branches test undef; they appear to exist only to give the
; function its multi-block shape (NOTE(review): looks like reduced code from a
; larger kernel - confirm before simplifying).
; The assertions below expect a single vbroadcastss from the stack slot into
; ymm0, with the frame setup (pushq %rbp / andq $-32, %rsp) placed in the
; block that performs the load.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: jne .LBB4_2
; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $1312, %rsp # imm = 0x520
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader: ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499: ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247: ; preds = %for_exit499
  ; Address of %udx495[1][1], reinterpreted as i32* for an integer load.
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  ; Only lanes 6 and 7 are populated; lanes 0-5 remain undef.
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}
|
|
|
|
; funcF: insert the scalar argument %val into lanes 6 and 7 of an undef
; <8 x i32> and return it bitcast to <8 x float>. Lanes 0-5 stay undef.
; The assertions below expect the scalar to be moved from %edi into xmm0,
; placed in the upper two dwords of xmm0 with vpshufd, and then copied into
; the upper half of ymm0 with vinsertf128 (so lanes 6 and 7 hold %val).
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK: # BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}
|
|
|
|
; funcG: splat lane 0 of an <8 x float> vector across all eight lanes.
; Lane 0 is in the low 128-bit half, so the assertions below expect an
; in-lane vpermilps on xmm0 followed by vinsertf128 copying the low half into
; the upper half of ymm0.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}
|
|
|
|
; funcH: splat lane 5 of an <8 x float> vector across all eight lanes.
; Lane 5 is in the upper 128-bit half, so the assertions below expect a
; per-half vpermilps on the full ymm0 (selecting element 1 of each half, i.e.
; lanes 1 and 5) followed by vperm2f128 replicating the upper half into both
; halves.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT: retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}
|
|
|
|
; splat_load_2f64_11: load a <2 x double> from %ptr and splat its lane 1 into
; both result lanes.
; The assertions below expect the load and the splat to fold into a single
; vmovddup with a memory operand.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK: # BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}
|
|
|
|
; splat_load_4f64_2222: load a <4 x double> from %ptr and splat its lane 2
; across all four result lanes.
; The assertions below expect the load and the splat to fold into a single
; vbroadcastsd from byte offset 16 (lane 2 * 8 bytes) off %rdi.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}
|
|
|
|
; splat_load_4f32_0000: load a <4 x float> from %ptr and splat its lane 0
; across all four result lanes.
; The assertions below expect the load and the splat to fold into a single
; 128-bit vbroadcastss reading directly from (%rdi).
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}
|
|
|
|
; splat_load_8f32_77777777: load an <8 x float> from %ptr and splat its lane 7
; across all eight result lanes.
; The assertions below expect the load and the splat to fold into a single
; vbroadcastss from byte offset 28 (lane 7 * 4 bytes) off %rdi.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}
|