1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

Add new X86 AVX2 VBROADCAST instructions.

llvm-svn: 143612
This commit is contained in:
Craig Topper 2011-11-03 07:35:53 +00:00
parent e173a9ebf9
commit 124b2fd08c
4 changed files with 66 additions and 21 deletions

View File

@ -1281,13 +1281,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_vbroadcastss :
def int_x86_avx_vbroadcast_ss :
GCCBuiltin<"__builtin_ia32_vbroadcastss">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcast_sd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcastss_256 :
def int_x86_avx_vbroadcast_ss_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcastf128_pd_256 :
@ -1672,6 +1672,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_vbroadcast_ss_ps :
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
def int_x86_avx2_vbroadcast_sd_pd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrReadMem]>;
def int_x86_avx2_vbroadcast_ss_ps_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
def int_x86_avx2_vbroadcasti128 :
GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;

View File

@ -7083,35 +7083,48 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, VEX;
def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
int_x86_avx_vbroadcastss>;
def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
int_x86_avx_vbroadcastss_256>;
def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
int_x86_avx_vbroadcast_sd_256>;
class avx_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
Intrinsic Int> :
AVX8I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int VR128:$src))]>, VEX;
def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
int_x86_avx_vbroadcast_ss>;
def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
int_x86_avx_vbroadcast_ss_256>;
def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
let Predicates = [HasAVX2] in
let Predicates = [HasAVX2] in {
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
int_x86_avx2_vbroadcasti128>;
def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128,
int_x86_avx2_vbroadcast_ss_ps>;
def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256,
int_x86_avx2_vbroadcast_ss_ps_256>;
def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256,
int_x86_avx2_vbroadcast_sd_pd_256>;
}
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSY addr:$src)>;
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VBROADCASTSD addr:$src)>;
(VBROADCASTSDrm addr:$src)>;
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSY addr:$src)>;
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSD addr:$src)>;
(VBROADCASTSDrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSS addr:$src)>;
(VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSS addr:$src)>;
(VBROADCASTSSrm addr:$src)>;
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values

View File

@ -2292,20 +2292,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
; CHECK: vbroadcastss
%res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
%res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
; CHECK: vbroadcastss
%res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
%res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {

View File

@ -743,3 +743,26 @@ define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; CHECK: vbroadcastsd
%res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; CHECK: vbroadcastss
%res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; CHECK: vbroadcastss
%res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly