mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[PowerPC][Power10] Implement centrifuge, vector gather every nth bit, vector evaluate Builtins in LLVM/Clang
This patch implements builtins for the following prototypes:

unsigned long long __builtin_cfuged (unsigned long long, unsigned long long);
vector unsigned long long vec_cfuge (vector unsigned long long, vector unsigned long long);
unsigned long long vec_gnb (vector unsigned __int128, const unsigned int);
vector unsigned char vec_ternarylogic (vector unsigned char, vector unsigned char, vector unsigned char, const unsigned int);
vector unsigned short vec_ternarylogic (vector unsigned short, vector unsigned short, vector unsigned short, const unsigned int);
vector unsigned int vec_ternarylogic (vector unsigned int, vector unsigned int, vector unsigned int, const unsigned int);
vector unsigned long long vec_ternarylogic (vector unsigned long long, vector unsigned long long, vector unsigned long long, const unsigned int);
vector unsigned __int128 vec_ternarylogic (vector unsigned __int128, vector unsigned __int128, vector unsigned __int128, const unsigned int);

Differential Revision: https://reviews.llvm.org/D80970
This commit is contained in:
parent
8a7cb41484
commit
8e4bd7c3f3
@ -68,6 +68,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
|
||||
: GCCBuiltin<"__builtin_pextd">,
|
||||
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
// Centrifuge Doubleword Builtin.
// Exposes __builtin_cfuged as the intrinsic llvm.ppc.cfuged: two i64
// operands, one i64 result, declared IntrNoMem.
|
||||
def int_ppc_cfuged
|
||||
: GCCBuiltin<"__builtin_cfuged">,
|
||||
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
// Count Leading / Trailing Zeroes under bit Mask Builtins.
|
||||
def int_ppc_cntlzdm
|
||||
: GCCBuiltin<"__builtin_cntlzdm">,
|
||||
@ -426,6 +431,16 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// P10 Vector Centrifuge Builtin.
// Exposes __builtin_altivec_vcfuged: two v2i64 operands, one v2i64 result,
// declared IntrNoMem.
|
||||
def int_ppc_altivec_vcfuged : GCCBuiltin<"__builtin_altivec_vcfuged">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// P10 Vector Gather Every Nth Bit Builtin.
// Exposes __builtin_altivec_vgnb: takes a v1i128 source and an i32 selector,
// returns an i64. ImmArg<ArgIndex<1>> requires the i32 operand (argument
// index 1) to be an immediate at every call site.
|
||||
def int_ppc_altivec_vgnb : GCCBuiltin<"__builtin_altivec_vgnb">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_v1i128_ty, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// P10 Vector Clear Bytes
|
||||
def int_ppc_altivec_vclrlb : GCCBuiltin<"__builtin_altivec_vclrlb">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
|
||||
@ -969,6 +984,11 @@ def int_ppc_vsx_xxinsertw :
|
||||
PowerPC_VSX_Intrinsic<"xxinsertw",[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty,llvm_v2i64_ty,llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
// xxeval intrinsic (declared in IR as llvm.ppc.vsx.xxeval): three v2i64
// operands plus an i32 operand, returning a v2i64. ImmArg<ArgIndex<3>>
// requires the i32 operand (argument index 3) to be an immediate.
def int_ppc_vsx_xxeval :
|
||||
PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty],
|
||||
[llvm_v2i64_ty, llvm_v2i64_ty,
|
||||
llvm_v2i64_ty, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
|
||||
// xxgenpcvbm intrinsic: a v16i8 operand plus an i32 operand, returning a
// v16i8; declared IntrNoMem.
// NOTE(review): the i32 operand carries no ImmArg attribute here, unlike
// xxeval's -- confirm that is intended.
def int_ppc_vsx_xxgenpcvbm :
|
||||
PowerPC_VSX_Intrinsic<"xxgenpcvbm", [llvm_v16i8_ty],
|
||||
[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
@ -177,6 +177,54 @@ class XForm_XT6_IMM5_VB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
|
||||
let Inst{31} = XT{5};
|
||||
}
|
||||
|
||||
// Instruction format with a 64-bit Inst encoding, split into a prefix part
// (bits 6-31 assigned below) and an instruction part (bits 38-63 assigned
// below). It carries four 6-bit register fields (XT, XA, XB, XC), an 8-bit
// immediate (IMM) and a 2-bit extended opcode (xo).
// Bits 0-5 and 32-37 are not assigned in this class; presumably the PI base
// class fills them from its `1` and `opcode` arguments -- TODO confirm
// against the PI definition.
class 8RR_XX4Form_IMM8_XTAB6<bits<6> opcode, bits<2> xo,
|
||||
dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: PI<1, opcode, OOL, IOL, asmstr, itin> {
|
||||
bits<6> XT;
|
||||
bits<6> XA;
|
||||
bits<6> XB;
|
||||
bits<6> XC;
|
||||
bits<8> IMM;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
// The prefix.
// Bits 6-7 hold the constant 1, bits 8-23 are all zero, and the 8-bit
// immediate occupies bits 24-31.
|
||||
let Inst{6-7} = 1;
|
||||
let Inst{8} = 0;
|
||||
let Inst{9-11} = 0;
|
||||
let Inst{12-13} = 0;
|
||||
let Inst{14-23} = 0;
|
||||
let Inst{24-31} = IMM;
|
||||
|
||||
// The instruction.
// Each 6-bit register number is split: its low five bits go into a
// contiguous 5-bit field (bits 38-57), and its top bit (bit 5) goes into one
// of the single-bit slots at bits 60-63, in the order XC, XA, XB, XT.
|
||||
let Inst{38-42} = XT{4-0};
|
||||
let Inst{43-47} = XA{4-0};
|
||||
let Inst{48-52} = XB{4-0};
|
||||
let Inst{53-57} = XC{4-0};
|
||||
let Inst{58-59} = xo;
|
||||
let Inst{60} = XC{5};
|
||||
let Inst{61} = XA{5};
|
||||
let Inst{62} = XB{5};
|
||||
let Inst{63} = XT{5};
|
||||
}
|
||||
|
||||
// Instruction format with a 5-bit RD field, a 3-bit N field, a 5-bit VB
// field and an 11-bit extended opcode (xo). Derives from I<4, ...>; bits 0-5
// are not assigned here and presumably come from that base class's `4`
// argument -- TODO confirm against the I definition.
class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I<4, OOL, IOL, asmstr, itin> {
|
||||
bits<5> RD;
|
||||
bits<5> VB;
|
||||
bits<3> N;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
// Layout: RD in bits 6-10, two zero bits at 11-12, N in bits 13-15, VB in
// bits 16-20, xo in bits 21-31.
let Inst{6-10} = RD;
|
||||
let Inst{11-12} = 0;
|
||||
let Inst{13-15} = N;
|
||||
|
||||
let Inst{16-20} = VB;
|
||||
let Inst{21-31} = xo;
|
||||
}
|
||||
|
||||
multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
|
||||
dag PCRel_IOL, string asmstr,
|
||||
InstrItinClass itin> {
|
||||
@ -532,6 +580,23 @@ let Predicates = [IsISA3_1] in {
|
||||
// pextd: XForm_6 instruction (arguments 31, 188) on g8rc registers; selected
// for the int_ppc_pextd intrinsic on two i64 operands.
def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
|
||||
"pextd $rA, $rS, $rB", IIC_IntGeneral,
|
||||
[(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
|
||||
// vcfuged: VXForm_1 instruction (argument 1357) on vrrc registers; selected
// for the int_ppc_altivec_vcfuged intrinsic on two v2i64 operands.
def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
"vcfuged $vD, $vA, $vB", IIC_VecGeneral,
|
||||
[(set v2i64:$vD,
|
||||
(int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>;
|
||||
// vgnb: uses the VXForm_RD5_N3_VB5 format (xo = 1228). Writes a g8rc
// register from a vrrc source and the u3imm operand $N. The pattern matches
// int_ppc_altivec_vgnb with $N as a target immediate (timm), which pairs with
// the ImmArg attribute on the intrinsic's second operand.
def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N),
|
||||
"vgnb $rD, $vB, $N", IIC_VecGeneral,
|
||||
[(set i64:$rD,
|
||||
(int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>;
|
||||
// cfuged: XForm_6 instruction (arguments 31, 220) on g8rc registers; selected
// for the int_ppc_cfuged intrinsic on two i64 operands.
def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
|
||||
"cfuged $rA, $rS, $rB", IIC_IntGeneral,
|
||||
[(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>;
|
||||
// xxeval: uses the 8RR_XX4Form_IMM8_XTAB6 format (opcode 34, xo 1). Takes
// three vsrc sources and the u8imm operand $IMM, and writes a vsrc result.
// The pattern matches int_ppc_vsx_xxeval on v2i64 operands with $IMM as a
// target immediate (timm).
def XXEVAL :
|
||||
8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
|
||||
vsrc:$XC, u8imm:$IMM),
|
||||
"xxeval $XT, $XA, $XB, $XC, $IMM", IIC_VecGeneral,
|
||||
[(set v2i64:$XT, (int_ppc_vsx_xxeval v2i64:$XA,
|
||||
v2i64:$XB, v2i64:$XC, timm:$IMM))]>;
|
||||
def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
"vclzdm $vD, $vA, $vB", IIC_VecGeneral,
|
||||
[(set v2i64:$vD,
|
||||
|
@ -9,6 +9,10 @@ declare <2 x i64> @llvm.ppc.altivec.vpdepd(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64>, <2 x i64>)
|
||||
declare i64 @llvm.ppc.pdepd(i64, i64)
|
||||
declare i64 @llvm.ppc.pextd(i64, i64)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vcfuged(<2 x i64>, <2 x i64>)
|
||||
declare i64 @llvm.ppc.cfuged(i64, i64)
|
||||
declare i64 @llvm.ppc.altivec.vgnb(<1 x i128>, i32)
|
||||
declare <2 x i64> @llvm.ppc.vsx.xxeval(<2 x i64>, <2 x i64>, <2 x i64>, i32)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vclzdm(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vctzdm(<2 x i64>, <2 x i64>)
|
||||
declare i64 @llvm.ppc.cntlzdm(i64, i64)
|
||||
@ -54,6 +58,66 @@ entry:
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vcfuged to lower to one vcfuged
; instruction followed by blr.
define <2 x i64> @test_vcfuged(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_vcfuged:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vcfuged v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%tmp = tail call <2 x i64> @llvm.ppc.altivec.vcfuged(<2 x i64> %a, <2 x i64> %b)
|
||||
ret <2 x i64> %tmp
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.cfuged to lower to one cfuged instruction
; followed by blr.
define i64 @test_cfuged(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: test_cfuged:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cfuged r3, r3, r4
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.ppc.cfuged(i64 %a, i64 %b)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
; Expects llvm.ppc.altivec.vgnb with immediate 2 to lower to vgnb with the
; same immediate as its last operand.
define i64 @test_vgnb_1(<1 x i128> %a) {
|
||||
; CHECK-LABEL: test_vgnb_1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vgnb r3, v2, 2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.ppc.altivec.vgnb(<1 x i128> %a, i32 2)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
; Same check as test_vgnb_1, using immediate 7 (the largest value that fits in
; the instruction's 3-bit N field).
define i64 @test_vgnb_2(<1 x i128> %a) {
|
||||
; CHECK-LABEL: test_vgnb_2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vgnb r3, v2, 7
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.ppc.altivec.vgnb(<1 x i128> %a, i32 7)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
; Same check as test_vgnb_1, using immediate 5.
define i64 @test_vgnb_3(<1 x i128> %a) {
|
||||
; CHECK-LABEL: test_vgnb_3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vgnb r3, v2, 5
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%tmp = tail call i64 @llvm.ppc.altivec.vgnb(<1 x i128> %a, i32 5)
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
||||
; Expects llvm.ppc.vsx.xxeval with immediate 255 (the largest 8-bit value) to
; lower to one xxeval instruction carrying that immediate.
define <2 x i64> @test_xxeval(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
|
||||
; CHECK-LABEL: test_xxeval:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxeval v2, v2, v3, v4, 255
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%tmp = tail call <2 x i64> @llvm.ppc.vsx.xxeval(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 255)
|
||||
ret <2 x i64> %tmp
|
||||
}
|
||||
|
||||
define <2 x i64> @test_vclzdm(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_vclzdm:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
|
@ -13,6 +13,18 @@
|
||||
# CHECK: pextd 1, 2, 4
|
||||
0x7c 0x41 0x21 0x78
|
||||
|
||||
# CHECK: vcfuged 1, 2, 4
|
||||
0x10 0x22 0x25 0x4d
|
||||
|
||||
# CHECK: cfuged 1, 2, 4
|
||||
0x7c 0x41 0x21 0xb8
|
||||
|
||||
# CHECK: vgnb 1, 2, 2
|
||||
0x10 0x22 0x14 0xcc
|
||||
|
||||
# CHECK: xxeval 32, 1, 2, 3, 2
|
||||
0x05 0x00 0x00 0x02 0x88 0x01 0x10 0xd1
|
||||
|
||||
# CHECK: vclzdm 1, 2, 3
|
||||
0x10 0x22 0x1f 0x84
|
||||
|
||||
|
@ -15,6 +15,20 @@
|
||||
# CHECK-BE: pextd 1, 2, 4 # encoding: [0x7c,0x41,0x21,0x78]
|
||||
# CHECK-LE: pextd 1, 2, 4 # encoding: [0x78,0x21,0x41,0x7c]
|
||||
pextd 1, 2, 4
|
||||
# CHECK-BE: vcfuged 1, 2, 4 # encoding: [0x10,0x22,0x25,0x4d]
|
||||
# CHECK-LE: vcfuged 1, 2, 4 # encoding: [0x4d,0x25,0x22,0x10]
|
||||
vcfuged 1, 2, 4
|
||||
# CHECK-BE: cfuged 1, 2, 4 # encoding: [0x7c,0x41,0x21,0xb8]
|
||||
# CHECK-LE: cfuged 1, 2, 4 # encoding: [0xb8,0x21,0x41,0x7c]
|
||||
cfuged 1, 2, 4
|
||||
# CHECK-BE: vgnb 1, 2, 2 # encoding: [0x10,0x22,0x14,0xcc]
|
||||
# CHECK-LE: vgnb 1, 2, 2 # encoding: [0xcc,0x14,0x22,0x10]
|
||||
vgnb 1, 2, 2
|
||||
# CHECK-BE: xxeval 32, 1, 2, 3, 2 # encoding: [0x05,0x00,0x00,0x02,
|
||||
# CHECK-BE-SAME: 0x88,0x01,0x10,0xd1]
|
||||
# CHECK-LE: xxeval 32, 1, 2, 3, 2 # encoding: [0x02,0x00,0x00,0x05,
|
||||
# CHECK-LE-SAME: 0xd1,0x10,0x01,0x88]
|
||||
xxeval 32, 1, 2, 3, 2
|
||||
# CHECK-BE: vclzdm 1, 2, 3 # encoding: [0x10,0x22,0x1f,0x84]
|
||||
# CHECK-LE: vclzdm 1, 2, 3 # encoding: [0x84,0x1f,0x22,0x10]
|
||||
vclzdm 1, 2, 3
|
||||
|
Loading…
x
Reference in New Issue
Block a user