mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[PowerPC] Implement Truncate and Store VSX Vector Builtins
This patch implements the `vec_xst_trunc` function in altivec.h in order to utilize the Store VSX Vector Rightmost [byte | half | word | doubleword] Indexed instructions introduced in Power10. Differential Revision: https://reviews.llvm.org/D82467
This commit is contained in:
parent
b6d2a38769
commit
dedffec69e
@ -1100,6 +1100,17 @@ let Predicates = [IsISA3_1] in {
|
||||
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
|
||||
}
|
||||
|
||||
// Selection patterns for the Store VSX Vector Rightmost
// {Byte, Halfword, Word, Doubleword} Indexed instructions
// (STXVRBX / STXVRHX / STXVRWX / STXVRDX).
// Each pattern matches a store (or truncating store) of element 0 extracted
// from a vector and emits the matching STXVR*X on the same xoaddr address,
// after copying the source vector into the VSRC register class.
// The patterns are only active under the IsISA3_1 predicate;
// AddedComplexity = 400 is set so that they are preferred over competing
// lower-complexity patterns for the same DAG.
// NOTE(review): element index 0 is matched with no endianness predicate.
// Confirm that element 0 is the "rightmost" element on big-endian targets as
// well; otherwise these patterns need to be split by endianness.
let AddedComplexity = 400, Predicates = [IsISA3_1] in {
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
|
||||
(STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src),
|
||||
(STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
|
||||
def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src),
|
||||
(STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
|
||||
def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src),
|
||||
(STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
|
||||
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
|
||||
i32immNonAllOneNonZero:$A,
|
||||
|
@ -2,9 +2,12 @@
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-O0
|
||||
|
||||
; These test cases aim to test the builtins for the Power10 VSX vector
|
||||
; instructions introduced in ISA 3.1.
|
||||
@ -19,6 +22,14 @@ define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
|
||||
; CHECK-NEXT: srwi r3, r3, 31
|
||||
; CHECK-NEXT: extsw r3, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: xvtlsbb cr0, v2
|
||||
; CHECK-O0-NEXT: mfocrf r3, 128
|
||||
; CHECK-O0-NEXT: srwi r3, r3, 31
|
||||
; CHECK-O0-NEXT: extsw r3, r3
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1)
|
||||
ret i32 %0
|
||||
@ -32,7 +43,199 @@ define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
|
||||
; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31
|
||||
; CHECK-NEXT: extsw r3, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: xvtlsbb cr0, v2
|
||||
; CHECK-O0-NEXT: mfocrf r3, 128
|
||||
; CHECK-O0-NEXT: rlwinm r3, r3, 3, 31, 31
|
||||
; CHECK-O0-NEXT: extsw r3, r3
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <16 x i8>, extracts element 0 and
; stores that byte at %__ptr + %__offset (the GEP is in i8 units).
; The CHECK lines expect a single stxvrbx; the CHECK-O0 lines expect a
; vextubrx extract followed by an stb store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_uc; presumably the
; two names mirror the signed/unsigned char overloads of vec_xst_trunc - confirm.
define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_sc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: stxvrbx v2, r6, r5
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_sc:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: li r3, 0
|
||||
; CHECK-O0-NEXT: vextubrx r3, r3, v2
|
||||
; CHECK-O0-NEXT: # kill: def $r3 killed $r3 killed $x3
|
||||
; CHECK-O0-NEXT: add r4, r6, r5
|
||||
; CHECK-O0-NEXT: stb r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <16 x i8>
|
||||
%conv = extractelement <16 x i8> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i8, i8* %__ptr, i64 %__offset
|
||||
store i8 %conv, i8* %add.ptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <16 x i8>, extracts element 0 and
; stores that byte at %__ptr + %__offset (the GEP is in i8 units).
; The CHECK lines expect a single stxvrbx; the CHECK-O0 lines expect a
; vextubrx extract followed by an stb store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_sc; presumably the
; two names mirror the signed/unsigned char overloads of vec_xst_trunc - confirm.
define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_uc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: stxvrbx v2, r6, r5
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_uc:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: li r3, 0
|
||||
; CHECK-O0-NEXT: vextubrx r3, r3, v2
|
||||
; CHECK-O0-NEXT: # kill: def $r3 killed $r3 killed $x3
|
||||
; CHECK-O0-NEXT: add r4, r6, r5
|
||||
; CHECK-O0-NEXT: stb r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <16 x i8>
|
||||
%conv = extractelement <16 x i8> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i8, i8* %__ptr, i64 %__offset
|
||||
store i8 %conv, i8* %add.ptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <8 x i16>, extracts element 0 and
; stores that halfword at %__ptr + %__offset (the GEP is in i16 units).
; The CHECK lines expect the offset to be scaled with "sldi ..., 1" and then a
; single stxvrhx; the CHECK-O0 lines expect a vextuhrx extract followed by an
; sth store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_us; presumably the
; two names mirror the signed/unsigned short overloads of vec_xst_trunc - confirm.
define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_ss:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r5, 1
|
||||
; CHECK-NEXT: stxvrhx v2, r6, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_ss:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: li r3, 0
|
||||
; CHECK-O0-NEXT: vextuhrx r3, r3, v2
|
||||
; CHECK-O0-NEXT: # kill: def $r3 killed $r3 killed $x3
|
||||
; CHECK-O0-NEXT: sldi r4, r5, 1
|
||||
; CHECK-O0-NEXT: add r4, r6, r4
|
||||
; CHECK-O0-NEXT: sth r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <8 x i16>
|
||||
%conv = extractelement <8 x i16> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i16, i16* %__ptr, i64 %__offset
|
||||
store i16 %conv, i16* %add.ptr, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <8 x i16>, extracts element 0 and
; stores that halfword at %__ptr + %__offset (the GEP is in i16 units).
; The CHECK lines expect the offset to be scaled with "sldi ..., 1" and then a
; single stxvrhx; the CHECK-O0 lines expect a vextuhrx extract followed by an
; sth store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_ss; presumably the
; two names mirror the signed/unsigned short overloads of vec_xst_trunc - confirm.
define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_us:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r5, 1
|
||||
; CHECK-NEXT: stxvrhx v2, r6, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_us:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: li r3, 0
|
||||
; CHECK-O0-NEXT: vextuhrx r3, r3, v2
|
||||
; CHECK-O0-NEXT: # kill: def $r3 killed $r3 killed $x3
|
||||
; CHECK-O0-NEXT: sldi r4, r5, 1
|
||||
; CHECK-O0-NEXT: add r4, r6, r4
|
||||
; CHECK-O0-NEXT: sth r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <8 x i16>
|
||||
%conv = extractelement <8 x i16> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i16, i16* %__ptr, i64 %__offset
|
||||
store i16 %conv, i16* %add.ptr, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <4 x i32>, extracts element 0 and
; stores that word at %__ptr + %__offset (the GEP is in i32 units).
; The CHECK lines expect the offset to be scaled with "sldi ..., 2" and then a
; single stxvrwx; the CHECK-O0 lines expect a vextuwrx extract followed by an
; stw store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_ui; presumably the
; two names mirror the signed/unsigned int overloads of vec_xst_trunc - confirm.
define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_si:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r5, 2
|
||||
; CHECK-NEXT: stxvrwx v2, r6, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_si:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: li r3, 0
|
||||
; CHECK-O0-NEXT: vextuwrx r3, r3, v2
|
||||
; CHECK-O0-NEXT: # kill: def $r3 killed $r3 killed $x3
|
||||
; CHECK-O0-NEXT: sldi r4, r5, 2
|
||||
; CHECK-O0-NEXT: add r4, r6, r4
|
||||
; CHECK-O0-NEXT: stw r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <4 x i32>
|
||||
%conv = extractelement <4 x i32> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i32, i32* %__ptr, i64 %__offset
|
||||
store i32 %conv, i32* %add.ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <4 x i32>, extracts element 0 and
; stores that word at %__ptr + %__offset (the GEP is in i32 units).
; The CHECK lines expect the offset to be scaled with "sldi ..., 2" and then a
; single stxvrwx; the CHECK-O0 lines expect a vextuwrx extract followed by an
; stw store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_si; presumably the
; two names mirror the signed/unsigned int overloads of vec_xst_trunc - confirm.
define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_ui:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r5, 2
|
||||
; CHECK-NEXT: stxvrwx v2, r6, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_ui:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: li r3, 0
|
||||
; CHECK-O0-NEXT: vextuwrx r3, r3, v2
|
||||
; CHECK-O0-NEXT: # kill: def $r3 killed $r3 killed $x3
|
||||
; CHECK-O0-NEXT: sldi r4, r5, 2
|
||||
; CHECK-O0-NEXT: add r4, r6, r4
|
||||
; CHECK-O0-NEXT: stw r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <4 x i32>
|
||||
%conv = extractelement <4 x i32> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i32, i32* %__ptr, i64 %__offset
|
||||
store i32 %conv, i32* %add.ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <2 x i64>, extracts element 0 and
; stores that doubleword at %__ptr + %__offset (the GEP is in i64 units).
; The CHECK lines expect the offset to be scaled with "sldi ..., 3" and then a
; single stxvrdx; the CHECK-O0 lines expect an mfvsrld move followed by an
; std store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_ull; presumably the
; two names mirror the signed/unsigned long long overloads of vec_xst_trunc -
; confirm.
define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_sll:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r5, 3
|
||||
; CHECK-NEXT: stxvrdx v2, r6, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_sll:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: mfvsrld r3, v2
|
||||
; CHECK-O0-NEXT: sldi r4, r5, 3
|
||||
; CHECK-O0-NEXT: add r4, r6, r4
|
||||
; CHECK-O0-NEXT: std r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <2 x i64>
|
||||
%conv = extractelement <2 x i64> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i64, i64* %__ptr, i64 %__offset
|
||||
store i64 %conv, i64* %add.ptr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reinterprets the <1 x i128> argument as <2 x i64>, extracts element 0 and
; stores that doubleword at %__ptr + %__offset (the GEP is in i64 units).
; The CHECK lines expect the offset to be scaled with "sldi ..., 3" and then a
; single stxvrdx; the CHECK-O0 lines expect an mfvsrld move followed by an
; std store.
; NOTE(review): the IR body is identical to @vec_xst_trunc_sll; presumably the
; two names mirror the signed/unsigned long long overloads of vec_xst_trunc -
; confirm.
define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
|
||||
; CHECK-LABEL: vec_xst_trunc_ull:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r3, r5, 3
|
||||
; CHECK-NEXT: stxvrdx v2, r6, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-O0-LABEL: vec_xst_trunc_ull:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-O0-NEXT: mfvsrld r3, v2
|
||||
; CHECK-O0-NEXT: sldi r4, r5, 3
|
||||
; CHECK-O0-NEXT: add r4, r6, r4
|
||||
; CHECK-O0-NEXT: std r3, 0(r4)
|
||||
; CHECK-O0-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast <1 x i128> %__vec to <2 x i64>
|
||||
%conv = extractelement <2 x i64> %0, i32 0
|
||||
%add.ptr = getelementptr inbounds i64, i64* %__ptr, i64 %__offset
|
||||
store i64 %conv, i64* %add.ptr, align 8
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user