1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[PowerPC] Add PowerPC population count, reversed load and store related builtins and intrinsics for XL compatibility

This patch is one of a series of patches that provide builtins for
compatibility with the XL compiler. It adds the builtins and intrinsics for
population count and for reversed load and store operations.

Reviewed By: nemanjai, #powerpc

Differential revision: https://reviews.llvm.org/D106021
This commit is contained in:
Victor Huang 2021-07-15 17:21:54 -05:00
parent 4f5c97bb0f
commit 61ce66a632
6 changed files with 209 additions and 0 deletions

View File

@ -1598,6 +1598,26 @@ let TargetPrefix = "ppc" in {
// Exposes __builtin_ppc_maddld as an intrinsic that takes three i64 operands
// and returns an i64. IntrNoMem declares that it does not access memory.
def int_ppc_maddld
: GCCBuiltin<"__builtin_ppc_maddld">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
// load
// Reversed-load intrinsics backing __builtin_ppc_load2r/load4r/load8r.
// Each takes a single pointer operand; load2r and load4r produce an i32 and
// load8r produces an i64. IntrReadMem together with IntrArgMemOnly declares
// that they only read memory, and only through the pointer argument.
def int_ppc_load2r
: GCCBuiltin<"__builtin_ppc_load2r">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_load4r
: GCCBuiltin<"__builtin_ppc_load4r">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_load8r
: GCCBuiltin<"__builtin_ppc_load8r">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
// store
// Reversed-store intrinsics backing __builtin_ppc_store2r/store4r/store8r.
// Each takes the value to store (i32 for store2r/store4r, i64 for store8r)
// followed by the destination pointer, and returns nothing.
// IntrWriteMem declares that they only write memory; IntrArgMemOnly restricts
// that write to the memory reached through the pointer argument, mirroring
// the attributes already used by the matching load intrinsics.
def int_ppc_store2r
    : GCCBuiltin<"__builtin_ppc_store2r">,
      Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
                [IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_store4r
    : GCCBuiltin<"__builtin_ppc_store4r">,
      Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
                [IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_store8r
    : GCCBuiltin<"__builtin_ppc_store8r">,
      Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty],
                [IntrWriteMem, IntrArgMemOnly]>;
}
//===----------------------------------------------------------------------===//

View File

@ -1790,6 +1790,10 @@ def : Pat<(i64 (int_ppc_mulhd g8rc:$a, g8rc:$b)),
(i64 (MULHD $a, $b))>;
// int_ppc_mulhdu on two g8rc operands selects MULHDU.
def : Pat<(i64 (int_ppc_mulhdu g8rc:$a, g8rc:$b)),
(i64 (MULHDU $a, $b))>;
// The 8-byte reversed load/store intrinsics select LDBRX and STDBRX.
// The address operand is matched with ForceXForm in both the source and the
// result pattern; the stored value is a g8rc register.
def : Pat<(int_ppc_load8r ForceXForm:$ptr),
(LDBRX ForceXForm:$ptr)>;
def : Pat<(int_ppc_store8r g8rc:$a, ForceXForm:$ptr),
(STDBRX g8rc:$a, ForceXForm:$ptr)>;
}
let Predicates = [IsISA3_0] in {

View File

@ -5274,6 +5274,16 @@ def : Pat<(i32 (int_ppc_mulhw gprc:$a, gprc:$b)),
// int_ppc_mulhwu on two gprc operands selects MULHWU.
def : Pat<(i32 (int_ppc_mulhwu gprc:$a, gprc:$b)),
(i32 (MULHWU $a, $b))>;
// The 2- and 4-byte reversed load/store intrinsics select as follows:
//   load2r -> LHBRX, load4r -> LWBRX, store2r -> STHBRX, store4r -> STWBRX.
// Every address operand is matched with ForceXForm; the stored value is a
// gprc register.
def : Pat<(int_ppc_load2r ForceXForm:$ptr),
(LHBRX ForceXForm:$ptr)>;
def : Pat<(int_ppc_load4r ForceXForm:$ptr),
(LWBRX ForceXForm:$ptr)>;
def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
(STHBRX gprc:$a, ForceXForm:$ptr)>;
def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
(STWBRX gprc:$a, ForceXForm:$ptr)>;
// Fast 32-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);

View File

@ -0,0 +1,37 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr7 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
; RUN: -mcpu=pwr7 < %s | FileCheck %s
@ull = external global i64, align 8
@ull_addr = external global i64*, align 8
; Passes the i64 read from @ull and the pointer read from @ull_addr to
; llvm.ppc.store8r. The checks expect a stdbrx immediately followed by blr.
define dso_local void @test_builtin_ppc_store8r() {
; CHECK-LABEL: test_builtin_ppc_store8r:
; CHECK: stdbrx 3, 0, 4
; CHECK-NEXT: blr
;
entry:
  %value = load i64, i64* @ull, align 8
  %addr = load i64*, i64** @ull_addr, align 8
  ; The intrinsic takes an i8*, so the typed pointer is cast first.
  %rawAddr = bitcast i64* %addr to i8*
  call void @llvm.ppc.store8r(i64 %value, i8* %rawAddr)
  ret void
}
declare void @llvm.ppc.store8r(i64, i8*)
; Calls llvm.ppc.load8r on the pointer read from @ull_addr and returns the
; i64 result. The checks expect an ldbrx immediately followed by blr.
define dso_local i64 @test_builtin_ppc_load8r() {
; CHECK-LABEL: test_builtin_ppc_load8r:
; CHECK: ldbrx 3, 0, 3
; CHECK-NEXT: blr
entry:
  %addr = load i64*, i64** @ull_addr, align 8
  ; The intrinsic takes an i8*, so the typed pointer is cast first.
  %rawAddr = bitcast i64* %addr to i8*
  %loaded = call i64 @llvm.ppc.load8r(i8* %rawAddr)
  ret i64 %loaded
}
declare i64 @llvm.ppc.load8r(i8*)

View File

@ -0,0 +1,87 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-64B
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32B
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B
@us = external global i16, align 2
@us_addr = external global i16*, align 8
@ui = external global i32, align 4
@ui_addr = external global i32*, align 8
; Zero-extends the i16 read from @us to i32 and passes it, with the pointer
; read from @us_addr, to llvm.ppc.store2r. Both the 64-bit and 32-bit check
; prefixes expect an sthbrx immediately followed by blr.
define dso_local void @test_builtin_ppc_store2r() {
; CHECK-64B-LABEL: test_builtin_ppc_store2r:
; CHECK-64B: sthbrx 3, 0, 4
; CHECK-64B-NEXT: blr
; CHECK-32B-LABEL: test_builtin_ppc_store2r:
; CHECK-32B: sthbrx 3, 0, 4
; CHECK-32B-NEXT: blr
entry:
  %half = load i16, i16* @us, align 2
  ; The intrinsic's value operand is i32, so the halfword is widened.
  %wide = zext i16 %half to i32
  %addr = load i16*, i16** @us_addr, align 8
  %rawAddr = bitcast i16* %addr to i8*
  call void @llvm.ppc.store2r(i32 %wide, i8* %rawAddr)
  ret void
}
declare void @llvm.ppc.store2r(i32, i8*)
; Passes the i32 read from @ui and the pointer read from @ui_addr to
; llvm.ppc.store4r. Both check prefixes expect an stwbrx immediately followed
; by blr.
define dso_local void @test_builtin_ppc_store4r() {
; CHECK-64B-LABEL: test_builtin_ppc_store4r:
; CHECK-64B: stwbrx 3, 0, 4
; CHECK-64B-NEXT: blr
; CHECK-32B-LABEL: test_builtin_ppc_store4r:
; CHECK-32B: stwbrx 3, 0, 4
; CHECK-32B-NEXT: blr
entry:
  %value = load i32, i32* @ui, align 4
  %addr = load i32*, i32** @ui_addr, align 8
  ; The intrinsic takes an i8*, so the typed pointer is cast first.
  %rawAddr = bitcast i32* %addr to i8*
  call void @llvm.ppc.store4r(i32 %value, i8* %rawAddr)
  ret void
}
declare void @llvm.ppc.store4r(i32, i8*)
; Calls llvm.ppc.load2r on the pointer read from @us_addr, truncates the i32
; result to i16 and returns it as a zeroext value. The checks expect lhbrx
; followed by a clear of the upper bits (clrldi 48 on the 64-bit prefixes,
; clrlwi 16 on the 32-bit prefix) and then blr.
define dso_local zeroext i16 @test_builtin_ppc_load2r() {
; CHECK-64B-LABEL: test_builtin_ppc_load2r:
; CHECK-64B: lhbrx 3, 0, 3
; CHECK-64B-NEXT: clrldi 3, 3, 48
; CHECK-64B-NEXT: blr
; CHECK-32B-LABEL: test_builtin_ppc_load2r:
; CHECK-32B: lhbrx 3, 0, 3
; CHECK-32B-NEXT: clrlwi 3, 3, 16
; CHECK-32B-NEXT: blr
entry:
  %addr = load i16*, i16** @us_addr, align 8
  %rawAddr = bitcast i16* %addr to i8*
  %loaded = call i32 @llvm.ppc.load2r(i8* %rawAddr)
  ; The intrinsic returns i32; the function's return type is i16.
  %narrow = trunc i32 %loaded to i16
  ret i16 %narrow
}
declare i32 @llvm.ppc.load2r(i8*)
; Calls llvm.ppc.load4r on the pointer read from @ui_addr and returns the i32
; result. Both check prefixes expect an lwbrx immediately followed by blr.
define dso_local zeroext i32 @test_builtin_ppc_load4r() {
; CHECK-64B-LABEL: test_builtin_ppc_load4r:
; CHECK-64B: lwbrx 3, 0, 3
; CHECK-64B-NEXT: blr
; CHECK-32B-LABEL: test_builtin_ppc_load4r:
; CHECK-32B: lwbrx 3, 0, 3
; CHECK-32B-NEXT: blr
entry:
  %addr = load i32*, i32** @ui_addr, align 8
  ; The intrinsic takes an i8*, so the typed pointer is cast first.
  %rawAddr = bitcast i32* %addr to i8*
  %loaded = call i32 @llvm.ppc.load4r(i8* %rawAddr)
  ret i32 %loaded
}
declare i32 @llvm.ppc.load4r(i8*)

View File

@ -0,0 +1,51 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-64B
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32B
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B
@ui = external global i32, align 4
@ull = external global i64, align 8
; Computes the low bit of llvm.ctpop.i32 applied to the i32 read from @ui.
; Both check prefixes expect popcntw, then clrlwi keeping only the lowest
; bit, then blr.
; The original body loaded @ui twice and never used the first result; that
; dead load has been removed.
define dso_local signext i32 @test_builtin_ppc_poppar4() {
; CHECK-32B-LABEL: test_builtin_ppc_poppar4:
; CHECK-32B: popcntw 3, 3
; CHECK-32B-NEXT: clrlwi 3, 3, 31
; CHECK-32B-NEXT: blr
; CHECK-64B-LABEL: test_builtin_ppc_poppar4:
; CHECK-64B: popcntw 3, 3
; CHECK-64B-NEXT: clrlwi 3, 3, 31
; CHECK-64B-NEXT: blr
entry:
  %value = load i32, i32* @ui, align 4
  %count = call i32 @llvm.ctpop.i32(i32 %value)
  ; Keep only bit 0 of the population count.
  %lowBit = and i32 %count, 1
  ret i32 %lowBit
}
declare i32 @llvm.ctpop.i32(i32)
; Computes the low bit of llvm.ctpop.i64 applied to the i64 read from @ull and
; returns it truncated to i32. The 32-bit prefix expects xor, popcntw and
; clrlwi; the 64-bit prefix expects popcntd and clrldi; each ends with blr.
; The original body loaded @ull twice and never used the first result; that
; dead load has been removed.
define dso_local signext i32 @test_builtin_ppc_poppar8() {
; CHECK-32B-LABEL: test_builtin_ppc_poppar8:
; CHECK-32B: xor 3, 3, 4
; CHECK-32B-NEXT: popcntw 3, 3
; CHECK-32B-NEXT: clrlwi 3, 3, 31
; CHECK-32B-NEXT: blr
; CHECK-64B-LABEL: test_builtin_ppc_poppar8:
; CHECK-64B: popcntd 3, 3
; CHECK-64B-NEXT: clrldi 3, 3, 63
; CHECK-64B-NEXT: blr
entry:
  %value = load i64, i64* @ull, align 8
  %count = call i64 @llvm.ctpop.i64(i64 %value)
  ; Keep only bit 0 of the population count, then narrow to the return type.
  %lowBit = and i64 %count, 1
  %cast = trunc i64 %lowBit to i32
  ret i32 %cast
}
declare i64 @llvm.ctpop.i64(i64)