1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[PowerPC] Enable the vins* P10 instructions that are safe in 32-bit mode

Correctly emit `vins` instructions that are safe in 32-bit mode.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D101383
This commit is contained in:
Zarko Todorovski 2021-05-10 08:06:28 -04:00
parent 09ab6038cf
commit b797f9d9ad
3 changed files with 432 additions and 56 deletions

View File

@ -10444,6 +10444,8 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return Op;
if (Subtarget.isISA3_1()) {
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
// On P10, we have legal lowering for constant and variable indices for
// integer vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||

View File

@ -1,3 +1,6 @@
//-------------------------- Predicate definitions ---------------------------//
// Holds when the subtarget is not 64-bit PowerPC (negation of isPPC64()).
def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
// Mask immediates for MMA instructions (2, 4 and 8 bits).
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
@ -2752,7 +2755,44 @@ let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
// Selection patterns for PPCvecinsertelt that apply only when all four
// predicates hold: ISA 3.1, VSX, big-endian and 32-bit (IsPPC32). In every
// pattern here the element index operand is an i32.
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
// Indexed vector insert element
// The variable index $rB is handed to VINSBLX/VINSHLX/VINSWLX unchanged.
// NOTE(review): the pwr10 64-bit test output for this change shifts the
// halfword/word index (slwi by 1 / 2) before vinshlx/vinswlx, while the
// 32-bit output does not -- confirm that an unscaled index is intended here.
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
(VINSBLX $vDi, $rB, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
(VINSHLX $vDi, $rB, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
(VINSWLX $vDi, $rB, $rA)>;
// f32 element held in a register: the value operand is Bitcast.FltToInt
// (defined outside this block; presumably the f32 bits moved to a GPR --
// TODO confirm).
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
(VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
// f32 element coming from a load: the load is selected as an integer word
// load, with one pattern per addressing form (iaddr -> LWZ,
// iaddrX34 -> PLWZ, xaddr -> LWZX).
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
// Immediate vector insert element
// One set of patterns per constant element index i in 0..3; VINSW receives
// i * 4 as its immediate operand. Only the i32 value and the three f32-load
// forms are listed in this foreach; there is no f32-in-register pattern here.
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
(VINSW $vDi, !mul(i, 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
}
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;

View File

@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64
; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-64-P10
; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-32-P10
; Byte indexed
@ -22,6 +24,16 @@ define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: stbx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testByte:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: vinsblx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testByte:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinsblx 2, 6, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i8
%vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx
@ -48,6 +60,17 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: sthx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testHalf:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: slwi 4, 4, 1
; CHECK-64-P10-NEXT: vinshlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testHalf:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinshlx 2, 6, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i16
%vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx
@ -74,6 +97,17 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: stwx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testWord:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testWord:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinswlx 2, 6, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx
@ -96,6 +130,18 @@ define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
; CHECK-32-NEXT: xxinsertw 34, 0, 4
; CHECK-32-NEXT: xxinsertw 34, 0, 12
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testWordImm:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: vinsw 2, 3, 4
; CHECK-64-P10-NEXT: vinsw 2, 3, 12
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testWordImm:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinsw 2, 4, 4
; CHECK-32-P10-NEXT: vinsw 2, 4, 12
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
@ -130,6 +176,20 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: stwx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleword:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleword:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: add 5, 6, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: addi 3, 5, 1
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
ret <2 x i64> %vecins
@ -151,6 +211,17 @@ define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
; CHECK-32-NEXT: mtfprwz 0, 4
; CHECK-32-NEXT: xxinsertw 34, 0, 12
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoublewordImm:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: vinsd 2, 3, 8
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoublewordImm:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinsw 2, 3, 8
; CHECK-32-P10-NEXT: vinsw 2, 4, 12
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x i64> %a, i64 %b, i32 1
ret <2 x i64> %vecins
@ -170,6 +241,17 @@ define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
; CHECK-32-NEXT: mtfprwz 0, 4
; CHECK-32-NEXT: xxinsertw 34, 0, 4
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoublewordImm2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: vinsd 2, 3, 0
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoublewordImm2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinsw 2, 3, 0
; CHECK-32-P10-NEXT: vinsw 2, 4, 4
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x i64> %a, i64 %b, i32 0
ret <2 x i64> %vecins
@ -195,6 +277,24 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-32-NEXT: stfsx 1, 4, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
; CHECK-64-P10-NEXT: extsw 3, 4
; CHECK-64-P10-NEXT: slwi 3, 3, 2
; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-P10-NEXT: mffprwz 4, 0
; CHECK-64-P10-NEXT: vinswlx 2, 3, 4
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloat1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-P10-NEXT: mffprwz 3, 0
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <4 x float> %a, float %b, i32 %idx1
ret <4 x float> %vecins
@ -203,18 +303,18 @@ entry:
define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testFloat2:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-DAG: lwz 6, 0(3)
; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
; CHECK-64-DAG: addi 7, 1, -32
; CHECK-64-DAG: stxv 34, -32(1)
; CHECK-64-DAG: stwx 6, 7, 4
; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
; CHECK-64-DAG: addi 5, 1, -16
; CHECK-64-DAG: lxv 0, -32(1)
; CHECK-64-DAG: lwz 3, 1(3)
; CHECK-64-DAG: stxv 0, -16(1)
; CHECK-64-DAG: stwx 3, 5, 4
; CHECK-64-DAG: lxv 34, -16(1)
; CHECK-64-NEXT: lwz 6, 0(3)
; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
; CHECK-64-NEXT: addi 7, 1, -32
; CHECK-64-NEXT: stxv 34, -32(1)
; CHECK-64-NEXT: stwx 6, 7, 4
; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
; CHECK-64-NEXT: addi 5, 1, -16
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: lwz 3, 1(3)
; CHECK-64-NEXT: stxv 0, -16(1)
; CHECK-64-NEXT: stwx 3, 5, 4
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testFloat2:
@ -232,6 +332,26 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-NEXT: stwx 3, 4, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lwz 6, 0(3)
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: lwz 3, 1(3)
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
; CHECK-64-P10-NEXT: extsw 4, 5
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloat2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lwz 6, 0(3)
; CHECK-32-P10-NEXT: lwz 3, 1(3)
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i8* %b to float*
%add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
@ -246,21 +366,21 @@ entry:
define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testFloat3:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-DAG: lis 6, 1
; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
; CHECK-64-DAG: addi 7, 1, -32
; CHECK-64-DAG: lwzx 6, 3, 6
; CHECK-64-DAG: stxv 34, -32(1)
; CHECK-64-DAG: stwx 6, 7, 4
; CHECK-64-DAG: li 4, 1
; CHECK-64-DAG: lxv 0, -32(1)
; CHECK-64-DAG: rldic 4, 4, 36, 27
; CHECK-64-DAG: lwzx 3, 3, 4
; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
; CHECK-64-DAG: addi 5, 1, -16
; CHECK-64-DAG: stxv 0, -16(1)
; CHECK-64-DAG: stwx 3, 5, 4
; CHECK-64-DAG: lxv 34, -16(1)
; CHECK-64-NEXT: lis 6, 1
; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
; CHECK-64-NEXT: addi 7, 1, -32
; CHECK-64-NEXT: lwzx 6, 3, 6
; CHECK-64-NEXT: stxv 34, -32(1)
; CHECK-64-NEXT: stwx 6, 7, 4
; CHECK-64-NEXT: li 4, 1
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: rldic 4, 4, 36, 27
; CHECK-64-NEXT: lwzx 3, 3, 4
; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
; CHECK-64-NEXT: addi 5, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
; CHECK-64-NEXT: stwx 3, 5, 4
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testFloat3:
@ -279,6 +399,29 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-NEXT: stwx 3, 4, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
; CHECK-64-P10-NEXT: lwzx 3, 3, 4
; CHECK-64-P10-NEXT: extsw 4, 5
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloat3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lis 6, 1
; CHECK-32-P10-NEXT: lwzx 6, 3, 6
; CHECK-32-P10-NEXT: lwz 3, 0(3)
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
%0 = bitcast i8* %add.ptr to float*
@ -309,6 +452,22 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
; CHECK-32-NEXT: xxinsertw 34, 0, 0
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloatImm1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-P10-NEXT: xxinsertw 34, 0, 0
; CHECK-64-P10-NEXT: xxinsertw 34, 0, 8
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloatImm1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0
; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <4 x float> %a, float %b, i32 0
%vecins1 = insertelement <4 x float> %vecins, float %b, i32 2
@ -339,6 +498,22 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloatImm2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lwz 4, 0(3)
; CHECK-64-P10-NEXT: lwz 3, 4(3)
; CHECK-64-P10-NEXT: vinsw 2, 4, 0
; CHECK-64-P10-NEXT: vinsw 2, 3, 8
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloatImm2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lwz 4, 0(3)
; CHECK-32-P10-NEXT: lwz 3, 4(3)
; CHECK-32-P10-NEXT: vinsw 2, 4, 0
; CHECK-32-P10-NEXT: vinsw 2, 3, 8
; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i32* %b to float*
%add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1
@ -378,6 +553,25 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloatImm3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: plwz 4, 262144(3), 0
; CHECK-64-P10-NEXT: vinsw 2, 4, 0
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25
; CHECK-64-P10-NEXT: lwzx 3, 3, 4
; CHECK-64-P10-NEXT: vinsw 2, 3, 8
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloatImm3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lis 4, 4
; CHECK-32-P10-NEXT: lwzx 4, 3, 4
; CHECK-32-P10-NEXT: lwz 3, 0(3)
; CHECK-32-P10-NEXT: vinsw 2, 4, 0
; CHECK-32-P10-NEXT: vinsw 2, 3, 8
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
%0 = bitcast i32* %add.ptr to float*
@ -410,6 +604,23 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
; CHECK-32-NEXT: stfdx 1, 4, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: mffprd 3, 1
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: addi 4, 1, -16
; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
; CHECK-32-P10-NEXT: lxv 34, -16(1)
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x double> %a, double %b, i32 %idx1
ret <2 x double> %vecins
@ -418,19 +629,19 @@ entry:
define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testDouble2:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-DAG: ld 6, 0(3)
; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
; CHECK-64-DAG: addi 7, 1, -32
; CHECK-64-DAG: stxv 34, -32(1)
; CHECK-64-DAG: stdx 6, 7, 4
; CHECK-64-DAG: li 4, 1
; CHECK-64-DAG: lxv 0, -32(1)
; CHECK-64-DAG: ldx 3, 3, 4
; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
; CHECK-64-DAG: addi 5, 1, -16
; CHECK-64-DAG: stxv 0, -16(1)
; CHECK-64-DAG: stdx 3, 5, 4
; CHECK-64-DAG: lxv 34, -16(1)
; CHECK-64-NEXT: ld 6, 0(3)
; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-64-NEXT: addi 7, 1, -32
; CHECK-64-NEXT: stxv 34, -32(1)
; CHECK-64-NEXT: stdx 6, 7, 4
; CHECK-64-NEXT: li 4, 1
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: ldx 3, 3, 4
; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
; CHECK-64-NEXT: addi 5, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
; CHECK-64-NEXT: stdx 3, 5, 4
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDouble2:
@ -448,6 +659,34 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
; CHECK-32-NEXT: stfdx 1, 3, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: ld 6, 0(3)
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: pld 3, 1(3), 0
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
; CHECK-64-P10-NEXT: extsw 4, 5
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
; CHECK-32-P10-NEXT: lxv 0, -32(1)
; CHECK-32-P10-NEXT: lfd 1, 1(3)
; CHECK-32-P10-NEXT: addi 3, 1, -16
; CHECK-32-P10-NEXT: stxv 0, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 3, 5
; CHECK-32-P10-NEXT: lxv 34, -16(1)
; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i8* %b to double*
%add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
@ -462,21 +701,21 @@ entry:
define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testDouble3:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-DAG: lis 6, 1
; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
; CHECK-64-DAG: addi 7, 1, -32
; CHECK-64-DAG: ldx 6, 3, 6
; CHECK-64-DAG: stxv 34, -32(1)
; CHECK-64-DAG: stdx 6, 7, 4
; CHECK-64-DAG: li 4, 1
; CHECK-64-DAG: lxv 0, -32(1)
; CHECK-64-DAG: rldic 4, 4, 36, 27
; CHECK-64-DAG: ldx 3, 3, 4
; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
; CHECK-64-DAG: addi 5, 1, -16
; CHECK-64-DAG: stxv 0, -16(1)
; CHECK-64-DAG: stdx 3, 5, 4
; CHECK-64-DAG: lxv 34, -16(1)
; CHECK-64-NEXT: lis 6, 1
; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-64-NEXT: addi 7, 1, -32
; CHECK-64-NEXT: ldx 6, 3, 6
; CHECK-64-NEXT: stxv 34, -32(1)
; CHECK-64-NEXT: stdx 6, 7, 4
; CHECK-64-NEXT: li 4, 1
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: rldic 4, 4, 36, 27
; CHECK-64-NEXT: ldx 3, 3, 4
; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
; CHECK-64-NEXT: addi 5, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
; CHECK-64-NEXT: stdx 3, 5, 4
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDouble3:
@ -495,6 +734,37 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
; CHECK-32-NEXT: stfdx 1, 3, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: pld 6, 65536(3), 0
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
; CHECK-64-P10-NEXT: ldx 3, 3, 4
; CHECK-64-P10-NEXT: extsw 4, 5
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lis 6, 1
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: lfdx 0, 3, 6
; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
; CHECK-32-P10-NEXT: lxv 0, -32(1)
; CHECK-32-P10-NEXT: lfd 1, 0(3)
; CHECK-32-P10-NEXT: addi 3, 1, -16
; CHECK-32-P10-NEXT: stxv 0, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 3, 5
; CHECK-32-P10-NEXT: lxv 34, -16(1)
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
%0 = bitcast i8* %add.ptr to double*
@ -521,6 +791,18 @@ define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleImm1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleImm1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x double> %a, double %b, i32 0
ret <2 x double> %vecins
@ -538,6 +820,18 @@ define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfd 0, 0(3)
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleImm2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lfd 0, 0(3)
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleImm2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i32* %b to double*
%1 = load double, double* %0, align 8
@ -557,6 +851,18 @@ define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfd 0, 4(3)
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleImm3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lfd 0, 4(3)
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleImm3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 4(3)
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 1
%0 = bitcast i32* %add.ptr to double*
@ -579,6 +885,20 @@ define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfdx 0, 3, 4
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleImm4:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lis 4, 4
; CHECK-64-P10-NEXT: lfdx 0, 3, 4
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleImm4:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lis 4, 4
; CHECK-32-P10-NEXT: lfdx 0, 3, 4
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
%0 = bitcast i32* %add.ptr to double*
@ -601,6 +921,20 @@ define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfd 0, 0(3)
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleImm5:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25
; CHECK-64-P10-NEXT: lfdx 0, 3, 4
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleImm5:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736
%0 = bitcast i32* %add.ptr to double*