mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[PowerPC] Enable safe for 32bit vins* P10 instructions
Correctly emit `vins` instructions that are safe in 32bit mode. Reviewed By: nemanjai, #powerpc Differential Revision: https://reviews.llvm.org/D101383
This commit is contained in:
parent
09ab6038cf
commit
b797f9d9ad
@ -10444,6 +10444,8 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
||||
return Op;
|
||||
|
||||
if (Subtarget.isISA3_1()) {
|
||||
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
|
||||
return SDValue();
|
||||
// On P10, we have legal lowering for constant and variable indices for
|
||||
// integer vectors.
|
||||
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
|
||||
|
@ -1,3 +1,6 @@
|
||||
//-------------------------- Predicate definitions ---------------------------//
|
||||
def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
|
||||
|
||||
// Mask immediates for MMA instructions (2, 4 and 8 bits).
|
||||
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
|
||||
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
|
||||
@ -2752,7 +2755,44 @@ let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
|
||||
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
|
||||
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
|
||||
// Indexed vector insert element
|
||||
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSBLX $vDi, $rB, $rA)>;
|
||||
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSHLX $vDi, $rB, $rA)>;
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, $rA)>;
|
||||
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
|
||||
i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
|
||||
i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
|
||||
i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
|
||||
|
||||
// Immediate vector insert element
|
||||
foreach i = [0, 1, 2, 3] in {
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), $rA)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
|
||||
(i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
|
||||
(i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
|
||||
(i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
|
||||
// Indexed vector insert element
|
||||
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
|
||||
|
@ -1,6 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-64-P10
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-32-P10
|
||||
|
||||
; Byte indexed
|
||||
|
||||
@ -22,6 +24,16 @@ define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
|
||||
; CHECK-32-NEXT: stbx 4, 5, 3
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testByte:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: vinsblx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testByte:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinsblx 2, 6, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%conv = trunc i64 %b to i8
|
||||
%vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx
|
||||
@ -48,6 +60,17 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
|
||||
; CHECK-32-NEXT: sthx 4, 5, 3
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testHalf:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 1
|
||||
; CHECK-64-P10-NEXT: vinshlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testHalf:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinshlx 2, 6, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%conv = trunc i64 %b to i16
|
||||
%vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx
|
||||
@ -74,6 +97,17 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
|
||||
; CHECK-32-NEXT: stwx 4, 5, 3
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testWord:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testWord:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 6, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%conv = trunc i64 %b to i32
|
||||
%vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx
|
||||
@ -96,6 +130,18 @@ define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 4
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 12
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testWordImm:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: vinsw 2, 3, 4
|
||||
; CHECK-64-P10-NEXT: vinsw 2, 3, 12
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testWordImm:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 4, 4
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 4, 12
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%conv = trunc i64 %b to i32
|
||||
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
|
||||
@ -130,6 +176,20 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
|
||||
; CHECK-32-NEXT: stwx 4, 5, 3
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleword:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
|
||||
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleword:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: add 5, 6, 6
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
|
||||
; CHECK-32-P10-NEXT: addi 3, 5, 1
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
|
||||
ret <2 x i64> %vecins
|
||||
@ -151,6 +211,17 @@ define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
|
||||
; CHECK-32-NEXT: mtfprwz 0, 4
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 12
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoublewordImm:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: vinsd 2, 3, 8
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoublewordImm:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 3, 8
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 4, 12
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <2 x i64> %a, i64 %b, i32 1
|
||||
ret <2 x i64> %vecins
|
||||
@ -170,6 +241,17 @@ define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
|
||||
; CHECK-32-NEXT: mtfprwz 0, 4
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 4
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoublewordImm2:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: vinsd 2, 3, 0
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoublewordImm2:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 3, 0
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 4, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <2 x i64> %a, i64 %b, i32 0
|
||||
ret <2 x i64> %vecins
|
||||
@ -195,6 +277,24 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
|
||||
; CHECK-32-NEXT: stfsx 1, 4, 3
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testFloat1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-P10-NEXT: extsw 3, 4
|
||||
; CHECK-64-P10-NEXT: slwi 3, 3, 2
|
||||
; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-P10-NEXT: mffprwz 4, 0
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 3, 4
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloat1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-P10-NEXT: mffprwz 3, 0
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 %idx1
|
||||
ret <4 x float> %vecins
|
||||
@ -203,18 +303,18 @@ entry:
|
||||
define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
|
||||
; CHECK-64-LABEL: testFloat2:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-DAG: lwz 6, 0(3)
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-DAG: stxv 34, -32(1)
|
||||
; CHECK-64-DAG: stwx 6, 7, 4
|
||||
; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 5, 1, -16
|
||||
; CHECK-64-DAG: lxv 0, -32(1)
|
||||
; CHECK-64-DAG: lwz 3, 1(3)
|
||||
; CHECK-64-DAG: stxv 0, -16(1)
|
||||
; CHECK-64-DAG: stwx 3, 5, 4
|
||||
; CHECK-64-DAG: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: lwz 6, 0(3)
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stwx 6, 7, 4
|
||||
; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 5, 1, -16
|
||||
; CHECK-64-NEXT: lxv 0, -32(1)
|
||||
; CHECK-64-NEXT: lwz 3, 1(3)
|
||||
; CHECK-64-NEXT: stxv 0, -16(1)
|
||||
; CHECK-64-NEXT: stwx 3, 5, 4
|
||||
; CHECK-64-NEXT: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testFloat2:
|
||||
@ -232,6 +332,26 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
|
||||
; CHECK-32-NEXT: stwx 3, 4, 5
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testFloat2:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: lwz 6, 0(3)
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: lwz 3, 1(3)
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
|
||||
; CHECK-64-P10-NEXT: extsw 4, 5
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloat2:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lwz 6, 0(3)
|
||||
; CHECK-32-P10-NEXT: lwz 3, 1(3)
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %b to float*
|
||||
%add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
|
||||
@ -246,21 +366,21 @@ entry:
|
||||
define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
|
||||
; CHECK-64-LABEL: testFloat3:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-DAG: lis 6, 1
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-DAG: lwzx 6, 3, 6
|
||||
; CHECK-64-DAG: stxv 34, -32(1)
|
||||
; CHECK-64-DAG: stwx 6, 7, 4
|
||||
; CHECK-64-DAG: li 4, 1
|
||||
; CHECK-64-DAG: lxv 0, -32(1)
|
||||
; CHECK-64-DAG: rldic 4, 4, 36, 27
|
||||
; CHECK-64-DAG: lwzx 3, 3, 4
|
||||
; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 5, 1, -16
|
||||
; CHECK-64-DAG: stxv 0, -16(1)
|
||||
; CHECK-64-DAG: stwx 3, 5, 4
|
||||
; CHECK-64-DAG: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: lis 6, 1
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: lwzx 6, 3, 6
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stwx 6, 7, 4
|
||||
; CHECK-64-NEXT: li 4, 1
|
||||
; CHECK-64-NEXT: lxv 0, -32(1)
|
||||
; CHECK-64-NEXT: rldic 4, 4, 36, 27
|
||||
; CHECK-64-NEXT: lwzx 3, 3, 4
|
||||
; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 5, 1, -16
|
||||
; CHECK-64-NEXT: stxv 0, -16(1)
|
||||
; CHECK-64-NEXT: stwx 3, 5, 4
|
||||
; CHECK-64-NEXT: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testFloat3:
|
||||
@ -279,6 +399,29 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
|
||||
; CHECK-32-NEXT: stwx 3, 4, 5
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testFloat3:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
|
||||
; CHECK-64-P10-NEXT: li 4, 1
|
||||
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
|
||||
; CHECK-64-P10-NEXT: lwzx 3, 3, 4
|
||||
; CHECK-64-P10-NEXT: extsw 4, 5
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloat3:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lis 6, 1
|
||||
; CHECK-32-P10-NEXT: lwzx 6, 3, 6
|
||||
; CHECK-32-P10-NEXT: lwz 3, 0(3)
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
|
||||
%0 = bitcast i8* %add.ptr to float*
|
||||
@ -309,6 +452,22 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testFloatImm1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-P10-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-64-P10-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloatImm1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 0
|
||||
%vecins1 = insertelement <4 x float> %vecins, float %b, i32 2
|
||||
@ -339,6 +498,22 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testFloatImm2:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: lwz 4, 0(3)
|
||||
; CHECK-64-P10-NEXT: lwz 3, 4(3)
|
||||
; CHECK-64-P10-NEXT: vinsw 2, 4, 0
|
||||
; CHECK-64-P10-NEXT: vinsw 2, 3, 8
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloatImm2:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lwz 4, 0(3)
|
||||
; CHECK-32-P10-NEXT: lwz 3, 4(3)
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 4, 0
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 3, 8
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i32* %b to float*
|
||||
%add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1
|
||||
@ -378,6 +553,25 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testFloatImm3:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: plwz 4, 262144(3), 0
|
||||
; CHECK-64-P10-NEXT: vinsw 2, 4, 0
|
||||
; CHECK-64-P10-NEXT: li 4, 1
|
||||
; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25
|
||||
; CHECK-64-P10-NEXT: lwzx 3, 3, 4
|
||||
; CHECK-64-P10-NEXT: vinsw 2, 3, 8
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloatImm3:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lis 4, 4
|
||||
; CHECK-32-P10-NEXT: lwzx 4, 3, 4
|
||||
; CHECK-32-P10-NEXT: lwz 3, 0(3)
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 4, 0
|
||||
; CHECK-32-P10-NEXT: vinsw 2, 3, 8
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
|
||||
%0 = bitcast i32* %add.ptr to float*
|
||||
@ -410,6 +604,23 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
|
||||
; CHECK-32-NEXT: stfdx 1, 4, 3
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDouble1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: mffprd 3, 1
|
||||
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
|
||||
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDouble1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: addi 4, 1, -16
|
||||
; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: stxv 34, -16(1)
|
||||
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
|
||||
; CHECK-32-P10-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <2 x double> %a, double %b, i32 %idx1
|
||||
ret <2 x double> %vecins
|
||||
@ -418,19 +629,19 @@ entry:
|
||||
define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
|
||||
; CHECK-64-LABEL: testDouble2:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-DAG: ld 6, 0(3)
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-DAG: stxv 34, -32(1)
|
||||
; CHECK-64-DAG: stdx 6, 7, 4
|
||||
; CHECK-64-DAG: li 4, 1
|
||||
; CHECK-64-DAG: lxv 0, -32(1)
|
||||
; CHECK-64-DAG: ldx 3, 3, 4
|
||||
; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
|
||||
; CHECK-64-DAG: addi 5, 1, -16
|
||||
; CHECK-64-DAG: stxv 0, -16(1)
|
||||
; CHECK-64-DAG: stdx 3, 5, 4
|
||||
; CHECK-64-DAG: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: ld 6, 0(3)
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stdx 6, 7, 4
|
||||
; CHECK-64-NEXT: li 4, 1
|
||||
; CHECK-64-NEXT: lxv 0, -32(1)
|
||||
; CHECK-64-NEXT: ldx 3, 3, 4
|
||||
; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 5, 1, -16
|
||||
; CHECK-64-NEXT: stxv 0, -16(1)
|
||||
; CHECK-64-NEXT: stdx 3, 5, 4
|
||||
; CHECK-64-NEXT: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testDouble2:
|
||||
@ -448,6 +659,34 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
|
||||
; CHECK-32-NEXT: stfdx 1, 3, 5
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDouble2:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: ld 6, 0(3)
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: pld 3, 1(3), 0
|
||||
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
|
||||
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
|
||||
; CHECK-64-P10-NEXT: extsw 4, 5
|
||||
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
|
||||
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDouble2:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lfd 0, 0(3)
|
||||
; CHECK-32-P10-NEXT: addi 6, 1, -32
|
||||
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: stxv 34, -32(1)
|
||||
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
|
||||
; CHECK-32-P10-NEXT: lxv 0, -32(1)
|
||||
; CHECK-32-P10-NEXT: lfd 1, 1(3)
|
||||
; CHECK-32-P10-NEXT: addi 3, 1, -16
|
||||
; CHECK-32-P10-NEXT: stxv 0, -16(1)
|
||||
; CHECK-32-P10-NEXT: stfdx 1, 3, 5
|
||||
; CHECK-32-P10-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %b to double*
|
||||
%add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
|
||||
@ -462,21 +701,21 @@ entry:
|
||||
define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
|
||||
; CHECK-64-LABEL: testDouble3:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-DAG: lis 6, 1
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-DAG: ldx 6, 3, 6
|
||||
; CHECK-64-DAG: stxv 34, -32(1)
|
||||
; CHECK-64-DAG: stdx 6, 7, 4
|
||||
; CHECK-64-DAG: li 4, 1
|
||||
; CHECK-64-DAG: lxv 0, -32(1)
|
||||
; CHECK-64-DAG: rldic 4, 4, 36, 27
|
||||
; CHECK-64-DAG: ldx 3, 3, 4
|
||||
; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
|
||||
; CHECK-64-DAG: addi 5, 1, -16
|
||||
; CHECK-64-DAG: stxv 0, -16(1)
|
||||
; CHECK-64-DAG: stdx 3, 5, 4
|
||||
; CHECK-64-DAG: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: lis 6, 1
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: ldx 6, 3, 6
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stdx 6, 7, 4
|
||||
; CHECK-64-NEXT: li 4, 1
|
||||
; CHECK-64-NEXT: lxv 0, -32(1)
|
||||
; CHECK-64-NEXT: rldic 4, 4, 36, 27
|
||||
; CHECK-64-NEXT: ldx 3, 3, 4
|
||||
; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 5, 1, -16
|
||||
; CHECK-64-NEXT: stxv 0, -16(1)
|
||||
; CHECK-64-NEXT: stdx 3, 5, 4
|
||||
; CHECK-64-NEXT: lxv 34, -16(1)
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testDouble3:
|
||||
@ -495,6 +734,37 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
|
||||
; CHECK-32-NEXT: stfdx 1, 3, 5
|
||||
; CHECK-32-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDouble3:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: pld 6, 65536(3), 0
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
|
||||
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
|
||||
; CHECK-64-P10-NEXT: li 4, 1
|
||||
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
|
||||
; CHECK-64-P10-NEXT: ldx 3, 3, 4
|
||||
; CHECK-64-P10-NEXT: extsw 4, 5
|
||||
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
|
||||
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDouble3:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lis 6, 1
|
||||
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: lfdx 0, 3, 6
|
||||
; CHECK-32-P10-NEXT: addi 6, 1, -32
|
||||
; CHECK-32-P10-NEXT: stxv 34, -32(1)
|
||||
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
|
||||
; CHECK-32-P10-NEXT: lxv 0, -32(1)
|
||||
; CHECK-32-P10-NEXT: lfd 1, 0(3)
|
||||
; CHECK-32-P10-NEXT: addi 3, 1, -16
|
||||
; CHECK-32-P10-NEXT: stxv 0, -16(1)
|
||||
; CHECK-32-P10-NEXT: stfdx 1, 3, 5
|
||||
; CHECK-32-P10-NEXT: lxv 34, -16(1)
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
|
||||
%0 = bitcast i8* %add.ptr to double*
|
||||
@ -521,6 +791,18 @@ define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
|
||||
; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleImm1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleImm1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
|
||||
; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <2 x double> %a, double %b, i32 0
|
||||
ret <2 x double> %vecins
|
||||
@ -538,6 +820,18 @@ define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
|
||||
; CHECK-32-NEXT: lfd 0, 0(3)
|
||||
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleImm2:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: lfd 0, 0(3)
|
||||
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleImm2:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lfd 0, 0(3)
|
||||
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i32* %b to double*
|
||||
%1 = load double, double* %0, align 8
|
||||
@ -557,6 +851,18 @@ define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
|
||||
; CHECK-32-NEXT: lfd 0, 4(3)
|
||||
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleImm3:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: lfd 0, 4(3)
|
||||
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleImm3:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lfd 0, 4(3)
|
||||
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i32, i32* %b, i64 1
|
||||
%0 = bitcast i32* %add.ptr to double*
|
||||
@ -579,6 +885,20 @@ define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
|
||||
; CHECK-32-NEXT: lfdx 0, 3, 4
|
||||
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleImm4:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: lis 4, 4
|
||||
; CHECK-64-P10-NEXT: lfdx 0, 3, 4
|
||||
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleImm4:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lis 4, 4
|
||||
; CHECK-32-P10-NEXT: lfdx 0, 3, 4
|
||||
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
|
||||
%0 = bitcast i32* %add.ptr to double*
|
||||
@ -601,6 +921,20 @@ define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
|
||||
; CHECK-32-NEXT: lfd 0, 0(3)
|
||||
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
; CHECK-64-P10-LABEL: testDoubleImm5:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: li 4, 1
|
||||
; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25
|
||||
; CHECK-64-P10-NEXT: lfdx 0, 3, 4
|
||||
; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleImm5:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lfd 0, 0(3)
|
||||
; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736
|
||||
%0 = bitcast i32* %add.ptr to double*
|
||||
|
Loading…
Reference in New Issue
Block a user