1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[SelectionDAG] Legalize intrinsic get.active.lane.mask

This adapts the legalization of the get.active.lane.mask intrinsic to the new
semantics described in D86147. Because the second argument is now the loop trip
count rather than the backedge-taken count, the intrinsic is legalized to an
'icmp ULT' instead of the 'icmp ULE' that was used for the backedge-taken count.

Differential Revision: https://reviews.llvm.org/D86302
This commit is contained in:
Sjoerd Meijer 2020-08-25 14:41:53 +01:00
parent f5080847e6
commit 02f39d5a7e
3 changed files with 13 additions and 13 deletions

View File

@ -6890,16 +6890,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::get_active_lane_mask: { case Intrinsic::get_active_lane_mask: {
auto DL = getCurSDLoc(); auto DL = getCurSDLoc();
SDValue Index = getValue(I.getOperand(0)); SDValue Index = getValue(I.getOperand(0));
SDValue BTC = getValue(I.getOperand(1)); SDValue TripCount = getValue(I.getOperand(1));
Type *ElementTy = I.getOperand(0)->getType(); Type *ElementTy = I.getOperand(0)->getType();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned VecWidth = VT.getVectorNumElements(); unsigned VecWidth = VT.getVectorNumElements();
SmallVector<SDValue, 16> OpsBTC; SmallVector<SDValue, 16> OpsTripCount;
SmallVector<SDValue, 16> OpsIndex; SmallVector<SDValue, 16> OpsIndex;
SmallVector<SDValue, 16> OpsStepConstants; SmallVector<SDValue, 16> OpsStepConstants;
for (unsigned i = 0; i < VecWidth; i++) { for (unsigned i = 0; i < VecWidth; i++) {
OpsBTC.push_back(BTC); OpsTripCount.push_back(TripCount);
OpsIndex.push_back(Index); OpsIndex.push_back(Index);
OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy))); OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
} }
@ -6912,9 +6912,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
SDValue VectorInduction = DAG.getNode( SDValue VectorInduction = DAG.getNode(
ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC); SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
VectorBTC, ISD::CondCode::SETULE); VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
SetCC)); SetCC));

View File

@ -253,7 +253,7 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vpnot ; CHECK-NEXT: vpnot
; CHECK-NEXT: vpstt ; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u32 cs, q1, q2 ; CHECK-NEXT: vcmpt.u32 hi, q1, q2
; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
; CHECK-NEXT: vrintr.f32 s15, s11 ; CHECK-NEXT: vrintr.f32 s15, s11
; CHECK-NEXT: vrintr.f32 s14, s10 ; CHECK-NEXT: vrintr.f32 s14, s10

View File

@ -13,7 +13,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) {
; CHECK-NEXT: vdup.32 q1, r1 ; CHECK-NEXT: vdup.32 q1, r1
; CHECK-NEXT: vpnot ; CHECK-NEXT: vpnot
; CHECK-NEXT: vpst ; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.u32 cs, q1, q0 ; CHECK-NEXT: vcmpt.u32 hi, q1, q0
; CHECK-NEXT: vmov d0, r2, r3 ; CHECK-NEXT: vmov d0, r2, r3
; CHECK-NEXT: vldr d1, [sp] ; CHECK-NEXT: vldr d1, [sp]
; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q1, [r0]
@ -43,7 +43,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-NEXT: vmov.i8 q1, #0x0 ; CHECK-NEXT: vmov.i8 q1, #0x0
; CHECK-NEXT: vmov.i8 q2, #0xff ; CHECK-NEXT: vmov.i8 q2, #0xff
; CHECK-NEXT: vadd.i32 q3, q0, r0 ; CHECK-NEXT: vadd.i32 q3, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q5, q3 ; CHECK-NEXT: vcmp.u32 hi, q5, q3
; CHECK-NEXT: vpsel q4, q2, q1 ; CHECK-NEXT: vpsel q4, q2, q1
; CHECK-NEXT: vmov r1, s16 ; CHECK-NEXT: vmov r1, s16
; CHECK-NEXT: vmov.16 q0[0], r1 ; CHECK-NEXT: vmov.16 q0[0], r1
@ -56,7 +56,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-NEXT: adr r1, .LCPI1_1 ; CHECK-NEXT: adr r1, .LCPI1_1
; CHECK-NEXT: vldrw.u32 q4, [r1] ; CHECK-NEXT: vldrw.u32 q4, [r1]
; CHECK-NEXT: vadd.i32 q4, q4, r0 ; CHECK-NEXT: vadd.i32 q4, q4, r0
; CHECK-NEXT: vcmp.u32 cs, q5, q4 ; CHECK-NEXT: vcmp.u32 hi, q5, q4
; CHECK-NEXT: vpsel q5, q2, q1 ; CHECK-NEXT: vpsel q5, q2, q1
; CHECK-NEXT: vmov r1, s20 ; CHECK-NEXT: vmov r1, s20
; CHECK-NEXT: vmov.16 q0[4], r1 ; CHECK-NEXT: vmov.16 q0[4], r1
@ -128,7 +128,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: vmov.i8 q5, #0x0 ; CHECK-NEXT: vmov.i8 q5, #0x0
; CHECK-NEXT: vmov.i8 q4, #0xff ; CHECK-NEXT: vmov.i8 q4, #0xff
; CHECK-NEXT: vadd.i32 q1, q0, r0 ; CHECK-NEXT: vadd.i32 q1, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q1 ; CHECK-NEXT: vcmp.u32 hi, q7, q1
; CHECK-NEXT: vpsel q0, q4, q5 ; CHECK-NEXT: vpsel q0, q4, q5
; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov.16 q2[0], r1 ; CHECK-NEXT: vmov.16 q2[0], r1
@ -141,7 +141,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: adr r1, .LCPI2_1 ; CHECK-NEXT: adr r1, .LCPI2_1
; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vadd.i32 q3, q0, r0 ; CHECK-NEXT: vadd.i32 q3, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q3 ; CHECK-NEXT: vcmp.u32 hi, q7, q3
; CHECK-NEXT: vpsel q0, q4, q5 ; CHECK-NEXT: vpsel q0, q4, q5
; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov.16 q2[4], r1 ; CHECK-NEXT: vmov.16 q2[4], r1
@ -172,7 +172,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: adr r1, .LCPI2_2 ; CHECK-NEXT: adr r1, .LCPI2_2
; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vadd.i32 q0, q0, r0 ; CHECK-NEXT: vadd.i32 q0, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q0 ; CHECK-NEXT: vcmp.u32 hi, q7, q0
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vpsel q6, q4, q5 ; CHECK-NEXT: vpsel q6, q4, q5
; CHECK-NEXT: vmov r1, s24 ; CHECK-NEXT: vmov r1, s24
@ -186,7 +186,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: adr r1, .LCPI2_3 ; CHECK-NEXT: adr r1, .LCPI2_3
; CHECK-NEXT: vldrw.u32 q6, [r1] ; CHECK-NEXT: vldrw.u32 q6, [r1]
; CHECK-NEXT: vadd.i32 q6, q6, r0 ; CHECK-NEXT: vadd.i32 q6, q6, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q6 ; CHECK-NEXT: vcmp.u32 hi, q7, q6
; CHECK-NEXT: vpsel q7, q4, q5 ; CHECK-NEXT: vpsel q7, q4, q5
; CHECK-NEXT: vmov r1, s28 ; CHECK-NEXT: vmov r1, s28
; CHECK-NEXT: vmov.16 q0[4], r1 ; CHECK-NEXT: vmov.16 q0[4], r1