1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[SelectionDAG] Legalize intrinsic get.active.lane.mask

This adapts legalization of intrinsic get.active.lane.mask to the new semantics
as described in D86147. Because the second argument is now the loop tripcount,
we legalize this intrinsic to an 'icmp ULT' instead of the 'icmp ULE' that was
used when the second argument was the backedge-taken count.

Differential Revision: https://reviews.llvm.org/D86302
This commit is contained in:
Sjoerd Meijer 2020-08-25 14:41:53 +01:00
parent f5080847e6
commit 02f39d5a7e
3 changed files with 13 additions and 13 deletions

View File

@ -6890,16 +6890,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::get_active_lane_mask: {
auto DL = getCurSDLoc();
SDValue Index = getValue(I.getOperand(0));
SDValue BTC = getValue(I.getOperand(1));
SDValue TripCount = getValue(I.getOperand(1));
Type *ElementTy = I.getOperand(0)->getType();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned VecWidth = VT.getVectorNumElements();
SmallVector<SDValue, 16> OpsBTC;
SmallVector<SDValue, 16> OpsTripCount;
SmallVector<SDValue, 16> OpsIndex;
SmallVector<SDValue, 16> OpsStepConstants;
for (unsigned i = 0; i < VecWidth; i++) {
OpsBTC.push_back(BTC);
OpsTripCount.push_back(TripCount);
OpsIndex.push_back(Index);
OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
}
@ -6912,9 +6912,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
SDValue VectorInduction = DAG.getNode(
ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC);
SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
VectorBTC, ISD::CondCode::SETULE);
VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
SetCC));

View File

@ -253,7 +253,7 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpstt
; CHECK-NEXT: vcmpt.u32 cs, q1, q2
; CHECK-NEXT: vcmpt.u32 hi, q1, q2
; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
; CHECK-NEXT: vrintr.f32 s15, s11
; CHECK-NEXT: vrintr.f32 s14, s10

View File

@ -13,7 +13,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) {
; CHECK-NEXT: vdup.32 q1, r1
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.u32 cs, q1, q0
; CHECK-NEXT: vcmpt.u32 hi, q1, q0
; CHECK-NEXT: vmov d0, r2, r3
; CHECK-NEXT: vldr d1, [sp]
; CHECK-NEXT: vldrw.u32 q1, [r0]
@ -43,7 +43,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-NEXT: vmov.i8 q1, #0x0
; CHECK-NEXT: vmov.i8 q2, #0xff
; CHECK-NEXT: vadd.i32 q3, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q5, q3
; CHECK-NEXT: vcmp.u32 hi, q5, q3
; CHECK-NEXT: vpsel q4, q2, q1
; CHECK-NEXT: vmov r1, s16
; CHECK-NEXT: vmov.16 q0[0], r1
@ -56,7 +56,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) {
; CHECK-NEXT: adr r1, .LCPI1_1
; CHECK-NEXT: vldrw.u32 q4, [r1]
; CHECK-NEXT: vadd.i32 q4, q4, r0
; CHECK-NEXT: vcmp.u32 cs, q5, q4
; CHECK-NEXT: vcmp.u32 hi, q5, q4
; CHECK-NEXT: vpsel q5, q2, q1
; CHECK-NEXT: vmov r1, s20
; CHECK-NEXT: vmov.16 q0[4], r1
@ -128,7 +128,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: vmov.i8 q5, #0x0
; CHECK-NEXT: vmov.i8 q4, #0xff
; CHECK-NEXT: vadd.i32 q1, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q1
; CHECK-NEXT: vcmp.u32 hi, q7, q1
; CHECK-NEXT: vpsel q0, q4, q5
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov.16 q2[0], r1
@ -141,7 +141,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: adr r1, .LCPI2_1
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vadd.i32 q3, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q3
; CHECK-NEXT: vcmp.u32 hi, q7, q3
; CHECK-NEXT: vpsel q0, q4, q5
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov.16 q2[4], r1
@ -172,7 +172,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: adr r1, .LCPI2_2
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vadd.i32 q0, q0, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q0
; CHECK-NEXT: vcmp.u32 hi, q7, q0
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vpsel q6, q4, q5
; CHECK-NEXT: vmov r1, s24
@ -186,7 +186,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) {
; CHECK-NEXT: adr r1, .LCPI2_3
; CHECK-NEXT: vldrw.u32 q6, [r1]
; CHECK-NEXT: vadd.i32 q6, q6, r0
; CHECK-NEXT: vcmp.u32 cs, q7, q6
; CHECK-NEXT: vcmp.u32 hi, q7, q6
; CHECK-NEXT: vpsel q7, q4, q5
; CHECK-NEXT: vmov r1, s28
; CHECK-NEXT: vmov.16 q0[4], r1