diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 8fd55d2a0dd..b628d19b5a9 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1791,8 +1791,8 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { SDValue Ops[] = {Base, NewOffset, CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg, Chain}; - SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0), - MVT::i32, MVT::Other, Ops); + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, + N->getValueType(0), MVT::Other, Ops); transferMemOperands(N, New); ReplaceUses(SDValue(N, 0), SDValue(New, 1)); ReplaceUses(SDValue(N, 1), SDValue(New, 0)); @@ -2514,7 +2514,16 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, Ops.push_back(N->getOperand(0)); // chain - CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); + SmallVector<EVT, 8> VTs; + VTs.push_back(N->getValueType(1)); + VTs.push_back(N->getValueType(0)); + VTs.push_back(N->getValueType(2)); + + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); + ReplaceUses(SDValue(N, 0), SDValue(New, 1)); + ReplaceUses(SDValue(N, 1), SDValue(New, 0)); + ReplaceUses(SDValue(N, 2), SDValue(New, 2)); + CurDAG->RemoveDeadNode(N); } void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, diff --git a/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll b/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll index 7eac79094f5..a25dd721d54 100644 --- a/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll +++ b/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll @@ -202,8 +202,8 @@ entry: define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) { ; CHECK-LABEL: test_vldrdq_gather_base_wb_s64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrd.u64 q1, [q0, #576]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrd.u64 q0, [q1, #576]! 
; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -220,8 +220,8 @@ declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(< define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) { ; CHECK-LABEL: test_vldrdq_gather_base_wb_u64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrd.u64 q1, [q0, #-328]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrd.u64 q0, [q1, #-328]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -237,9 +237,9 @@ define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %a ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrdt.u64 q1, [q0, #664]! +; CHECK-NEXT: vldrdt.u64 q0, [q1, #664]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -259,9 +259,9 @@ define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %a ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrdt.u64 q1, [q0, #656]! +; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -727,8 +727,8 @@ entry: define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_f32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [q0, #-64]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q0, [q1, #-64]! 
; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -745,8 +745,8 @@ declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_s32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [q0, #80]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q0, [q1, #80]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -763,8 +763,8 @@ declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(< define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_u32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [q0, #480]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q0, [q1, #480]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -780,9 +780,9 @@ define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q1, [q0, #-352]! +; CHECK-NEXT: vldrwt.u32 q0, [q1, #-352]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -802,9 +802,9 @@ define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %a ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q1, [q0, #276]! +; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]! 
; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -824,9 +824,9 @@ define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %a ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q1, [q0, #88]! +; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: diff --git a/test/CodeGen/Thumb2/mve-intrinsics/vldr.ll b/test/CodeGen/Thumb2/mve-intrinsics/vldr.ll index aa893abc057..a5490e1fc39 100644 --- a/test/CodeGen/Thumb2/mve-intrinsics/vldr.ll +++ b/test/CodeGen/Thumb2/mve-intrinsics/vldr.ll @@ -4,8 +4,8 @@ define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_s32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [q0, #80]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q0, [q1, #80]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -22,8 +22,8 @@ declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(< define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) { ; CHECK-LABEL: test_vldrwq_gather_base_wb_f32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [q0, #64]! +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q0, [q1, #64]! ; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: @@ -41,9 +41,9 @@ define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %a ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst -; CHECK-NEXT: vldrdt.u64 q1, [q0, #656]! +; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]! 
; CHECK-NEXT: vstrw.32 q1, [r0] ; CHECK-NEXT: bx lr entry: