diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp index 18eaca3722..5f8dc1c7a1 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp @@ -180,7 +180,7 @@ s64 spursInit( name += "CellSpursKernel0"; for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++) { - auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0); + auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, (u64)num << 32, spurs.addr(), 0, 0); #ifndef PRX_DEBUG_XXX spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelEntry); #endif @@ -389,10 +389,10 @@ s64 spursInit( spurs->m.sub3.unk2 = 3; // unknown const spurs->m.sub3.port = (u64)spurs->m.port; - if (flags & SAF_SYSTEM_WORKLOAD_ENABLED) // initialize system workload + if (flags & SAF_SYSTEM_WORKLOAD_ENABLED) // initialize system workload (disabled) { s32 res = CELL_OK; -#ifdef PRX_DEBUG +#ifdef PRX_DEBUG_XXX res = cb_call, u32, u32, u32>(GetCurrentPPUThread(), libsre + 0x10428, libsre_rtoc, spurs, vm::get_addr(swlPriority), swlMaxSpu, swlIsPreem); #endif diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp index 6b8c3ed01e..794a17af82 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -156,20 +156,19 @@ void spursHalt(SPUThread & spu) { /// Select a workload to run bool spursKernel1SelectWorkload(SPUThread & spu) { - auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); // The first and only argument to this function is a boolean that is set to false if the function // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. // If the first argument is true then the shared data is not updated with the result. - const auto isPoll = spu.GPR[3]._u32[3]; + const auto isPoll = spu.GPR[3]._u32[3]; - u32 wklSelectedId; - u32 pollStatus; + u32 wklSelectedId; + u32 pollStatus; - do { - // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + vm::reservation_op(vm::cast(ctxt->spurs.addr()), 128, [&]() { + // lock the first 0x80 bytes of spurs + auto spurs = ctxt->spurs.get_priv_ptr(); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD]; @@ -302,7 +301,9 @@ bool spursKernel1SelectWorkload(SPUThread & spu) { ctxt->wklLocPendingContention[i] = 0; } } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x100), spurs, 128); + }); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -322,10 +323,9 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { u32 wklSelectedId; u32 pollStatus; - do { - // DMA and lock the first 0x80 bytes of spurs - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + vm::reservation_op(vm::cast(ctxt->spurs.addr()), 128, [&]() { + // lock the first 0x80 bytes of spurs + auto spurs = ctxt->spurs.get_priv_ptr(); // Calculate the contention (number of SPUs used) for each workload u8 contention[CELL_SPURS_MAX_WORKLOAD2]; @@ -448,7 +448,9 @@ bool spursKernel2SelectWorkload(SPUThread & spu) { ctxt->wklLocPendingContention[i] = 0; } } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x100), spurs, 128); + }); u64 result = (u64)wklSelectedId << 32; result |= pollStatus; @@ -468,13 +470,13 @@ void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? &ctxt->spurs->m.wklInfo1[wid] : wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &ctxt->spurs->m.wklInfo2[wid & 0xf] : &ctxt->spurs->m.wklInfoSysSrv; - spursDma(spu, MFC_GET_CMD, vm::get_addr(wklInfoOffset), 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); - spursDmaWaitForCompletion(spu, 0x80000000); + + memcpy(vm::get_ptr(spu.ls_offset + 0x3FFE0), wklInfoOffset, 0x20); // Load the workload to LS auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0); if (ctxt->wklCurrentAddr != wklInfo->addr) { - switch (wklInfo->addr.addr()) { + switch (wklInfo->addr.addr().value()) { case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD: spu.RegisterHleFunction(0xA00, spursSysServiceEntry); break; @@ -482,8 +484,7 @@ void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) { spu.RegisterHleFunction(0xA00, spursTasksetEntry); break; default: - spursDma(spu, MFC_GET_CMD, wklInfo-> addr.addr(), 0xA00/*LSA*/, wklInfo->size, CELL_SPURS_KERNEL_DMA_TAG_ID); - spursDmaWaitForCompletion(spu, 0x80000000); + memcpy(vm::get_ptr(spu.ls_offset + 0xA00), wklInfo->addr.get_ptr(), wklInfo->size); break; } @@ -524,12 +525,12 @@ bool spursKernelWorkloadExit(SPUThread & spu) { /// SPURS kernel entry point bool spursKernelEntry(SPUThread & spu) { - while (true) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (Emu.IsStopped()) { - return false; - } - } + //while (true) { + // std::this_thread::sleep_for(std::chrono::milliseconds(100)); + // if (Emu.IsStopped()) { + // return false; + // } + //} auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); memset(ctxt, 0, sizeof(SpursKernelContext)); @@ -594,12 +595,10 @@ bool spursSysServiceEntry(SPUThread & spu) { /// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { - // Monitor only lock line reservation lost events - spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR)); - bool shouldExit; + while (true) { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + vm::reservation_acquire(vm::get_ptr(spu.ls_offset + 0x100), vm::cast(ctxt->spurs.addr()), 128/*, [&spu](){ spu.Notify(); }*/); auto spurs = vm::get_ptr(spu.ls_offset + 0x100); // Find the number of SPUs that are idling in this SPURS instance @@ -666,13 +665,11 @@ void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) { // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events. if (spuIdling && shouldExit == false && foundReadyWorkload == false) { // The system service blocks by making a reservation and waiting on the lock line reservation lost event. - u128 r; - spu.ReadChannel(r, SPU_RdEventStat); - spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); + spu.WaitForAnySignal(1); + if (Emu.IsStopped()) return; } - auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - if (dmaSuccess && (shouldExit || foundReadyWorkload)) { + if (vm::reservation_update(vm::cast(ctxt->spurs.addr()), vm::get_ptr(spu.ls_offset + 0x100), 128) && (shouldExit || foundReadyWorkload)) { break; } } @@ -687,28 +684,31 @@ void spursSysServiceMain(SPUThread & spu, u32 pollStatus) { auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); if (ctxt->spurs.addr() % CellSpurs::align) { - spursHalt(spu); - return; + assert(!"spursSysServiceMain(): invalid spurs alignment"); + //spursHalt(spu); + //return; } // Initialise the system service if this is the first time its being started on this SPU if (ctxt->sysSrvInitialised == 0) { ctxt->sysSrvInitialised = 1; - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + vm::reservation_acquire(vm::get_ptr(spu.ls_offset + 0x100), vm::cast(ctxt->spurs.addr()), 128); - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + vm::reservation_op(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1)), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); // Halt if already initialised if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) { - spursHalt(spu); - return; + assert(!"spursSysServiceMain(): already initialized"); + //spursHalt(spu); + //return; } spurs->m.sysSrvOnSpu |= 1 << ctxt->spuNum; - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2D80), spurs->m.wklState1, 128); + }); ctxt->traceBuffer = 0; ctxt->traceMsgCount = -1; @@ -751,7 +751,7 @@ poll: pkt.data.stop = SPURS_GUID_SYS_WKL; cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); - spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + //spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); break; } @@ -773,6 +773,8 @@ poll: cellSpursModulePutTrace(&pkt, ctxt->dmaTagId); spursSysServiceIdleHandler(spu, ctxt); + if (Emu.IsStopped()) return; + goto poll; } } @@ -783,9 +785,8 @@ void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) bool updateWorkload = false; bool terminate = false; - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + vm::reservation_op(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1)), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); // Terminate request if (spurs->m.sysSrvMsgTerminate & (1 << ctxt->spuNum)) { @@ -803,7 +804,9 @@ void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) if (spurs->m.sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) { updateTrace = true; } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2D80), spurs->m.wklState1, 128); + }); // Process update workload message if (updateWorkload) { @@ -824,9 +827,9 @@ void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) /// Activate a workload void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) { auto spurs = vm::get_ptr(spu.ls_offset + 0x100); - spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + memcpy(vm::get_ptr(spu.ls_offset + 0x30000), vm::get_ptr(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1))), 0x200); if (spurs->m.flags1 & SF1_32_WORKLOADS) { - spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID); + memcpy(vm::get_ptr(spu.ls_offset + 0x30200), vm::get_ptr(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2))), 0x200); } u32 wklShutdownBitSet = 0; @@ -849,9 +852,8 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) } } - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + vm::reservation_op(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1)), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { // Update workload status and runnable flag based on the workload state @@ -892,7 +894,9 @@ void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) } } } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2D80), spurs->m.wklState1, 128); + }); if (wklShutdownBitSet) { spursSysServiceUpdateShutdownCompletionEvents(spu, ctxt, wklShutdownBitSet); @@ -905,9 +909,8 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelC // workloads that have a shutdown completion hook registered u32 wklNotifyBitSet; u8 spuPort; - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + vm::reservation_op(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1)), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); wklNotifyBitSet = 0; spuPort = spurs->m.spuPort;; @@ -926,7 +929,9 @@ void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelC } } } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2D80), spurs->m.wklState1, 128); + }); if (wklNotifyBitSet) { // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyMask); @@ -946,9 +951,8 @@ void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 bool notify; u8 sysSrvMsgUpdateTrace; - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + vm::reservation_op(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1)), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace; spurs->m.sysSrvMsgUpdateTrace &= ~(1 << ctxt->spuNum); @@ -965,17 +969,19 @@ void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 spurs->m.xCD = 0; notify = true; } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2D80), spurs->m.wklState1, 128); + }); // Get trace parameters from CellSpurs and store them in the LS if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0)) { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/); + vm::reservation_acquire(vm::get_ptr(spu.ls_offset + 0x80), vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer)), 128); auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer)); if (ctxt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) { spursSysServiceTraceSaveCount(spu, ctxt); } else { - spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, ctxt->dmaTagId); + memcpy(vm::get_ptr(spu.ls_offset + 0x2C00), vm::get_ptr(spurs->m.traceBuffer.addr() & -0x4), 0x80); auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00); ctxt->traceMsgCount = traceBuffer->count[ctxt->spuNum]; } @@ -997,23 +1003,28 @@ void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt) { u8 wklId; - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + bool do_return = false; + + vm::reservation_op(vm::cast(ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1)), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); if (spurs->m.sysSrvWorkload[ctxt->spuNum] == 0xFF) { + do_return = true; return; } wklId = spurs->m.sysSrvWorkload[ctxt->spuNum]; spurs->m.sysSrvWorkload[ctxt->spuNum] = 0xFF; - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2D80), spurs->m.wklState1, 128); + }); + + if (do_return) return; spursSysServiceActivateWorkload(spu, ctxt); - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + vm::reservation_op(vm::cast(ctxt->spurs.addr()), 128, [&]() { + auto spurs = ctxt->spurs.get_priv_ptr(); if (wklId >= CELL_SPURS_MAX_WORKLOAD) { spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10; @@ -1022,7 +1033,9 @@ void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelConte spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01; spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1); } - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x100), spurs, 128); + }); // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace // uses the current worload id to determine the workload to which the trace belongs @@ -1144,16 +1157,16 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * s32 rc = CELL_OK; s32 numNewlyReadyTasks; - do { - spursDma(spu, MFC_GETLLAR_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto taskset = vm::get_ptr(spu.ls_offset + 0x2700); + vm::reservation_op(vm::cast(ctxt->taskset.addr()), 128, [&]() { + auto taskset = ctxt->taskset.get_priv_ptr(); // Verify taskset state is valid auto _0 = be_t::make(u128::from32(0)); if ((taskset->m.waiting & taskset->m.running) != _0 || (taskset->m.ready & taskset->m.pending_ready) != _0 || ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.signalled | taskset->m.waiting) & be_t::make(~taskset->m.enabled.value())) != _0) { - spursHalt(spu); - return CELL_OK; + assert(!"Invalid taskset state"); + //spursHalt(spu); + //return CELL_OK; } // Find the number of tasks that have become ready since the last iteration @@ -1269,8 +1282,9 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * } break; default: - spursHalt(spu); - return CELL_OK; + assert(!"Unknown taskset request"); + //spursHalt(spu); + //return CELL_OK; } taskset->m.pending_ready = _0; @@ -1279,12 +1293,13 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * taskset->m.enabled = enabled; taskset->m.signalled = signalled; taskset->m.ready = ready; - } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x2700), taskset, 128); + }); // Increment the ready count of the workload by the number of tasks that have become ready - do { - spursDma(spu, MFC_GETLLAR_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); - auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1)); + vm::reservation_op(vm::cast(kernelCtxt->spurs.addr()), 128, [&]() { + auto spurs = kernelCtxt->spurs.get_priv_ptr(); s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed(); readyCount += numNewlyReadyTasks; @@ -1295,7 +1310,9 @@ s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * } else { spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount); } - } while (spursDma(spu, MFC_PUTLLC_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false); + + memcpy(vm::get_ptr(spu.ls_offset + 0x100), spurs, 128); + }); return rc; } @@ -1332,7 +1349,8 @@ void spursTasksetExit(SPUThread & spu) { // Not sure why this check exists. Perhaps to check for memory corruption. if (memcmp(ctxt->moduleId, "SPURSTASK MODULE", 16) != 0) { - spursHalt(spu); + //spursHalt(spu); + assert(!"spursTasksetExit(): memory corruption"); } cellSpursModuleExit(spu); @@ -1342,8 +1360,7 @@ void spursTasksetExit(SPUThread & spu) { void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) { auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); - spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0); - spursDmaWaitForCompletion(spu, 1); + memcpy(vm::get_ptr(spu.ls_offset + 0x10000), vm::get_ptr(addr & -0x80), (addr & 0x7F) << 11); spu.GPR[3]._u64[1] = ctxt->taskset.addr(); spu.GPR[4]._u32[3] = taskId; @@ -1357,7 +1374,7 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700); auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); - spursDmaWaitForCompletion(spu, 0xFFFFFFFF); + //spursDmaWaitForCompletion(spu, 0xFFFFFFFF); if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) { return CELL_SPURS_TASK_ERROR_STAT; @@ -1392,18 +1409,18 @@ s32 spursTasketSaveTaskContext(SPUThread & spu) { ctxt->savedWriteTagGroupQueryMask = r._u32[3]; // Store the processor context - u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; - spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId); + const u32 contextSaveStorage = vm::cast(taskInfo->context_save_storage_and_alloc_ls_blocks & -0x80); + memcpy(vm::get_ptr(contextSaveStorage), vm::get_ptr(spu.ls_offset + 0x2C80), 0x380); // Save LS context for (auto i = 6; i < 128; i++) { if (taskInfo->ls_pattern._u128.value()._bit[i]) { // TODO: Combine DMA requests for consecutive blocks into a single request - spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); + memcpy(vm::get_ptr(contextSaveStorage + 0x400 + ((i - 6) << 11)), vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP + ((i - 6) << 11)), 0x800); } } - spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + //spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); return CELL_OK; } @@ -1423,8 +1440,7 @@ void spursTasksetDispatch(SPUThread & spu) { ctxt->taskId = taskId; // DMA in the task info for the selected task - spursDma(spu, MFC_GET_CMD, vm::get_addr(&ctxt->taskset->m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), ctxt->dmaTagId); - spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + memcpy(vm::get_ptr(spu.ls_offset + 0x2780), &ctxt->taskset->m.task_info[taskId], sizeof(CellSpursTaskset::TaskInfo)); auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780); auto elfAddr = taskInfo->elf_addr.addr().value(); taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull); @@ -1445,11 +1461,12 @@ void spursTasksetDispatch(SPUThread & spu) { u32 entryPoint; u32 lowestLoadAddr; if (spursTasksetLoadElf(spu, &entryPoint, &lowestLoadAddr, taskInfo->elf_addr.addr(), false) != CELL_OK) { - spursHalt(spu); - return; + assert(!"spursTaskLoadElf() failed"); + //spursHalt(spu); + //return; } - spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + //spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); ctxt->savedContextLr = u128::from32r(entryPoint); ctxt->guidAddr = lowestLoadAddr; @@ -1459,7 +1476,7 @@ void spursTasksetDispatch(SPUThread & spu) { ctxt->x2FD4 = elfAddr & 5; // TODO: Figure this out if ((elfAddr & 5) == 1) { - spursDma(spu, MFC_GET_CMD, vm::get_addr(&((CellSpursTaskset2*)(ctxt->taskset.get_ptr()))->m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, ctxt->dmaTagId); + memcpy(vm::get_ptr(spu.ls_offset + 0x2FC0), &((CellSpursTaskset2*)(ctxt->taskset.get_ptr()))->m.task_exit_code[taskId], 0x10); } // Trace - GUID @@ -1485,22 +1502,23 @@ void spursTasksetDispatch(SPUThread & spu) { // Load the ELF u32 entryPoint; if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) { - spursHalt(spu); - return; + assert(!"spursTasksetLoadElf() failed"); + //spursHalt(spu); + //return; } } // Load saved context from main memory to LS - u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull; - spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId); + const u32 contextSaveStorage = vm::cast(taskInfo->context_save_storage_and_alloc_ls_blocks & -0x80); + memcpy(vm::get_ptr(spu.ls_offset + 0x2C80), vm::get_ptr(contextSaveStorage), 0x380); for (auto i = 6; i < 128; i++) { if (taskInfo->ls_pattern._u128.value()._bit[i]) { // TODO: Combine DMA requests for consecutive blocks into a single request - spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId); + memcpy(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP + ((i - 6) << 11)), vm::get_ptr(contextSaveStorage + 0x400 + ((i - 6) << 11)), 0x800); } } - spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); + //spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId); // Restore saved registers spu.FPSCR.Write(ctxt->savedContextFpscr.value()); @@ -1533,7 +1551,7 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { // syscall (e.g. cellSpursYield2 instead of cellSpursYield) and so don't wait // for DMA completion if ((syscallNum & 0x10) == 0) { - spursDmaWaitForCompletion(spu, 0xFFFFFFFF); + //spursDmaWaitForCompletion(spu, 0xFFFFFFFF); } s32 rc = 0; @@ -1579,7 +1597,8 @@ s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) { break; case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG: if (args == 0) { // TODO: Figure this out - spursHalt(spu); + assert(!"args == 0"); + //spursHalt(spu); } if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG, nullptr, nullptr) != 1) { @@ -1642,7 +1661,7 @@ s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, return CELL_SPURS_TASK_ERROR_INVAL; } - vfsStreamMemory stream(elfAddr); + vfsStreamMemory stream(vm::cast(elfAddr)); loader::handlers::elf32 loader; auto rc = loader.init(stream); if (rc != loader::handler::ok) { diff --git a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp index b182ecd4b5..3a3f32ae6f 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_ppu_thread.cpp @@ -23,6 +23,15 @@ void ppu_thread_exit(PPUThread& CPU, u64 errorcode) CPU.SetExitStatus(errorcode); CPU.Stop(); + + if (!CPU.IsJoinable()) + { + const u32 id = CPU.GetId(); + CallAfter([id]() + { + Emu.GetCPU().RemoveThread(id); + }); + } } void sys_ppu_thread_exit(PPUThread& CPU, u64 errorcode) @@ -65,6 +74,7 @@ s32 sys_ppu_thread_join(u64 thread_id, vm::ptr vptr) } *vptr = thr->GetExitStatus(); + Emu.GetCPU().RemoveThread(thread_id); return CELL_OK; } @@ -156,7 +166,7 @@ PPUThread* ppu_thread_create(u32 entry, u64 arg, s32 prio, u32 stacksize, bool i new_thread.SetEntry(entry); new_thread.SetPrio(prio); new_thread.SetStackSize(stacksize); - //new_thread.flags = flags; + new_thread.SetJoinable(is_joinable); new_thread.m_has_interrupt = false; new_thread.m_is_interrupt = is_interrupt; new_thread.SetName(name);