diff --git a/CMakeLists.txt b/CMakeLists.txt index 022fe02d..8794435e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,9 +26,9 @@ ENDIF() add_definitions(-DGLM_FORCE_RADIANS) IF(${PROFILING}) - add_definitions(-DRENDER_PROFILER=1) + add_definitions(-DRENDER_PROFILER=0 -DRW_PROFILER=1) else() - add_definitions(-DRENDER_PROFILER=0) + add_definitions(-DRENDER_PROFILER=0 -DRW_PROFILER=0) ENDIF() IF(${ENABLE_SCRIPT_DEBUG}) diff --git a/rwengine/include/core/Profiler.hpp b/rwengine/include/core/Profiler.hpp new file mode 100644 index 00000000..4287d826 --- /dev/null +++ b/rwengine/include/core/Profiler.hpp @@ -0,0 +1,92 @@ +#ifndef _RWENGINE_PROFILER_HPP_ +#define _RWENGINE_PROFILER_HPP_ + +#if RW_PROFILER +#include <chrono> +#include <cstdint> +#include <stack> +#include <string> +#include <utility> +#include <vector> +#define time_unit std::chrono::microseconds + +namespace perf +{ + +/** One timed event; start and end are time_unit counts measured from the frame start. */ +struct ProfileEntry +{ + std::string label; + int64_t start; + int64_t end; + std::vector<ProfileEntry> childProfiles; +}; + +/** Recorder reached through get() that collects nested events into a per-frame ProfileEntry tree. */ +class Profiler +{ + ProfileEntry frame; + std::chrono::high_resolution_clock::time_point frameBegin; + std::stack<ProfileEntry> currentStack; + +public: + + static Profiler& get() + { + static Profiler profile; + return profile; + } + + const ProfileEntry& getFrame() const + { + return frame; + } + + /** Resets the time origin and replaces the recorded tree with an empty "Frame" root. */ + void startFrame() + { + frameBegin = std::chrono::high_resolution_clock::now(); + frame = { "Frame", 0, 0, {} }; + } + + /** Opens an event nested inside whichever event is currently open. */ + void beginEvent(const std::string& label) + { + auto now = std::chrono::duration_cast<time_unit>( + std::chrono::high_resolution_clock::now() - frameBegin); + currentStack.push({label, now.count(), 0, {} }); + } + + /** Closes the innermost open event and files it under its parent, or under the frame. */ + void endEvent() + { + auto now = std::chrono::duration_cast<time_unit>( + std::chrono::high_resolution_clock::now() - frameBegin); + // NOTE(review): the project header that defines RW_CHECK is not recoverable from this patch; confirm it is included + RW_CHECK(currentStack.size() > 0, "Perf stack is empty");
+ if (currentStack.empty()) { + // Unbalanced RW_PROFILE_END(): top() on an empty stack is undefined behaviour + return; + } + currentStack.top().end = now.count(); + ProfileEntry finished = std::move(currentStack.top()); + currentStack.pop(); + auto& siblings = currentStack.empty() ? frame.childProfiles : currentStack.top().childProfiles; + siblings.push_back(std::move(finished)); + } +}; +} +#define RW_PROFILE_FRAME_BOUNDARY() \ + perf::Profiler::get().startFrame(); +#define RW_PROFILE_BEGIN(label) \ + perf::Profiler::get().beginEvent(label); +#define RW_PROFILE_END() \ + perf::Profiler::get().endEvent(); +#else +// No-op fallbacks so call sites compile when profiling is disabled +#define RW_PROFILE_FRAME_BOUNDARY() +#define RW_PROFILE_BEGIN(label) +#define RW_PROFILE_END() +#endif + +#endif diff --git a/rwengine/include/render/GameRenderer.hpp b/rwengine/include/render/GameRenderer.hpp index 8deae218..d2fa3e38 100644 --- a/rwengine/include/render/GameRenderer.hpp +++ b/rwengine/include/render/GameRenderer.hpp @@ -146,6 +146,10 @@ public: * @brief Draws a texture on the screen */ void drawTexture(TextureData* texture, glm::vec4 extents); + /** + * @brief Draws a rectangle of the given colour; extents is x, y, width, height in viewport pixels + */ + void drawColour(const glm::vec4& colour, glm::vec4 extents); /** * Renders a model (who'd have thought) diff --git a/rwengine/include/render/ObjectRenderer.hpp b/rwengine/include/render/ObjectRenderer.hpp index 2efd1ec9..9f808385 100644 --- a/rwengine/include/render/ObjectRenderer.hpp +++ b/rwengine/include/render/ObjectRenderer.hpp @@ -9,36 +9,6 @@ #include #include - -/* -Rendering Instruction contents: - Model matrix - List of subgeometries(??) 
-*/ -typedef uint64_t RenderKey; -struct RenderInstruction -{ - RenderKey sortKey; - // Ideally, this would just be an index into a buffer that contains the matrix - glm::mat4 model; - DrawBuffer* dbuff; - Renderer::DrawParameters drawInfo; - - RenderInstruction( - RenderKey key, - const glm::mat4& model, - DrawBuffer* dbuff, - const Renderer::DrawParameters& dp) - : sortKey(key) - , model(model) - , dbuff(dbuff) - , drawInfo(dp) - { - - } -}; -typedef std::vector RenderList; - /** * @brief The ObjectRenderer class handles object -> renderer transformation * diff --git a/rwengine/include/render/OpenGLRenderer.hpp b/rwengine/include/render/OpenGLRenderer.hpp index c063b104..1ad9c94a 100644 --- a/rwengine/include/render/OpenGLRenderer.hpp +++ b/rwengine/include/render/OpenGLRenderer.hpp @@ -6,6 +6,8 @@ #include #include +typedef uint64_t RenderKey; + // Maximum depth of debug group stack #define MAX_DEBUG_DEPTH 5 @@ -51,6 +53,36 @@ public: unsigned int start; /// Start index. }; + /** + * @brief The RenderInstruction struct Generic Rendering instruction + * + * These are generated by the ObjectRenderer, and passed in to the + * OpenGLRenderer by GameRenderer. 
+ */ + struct RenderInstruction + { + RenderKey sortKey; + // Ideally, this would just be an index into a buffer that contains the matrix + glm::mat4 model; + DrawBuffer* dbuff; + Renderer::DrawParameters drawInfo; + + RenderInstruction( + RenderKey key, + const glm::mat4& model, + DrawBuffer* dbuff, + const Renderer::DrawParameters& dp) + : sortKey(key) + , model(model) + , dbuff(dbuff) + , drawInfo(dp) + { + + } + }; + typedef std::vector<RenderInstruction> RenderList; + + struct ObjectUniformData { glm::mat4 model; glm::vec4 colour; @@ -96,6 +128,8 @@ public: virtual void draw(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p) = 0; virtual void drawArrays(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p) = 0; + virtual void drawBatched(const RenderList& list) = 0; + void setViewport(const glm::ivec2& vp); const glm::ivec2& getViewport() const { return viewport; } @@ -203,6 +237,8 @@ public: void draw(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p); void drawArrays(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p); + void drawBatched(const RenderList& list) override; + void invalidate(); virtual void pushDebugGroup(const std::string& title); @@ -235,6 +271,9 @@ private: } GLuint UBOObject; + GLuint maxObjectEntries; + GLuint currentObjectEntry; + GLuint entryAlignment; GLuint UBOScene; // Debug group profiling timers @@ -247,4 +286,7 @@ private: GLuint compileShader(GLenum type, const char *source); GLuint compileProgram(const char* vertex, const char* fragment); + +typedef Renderer::RenderList RenderList; + #endif diff --git a/rwengine/src/core/Profiler.cpp b/rwengine/src/core/Profiler.cpp new file mode 100644 index 00000000..a4fcd742 --- /dev/null +++ b/rwengine/src/core/Profiler.cpp @@ -0,0 +1 @@ +#include <core/Profiler.hpp> diff --git a/rwengine/src/render/GameRenderer.cpp b/rwengine/src/render/GameRenderer.cpp index a7ac7060..ea8e7946 100644 --- a/rwengine/src/render/GameRenderer.cpp +++ b/rwengine/src/render/GameRenderer.cpp 
@@ -27,6 +27,8 @@ #include #include +#include <core/Profiler.hpp> + const size_t skydomeSegments = 8, skydomeRows = 10; struct WaterVertex { @@ -303,11 +305,15 @@ void GameRenderer::renderWorld(GameWorld* world, const ViewCamera &camera, float // Render List Construction //--------------------------------------------------------------- + RW_PROFILE_BEGIN("RenderList"); + // This is sequential at the moment, it should be easy to make it // run in parallel with a good threading system. RenderList renderList; - // Naive optimisation, assume 10% hitrate - renderList.reserve(world->allObjects.size() * 0.1f); + // Naive optimisation, assume 50% hitrate + renderList.reserve(world->allObjects.size() / 2); + + RW_PROFILE_BEGIN("Build"); // World Objects for (auto object : world->allObjects) { @@ -318,23 +324,30 @@ void GameRenderer::renderWorld(GameWorld* world, const ViewCamera &camera, float _renderAlpha, renderList); } + RW_PROFILE_END(); renderer->pushDebugGroup("Objects"); renderer->pushDebugGroup("RenderList"); // Also parallelizable + RW_PROFILE_BEGIN("Sort"); std::sort(renderList.begin(), renderList.end(), - [](const RenderInstruction&a, const RenderInstruction&b) { + [](const Renderer::RenderInstruction& a, + const Renderer::RenderInstruction&b) { return a.sortKey < b.sortKey; }); - for (RenderInstruction& ri : renderList) { - renderer->draw(ri.model, ri.dbuff, ri.drawInfo); - } + RW_PROFILE_END(); + + RW_PROFILE_BEGIN("Draw"); + renderer->drawBatched(renderList); + RW_PROFILE_END(); renderer->popDebugGroup(); profObjects = renderer->popDebugGroup(); + RW_PROFILE_END(); + // Render arrows above anything that isn't radar only (or hidden) ModelRef& arrowModel = world->data->models["arrow"]; if( arrowModel && arrowModel->resource ) @@ -700,6 +713,38 @@ void GameRenderer::drawTexture(TextureData* texture, glm::vec4 extents) renderer->invalidate(); } +/** Fills the rectangle extents (x, y, width, height in viewport pixels) with colour. */ +void GameRenderer::drawColour(const glm::vec4& colour, glm::vec4 extents) +{ + glUseProgram(ssRectProgram); + + // Move into NDC + extents.x /= renderer->getViewport().x; + extents.y 
/= renderer->getViewport().y; + extents.z /= renderer->getViewport().x; + extents.w /= renderer->getViewport().y; + extents.x += extents.z / 2.f; + extents.y += extents.w / 2.f; + extents.x -= .5f; + extents.y -= .5f; + extents *= glm::vec4(2.f,-2.f, 1.f, 1.f); + + glEnable(GL_BLEND); + glUniform2f(ssRectOffset, extents.x, extents.y); + glUniform2f(ssRectSize, extents.z, extents.w); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, 0); + glUniform1i(ssRectTexture, 0); + glUniform4f(ssRectColour, colour.r, colour.g, colour.b, colour.a); + + glBindVertexArray( ssRectDraw.getVAOName() ); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + // Raw GL calls above bypass the renderer's state cache, so reset it + renderer->invalidate(); +} + bool GameRenderer::renderFrame(Model* m, ModelFrame* f, const glm::mat4& matrix, GameObject* object, float opacity, bool queueTransparent) { auto localmatrix = matrix; diff --git a/rwengine/src/render/ObjectRenderer.cpp b/rwengine/src/render/ObjectRenderer.cpp index 5097d9e4..18629347 100644 --- a/rwengine/src/render/ObjectRenderer.cpp +++ b/rwengine/src/render/ObjectRenderer.cpp @@ -180,7 +180,7 @@ void renderInstance(GameWorld* world, InstanceObject *instance, const ViewCamera& camera, float renderAlpha, - std::vector& outList) + RenderList& outList) { if(!instance->model->resource) { diff --git a/rwengine/src/render/OpenGLRenderer.cpp b/rwengine/src/render/OpenGLRenderer.cpp index fbf5f157..6802a680 100644 --- a/rwengine/src/render/OpenGLRenderer.cpp +++ b/rwengine/src/render/OpenGLRenderer.cpp @@ -156,8 +156,39 @@ void OpenGLRenderer::useProgram(Renderer::ShaderProgram* p) } } +#if 0 +template<> +void OpenGLRenderer::uploadUBO(GLuint buffer, const ObjectUniformData& data) +{ + if( currentUBO != buffer ) { + glBindBuffer(GL_UNIFORM_BUFFER, buffer); + currentUBO = buffer; + } + /*glBindBufferRange(GL_UNIFORM_BUFFER, + 2, + UBOObject, + entryAlignment * currentObjectEntry, + sizeof(ObjectUniformData));*/ + glBufferSubData(GL_UNIFORM_BUFFER, + 0, + sizeof(ObjectUniformData), &data); 
+#if RW_USING(RENDER_PROFILER) + if( currentDebugDepth > 0 ) + { + profileInfo[currentDebugDepth-1].uploads++; + } +#endif + currentObjectEntry = (currentObjectEntry+1) % maxObjectEntries; +} +#endif + OpenGLRenderer::OpenGLRenderer() - : currentDbuff(nullptr), currentProgram(nullptr), currentDebugDepth(0) + : currentDbuff(nullptr) + , currentProgram(nullptr) + , maxObjectEntries(0) + , currentObjectEntry(0) + , entryAlignment(0) + , currentDebugDepth(0) { // We need to query for some profiling exts. ogl_CheckExtensions(); @@ -170,10 +201,20 @@ OpenGLRenderer::OpenGLRenderer() swap(); - GLint maxUBOSize; + GLint maxUBOSize, UBOAlignment; glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUBOSize); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &UBOAlignment); + entryAlignment = 128; // NOTE(review): fixed stride; the queried UBOAlignment is only logged, confirm 128 satisfies it + maxObjectEntries = maxUBOSize / entryAlignment; std::cout << "Max UBO Size: " << maxUBOSize << std::endl; - std::cout << "Max batch size: " << (maxUBOSize/sizeof(ObjectUniformData)) << std::endl; + std::cout << "UBO Alignment: " << UBOAlignment << std::endl; + std::cout << "Max batch size: " << maxObjectEntries << std::endl; + + glBindBuffer(GL_UNIFORM_BUFFER, UBOObject); + glBufferData(GL_UNIFORM_BUFFER, + entryAlignment * maxObjectEntries, + NULL, + GL_STREAM_DRAW); glGenQueries(1, &debugQuery); } @@ -323,6 +364,59 @@ void OpenGLRenderer::drawArrays(const glm::mat4& model, DrawBuffer* draw, const glDrawArrays(draw->getFaceType(), p.start, p.count); } +/** Draws every instruction in list; currently one draw() call each, the batched UBO path is compiled out. */ +void OpenGLRenderer::drawBatched(const RenderList& list) +{ +#if 0 // Needs shader changes + // Determine how many batches we need to process the entire list + auto entries = list.size(); + glBindBuffer(GL_UNIFORM_BUFFER, UBOObject); + for (int b = 0; b < entries; b += maxObjectEntries) + { + auto toConsume = std::min((GLuint)entries, b + maxObjectEntries) - b; + std::vector<ObjectUniformData> uploadBuffer; + uploadBuffer.resize(toConsume); + for (int d = 0; d < toConsume; ++d) + { + auto& draw = list[b+d]; + uploadBuffer[d] = { + draw.model, + 
glm::vec4(draw.drawInfo.colour.r/255.f, + draw.drawInfo.colour.g/255.f, + draw.drawInfo.colour.b/255.f, 1.f), + 1.f, + 1.f, + draw.drawInfo.colour.a/255.f + }; + } + glBufferData(GL_UNIFORM_BUFFER, + toConsume * sizeof(ObjectUniformData), + uploadBuffer.data(), + GL_STREAM_DRAW); + + // Dispatch individual draws + for (int d = 0; d < toConsume; ++d) + { + auto& draw = list[b+d]; + useDrawBuffer(draw.dbuff); + + for( GLuint u = 0; u < draw.drawInfo.textures.size(); ++u ) + { + useTexture(u, draw.drawInfo.textures[u]); + } + + glDrawElements(draw.dbuff->getFaceType(), draw.drawInfo.count, GL_UNSIGNED_INT, + (void*) (sizeof(RenderIndex) * draw.drawInfo.start)); + } + } +#else + for(auto& ri : list) + { + draw(ri.model, ri.dbuff, ri.drawInfo); + } +#endif +} + void OpenGLRenderer::invalidate() { currentDbuff = nullptr; diff --git a/rwgame/RWGame.cpp b/rwgame/RWGame.cpp index ab746008..e26a8882 100644 --- a/rwgame/RWGame.cpp +++ b/rwgame/RWGame.cpp @@ -7,6 +7,8 @@ #include "benchmarkstate.hpp" #include "debug/HttpServer.hpp" +#include <core/Profiler.hpp> + #include #include #include @@ -295,7 +297,10 @@ int RWGame::run() // Loop until the window is closed or we run out of state. while (window.isOpen() && StateManager::get().states.size()) { State* state = StateManager::get().states.back(); + + RW_PROFILE_FRAME_BOUNDARY(); + RW_PROFILE_BEGIN("Input"); sf::Event event; while (window.pollEvent(event)) { switch (event.type) { @@ -313,8 +318,11 @@ int RWGame::run() default: break; } + RW_PROFILE_BEGIN("State"); state->handleEvent(event); + RW_PROFILE_END(); } + RW_PROFILE_END(); if(! 
window.isOpen() ) { @@ -324,13 +332,19 @@ int RWGame::run() float timer = clock.restart().asSeconds(); accum += timer * timescale; - while ( accum >= GAME_TIMESTEP ) { + RW_PROFILE_BEGIN("Update"); + if ( accum >= GAME_TIMESTEP ) { // NOTE(review): was a while loop; the break below now exits the frame loop, confirm intent + RW_PROFILE_BEGIN("state"); StateManager::get().tick(GAME_TIMESTEP); + RW_PROFILE_END(); + if (StateManager::get().states.size() == 0) { break; } + RW_PROFILE_BEGIN("engine"); tick(GAME_TIMESTEP); + RW_PROFILE_END(); accum -= GAME_TIMESTEP; @@ -340,6 +354,7 @@ int RWGame::run() accum = 0.f; } } + RW_PROFILE_END(); float alpha = fmod(accum, GAME_TIMESTEP) / GAME_TIMESTEP; if( ! state->shouldWorldUpdate() ) @@ -347,11 +362,19 @@ int RWGame::run() alpha = 1.f; } + RW_PROFILE_BEGIN("Render"); + RW_PROFILE_BEGIN("engine"); render(alpha, timer); + RW_PROFILE_END(); + RW_PROFILE_BEGIN("state"); if (StateManager::get().states.size() > 0) { StateManager::get().draw(renderer); } + RW_PROFILE_END(); + RW_PROFILE_END(); + + renderProfile(); window.display(); } @@ -529,10 +552,14 @@ void RWGame::render(float alpha, float time) renderer->getRenderer()->pushDebugGroup("World"); + RW_PROFILE_BEGIN("world"); renderer->renderWorld(world, viewCam, alpha); + RW_PROFILE_END(); + auto rendertime = renderer->getRenderer()->popDebugGroup(); + RW_PROFILE_BEGIN("debug"); if( showDebugPaths ) { renderDebugPaths(time); @@ -551,6 +578,7 @@ void RWGame::render(float alpha, float time) debug->flush(renderer); } } + RW_PROFILE_END(); drawOnScreenText(world, renderer); } @@ -698,6 +726,61 @@ void RWGame::renderDebugPaths(float time) debug->flush(renderer); } +/** Draws the profiler's current frame tree as rows of timing bars; a no-op unless RW_PROFILER is set. */ +void RWGame::renderProfile() +{ +#if RW_PROFILER + auto& frame = perf::Profiler::get().getFrame(); + constexpr float upperlimit = 30000.f; + constexpr float lineHeight = 15.f; + static std::vector<glm::vec4> perf_colours; + if (perf_colours.size() == 0) { + float c = 8.f; + for (int r = 0; r < c; ++r) { + for (int g = 0; g < c; ++g) { + for (int b = 0; b < c; ++b) { + perf_colours.push_back({ + r / c, g / c, b / c, 1.f + }); 
+ } + } + } + } + + + float xscale = renderer->getRenderer()->getViewport().x / upperlimit; + TextRenderer::TextInfo ti; + ti.align = TextRenderer::TextInfo::Left; + ti.font = 2; + ti.size = lineHeight - 2.f; + std::function<void(const perf::ProfileEntry&, int)> renderEntry = [&](const perf::ProfileEntry& entry, int depth) + { + int g = 0; + for(auto& event : entry.childProfiles) + { + auto duration = event.end - event.start; + float y = 60.f + (depth * (lineHeight + 5.f)); + renderer->drawColour(perf_colours[(std::hash<std::string>()(entry.label) * (g++))%perf_colours.size()], + { + xscale * event.start, + y, + xscale * duration, + lineHeight + }); + ti.screenPosition.x = xscale * (event.start); + ti.screenPosition.y = y + 2.f; + ti.text = event.label + " " + std::to_string(duration) + " us "; + renderer->text.renderText(ti); + renderEntry(event, depth+1); + } + }; + renderEntry(frame, 0); + ti.screenPosition = glm::vec2( xscale * (16000), 40.f); + ti.text = "16 ms"; + renderer->text.renderText(ti); +#endif +} + void RWGame::globalKeyEvent(const sf::Event& event) { switch (event.key.code) { @@ -722,6 +805,9 @@ void RWGame::globalKeyEvent(const sf::Event& event) case sf::Keyboard::F3: showDebugPhysics = ! showDebugPhysics; break; + case sf::Keyboard::F12: + window.capture().saveToFile("screenshot.png"); + break; default: break; } } diff --git a/rwgame/RWGame.hpp b/rwgame/RWGame.hpp index b9712b57..0a3ed3fb 100644 --- a/rwgame/RWGame.hpp +++ b/rwgame/RWGame.hpp @@ -126,6 +126,7 @@ private: void renderDebugStats(float time, Renderer::ProfileInfo& worldRenderTime); void renderDebugPaths(float time); + void renderProfile(); void globalKeyEvent(const sf::Event& event); };