1
0
mirror of https://github.com/rwengine/openrw.git synced 2024-09-03 17:19:46 +02:00

Implement a trivial profiler for perf. overview

This commit is contained in:
Daniel Evans 2016-04-26 23:35:04 +01:00
parent 4fc177d98a
commit b0a2ae4da5
11 changed files with 363 additions and 42 deletions

View File

@ -26,9 +26,9 @@ ENDIF()
add_definitions(-DGLM_FORCE_RADIANS)
IF(${PROFILING})
add_definitions(-DRENDER_PROFILER=1)
add_definitions(-DRENDER_PROFILER=0 -DRW_PROFILER=1)
else()
add_definitions(-DRENDER_PROFILER=0)
add_definitions(-DRENDER_PROFILER=0 -DRW_PROFILER=0)
ENDIF()
IF(${ENABLE_SCRIPT_DEBUG})

View File

@ -0,0 +1,84 @@
#ifndef _RWENGINE_PROFILER_HPP_
#define _RWENGINE_PROFILER_HPP_
#if RW_PROFILER
#include <rw/defines.hpp>
#include <chrono>
#include <cstdint>
#include <stack>
#include <string>
#include <utility>
#include <vector>
#define time_unit std::chrono::microseconds
namespace perf
{
/**
 * @brief One timed region recorded by the Profiler.
 *
 * start and end are offsets from the beginning of the current frame,
 * measured in time_unit ticks. Regions that were opened and closed while
 * this one was still open are stored, in completion order, in childProfiles.
 */
struct ProfileEntry
{
    std::string label;
    int64_t start;
    int64_t end;
    std::vector<ProfileEntry> childProfiles;
};

/**
 * @brief Minimal hierarchical frame profiler.
 *
 * Events are opened with beginEvent() and closed with endEvent(); nesting is
 * tracked with a stack. A closed event is attached to its parent event, or to
 * the root "Frame" entry when it has no parent. startFrame() discards the
 * previous frame's data and restarts the clock.
 */
class Profiler
{
    // Root entry of the frame being recorded. Initialised so that getFrame()
    // is well defined even before the first startFrame().
    ProfileEntry frame{ "Frame", 0, 0, {} };
    std::chrono::high_resolution_clock::time_point frameBegin;
    // Events that have been begun but not yet ended, innermost on top.
    std::stack<ProfileEntry> currentStack;

    /// @return Time since the last startFrame(), in time_unit ticks.
    int64_t elapsed() const
    {
        const auto sinceFrameBegin = std::chrono::duration_cast<time_unit>(
            std::chrono::high_resolution_clock::now() - frameBegin);
        return static_cast<int64_t>(sinceFrameBegin.count());
    }

public:
    /// @return The process-wide profiler instance.
    static Profiler& get()
    {
        static Profiler profile;
        return profile;
    }

    /// @return The root entry holding every top-level event of the frame.
    const ProfileEntry& getFrame() const
    {
        return frame;
    }

    /**
     * @brief Begins a new frame: restarts the clock and drops recorded data.
     *
     * Events that were left open by the previous frame are discarded as
     * well; their timestamps are relative to the old frame start and would
     * otherwise be attached to the new frame when eventually closed.
     */
    void startFrame()
    {
        frameBegin = std::chrono::high_resolution_clock::now();
        frame = { "Frame", 0, 0, {} };
        currentStack = std::stack<ProfileEntry>();
    }

    /**
     * @brief Opens a new event nested inside the currently open one (if any).
     * @param label Name shown for this event.
     */
    void beginEvent(const std::string& label)
    {
        currentStack.push(ProfileEntry{ label, elapsed(), 0, {} });
    }

    /**
     * @brief Closes the innermost open event and files it under its parent.
     *
     * An unmatched call (no open event) is reported through RW_CHECK and
     * otherwise ignored.
     */
    void endEvent()
    {
        // Sample the clock first so the bookkeeping below is not measured.
        const int64_t now = elapsed();
        RW_CHECK(currentStack.size() > 0, "Perf stack is empty");
        // RW_CHECK comes from rw/defines.hpp; in case it only reports the
        // failure, bail out rather than calling top() on an empty stack.
        if (currentStack.empty()) {
            return;
        }

        // Move rather than copy: an entry owns its whole subtree of children.
        ProfileEntry finished = std::move(currentStack.top());
        currentStack.pop();
        finished.end = now;

        std::vector<ProfileEntry>& siblings = currentStack.empty()
            ? frame.childProfiles
            : currentStack.top().childProfiles;
        siblings.push_back(std::move(finished));
    }
};
}
// Convenience macros forwarding to the perf::Profiler singleton.
// Each expansion already ends in ';', so a call site compiles whether or
// not it adds its own trailing semicolon.

// Marks the start of a new frame (calls Profiler::startFrame).
#define RW_PROFILE_FRAME_BOUNDARY() \
perf::Profiler::get().startFrame();
// Opens a profiling event named `label` (calls Profiler::beginEvent).
#define RW_PROFILE_BEGIN(label) \
perf::Profiler::get().beginEvent(label);
// Closes the most recently opened event (calls Profiler::endEvent).
#define RW_PROFILE_END() \
perf::Profiler::get().endEvent();
#else
// Profiling disabled: every profiling macro expands to nothing so call sites
// compile unchanged. RW_PROFILE_FRAME_BOUNDARY needs a stub too, otherwise
// any code using it fails to build when RW_PROFILER is 0.
#define RW_PROFILE_FRAME_BOUNDARY()
#define RW_PROFILE_BEGIN(label)
#define RW_PROFILE_END()
#endif
#endif

View File

@ -146,6 +146,7 @@ public:
* @brief Draws a texture on the screen
*/
void drawTexture(TextureData* texture, glm::vec4 extents);
void drawColour(const glm::vec4& colour, glm::vec4 extents);
/**
* Renders a model (who'd have thought)

View File

@ -9,36 +9,6 @@
#include <engine/GameWorld.hpp>
#include <gl/DrawBuffer.hpp>
/*
Rendering Instruction contents:
Model matrix
List of subgeometries(??)
*/
typedef uint64_t RenderKey;
// One queued draw: the buffer and parameters to draw with, the model matrix
// to draw it at, and a key used to order it against other instructions.
struct RenderInstruction
{
// Ordering key for this instruction.
RenderKey sortKey;
// Ideally, this would just be an index into a buffer that contains the matrix
glm::mat4 model;
// Geometry to draw; stored as a raw pointer that this struct never frees.
DrawBuffer* dbuff;
// Per-draw parameters, copied from the constructor argument.
Renderer::DrawParameters drawInfo;
// Member-wise constructor: stores each argument in the matching field.
RenderInstruction(
RenderKey key,
const glm::mat4& model,
DrawBuffer* dbuff,
const Renderer::DrawParameters& dp)
: sortKey(key)
, model(model)
, dbuff(dbuff)
, drawInfo(dp)
{
}
};
typedef std::vector<RenderInstruction> RenderList;
/**
* @brief The ObjectRenderer class handles object -> renderer transformation
*

View File

@ -6,6 +6,8 @@
#include <gl/DrawBuffer.hpp>
#include <gl/GeometryBuffer.hpp>
typedef uint64_t RenderKey;
// Maximum depth of debug group stack
#define MAX_DEBUG_DEPTH 5
@ -51,6 +53,36 @@ public:
unsigned int start; /// Start index.
};
/**
* @brief The RenderInstruction struct Generic Rendering instruction
*
* These are generated by the ObjectRenderer, and passed in to the
* OpenGLRenderer by GameRenderer.
*/
struct RenderInstruction
{
// Ordering key for this instruction.
RenderKey sortKey;
// Ideally, this would just be an index into a buffer that contains the matrix
glm::mat4 model;
// Geometry to draw; stored as a raw pointer that this struct never frees.
DrawBuffer* dbuff;
// Per-draw parameters, copied from the constructor argument.
Renderer::DrawParameters drawInfo;
// Member-wise constructor: stores each argument in the matching field.
RenderInstruction(
RenderKey key,
const glm::mat4& model,
DrawBuffer* dbuff,
const Renderer::DrawParameters& dp)
: sortKey(key)
, model(model)
, dbuff(dbuff)
, drawInfo(dp)
{
}
};
typedef std::vector<RenderInstruction> RenderList;
struct ObjectUniformData {
glm::mat4 model;
glm::vec4 colour;
@ -96,6 +128,8 @@ public:
virtual void draw(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p) = 0;
virtual void drawArrays(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p) = 0;
virtual void drawBatched(const RenderList& list) = 0;
void setViewport(const glm::ivec2& vp);
const glm::ivec2& getViewport() const { return viewport; }
@ -203,6 +237,8 @@ public:
void draw(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p);
void drawArrays(const glm::mat4& model, DrawBuffer* draw, const DrawParameters& p);
void drawBatched(const RenderList& list) override;
void invalidate();
virtual void pushDebugGroup(const std::string& title);
@ -235,6 +271,9 @@ private:
}
GLuint UBOObject;
GLuint maxObjectEntries;
GLuint currentObjectEntry;
GLuint entryAlignment;
GLuint UBOScene;
// Debug group profiling timers
@ -247,4 +286,7 @@ private:
GLuint compileShader(GLenum type, const char *source);
GLuint compileProgram(const char* vertex, const char* fragment);
typedef Renderer::RenderList RenderList;
#endif

View File

@ -0,0 +1 @@
#include <core/Profiler.hpp>

View File

@ -27,6 +27,8 @@
#include <glm/gtc/type_ptr.hpp>
#include <glm/gtx/string_cast.hpp>
#include <core/Profiler.hpp>
const size_t skydomeSegments = 8, skydomeRows = 10;
struct WaterVertex {
@ -303,11 +305,15 @@ void GameRenderer::renderWorld(GameWorld* world, const ViewCamera &camera, float
// Render List Construction
//---------------------------------------------------------------
RW_PROFILE_BEGIN("RenderList");
// This is sequential at the moment, it should be easy to make it
// run in parallel with a good threading system.
RenderList renderList;
// Naive optimisation, assume 10% hitrate
renderList.reserve(world->allObjects.size() * 0.1f);
renderList.reserve(world->allObjects.size() * 0.5f);
RW_PROFILE_BEGIN("Build");
// World Objects
for (auto object : world->allObjects) {
@ -318,23 +324,30 @@ void GameRenderer::renderWorld(GameWorld* world, const ViewCamera &camera, float
_renderAlpha,
renderList);
}
RW_PROFILE_END();
renderer->pushDebugGroup("Objects");
renderer->pushDebugGroup("RenderList");
// Also parallelizable
RW_PROFILE_BEGIN("Sort");
std::sort(renderList.begin(), renderList.end(),
[](const RenderInstruction&a, const RenderInstruction&b) {
[](const Renderer::RenderInstruction& a,
const Renderer::RenderInstruction&b) {
return a.sortKey < b.sortKey;
});
for (RenderInstruction& ri : renderList) {
renderer->draw(ri.model, ri.dbuff, ri.drawInfo);
}
RW_PROFILE_END();
RW_PROFILE_BEGIN("Draw");
renderer->drawBatched(renderList);
RW_PROFILE_END();
renderer->popDebugGroup();
profObjects = renderer->popDebugGroup();
RW_PROFILE_END();
// Render arrows above anything that isn't radar only (or hidden)
ModelRef& arrowModel = world->data->models["arrow"];
if( arrowModel && arrowModel->resource )
@ -700,6 +713,37 @@ void GameRenderer::drawTexture(TextureData* texture, glm::vec4 extents)
renderer->invalidate();
}
/**
 * @brief Fills a screen-space rectangle with a solid colour.
 *
 * @param colour  RGBA colour uploaded to the rectangle shader.
 * @param extents Rectangle as (x, y, width, height) in viewport pixels.
 *                Taken by value because it is rewritten in place while
 *                being converted for the shader.
 */
void GameRenderer::drawColour(const glm::vec4& colour, glm::vec4 extents)
{
    glUseProgram(ssRectProgram);

    const glm::ivec2& viewport = renderer->getViewport();

    // Express position and size as fractions of the viewport.
    extents.x /= viewport.x;
    extents.z /= viewport.x;
    extents.y /= viewport.y;
    extents.w /= viewport.y;

    // Shift from the rectangle's corner to its centre, then make the
    // position relative to the middle of the screen.
    extents.x += extents.z / 2.f;
    extents.x -= .5f;
    extents.y += extents.w / 2.f;
    extents.y -= .5f;

    // Scale the position to a [-1, 1] range, flipping the vertical axis.
    // The size components are left untouched.
    extents.x *= 2.f;
    extents.y *= -2.f;

    glEnable(GL_BLEND);

    glUniform2f(ssRectOffset, extents.x, extents.y);
    glUniform2f(ssRectSize, extents.z, extents.w);

    // No texture: bind texture 0 on unit 0 and point the sampler at it.
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, 0);
    glUniform1i(ssRectTexture, 0);

    glUniform4f(ssRectColour, colour.r, colour.g, colour.b, colour.a);

    glBindVertexArray(ssRectDraw.getVAOName());
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // GL state was changed directly above, bypassing the renderer, so tell
    // it to forget whatever bindings it has cached.
    renderer->invalidate();
}
bool GameRenderer::renderFrame(Model* m, ModelFrame* f, const glm::mat4& matrix, GameObject* object, float opacity, bool queueTransparent)
{
auto localmatrix = matrix;

View File

@ -180,7 +180,7 @@ void renderInstance(GameWorld* world,
InstanceObject *instance,
const ViewCamera& camera,
float renderAlpha,
std::vector<RenderInstruction>& outList)
RenderList& outList)
{
if(!instance->model->resource)
{

View File

@ -156,8 +156,39 @@ void OpenGLRenderer::useProgram(Renderer::ShaderProgram* p)
}
}
#if 0
// Specialisation of uploadUBO for per-object uniform data. The surrounding
// #if 0 keeps it out of the build.
// Binds `buffer` as the uniform buffer unless it is already the current one,
// then overwrites the first sizeof(ObjectUniformData) bytes with `data`.
template<>
void OpenGLRenderer::uploadUBO<OpenGLRenderer::ObjectUniformData>(GLuint buffer, const ObjectUniformData& data)
{
// Skip the bind when this buffer is already bound.
if( currentUBO != buffer ) {
glBindBuffer(GL_UNIFORM_BUFFER, buffer);
currentUBO = buffer;
}
// Commented-out alternative: bind only the slice belonging to the current
// entry instead of rewriting the start of the buffer.
/*glBindBufferRange(GL_UNIFORM_BUFFER,
2,
UBOObject,
entryAlignment * currentObjectEntry,
sizeof(ObjectUniformData));*/
// The upload always targets offset 0, regardless of currentObjectEntry.
glBufferSubData(GL_UNIFORM_BUFFER,
0,
sizeof(ObjectUniformData), &data);
#if RW_USING(RENDER_PROFILER)
// Count the upload against the innermost open debug group, if any.
if( currentDebugDepth > 0 )
{
profileInfo[currentDebugDepth-1].uploads++;
}
#endif
// Advance the entry index, wrapping at maxObjectEntries.
// NOTE(review): the modulo requires maxObjectEntries to be non-zero.
currentObjectEntry = (currentObjectEntry+1) % maxObjectEntries;
}
#endif
OpenGLRenderer::OpenGLRenderer()
: currentDbuff(nullptr), currentProgram(nullptr), currentDebugDepth(0)
: currentDbuff(nullptr)
, currentProgram(nullptr)
, maxObjectEntries(0)
, currentObjectEntry(0)
, entryAlignment(0)
, currentDebugDepth(0)
{
// We need to query for some profiling exts.
ogl_CheckExtensions();
@ -170,10 +201,20 @@ OpenGLRenderer::OpenGLRenderer()
swap();
GLint maxUBOSize;
GLint maxUBOSize, UBOAlignment;
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUBOSize);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &UBOAlignment);
entryAlignment = 128;
maxObjectEntries = maxUBOSize / entryAlignment;
std::cout << "Max UBO Size: " << maxUBOSize << std::endl;
std::cout << "Max batch size: " << (maxUBOSize/sizeof(ObjectUniformData)) << std::endl;
std::cout << "UBO Alignment: " << UBOAlignment << std::endl;
std::cout << "Max batch size: " << maxObjectEntries << std::endl;
glBindBuffer(GL_UNIFORM_BUFFER, UBOObject);
glBufferData(GL_UNIFORM_BUFFER,
entryAlignment * maxObjectEntries,
NULL,
GL_STREAM_DRAW);
glGenQueries(1, &debugQuery);
}
@ -323,6 +364,58 @@ void OpenGLRenderer::drawArrays(const glm::mat4& model, DrawBuffer* draw, const
glDrawArrays(draw->getFaceType(), p.start, p.count);
}
// Draws every instruction in `list`, in list order.
// The batched path (one uniform-buffer upload per group of instructions) is
// compiled out with #if 0 until the shaders support it. The active #else
// branch simply forwards each instruction to draw().
void OpenGLRenderer::drawBatched(const RenderList& list)
{
#if 0 // Needs shader changes
// Determine how many batches we need to process the entire list
auto entries = list.size();
glBindBuffer(GL_UNIFORM_BUFFER, UBOObject);
// Each iteration handles up to maxObjectEntries instructions starting at b.
for (int b = 0; b < entries; b += maxObjectEntries)
{
// Number of instructions in this batch (the last batch may be shorter).
auto toConsume = std::min((GLuint)entries, b + maxObjectEntries) - b;
std::vector<ObjectUniformData> uploadBuffer;
uploadBuffer.resize(toConsume);
// Fill the staging buffer with one uniform block per instruction.
for (int d = 0; d < toConsume; ++d)
{
auto& draw = list[b+d];
// Colour channels are divided by 255 before upload.
uploadBuffer[d] = {
draw.model,
glm::vec4(draw.drawInfo.colour.r/255.f,
draw.drawInfo.colour.g/255.f,
draw.drawInfo.colour.b/255.f, 1.f),
1.f,
1.f,
draw.drawInfo.colour.a/255.f
};
}
// Upload the whole batch in a single call.
glBufferData(GL_UNIFORM_BUFFER,
toConsume * sizeof(ObjectUniformData),
uploadBuffer.data(),
GL_STREAM_DRAW);
// Dispatch individual draws
for (int d = 0; d < toConsume; ++d)
{
auto& draw = list[b+d];
useDrawBuffer(draw.dbuff);
for( GLuint u = 0; u < draw.drawInfo.textures.size(); ++u )
{
useTexture(u, draw.drawInfo.textures[u]);
}
glDrawElements(draw.dbuff->getFaceType(), draw.drawInfo.count, GL_UNSIGNED_INT,
(void*) (sizeof(RenderIndex) * draw.drawInfo.start));
}
}
#else
// Active path: issue one regular draw() per instruction.
for(auto& ri : list)
{
draw(ri.model, ri.dbuff, ri.drawInfo);
}
#endif
}
void OpenGLRenderer::invalidate()
{
currentDbuff = nullptr;

View File

@ -7,6 +7,8 @@
#include "benchmarkstate.hpp"
#include "debug/HttpServer.hpp"
#include <core/Profiler.hpp>
#include <objects/GameObject.hpp>
#include <engine/GameState.hpp>
#include <engine/SaveGame.hpp>
@ -295,7 +297,10 @@ int RWGame::run()
// Loop until the window is closed or we run out of state.
while (window.isOpen() && StateManager::get().states.size()) {
State* state = StateManager::get().states.back();
RW_PROFILE_FRAME_BOUNDARY();
RW_PROFILE_BEGIN("Input");
sf::Event event;
while (window.pollEvent(event)) {
switch (event.type) {
@ -313,8 +318,11 @@ int RWGame::run()
default: break;
}
RW_PROFILE_BEGIN("State");
state->handleEvent(event);
RW_PROFILE_END()
}
RW_PROFILE_END();
if(! window.isOpen() )
{
@ -324,13 +332,19 @@ int RWGame::run()
float timer = clock.restart().asSeconds();
accum += timer * timescale;
while ( accum >= GAME_TIMESTEP ) {
RW_PROFILE_BEGIN("Update");
if ( accum >= GAME_TIMESTEP ) {
RW_PROFILE_BEGIN("state");
StateManager::get().tick(GAME_TIMESTEP);
RW_PROFILE_END();
if (StateManager::get().states.size() == 0) {
break;
}
RW_PROFILE_BEGIN("engine");
tick(GAME_TIMESTEP);
RW_PROFILE_END();
accum -= GAME_TIMESTEP;
@ -340,6 +354,7 @@ int RWGame::run()
accum = 0.f;
}
}
RW_PROFILE_END();
float alpha = fmod(accum, GAME_TIMESTEP) / GAME_TIMESTEP;
if( ! state->shouldWorldUpdate() )
@ -347,11 +362,19 @@ int RWGame::run()
alpha = 1.f;
}
RW_PROFILE_BEGIN("Render");
RW_PROFILE_BEGIN("engine");
render(alpha, timer);
RW_PROFILE_END();
RW_PROFILE_BEGIN("state");
if (StateManager::get().states.size() > 0) {
StateManager::get().draw(renderer);
}
RW_PROFILE_END();
RW_PROFILE_END();
renderProfile();
window.display();
}
@ -529,10 +552,14 @@ void RWGame::render(float alpha, float time)
renderer->getRenderer()->pushDebugGroup("World");
RW_PROFILE_BEGIN("world");
renderer->renderWorld(world, viewCam, alpha);
RW_PROFILE_END();
auto rendertime = renderer->getRenderer()->popDebugGroup();
RW_PROFILE_BEGIN("debug");
if( showDebugPaths )
{
renderDebugPaths(time);
@ -551,6 +578,7 @@ void RWGame::render(float alpha, float time)
debug->flush(renderer);
}
}
RW_PROFILE_END();
drawOnScreenText(world, renderer);
}
@ -698,6 +726,60 @@ void RWGame::renderDebugPaths(float time)
debug->flush(renderer);
}
/**
 * @brief Draws the profiler's current frame as rows of coloured timing bars.
 *
 * Each event becomes a bar whose horizontal position and width are its start
 * time and duration (in microseconds) scaled so that `upperlimit`
 * microseconds span the viewport width. Child events are drawn one row
 * below their parent. A text marker is drawn at the 16 ms position.
 * Compiles to an empty function unless RW_PROFILER is enabled.
 */
void RWGame::renderProfile()
{
#if RW_PROFILER
    const perf::ProfileEntry& frame = perf::Profiler::get().getFrame();

    // Time span, in microseconds, mapped onto the full viewport width.
    constexpr float upperlimit = 30000.f;
    constexpr float lineHeight = 15.f;
    // Position of the reference marker, in microseconds (16000 us = 16 ms).
    constexpr float markerTime = 16000.f;

    // Palette of 8*8*8 opaque colours, built once on first use.
    static std::vector<glm::vec4> perf_colours;
    if (perf_colours.empty()) {
        const float c = 8.f;
        for (int r = 0; r < c; ++r) {
            for (int g = 0; g < c; ++g) {
                for (int b = 0; b < c; ++b) {
                    perf_colours.push_back({
                        r / c, g / c, b / c, 1.f
                    });
                }
            }
        }
    }

    const float xscale = renderer->getRenderer()->getViewport().x / upperlimit;

    TextRenderer::TextInfo ti;
    ti.align = TextRenderer::TextInfo::Left;
    ti.font = 2;
    ti.size = lineHeight - 2.f;

    // Recursive, hence std::function: draws the children of `entry` on the
    // row for `depth`, then descends into each child.
    std::function<void(const perf::ProfileEntry&,int)> renderEntry =
        [&](const perf::ProfileEntry& entry, int depth)
    {
        int g = 0;
        for (auto& event : entry.childProfiles)
        {
            auto duration = event.end - event.start;
            float y = 60.f + (depth * (lineHeight + 5.f));

            // Colour is derived from the parent's label and the child's
            // index among its siblings.
            // NOTE(review): the index starts at 0, so the first child of
            // every entry always maps to palette slot 0.
            const auto colourIndex =
                (std::hash<std::string>()(entry.label) * (g++)) % perf_colours.size();
            renderer->drawColour(perf_colours[colourIndex],
            {
                xscale * event.start,
                y,
                xscale * duration,
                lineHeight
            });

            ti.screenPosition.x = xscale * (event.start);
            ti.screenPosition.y = y + 2.f;
            ti.text = event.label + " " + std::to_string(duration) + " us ";
            renderer->text.renderText(ti);

            renderEntry(event, depth+1);
        }
    };
    renderEntry(frame, 0);

    // Reference marker. The label previously read ".16 ms", which did not
    // match the 16000 us position it is drawn at.
    ti.screenPosition = glm::vec2( xscale * markerTime, 40.f);
    ti.text = "16 ms";
    renderer->text.renderText(ti);
#endif
}
void RWGame::globalKeyEvent(const sf::Event& event)
{
switch (event.key.code) {
@ -722,6 +804,9 @@ void RWGame::globalKeyEvent(const sf::Event& event)
case sf::Keyboard::F3:
showDebugPhysics = ! showDebugPhysics;
break;
case sf::Keyboard::F12:
window.capture().saveToFile("/home/dan/screenshot.png");
break;
default: break;
}
}

View File

@ -126,6 +126,7 @@ private:
void renderDebugStats(float time, Renderer::ProfileInfo& worldRenderTime);
void renderDebugPaths(float time);
void renderProfile();
void globalKeyEvent(const sf::Event& event);
};