mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 02:32:36 +01:00
vk: Add basic support for FSR 1.0
- Only implemented for image upscaling. - Disabled by default. Emulators cannot ensure upscalers are injected at the right rendering step. - GUI integration not implemented.
This commit is contained in:
parent
69b34693f0
commit
d0a824996b
2656
3rdparty/GPUOpen/include/ffx_a.h
vendored
Normal file
2656
3rdparty/GPUOpen/include/ffx_a.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1199
3rdparty/GPUOpen/include/ffx_fsr1.h
vendored
Normal file
1199
3rdparty/GPUOpen/include/ffx_fsr1.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -450,6 +450,7 @@ target_sources(rpcs3_emu PRIVATE
|
||||
|
||||
if(TARGET 3rdparty_vulkan)
|
||||
target_sources(rpcs3_emu PRIVATE
|
||||
RSX/VK/upscalers/fsr1/fsr_pass.cpp
|
||||
RSX/VK/vkutils/barriers.cpp
|
||||
RSX/VK/vkutils/buffer_object.cpp
|
||||
RSX/VK/vkutils/chip_class.cpp
|
||||
|
@ -623,10 +623,10 @@ VKGSRender::~VKGSRender()
|
||||
// Clear flush requests
|
||||
m_flush_requests.clear_pending_flag();
|
||||
|
||||
//Texture cache
|
||||
// Texture cache
|
||||
m_texture_cache.destroy();
|
||||
|
||||
//Shaders
|
||||
// Shaders
|
||||
vk::destroy_pipe_compiler(); // Ensure no pending shaders being compiled
|
||||
vk::finalize_compiler_context(); // Shut down the glslang compiler
|
||||
m_prog_buffer->clear(); // Delete shader objects
|
||||
@ -636,10 +636,13 @@ VKGSRender::~VKGSRender()
|
||||
m_volatile_attribute_storage.reset();
|
||||
m_vertex_layout_storage.reset();
|
||||
|
||||
//Global resources
|
||||
// Upscaler (references some global resources)
|
||||
m_upscaler.reset();
|
||||
|
||||
// Global resources
|
||||
vk::destroy_global_resources();
|
||||
|
||||
//Heaps
|
||||
// Heaps
|
||||
m_attrib_ring_info.destroy();
|
||||
m_fragment_env_ring_info.destroy();
|
||||
m_vertex_env_ring_info.destroy();
|
||||
@ -653,13 +656,13 @@ VKGSRender::~VKGSRender()
|
||||
m_fragment_instructions_buffer.destroy();
|
||||
m_raster_env_ring_info.destroy();
|
||||
|
||||
//Fallback bindables
|
||||
// Fallback bindables
|
||||
null_buffer.reset();
|
||||
null_buffer_view.reset();
|
||||
|
||||
if (m_current_frame == &m_aux_frame_context)
|
||||
{
|
||||
//Return resources back to the owner
|
||||
// Return resources back to the owner
|
||||
m_current_frame = &frame_context_storage[m_current_queue_index];
|
||||
m_current_frame->swap_storage(m_aux_frame_context);
|
||||
m_current_frame->grab_resources(m_aux_frame_context);
|
||||
@ -667,7 +670,7 @@ VKGSRender::~VKGSRender()
|
||||
|
||||
m_aux_frame_context.buffer_views_to_clean.clear();
|
||||
|
||||
//NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when pool is destroyed
|
||||
// NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when pool is destroyed
|
||||
for (auto &ctx : frame_context_storage)
|
||||
{
|
||||
vkDestroySemaphore((*m_device), ctx.present_wait_semaphore, nullptr);
|
||||
@ -677,24 +680,24 @@ VKGSRender::~VKGSRender()
|
||||
ctx.buffer_views_to_clean.clear();
|
||||
}
|
||||
|
||||
//Textures
|
||||
// Textures
|
||||
m_rtts.destroy();
|
||||
m_texture_cache.destroy();
|
||||
|
||||
m_stencil_mirror_sampler.reset();
|
||||
|
||||
//Overlay text handler
|
||||
// Overlay text handler
|
||||
m_text_writer.reset();
|
||||
|
||||
//Pipeline descriptors
|
||||
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
|
||||
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
|
||||
|
||||
//Queries
|
||||
// Queries
|
||||
m_occlusion_query_manager.reset();
|
||||
m_cond_render_buffer.reset();
|
||||
|
||||
//Command buffer
|
||||
// Command buffer
|
||||
for (auto &cb : m_primary_cb_list)
|
||||
cb.destroy();
|
||||
|
||||
@ -703,7 +706,7 @@ VKGSRender::~VKGSRender()
|
||||
m_secondary_command_buffer.destroy();
|
||||
m_secondary_command_buffer_pool.destroy();
|
||||
|
||||
//Device handles/contexts
|
||||
// Device handles/contexts
|
||||
m_swapchain->destroy();
|
||||
m_instance.destroy();
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include "Emu/RSX/GSRender.h"
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include "upscalers/upscaling.h"
|
||||
|
||||
#include "vkutils/descriptors.hpp"
|
||||
#include "vkutils/data_heap.h"
|
||||
#include "vkutils/instance.hpp"
|
||||
@ -336,6 +338,7 @@ namespace vk
|
||||
}
|
||||
|
||||
using namespace vk::vmm_allocation_pool_; // clang workaround.
|
||||
using namespace vk::upscaling_flags_; // ditto
|
||||
|
||||
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
|
||||
{
|
||||
@ -380,6 +383,7 @@ private:
|
||||
std::unique_ptr<vk::buffer_view> null_buffer_view;
|
||||
|
||||
std::unique_ptr<vk::text_writer> m_text_writer;
|
||||
std::unique_ptr<vk::upscaler> m_upscaler;
|
||||
|
||||
std::unique_ptr<vk::buffer> m_cond_render_buffer;
|
||||
u64 m_cond_render_sync_tag = 0;
|
||||
@ -518,7 +522,7 @@ private:
|
||||
void present(vk::frame_context_t *ctx);
|
||||
void reinitialize_swapchain();
|
||||
|
||||
vk::image* get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig);
|
||||
vk::viewable_image* get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig);
|
||||
|
||||
void begin_render_pass();
|
||||
void close_render_pass();
|
||||
|
@ -1037,8 +1037,7 @@ namespace vk
|
||||
|
||||
for (auto& img : src)
|
||||
{
|
||||
// Only raw uploads can possibly have mismatched layout here
|
||||
img->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
img->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
views.push_back(img->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector));
|
||||
}
|
||||
|
||||
@ -1048,5 +1047,10 @@ namespace vk
|
||||
}
|
||||
|
||||
overlay_pass::run(cmd, viewport, target, views, render_pass);
|
||||
|
||||
for (auto& img : src)
|
||||
{
|
||||
img->pop_layout(cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include "Emu/RSX/Overlays/overlays.h"
|
||||
#include "Emu/Cell/Modules/cellVideoOut.h"
|
||||
|
||||
#include "upscalers/bilinear_pass.hpp"
|
||||
#include "upscalers/fsr_pass.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
void VKGSRender::reinitialize_swapchain()
|
||||
@ -34,6 +36,9 @@ void VKGSRender::reinitialize_swapchain()
|
||||
frame_context_cleanup(&ctx, true);
|
||||
}
|
||||
|
||||
// Discard the current upscaling pipeline if any
|
||||
m_upscaler.reset();
|
||||
|
||||
// Drain all the queues
|
||||
vkDeviceWaitIdle(*m_device);
|
||||
|
||||
@ -275,9 +280,9 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resou
|
||||
vk::advance_completed_frame_counter();
|
||||
}
|
||||
|
||||
vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig)
|
||||
vk::viewable_image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig)
|
||||
{
|
||||
vk::image* image_to_flip = nullptr;
|
||||
vk::viewable_image* image_to_flip = nullptr;
|
||||
|
||||
// Check the surface store first
|
||||
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
|
||||
@ -329,7 +334,8 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
|
||||
{
|
||||
// Hack - this should be the first location to check for output
|
||||
// The render might have been done offscreen or in software and a blit used to display
|
||||
image_to_flip = surface->get_raw_texture();
|
||||
image_to_flip = dynamic_cast<vk::viewable_image*>(surface->get_raw_texture());
|
||||
ensure(image_to_flip);
|
||||
}
|
||||
|
||||
if (!image_to_flip)
|
||||
@ -461,7 +467,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
||||
}
|
||||
|
||||
// Scan memory for required data. This is done early to optimize waiting for the driver image acquire below.
|
||||
vk::image *image_to_flip = nullptr, *image_to_flip2 = nullptr;
|
||||
vk::viewable_image *image_to_flip = nullptr, *image_to_flip2 = nullptr;
|
||||
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
|
||||
{
|
||||
vk::present_surface_info present_info;
|
||||
@ -590,23 +596,63 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
||||
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
}
|
||||
|
||||
if (!m_upscaler)
|
||||
{
|
||||
if (g_cfg.video.vk.fsr_upscaling)
|
||||
{
|
||||
m_upscaler = std::make_unique<vk::fsr_upscale_pass>();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_upscaler = std::make_unique<vk::bilinear_upscale_pass>();
|
||||
}
|
||||
}
|
||||
|
||||
if (image_to_flip)
|
||||
{
|
||||
const bool use_full_rgb_range_output = g_cfg.video.full_rgb_range_output.get();
|
||||
|
||||
if (!use_full_rgb_range_output || !rsx::fcmp(avconfig.gamma, 1.f) || avconfig._3d) [[unlikely]]
|
||||
{
|
||||
calibration_src.push_back(dynamic_cast<vk::viewable_image*>(image_to_flip));
|
||||
ensure(calibration_src.front());
|
||||
if (image_to_flip) calibration_src.push_back(image_to_flip);
|
||||
if (image_to_flip2) calibration_src.push_back(image_to_flip2);
|
||||
|
||||
if (image_to_flip2)
|
||||
if (g_cfg.video.vk.fsr_upscaling && !avconfig._3d) // 3D will be implemented later
|
||||
{
|
||||
calibration_src.push_back(dynamic_cast<vk::viewable_image*>(image_to_flip2));
|
||||
ensure(calibration_src.back());
|
||||
}
|
||||
}
|
||||
// Run upscaling pass before the rest of the output effects pipeline
|
||||
// This can be done with all upscalers but we already get bilinear upscaling for free if we just output the filters directly
|
||||
VkImageBlit request = {};
|
||||
request.srcSubresource = { image_to_flip->aspect(), 0, 0, 1 };
|
||||
request.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
|
||||
request.srcOffsets[0] = { 0, 0, 0 };
|
||||
request.srcOffsets[1] = { s32(buffer_width), s32(buffer_height), 1 };
|
||||
request.dstOffsets[0] = { 0, 0, 0 };
|
||||
request.dstOffsets[1] = { aspect_ratio.width, aspect_ratio.height, 1 };
|
||||
|
||||
if (calibration_src.empty()) [[likely]]
|
||||
for (unsigned i = 0; i < calibration_src.size(); ++i)
|
||||
{
|
||||
const rsx::flags32_t mode = (i == 0) ? UPSCALE_LEFT_VIEW : UPSCALE_RIGHT_VIEW;
|
||||
calibration_src[i] = m_upscaler->scale_output(*m_current_command_buffer, image_to_flip, VK_NULL_HANDLE, VK_IMAGE_LAYOUT_UNDEFINED, request, mode);
|
||||
}
|
||||
}
|
||||
|
||||
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
|
||||
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
|
||||
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
|
||||
single_target_pass = vk::get_renderpass(*m_device, key);
|
||||
ensure(single_target_pass != VK_NULL_HANDLE);
|
||||
|
||||
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, VK_FALSE, single_target_pass, m_swapchain->get_surface_format(), target_image);
|
||||
direct_fbo->add_ref();
|
||||
|
||||
vk::get_overlay_pass<vk::video_out_calibration_pass>()->run(
|
||||
*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src,
|
||||
avconfig.gamma, !use_full_rgb_range_output, avconfig._3d, single_target_pass);
|
||||
|
||||
direct_fbo->release();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Do raw transfer here as there is no image object associated with textures owned by the driver (TODO)
|
||||
const areai dst_rect = aspect_ratio;
|
||||
@ -619,35 +665,13 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
||||
rgn.dstOffsets[0] = { dst_rect.x1, dst_rect.y1, 0 };
|
||||
rgn.dstOffsets[1] = { dst_rect.x2, dst_rect.y2, 1 };
|
||||
|
||||
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
if (target_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||
{
|
||||
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
|
||||
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
}
|
||||
|
||||
vkCmdBlitImage(*m_current_command_buffer, image_to_flip->value, image_to_flip->current_layout, target_image, target_layout, 1, &rgn, VK_FILTER_LINEAR);
|
||||
image_to_flip->pop_layout(*m_current_command_buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
|
||||
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
|
||||
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
|
||||
single_target_pass = vk::get_renderpass(*m_device, key);
|
||||
ensure(single_target_pass != VK_NULL_HANDLE);
|
||||
|
||||
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, VK_FALSE, single_target_pass, m_swapchain->get_surface_format(), target_image);
|
||||
direct_fbo->add_ref();
|
||||
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
|
||||
vk::get_overlay_pass<vk::video_out_calibration_pass>()->run(
|
||||
*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src,
|
||||
avconfig.gamma, !use_full_rgb_range_output, avconfig._3d, single_target_pass);
|
||||
|
||||
image_to_flip->pop_layout(*m_current_command_buffer);
|
||||
direct_fbo->release();
|
||||
m_upscaler->scale_output(*m_current_command_buffer, image_to_flip, target_image, target_layout, rgn, UPSCALE_AND_COMMIT | UPSCALE_DEFAULT_VIEW);
|
||||
}
|
||||
|
||||
if (m_frame->screenshot_toggle)
|
||||
|
@ -1199,7 +1199,7 @@ namespace vk
|
||||
baseclass::on_frame_end();
|
||||
}
|
||||
|
||||
vk::image* texture_cache::upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch)
|
||||
vk::viewable_image* texture_cache::upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch)
|
||||
{
|
||||
bool linear_format_supported = false;
|
||||
|
||||
|
@ -497,7 +497,7 @@ namespace vk
|
||||
|
||||
void on_frame_end() override;
|
||||
|
||||
vk::image* upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch);
|
||||
vk::viewable_image* upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch);
|
||||
|
||||
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, vk::surface_cache& m_rtts, vk::command_buffer& cmd);
|
||||
|
||||
|
32
rpcs3/Emu/RSX/VK/upscalers/bilinear_pass.hpp
Normal file
32
rpcs3/Emu/RSX/VK/upscalers/bilinear_pass.hpp
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include "upscaling.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
// Upscaler that scales by issuing a vkCmdBlitImage with VK_FILTER_LINEAR.
// It holds no state and no intermediate image: it can only scale while
// committing straight to the present surface.
struct bilinear_upscale_pass : public upscaler
|
||||
{
|
||||
// Scales 'src' as described by 'request'.
// Returns nullptr when UPSCALE_AND_COMMIT is set in 'mode' (the result was blitted to 'present_surface'),
// otherwise returns 'src' unmodified because this pass cannot produce a scaled copy of the source.
vk::viewable_image* scale_output(
|
||||
const vk::command_buffer& cmd, // Command buffer the blit is recorded into
|
||||
vk::viewable_image* src, // Source input
|
||||
VkImage present_surface, // Present target. May be VK_NULL_HANDLE for some passes
|
||||
VkImageLayout present_surface_layout, // Present surface layout, or VK_IMAGE_LAYOUT_UNDEFINED if no present target is provided
|
||||
const VkImageBlit& request, // Scaling request information
|
||||
rsx::flags32_t mode // Bitmask of upscaling flags; only UPSCALE_AND_COMMIT is inspected here
|
||||
) override
|
||||
{
|
||||
if (mode & UPSCALE_AND_COMMIT)
|
||||
{
|
||||
// A destination image is mandatory when committing
|
||||
ensure(present_surface);
|
||||
|
||||
|
||||
// Put the source in TRANSFER_SRC layout for the blit; pop_layout below presumably restores the previous layout
src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
vkCmdBlitImage(cmd, src->value, src->current_layout, present_surface, present_surface_layout, 1, &request, VK_FILTER_LINEAR);
|
||||
src->pop_layout(cmd);
|
||||
// Nothing to hand back, the output already lives in the present surface
return nullptr;
|
||||
}
|
||||
|
||||
// Upscaling source only is unsupported
|
||||
return src;
|
||||
}
|
||||
};
|
||||
}
|
1323
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_ffx_a_flattened.inc
Normal file
1323
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_ffx_a_flattened.inc
Normal file
File diff suppressed because it is too large
Load Diff
649
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_ffx_fsr1_flattened.inc
Normal file
649
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_ffx_fsr1_flattened.inc
Normal file
@ -0,0 +1,649 @@
|
||||
// Preprocessed version of ffx_fsr1.h from FSR package
|
||||
// For original source, see 3rdparty/GPUOpen/include/ffx_fsr1.h
|
||||
// Modifications are required to meet MSVC's strict limitations on string length. No single string literal (concatenated or otherwise) can exceed 64K
|
||||
// * Removed CPU section as we do not need it
|
||||
// * Stripped all comment-only lines, the original source is still readable for this purpose
|
||||
// * Stripped all unnecessary whitespace
|
||||
|
||||
R"--RPCS3--(
|
||||
#if defined(A_GPU)&&defined(FSR_EASU_F)
|
||||
AF4 FsrEasuRF(AF2 p);
|
||||
AF4 FsrEasuGF(AF2 p);
|
||||
AF4 FsrEasuBF(AF2 p);
|
||||
void FsrEasuTapF(
|
||||
inout AF3 aC, // Accumulated color, with negative lobe.
|
||||
inout AF1 aW, // Accumulated weight.
|
||||
AF2 off, // Pixel offset from resolve position to tap.
|
||||
AF2 dir, // Gradient direction.
|
||||
AF2 len, // Length.
|
||||
AF1 lob, // Negative lobe strength.
|
||||
AF1 clp, // Clipping point.
|
||||
AF3 c){ // Tap color.
|
||||
AF2 v;
|
||||
v.x=(off.x*( dir.x))+(off.y*dir.y);
|
||||
v.y=(off.x*(-dir.y))+(off.y*dir.x);
|
||||
v*=len;
|
||||
AF1 d2=v.x*v.x+v.y*v.y;
|
||||
d2=min(d2,clp);
|
||||
AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0);
|
||||
AF1 wA=lob*d2+AF1_(-1.0);
|
||||
wB*=wB;
|
||||
wA*=wA;
|
||||
wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0));
|
||||
AF1 w=wB*wA;
|
||||
aC+=c*w;aW+=w;}
|
||||
void FsrEasuSetF(
|
||||
inout AF2 dir,
|
||||
inout AF1 len,
|
||||
AF2 pp,
|
||||
AP1 biS,AP1 biT,AP1 biU,AP1 biV,
|
||||
AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){
|
||||
AF1 w = AF1_(0.0);
|
||||
if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y);
|
||||
if(biT)w= pp.x *(AF1_(1.0)-pp.y);
|
||||
if(biU)w=(AF1_(1.0)-pp.x)* pp.y ;
|
||||
if(biV)w= pp.x * pp.y ;
|
||||
AF1 dc=lD-lC;
|
||||
AF1 cb=lC-lB;
|
||||
AF1 lenX=max(abs(dc),abs(cb));
|
||||
lenX=APrxLoRcpF1(lenX);
|
||||
AF1 dirX=lD-lB;
|
||||
dir.x+=dirX*w;
|
||||
lenX=ASatF1(abs(dirX)*lenX);
|
||||
lenX*=lenX;
|
||||
len+=lenX*w;
|
||||
AF1 ec=lE-lC;
|
||||
AF1 ca=lC-lA;
|
||||
AF1 lenY=max(abs(ec),abs(ca));
|
||||
lenY=APrxLoRcpF1(lenY);
|
||||
AF1 dirY=lE-lA;
|
||||
dir.y+=dirY*w;
|
||||
lenY=ASatF1(abs(dirY)*lenY);
|
||||
lenY*=lenY;
|
||||
len+=lenY*w;}
|
||||
void FsrEasuF(
|
||||
out AF3 pix,
|
||||
AU2 ip, // Integer pixel position in output.
|
||||
AU4 con0, // Constants generated by FsrEasuCon().
|
||||
AU4 con1,
|
||||
AU4 con2,
|
||||
AU4 con3){
|
||||
AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
|
||||
AF2 fp=floor(pp);
|
||||
pp-=fp;
|
||||
AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
|
||||
AF2 p1=p0+AF2_AU2(con2.xy);
|
||||
AF2 p2=p0+AF2_AU2(con2.zw);
|
||||
AF2 p3=p0+AF2_AU2(con3.xy);
|
||||
AF4 bczzR=FsrEasuRF(p0);
|
||||
AF4 bczzG=FsrEasuGF(p0);
|
||||
AF4 bczzB=FsrEasuBF(p0);
|
||||
AF4 ijfeR=FsrEasuRF(p1);
|
||||
AF4 ijfeG=FsrEasuGF(p1);
|
||||
AF4 ijfeB=FsrEasuBF(p1);
|
||||
AF4 klhgR=FsrEasuRF(p2);
|
||||
AF4 klhgG=FsrEasuGF(p2);
|
||||
AF4 klhgB=FsrEasuBF(p2);
|
||||
AF4 zzonR=FsrEasuRF(p3);
|
||||
AF4 zzonG=FsrEasuGF(p3);
|
||||
AF4 zzonB=FsrEasuBF(p3);
|
||||
AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG);
|
||||
AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG);
|
||||
AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG);
|
||||
AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG);
|
||||
AF1 bL=bczzL.x;
|
||||
AF1 cL=bczzL.y;
|
||||
AF1 iL=ijfeL.x;
|
||||
AF1 jL=ijfeL.y;
|
||||
AF1 fL=ijfeL.z;
|
||||
AF1 eL=ijfeL.w;
|
||||
AF1 kL=klhgL.x;
|
||||
AF1 lL=klhgL.y;
|
||||
AF1 hL=klhgL.z;
|
||||
AF1 gL=klhgL.w;
|
||||
AF1 oL=zzonL.z;
|
||||
AF1 nL=zzonL.w;
|
||||
AF2 dir=AF2_(0.0);
|
||||
AF1 len=AF1_(0.0);
|
||||
FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL);
|
||||
FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL);
|
||||
FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL);
|
||||
FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL);
|
||||
AF2 dir2=dir*dir;
|
||||
AF1 dirR=dir2.x+dir2.y;
|
||||
AP1 zro=dirR<AF1_(1.0/32768.0);
|
||||
dirR=APrxLoRsqF1(dirR);
|
||||
dirR=zro?AF1_(1.0):dirR;
|
||||
dir.x=zro?AF1_(1.0):dir.x;
|
||||
dir*=AF2_(dirR);
|
||||
len=len*AF1_(0.5);
|
||||
len*=len;
|
||||
AF1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpF1(max(abs(dir.x),abs(dir.y)));
|
||||
AF2 len2=AF2(AF1_(1.0)+(stretch-AF1_(1.0))*len,AF1_(1.0)+AF1_(-0.5)*len);
|
||||
AF1 lob=AF1_(0.5)+AF1_((1.0/4.0-0.04)-0.5)*len;
|
||||
AF1 clp=APrxLoRcpF1(lob);
|
||||
AF3 min4=min(AMin3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
|
||||
AF3(klhgR.x,klhgG.x,klhgB.x));
|
||||
AF3 max4=max(AMax3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
|
||||
AF3(klhgR.x,klhgG.x,klhgB.x));
|
||||
AF3 aC=AF3_(0.0);
|
||||
AF1 aW=AF1_(0.0);
|
||||
FsrEasuTapF(aC,aW,AF2( 0.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.x,bczzG.x,bczzB.x)); // b
|
||||
FsrEasuTapF(aC,aW,AF2( 1.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.y,bczzG.y,bczzB.y)); // c
|
||||
FsrEasuTapF(aC,aW,AF2(-1.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.x,ijfeG.x,ijfeB.x)); // i
|
||||
FsrEasuTapF(aC,aW,AF2( 0.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.y,ijfeG.y,ijfeB.y)); // j
|
||||
FsrEasuTapF(aC,aW,AF2( 0.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.z,ijfeG.z,ijfeB.z)); // f
|
||||
FsrEasuTapF(aC,aW,AF2(-1.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.w,ijfeG.w,ijfeB.w)); // e
|
||||
FsrEasuTapF(aC,aW,AF2( 1.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.x,klhgG.x,klhgB.x)); // k
|
||||
FsrEasuTapF(aC,aW,AF2( 2.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.y,klhgG.y,klhgB.y)); // l
|
||||
FsrEasuTapF(aC,aW,AF2( 2.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.z,klhgG.z,klhgB.z)); // h
|
||||
FsrEasuTapF(aC,aW,AF2( 1.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.w,klhgG.w,klhgB.w)); // g
|
||||
FsrEasuTapF(aC,aW,AF2( 1.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.z,zzonG.z,zzonB.z)); // o
|
||||
FsrEasuTapF(aC,aW,AF2( 0.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.w,zzonG.w,zzonB.w)); // n
|
||||
pix=min(max4,max(min4,aC*AF3_(ARcpF1(aW))));}
|
||||
#endif
|
||||
#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_EASU_H)
|
||||
AH4 FsrEasuRH(AF2 p);
|
||||
AH4 FsrEasuGH(AF2 p);
|
||||
AH4 FsrEasuBH(AF2 p);
|
||||
void FsrEasuTapH(
|
||||
inout AH2 aCR,inout AH2 aCG,inout AH2 aCB,
|
||||
inout AH2 aW,
|
||||
AH2 offX,AH2 offY,
|
||||
AH2 dir,
|
||||
AH2 len,
|
||||
AH1 lob,
|
||||
AH1 clp,
|
||||
AH2 cR,AH2 cG,AH2 cB){
|
||||
AH2 vX,vY;
|
||||
vX=offX* dir.xx +offY*dir.yy;
|
||||
vY=offX*(-dir.yy)+offY*dir.xx;
|
||||
vX*=len.x;vY*=len.y;
|
||||
AH2 d2=vX*vX+vY*vY;
|
||||
d2=min(d2,AH2_(clp));
|
||||
AH2 wB=AH2_(2.0/5.0)*d2+AH2_(-1.0);
|
||||
AH2 wA=AH2_(lob)*d2+AH2_(-1.0);
|
||||
wB*=wB;
|
||||
wA*=wA;
|
||||
wB=AH2_(25.0/16.0)*wB+AH2_(-(25.0/16.0-1.0));
|
||||
AH2 w=wB*wA;
|
||||
aCR+=cR*w;aCG+=cG*w;aCB+=cB*w;aW+=w;}
|
||||
void FsrEasuSetH(
|
||||
inout AH2 dirPX,inout AH2 dirPY,
|
||||
inout AH2 lenP,
|
||||
AH2 pp,
|
||||
AP1 biST,AP1 biUV,
|
||||
AH2 lA,AH2 lB,AH2 lC,AH2 lD,AH2 lE){
|
||||
AH2 w = AH2_(0.0);
|
||||
if(biST)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_(AH1_(1.0)-pp.y);
|
||||
if(biUV)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_( pp.y);
|
||||
AH2 dc=lD-lC;
|
||||
AH2 cb=lC-lB;
|
||||
AH2 lenX=max(abs(dc),abs(cb));
|
||||
lenX=ARcpH2(lenX);
|
||||
AH2 dirX=lD-lB;
|
||||
dirPX+=dirX*w;
|
||||
lenX=ASatH2(abs(dirX)*lenX);
|
||||
lenX*=lenX;
|
||||
lenP+=lenX*w;
|
||||
AH2 ec=lE-lC;
|
||||
AH2 ca=lC-lA;
|
||||
AH2 lenY=max(abs(ec),abs(ca));
|
||||
lenY=ARcpH2(lenY);
|
||||
AH2 dirY=lE-lA;
|
||||
dirPY+=dirY*w;
|
||||
lenY=ASatH2(abs(dirY)*lenY);
|
||||
lenY*=lenY;
|
||||
lenP+=lenY*w;}
|
||||
void FsrEasuH(
|
||||
out AH3 pix,
|
||||
AU2 ip,
|
||||
AU4 con0,
|
||||
AU4 con1,
|
||||
AU4 con2,
|
||||
AU4 con3){
|
||||
AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
|
||||
AF2 fp=floor(pp);
|
||||
pp-=fp;
|
||||
AH2 ppp=AH2(pp);
|
||||
AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
|
||||
AF2 p1=p0+AF2_AU2(con2.xy);
|
||||
AF2 p2=p0+AF2_AU2(con2.zw);
|
||||
AF2 p3=p0+AF2_AU2(con3.xy);
|
||||
AH4 bczzR=FsrEasuRH(p0);
|
||||
AH4 bczzG=FsrEasuGH(p0);
|
||||
AH4 bczzB=FsrEasuBH(p0);
|
||||
AH4 ijfeR=FsrEasuRH(p1);
|
||||
AH4 ijfeG=FsrEasuGH(p1);
|
||||
AH4 ijfeB=FsrEasuBH(p1);
|
||||
AH4 klhgR=FsrEasuRH(p2);
|
||||
AH4 klhgG=FsrEasuGH(p2);
|
||||
AH4 klhgB=FsrEasuBH(p2);
|
||||
AH4 zzonR=FsrEasuRH(p3);
|
||||
AH4 zzonG=FsrEasuGH(p3);
|
||||
AH4 zzonB=FsrEasuBH(p3);
|
||||
AH4 bczzL=bczzB*AH4_(0.5)+(bczzR*AH4_(0.5)+bczzG);
|
||||
AH4 ijfeL=ijfeB*AH4_(0.5)+(ijfeR*AH4_(0.5)+ijfeG);
|
||||
AH4 klhgL=klhgB*AH4_(0.5)+(klhgR*AH4_(0.5)+klhgG);
|
||||
AH4 zzonL=zzonB*AH4_(0.5)+(zzonR*AH4_(0.5)+zzonG);
|
||||
AH1 bL=bczzL.x;
|
||||
AH1 cL=bczzL.y;
|
||||
AH1 iL=ijfeL.x;
|
||||
AH1 jL=ijfeL.y;
|
||||
AH1 fL=ijfeL.z;
|
||||
AH1 eL=ijfeL.w;
|
||||
AH1 kL=klhgL.x;
|
||||
AH1 lL=klhgL.y;
|
||||
AH1 hL=klhgL.z;
|
||||
AH1 gL=klhgL.w;
|
||||
AH1 oL=zzonL.z;
|
||||
AH1 nL=zzonL.w;
|
||||
AH2 dirPX=AH2_(0.0);
|
||||
AH2 dirPY=AH2_(0.0);
|
||||
AH2 lenP=AH2_(0.0);
|
||||
FsrEasuSetH(dirPX,dirPY,lenP,ppp,true, false,AH2(bL,cL),AH2(eL,fL),AH2(fL,gL),AH2(gL,hL),AH2(jL,kL));
|
||||
FsrEasuSetH(dirPX,dirPY,lenP,ppp,false,true ,AH2(fL,gL),AH2(iL,jL),AH2(jL,kL),AH2(kL,lL),AH2(nL,oL));
|
||||
AH2 dir=AH2(dirPX.r+dirPX.g,dirPY.r+dirPY.g);
|
||||
AH1 len=lenP.r+lenP.g;
|
||||
AH2 dir2=dir*dir;
|
||||
AH1 dirR=dir2.x+dir2.y;
|
||||
AP1 zro=dirR<AH1_(1.0/32768.0);
|
||||
dirR=APrxLoRsqH1(dirR);
|
||||
dirR=zro?AH1_(1.0):dirR;
|
||||
dir.x=zro?AH1_(1.0):dir.x;
|
||||
dir*=AH2_(dirR);
|
||||
len=len*AH1_(0.5);
|
||||
len*=len;
|
||||
AH1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpH1(max(abs(dir.x),abs(dir.y)));
|
||||
AH2 len2=AH2(AH1_(1.0)+(stretch-AH1_(1.0))*len,AH1_(1.0)+AH1_(-0.5)*len);
|
||||
AH1 lob=AH1_(0.5)+AH1_((1.0/4.0-0.04)-0.5)*len;
|
||||
AH1 clp=APrxLoRcpH1(lob);
|
||||
AH2 bothR=max(max(AH2(-ijfeR.z,ijfeR.z),AH2(-klhgR.w,klhgR.w)),max(AH2(-ijfeR.y,ijfeR.y),AH2(-klhgR.x,klhgR.x)));
|
||||
AH2 bothG=max(max(AH2(-ijfeG.z,ijfeG.z),AH2(-klhgG.w,klhgG.w)),max(AH2(-ijfeG.y,ijfeG.y),AH2(-klhgG.x,klhgG.x)));
|
||||
AH2 bothB=max(max(AH2(-ijfeB.z,ijfeB.z),AH2(-klhgB.w,klhgB.w)),max(AH2(-ijfeB.y,ijfeB.y),AH2(-klhgB.x,klhgB.x)));
|
||||
AH2 pR=AH2_(0.0);
|
||||
AH2 pG=AH2_(0.0);
|
||||
AH2 pB=AH2_(0.0);
|
||||
AH2 pW=AH2_(0.0);
|
||||
FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0, 1.0)-ppp.xx,AH2(-1.0,-1.0)-ppp.yy,dir,len2,lob,clp,bczzR.xy,bczzG.xy,bczzB.xy);
|
||||
FsrEasuTapH(pR,pG,pB,pW,AH2(-1.0, 0.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,ijfeR.xy,ijfeG.xy,ijfeB.xy);
|
||||
FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0,-1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,ijfeR.zw,ijfeG.zw,ijfeB.zw);
|
||||
FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 2.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,klhgR.xy,klhgG.xy,klhgB.xy);
|
||||
FsrEasuTapH(pR,pG,pB,pW,AH2( 2.0, 1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,klhgR.zw,klhgG.zw,klhgB.zw);
|
||||
FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 0.0)-ppp.xx,AH2( 2.0, 2.0)-ppp.yy,dir,len2,lob,clp,zzonR.zw,zzonG.zw,zzonB.zw);
|
||||
AH3 aC=AH3(pR.x+pR.y,pG.x+pG.y,pB.x+pB.y);
|
||||
AH1 aW=pW.x+pW.y;
|
||||
pix=min(AH3(bothR.y,bothG.y,bothB.y),max(-AH3(bothR.x,bothG.x,bothB.x),aC*AH3_(ARcpH1(aW))));}
|
||||
#endif
|
||||
#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
|
||||
A_STATIC void FsrRcasCon(
|
||||
outAU4 con,
|
||||
AF1 sharpness){
|
||||
sharpness=AExp2F1(-sharpness);
|
||||
varAF2(hSharp)=initAF2(sharpness,sharpness);
|
||||
con[0]=AU1_AF1(sharpness);
|
||||
con[1]=AU1_AH2_AF2(hSharp);
|
||||
con[2]=0;
|
||||
con[3]=0;}
|
||||
#if defined(A_GPU)&&defined(FSR_RCAS_F)
|
||||
AF4 FsrRcasLoadF(ASU2 p);
|
||||
void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b);
|
||||
void FsrRcasF(
|
||||
out AF1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
|
||||
out AF1 pixG,
|
||||
out AF1 pixB,
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
out AF1 pixA,
|
||||
#endif
|
||||
AU2 ip, // Integer pixel position in output.
|
||||
AU4 con){ // Constant generated by RcasSetup().
|
||||
ASU2 sp=ASU2(ip);
|
||||
AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb;
|
||||
AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb;
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
AF4 ee=FsrRcasLoadF(sp);
|
||||
AF3 e=ee.rgb;pixA=ee.a;
|
||||
#else
|
||||
AF3 e=FsrRcasLoadF(sp).rgb;
|
||||
#endif
|
||||
)--RPCS3--"
|
||||
R"--RPCS3--(
|
||||
AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb;
|
||||
AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb;
|
||||
AF1 bR=b.r;
|
||||
AF1 bG=b.g;
|
||||
AF1 bB=b.b;
|
||||
AF1 dR=d.r;
|
||||
AF1 dG=d.g;
|
||||
AF1 dB=d.b;
|
||||
AF1 eR=e.r;
|
||||
AF1 eG=e.g;
|
||||
AF1 eB=e.b;
|
||||
AF1 fR=f.r;
|
||||
AF1 fG=f.g;
|
||||
AF1 fB=f.b;
|
||||
AF1 hR=h.r;
|
||||
AF1 hG=h.g;
|
||||
AF1 hB=h.b;
|
||||
FsrRcasInputF(bR,bG,bB);
|
||||
FsrRcasInputF(dR,dG,dB);
|
||||
FsrRcasInputF(eR,eG,eB);
|
||||
FsrRcasInputF(fR,fG,fB);
|
||||
FsrRcasInputF(hR,hG,hB);
|
||||
AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG);
|
||||
AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG);
|
||||
AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG);
|
||||
AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG);
|
||||
AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG);
|
||||
AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL;
|
||||
nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL)));
|
||||
nz=AF1_(-0.5)*nz+AF1_(1.0);
|
||||
AF1 mn4R=min(AMin3F1(bR,dR,fR),hR);
|
||||
AF1 mn4G=min(AMin3F1(bG,dG,fG),hG);
|
||||
AF1 mn4B=min(AMin3F1(bB,dB,fB),hB);
|
||||
AF1 mx4R=max(AMax3F1(bR,dR,fR),hR);
|
||||
AF1 mx4G=max(AMax3F1(bG,dG,fG),hG);
|
||||
AF1 mx4B=max(AMax3F1(bB,dB,fB),hB);
|
||||
AF2 peakC=AF2(1.0,-1.0*4.0);
|
||||
AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R);
|
||||
AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G);
|
||||
AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B);
|
||||
AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
|
||||
AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
|
||||
AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
|
||||
AF1 lobeR=max(-hitMinR,hitMaxR);
|
||||
AF1 lobeG=max(-hitMinG,hitMaxG);
|
||||
AF1 lobeB=max(-hitMinB,hitMaxB);
|
||||
AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x);
|
||||
#ifdef FSR_RCAS_DENOISE
|
||||
lobe*=nz;
|
||||
#endif
|
||||
AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0));
|
||||
pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
|
||||
pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
|
||||
pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
|
||||
return;}
|
||||
#endif
|
||||
#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H)
|
||||
AH4 FsrRcasLoadH(ASW2 p);
|
||||
void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b);
|
||||
void FsrRcasH(
|
||||
out AH1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
|
||||
out AH1 pixG,
|
||||
out AH1 pixB,
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
out AH1 pixA,
|
||||
#endif
|
||||
AU2 ip, // Integer pixel position in output.
|
||||
AU4 con){ // Constant generated by RcasSetup().
|
||||
ASW2 sp=ASW2(ip);
|
||||
AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb;
|
||||
AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb;
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
AH4 ee=FsrRcasLoadH(sp);
|
||||
AH3 e=ee.rgb;pixA=ee.a;
|
||||
#else
|
||||
AH3 e=FsrRcasLoadH(sp).rgb;
|
||||
#endif
|
||||
AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb;
|
||||
AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb;
|
||||
AH1 bR=b.r;
|
||||
AH1 bG=b.g;
|
||||
AH1 bB=b.b;
|
||||
AH1 dR=d.r;
|
||||
AH1 dG=d.g;
|
||||
AH1 dB=d.b;
|
||||
AH1 eR=e.r;
|
||||
AH1 eG=e.g;
|
||||
AH1 eB=e.b;
|
||||
AH1 fR=f.r;
|
||||
AH1 fG=f.g;
|
||||
AH1 fB=f.b;
|
||||
AH1 hR=h.r;
|
||||
AH1 hG=h.g;
|
||||
AH1 hB=h.b;
|
||||
FsrRcasInputH(bR,bG,bB);
|
||||
FsrRcasInputH(dR,dG,dB);
|
||||
FsrRcasInputH(eR,eG,eB);
|
||||
FsrRcasInputH(fR,fG,fB);
|
||||
FsrRcasInputH(hR,hG,hB);
|
||||
AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG);
|
||||
AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG);
|
||||
AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG);
|
||||
AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG);
|
||||
AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG);
|
||||
AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL;
|
||||
nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL)));
|
||||
nz=AH1_(-0.5)*nz+AH1_(1.0);
|
||||
AH1 mn4R=min(AMin3H1(bR,dR,fR),hR);
|
||||
AH1 mn4G=min(AMin3H1(bG,dG,fG),hG);
|
||||
AH1 mn4B=min(AMin3H1(bB,dB,fB),hB);
|
||||
AH1 mx4R=max(AMax3H1(bR,dR,fR),hR);
|
||||
AH1 mx4G=max(AMax3H1(bG,dG,fG),hG);
|
||||
AH1 mx4B=max(AMax3H1(bB,dB,fB),hB);
|
||||
AH2 peakC=AH2(1.0,-1.0*4.0);
|
||||
AH1 hitMinR=mn4R*ARcpH1(AH1_(4.0)*mx4R);
|
||||
AH1 hitMinG=mn4G*ARcpH1(AH1_(4.0)*mx4G);
|
||||
AH1 hitMinB=mn4B*ARcpH1(AH1_(4.0)*mx4B);
|
||||
AH1 hitMaxR=(peakC.x-mx4R)*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
|
||||
AH1 hitMaxG=(peakC.x-mx4G)*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
|
||||
AH1 hitMaxB=(peakC.x-mx4B)*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
|
||||
AH1 lobeR=max(-hitMinR,hitMaxR);
|
||||
AH1 lobeG=max(-hitMinG,hitMaxG);
|
||||
AH1 lobeB=max(-hitMinB,hitMaxB);
|
||||
AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x;
|
||||
#ifdef FSR_RCAS_DENOISE
|
||||
lobe*=nz;
|
||||
#endif
|
||||
AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0));
|
||||
pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
|
||||
pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
|
||||
pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
|
||||
#endif
|
||||
#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2)
|
||||
AH4 FsrRcasLoadHx2(ASW2 p);
|
||||
void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b);
|
||||
void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){
|
||||
#ifdef A_HLSL
|
||||
pix0.a=pix1.a=0.0;
|
||||
#endif
|
||||
pix0.rgb=AH3(pixR.x,pixG.x,pixB.x);
|
||||
pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);}
|
||||
void FsrRcasHx2(
|
||||
out AH2 pixR,
|
||||
out AH2 pixG,
|
||||
out AH2 pixB,
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
out AH2 pixA,
|
||||
#endif
|
||||
AU2 ip, // Integer pixel position in output.
|
||||
AU4 con){ // Constant generated by RcasSetup().
|
||||
ASW2 sp0=ASW2(ip);
|
||||
AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb;
|
||||
AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb;
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
AH4 ee0=FsrRcasLoadHx2(sp0);
|
||||
AH3 e0=ee0.rgb;pixA.r=ee0.a;
|
||||
#else
|
||||
AH3 e0=FsrRcasLoadHx2(sp0).rgb;
|
||||
#endif
|
||||
AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb;
|
||||
AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb;
|
||||
ASW2 sp1=sp0+ASW2(8,0);
|
||||
AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb;
|
||||
AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb;
|
||||
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
|
||||
AH4 ee1=FsrRcasLoadHx2(sp1);
|
||||
AH3 e1=ee1.rgb;pixA.g=ee1.a;
|
||||
#else
|
||||
AH3 e1=FsrRcasLoadHx2(sp1).rgb;
|
||||
#endif
|
||||
AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb;
|
||||
AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb;
|
||||
AH2 bR=AH2(b0.r,b1.r);
|
||||
AH2 bG=AH2(b0.g,b1.g);
|
||||
AH2 bB=AH2(b0.b,b1.b);
|
||||
AH2 dR=AH2(d0.r,d1.r);
|
||||
AH2 dG=AH2(d0.g,d1.g);
|
||||
AH2 dB=AH2(d0.b,d1.b);
|
||||
AH2 eR=AH2(e0.r,e1.r);
|
||||
AH2 eG=AH2(e0.g,e1.g);
|
||||
AH2 eB=AH2(e0.b,e1.b);
|
||||
AH2 fR=AH2(f0.r,f1.r);
|
||||
AH2 fG=AH2(f0.g,f1.g);
|
||||
AH2 fB=AH2(f0.b,f1.b);
|
||||
AH2 hR=AH2(h0.r,h1.r);
|
||||
AH2 hG=AH2(h0.g,h1.g);
|
||||
AH2 hB=AH2(h0.b,h1.b);
|
||||
FsrRcasInputHx2(bR,bG,bB);
|
||||
FsrRcasInputHx2(dR,dG,dB);
|
||||
FsrRcasInputHx2(eR,eG,eB);
|
||||
FsrRcasInputHx2(fR,fG,fB);
|
||||
FsrRcasInputHx2(hR,hG,hB);
|
||||
AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG);
|
||||
AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG);
|
||||
AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG);
|
||||
AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG);
|
||||
AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG);
|
||||
AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL;
|
||||
nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL)));
|
||||
nz=AH2_(-0.5)*nz+AH2_(1.0);
|
||||
AH2 mn4R=min(AMin3H2(bR,dR,fR),hR);
|
||||
AH2 mn4G=min(AMin3H2(bG,dG,fG),hG);
|
||||
AH2 mn4B=min(AMin3H2(bB,dB,fB),hB);
|
||||
AH2 mx4R=max(AMax3H2(bR,dR,fR),hR);
|
||||
AH2 mx4G=max(AMax3H2(bG,dG,fG),hG);
|
||||
AH2 mx4B=max(AMax3H2(bB,dB,fB),hB);
|
||||
AH2 peakC=AH2(1.0,-1.0*4.0);
|
||||
AH2 hitMinR=mn4R*ARcpH2(AH2_(4.0)*mx4R);
|
||||
AH2 hitMinG=mn4G*ARcpH2(AH2_(4.0)*mx4G);
|
||||
AH2 hitMinB=mn4B*ARcpH2(AH2_(4.0)*mx4B);
|
||||
AH2 hitMaxR=(peakC.x-mx4R)*ARcpH2(AH2_(4.0)*mn4R+peakC.y);
|
||||
AH2 hitMaxG=(peakC.x-mx4G)*ARcpH2(AH2_(4.0)*mn4G+peakC.y);
|
||||
AH2 hitMaxB=(peakC.x-mx4B)*ARcpH2(AH2_(4.0)*mn4B+peakC.y);
|
||||
AH2 lobeR=max(-hitMinR,hitMaxR);
|
||||
AH2 lobeG=max(-hitMinG,hitMaxG);
|
||||
AH2 lobeB=max(-hitMinB,hitMaxB);
|
||||
AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x);
|
||||
#ifdef FSR_RCAS_DENOISE
|
||||
lobe*=nz;
|
||||
#endif
|
||||
AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0));
|
||||
pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
|
||||
pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
|
||||
pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
|
||||
#endif
|
||||
#if defined(A_GPU)
|
||||
// Adds t scaled by a to c in place, weighted per channel by min(1-c, c), so the
// amount added falls to zero as a channel of c approaches 0 or 1.
// NOTE(review): presumably c is a color in [0,1] and t a signed grain/noise texel - confirm against AMD's FSR1 documentation.
void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);}
|
||||
#endif
|
||||
#if defined(A_GPU)&&defined(A_HALF)
|
||||
// AH3/AH1 counterpart of FsrLfgaF (only compiled when A_HALF is defined):
// adds t*a to c in place, weighted per channel by min(1-c, c).
void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);}
|
||||
void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){
|
||||
cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);}
|
||||
#endif
|
||||
#if defined(A_GPU)
|
||||
// Scales c in place by 1/(max(r,g,b)+1); for non-negative input every channel ends up below 1.
void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));}
|
||||
// Scales c in place by 1/(1-max(r,g,b)), which algebraically undoes FsrSrtmF.
// The denominator is clamped to at least 1/32768 so a max channel at or near 1 cannot divide by zero.
void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));}
|
||||
#endif
|
||||
#if defined(A_GPU)&&defined(A_HALF)
|
||||
// AH3 counterpart of FsrSrtmF (only compiled when A_HALF is defined): scales c in place by 1/(max(r,g,b)+1).
void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));}
|
||||
// AH3 counterpart of FsrSrtmInvF: scales c in place by 1/(1-max(r,g,b)),
// with the denominator clamped to at least 1/32768.
void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));}
|
||||
void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){
|
||||
AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;}
|
||||
void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){
|
||||
AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;}
|
||||
#endif
|
||||
#if defined(A_GPU)
|
||||
AF1 FsrTepdDitF(AU2 p,AU1 f){
|
||||
AF1 x=AF1_(p.x+f);
|
||||
AF1 y=AF1_(p.y);
|
||||
AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
|
||||
AF1 b=AF1_(1.0/3.69);
|
||||
x=x*a+(y*b);
|
||||
return AFractF1(x);}
|
||||
void FsrTepdC8F(inout AF3 c,AF1 dit){
|
||||
AF3 n=sqrt(c);
|
||||
n=floor(n*AF3_(255.0))*AF3_(1.0/255.0);
|
||||
AF3 a=n*n;
|
||||
AF3 b=n+AF3_(1.0/255.0);b=b*b;
|
||||
AF3 r=(c-b)*APrxMedRcpF3(a-b);
|
||||
c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));}
|
||||
void FsrTepdC10F(inout AF3 c,AF1 dit){
|
||||
AF3 n=sqrt(c);
|
||||
n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0);
|
||||
AF3 a=n*n;
|
||||
AF3 b=n+AF3_(1.0/1023.0);b=b*b;
|
||||
AF3 r=(c-b)*APrxMedRcpF3(a-b);
|
||||
c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));}
|
||||
#endif
|
||||
#if defined(A_GPU)&&defined(A_HALF)
|
||||
AH1 FsrTepdDitH(AU2 p,AU1 f){
|
||||
AF1 x=AF1_(p.x+f);
|
||||
AF1 y=AF1_(p.y);
|
||||
AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
|
||||
AF1 b=AF1_(1.0/3.69);
|
||||
x=x*a+(y*b);
|
||||
return AH1(AFractF1(x));}
|
||||
void FsrTepdC8H(inout AH3 c,AH1 dit){
|
||||
AH3 n=sqrt(c);
|
||||
n=floor(n*AH3_(255.0))*AH3_(1.0/255.0);
|
||||
AH3 a=n*n;
|
||||
AH3 b=n+AH3_(1.0/255.0);b=b*b;
|
||||
AH3 r=(c-b)*APrxMedRcpH3(a-b);
|
||||
c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));}
|
||||
void FsrTepdC10H(inout AH3 c,AH1 dit){
|
||||
AH3 n=sqrt(c);
|
||||
n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0);
|
||||
AH3 a=n*n;
|
||||
AH3 b=n+AH3_(1.0/1023.0);b=b*b;
|
||||
AH3 r=(c-b)*APrxMedRcpH3(a-b);
|
||||
c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));}
|
||||
AH2 FsrTepdDitHx2(AU2 p,AU1 f){
|
||||
AF2 x;
|
||||
x.x=AF1_(p.x+f);
|
||||
x.y=x.x+AF1_(8.0);
|
||||
AF1 y=AF1_(p.y);
|
||||
AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
|
||||
AF1 b=AF1_(1.0/3.69);
|
||||
x=x*AF2_(a)+AF2_(y*b);
|
||||
return AH2(AFractF2(x));}
|
||||
void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){
|
||||
AH2 nR=sqrt(cR);
|
||||
AH2 nG=sqrt(cG);
|
||||
AH2 nB=sqrt(cB);
|
||||
nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0);
|
||||
nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0);
|
||||
nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0);
|
||||
AH2 aR=nR*nR;
|
||||
AH2 aG=nG*nG;
|
||||
AH2 aB=nB*nB;
|
||||
AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR;
|
||||
AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG;
|
||||
AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB;
|
||||
AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR);
|
||||
AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
|
||||
AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
|
||||
cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0));
|
||||
cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0));
|
||||
cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));}
|
||||
void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){
|
||||
AH2 nR=sqrt(cR);
|
||||
AH2 nG=sqrt(cG);
|
||||
AH2 nB=sqrt(cB);
|
||||
nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0);
|
||||
nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0);
|
||||
nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0);
|
||||
AH2 aR=nR*nR;
|
||||
AH2 aG=nG*nG;
|
||||
AH2 aB=nB*nB;
|
||||
AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR;
|
||||
AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG;
|
||||
AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB;
|
||||
AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR);
|
||||
AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
|
||||
AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
|
||||
cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0));
|
||||
cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0));
|
||||
cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));}
|
||||
#endif
|
||||
)--RPCS3--"
|
350
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp
Normal file
350
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp
Normal file
@ -0,0 +1,350 @@
|
||||
#include "../../vkutils/barriers.h"
|
||||
#include "../../VKHelpers.h"
|
||||
#include "../../VKResourceManager.h"
|
||||
|
||||
#include "../fsr_pass.h"
|
||||
|
||||
// Pull in the host-side half of the FidelityFX headers. A_CPU is defined only
// around these two includes; FsrEasuCon() and FsrRcasCon() used further down
// come from here.
// The vendored directory is spelled "3rdparty" (all lowercase). The previous
// "3rdParty" spelling only resolved on case-insensitive filesystems.
#define A_CPU 1
#include "3rdparty/GPUOpen/include/ffx_a.h"
#include "3rdparty/GPUOpen/include/ffx_fsr1.h"
#undef A_CPU
|
||||
|
||||
namespace vk
|
||||
{
|
||||
namespace FidelityFX
|
||||
{
|
||||
fsr_pass::fsr_pass(const std::string& config_definitions, u32 push_constants_size_)
{
	// Each file included below consists of one raw string literal, so including
	// it in expression position embeds its text as the initializer.

	// AMD-provided sample shader with minimal modification; carries the %...% placeholders
	const char* ubershader_template =
	#include "fsr_ubershader.glsl"
	;

	// Flattened FidelityFX sources that get spliced into the template
	const char* ffx_a_source =
	#include "fsr_ffx_a_flattened.inc"
	;

	const char* ffx_fsr1_source =
	#include "fsr_ffx_fsr1_flattened.inc"
	;

	// Substitute the per-pass #defines and the two library bodies
	m_src = ubershader_template;
	m_src = fmt::replace_all(m_src,
	{
		{ "%FFX_DEFINITIONS%", config_definitions },
		{ "%FFX_A_IMPORT%", ffx_a_source },
		{ "%FFX_FSR_IMPORT%", ffx_fsr1_source }
	});

	// Zero the whole constants buffer so slots a pass never writes are not
	// uploaded to the GPU with indeterminate contents
	memset(m_constants_buf, 0, sizeof(m_constants_buf));

	// Push constants must be configured before the task is created
	use_push_constants = true;
	push_constants_size = push_constants_size_;

	create();
}
|
||||
|
||||
std::vector<std::pair<VkDescriptorType, u8>> fsr_pass::get_descriptor_layout()
|
||||
{
|
||||
return
|
||||
{
|
||||
{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1 },
|
||||
{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 }
|
||||
};
|
||||
}
|
||||
|
||||
void fsr_pass::declare_inputs()
|
||||
{
|
||||
std::vector<vk::glsl::program_input> inputs =
|
||||
{
|
||||
{
|
||||
::glsl::program_domain::glsl_compute_program,
|
||||
vk::glsl::program_input_type::input_type_texture,
|
||||
{}, {},
|
||||
0,
|
||||
"InputTexture"
|
||||
},
|
||||
{
|
||||
::glsl::program_domain::glsl_compute_program,
|
||||
vk::glsl::program_input_type::input_type_texture,
|
||||
{}, {},
|
||||
1,
|
||||
"OutputTexture"
|
||||
}
|
||||
};
|
||||
|
||||
m_program->load_uniforms(inputs);
|
||||
}
|
||||
|
||||
void fsr_pass::bind_resources()
|
||||
{
|
||||
// Bind relevant stuff
|
||||
if (!m_sampler)
|
||||
{
|
||||
const auto pdev = vk::get_current_renderer();
|
||||
m_sampler = std::make_unique<vk::sampler>(*pdev,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
|
||||
}
|
||||
|
||||
m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, "InputTexture", VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set);
|
||||
m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set);
|
||||
}
|
||||
|
||||
void fsr_pass::run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size)
{
	// Stash the per-dispatch state that configure() and bind_resources() read
	m_input_size = input_size;
	m_output_size = output_size;
	m_input_image = src->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);
	m_output_image = dst->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);

	// Let the concrete pass record its push constants
	configure(cmd);

	// One workgroup per 16x16 tile of the output, rounded up
	constexpr auto tile_size = 16;
	const auto groups_x = utils::aligned_div(output_size.width, tile_size);
	const auto groups_y = utils::aligned_div(output_size.height, tile_size);

	// Sanity check against the explicit round-up formula
	ensure(groups_x == (output_size.width + (tile_size - 1)) / tile_size);
	ensure(groups_y == (output_size.height + (tile_size - 1)) / tile_size);

	compute_task::run(cmd, groups_x, groups_y, 1);
}
|
||||
|
||||
easu_pass::easu_pass()
	: fsr_pass(
		// Select the EASU path of the ubershader
		"#define SAMPLE_EASU 1\n"
		"#define SAMPLE_RCAS 0\n"
		"#define SAMPLE_BILINEAR 0\n"
		"#define SAMPLE_SLOW_FALLBACK 1",
		// Push constant block size in bytes: five 16-byte vec4s
		80)
{
}
|
||||
|
||||
void easu_pass::configure(const vk::command_buffer& cmd)
{
	const auto raw_source = m_input_image->image();

	// Four uvec4 slots at the start of the constants buffer.
	// NOTE: the fifth vector is left untouched (zeroed) as we do not support HDR natively
	auto* const con0 = &m_constants_buf[0];
	auto* const con1 = &m_constants_buf[4];
	auto* const con2 = &m_constants_buf[8];
	auto* const con3 = &m_constants_buf[12];

	// Incoming viewport size to upscale (actual size)
	const f32 viewport_w = static_cast<f32>(m_input_size.width);
	const f32 viewport_h = static_cast<f32>(m_input_size.height);

	// Size of the raw image to upscale (in case the viewport does not cover it all)
	const f32 image_w = static_cast<f32>(raw_source->width());
	const f32 image_h = static_cast<f32>(raw_source->height());

	// Size of the output viewport (target size)
	const f32 target_w = static_cast<f32>(m_output_size.width);
	const f32 target_h = static_cast<f32>(m_output_size.height);

	FsrEasuCon(con0, con1, con2, con3,
		viewport_w, viewport_h,
		image_w, image_h,
		target_w, target_h);

	vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf);
}
|
||||
|
||||
rcas_pass::rcas_pass()
	: fsr_pass(
		// Select the RCAS path of the ubershader
		"#define SAMPLE_RCAS 1\n"
		"#define SAMPLE_EASU 0\n"
		"#define SAMPLE_BILINEAR 0\n"
		"#define SAMPLE_SLOW_FALLBACK 1",
		// Push constant block size in bytes: two 16-byte vec4s
		32)
{
}
|
||||
|
||||
void rcas_pass::configure(const vk::command_buffer& cmd)
{
	// The configured intensity is divided by 50 and subtracted from 2 to get the
	// attenuation handed to FsrRcasCon. 0 is actually the sharpest, with 2 being the
	// chosen limit; each progressive unit 'halves' the sharpening intensity.
	const auto scaled_intensity = g_cfg.video.vk.rcas_sharpening_intensity / 50.f;
	const auto attenuation = 2.f - scaled_intensity;
	FsrRcasCon(&m_constants_buf[0], attenuation);

	vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf);
}
|
||||
|
||||
} // Namespace FidelityFX
|
||||
|
||||
void fsr_upscale_pass::dispose_images()
|
||||
{
|
||||
auto safe_delete = [](auto& data)
|
||||
{
|
||||
if (data && data->value)
|
||||
{
|
||||
vk::get_resource_manager()->dispose(data);
|
||||
}
|
||||
else if (data)
|
||||
{
|
||||
data.reset();
|
||||
}
|
||||
};
|
||||
|
||||
safe_delete(m_output_left);
|
||||
safe_delete(m_output_right);
|
||||
safe_delete(m_intermediate_data);
|
||||
}
|
||||
|
||||
void fsr_upscale_pass::initialize_image(u32 output_w, u32 output_h, rsx::flags32_t mode)
{
	// Always start from a clean slate; every image this pass owns is released first
	dispose_images();

	// Creates one output-sized image with the requested usage. Creation is allowed
	// to fail, in which case the returned wrapper has a null handle.
	auto make_target = [output_w, output_h](VkImageUsageFlags usage)
	{
		const auto device = vk::get_current_renderer();
		return std::make_unique<vk::viewable_image>(
			*device,                                                // Owner
			device->get_memory_mapping().device_local,              // Must be in device optimal memory
			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			VK_IMAGE_TYPE_2D,
			VK_FORMAT_B8G8R8A8_UNORM,                               // The only format guaranteed by spec
			output_w, output_h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT,     // Dimensions (w, h, d, mips, layers, samples)
			VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_TILING_OPTIMAL,
			usage,
			VK_IMAGE_CREATE_ALLOW_NULL,                             // Allow creation to fail if there is no memory
			VMM_ALLOCATION_POOL_SWAPCHAIN,
			RSX_FORMAT_CLASS_COLOR);
	};

	// Final outputs additionally serve as blit sources; the intermediate (EASU result) does not
	const VkImageUsageFlags output_usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
	const VkImageUsageFlags intermediate_usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

	// Stop allocating as soon as one image cannot be created
	bool ok = true;

	if (mode & UPSCALE_LEFT_VIEW)
	{
		m_output_left = make_target(output_usage);
		ok = (m_output_left->value != VK_NULL_HANDLE);
	}

	if (ok && (mode & UPSCALE_RIGHT_VIEW))
	{
		m_output_right = make_target(output_usage);
		ok = (m_output_right->value != VK_NULL_HANDLE);
	}

	if (ok)
	{
		m_intermediate_data = make_target(intermediate_usage);
		ok = (m_intermediate_data->value != VK_NULL_HANDLE);
	}

	if (!ok)
	{
		// Leave no partially-initialized state behind
		rsx_log.warning("FSR is enabled, but the system is out of memory. Will fall back to bilinear upscaling");
		dispose_images();
	}
}
|
||||
|
||||
vk::viewable_image* fsr_upscale_pass::scale_output(
	const vk::command_buffer& cmd,
	vk::viewable_image* src,
	VkImage present_surface,
	VkImageLayout present_surface_layout,
	const VkImageBlit& request,
	rsx::flags32_t mode)
{
	// Runs EASU followed by RCAS when the request enlarges the image in both
	// dimensions and the working images are available. With UPSCALE_AND_COMMIT the
	// result (or the untouched source) is blitted to present_surface and nullptr is
	// returned; otherwise the image holding the result is returned.

	// Extents are taken as absolute values; the offsets may be reversed to express a flip
	size2u input_size, output_size;
	input_size.width = std::abs(request.srcOffsets[1].x - request.srcOffsets[0].x);
	input_size.height = std::abs(request.srcOffsets[1].y - request.srcOffsets[0].y);
	output_size.width = std::abs(request.dstOffsets[1].x - request.dstOffsets[0].x);
	output_size.height = std::abs(request.dstOffsets[1].y - request.dstOffsets[0].y);

	const bool commit = (mode & UPSCALE_AND_COMMIT) != 0;
	const bool is_upscale = input_size.width < output_size.width && input_size.height < output_size.height;

	// What the final blit reads from, and the region it copies. Both default to a plain pass-through.
	vk::viewable_image* blit_source = src;
	VkImageBlit blit_region = request;

	if (is_upscale)
	{
		// Cannot upscale both LEFT and RIGHT images at the same time.
		// UPSCALE_DEFAULT_VIEW shares the LEFT bit for simplicity.
		ensure((mode & (UPSCALE_LEFT_VIEW | UPSCALE_RIGHT_VIEW)) != (UPSCALE_LEFT_VIEW | UPSCALE_RIGHT_VIEW));

		// NOTE(review): initialize_image() disposes both eye outputs but recreates only the one
		// selected by mode, so alternating LEFT/RIGHT requests would reallocate on every call.
		// Confirm whether callers alternate.
		auto& upscaled = (mode & UPSCALE_LEFT_VIEW) ? m_output_left : m_output_right;
		const bool needs_realloc = !upscaled || upscaled->width() != output_size.width || upscaled->height() != output_size.height;
		if (needs_realloc)
		{
			initialize_image(output_size.width, output_size.height, mode);
		}

		// Still null here means allocation failed; fall through to the plain path
		if (upscaled)
		{
			auto easu = vk::get_compute_task<vk::FidelityFX::easu_pass>();
			auto rcas = vk::get_compute_task<vk::FidelityFX::rcas_pass>();

			// Prepare for EASU pass
			src->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

			if (m_intermediate_data->current_layout != VK_IMAGE_LAYOUT_GENERAL)
			{
				m_intermediate_data->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
			}
			else
			{
				// R/W CS-CS barrier in case of back-to-back upscales
				vk::insert_image_memory_barrier(cmd,
					m_intermediate_data->value,
					m_intermediate_data->current_layout, m_intermediate_data->current_layout,
					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
					VK_ACCESS_SHADER_READ_BIT,
					VK_ACCESS_SHADER_WRITE_BIT,
					{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
			}

			// EASU: src -> intermediate
			easu->run(cmd, src, m_intermediate_data.get(), input_size, output_size);

			// Prepare for RCAS pass
			upscaled->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);

			// R/W CS-CS barrier before RCAS
			vk::insert_image_memory_barrier(cmd,
				m_intermediate_data->value,
				m_intermediate_data->current_layout, m_intermediate_data->current_layout,
				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_ACCESS_SHADER_WRITE_BIT,
				VK_ACCESS_SHADER_READ_BIT,
				{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });

			// RCAS: intermediate -> final output
			rcas->run(cmd, m_intermediate_data.get(), upscaled.get(), input_size, output_size);

			// Cleanup
			src->pop_layout(cmd);

			// From here on the FSR target stands in for the original source
			blit_source = upscaled.get();

			if (commit)
			{
				// Explicit CS-Transfer barrier
				vk::insert_image_memory_barrier(cmd,
					upscaled->value,
					upscaled->current_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
					VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
					VK_PIPELINE_STAGE_TRANSFER_BIT,
					VK_ACCESS_SHADER_WRITE_BIT,
					VK_ACCESS_TRANSFER_READ_BIT,
					{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });

				// The barrier above already performed the transition; record it
				upscaled->current_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

				// The blit now reads the whole upscaled image
				blit_region.srcOffsets[0].x = 0;
				blit_region.srcOffsets[1].x = output_size.width;
				blit_region.srcOffsets[0].y = 0;
				blit_region.srcOffsets[1].y = output_size.height;

				// Preserve mirroring/flipping
				if (request.srcOffsets[0].x > request.srcOffsets[1].x)
				{
					std::swap(blit_region.srcOffsets[0].x, blit_region.srcOffsets[1].x);
				}

				if (request.srcOffsets[0].y > request.srcOffsets[1].y)
				{
					std::swap(blit_region.srcOffsets[0].y, blit_region.srcOffsets[1].y);
				}
			}
		}
	}

	if (!commit)
	{
		// Caller consumes the image itself
		return blit_source;
	}

	blit_source->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
	vkCmdBlitImage(cmd, blit_source->value, blit_source->current_layout, present_surface, present_surface_layout, 1, &blit_region, VK_FILTER_LINEAR);
	blit_source->pop_layout(cmd);
	return nullptr;
}
|
||||
}
|
108
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_ubershader.glsl
Normal file
108
rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_ubershader.glsl
Normal file
@ -0,0 +1,108 @@
|
||||
R"(
|
||||
#version 450
|
||||
|
||||
#define A_GPU 1
|
||||
#define A_GLSL 1
|
||||
|
||||
%FFX_DEFINITIONS%
|
||||
|
||||
#if defined(SAMPLE_EASU) || defined(SAMPLE_RCAS)
|
||||
layout(push_constant) uniform const_buffer
|
||||
{
|
||||
uvec4 Const0;
|
||||
#if SAMPLE_EASU
|
||||
uvec4 Const1;
|
||||
uvec4 Const2;
|
||||
uvec4 Const3;
|
||||
#endif
|
||||
uvec4 Sample;
|
||||
};
|
||||
#endif
|
||||
|
||||
%FFX_A_IMPORT%
|
||||
|
||||
layout(set=0,binding=0) uniform sampler2D InputTexture;
|
||||
layout(set=0,binding=1,rgba8) uniform image2D OutputTexture;
|
||||
|
||||
#if A_HALF
|
||||
#if SAMPLE_EASU
|
||||
#define FSR_EASU_H 1
|
||||
AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
|
||||
AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
|
||||
AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
|
||||
#endif
|
||||
#if SAMPLE_RCAS
|
||||
#define FSR_RCAS_H
|
||||
AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
|
||||
void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
|
||||
#endif
|
||||
#else
|
||||
#if SAMPLE_EASU
|
||||
#define FSR_EASU_F 1
|
||||
// EASU input callback: gathers component 0 (red) of the four texels around p from InputTexture.
AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
|
||||
// EASU input callback: gathers component 1 (green) of the four texels around p from InputTexture.
AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
|
||||
// EASU input callback: gathers component 2 (blue) of the four texels around p from InputTexture.
AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
|
||||
#endif
|
||||
#if SAMPLE_RCAS
|
||||
#define FSR_RCAS_F
|
||||
// RCAS input callback: fetches the texel at integer position p from mip level 0 of InputTexture.
AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
|
||||
// RCAS per-sample input hook; intentionally empty, so loaded colors are used unmodified.
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(SAMPLE_EASU) || defined(SAMPLE_RCAS)
|
||||
%FFX_FSR_IMPORT%
|
||||
#endif
|
||||
|
||||
void CurrFilter(AU2 pos)
|
||||
{
|
||||
#if SAMPLE_BILINEAR
|
||||
AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
|
||||
imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
|
||||
#endif
|
||||
#if SAMPLE_EASU
|
||||
#if SAMPLE_SLOW_FALLBACK
|
||||
AF3 c;
|
||||
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
|
||||
if( Sample.x == 1 )
|
||||
c *= c;
|
||||
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
|
||||
#else
|
||||
AH3 c;
|
||||
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
|
||||
if( Sample.x == 1 )
|
||||
c *= c;
|
||||
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
|
||||
#endif
|
||||
#endif
|
||||
#if SAMPLE_RCAS
|
||||
#if SAMPLE_SLOW_FALLBACK
|
||||
AF3 c;
|
||||
FsrRcasF(c.r, c.g, c.b, pos, Const0);
|
||||
if( Sample.x == 1 )
|
||||
c *= c;
|
||||
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
|
||||
#else
|
||||
AH3 c;
|
||||
FsrRcasH(c.r, c.g, c.b, pos, Const0);
|
||||
if( Sample.x == 1 )
|
||||
c *= c;
|
||||
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
layout(local_size_x=64) in;
|
||||
void main()
{
	// Each 64-thread workgroup covers a 16x16 pixel tile: the local index is
	// remapped to a position in an 8x8 quad (a more PS-like swizzle pattern) and
	// every thread then filters the same position in each of the four 8x8 quads.
	AU2 base = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);

	CurrFilter(base);                 // top-left quad
	CurrFilter(base + AU2(8u, 0u));   // top-right quad
	CurrFilter(base + AU2(8u, 8u));   // bottom-right quad
	CurrFilter(base + AU2(0u, 8u));   // bottom-left quad
}
|
||||
)"
|
69
rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h
Normal file
69
rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h
Normal file
@ -0,0 +1,69 @@
|
||||
#pragma once
|
||||
|
||||
#include "../vkutils/sampler.h"
|
||||
#include "../VKCompute.h"
|
||||
|
||||
#include "upscaling.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
namespace FidelityFX
|
||||
{
|
||||
class fsr_pass : public compute_task
|
||||
{
|
||||
protected:
|
||||
std::unique_ptr<vk::sampler> m_sampler;
|
||||
const vk::image_view* m_input_image = nullptr;
|
||||
const vk::image_view* m_output_image = nullptr;
|
||||
size2u m_input_size;
|
||||
size2u m_output_size;
|
||||
u32 m_constants_buf[20];
|
||||
|
||||
std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout() override;
|
||||
void declare_inputs() override;
|
||||
void bind_resources() override;
|
||||
|
||||
virtual void configure(const vk::command_buffer& cmd) = 0;
|
||||
|
||||
public:
|
||||
fsr_pass(const std::string& config_definitions, u32 push_constants_size_);
|
||||
void run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size);
|
||||
};
|
||||
|
||||
class easu_pass : public fsr_pass
|
||||
{
|
||||
void configure(const vk::command_buffer& cmd) override;
|
||||
|
||||
public:
|
||||
easu_pass();
|
||||
};
|
||||
|
||||
class rcas_pass : public fsr_pass
|
||||
{
|
||||
void configure(const vk::command_buffer& cmd) override;
|
||||
|
||||
public:
|
||||
rcas_pass();
|
||||
};
|
||||
}
|
||||
|
||||
class fsr_upscale_pass : public upscaler
|
||||
{
|
||||
std::unique_ptr<vk::viewable_image> m_output_left;
|
||||
std::unique_ptr<vk::viewable_image> m_output_right;
|
||||
std::unique_ptr<vk::viewable_image> m_intermediate_data;
|
||||
|
||||
void dispose_images();
|
||||
void initialize_image(u32 output_w, u32 output_h, rsx::flags32_t mode);
|
||||
|
||||
public:
|
||||
vk::viewable_image* scale_output(
|
||||
const vk::command_buffer& cmd, // CB
|
||||
vk::viewable_image* src, // Source input
|
||||
VkImage present_surface, // Present target. May be VK_NULL_HANDLE for some passes
|
||||
VkImageLayout present_surface_layout, // Present surface layout, or VK_IMAGE_LAYOUT_UNDEFINED if no present target is provided
|
||||
const VkImageBlit& request, // Scaling request information
|
||||
rsx::flags32_t mode // Mode
|
||||
) override;
|
||||
};
|
||||
}
|
36
rpcs3/Emu/RSX/VK/upscalers/upscaling.h
Normal file
36
rpcs3/Emu/RSX/VK/upscalers/upscaling.h
Normal file
@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include "util/types.hpp"
|
||||
|
||||
#include "../vkutils/commands.h"
|
||||
#include "../vkutils/image.h"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
namespace upscaling_flags_
{
	// Bit flags combined into the `mode` argument of the upscalers
	enum upscaling_flags
	{
		UPSCALE_DEFAULT_VIEW = (1 << 0),                  // Shares its bit with the left view
		UPSCALE_LEFT_VIEW    = UPSCALE_DEFAULT_VIEW,
		UPSCALE_RIGHT_VIEW   = (1 << 1),
		UPSCALE_AND_COMMIT   = (1 << 2)
	};
}
|
||||
|
||||
using namespace upscaling_flags_;
|
||||
|
||||
struct upscaler
|
||||
{
|
||||
virtual ~upscaler() {}
|
||||
|
||||
virtual vk::viewable_image* scale_output(
|
||||
const vk::command_buffer& cmd, // CB
|
||||
vk::viewable_image* src, // Source input
|
||||
VkImage present_surface, // Present target. May be VK_NULL_HANDLE for some passes
|
||||
VkImageLayout present_surface_layout, // Present surface layout, or VK_IMAGE_LAYOUT_UNDEFINED if no present target is provided
|
||||
const VkImageBlit& request, // Scaling request information
|
||||
rsx::flags32_t mode // Mode
|
||||
) = 0;
|
||||
};
|
||||
}
|
@ -175,8 +175,10 @@ struct cfg_root : cfg::node
|
||||
cfg::string adapter{ this, "Adapter" };
|
||||
cfg::_bool force_fifo{ this, "Force FIFO present mode" };
|
||||
cfg::_bool force_primitive_restart{ this, "Force primitive restart flag" };
|
||||
cfg::_bool force_disable_exclusive_fullscreen_mode{this, "Force Disable Exclusive Fullscreen Mode"};
|
||||
cfg::_bool force_disable_exclusive_fullscreen_mode{ this, "Force Disable Exclusive Fullscreen Mode" };
|
||||
cfg::_bool asynchronous_texture_streaming{ this, "Asynchronous Texture Streaming 2", false };
|
||||
cfg::_bool fsr_upscaling{ this, "Enable FidelityFX Super Resolution Upscaling", false, true };
|
||||
cfg::uint<0, 100> rcas_sharpening_intensity{ this, "FidelityFX CAS Sharpening Intensity", 50, true };
|
||||
cfg::_enum<vk_gpu_scheduler_mode> asynchronous_scheduler{ this, "Asynchronous Queue Scheduler", vk_gpu_scheduler_mode::device };
|
||||
|
||||
} vk{ this };
|
||||
|
@ -11,6 +11,9 @@
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Emu\RSX\VK\upscalers\bilinear_pass.hpp" />
|
||||
<ClInclude Include="Emu\RSX\VK\upscalers\fsr_pass.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\upscalers\upscaling.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKAsyncScheduler.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
|
||||
@ -57,6 +60,7 @@
|
||||
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Emu\RSX\VK\upscalers\fsr1\fsr_pass.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKAsyncScheduler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKCommonDecompiler.cpp" />
|
||||
@ -102,6 +106,11 @@
|
||||
<Project>{c4a10229-4712-4bd2-b63e-50d93c67a038}</Project>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_a_flattened.inc" />
|
||||
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_fsr1_flattened.inc" />
|
||||
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ubershader.glsl" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{3EE5F075-B546-42C4-B6A8-E3CCEF38B78D}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
|
@ -66,6 +66,9 @@
|
||||
<ClCompile Include="Emu\RSX\VK\VKCompute.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKAsyncScheduler.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\VKRenderTargets.cpp" />
|
||||
<ClCompile Include="Emu\RSX\VK\upscalers\fsr1\fsr_pass.cpp">
|
||||
<Filter>upscalers\fsr1</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
|
||||
@ -152,10 +155,36 @@
|
||||
<Filter>vkutils</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\VK\VKAsyncScheduler.h" />
|
||||
<ClInclude Include="Emu\RSX\VK\upscalers\bilinear_pass.hpp">
|
||||
<Filter>upscalers</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\VK\upscalers\fsr_pass.h">
|
||||
<Filter>upscalers</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\VK\upscalers\upscaling.h">
|
||||
<Filter>upscalers</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="vkutils">
|
||||
<UniqueIdentifier>{2c6cb5a5-ed99-44fe-a0b6-7ba1949c8b29}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="upscalers">
|
||||
<UniqueIdentifier>{7294bfa7-a561-4032-8b96-256afbb7476d}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="upscalers\fsr1">
|
||||
<UniqueIdentifier>{8387b0fc-178f-4c9c-9cf2-03df99ce4df2}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_a_flattened.inc">
|
||||
<Filter>upscalers\fsr1</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_fsr1_flattened.inc">
|
||||
<Filter>upscalers\fsr1</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ubershader.glsl">
|
||||
<Filter>upscalers\fsr1</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -71,6 +71,8 @@ enum class emu_settings_type
|
||||
AnisotropicFilterOverride,
|
||||
ResolutionScale,
|
||||
MinimumScalableDimension,
|
||||
FsrUpscalingEnable,
|
||||
FsrSharpeningStrength,
|
||||
ForceCPUBlitEmulation,
|
||||
DisableOnDiskShaderCache,
|
||||
DisableVulkanMemAllocator,
|
||||
@ -240,6 +242,8 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
|
||||
// Vulkan
|
||||
{ emu_settings_type::VulkanAsyncTextureUploads, { "Video", "Vulkan", "Asynchronous Texture Streaming 2"}},
|
||||
{ emu_settings_type::VulkanAsyncSchedulerDriver, { "Video", "Vulkan", "Asynchronous Queue Scheduler"}},
|
||||
{ emu_settings_type::FsrUpscalingEnable, { "Video", "Vulkan", "Enable FidelityFX Super Resolution Upscaling"}},
|
||||
{ emu_settings_type::FsrSharpeningStrength, { "Video", "Vulkan", "FidelityFX CAS Sharpening Intensity"}},
|
||||
|
||||
// Performance Overlay
|
||||
{ emu_settings_type::PerfOverlayEnabled, { "Video", "Performance Overlay", "Enabled" } },
|
||||
|
@ -469,6 +469,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
m_emu_settings->EnhanceCheckBox(ui->asyncTextureStreaming, emu_settings_type::VulkanAsyncTextureUploads);
|
||||
SubscribeTooltip(ui->asyncTextureStreaming, tooltips.settings.async_texture_streaming);
|
||||
|
||||
m_emu_settings->EnhanceCheckBox(ui->fsrUpscalingEnable, emu_settings_type::FsrUpscalingEnable);
|
||||
SubscribeTooltip(ui->fsrUpscalingEnable, tooltips.settings.fsr_upscaling);
|
||||
|
||||
// Radio buttons
|
||||
|
||||
SubscribeTooltip(ui->rb_legacy_recompiler, tooltips.settings.legacy_shader_recompiler);
|
||||
@ -541,6 +544,29 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
ui->minimumScalableDimension->setValue(minimum_scalable_dimension_def);
|
||||
});
|
||||
|
||||
const int fsr_sharpening_strength_def = stoi(m_emu_settings->GetSettingDefault(emu_settings_type::FsrSharpeningStrength));
|
||||
auto fmt_fsr_sharpening_strength = [fsr_sharpening_strength_def](int value)
|
||||
{
|
||||
if (value == fsr_sharpening_strength_def)
|
||||
{
|
||||
return tr("%1% (Default)").arg(value);
|
||||
}
|
||||
return tr("%1%").arg(value);
|
||||
};
|
||||
m_emu_settings->EnhanceSlider(ui->fsrSharpeningStrength, emu_settings_type::FsrSharpeningStrength);
|
||||
SubscribeTooltip(ui->fsrSharpeningStrength, tooltips.settings.fsr_rcas_strength);
|
||||
SubscribeTooltip(ui->fsrSharpeningStrengthVal, tooltips.settings.fsr_rcas_strength);
|
||||
SubscribeTooltip(ui->fsrSharpeningStrengthReset, tooltips.settings.fsr_rcas_strength);
|
||||
ui->fsrSharpeningStrengthVal->setText(fmt_fsr_sharpening_strength(ui->fsrSharpeningStrength->value()));
|
||||
connect(ui->fsrSharpeningStrength, &QSlider::valueChanged, [fmt_fsr_sharpening_strength, this](int value)
|
||||
{
|
||||
ui->fsrSharpeningStrengthVal->setText(fmt_fsr_sharpening_strength(value));
|
||||
});
|
||||
connect(ui->fsrSharpeningStrengthReset, &QAbstractButton::clicked, [fsr_sharpening_strength_def, this]()
|
||||
{
|
||||
ui->fsrSharpeningStrength->setValue(fsr_sharpening_strength_def);
|
||||
});
|
||||
|
||||
// Remove renderers from the renderer Combobox if not supported
|
||||
for (const auto& renderer : r_creator->renderers)
|
||||
{
|
||||
@ -666,8 +692,12 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
|
||||
auto apply_renderer_specific_options = [=, this](const QString& text)
|
||||
{
|
||||
// Vulkan-only
|
||||
ui->asyncTextureStreaming->setEnabled(text == r_creator->Vulkan.name);
|
||||
ui->vulkansched->setEnabled(text == r_creator->Vulkan.name);
|
||||
const bool is_vulkan = (text == r_creator->Vulkan.name);
|
||||
ui->asyncTextureStreaming->setEnabled(is_vulkan);
|
||||
ui->vulkansched->setEnabled(is_vulkan);
|
||||
ui->fsrUpscalingEnable->setEnabled(is_vulkan);
|
||||
ui->fsrSharpeningStrength->setEnabled(is_vulkan);
|
||||
ui->fsrSharpeningStrengthReset->setEnabled(is_vulkan);
|
||||
};
|
||||
|
||||
// Handle connects to disable specific checkboxes that depend on GUI state.
|
||||
|
@ -39,7 +39,7 @@
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="currentIndex">
|
||||
<number>0</number>
|
||||
<number>1</number>
|
||||
</property>
|
||||
<widget class="QWidget" name="coreTab">
|
||||
<attribute name="title">
|
||||
@ -345,7 +345,7 @@
|
||||
</attribute>
|
||||
<layout class="QVBoxLayout" name="gpuTab_layout" stretch="0,1,0">
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="gpuTabLayout" stretch="1,1,1">
|
||||
<layout class="QHBoxLayout" name="gpuTabLayout" stretch="1,0,1">
|
||||
<item>
|
||||
<layout class="QVBoxLayout" name="gpuTabLayoutLeft">
|
||||
<item>
|
||||
@ -634,6 +634,100 @@
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="gb_Upscaling">
|
||||
<property name="title">
|
||||
<string>Upscaling</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="gb_Upscaling_layout">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="fsrUpscalingEnable">
|
||||
<property name="text">
|
||||
<string>Enable FSR Upscaling</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="Line" name="line">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="fsrSharpeningStrengthLabel">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>RCAS Sharpening Strength</string>
|
||||
</property>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignBottom|Qt::AlignLeading|Qt::AlignLeft</set>
|
||||
</property>
|
||||
<property name="margin">
|
||||
<number>1</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="fsrSharpeningLayoutTop">
|
||||
<item>
|
||||
<widget class="QLabel" name="minSharpeningVal">
|
||||
<property name="text">
|
||||
<string>0</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QSlider" name="fsrSharpeningStrength">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="maxSharpeningVal">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>100</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="fsrSharpeningLayoutBottom" stretch="1,0">
|
||||
<item>
|
||||
<widget class="QLabel" name="fsrSharpeningStrengthVal">
|
||||
<property name="text">
|
||||
<string>0</string>
|
||||
</property>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QPushButton" name="fsrSharpeningStrengthReset">
|
||||
<property name="text">
|
||||
<string>Reset</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="gpu_tab_layout_middle_spacer">
|
||||
<property name="orientation">
|
||||
|
@ -167,6 +167,9 @@ public:
|
||||
|
||||
const QString async_texture_streaming = tr("Stream textures to GPU in parallel with 3D rendering.\nCan improve performance on more powerful GPUs that have spare headroom.\nOnly works with Vulkan renderer.");
|
||||
|
||||
const QString fsr_upscaling = tr("Enable FidelityFX Super Resolution upscaling filter to improve the look of upscaled images.\nIf the game is rendering at an internal resolution lower than your window resolution, FidelityFX will handle the upscale.\nCan cause visual artefacts.\nDoes not work with stereo 3D output for now");
|
||||
const QString fsr_rcas_strength = tr("Control the sharpening strength applied by FidelityFX Super Resolution. Higher values will give sharper output but may introduce artefacts.");
|
||||
|
||||
// gui
|
||||
|
||||
const QString log_limit = tr("Sets the maximum amount of blocks that the log can display.\nThis usually equals the number of lines.\nSet 0 in order to remove the limit.");
|
||||
|
Loading…
Reference in New Issue
Block a user