mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-22 10:42:36 +01:00
cellRec: implement audio encoding (no mixing yet)
This commit is contained in:
parent
4c14290694
commit
51d0df97d3
@ -155,7 +155,8 @@ struct rec_param
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr u32 rec_framerate = 30; // Always 30 fps
|
static constexpr u32 rec_framerate = 30; // Always 30 fps
|
||||||
|
static constexpr u32 rec_channels = 2; // Always 2 channels
|
||||||
|
|
||||||
class rec_video_sink : public utils::video_sink
|
class rec_video_sink : public utils::video_sink
|
||||||
{
|
{
|
||||||
@ -219,11 +220,21 @@ struct rec_info
|
|||||||
vm::bptr<u8> video_input_buffer{}; // Used by the game to inject a frame right before it would render a frame to the screen.
|
vm::bptr<u8> video_input_buffer{}; // Used by the game to inject a frame right before it would render a frame to the screen.
|
||||||
vm::bptr<u8> audio_input_buffer{}; // Used by the game to inject audio: 2-channel interleaved (left-right) * 256 samples * sizeof(f32) at 48000 Hz (48 kHz)
|
vm::bptr<u8> audio_input_buffer{}; // Used by the game to inject audio: 2-channel interleaved (left-right) * 256 samples * sizeof(f32) at 48000 Hz (48 kHz)
|
||||||
|
|
||||||
|
// Wrapper for our audio data
|
||||||
|
struct audio_block
|
||||||
|
{
|
||||||
|
// 2-channel interleaved (left-right), 256 samples, float
|
||||||
|
static constexpr usz block_size = rec_channels * CELL_REC_AUDIO_BLOCK_SAMPLES * sizeof(f32);
|
||||||
|
std::array<u8, block_size> block{};
|
||||||
|
s64 pts{};
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<utils::video_sink::encoder_frame> video_ringbuffer;
|
std::vector<utils::video_sink::encoder_frame> video_ringbuffer;
|
||||||
std::vector<u8> audio_ringbuffer;
|
std::vector<audio_block> audio_ringbuffer;
|
||||||
usz video_ring_pos = 0;
|
usz video_ring_pos = 0;
|
||||||
usz video_ring_frame_count = 0;
|
usz video_ring_frame_count = 0;
|
||||||
usz audio_ring_step = 0;
|
usz audio_ring_pos = 0;
|
||||||
|
usz audio_ring_block_count = 0;
|
||||||
|
|
||||||
usz next_video_ring_pos()
|
usz next_video_ring_pos()
|
||||||
{
|
{
|
||||||
@ -232,6 +243,13 @@ struct rec_info
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the audio ringbuffer slot to write next and advances the cursor by one,
// wrapping around at the end of the buffer.
// The ringbuffer must not be empty when this is called (the modulo would divide by zero).
usz next_audio_ring_pos()
{
	const usz slot_count = audio_ringbuffer.size();
	const usz current = audio_ring_pos;

	audio_ring_pos = (current + 1) % slot_count;

	return current;
}
|
||||||
|
|
||||||
std::shared_ptr<rec_video_sink> ringbuffer_sink;
|
std::shared_ptr<rec_video_sink> ringbuffer_sink;
|
||||||
std::shared_ptr<utils::video_encoder> encoder;
|
std::shared_ptr<utils::video_encoder> encoder;
|
||||||
std::unique_ptr<named_thread<std::function<void()>>> video_provider_thread;
|
std::unique_ptr<named_thread<std::function<void()>>> video_provider_thread;
|
||||||
@ -245,13 +263,13 @@ struct rec_info
|
|||||||
u32 video_bps = 512000;
|
u32 video_bps = 512000;
|
||||||
s32 video_codec_id = 12; // AV_CODEC_ID_MPEG4
|
s32 video_codec_id = 12; // AV_CODEC_ID_MPEG4
|
||||||
s32 max_b_frames = 2;
|
s32 max_b_frames = 2;
|
||||||
const u32 fps = rec_framerate; // Always 30 fps
|
static constexpr u32 fps = rec_framerate; // Always 30 fps
|
||||||
|
|
||||||
// Audio parameters
|
// Audio parameters
|
||||||
u32 sample_rate = 48000;
|
u32 sample_rate = 48000;
|
||||||
u32 audio_bps = 64000;
|
u32 audio_bps = 64000;
|
||||||
s32 audio_codec_id = 86018; // AV_CODEC_ID_AAC
|
s32 audio_codec_id = 86018; // AV_CODEC_ID_AAC
|
||||||
const u32 channels = 2; // Always 2 channels
|
static constexpr u32 channels = rec_channels; // Always 2 channels
|
||||||
|
|
||||||
// Recording duration
|
// Recording duration
|
||||||
atomic_t<u64> recording_time_start = 0;
|
atomic_t<u64> recording_time_start = 0;
|
||||||
@ -588,8 +606,7 @@ void rec_info::start_video_provider()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We only care for new video frames or audio samples that can be properly encoded, so we check the timestamps and pts.
|
// We only care for new video frames or audio samples that can be properly encoded, so we check the timestamps and pts.
|
||||||
const usz timestamp_us = get_system_time() - recording_time_start - pause_time_total;
|
const usz timestamp_ms = (get_system_time() - recording_time_start - pause_time_total) / 1000;
|
||||||
const usz timestamp_ms = timestamp_us / 1000;
|
|
||||||
|
|
||||||
/////////////////
|
/////////////////
|
||||||
// VIDEO //
|
// VIDEO //
|
||||||
@ -632,7 +649,7 @@ void rec_info::start_video_provider()
|
|||||||
// The video frames originate from our render pipeline and are stored in a ringbuffer.
|
// The video frames originate from our render pipeline and are stored in a ringbuffer.
|
||||||
utils::video_sink::encoder_frame frame = ringbuffer_sink->get_frame();
|
utils::video_sink::encoder_frame frame = ringbuffer_sink->get_frame();
|
||||||
|
|
||||||
if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_video_pts && frame.data.size() > 0)
|
if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_video_pts && !frame.data.empty())
|
||||||
{
|
{
|
||||||
ensure(frame.data.size() == frame_size);
|
ensure(frame.data.size() == frame_size);
|
||||||
utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
|
utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()];
|
||||||
@ -647,34 +664,75 @@ void rec_info::start_video_provider()
|
|||||||
// The video frames originate from our render pipeline and are directly encoded by the encoder video sink itself.
|
// The video frames originate from our render pipeline and are directly encoded by the encoder video sink itself.
|
||||||
//}
|
//}
|
||||||
|
|
||||||
if (use_internal_audio)
|
/////////////////
|
||||||
{
|
// AUDIO //
|
||||||
// TODO: fetch audio
|
/////////////////
|
||||||
}
|
|
||||||
|
|
||||||
if (use_external_audio && audio_input_buffer)
|
const usz timestamp_us = get_system_time() - recording_time_start - pause_time_total;
|
||||||
{
|
|
||||||
// 2-channel interleaved (left-right), 256 samples, float
|
|
||||||
std::array<f32, 2 * CELL_REC_AUDIO_BLOCK_SAMPLES> audio_data{};
|
|
||||||
std::memcpy(audio_data.data(), audio_input_buffer.get_ptr(), audio_data.size() * sizeof(f32));
|
|
||||||
|
|
||||||
// TODO: mix audio with param.audio_input_mix_vol
|
// TODO: mix external and internal audio with param.audio_input_mix_vol
|
||||||
}
|
// TODO: mix channels if necessary
|
||||||
|
if (use_external_audio)
|
||||||
|
{
|
||||||
|
// The audio samples originate from cellRec instead of our render pipeline.
|
||||||
|
// TODO: This needs to be synchronized with the game somehow if possible.
|
||||||
|
if (const s64 pts = encoder->get_audio_pts(timestamp_us); pts > last_audio_pts)
|
||||||
|
{
|
||||||
|
if (audio_input_buffer)
|
||||||
|
{
|
||||||
|
if (use_ring_buffer)
|
||||||
|
{
|
||||||
|
// The audio samples originate from cellRec and are stored in a ringbuffer.
|
||||||
|
audio_block& sample_block = audio_ringbuffer[next_audio_ring_pos()];
|
||||||
|
std::memcpy(sample_block.block.data(), audio_input_buffer.get_ptr(), sample_block.block.size());
|
||||||
|
sample_block.pts = pts;
|
||||||
|
audio_ring_block_count++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// The audio samples originate from cellRec and are pushed to the encoder immediately.
|
||||||
|
encoder->add_audio_samples(audio_input_buffer.get_ptr(), CELL_REC_AUDIO_BLOCK_SAMPLES, channels, timestamp_us);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (use_ring_buffer)
|
last_audio_pts = pts;
|
||||||
{
|
}
|
||||||
// TODO: add audio properly
|
|
||||||
//std::memcpy(&ringbuffer[get_ring_pos(pts) + ring_audio_offset], audio_data.data(), audio_data.size());
|
|
||||||
}
|
}
|
||||||
else
|
else if (use_ring_buffer && ringbuffer_sink && use_internal_audio)
|
||||||
{
|
{
|
||||||
// TODO: add audio to encoder
|
// The audio samples originate from cellAudio and are stored in a ringbuffer.
|
||||||
|
utils::video_sink::encoder_sample sample = ringbuffer_sink->get_sample();
|
||||||
|
|
||||||
|
if (!sample.data.empty() && sample.channels >= 2 && sample.sample_count >= CELL_REC_AUDIO_BLOCK_SAMPLES)
|
||||||
|
{
|
||||||
|
s64 pts = encoder->get_audio_pts(sample.timestamp_us);
|
||||||
|
|
||||||
|
// Each encoder_sample can have more than one block
|
||||||
|
for (usz i = 0; i < sample.sample_count; i += CELL_REC_AUDIO_BLOCK_SAMPLES)
|
||||||
|
{
|
||||||
|
if (pts > last_audio_pts)
|
||||||
|
{
|
||||||
|
audio_block& sample_block = audio_ringbuffer[next_audio_ring_pos()];
|
||||||
|
std::memcpy(sample_block.block.data(), &sample.data[i * channels * sizeof(f32)], sample_block.block.size());
|
||||||
|
sample_block.pts = pts;
|
||||||
|
last_audio_pts = pts;
|
||||||
|
audio_ring_block_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increase pts for each sample block
|
||||||
|
pts++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
//else
|
||||||
|
//{
|
||||||
|
// The audio samples originate from cellAudio and are directly encoded by the encoder video sink itself.
|
||||||
|
//}
|
||||||
|
|
||||||
// Update recording time
|
// Update recording time
|
||||||
recording_time_total = encoder->get_timestamp_ms(encoder->last_video_pts());
|
recording_time_total = encoder->get_timestamp_ms(encoder->last_video_pts());
|
||||||
|
|
||||||
thread_ctrl::wait_for(100);
|
thread_ctrl::wait_for(1);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -705,7 +763,7 @@ void rec_info::stop_video_provider(bool flush)
|
|||||||
// Flush the ringbuffer if necessary.
|
// Flush the ringbuffer if necessary.
|
||||||
// This should only happen if the video sink is not the encoder itself.
|
// This should only happen if the video sink is not the encoder itself.
|
||||||
// In this case the encoder should have been idle until now.
|
// In this case the encoder should have been idle until now.
|
||||||
if (flush && param.ring_sec > 0 && !video_ringbuffer.empty())
|
if (flush && param.ring_sec > 0 && (!video_ringbuffer.empty() || !audio_ringbuffer.empty()))
|
||||||
{
|
{
|
||||||
cellRec.notice("Flushing video ringbuffer.");
|
cellRec.notice("Flushing video ringbuffer.");
|
||||||
|
|
||||||
@ -714,19 +772,51 @@ void rec_info::stop_video_provider(bool flush)
|
|||||||
ensure(encoder);
|
ensure(encoder);
|
||||||
|
|
||||||
const usz frame_count = std::min(video_ringbuffer.size(), video_ring_frame_count);
|
const usz frame_count = std::min(video_ringbuffer.size(), video_ring_frame_count);
|
||||||
const usz start_offset = video_ring_frame_count < video_ringbuffer.size() ? 0 : video_ring_frame_count;
|
const usz video_start_offset = video_ring_frame_count < video_ringbuffer.size() ? 0 : video_ring_frame_count;
|
||||||
const s64 start_pts = video_ringbuffer[start_offset % video_ringbuffer.size()].pts;
|
const s64 video_start_pts = video_ringbuffer.empty() ? 0 : video_ringbuffer[video_start_offset % video_ringbuffer.size()].pts;
|
||||||
|
|
||||||
for (usz i = 0; i < frame_count; i++)
|
const usz block_count = std::min(audio_ringbuffer.size(), audio_ring_block_count);
|
||||||
|
const usz audio_start_offset = audio_ring_block_count < audio_ringbuffer.size() ? 0 : audio_ring_block_count;
|
||||||
|
const s64 audio_start_pts = audio_ringbuffer.empty() ? 0 : audio_ringbuffer[audio_start_offset % audio_ringbuffer.size()].pts;
|
||||||
|
|
||||||
|
cellRec.error("Flushing video ringbuffer: block_count=%d, audio_ringbuffer.size=%d", block_count, audio_ringbuffer.size());
|
||||||
|
cellRec.error("Flushing video ringbuffer: video_start_pts=%d, audio_start_pts=%d", video_start_pts, audio_start_pts);
|
||||||
|
|
||||||
|
// Try to add the frames and samples in proper order
|
||||||
|
for (usz sync_timestamp_us = 0, frame = 0, block = 0; frame < frame_count || block < block_count; frame++)
|
||||||
{
|
{
|
||||||
const usz pos = (start_offset + i) % video_ringbuffer.size();
|
// Add one frame
|
||||||
utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos];
|
if (frame < frame_count)
|
||||||
encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts));
|
{
|
||||||
|
const usz pos = (video_start_offset + frame) % video_ringbuffer.size();
|
||||||
|
utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos];
|
||||||
|
const usz timestamp_ms = encoder->get_timestamp_ms(frame_data.pts - video_start_pts);
|
||||||
|
encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, timestamp_ms);
|
||||||
|
|
||||||
// TODO: add audio data to encoder
|
// Increase sync timestamp
|
||||||
|
sync_timestamp_us = timestamp_ms * 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add all the samples that fit into the last frame
|
||||||
|
for (usz i = block; i < block_count; i++)
|
||||||
|
{
|
||||||
|
const usz pos = (audio_start_offset + i) % audio_ringbuffer.size();
|
||||||
|
const audio_block& sample_block = audio_ringbuffer[pos];
|
||||||
|
const usz timestamp_us = encoder->get_audio_timestamp_us(sample_block.pts - audio_start_pts);
|
||||||
|
|
||||||
|
// Stop adding new samples if the sync timestamp is exceeded, unless we already added all the frames.
|
||||||
|
if (timestamp_us > sync_timestamp_us && frame < frame_count)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
encoder->add_audio_samples(sample_block.block.data(), CELL_REC_AUDIO_BLOCK_SAMPLES, channels, timestamp_us);
|
||||||
|
block++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
video_ringbuffer.clear();
|
video_ringbuffer.clear();
|
||||||
|
audio_ringbuffer.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1093,6 +1183,8 @@ error_code cellRecOpen(vm::cptr<char> pDirName, vm::cptr<char> pFileName, vm::cp
|
|||||||
rec.cbUserData = cbUserData;
|
rec.cbUserData = cbUserData;
|
||||||
rec.last_video_pts = -1;
|
rec.last_video_pts = -1;
|
||||||
rec.audio_ringbuffer.clear();
|
rec.audio_ringbuffer.clear();
|
||||||
|
rec.audio_ring_block_count = 0;
|
||||||
|
rec.audio_ring_pos = 0;
|
||||||
rec.video_ringbuffer.clear();
|
rec.video_ringbuffer.clear();
|
||||||
rec.video_ring_frame_count = 0;
|
rec.video_ring_frame_count = 0;
|
||||||
rec.video_ring_pos = 0;
|
rec.video_ring_pos = 0;
|
||||||
@ -1103,16 +1195,13 @@ error_code cellRecOpen(vm::cptr<char> pDirName, vm::cptr<char> pFileName, vm::cp
|
|||||||
|
|
||||||
if (rec.param.ring_sec > 0)
|
if (rec.param.ring_sec > 0)
|
||||||
{
|
{
|
||||||
const u32 audio_size_per_sample = rec.channels * sizeof(float);
|
const usz audio_ring_buffer_size = static_cast<usz>(std::ceil((rec.param.ring_sec * rec.sample_rate) / static_cast<f32>(CELL_REC_AUDIO_BLOCK_SAMPLES)));
|
||||||
const u32 audio_size_per_second = rec.sample_rate * audio_size_per_sample;
|
|
||||||
const usz audio_ring_buffer_size = rec.param.ring_sec * audio_size_per_second;
|
|
||||||
const usz video_ring_buffer_size = rec.param.ring_sec * rec.fps;
|
const usz video_ring_buffer_size = rec.param.ring_sec * rec.fps;
|
||||||
|
|
||||||
cellRec.notice("Preparing ringbuffer for %d seconds. video_ring_buffer_size=%d, audio_ring_buffer_size=%d, pitch=%d, width=%d, height=%d", rec.param.ring_sec, video_ring_buffer_size, audio_ring_buffer_size, rec.input_format.pitch, rec.input_format.width, rec.input_format.height);
|
cellRec.notice("Preparing ringbuffer for %d seconds. video_ring_buffer_size=%d, audio_ring_buffer_size=%d, pitch=%d, width=%d, height=%d", rec.param.ring_sec, video_ring_buffer_size, audio_ring_buffer_size, rec.input_format.pitch, rec.input_format.width, rec.input_format.height);
|
||||||
|
|
||||||
rec.audio_ringbuffer.resize(audio_ring_buffer_size);
|
rec.audio_ringbuffer.resize(audio_ring_buffer_size);
|
||||||
rec.audio_ring_step = audio_size_per_sample;
|
rec.video_ringbuffer.resize(video_ring_buffer_size);
|
||||||
rec.video_ringbuffer.resize(video_ring_buffer_size, {});
|
|
||||||
|
|
||||||
rec.ringbuffer_sink = std::make_shared<rec_video_sink>();
|
rec.ringbuffer_sink = std::make_shared<rec_video_sink>();
|
||||||
rec.ringbuffer_sink->use_internal_audio = rec.param.use_internal_audio();
|
rec.ringbuffer_sink->use_internal_audio = rec.param.use_internal_audio();
|
||||||
|
@ -26,7 +26,7 @@ namespace utils
|
|||||||
m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame));
|
m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame));
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_audio_samples(u8* buf, u32 sample_count, u16 channels, usz timestamp_us)
|
void add_audio_samples(const u8* buf, u32 sample_count, u16 channels, usz timestamp_us)
|
||||||
{
|
{
|
||||||
// Do not allow new samples while flushing
|
// Do not allow new samples while flushing
|
||||||
if (m_flush || !buf || !sample_count || !channels)
|
if (m_flush || !buf || !sample_count || !channels)
|
||||||
@ -51,12 +51,14 @@ namespace utils
|
|||||||
|
|
||||||
// Converts a video pts (a frame index at m_framerate frames per second) to a
// timestamp in milliseconds, rounded to the nearest millisecond.
usz get_timestamp_ms(s64 pts) const
{
	// Compute in double precision: a 32-bit float only has a 24-bit significand,
	// which is not enough to hold pts * 1000 or the resulting millisecond value
	// exactly for long recordings.
	const double frames_per_sec = static_cast<double>(m_framerate);
	const double timestamp_ms = (static_cast<double>(pts) * 1000.0) / frames_per_sec;

	return static_cast<usz>(std::round(timestamp_ms));
}
|
||||||
|
|
||||||
// Converts an audio pts (counted in blocks of m_samples_per_block samples at
// m_sample_rate samples per second) to a timestamp in microseconds.
// The fractional part of the result is truncated.
usz get_audio_timestamp_us(s64 pts) const
{
	static constexpr double us_per_sec = 1000000.0;

	// Compute in double precision and multiply before dividing.
	// Microsecond timestamps exceed the 24-bit significand of a 32-bit float after
	// roughly 16.8 seconds, and a pre-rounded "microseconds per block" factor can
	// push exact results (e.g. 16000 us) just below the integer before truncation.
	const double total_samples = static_cast<double>(pts) * static_cast<double>(m_samples_per_block);
	const double timestamp_us = (total_samples * us_per_sec) / static_cast<double>(m_sample_rate);

	return static_cast<usz>(timestamp_us);
}
|
||||||
|
|
||||||
atomic_t<bool> has_error{false};
|
atomic_t<bool> has_error{false};
|
||||||
|
Loading…
Reference in New Issue
Block a user