diff --git a/SCsub b/SCsub index c5e55eb..a9ca34d 100644 --- a/SCsub +++ b/SCsub @@ -41,6 +41,9 @@ else: ffmpeg_install_action = ffmpeg_download.ffmpeg_install(env_ffmpeg, "#bin", "thirdparty/ffmpeg") env_ffmpeg.Depends(sources, ffmpeg_install_action) +env_ffmpeg.GLSL_HEADER("yuv_to_rgb.glsl") +env_ffmpeg.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"]) + # env_ffmpeg.Append(CPPDEFINES=["FFMPEG_MT_GPU_UPLOAD"]) if ARGUMENTS.get("ffmpeg_shared", "no") == "yes": # Shared lib compilation diff --git a/ffmpeg_video_stream.cpp b/ffmpeg_video_stream.cpp index 89e1d42..6e8759b 100644 --- a/ffmpeg_video_stream.cpp +++ b/ffmpeg_video_stream.cpp @@ -29,13 +29,73 @@ /**************************************************************************/ #include "ffmpeg_video_stream.h" +#include #ifdef GDEXTENSION #include "gdextension_build/gdex_print.h" +#include +#include +#include +#include +#include +#include +#include +#include +typedef RenderingDevice RD; +typedef RenderingServer RS; +typedef RDTextureView RDTextureViewC; +struct RDTextureFormatC { + RenderingDevice::DataFormat format; + int width; + int height; + int usage_bits; + int depth; + int array_layers; + int mipmaps; + + Ref get_texture_format() { + Ref tf; + tf.instantiate(); + tf->set_height(height); + tf->set_width(width); + tf->set_usage_bits(usage_bits); + tf->set_format(format); + tf->set_depth(depth); + tf->set_array_layers(array_layers); + tf->set_mipmaps(mipmaps); + return tf; + } +}; +RDTextureFormatC tfc_from_rdtf(Ref p_texture_format) { + RDTextureFormatC tfc; + tfc.width = p_texture_format->get_width(); + tfc.height = p_texture_format->get_height(); + tfc.usage_bits = p_texture_format->get_usage_bits(); + tfc.format = p_texture_format->get_format(); + tfc.depth = p_texture_format->get_depth(); + tfc.array_layers = p_texture_format->get_array_layers(); + tfc.mipmaps = p_texture_format->get_mipmaps(); + return tfc; +} + +typedef int64_t ComputeListID; +#define TEXTURE_FORMAT_COMPAT(tf) tfc_from_rdtf(tf); +#else +#include "servers/rendering/rendering_device_binds.h" +typedef RD::TextureFormat RDTextureFormatC; +typedef RD::TextureView RDTextureViewC; +#define TEXTURE_FORMAT_COMPAT(tf) tf; +typedef RD::ComputeListID ComputeListID; #endif #include "tracy_import.h" +#include "yuv_to_rgb.glsl.gen.h" +#ifdef GDEXTENSION +#define FREE_RD_RID(rid) RS::get_singleton()->get_rendering_device()->free_rid(rid); +#else +#define FREE_RD_RID(rid) RS::get_singleton()->get_rendering_device()->free(rid); +#endif void FFmpegVideoStreamPlayback::seek_into_sync() { decoder->seek(playback_position); Vector> decoded_frames; @@ -127,7 +187,22 @@ void FFmpegVideoStreamPlayback::update_internal(double p_delta) { } #ifndef FFMPEG_MT_GPU_UPLOAD if (got_new_frame) { - if (texture.is_valid()) { + // YUV conversion + if (last_frame->get_format() == FFmpegFrameFormat::YUV420P) { + Ref y_plane = last_frame->get_yuv_image_plane(0); + Ref u_plane = last_frame->get_yuv_image_plane(1); + Ref v_plane = last_frame->get_yuv_image_plane(2); + + ERR_FAIL_COND(!y_plane.is_valid()); + ERR_FAIL_COND(!u_plane.is_valid()); + ERR_FAIL_COND(!v_plane.is_valid()); + + yuv_converter->set_plane_image(0, y_plane); + yuv_converter->set_plane_image(1, u_plane); + yuv_converter->set_plane_image(2, v_plane); + yuv_converter->convert(); + // RGBA texture handling + } else if (texture.is_valid()) { if (texture->get_size() != last_frame_image->get_size() || texture->get_format() != last_frame_image->get_format()) { ZoneNamedN(__img_upate_slow, "Image update slow", true); texture->set_image(last_frame_image); // should never happen, but life has many doors ed-boy... @@ -195,11 +270,17 @@ void FFmpegVideoStreamPlayback::load(Ref p_file_access) { decoder->start_decoding(); Vector2i size = decoder->get_size(); if (decoder->get_decoder_state() != VideoDecoder::FAULTED) { + if (decoder->get_frame_format() == FFmpegFrameFormat::YUV420P) { + yuv_converter.instantiate(); + yuv_converter->set_frame_size(size); + yuv_texture = yuv_converter->get_output_texture(); + } else { #ifdef GDEXTENSION - texture = ImageTexture::create_from_image(Image::create(size.x, size.y, false, Image::FORMAT_RGBA8)); + texture = ImageTexture::create_from_image(Image::create(size.x, size.y, false, Image::FORMAT_RGBA8)); #else - texture = ImageTexture::create_from_image(Image::create_empty(size.x, size.y, false, Image::FORMAT_RGBA8)); + texture = ImageTexture::create_from_image(Image::create_empty(size.x, size.y, false, Image::FORMAT_RGBA8)); #endif + } } } @@ -250,6 +331,9 @@ Ref FFmpegVideoStreamPlayback::get_texture_internal() const { #ifdef FFMPEG_MT_GPU_UPLOAD return last_frame_texture; #else + if (yuv_converter.is_valid()) { + return yuv_converter->get_output_texture(); + } return texture; #endif } @@ -277,3 +361,237 @@ void FFmpegVideoStreamPlayback::clear() { frames_processed = 0; playing = false; } + +YUVGPUConverter::~YUVGPUConverter() { + RenderingDevice *rd = RS::get_singleton()->get_rendering_device(); + + for (size_t i = 0; i < std::size(yuv_planes_uniform_sets); i++) { + if (yuv_planes_uniform_sets[i].is_valid()) { + FREE_RD_RID(yuv_planes_uniform_sets[i]); + } + if (yuv_plane_textures[i].is_valid()) { + FREE_RD_RID(yuv_plane_textures[i]); + } + } + + if (out_texture.is_valid() && out_texture->get_texture_rd_rid().is_valid()) { + FREE_RD_RID(out_texture->get_texture_rd_rid()); + } + + if (pipeline.is_valid()) { + FREE_RD_RID(pipeline); + } + + if (shader.is_valid()) { + FREE_RD_RID(shader); + } +} + +void YUVGPUConverter::_ensure_pipeline() { + if (pipeline.is_valid()) { + return; + } + + RD *rd = RS::get_singleton()->get_rendering_device(); + +#ifdef GDEXTENSION + + Ref shader_source; + shader_source.instantiate(); + // Ugly hack to skip the #[compute] in the header, because parse_versions_from_text is not available through GDNative + shader_source->set_stage_source(RenderingDevice::ShaderStage::SHADER_STAGE_COMPUTE, yuv_to_rgb_shader_glsl + 10); + Ref shader_spirv = rd->shader_compile_spirv_from_source(shader_source); + print_line("MAKE SHADER"); + +#else + + Ref shader_file; + shader_file.instantiate(); + Error err = shader_file->parse_versions_from_text(yuv_to_rgb_shader_glsl); + if (err != OK) { + print_line("Something catastrophic happened, call eirexe"); + } + Vector shader_spirv = shader_file->get_spirv_stages(); + +#endif + shader = rd->shader_create_from_spirv(shader_spirv); + pipeline = rd->compute_pipeline_create(shader); +} + +Error YUVGPUConverter::_ensure_plane_textures() { + RD *rd = RS::get_singleton()->get_rendering_device(); + for (size_t i = 0; i < std::size(yuv_plane_textures); i++) { + if (yuv_plane_textures[i].is_valid()) { + RDTextureFormatC format = TEXTURE_FORMAT_COMPAT(rd->texture_get_format(yuv_plane_textures[i])); + + int desired_frame_width = i == 0 ? frame_size.width : Math::ceil(frame_size.width / 2.0f); + int desired_frame_height = i == 0 ? frame_size.height : Math::ceil(frame_size.height / 2.0f); + + if (format.width == desired_frame_width && format.height == desired_frame_height) { + continue; + } + continue; + } + + // Texture didn't exist or was invalid, re-create it + + // free existing texture if needed + if (yuv_plane_textures[i].is_valid()) { + FREE_RD_RID(yuv_plane_textures[i]); + } + + RDTextureFormatC new_format; + new_format.format = RenderingDevice::DATA_FORMAT_R8_UNORM; + // chroma planes are half the size of the luma plane + new_format.width = i == 0 ? frame_size.width : Math::ceil(frame_size.width / 2.0f); + new_format.height = i == 0 ? frame_size.height : Math::ceil(frame_size.height / 2.0f); + new_format.depth = 1; + new_format.array_layers = 1; + new_format.mipmaps = 1; + new_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + +#ifdef GDEXTENSION + Ref new_format_c = new_format.get_texture_format(); + Ref texture_view; + texture_view.instantiate(); +#else + RD::TextureFormat new_format_c = new_format; + RDTextureViewC texture_view; +#endif + yuv_plane_textures[i] = rd->texture_create(new_format_c, texture_view); + + if (yuv_planes_uniform_sets[i].is_valid()) { + FREE_RD_RID(yuv_planes_uniform_sets[i]); + } + + yuv_planes_uniform_sets[i] = _create_uniform_set(yuv_plane_textures[i]); + } + + return OK; +} + +Error YUVGPUConverter::_ensure_output_texture() { + _ensure_pipeline(); + RD *rd = RS::get_singleton()->get_rendering_device(); + if (!out_texture.is_valid()) { + out_texture.instantiate(); + } + + if (out_texture->get_texture_rd_rid().is_valid()) { + RDTextureFormatC format = TEXTURE_FORMAT_COMPAT(rd->texture_get_format(out_texture->get_texture_rd_rid())); + if (format.width == frame_size.width && format.height == frame_size.height) { + return OK; + } + } + + if (out_texture->get_texture_rd_rid().is_valid()) { + FREE_RD_RID(out_texture->get_texture_rd_rid()); + } + + print_line("CRECREATE OUTPUT TEXTURE"); + + RDTextureFormatC out_texture_format; + out_texture_format.format = RenderingDevice::DATA_FORMAT_R8G8B8A8_UNORM; + out_texture_format.width = frame_size.width; + out_texture_format.height = frame_size.height; + out_texture_format.depth = 1; + out_texture_format.array_layers = 1; + out_texture_format.mipmaps = 1; + // RD::TEXTURE_USAGE_CAN_UPDATE_BIT not needed since we won't update it from the CPU + out_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + +#ifdef GDEXTENSION + Ref texture_view; + texture_view.instantiate(); + Ref out_texture_format_c = out_texture_format.get_texture_format(); +#else + RD::TextureFormat out_texture_format_c = out_texture_format; + RD::TextureView texture_view; +#endif + out_texture->set_texture_rd_rid(rd->texture_create(out_texture_format_c, texture_view)); + + if (out_uniform_set.is_valid()) { + FREE_RD_RID(out_uniform_set); + } + out_uniform_set = _create_uniform_set(out_texture->get_texture_rd_rid()); + return OK; +} + +RID YUVGPUConverter::_create_uniform_set(const RID &p_texture_rd_rid) { +#ifdef GDEXTENSION + Ref uniform; + uniform.instantiate(); + uniform->set_binding(0); + uniform->set_uniform_type(RD::UNIFORM_TYPE_IMAGE); + uniform->add_id(p_texture_rd_rid); + TypedArray uniforms; + uniforms.push_back(uniform); +#else + RD::Uniform uniform; + uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE; + uniform.binding = 0; + uniform.append_id(p_texture_rd_rid); + Vector uniforms; + uniforms.push_back(uniform); +#endif + return RS::get_singleton()->get_rendering_device()->uniform_set_create(uniforms, shader, 0); +} + +void YUVGPUConverter::_upload_plane_images() { + for (size_t i = 0; i < std::size(yuv_plane_images); i++) { + ERR_CONTINUE_MSG(!yuv_plane_images[i].is_valid(), vformat("YUV plane %d was missing, cannot upload texture data.", i)); + RS::get_singleton()->get_rendering_device()->texture_update(yuv_plane_textures[i], 0, yuv_plane_images[i]->get_data()); + } +} + +void YUVGPUConverter::set_plane_image(int p_plane_idx, Ref p_image) { + ERR_FAIL_COND(!p_image.is_valid()); + ERR_FAIL_INDEX((size_t)p_plane_idx, std::size(yuv_plane_images)); + // Sanity checks + int desired_frame_width = p_plane_idx == 0 ? frame_size.width : Math::ceil(frame_size.width / 2.0f); + int desired_frame_height = p_plane_idx == 0 ? frame_size.height : Math::ceil(frame_size.height / 2.0f); + ERR_FAIL_COND_MSG(p_image->get_width() != desired_frame_width, vformat("Wrong YUV plane width for plane %d, expected %d got %d", p_plane_idx, desired_frame_width, p_image->get_width())); + ERR_FAIL_COND_MSG(p_image->get_height() != desired_frame_height, vformat("Wrong YUV plane height for plane %, expected %d got %d", p_plane_idx, desired_frame_height, p_image->get_height())); + ERR_FAIL_COND_MSG(p_image->get_format() != Image::FORMAT_R8, "Wrong image format, expected R8"); + yuv_plane_images[p_plane_idx] = p_image; +} + +Vector2i YUVGPUConverter::get_frame_size() const { return frame_size; } + +void YUVGPUConverter::set_frame_size(const Vector2i &p_frame_size) { + ERR_FAIL_COND_MSG(p_frame_size.x == 0, "Frame size cannot be zero!"); + ERR_FAIL_COND_MSG(p_frame_size.y == 0, "Frame size cannot be zero!"); + frame_size = p_frame_size; + + yuv_plane_images[0].unref(); + yuv_plane_images[1].unref(); + yuv_plane_images[2].unref(); +} + +void YUVGPUConverter::convert() { + // First we must ensure everything we need exists + _ensure_pipeline(); + _ensure_plane_textures(); + _ensure_output_texture(); + _upload_plane_images(); + + RD *rd = RS::get_singleton()->get_rendering_device(); + + ComputeListID compute_list = rd->compute_list_begin(); + rd->compute_list_bind_compute_pipeline(compute_list, pipeline); + rd->compute_list_bind_uniform_set(compute_list, yuv_planes_uniform_sets[0], 0); + rd->compute_list_bind_uniform_set(compute_list, yuv_planes_uniform_sets[1], 1); + rd->compute_list_bind_uniform_set(compute_list, yuv_planes_uniform_sets[2], 2); + rd->compute_list_bind_uniform_set(compute_list, out_uniform_set, 3); + rd->compute_list_dispatch(compute_list, Math::ceil(frame_size.x / 8.0f), Math::ceil(frame_size.y / 8.0f), 1); + rd->compute_list_end(); +} + +Ref YUVGPUConverter::get_output_texture() const { + const_cast(this)->_ensure_output_texture(); + return out_texture; +} + +YUVGPUConverter::YUVGPUConverter() { + out_texture.instantiate(); +} diff --git a/ffmpeg_video_stream.h b/ffmpeg_video_stream.h index bdcd3c6..7a7fcd5 100644 --- a/ffmpeg_video_stream.h +++ b/ffmpeg_video_stream.h @@ -34,6 +34,7 @@ #ifdef GDEXTENSION // Headers for building as GDExtension plug-in. +#include #include #include #include @@ -45,18 +46,47 @@ using namespace godot; #else #include "core/object/ref_counted.h" +#include "scene/resources/atlas_texture.h" +#include "scene/resources/texture_rd.h" #include "scene/resources/video_stream.h" #endif #include "video_decoder.h" +class YUVGPUConverter : public RefCounted { + RID shader; + Ref yuv_plane_images[3]; + RID yuv_plane_textures[3]; + RID yuv_planes_uniform_sets[3]; + RID pipeline; + Ref out_texture; + RID out_uniform_set; + Vector2i frame_size; + +private: + void _ensure_pipeline(); + Error _ensure_plane_textures(); + Error _ensure_output_texture(); + RID _create_uniform_set(const RID &p_texture_rd_rid); + void _upload_plane_images(); + +public: + void set_plane_image(int p_plane_idx, Ref p_image); + Vector2i get_frame_size() const; + void set_frame_size(const Vector2i &p_frame_size); + void convert(); + Ref get_output_texture() const; + YUVGPUConverter(); + ~YUVGPUConverter(); +}; + // We have to use this function redirection system for GDExtension because the naming conventions // for the functions we are supposed to override are different there - #include "gdextension_build/func_redirect.h" class FFmpegVideoStreamPlayback : public VideoStreamPlayback { GDCLASS(FFmpegVideoStreamPlayback, VideoStreamPlayback); + const int LENIENCE_BEFORE_SEEK = 2500; double playback_position = 0.0f; @@ -69,6 +99,7 @@ class FFmpegVideoStreamPlayback : public VideoStreamPlayback { #endif Ref last_frame_image; Ref texture; + Ref yuv_texture; bool looping = false; bool buffering = false; int frames_processed = 0; @@ -79,6 +110,8 @@ class FFmpegVideoStreamPlayback : public VideoStreamPlayback { bool paused = false; bool playing = false; + Ref yuv_converter; + private: bool is_paused_internal() const; void update_internal(double p_delta); diff --git a/gdextension_build/SConstruct b/gdextension_build/SConstruct index 00f117c..351b115 100644 --- a/gdextension_build/SConstruct +++ b/gdextension_build/SConstruct @@ -5,6 +5,7 @@ import sys import methods import time import ffmpeg_download +import glsl_builders # For the reference: # - CCFLAGS are compilation flags shared between C and C++ @@ -20,6 +21,14 @@ env = SConscript("./godot-cpp/SConstruct") env.__class__.disable_warnings = methods.disable_warnings env.__class__.Run = methods.Run +GLSL_BUILDERS = { + "GLSL_HEADER": env.Builder( + action=env.Run(glsl_builders.build_raw_headers, 'Building GLSL header: "$TARGET"'), + suffix="glsl.gen.h", + src_suffix=".glsl", + ), +} +env.Append(BUILDERS=GLSL_BUILDERS) # Allow generation of compilation DB (`compile_commands.json`) for intellisense / code hinting # Generating the compilation DB requires SCons 4.0.0 or later. @@ -99,6 +108,9 @@ license_install_action = env.InstallAs(f"{addon_base_dir}LICENSE-ffmpeg.txt", FF env.Depends(sources, ffmpeg_install_action) env.Depends(sources, license_install_action) +env.GLSL_HEADER("../yuv_to_rgb.glsl") +env.Depends(Glob("../*.glsl.gen.h"), ["#glsl_builders.py"]) + Default(library) diff --git a/gdextension_build/glsl_builders.py b/gdextension_build/glsl_builders.py new file mode 100644 index 0000000..8256fd4 --- /dev/null +++ b/gdextension_build/glsl_builders.py @@ -0,0 +1,78 @@ +from typing import Optional, Iterable + +import os.path + + +def generate_inline_code(input_lines: Iterable[str], insert_newline: bool = True): + """Take header data and generate inline code + + :param: input_lines: values for shared inline code + :return: str - generated inline value + """ + output = [] + for line in input_lines: + if line: + output.append(",".join(str(ord(c)) for c in line)) + if insert_newline: + output.append("%s" % ord("\n")) + output.append("0") + return ",".join(output) + + +class RAWHeaderStruct: + def __init__(self): + self.code = "" + + +def include_file_in_raw_header(filename: str, header_data: RAWHeaderStruct, depth: int) -> None: + fs = open(filename, "r") + line = fs.readline() + + while line: + while line.find("#include ") != -1: + includeline = line.replace("#include ", "").strip()[1:-1] + + included_file = os.path.relpath(os.path.dirname(filename) + "/" + includeline) + include_file_in_raw_header(included_file, header_data, depth + 1) + + line = fs.readline() + + header_data.code += line + line = fs.readline() + + fs.close() + + +def build_raw_header( + filename: str, optional_output_filename: Optional[str] = None, header_data: Optional[RAWHeaderStruct] = None +): + header_data = header_data or RAWHeaderStruct() + include_file_in_raw_header(filename, header_data, 0) + + if optional_output_filename is None: + out_file = filename + ".gen.h" + else: + out_file = optional_output_filename + + out_file_base = out_file.replace(".glsl.gen.h", "_shader_glsl") + out_file_base = out_file_base[out_file_base.rfind("/") + 1 :] + out_file_base = out_file_base[out_file_base.rfind("\\") + 1 :] + out_file_ifdef = out_file_base.replace(".", "_").upper() + + shader_template = f"""/* WARNING, THIS FILE WAS GENERATED, DO NOT EDIT */ +#ifndef {out_file_ifdef}_RAW_H +#define {out_file_ifdef}_RAW_H + +static const char {out_file_base}[] = {{ + {generate_inline_code(header_data.code, insert_newline=False)} +}}; +#endif +""" + + with open(out_file, "w") as f: + f.write(shader_template) + + +def build_raw_headers(target, source, env): + for x in source: + build_raw_header(filename=str(x)) diff --git a/gdextension_build/godot-cpp b/gdextension_build/godot-cpp index 3162be2..36847f6 160000 --- a/gdextension_build/godot-cpp +++ b/gdextension_build/godot-cpp @@ -1 +1 @@ -Subproject commit 3162be28e594bf5b17889117670fc6f2d75f2f0c +Subproject commit 36847f6af0be548bae96429fa84d59f407b51582 diff --git a/video_decoder.cpp b/video_decoder.cpp index 356811e..2815ea6 100644 --- a/video_decoder.cpp +++ b/video_decoder.cpp @@ -32,9 +32,11 @@ #include "ffmpeg_frame.h" #include "tracy_import.h" +#include #ifdef GDEXTENSION #include "gdextension_build/gdex_print.h" +#include #endif extern "C" { @@ -151,6 +153,12 @@ void VideoDecoder::recreate_codec_context() { } AVCodecParameters codec_params = *video_stream->codecpar; + // YUV conversion needs rendering device + if (codec_params.format == AVPixelFormat::AV_PIX_FMT_YUV420P && RenderingServer::get_singleton()->get_rendering_device()) { + frame_format = FFmpegFrameFormat::YUV420P; + } else { + frame_format = FFmpegFrameFormat::RGBA8; + } BitField target_hw_decoders = hw_decoding_allowed ? target_hw_video_decoders : HardwareVideoDecoder::NONE; for (const AvailableDecoderInfo &info : get_available_decoders(format_context->iformat, codec_params.codec_id, target_hw_decoders)) { @@ -435,6 +443,17 @@ void VideoDecoder::_read_decoded_frames(AVFrame *p_received_frame) { last_decoded_frame_time.set(frame_time); + if (frame_format == FFmpegFrameFormat::YUV420P) { + // Special path for YUV images + Ref yuv_frame = _unwrap_yuv_frame(frame_time, frame); + decoded_frames_mutex.lock(); + if (!skip_current_outputs.is_set()) { + decoded_frames.push_back(yuv_frame); + } + decoded_frames_mutex.unlock(); + continue; + } + // Note: this is the pixel format that the video texture expects internally frame = _ensure_frame_pixel_format(frame, AVPixelFormat::AV_PIX_FMT_RGBA); if (!frame.is_valid()) { @@ -552,6 +571,8 @@ void VideoDecoder::_scaler_frame_return(Ref p_decoder, Ref VideoDecoder::_ensure_frame_pixel_format(Ref p_frame, AVPixelFormat p_target_pixel_format) { ZoneScopedN("Video decoder rescale"); + + print_line(p_frame->get_frame()->format); if (p_frame->get_frame()->format == p_target_pixel_format) { return p_frame; } @@ -611,6 +632,39 @@ Ref VideoDecoder::_ensure_frame_pixel_format(Ref p_fra return scaler_frame; } +Ref VideoDecoder::_unwrap_yuv_frame(double p_frame_time, Ref p_frame) { + PackedByteArray temp_frame_storage; + Ref out_frame = memnew(DecodedFrame(p_frame_time, Ref())); + for (size_t plane_i = 0; plane_i < 3; plane_i++) { + ZoneNamedN(yuv_image_unwrap_copy, "YUV Image unwrap copy", true); + + int width = p_frame->get_frame()->width; + int height = p_frame->get_frame()->height; + + if (plane_i > 0) { + width = Math::ceil(width / 2.0f); + height = Math::ceil(height / 2.0f); + } + + int frame_size = p_frame->get_frame()->buf[plane_i]->size; + temp_frame_storage.resize(frame_size); + uint8_t *unwrapped_frame_ptrw = temp_frame_storage.ptrw(); + { + ZoneNamedN(yuv_image_unwrap_memcopy, "YUV memcpy", true); + for (int y = 0; y < height; y++) { + memcpy(unwrapped_frame_ptrw, p_frame->get_frame()->buf[plane_i]->data + y * p_frame->get_frame()->linesize[plane_i], width); + unwrapped_frame_ptrw += width; + } + } + temp_frame_storage.resize(width * height); + out_frame->set_yuv_image_plane(plane_i, Image::create_from_data(width, height, false, Image::FORMAT_R8, temp_frame_storage)); + } + + out_frame->set_format(FFmpegFrameFormat::YUV420P); + + return out_frame; +} + AVFrame *VideoDecoder::_ensure_frame_audio_format(AVFrame *p_frame, AVSampleFormat p_target_audio_format) { ZoneScopedN("Audio decoder rescale"); if (p_frame->format == p_target_audio_format) { @@ -877,6 +931,7 @@ DecodedFrame::DecodedFrame(double p_time, Ref p_texture) { DecodedFrame::DecodedFrame(double p_time, Ref p_image) { time = p_time; image = p_image; + format = FFmpegFrameFormat::RGBA8; } Ref DecodedFrame::get_texture() const { return texture; } @@ -887,6 +942,16 @@ double DecodedFrame::get_time() const { return time; } void DecodedFrame::set_time(double p_time) { time = p_time; } +void DecodedFrame::set_yuv_image_plane(int p_plane_idx, Ref p_image) { + ERR_FAIL_INDEX((size_t)p_plane_idx, std::size(yuv_images)); + yuv_images[p_plane_idx] = p_image; +} + +Ref DecodedFrame::get_yuv_image_plane(int p_plane_idx) const { + ERR_FAIL_INDEX_V((size_t)p_plane_idx, std::size(yuv_images), Ref()); + return yuv_images[p_plane_idx]; +} + double DecodedAudioFrame::get_time() const { return time; } diff --git a/video_decoder.h b/video_decoder.h index f300800..edf86d0 100644 --- a/video_decoder.h +++ b/video_decoder.h @@ -65,10 +65,17 @@ extern "C" { #include +enum FFmpegFrameFormat { + RGBA8, + YUV420P +}; + class DecodedFrame : public RefCounted { double time; Ref texture; Ref image; + Ref yuv_images[3]; + FFmpegFrameFormat format; public: Ref get_texture() const; @@ -78,8 +85,14 @@ class DecodedFrame : public RefCounted { double get_time() const; void set_time(double p_time); + void set_yuv_image_plane(int p_plane_idx, Ref p_image); + Ref get_yuv_image_plane(int p_plane_idx) const; + DecodedFrame(double p_time, Ref p_texture); DecodedFrame(double p_time, Ref p_image); + + FFmpegFrameFormat get_format() const { return format; } + void set_format(const FFmpegFrameFormat &p_format) { format = p_format; } }; class DecodedAudioFrame : public RefCounted { @@ -114,6 +127,7 @@ class VideoDecoder : public RefCounted { }; private: + FFmpegFrameFormat frame_format; Vector> decoded_audio_frames; Mutex audio_buffer_mutex; @@ -170,6 +184,7 @@ class VideoDecoder : public RefCounted { static void _scaler_frame_return(Ref p_decoder, Ref p_hw_frame); Ref _ensure_frame_pixel_format(Ref p_frame, AVPixelFormat p_target_pixel_format); + Ref _unwrap_yuv_frame(double p_frame_time, Ref p_frame); AVFrame *_ensure_frame_audio_format(AVFrame *p_frame, AVSampleFormat p_target_audio_format); public: @@ -191,6 +206,7 @@ class VideoDecoder : public RefCounted { Vector2i get_size() const; int get_audio_mix_rate() const; int get_audio_channel_count() const; + FFmpegFrameFormat get_frame_format() const { return frame_format; } VideoDecoder(Ref p_file); ~VideoDecoder(); diff --git a/yuv_to_rgb.glsl b/yuv_to_rgb.glsl new file mode 100644 index 0000000..bf2a3ef --- /dev/null +++ b/yuv_to_rgb.glsl @@ -0,0 +1,29 @@ +#[compute] +#version 450 + +// Invocations in the (x, y, z) dimension +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +// Our textures +layout(r8, set = 0, binding = 0) uniform restrict readonly image2D tex_y; +layout(r8, set = 1, binding = 0) uniform restrict readonly image2D tex_u; +layout(r8, set = 2, binding = 0) uniform restrict readonly image2D tex_v; +layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D output_image; + +// The code we want to execute in each invocation +void main() { + ivec2 uv = ivec2(gl_GlobalInvocationID.xy); + ivec2 uv_chroma = ivec2(gl_GlobalInvocationID.xy) / 2; + + float y = imageLoad(tex_y, uv).r; + vec2 chroma; + chroma.r = imageLoad(tex_u, uv_chroma).r; + chroma.g = imageLoad(tex_v, uv_chroma).r; + float u = chroma.r - 0.5; + float v = chroma.g - 0.5; + vec3 rgb; + rgb.r = y + (1.403 * v); + rgb.g = y - (0.344 * u) - (0.714 * v); + rgb.b = y + (1.770 * u); + imageStore(output_image, uv, vec4(rgb, 1.0)); +} \ No newline at end of file