[nvdec, android] proper detection and support for GPU decoder

This commit is contained in:
xbzk 2026-06-05 18:40:50 -03:00
parent f4f6f5831d
commit 2875a4db89
9 changed files with 220 additions and 53 deletions

View file

@ -269,7 +269,7 @@ class SettingsFragmentPresenter(
// TODO(crueter): sub-submenus?
private fun addGraphicsSettings(sl: ArrayList<SettingsItem>) {
sl.apply {
// add(IntSetting.RENDERER_NVDEC_EMULATION.key)
add(IntSetting.RENDERER_NVDEC_EMULATION.key)
add(IntSetting.RENDERER_RESOLUTION.key)
add(IntSetting.RENDERER_VSYNC.key)

View file

@ -26,52 +26,39 @@ void Decoder::Decode() {
}
const auto packet_data = ComposeFrame();
// Send assembled bitstream to decoder.
if (!decode_api.SendPacket(packet_data)) {
return;
}
// Only receive/store visible frames.
if (vp9_hidden_frame) {
return;
}
// Receive output frames from decoder.
auto frame = decode_api.ReceiveFrame();
if (!frame) {
return;
}
if (IsInterlaced()) {
auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets();
auto frame_copy = frame;
if (!frame.get()) {
LOG_ERROR(HW_GPU,
"Nvdec {} failed to decode interlaced frame for top {:#X} bottom 0x{:X}", id,
luma_top, luma_bottom);
}
if (UsingDecodeOrder()) {
host1x.frame_queue.PushDecodeOrder(id, luma_top, std::move(frame));
host1x.frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy));
} else {
host1x.frame_queue.PushPresentOrder(id, luma_top, std::move(frame));
host1x.frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy));
}
FFmpeg::FrameOffsets offsets{};
offsets.hidden = vp9_hidden_frame;
offsets.interlaced = IsInterlaced();
if (offsets.interlaced) {
std::tie(offsets.luma, offsets.luma_bottom, offsets.chroma, offsets.chroma_bottom) =
GetInterlacedOffsets();
} else {
auto [luma_offset, chroma_offset] = GetProgressiveOffsets();
std::tie(offsets.luma, offsets.chroma) = GetProgressiveOffsets();
}
if (!frame.get()) {
LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma {:#X}", id,
luma_offset);
}
if (!decode_api.SendPacket(packet_data, offsets, GetFrameDimensions())) {
return;
}
auto push = [&](u64 luma, std::shared_ptr<FFmpeg::Frame> frame) {
if (UsingDecodeOrder()) {
host1x.frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame));
host1x.frame_queue.PushDecodeOrder(id, luma, std::move(frame));
} else {
host1x.frame_queue.PushPresentOrder(id, luma_offset, std::move(frame));
host1x.frame_queue.PushPresentOrder(id, luma, std::move(frame));
}
};
while (auto result = decode_api.ReceiveFrame()) {
auto& [frame, o] = *result;
if (o.hidden || !frame) {
continue;
}
if (o.interlaced) {
auto frame_copy = frame;
push(o.luma, std::move(frame));
push(o.luma_bottom, std::move(frame_copy));
} else {
push(o.luma, std::move(frame));
}
}
}

View file

@ -45,6 +45,9 @@ protected:
virtual std::tuple<u64, u64> GetProgressiveOffsets() = 0;
virtual std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() = 0;
virtual bool IsInterlaced() = 0;
// Optional frame-size hint that Decode() forwards to decode_api.SendPacket().
// The base implementation reports no dimensions; codecs that can derive a size
// from their picture parameters override this.
virtual std::optional<FFmpeg::FrameDimensions> GetFrameDimensions() {
    return {};
}
FFmpeg::DecodeApi decode_api;
Host1x::Host1x& host1x;

View file

@ -50,6 +50,16 @@ bool H264::IsInterlaced() {
current_context.h264_parameter_set.luma_bot_offset.Address() != 0;
}
std::optional<FFmpeg::FrameDimensions> H264::GetFrameDimensions() {
const auto& params = current_context.h264_parameter_set;
const s32 width = static_cast<s32>(params.pic_width_in_mbs) * 16;
const s32 height = static_cast<s32>(params.frame_height_in_mbs) * 16;
if (width <= 0 || height <= 0) {
return std::nullopt;
}
return FFmpeg::FrameDimensions{width, height};
}
std::span<const u8> H264::ComposeFrame() {
host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext));
const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();

View file

@ -79,6 +79,7 @@ public:
std::tuple<u64, u64> GetProgressiveOffsets() override;
std::tuple<u64, u64, u64, u64> GetInterlacedOffsets() override;
bool IsInterlaced() override;
std::optional<FFmpeg::FrameDimensions> GetFrameDimensions() override;
std::string_view GetCurrentCodecName() const override {
return "H264";

View file

@ -4,6 +4,10 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <string_view>
#include <vector>
#include "common/assert.h"
#include "common/logging.h"
#include "common/scope_exit.h"
@ -83,6 +87,57 @@ std::string AVError(int errnum) {
return errbuf;
}
#ifdef ANDROID
// Tests whether an annex-B start code begins exactly at offset `i` of `data`.
// Returns 4 for 00 00 00 01, 3 for 00 00 01, and 0 when neither pattern is
// present or too few bytes remain.
size_t MatchStartCode(std::span<const u8> data, size_t i) {
    const size_t total = data.size();

    // Both patterns need at least three bytes and begin with two zero bytes.
    if (!(i + 2 < total) || data[i] != 0 || data[i + 1] != 0) {
        return 0;
    }
    // The third byte decides between the short and the long form.
    if (data[i + 2] == 1) {
        return 3;
    }
    if (data[i + 2] == 0 && i + 3 < total && data[i + 3] == 1) {
        return 4;
    }
    return 0;
}
// Pull SPS (NAL type 7) + PPS (NAL type 8) out of an annex-B frame into an
// extradata buffer, each prefixed with a 4-byte start code. Eden synthesizes
// these inline into the very first frame; h264_mediacodec wants them at open.
//
// Scanning stops at the first coded slice (NAL type 1 or 5). The slice check
// is made before searching for the end of the NAL so the slice payload is
// never walked byte by byte.
//
// Returns an empty vector when the packet contains no SPS/PPS before its
// first slice.
std::vector<u8> ExtractH264AnnexBExtradata(std::span<const u8> packet) {
    constexpr u8 start[4] = {0, 0, 0, 1};

    std::vector<u8> extradata;
    const size_t size = packet.size();
    size_t i = 0;
    while (i < size) {
        const size_t sc = MatchStartCode(packet, i);
        if (sc == 0) {
            ++i;
            continue;
        }
        const size_t nal_start = i + sc;
        if (nal_start >= size) {
            // Start code at the very end of the packet with no NAL header after it.
            break;
        }
        // nal_unit_type is the low five bits of the first NAL byte.
        const u8 nal_type = packet[nal_start] & 0x1F;
        if (nal_type == 1 || nal_type == 5) {
            // First coded slice reached: nothing after it is collected, so stop
            // here rather than scanning the slice data for the next start code.
            break;
        }
        // The NAL extends up to the next start code or the end of the packet.
        size_t j = nal_start + 1;
        while (j < size && MatchStartCode(packet, j) == 0) {
            ++j;
        }
        if (nal_type == 7 || nal_type == 8) {
            extradata.insert(extradata.end(), start, start + sizeof(start));
            extradata.insert(extradata.end(), packet.begin() + nal_start, packet.begin() + j);
        }
        i = j;
    }
    return extradata;
}
#endif
}
Packet::Packet(std::span<const u8> data) {
@ -117,7 +172,26 @@ Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
return AV_CODEC_ID_NONE;
}
}();
m_codec = avcodec_find_decoder(av_codec);
#ifdef ANDROID
// FFmpeg exposes MediaCodec via dedicated decoders rather than as a
// hw_config on the regular ones.
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
const char* mc_name = nullptr;
switch (av_codec) {
case AV_CODEC_ID_H264: mc_name = "h264_mediacodec"; break;
case AV_CODEC_ID_VP8: mc_name = "vp8_mediacodec"; break;
case AV_CODEC_ID_VP9: mc_name = "vp9_mediacodec"; break;
default: break;
}
if (mc_name) {
m_codec = avcodec_find_decoder_by_name(mc_name);
}
}
#endif
if (!m_codec) {
m_codec = avcodec_find_decoder(av_codec);
}
}
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
@ -205,6 +279,9 @@ DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
m_codec_context->thread_count = 0;
m_codec_context->thread_type &= ~FF_THREAD_FRAME;
// Forwarded into MediaCodec as KEY_LOW_LATENCY on Android.
m_codec_context->flags |= AV_CODEC_FLAG_LOW_DELAY;
m_codec_context->flags2 |= AV_CODEC_FLAG2_FAST;
}
DecoderContext::~DecoderContext() {
@ -218,7 +295,19 @@ void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, A
m_codec_context->pix_fmt = hw_pix_fmt;
}
bool DecoderContext::OpenContext(const Decoder& decoder) {
bool DecoderContext::OpenContext(const Decoder& decoder, std::span<const u8> extradata) {
if (!extradata.empty()) {
av_freep(&m_codec_context->extradata);
m_codec_context->extradata = static_cast<u8*>(
av_mallocz(extradata.size() + AV_INPUT_BUFFER_PADDING_SIZE));
if (!m_codec_context->extradata) {
LOG_ERROR(HW_GPU, "Failed to allocate extradata");
return false;
}
std::memcpy(m_codec_context->extradata, extradata.data(), extradata.size());
m_codec_context->extradata_size = static_cast<int>(extradata.size());
}
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
return false;
@ -278,6 +367,12 @@ void DecodeApi::Reset() {
m_hardware_context.reset();
m_decoder_context.reset();
m_decoder.reset();
m_opened = false;
m_needs_h264_extradata = false;
m_next_pts = 0;
while (!m_pending_offsets.empty()) {
m_pending_offsets.pop();
}
}
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
@ -293,23 +388,69 @@ bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
}
// Open the decoder context.
#ifdef ANDROID
// h264_mediacodec needs SPS/PPS in extradata at open. We pull them from
// the first frame's bitstream in SendPacket.
m_needs_h264_extradata = m_decoder->GetCodec() &&
std::string_view(m_decoder->GetCodec()->name) == "h264_mediacodec";
if (m_needs_h264_extradata) {
return true;
}
#endif
if (!m_decoder_context->OpenContext(*m_decoder)) {
this->Reset();
return false;
}
m_opened = true;
return true;
}
bool DecodeApi::SendPacket(std::span<const u8> packet_data) {
// Submits one composed bitstream packet to the decoder and records the
// offsets that ReceiveFrame() will hand back with the matching output frame.
//
// packet_data: bitstream for one frame.
// offsets:     per-frame metadata queued for the frame this packet produces.
// dimensions:  optional frame size; only applied to the codec context on the
//              deferred-open path below.
//
// If the codec context has not been opened yet, it is opened here. On Android
// with h264_mediacodec the open is deferred until a packet carrying SPS/PPS
// arrives; packets seen before that are dropped and reported as success.
//
// Returns false when opening the context or submitting the packet fails.
bool DecodeApi::SendPacket(std::span<const u8> packet_data, const FrameOffsets& offsets,
                           std::optional<FrameDimensions> dimensions) {
    if (!m_opened) {
        std::vector<u8> extradata;
#ifdef ANDROID
        if (m_needs_h264_extradata) {
            extradata = ExtractH264AnnexBExtradata(packet_data);
            if (extradata.empty()) {
                // No parameter sets yet: skip this packet and try again on the next one.
                return true;
            }
            if (dimensions) {
                auto* ctx = m_decoder_context->GetCodecContext();
                ctx->width = dimensions->width;
                ctx->height = dimensions->height;
                ctx->coded_width = dimensions->width;
                ctx->coded_height = dimensions->height;
            }
        }
#endif
        if (!m_decoder_context->OpenContext(*m_decoder, extradata)) {
            this->Reset();
            return false;
        }
        m_opened = true;
    }

    FFmpeg::Packet packet(packet_data);
    packet.GetPacket()->pts = m_next_pts;
    packet.GetPacket()->dts = m_next_pts;
    ++m_next_pts;

    if (!m_decoder_context->SendPacket(packet)) {
        return false;
    }
    // Queue the offsets only once the decoder has accepted the packet. Queuing
    // them beforehand would leave a stale entry behind on failure and shift the
    // offsets of every later frame by one.
    m_pending_offsets.push(offsets);
    return true;
}
std::shared_ptr<Frame> DecodeApi::ReceiveFrame() {
// Receive raw frame from decoder.
return m_decoder_context->ReceiveFrame();
std::optional<DecodeApi::DecodedFrame> DecodeApi::ReceiveFrame() {
auto frame = m_decoder_context->ReceiveFrame();
if (!frame) {
return std::nullopt;
}
FrameOffsets offsets{};
if (!m_pending_offsets.empty()) {
offsets = m_pending_offsets.front();
m_pending_offsets.pop();
}
return DecodedFrame{std::move(frame), offsets};
}
}

View file

@ -179,7 +179,7 @@ public:
~DecoderContext();
void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
bool OpenContext(const Decoder& decoder);
bool OpenContext(const Decoder& decoder, std::span<const u8> extradata = {});
bool SendPacket(const Packet& packet);
std::shared_ptr<Frame> ReceiveFrame();
@ -198,6 +198,20 @@ private:
bool m_decode_order{};
};
// Per-frame metadata passed to DecodeApi::SendPacket() and returned alongside
// the decoded frame by DecodeApi::ReceiveFrame(). All fields default to zero/false.
struct FrameOffsets {
// True when the bottom-field offsets below are populated (set from IsInterlaced()).
bool interlaced{};
// Set from vp9_hidden_frame; Decode() skips frames carrying this flag.
bool hidden{};
// Luma offset; Decode() uses it as the frame-queue key for the frame.
u64 luma{};
// Chroma offset reported together with `luma`.
u64 chroma{};
// Bottom-field luma offset; a second copy of the frame is queued under it when interlaced.
u64 luma_bottom{};
// Bottom-field chroma offset reported together with `luma_bottom`.
u64 chroma_bottom{};
};
// Frame size hint handed to DecodeApi::SendPacket(), which copies it into the
// codec context's width/height and coded_width/coded_height before a deferred open.
// NOTE(review): presumably in pixels (H264 derives it as macroblock count * 16) - confirm.
struct FrameDimensions {
s32 width{};
s32 height{};
};
class DecodeApi {
public:
YUZU_NON_COPYABLE(DecodeApi);
@ -213,13 +227,23 @@ public:
return m_decoder_context->UsingDecodeOrder();
}
bool SendPacket(std::span<const u8> packet_data);
std::shared_ptr<Frame> ReceiveFrame();
bool SendPacket(std::span<const u8> packet_data, const FrameOffsets& offsets,
std::optional<FrameDimensions> dimensions = std::nullopt);
// Result of ReceiveFrame(): a decoded frame together with the FrameOffsets
// that were queued by SendPacket().
struct DecodedFrame {
std::shared_ptr<Frame> frame;
FrameOffsets offsets;
};
std::optional<DecodedFrame> ReceiveFrame();
private:
std::optional<FFmpeg::Decoder> m_decoder;
std::optional<FFmpeg::DecoderContext> m_decoder_context;
std::optional<FFmpeg::HardwareContext> m_hardware_context;
bool m_opened{};
bool m_needs_h264_extradata{};
s64 m_next_pts{};
std::queue<FrameOffsets> m_pending_offsets;
};
} // namespace FFmpeg

View file

@ -31,6 +31,7 @@ Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt)
// Logs the teardown of this nvdec instance, then calls Close() on the host1x
// frame queue with this instance's id.
// NOTE(review): presumably Close() releases the frames queued for this id - confirm.
Nvdec::~Nvdec() {
LOG_INFO(HW_GPU, "Destroying nvdec {}", id);
host1x.frame_queue.Close(id);
}
void Nvdec::ProcessMethod(u32 method, u32 argument) {

View file

@ -137,7 +137,7 @@ void Vic::Execute() noexcept {
break;
}
Blend(config, slot_config, config.output_surface_config.out_pixel_format);
} else {
} else if (nvdec_id != -1) {
LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset {:#X}", id, luma_offset);
}
}