// SPDX-FileCopyrightText: 2026 Alex Bates // // SPDX-License-Identifier: AGPL-3.0-or-later /// C bridge implementation for parallel-rdp's Granite Vulkan context and RDP /// command processor. #include "bridge.hpp" #include "context.hpp" #include "device.hpp" #include "logging.hpp" #include "rdp_device.hpp" #include #include #include #include #include using namespace Vulkan; // -- Logging -- static void (*s_log_callback)(uint32_t level, const char *msg) = nullptr; /// Routes Granite log messages to a Rust callback. class RdpLoggingInterface final : public Util::LoggingInterface { public: bool log(const char *tag, const char *fmt, va_list va) override { if (!s_log_callback) return false; uint32_t level; if (strncmp(tag, "[ERROR]", 7) == 0) level = RDP_LOG_LEVEL_ERROR; else if (strncmp(tag, "[WARN]", 6) == 0) level = RDP_LOG_LEVEL_WARN; else level = RDP_LOG_LEVEL_INFO; char buf[1024]; vsnprintf(buf, sizeof(buf), fmt, va); // Strip trailing newline (tracing adds its own). size_t len = strlen(buf); if (len > 0 && buf[len - 1] == '\n') buf[len - 1] = '\0'; s_log_callback(level, buf); return true; } }; static RdpLoggingInterface s_logging_interface; void rdp_set_log_callback(void (*callback)(uint32_t level, const char *msg)) { s_log_callback = callback; Util::set_thread_logging_interface(callback ? &s_logging_interface : nullptr); } // -- Internal types -- struct RdpContext { std::unique_ptr context; std::unique_ptr device; }; struct RdpRenderer { RdpContext *ctx; std::unique_ptr processor; uint32_t rdram_size; }; // -- Vulkan context -- void *rdp_context_create( const char *const *instance_ext, uint32_t num_instance_ext, const char *const *device_ext, uint32_t num_device_ext) { if (!Context::init_loader(nullptr)) return nullptr; auto context = std::make_unique(); if (!context->init_instance_and_device( instance_ext, num_instance_ext, device_ext, num_device_ext, 0)) return nullptr; auto device = std::make_unique(); device->set_context(*context); auto *ctx = new RdpContext(); ctx->context = std::move(context); ctx->device = std::move(device); return ctx; } void rdp_context_destroy(void *ctx) { delete static_cast(ctx); } void *rdp_context_get_instance(void *ctx) { return static_cast(ctx)->context->get_instance(); } void *rdp_context_get_physical_device(void *ctx) { return static_cast(ctx)->context->get_gpu(); } void *rdp_context_get_device(void *ctx) { return static_cast(ctx)->context->get_device(); } void *rdp_context_get_queue(void *ctx, uint32_t *family_index) { auto &info = static_cast(ctx)->context->get_queue_info(); // Use the graphics queue (QUEUE_INDEX_GRAPHICS = 0) if (family_index) *family_index = info.family_indices[0]; return info.queues[0]; } // -- Renderer -- void *rdp_renderer_create(void *ctx, uint32_t rdram_size, uint32_t flags) { auto *context = static_cast(ctx); auto renderer = std::make_unique(); renderer->ctx = context; renderer->rdram_size = rdram_size; // Pass nullptr for rdram_ptr so the CommandProcessor allocates its own // host-coherent GPU buffer. This avoids the non-coherent path where // host-to-GPU uploads during scanout can overwrite GPU-rendered data. renderer->processor = std::make_unique( *context->device, nullptr, 0, // rdram_offset rdram_size, rdram_size / 2, // hidden_rdram_size static_cast(flags)); if (!renderer->processor->device_is_supported()) { return nullptr; } auto *ptr = renderer.release(); return ptr; } void rdp_renderer_destroy(void *renderer) { auto *r = static_cast(renderer); // Ensure all GPU work completes before destroying the CommandProcessor, // otherwise its destructor may race with in-flight commands. uint64_t timeline = r->processor->signal_timeline(); r->processor->wait_for_timeline(timeline); delete r; } uint8_t *rdp_renderer_get_rdram(void *renderer) { auto *r = static_cast(renderer); // The CommandProcessor's RDRAM is a host-coherent GPU buffer. // begin_read_rdram() maps it for host access (persistent on coherent buffers). return static_cast(r->processor->begin_read_rdram()); } uint32_t rdp_renderer_get_rdram_size(void *renderer) { return static_cast(renderer)->rdram_size; } void rdp_renderer_begin_frame(void *renderer) { static_cast(renderer)->processor->begin_frame_context(); } void rdp_renderer_enqueue(void *renderer, const uint32_t *words, uint32_t num_words) { // RDP command lengths in 64-bit words, indexed by command byte (bits [29:24]). // Most commands are 1 word (= 2 x 32-bit words). Triangle commands are larger. static const unsigned cmd_len_lut[64] = { 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x07: nop/invalid 4, 6, 12, 14, 12, 14, 20, 22, // 0x08-0x0F: triangles 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x17: unused 1, 1, 1, 1, 1, 1, 1, 1, // 0x18-0x1F: unused 1, 1, 1, 1, 2, 2, 1, 1, // 0x20-0x27: tex rect (0x24,0x25) = 2 1, 1, 1, 1, 1, 1, 1, 1, // 0x28-0x2F: sync/scissor/modes 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x37: load/tile/fill/color 1, 1, 1, 1, 1, 1, 1, 1, // 0x38-0x3F: color regs/combine/images }; auto *proc = static_cast(renderer)->processor.get(); // Parse the word stream and enqueue each command individually. // parallel-rdp's enqueue_command_direct() processes exactly one command // per call, so we must split the stream ourselves. uint32_t i = 0; while (i < num_words) { uint32_t cmd = (words[i] >> 24) & 63; uint32_t len_64 = cmd_len_lut[cmd]; uint32_t len_32 = len_64 * 2; if (i + len_32 > num_words) break; proc->enqueue_command(len_32, &words[i]); i += len_32; } } void rdp_renderer_set_vi_register(void *renderer, uint32_t reg, uint32_t value) { static_cast(renderer)->processor->set_vi_register( static_cast(reg), value); } void *rdp_renderer_scanout(void *renderer, uint32_t *width, uint32_t *height) { auto *r = static_cast(renderer); RDP::ScanoutOptions options = {}; options.persist_frame_on_invalid_input = true; options.blend_previous_frame = true; options.upscale_deinterlacing = false; Vulkan::ImageHandle image = r->processor->scanout(options); if (!image) { *width = 0; *height = 0; return nullptr; } *width = image->get_width(); *height = image->get_height(); // Return the raw VkImage handle. // The ImageHandle (ref-counted) keeps the image alive as long as the // CommandProcessor holds its internal reference (until next scanout). return image->get_image(); } int rdp_renderer_scanout_sync( void *renderer, uint8_t *buffer, uint32_t buffer_size, uint32_t *width, uint32_t *height) { auto *r = static_cast(renderer); std::vector colors; unsigned w = 0, h = 0; RDP::ScanoutOptions options = {}; options.persist_frame_on_invalid_input = true; options.blend_previous_frame = true; options.upscale_deinterlacing = false; r->processor->scanout_sync(colors, w, h, options); if (w == 0 || h == 0 || colors.empty()) { *width = 0; *height = 0; return 0; } *width = w; *height = h; uint32_t needed = w * h * 4; if (buffer_size < needed) return 0; std::memcpy(buffer, colors.data(), needed); return 1; } void rdp_renderer_flush(void *renderer) { auto *r = static_cast(renderer); uint64_t timeline = r->processor->signal_timeline(); r->processor->wait_for_timeline(timeline); } uint64_t rdp_renderer_signal_timeline(void *renderer) { auto *r = static_cast(renderer); return r->processor->signal_timeline(); } void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value) { auto *r = static_cast(renderer); r->processor->wait_for_timeline(value); }