Editor for papermario-dx mods
at main 302 lines 8.8 kB view raw
1// SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com> 2// 3// SPDX-License-Identifier: AGPL-3.0-or-later 4 5/// C bridge implementation for parallel-rdp's Granite Vulkan context and RDP 6/// command processor. 7 8#include "bridge.hpp" 9#include "context.hpp" 10#include "device.hpp" 11#include "logging.hpp" 12#include "rdp_device.hpp" 13 14#include <cstdio> 15#include <cstdlib> 16#include <cstring> 17#include <memory> 18#include <vector> 19 20using namespace Vulkan; 21 22// -- Logging -- 23 24static void (*s_log_callback)(uint32_t level, const char *msg) = nullptr; 25 26/// Routes Granite log messages to a Rust callback. 27class RdpLoggingInterface final : public Util::LoggingInterface { 28public: 29 bool log(const char *tag, const char *fmt, va_list va) override 30 { 31 if (!s_log_callback) 32 return false; 33 34 uint32_t level; 35 if (strncmp(tag, "[ERROR]", 7) == 0) 36 level = RDP_LOG_LEVEL_ERROR; 37 else if (strncmp(tag, "[WARN]", 6) == 0) 38 level = RDP_LOG_LEVEL_WARN; 39 else 40 level = RDP_LOG_LEVEL_INFO; 41 42 char buf[1024]; 43 vsnprintf(buf, sizeof(buf), fmt, va); 44 45 // Strip trailing newline (tracing adds its own). 46 size_t len = strlen(buf); 47 if (len > 0 && buf[len - 1] == '\n') 48 buf[len - 1] = '\0'; 49 50 s_log_callback(level, buf); 51 return true; 52 } 53}; 54 55static RdpLoggingInterface s_logging_interface; 56 57void rdp_set_log_callback(void (*callback)(uint32_t level, const char *msg)) 58{ 59 s_log_callback = callback; 60 Util::set_thread_logging_interface(callback ? &s_logging_interface : nullptr); 61} 62 63// -- Internal types -- 64 65struct RdpContext { 66 std::unique_ptr<Context> context; 67 std::unique_ptr<Device> device; 68}; 69 70struct RdpRenderer { 71 RdpContext *ctx; 72 std::unique_ptr<RDP::CommandProcessor> processor; 73 uint32_t rdram_size; 74}; 75 76// -- Vulkan context -- 77 78void *rdp_context_create( 79 const char *const *instance_ext, uint32_t num_instance_ext, 80 const char *const *device_ext, uint32_t num_device_ext) 81{ 82 if (!Context::init_loader(nullptr)) 83 return nullptr; 84 85 auto context = std::make_unique<Context>(); 86 if (!context->init_instance_and_device( 87 instance_ext, num_instance_ext, 88 device_ext, num_device_ext, 0)) 89 return nullptr; 90 91 auto device = std::make_unique<Device>(); 92 device->set_context(*context); 93 94 auto *ctx = new RdpContext(); 95 ctx->context = std::move(context); 96 ctx->device = std::move(device); 97 return ctx; 98} 99 100void rdp_context_destroy(void *ctx) 101{ 102 delete static_cast<RdpContext *>(ctx); 103} 104 105void *rdp_context_get_instance(void *ctx) 106{ 107 return static_cast<RdpContext *>(ctx)->context->get_instance(); 108} 109 110void *rdp_context_get_physical_device(void *ctx) 111{ 112 return static_cast<RdpContext *>(ctx)->context->get_gpu(); 113} 114 115void *rdp_context_get_device(void *ctx) 116{ 117 return static_cast<RdpContext *>(ctx)->context->get_device(); 118} 119 120void *rdp_context_get_queue(void *ctx, uint32_t *family_index) 121{ 122 auto &info = static_cast<RdpContext *>(ctx)->context->get_queue_info(); 123 // Use the graphics queue (QUEUE_INDEX_GRAPHICS = 0) 124 if (family_index) 125 *family_index = info.family_indices[0]; 126 return info.queues[0]; 127} 128 129// -- Renderer -- 130 131void *rdp_renderer_create(void *ctx, uint32_t rdram_size, uint32_t flags) 132{ 133 auto *context = static_cast<RdpContext *>(ctx); 134 135 auto renderer = std::make_unique<RdpRenderer>(); 136 renderer->ctx = context; 137 renderer->rdram_size = rdram_size; 138 139 // Pass nullptr for rdram_ptr so the CommandProcessor allocates its own 140 // host-coherent GPU buffer. This avoids the non-coherent path where 141 // host-to-GPU uploads during scanout can overwrite GPU-rendered data. 142 renderer->processor = std::make_unique<RDP::CommandProcessor>( 143 *context->device, 144 nullptr, 145 0, // rdram_offset 146 rdram_size, 147 rdram_size / 2, // hidden_rdram_size 148 static_cast<RDP::CommandProcessorFlags>(flags)); 149 150 if (!renderer->processor->device_is_supported()) { 151 return nullptr; 152 } 153 154 auto *ptr = renderer.release(); 155 return ptr; 156} 157 158void rdp_renderer_destroy(void *renderer) 159{ 160 auto *r = static_cast<RdpRenderer *>(renderer); 161 // Ensure all GPU work completes before destroying the CommandProcessor, 162 // otherwise its destructor may race with in-flight commands. 163 uint64_t timeline = r->processor->signal_timeline(); 164 r->processor->wait_for_timeline(timeline); 165 delete r; 166} 167 168uint8_t *rdp_renderer_get_rdram(void *renderer) 169{ 170 auto *r = static_cast<RdpRenderer *>(renderer); 171 // The CommandProcessor's RDRAM is a host-coherent GPU buffer. 172 // begin_read_rdram() maps it for host access (persistent on coherent buffers). 173 return static_cast<uint8_t *>(r->processor->begin_read_rdram()); 174} 175 176uint32_t rdp_renderer_get_rdram_size(void *renderer) 177{ 178 return static_cast<RdpRenderer *>(renderer)->rdram_size; 179} 180 181void rdp_renderer_begin_frame(void *renderer) 182{ 183 static_cast<RdpRenderer *>(renderer)->processor->begin_frame_context(); 184} 185 186void rdp_renderer_enqueue(void *renderer, const uint32_t *words, uint32_t num_words) 187{ 188 // RDP command lengths in 64-bit words, indexed by command byte (bits [29:24]). 189 // Most commands are 1 word (= 2 x 32-bit words). Triangle commands are larger. 190 static const unsigned cmd_len_lut[64] = { 191 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x07: nop/invalid 192 4, 6, 12, 14, 12, 14, 20, 22, // 0x08-0x0F: triangles 193 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x17: unused 194 1, 1, 1, 1, 1, 1, 1, 1, // 0x18-0x1F: unused 195 1, 1, 1, 1, 2, 2, 1, 1, // 0x20-0x27: tex rect (0x24,0x25) = 2 196 1, 1, 1, 1, 1, 1, 1, 1, // 0x28-0x2F: sync/scissor/modes 197 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x37: load/tile/fill/color 198 1, 1, 1, 1, 1, 1, 1, 1, // 0x38-0x3F: color regs/combine/images 199 }; 200 201 auto *proc = static_cast<RdpRenderer *>(renderer)->processor.get(); 202 203 // Parse the word stream and enqueue each command individually. 204 // parallel-rdp's enqueue_command_direct() processes exactly one command 205 // per call, so we must split the stream ourselves. 206 uint32_t i = 0; 207 while (i < num_words) { 208 uint32_t cmd = (words[i] >> 24) & 63; 209 uint32_t len_64 = cmd_len_lut[cmd]; 210 uint32_t len_32 = len_64 * 2; 211 212 if (i + len_32 > num_words) 213 break; 214 215 proc->enqueue_command(len_32, &words[i]); 216 i += len_32; 217 } 218} 219 220void rdp_renderer_set_vi_register(void *renderer, uint32_t reg, uint32_t value) 221{ 222 static_cast<RdpRenderer *>(renderer)->processor->set_vi_register( 223 static_cast<RDP::VIRegister>(reg), value); 224} 225 226void *rdp_renderer_scanout(void *renderer, uint32_t *width, uint32_t *height) 227{ 228 auto *r = static_cast<RdpRenderer *>(renderer); 229 230 RDP::ScanoutOptions options = {}; 231 options.persist_frame_on_invalid_input = true; 232 options.blend_previous_frame = true; 233 options.upscale_deinterlacing = false; 234 235 Vulkan::ImageHandle image = r->processor->scanout(options); 236 if (!image) { 237 *width = 0; 238 *height = 0; 239 return nullptr; 240 } 241 242 *width = image->get_width(); 243 *height = image->get_height(); 244 245 // Return the raw VkImage handle. 246 // The ImageHandle (ref-counted) keeps the image alive as long as the 247 // CommandProcessor holds its internal reference (until next scanout). 248 return image->get_image(); 249} 250 251int rdp_renderer_scanout_sync( 252 void *renderer, 253 uint8_t *buffer, uint32_t buffer_size, 254 uint32_t *width, uint32_t *height) 255{ 256 auto *r = static_cast<RdpRenderer *>(renderer); 257 258 std::vector<RDP::RGBA> colors; 259 unsigned w = 0, h = 0; 260 261 RDP::ScanoutOptions options = {}; 262 options.persist_frame_on_invalid_input = true; 263 options.blend_previous_frame = true; 264 options.upscale_deinterlacing = false; 265 266 r->processor->scanout_sync(colors, w, h, options); 267 268 if (w == 0 || h == 0 || colors.empty()) { 269 *width = 0; 270 *height = 0; 271 return 0; 272 } 273 274 *width = w; 275 *height = h; 276 277 uint32_t needed = w * h * 4; 278 if (buffer_size < needed) 279 return 0; 280 281 std::memcpy(buffer, colors.data(), needed); 282 return 1; 283} 284 285void rdp_renderer_flush(void *renderer) 286{ 287 auto *r = static_cast<RdpRenderer *>(renderer); 288 uint64_t timeline = r->processor->signal_timeline(); 289 r->processor->wait_for_timeline(timeline); 290} 291 292uint64_t rdp_renderer_signal_timeline(void *renderer) 293{ 294 auto *r = static_cast<RdpRenderer *>(renderer); 295 return r->processor->signal_timeline(); 296} 297 298void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value) 299{ 300 auto *r = static_cast<RdpRenderer *>(renderer); 301 r->processor->wait_for_timeline(value); 302}