// Editor for papermario-dx mods
1// SPDX-FileCopyrightText: 2026 Alex Bates <alex@bates64.com>
2//
3// SPDX-License-Identifier: AGPL-3.0-or-later
4
5/// C bridge implementation for parallel-rdp's Granite Vulkan context and RDP
6/// command processor.
7
8#include "bridge.hpp"
9#include "context.hpp"
10#include "device.hpp"
11#include "logging.hpp"
12#include "rdp_device.hpp"
13
14#include <cstdio>
15#include <cstdlib>
16#include <cstring>
17#include <memory>
18#include <vector>
19
20using namespace Vulkan;
21
22// -- Logging --
23
/// Log sink installed by rdp_set_log_callback(); nullptr while no sink is
/// registered. Receives a severity level and an already formatted message.
static void (*s_log_callback)(uint32_t, const char *) = nullptr;
25
26/// Routes Granite log messages to a Rust callback.
27class RdpLoggingInterface final : public Util::LoggingInterface {
28public:
29 bool log(const char *tag, const char *fmt, va_list va) override
30 {
31 if (!s_log_callback)
32 return false;
33
34 uint32_t level;
35 if (strncmp(tag, "[ERROR]", 7) == 0)
36 level = RDP_LOG_LEVEL_ERROR;
37 else if (strncmp(tag, "[WARN]", 6) == 0)
38 level = RDP_LOG_LEVEL_WARN;
39 else
40 level = RDP_LOG_LEVEL_INFO;
41
42 char buf[1024];
43 vsnprintf(buf, sizeof(buf), fmt, va);
44
45 // Strip trailing newline (tracing adds its own).
46 size_t len = strlen(buf);
47 if (len > 0 && buf[len - 1] == '\n')
48 buf[len - 1] = '\0';
49
50 s_log_callback(level, buf);
51 return true;
52 }
53};
54
55static RdpLoggingInterface s_logging_interface;
56
57void rdp_set_log_callback(void (*callback)(uint32_t level, const char *msg))
58{
59 s_log_callback = callback;
60 Util::set_thread_logging_interface(callback ? &s_logging_interface : nullptr);
61}
62
63// -- Internal types --
64
65struct RdpContext {
66 std::unique_ptr<Context> context;
67 std::unique_ptr<Device> device;
68};
69
70struct RdpRenderer {
71 RdpContext *ctx;
72 std::unique_ptr<RDP::CommandProcessor> processor;
73 uint32_t rdram_size;
74};
75
76// -- Vulkan context --
77
78void *rdp_context_create(
79 const char *const *instance_ext, uint32_t num_instance_ext,
80 const char *const *device_ext, uint32_t num_device_ext)
81{
82 if (!Context::init_loader(nullptr))
83 return nullptr;
84
85 auto context = std::make_unique<Context>();
86 if (!context->init_instance_and_device(
87 instance_ext, num_instance_ext,
88 device_ext, num_device_ext, 0))
89 return nullptr;
90
91 auto device = std::make_unique<Device>();
92 device->set_context(*context);
93
94 auto *ctx = new RdpContext();
95 ctx->context = std::move(context);
96 ctx->device = std::move(device);
97 return ctx;
98}
99
100void rdp_context_destroy(void *ctx)
101{
102 delete static_cast<RdpContext *>(ctx);
103}
104
105void *rdp_context_get_instance(void *ctx)
106{
107 return static_cast<RdpContext *>(ctx)->context->get_instance();
108}
109
110void *rdp_context_get_physical_device(void *ctx)
111{
112 return static_cast<RdpContext *>(ctx)->context->get_gpu();
113}
114
115void *rdp_context_get_device(void *ctx)
116{
117 return static_cast<RdpContext *>(ctx)->context->get_device();
118}
119
120void *rdp_context_get_queue(void *ctx, uint32_t *family_index)
121{
122 auto &info = static_cast<RdpContext *>(ctx)->context->get_queue_info();
123 // Use the graphics queue (QUEUE_INDEX_GRAPHICS = 0)
124 if (family_index)
125 *family_index = info.family_indices[0];
126 return info.queues[0];
127}
128
129// -- Renderer --
130
131void *rdp_renderer_create(void *ctx, uint32_t rdram_size, uint32_t flags)
132{
133 auto *context = static_cast<RdpContext *>(ctx);
134
135 auto renderer = std::make_unique<RdpRenderer>();
136 renderer->ctx = context;
137 renderer->rdram_size = rdram_size;
138
139 // Pass nullptr for rdram_ptr so the CommandProcessor allocates its own
140 // host-coherent GPU buffer. This avoids the non-coherent path where
141 // host-to-GPU uploads during scanout can overwrite GPU-rendered data.
142 renderer->processor = std::make_unique<RDP::CommandProcessor>(
143 *context->device,
144 nullptr,
145 0, // rdram_offset
146 rdram_size,
147 rdram_size / 2, // hidden_rdram_size
148 static_cast<RDP::CommandProcessorFlags>(flags));
149
150 if (!renderer->processor->device_is_supported()) {
151 return nullptr;
152 }
153
154 auto *ptr = renderer.release();
155 return ptr;
156}
157
158void rdp_renderer_destroy(void *renderer)
159{
160 auto *r = static_cast<RdpRenderer *>(renderer);
161 // Ensure all GPU work completes before destroying the CommandProcessor,
162 // otherwise its destructor may race with in-flight commands.
163 uint64_t timeline = r->processor->signal_timeline();
164 r->processor->wait_for_timeline(timeline);
165 delete r;
166}
167
168uint8_t *rdp_renderer_get_rdram(void *renderer)
169{
170 auto *r = static_cast<RdpRenderer *>(renderer);
171 // The CommandProcessor's RDRAM is a host-coherent GPU buffer.
172 // begin_read_rdram() maps it for host access (persistent on coherent buffers).
173 return static_cast<uint8_t *>(r->processor->begin_read_rdram());
174}
175
176uint32_t rdp_renderer_get_rdram_size(void *renderer)
177{
178 return static_cast<RdpRenderer *>(renderer)->rdram_size;
179}
180
181void rdp_renderer_begin_frame(void *renderer)
182{
183 static_cast<RdpRenderer *>(renderer)->processor->begin_frame_context();
184}
185
186void rdp_renderer_enqueue(void *renderer, const uint32_t *words, uint32_t num_words)
187{
188 // RDP command lengths in 64-bit words, indexed by command byte (bits [29:24]).
189 // Most commands are 1 word (= 2 x 32-bit words). Triangle commands are larger.
190 static const unsigned cmd_len_lut[64] = {
191 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x07: nop/invalid
192 4, 6, 12, 14, 12, 14, 20, 22, // 0x08-0x0F: triangles
193 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x17: unused
194 1, 1, 1, 1, 1, 1, 1, 1, // 0x18-0x1F: unused
195 1, 1, 1, 1, 2, 2, 1, 1, // 0x20-0x27: tex rect (0x24,0x25) = 2
196 1, 1, 1, 1, 1, 1, 1, 1, // 0x28-0x2F: sync/scissor/modes
197 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x37: load/tile/fill/color
198 1, 1, 1, 1, 1, 1, 1, 1, // 0x38-0x3F: color regs/combine/images
199 };
200
201 auto *proc = static_cast<RdpRenderer *>(renderer)->processor.get();
202
203 // Parse the word stream and enqueue each command individually.
204 // parallel-rdp's enqueue_command_direct() processes exactly one command
205 // per call, so we must split the stream ourselves.
206 uint32_t i = 0;
207 while (i < num_words) {
208 uint32_t cmd = (words[i] >> 24) & 63;
209 uint32_t len_64 = cmd_len_lut[cmd];
210 uint32_t len_32 = len_64 * 2;
211
212 if (i + len_32 > num_words)
213 break;
214
215 proc->enqueue_command(len_32, &words[i]);
216 i += len_32;
217 }
218}
219
220void rdp_renderer_set_vi_register(void *renderer, uint32_t reg, uint32_t value)
221{
222 static_cast<RdpRenderer *>(renderer)->processor->set_vi_register(
223 static_cast<RDP::VIRegister>(reg), value);
224}
225
226void *rdp_renderer_scanout(void *renderer, uint32_t *width, uint32_t *height)
227{
228 auto *r = static_cast<RdpRenderer *>(renderer);
229
230 RDP::ScanoutOptions options = {};
231 options.persist_frame_on_invalid_input = true;
232 options.blend_previous_frame = true;
233 options.upscale_deinterlacing = false;
234
235 Vulkan::ImageHandle image = r->processor->scanout(options);
236 if (!image) {
237 *width = 0;
238 *height = 0;
239 return nullptr;
240 }
241
242 *width = image->get_width();
243 *height = image->get_height();
244
245 // Return the raw VkImage handle.
246 // The ImageHandle (ref-counted) keeps the image alive as long as the
247 // CommandProcessor holds its internal reference (until next scanout).
248 return image->get_image();
249}
250
251int rdp_renderer_scanout_sync(
252 void *renderer,
253 uint8_t *buffer, uint32_t buffer_size,
254 uint32_t *width, uint32_t *height)
255{
256 auto *r = static_cast<RdpRenderer *>(renderer);
257
258 std::vector<RDP::RGBA> colors;
259 unsigned w = 0, h = 0;
260
261 RDP::ScanoutOptions options = {};
262 options.persist_frame_on_invalid_input = true;
263 options.blend_previous_frame = true;
264 options.upscale_deinterlacing = false;
265
266 r->processor->scanout_sync(colors, w, h, options);
267
268 if (w == 0 || h == 0 || colors.empty()) {
269 *width = 0;
270 *height = 0;
271 return 0;
272 }
273
274 *width = w;
275 *height = h;
276
277 uint32_t needed = w * h * 4;
278 if (buffer_size < needed)
279 return 0;
280
281 std::memcpy(buffer, colors.data(), needed);
282 return 1;
283}
284
285void rdp_renderer_flush(void *renderer)
286{
287 auto *r = static_cast<RdpRenderer *>(renderer);
288 uint64_t timeline = r->processor->signal_timeline();
289 r->processor->wait_for_timeline(timeline);
290}
291
292uint64_t rdp_renderer_signal_timeline(void *renderer)
293{
294 auto *r = static_cast<RdpRenderer *>(renderer);
295 return r->processor->signal_timeline();
296}
297
298void rdp_renderer_wait_for_timeline(void *renderer, uint64_t value)
299{
300 auto *r = static_cast<RdpRenderer *>(renderer);
301 r->processor->wait_for_timeline(value);
302}