add golomb coding #2

merged
opened by altagos.dev targeting main from push-xmxnlpnsnyqo
Changed files
+255 -18
src
+252
src/Golomb.zig
···
//! Golomb codec implementation for encoding and decoding integers.
//! Golomb coding is a lossless, variable-length encoding scheme that is optimal for geometric distributions.
//!
//! Bit layout produced for a value `v` with parameter `m` (most significant bit first):
//!   * quotient:  `q = v / m + 1`, written as `bitLength(q) - 1` zero bits followed by
//!     the `bitLength(q)` significant bits of `q`;
//!   * remainder: `r = v % m`, written in exactly `bM()` bits.
//!
//! `encode` and `decode` share the same cursor fields, so a single instance must not
//! interleave encoding and decoding without calling `reset` in between.

const std = @import("std");

const Self = @This();

/// Integer type able to hold every valid shift amount for a `usize`.
/// Using it instead of a hard-coded `u6` keeps the codec compiling on targets
/// where `usize` is not 64 bits wide.
const Shift = std.math.Log2Int(usize);

/// Golomb parameter M - determines the division point for quotient and remainder
m: usize,

/// Internal bit buffer for accumulating bits during encoding/decoding
bit_buffer: u8 = 0,
/// Current bit position within the bit buffer (0-7)
bit_idx: u8 = 0,
/// Current byte position in the buffer
byte_idx: usize = 0,

/// Encodes `value` into `buffer`, continuing at the current cursor position.
///
/// Options:
///   * `write_padding_bits`: after the code word, fill the rest of `buffer` with zero bits.
///   * `reset_tmp_values`: finish the stream after this value. Any pending partial byte is
///     written to `buffer` (left-aligned, low bits zero) and the cursor is reset. Pass
///     `false` to append another value right after this one with a later call.
///
/// Returns the total number of bits occupied in `buffer` after this call, counted from the
/// start of the buffer (this includes bits written by earlier calls that did not reset).
///
/// Returns `error.BufferTooSmall` before writing anything if the code word does not fit.
/// Panics if `m == 0`. `value / m` must be smaller than `maxInt(usize)`.
pub fn encode(
    self: *Self,
    buffer: []u8,
    value: usize,
    opts: struct {
        write_padding_bits: bool = false,
        reset_tmp_values: bool = true,
    },
) error{BufferTooSmall}!usize {
    if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");

    const b_m = self.bM();

    const q = @divFloor(value, self.m) + 1;
    const b_q = bitLength(q) - 1;
    const r = @rem(value, self.m);

    // b_q prefix zeros + (b_q + 1) quotient bits + b_m remainder bits.
    const needed_bits: usize = @as(usize, b_q) * 2 + 1 + b_m;
    const buffer_len_bits = needed_bits + self.byte_idx * 8 + self.bit_idx;

    if (buffer_len_bits > buffer.len * 8) return error.BufferTooSmall;

    // Write q: zero prefix announcing its length, then its significant bits.
    self.writeBits(buffer, 0, b_q);
    self.writeBits(buffer, q, b_q + 1);

    // Write r in a fixed width of b_m bits (leading zeros included).
    self.writeBits(buffer, r, b_m);

    // Write padding bits
    if (opts.write_padding_bits) {
        const padding = buffer.len * 8 - buffer_len_bits;

        for (0..padding) |_| {
            self.writeBit(buffer, 0);
        }

        std.debug.assert(self.bit_idx == 0 and self.bit_buffer == 0);
    }

    // Reset helper vars
    if (opts.reset_tmp_values) {
        // Without this flush the last, partially filled byte would be discarded
        // by reset() and never reach the buffer.
        self.flush(buffer);
        self.reset();
    }

    return buffer_len_bits;
}

/// Decodes one Golomb-encoded value from `buffer`, starting at the current cursor position.
///
/// With `reset_tmp_values = false` the cursor stays behind the decoded value so the next
/// call reads the following value from the same buffer.
///
/// Returns `error.InvalidFormat` if the buffer ends before the value is complete, if the
/// quotient prefix is longer than a `usize` can hold, or if the decoded value overflows
/// `usize`. On error the cursor is left where decoding stopped.
/// Panics if `m == 0`.
pub fn decode(
    self: *Self,
    buffer: []const u8,
    opts: struct { reset_tmp_values: bool = true },
) error{InvalidFormat}!usize {
    if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");

    const b_m = self.bM();

    // Count the zero bits in front of the quotient; the first 1 bit is the quotient's
    // most significant bit and terminates the prefix.
    var b_q: u8 = 0;
    while (true) {
        const bit = self.readBit(buffer) orelse return error.InvalidFormat;
        if (bit == 1) break;

        b_q += 1;
        // The encoder never emits a prefix this long; also keeps the shift below in range.
        if (b_q >= @bitSizeOf(usize)) return error.InvalidFormat;
    }

    // Read the remaining b_q bits of q below its (already consumed) leading 1 bit.
    const q_low = self.readBits(buffer, b_q) catch return error.InvalidFormat;
    // q is stored with an offset of one, so it is always >= 1 here.
    const q = ((@as(usize, 1) << @as(Shift, @intCast(b_q))) | q_low) - 1;

    // Read r
    const r = self.readBits(buffer, b_m) catch return error.InvalidFormat;

    // Malformed input can describe a value that does not fit into usize.
    const scaled = std.math.mul(usize, q, self.m) catch return error.InvalidFormat;
    const value = std.math.add(usize, scaled, r) catch return error.InvalidFormat;

    // Reset helper vars
    if (opts.reset_tmp_values) {
        self.reset();
    }

    return value;
}

/// Resets internal state variables used during encoding/decoding.
/// Pending bits that were not yet written to a buffer are discarded.
pub fn reset(self: *Self) void {
    self.bit_buffer = 0;
    self.bit_idx = 0;
    self.byte_idx = 0;
}

/// Calculates the number of bits needed to represent the remainder in Golomb coding.
/// For a power-of-two `m` the remainders `0..m-1` need one bit less than `m` itself.
fn bM(self: *const Self) u8 {
    const b = bitLength(self.m);
    return if (isPowerOfTwo(self.m)) b - 1 else b;
}

/// Writes the pending partial byte (if any) to the buffer, left-aligned with zero low bits.
/// Does not change the cursor; callers are expected to `reset` afterwards.
fn flush(self: *const Self, buffer: []u8) void {
    if (self.bit_idx == 0) return;

    const shift: u3 = @intCast(8 - self.bit_idx);
    buffer[self.byte_idx] = self.bit_buffer << shift;
}

/// Writes a single bit to the buffer.
/// Bits are collected in `bit_buffer` and stored once a full byte is available.
fn writeBit(self: *Self, buffer: []u8, bit: u8) void {
    self.bit_buffer = (self.bit_buffer << 1) | (bit & 1);
    self.bit_idx += 1;

    if (self.bit_idx == 8) {
        buffer[self.byte_idx] = self.bit_buffer;
        self.byte_idx += 1;
        self.bit_buffer = 0;
        self.bit_idx = 0;
    }
}

/// Writes the lowest `count` bits of `value` to the buffer, most significant bit first.
/// `count` must not exceed the bit size of `usize`.
fn writeBits(self: *Self, buffer: []u8, value: usize, count: u8) void {
    var i = count;
    while (i > 0) {
        i -= 1;
        const bit: u8 = @intCast((value >> @as(Shift, @intCast(i))) & 1);
        self.writeBit(buffer, bit);
    }
}

/// Reads a single bit from the buffer. Returns null if at end of buffer.
fn readBit(self: *Self, buffer: []const u8) ?u8 {
    // `>=`: byte_idx == buffer.len is already one past the last readable byte.
    if (self.byte_idx >= buffer.len) return null;

    const shift: u3 = @intCast(7 - self.bit_idx);
    const bit = (buffer[self.byte_idx] >> shift) & 1;
    self.bit_idx += 1;

    if (self.bit_idx == 8) {
        self.byte_idx += 1;
        self.bit_idx = 0;
    }

    return bit;
}

/// Reads `count` bits from the buffer and returns them as a value, most significant bit first.
/// Returns `error.OutOfBounds` if the buffer ends before `count` bits were read.
fn readBits(self: *Self, buffer: []const u8, count: u8) error{OutOfBounds}!usize {
    var result: usize = 0;

    for (0..count) |_| {
        const bit = self.readBit(buffer) orelse return error.OutOfBounds;
        result = (result << 1) | @as(usize, bit);
    }

    return result;
}

/// Calculates the number of bits required to represent a value (0 for the value 0).
fn bitLength(value: anytype) u8 {
    return @bitSizeOf(@TypeOf(value)) - @clz(value);
}

/// Checks if a value is a power of two. `value` must not be 0.
fn isPowerOfTwo(value: usize) bool {
    return (value & (value - 1)) == 0;
}

test "encode val = 42, m = 8" {
    const testing = std.testing;

    var gol = Self{ .m = 8 };
    const input: usize = 42;
    var encoded: [1]u8 = undefined;
    _ = try gol.encode(&encoded, input, .{});
    try testing.expectEqualSlices(u8, &.{50}, &encoded);
}

test "decode val = {50}, m = 8" {
    const testing = std.testing;

    var gol = Self{ .m = 8 };
    const input = &[_]u8{50};
    const decoded = try gol.decode(input, .{});

    try testing.expectEqual(42, decoded);
}

test "encode + decode val = 1564, m = 457" {
    const testing = std.testing;

    var gol = Self{ .m = 457 };
    const input: usize = 1564;
    var encoded: [2]u8 = undefined;

    _ = try gol.encode(
        &encoded,
        input,
        .{ .write_padding_bits = true },
    );
    try testing.expectEqualSlices(u8, &.{ 35, 4 }, &encoded);

    const decoded = try gol.decode(&encoded, .{});
    try testing.expectEqual(input, decoded);
}

test "encode multiple val = { 1564, 42 }, m = 457" {
    const testing = std.testing;

    var gol = Self{ .m = 457 };
    const input = [_]usize{ 1564, 42 };
    var encoded: [3]u8 = undefined;

    _ = try gol.encode(&encoded, input[0], .{ .reset_tmp_values = false });
    _ = try gol.encode(&encoded, input[1], .{});

    try testing.expectEqualSlices(u8, &.{ 35, 6, 42 }, &encoded);
}

test "decode multiple val = { 35, 6, 42 }, m = 457" {
    const testing = std.testing;

    var gol = Self{ .m = 457 };
    const input = &[_]u8{ 35, 6, 42 };

    const decoded1 = try gol.decode(input, .{ .reset_tmp_values = false });
    try testing.expectEqual(1564, decoded1);

    const decoded2 = try gol.decode(input, .{});
    try testing.expectEqual(42, decoded2);
}

test "encode flushes a trailing partial byte without padding" {
    const testing = std.testing;

    var gol = Self{ .m = 457 };
    var encoded = [_]u8{ 0xFF, 0xFF };

    const bits = try gol.encode(&encoded, 1564, .{});
    try testing.expectEqual(14, bits);
    try testing.expectEqualSlices(u8, &.{ 35, 4 }, &encoded);
}

test "decode rejects truncated or terminator-less input" {
    const testing = std.testing;

    var empty = Self{ .m = 8 };
    try testing.expectError(error.InvalidFormat, empty.decode(&[_]u8{}, .{}));

    var zeros = Self{ .m = 8 };
    try testing.expectError(error.InvalidFormat, zeros.decode(&[_]u8{0}, .{}));

    var truncated = Self{ .m = 457 };
    try testing.expectError(error.InvalidFormat, truncated.decode(&[_]u8{35}, .{}));
}
+3 -18
src/root.zig
//! By convention, root.zig is the root source file when making a library.
const std = @import("std");

/// Prints a one-line hint about running the test suite to stdout.
/// Output goes through a stack-allocated buffer and is flushed before returning.
pub fn bufferedPrint() !void {
    // Stdout carries the program's real output only; diagnostics belong elsewhere.
    var scratch: [1024]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&scratch);
    const out = &file_writer.interface;

    try out.print("Run `zig build test` to run the tests.\n", .{});

    // Buffered bytes are only written once the writer is flushed.
    try out.flush();
}

/// Returns the sum of `a` and `b`.
pub fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}

test "basic add functionality" {
    const result = add(3, 7);
    try std.testing.expect(result == 10);
}
//! By convention, root.zig is the root source file when making a library.
const std = @import("std");

/// Golomb codec, re-exported from `Golomb.zig` as part of the library's public API.
pub const Golomb = @import("Golomb.zig");

test {
    // Reference every declaration of this file so the tests of the imported
    // modules are picked up when testing the library root.
    const Root = @This();
    std.testing.refAllDecls(Root);
}