src/Golomb.zig at main · altagos.dev/codings

altagos.dev / codings
this repo has no description
codings / src / Golomb.zig
at main 6.7 kB view raw
  1//! Golomb codec implementation for encoding and decoding integers.
  2//! Golomb coding is a lossless, variable-length encoding scheme that is optimal for geometric distributions.
  3
  4const std = @import("std");
  5
  6const Self = @This();
  7
  8/// Golomb parameter M - determines the division point for quotient and remainder
  9m: usize,
 10
 11/// Internal bit buffer for accumulating bits during encoding/decoding
 12bit_buffer: u8 = 0,
 13/// Current bit position within the bit buffer (0-7)
 14bit_idx: u8 = 0,
 15/// Current byte position in the buffer
 16byte_idx: usize = 0,
 17
 18/// Encodes a value using Golomb coding into the provided buffer.
 19/// Returns the number of bits written.
 20pub fn encode(
 21    self: *Self,
 22    buffer: []u8,
 23    value: usize,
 24    opts: struct {
 25        write_padding_bits: bool = false,
 26        reset_tmp_values: bool = true,
 27    },
 28) error{BufferTooSmall}!usize {
 29    if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");
 30
 31    const b_m = self.bM();
 32
 33    const q = @divFloor(value, self.m) + 1;
 34    const b_q = bitLength(q) - 1;
 35    const r = @rem(value, self.m);
 36    const b_r = bitLength(r);
 37
 38    const needed_bits = b_q + b_q + 1 + b_m;
 39    const buffer_len_bits = needed_bits + self.byte_idx * 8 + self.bit_idx;
 40
 41    if (buffer_len_bits > buffer.len * 8) return error.BufferTooSmall;
 42
 43    // Write q
 44    for (0..b_q) |_| {
 45        self.writeBit(buffer, 0);
 46    }
 47    self.writeBits(buffer, q, b_q + 1);
 48
 49    // Write r
 50    for (0..(b_m - b_r)) |_| {
 51        self.writeBit(buffer, 0);
 52    }
 53    self.writeBits(buffer, r, b_r);
 54
 55    // Write padding bits
 56    if (opts.write_padding_bits) {
 57        const padding = buffer.len * 8 - buffer_len_bits;
 58
 59        for (0..padding) |_| {
 60            self.writeBit(buffer, 0);
 61        }
 62
 63        std.debug.assert(self.bit_buffer == 0);
 64    }
 65
 66    // Reset helper vars
 67    if (opts.reset_tmp_values) {
 68        self.reset();
 69    }
 70
 71    return buffer_len_bits;
 72}
 73
 74/// Decodes a Golomb-encoded value from the buffer.
 75/// Returns the decoded value.
 76pub fn decode(
 77    self: *Self,
 78    buffer: []const u8,
 79    opts: struct { reset_tmp_values: bool = true },
 80) error{InvalidFormat}!usize {
 81    if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");
 82
 83    const b_m = self.bM();
 84
 85    var q: usize = 0;
 86    var b_q: u8 = 0;
 87    var r: usize = 0;
 88
 89    // Read b_q
 90    while (self.readBit(buffer)) |bit| {
 91        if (bit == 0) {
 92            b_q += 1;
 93        } else if (bit == 1) {
 94            q = 1;
 95            break;
 96        } else {
 97            return error.InvalidFormat;
 98        }
 99    }
100
101    // Read q
102    q <<= @as(u6, @intCast(b_q));
103    q |= self.readBits(buffer, b_q) catch return error.InvalidFormat;
104    q -= 1;
105
106    // Read r
107    r = self.readBits(buffer, b_m) catch return error.InvalidFormat;
108
109    // Reset helper vars
110    if (opts.reset_tmp_values) {
111        self.reset();
112    }
113
114    return q * self.m + r;
115}
116
117/// Resets internal state variables used during encoding/decoding.
118pub fn reset(self: *Self) void {
119    self.bit_buffer = 0;
120    self.bit_idx = 0;
121    self.byte_idx = 0;
122}
123
124/// Calculates the number of bits needed to represent the remainder in Golomb coding.
125fn bM(self: *const Self) u8 {
126    const b = bitLength(self.m);
127    return if (isPowerOfTwo(self.m)) b - 1 else b;
128}
129
130/// Writes a single bit to the buffer.
131fn writeBit(self: *Self, buffer: []u8, bit: u8) void {
132    self.bit_buffer = (self.bit_buffer << 1) | (bit & 1);
133    self.bit_idx += 1;
134
135    if (self.bit_idx == 8) {
136        buffer[self.byte_idx] = self.bit_buffer;
137        self.byte_idx += 1;
138        self.bit_buffer = 0;
139        self.bit_idx = 0;
140    }
141}
142
143/// Writes multiple bits from a value to the buffer.
144fn writeBits(self: *Self, buffer: []u8, value: usize, count: u8) void {
145    var i = count;
146    while (i > 0) {
147        i -= 1;
148        const bit = @as(u8, @intCast((value >> @as(u6, @intCast(i))) & 1));
149        self.writeBit(buffer, bit);
150    }
151}
152
153/// Reads a single bit from the buffer. Returns null if at end of buffer.
154fn readBit(self: *Self, buffer: []const u8) ?u8 {
155    if (self.byte_idx > buffer.len) return null;
156
157    const bit = (buffer[self.byte_idx] >> @as(u3, @intCast(7 - self.bit_idx))) & 1;
158    self.bit_idx += 1;
159
160    if (self.bit_idx == 8) {
161        self.byte_idx += 1;
162        self.bit_idx = 0;
163    }
164
165    return bit;
166}
167
168/// Reads multiple bits from the buffer and returns them as a value.
169fn readBits(self: *Self, buffer: []const u8, count: u8) !usize {
170    var result: usize = 0;
171
172    for (0..count) |_| {
173        const bit = self.readBit(buffer) orelse return error.OutOfBounds;
174        result = (result << 1) | @as(usize, bit);
175    }
176
177    return result;
178}
179
180/// Calculates the number of bits required to represent a value.
181fn bitLength(value: anytype) u8 {
182    return @bitSizeOf(@TypeOf(value)) - @clz(value);
183}
184
185/// Checks if a value is a power of two.
186fn isPowerOfTwo(value: usize) bool {
187    return (value & (value - 1)) == 0;
188}
189
190test "golomb.encode val = 42, m = 8" {
191    const testing = std.testing;
192
193    var gol = Self{ .m = 8 };
194    const input: usize = 42;
195    var encoded: [1]u8 = undefined;
196    _ = try gol.encode(&encoded, input, .{});
197    try testing.expectEqualSlices(u8, &.{50}, &encoded);
198}
199
200test "golomb.decode val = {50}, m = 8" {
201    const testing = std.testing;
202
203    var gol = Self{ .m = 8 };
204    const input = &[_]u8{50};
205    const decoded = try gol.decode(input, .{});
206
207    try testing.expectEqual(42, decoded);
208}
209
210test "golomb.encode + decode val = 1564, m = 457" {
211    const testing = std.testing;
212
213    var gol = Self{ .m = 457 };
214    const input: usize = 1564;
215    var encoded: [2]u8 = undefined;
216
217    _ = try gol.encode(
218        &encoded,
219        input,
220        .{ .write_padding_bits = true },
221    );
222    try testing.expectEqualSlices(u8, &.{ 35, 4 }, &encoded);
223
224    const decoded = try gol.decode(&encoded, .{});
225    try testing.expectEqual(input, decoded);
226}
227
228test "golomb.encode multiple val = { 1564, 42 }, m = 457" {
229    const testing = std.testing;
230
231    var gol = Self{ .m = 457 };
232    const input = [_]usize{ 1564, 42 };
233    var encoded: [3]u8 = undefined;
234
235    _ = try gol.encode(&encoded, input[0], .{ .reset_tmp_values = false });
236    _ = try gol.encode(&encoded, input[1], .{});
237
238    try testing.expectEqualSlices(u8, &.{ 35, 6, 42 }, &encoded);
239}
240
241test "golomb.decode multiple val = { 35, 6, 8 }, m = 457" {
242    const testing = std.testing;
243
244    var gol = Self{ .m = 457 };
245    const input = &[_]u8{ 35, 6, 42 };
246
247    const decoded1 = try gol.decode(input, .{ .reset_tmp_values = false });
248    try testing.expectEqual(1564, decoded1);
249
250    const decoded2 = try gol.decode(input, .{});
251    try testing.expectEqual(42, decoded2);
252}