+252
src/Golomb.zig
+252
src/Golomb.zig
···
···
1
+
//! Golomb codec implementation for encoding and decoding integers.
2
+
//! Golomb coding is a lossless, variable-length encoding scheme that is optimal for geometric distributions.
3
+
4
+
const std = @import("std");
5
+
6
+
const Self = @This();
7
+
8
+
/// Golomb parameter M - determines the division point for quotient and remainder
9
+
m: usize,
10
+
11
+
/// Internal bit buffer for accumulating bits during encoding/decoding
12
+
bit_buffer: u8 = 0,
13
+
/// Current bit position within the bit buffer (0-7)
14
+
bit_idx: u8 = 0,
15
+
/// Current byte position in the buffer
16
+
byte_idx: usize = 0,
17
+
18
+
/// Encodes a value using Golomb coding into the provided buffer.
19
+
/// Returns the number of bits written.
20
+
pub fn encode(
21
+
self: *Self,
22
+
buffer: []u8,
23
+
value: usize,
24
+
opts: struct {
25
+
write_padding_bits: bool = false,
26
+
reset_tmp_values: bool = true,
27
+
},
28
+
) error{BufferTooSmall}!usize {
29
+
if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");
30
+
31
+
const b_m = self.bM();
32
+
33
+
const q = @divFloor(value, self.m) + 1;
34
+
const b_q = bitLength(q) - 1;
35
+
const r = @rem(value, self.m);
36
+
const b_r = bitLength(r);
37
+
38
+
const needed_bits = b_q + b_q + 1 + b_m;
39
+
const buffer_len_bits = needed_bits + self.byte_idx * 8 + self.bit_idx;
40
+
41
+
if (buffer_len_bits > buffer.len * 8) return error.BufferTooSmall;
42
+
43
+
// Write q
44
+
for (0..b_q) |_| {
45
+
self.writeBit(buffer, 0);
46
+
}
47
+
self.writeBits(buffer, q, b_q + 1);
48
+
49
+
// Write r
50
+
for (0..(b_m - b_r)) |_| {
51
+
self.writeBit(buffer, 0);
52
+
}
53
+
self.writeBits(buffer, r, b_r);
54
+
55
+
// Write padding bits
56
+
if (opts.write_padding_bits) {
57
+
const padding = buffer.len * 8 - buffer_len_bits;
58
+
59
+
for (0..padding) |_| {
60
+
self.writeBit(buffer, 0);
61
+
}
62
+
63
+
std.debug.assert(self.bit_buffer == 0);
64
+
}
65
+
66
+
// Reset helper vars
67
+
if (opts.reset_tmp_values) {
68
+
self.reset();
69
+
}
70
+
71
+
return buffer_len_bits;
72
+
}
73
+
74
+
/// Decodes a Golomb-encoded value from the buffer.
75
+
/// Returns the decoded value.
76
+
pub fn decode(
77
+
self: *Self,
78
+
buffer: []const u8,
79
+
opts: struct { reset_tmp_values: bool = true },
80
+
) error{InvalidFormat}!usize {
81
+
if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");
82
+
83
+
const b_m = self.bM();
84
+
85
+
var q: usize = 0;
86
+
var b_q: u8 = 0;
87
+
var r: usize = 0;
88
+
89
+
// Read b_q
90
+
while (self.readBit(buffer)) |bit| {
91
+
if (bit == 0) {
92
+
b_q += 1;
93
+
} else if (bit == 1) {
94
+
q = 1;
95
+
break;
96
+
} else {
97
+
return error.InvalidFormat;
98
+
}
99
+
}
100
+
101
+
// Read q
102
+
q <<= @as(u6, @intCast(b_q));
103
+
q |= self.readBits(buffer, b_q) catch return error.InvalidFormat;
104
+
q -= 1;
105
+
106
+
// Read r
107
+
r = self.readBits(buffer, b_m) catch return error.InvalidFormat;
108
+
109
+
// Reset helper vars
110
+
if (opts.reset_tmp_values) {
111
+
self.reset();
112
+
}
113
+
114
+
return q * self.m + r;
115
+
}
116
+
117
+
/// Resets internal state variables used during encoding/decoding.
118
+
pub fn reset(self: *Self) void {
119
+
self.bit_buffer = 0;
120
+
self.bit_idx = 0;
121
+
self.byte_idx = 0;
122
+
}
123
+
124
+
/// Calculates the number of bits needed to represent the remainder in Golomb coding.
125
+
fn bM(self: *const Self) u8 {
126
+
const b = bitLength(self.m);
127
+
return if (isPowerOfTwo(self.m)) b - 1 else b;
128
+
}
129
+
130
+
/// Writes a single bit to the buffer.
131
+
fn writeBit(self: *Self, buffer: []u8, bit: u8) void {
132
+
self.bit_buffer = (self.bit_buffer << 1) | (bit & 1);
133
+
self.bit_idx += 1;
134
+
135
+
if (self.bit_idx == 8) {
136
+
buffer[self.byte_idx] = self.bit_buffer;
137
+
self.byte_idx += 1;
138
+
self.bit_buffer = 0;
139
+
self.bit_idx = 0;
140
+
}
141
+
}
142
+
143
+
/// Writes multiple bits from a value to the buffer.
144
+
fn writeBits(self: *Self, buffer: []u8, value: usize, count: u8) void {
145
+
var i = count;
146
+
while (i > 0) {
147
+
i -= 1;
148
+
const bit = @as(u8, @intCast((value >> @as(u6, @intCast(i))) & 1));
149
+
self.writeBit(buffer, bit);
150
+
}
151
+
}
152
+
153
+
/// Reads a single bit from the buffer. Returns null if at end of buffer.
154
+
fn readBit(self: *Self, buffer: []const u8) ?u8 {
155
+
if (self.byte_idx > buffer.len) return null;
156
+
157
+
const bit = (buffer[self.byte_idx] >> @as(u3, @intCast(7 - self.bit_idx))) & 1;
158
+
self.bit_idx += 1;
159
+
160
+
if (self.bit_idx == 8) {
161
+
self.byte_idx += 1;
162
+
self.bit_idx = 0;
163
+
}
164
+
165
+
return bit;
166
+
}
167
+
168
+
/// Reads multiple bits from the buffer and returns them as a value.
169
+
fn readBits(self: *Self, buffer: []const u8, count: u8) !usize {
170
+
var result: usize = 0;
171
+
172
+
for (0..count) |_| {
173
+
const bit = self.readBit(buffer) orelse return error.OutOfBounds;
174
+
result = (result << 1) | @as(usize, bit);
175
+
}
176
+
177
+
return result;
178
+
}
179
+
180
+
/// Calculates the number of bits required to represent a value.
181
+
fn bitLength(value: anytype) u8 {
182
+
return @bitSizeOf(@TypeOf(value)) - @clz(value);
183
+
}
184
+
185
+
/// Checks if a value is a power of two.
186
+
fn isPowerOfTwo(value: usize) bool {
187
+
return (value & (value - 1)) == 0;
188
+
}
189
+
190
+
test "encode val = 42, m = 8" {
191
+
const testing = std.testing;
192
+
193
+
var gol = Self{ .m = 8 };
194
+
const input: usize = 42;
195
+
var encoded: [1]u8 = undefined;
196
+
_ = try gol.encode(&encoded, input, .{});
197
+
try testing.expectEqualSlices(u8, &.{50}, &encoded);
198
+
}
199
+
200
+
test "decode val = {50}, m = 8" {
201
+
const testing = std.testing;
202
+
203
+
var gol = Self{ .m = 8 };
204
+
const input = &[_]u8{50};
205
+
const decoded = try gol.decode(input, .{});
206
+
207
+
try testing.expectEqual(42, decoded);
208
+
}
209
+
210
+
test "encode + decode val = 1564, m = 457" {
211
+
const testing = std.testing;
212
+
213
+
var gol = Self{ .m = 457 };
214
+
const input: usize = 1564;
215
+
var encoded: [2]u8 = undefined;
216
+
217
+
_ = try gol.encode(
218
+
&encoded,
219
+
input,
220
+
.{ .write_padding_bits = true },
221
+
);
222
+
try testing.expectEqualSlices(u8, &.{ 35, 4 }, &encoded);
223
+
224
+
const decoded = try gol.decode(&encoded, .{});
225
+
try testing.expectEqual(input, decoded);
226
+
}
227
+
228
+
test "encode multiple val = { 1564, 42 }, m = 457" {
229
+
const testing = std.testing;
230
+
231
+
var gol = Self{ .m = 457 };
232
+
const input = [_]usize{ 1564, 42 };
233
+
var encoded: [3]u8 = undefined;
234
+
235
+
_ = try gol.encode(&encoded, input[0], .{ .reset_tmp_values = false });
236
+
_ = try gol.encode(&encoded, input[1], .{});
237
+
238
+
try testing.expectEqualSlices(u8, &.{ 35, 6, 42 }, &encoded);
239
+
}
240
+
241
+
test "decode multiple val = { 35, 6, 8 }, m = 457" {
242
+
const testing = std.testing;
243
+
244
+
var gol = Self{ .m = 457 };
245
+
const input = &[_]u8{ 35, 6, 42 };
246
+
247
+
const decoded1 = try gol.decode(input, .{ .reset_tmp_values = false });
248
+
try testing.expectEqual(1564, decoded1);
249
+
250
+
const decoded2 = try gol.decode(input, .{});
251
+
try testing.expectEqual(42, decoded2);
252
+
}
+3
-18
src/root.zig
+3
-18
src/root.zig
···
1
//! By convention, root.zig is the root source file when making a library.
2
const std = @import("std");
3
4
-
pub fn bufferedPrint() !void {
5
-
// Stdout is for the actual output of your application, for example if you
6
-
// are implementing gzip, then only the compressed bytes should be sent to
7
-
// stdout, not any debugging messages.
8
-
var stdout_buffer: [1024]u8 = undefined;
9
-
var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
10
-
const stdout = &stdout_writer.interface;
11
12
-
try stdout.print("Run `zig build test` to run the tests.\n", .{});
13
-
14
-
try stdout.flush(); // Don't forget to flush!
15
-
}
16
-
17
-
pub fn add(a: i32, b: i32) i32 {
18
-
return a + b;
19
-
}
20
-
21
-
test "basic add functionality" {
22
-
try std.testing.expect(add(3, 7) == 10);
23
}