ls but with io_uring

Use natural ordering when sorting #10

merged opened by knightpp.bsky.social targeting main from knightpp.bsky.social/lsr: use-natord-sorting

The code of natord.zig is a direct translation of the C implementation at https://github.com/sourcefrog/natsort. To preserve the C string handling, the Zig implementation injects a 0 byte when it reaches the end of a string. That could be improved to use .len instead.

Actually, I've also created a Zig package https://codeberg.org/knightpp/natural-ordering-zig, but decided to just copy the code here. If you'd like to make a dependency on my project instead, let me know.

Before

$ ll chapter3/
-rw-r--r-- user user 1.7 KB 21 May 12:43 󰈤 digital-circuits.scm
-rw-r--r-- user user 2.5 KB 21 May 12:43 󰈤 doubly-list.scm
-rw-r--r-- user user 223 B  21 May 12:43 󰈤 exercise3_1.scm
-rw-r--r-- user user 683 B  21 May 12:43 󰈤 exercise3_10.scm
-rw-r--r-- user user 399 B  21 May 12:43 󰈤 exercise3_11.scm
-rw-r--r-- user user 147 B  21 May 12:43 󰈤 exercise3_12.scm
-rw-r--r-- user user 263 B  21 May 12:43 󰈤 exercise3_13.scm
-rw-r--r-- user user 292 B  21 May 12:43 󰈤 exercise3_14.scm
-rw-r--r-- user user 189 B  21 May 12:43 󰈤 exercise3_15.scm
-rw-r--r-- user user 350 B  21 May 12:43 󰈤 exercise3_16.scm
-rw-r--r-- user user 530 B  21 May 12:43 󰈤 exercise3_17.scm
-rw-r--r-- user user 341 B  21 May 12:43 󰈤 exercise3_18.scm
-rw-r--r-- user user 636 B  21 May 12:43 󰈤 exercise3_19.scm
-rw-r--r-- user user 460 B  21 May 12:43 󰈤 exercise3_2.scm
-rw-r--r-- user user 742 B  21 May 12:43 󰈤 exercise3_20.scm
-rw-r--r-- user user 427 B  21 May 12:43 󰈤 exercise3_21.scm
-rw-r--r-- user user 1.1 KB 21 May 12:43 󰈤 exercise3_22.scm
-rw-r--r-- user user 481 B  21 May 12:43 󰈤 exercise3_23.scm
-rw-r--r-- user user 1.5 KB 21 May 12:43 󰈤 exercise3_24.scm
-rw-r--r-- user user 1.3 KB 21 May 12:43 󰈤 exercise3_25.scm
-rw-r--r-- user user 670 B  21 May 12:43 󰈤 exercise3_26.scm
-rw-r--r-- user user 814 B  21 May 12:43 󰈤 exercise3_27.scm
-rw-r--r-- user user 481 B  21 May 12:43 󰈤 exercise3_28.scm
-rw-r--r-- user user 342 B  21 May 12:43 󰈤 exercise3_29.scm
-rw-r--r-- user user 455 B  21 May 12:43 󰈤 exercise3_3.scm
-rw-r--r-- user user 1.1 KB 21 May 12:43 󰈤 exercise3_30.scm
-rw-r--r-- user user 869 B  21 May 12:43 󰈤 exercise3_4.scm
-rw-r--r-- user user 971 B  21 May 12:43 󰈤 exercise3_5.scm
-rw-r--r-- user user 571 B  21 May 12:43 󰈤 exercise3_6.scm
-rw-r--r-- user user 754 B  21 May 12:43 󰈤 exercise3_7.scm
-rw-r--r-- user user 157 B  21 May 12:43 󰈤 exercise3_8.scm
-rw-r--r-- user user 383 B  21 May 12:43 󰈤 exercise3_9.scm
-rw-r--r-- user user 1.0 KB 21 May 12:43 󰈤 queue.scm

After

$ ~/src/forks/lsr/zig-out/bin/lsr -l chapter3/
-rw-r--r-- user user 1.7 KB 21 May 12:43 󰈤 digital-circuits.scm
-rw-r--r-- user user 2.5 KB 21 May 12:43 󰈤 doubly-list.scm
-rw-r--r-- user user 223 B  21 May 12:43 󰈤 exercise3_1.scm
-rw-r--r-- user user 460 B  21 May 12:43 󰈤 exercise3_2.scm
-rw-r--r-- user user 455 B  21 May 12:43 󰈤 exercise3_3.scm
-rw-r--r-- user user 869 B  21 May 12:43 󰈤 exercise3_4.scm
-rw-r--r-- user user 971 B  21 May 12:43 󰈤 exercise3_5.scm
-rw-r--r-- user user 571 B  21 May 12:43 󰈤 exercise3_6.scm
-rw-r--r-- user user 754 B  21 May 12:43 󰈤 exercise3_7.scm
-rw-r--r-- user user 157 B  21 May 12:43 󰈤 exercise3_8.scm
-rw-r--r-- user user 383 B  21 May 12:43 󰈤 exercise3_9.scm
-rw-r--r-- user user 683 B  21 May 12:43 󰈤 exercise3_10.scm
-rw-r--r-- user user 399 B  21 May 12:43 󰈤 exercise3_11.scm
-rw-r--r-- user user 147 B  21 May 12:43 󰈤 exercise3_12.scm
-rw-r--r-- user user 263 B  21 May 12:43 󰈤 exercise3_13.scm
-rw-r--r-- user user 292 B  21 May 12:43 󰈤 exercise3_14.scm
-rw-r--r-- user user 189 B  21 May 12:43 󰈤 exercise3_15.scm
-rw-r--r-- user user 350 B  21 May 12:43 󰈤 exercise3_16.scm
-rw-r--r-- user user 530 B  21 May 12:43 󰈤 exercise3_17.scm
-rw-r--r-- user user 341 B  21 May 12:43 󰈤 exercise3_18.scm
-rw-r--r-- user user 636 B  21 May 12:43 󰈤 exercise3_19.scm
-rw-r--r-- user user 742 B  21 May 12:43 󰈤 exercise3_20.scm
-rw-r--r-- user user 427 B  21 May 12:43 󰈤 exercise3_21.scm
-rw-r--r-- user user 1.1 KB 21 May 12:43 󰈤 exercise3_22.scm
-rw-r--r-- user user 481 B  21 May 12:43 󰈤 exercise3_23.scm
-rw-r--r-- user user 1.5 KB 21 May 12:43 󰈤 exercise3_24.scm
-rw-r--r-- user user 1.3 KB 21 May 12:43 󰈤 exercise3_25.scm
-rw-r--r-- user user 670 B  21 May 12:43 󰈤 exercise3_26.scm
-rw-r--r-- user user 814 B  21 May 12:43 󰈤 exercise3_27.scm
-rw-r--r-- user user 481 B  21 May 12:43 󰈤 exercise3_28.scm
-rw-r--r-- user user 342 B  21 May 12:43 󰈤 exercise3_29.scm
-rw-r--r-- user user 1.1 KB 21 May 12:43 󰈤 exercise3_30.scm
-rw-r--r-- user user 1.0 KB 21 May 12:43 󰈤 queue.scm
Labels

None yet.

Participants 2
AT URI
at://did:plc:3zsjx5jgd6dbi4orckspsqiq/sh.tangled.repo.pull/3lpocvhvcyc22
+354 -4
Diff #0
+347
src/natord.zig
···
//! Natural-order ("human") string comparison: runs of digits are compared by
//! numeric value, so "item2" sorts before "item10".
//!
//! This file is a port of the C implementation that can be found here:
//! https://github.com/sourcefrog/natsort.
//!
//! To mirror the C string handling, a 0 byte is substituted whenever an index
//! reaches the end of a slice, so the end of a string behaves like a NUL
//! terminator.
const std = @import("std");
const isSpace = std.ascii.isWhitespace;
const isDigit = std.ascii.isDigit;
const Order = std.math.Order;
const testing = std.testing;

/// Compares `a` and `b` in natural order, case-sensitively.
pub fn order(a: []const u8, b: []const u8) Order {
    return natOrder(a, b, false);
}

/// Compares `a` and `b` in natural order, ignoring ASCII case.
pub fn orderIgnoreCase(a: []const u8, b: []const u8) Order {
    return natOrder(a, b, true);
}

/// Returns the byte at `index`, or 0 once `index` has reached the end of `s`.
/// The 0 plays the role of the C string terminator.
fn byteAt(s: []const u8, index: usize) u8 {
    return if (index < s.len) s[index] else 0;
}

/// Core comparison loop shared by `order` and `orderIgnoreCase`.
///
/// Walks both strings in lockstep. ASCII whitespace is skipped on both sides
/// before every comparison step. When both cursors sit on a digit, the digit
/// runs are compared as numbers: with `compareLeft` if either run starts with
/// '0' (treated as a fractional part), otherwise with `compareRight`.
/// When `fold_case` is true, bytes are upper-cased before being compared.
fn natOrder(a: []const u8, b: []const u8, comptime fold_case: bool) Order {
    var ai: usize = 0;
    var bi: usize = 0;

    while (true) : ({
        ai += 1;
        bi += 1;
    }) {
        var ca = byteAt(a, ai);
        var cb = byteAt(b, bi);

        // Skip over whitespace. The 0 sentinel is not whitespace, so these
        // loops stop at the end of the string at the latest.
        while (isSpace(ca)) {
            ai += 1;
            ca = byteAt(a, ai);
        }

        while (isSpace(cb)) {
            bi += 1;
            cb = byteAt(b, bi);
        }

        // Process a run of digits.
        if (isDigit(ca) and isDigit(cb)) {
            const fractional = ca == '0' or cb == '0';

            const result = if (fractional)
                compareLeft(a[ai..], b[bi..])
            else
                compareRight(a[ai..], b[bi..]);
            if (result != .eq) return result;
        }

        if (ca == 0 and cb == 0) {
            // Both strings ended without finding a difference.
            return .eq;
        }

        if (fold_case) {
            ca = std.ascii.toUpper(ca);
            cb = std.ascii.toUpper(cb);
        }

        const byte_order = std.math.order(ca, cb);
        if (byte_order != .eq) return byte_order;
    }
}

/// Compares two left-aligned digit runs (used when a run starts with '0').
///
/// Digits are compared position by position and the first difference wins.
/// If one run ends first, the shorter run orders first. Returns `.eq` when
/// both runs end at the same position without a difference.
fn compareLeft(a: []const u8, b: []const u8) Order {
    var i: usize = 0;
    while (true) : (i += 1) {
        const ca = byteAt(a, i);
        const cb = byteAt(b, i);
        const a_is_digit = isDigit(ca);
        const b_is_digit = isDigit(cb);

        if (!a_is_digit and !b_is_digit) return .eq;
        if (!a_is_digit) return .lt;
        if (!b_is_digit) return .gt;

        const digit_order = std.math.order(ca, cb);
        if (digit_order != .eq) return digit_order;
    }
}

/// Compares two right-aligned digit runs (no leading zeros involved).
///
/// The longer run is the larger number. If both runs have the same length,
/// the first differing digit decides; that decision is remembered in `bias`
/// until the end of both runs is reached.
fn compareRight(a: []const u8, b: []const u8) Order {
    var bias = Order.eq;

    var i: usize = 0;
    while (true) : (i += 1) {
        const ca = byteAt(a, i);
        const cb = byteAt(b, i);
        const a_is_digit = isDigit(ca);
        const b_is_digit = isDigit(cb);

        if (!a_is_digit and !b_is_digit) return bias;
        if (!a_is_digit) return .lt;
        if (!b_is_digit) return .gt;

        // Only the first difference may set the bias (the C original guards
        // this with `if (!bias)`); later digits must not override it.
        if (bias == .eq) {
            bias = std.math.order(ca, cb);
        }
    }
}

/// Sort context adapting the natural-order comparison to the `lessThan`
/// callback shape expected by `std.sort`.
const SortContext = struct {
    /// Compare with `orderIgnoreCase` instead of `order`.
    ignore_case: bool = false,
    /// Sort in descending instead of ascending order.
    reverse: bool = false,

    /// Returns true when `a` must be placed before `b`.
    fn compare(self: @This(), a: []const u8, b: []const u8) bool {
        const wanted: Order = if (self.reverse) .gt else .lt;
        const actual = if (self.ignore_case) orderIgnoreCase(a, b) else order(a, b);
        return actual == wanted;
    }
};

test "lt" {
    try testing.expectEqual(Order.lt, order("a_1", "a_10"));
}

test "eq" {
    try testing.expectEqual(Order.eq, order("a_1", "a_1"));
}

test "gt" {
    try testing.expectEqual(Order.gt, order("a_10", "a_1"));
}

test "compareRight remembers the first differing digit" {
    try testing.expectEqual(Order.lt, compareRight("12", "21"));
    try testing.expectEqual(Order.gt, compareRight("21", "12"));
    try testing.expectEqual(Order.lt, compareRight("129", "131"));
    try testing.expectEqual(Order.eq, compareRight("12", "12"));
    try testing.expectEqual(Order.lt, compareRight("9", "10"));
}

/// Sorts `input` in place with `context` and checks it against `want`.
fn sortAndAssert(context: SortContext, input: [][]const u8, want: []const []const u8) !void {
    std.sort.pdq([]const u8, input, context, SortContext.compare);

    for (input, want) |actual, expected| {
        try testing.expectEqualStrings(expected, actual);
    }
}

test "sorting" {
    const context = SortContext{};
    var items = [_][]const u8{
        "item100",
        "item10",
        "item1",
    };
    const want = [_][]const u8{
        "item1",
        "item10",
        "item100",
    };

    try sortAndAssert(context, &items, &want);
}

test "sorting 2" {
    const context = SortContext{};
    var items = [_][]const u8{
        "item_30",
        "item_15",
        "item_3",
        "item_2",
        "item_10",
    };
    const want = [_][]const u8{
        "item_2",
        "item_3",
        "item_10",
        "item_15",
        "item_30",
    };

    try sortAndAssert(context, &items, &want);
}

test "leading zeros" {
    const context = SortContext{};
    var items = [_][]const u8{
        "item100",
        "item999",
        "item001",
        "item010",
        "item000",
    };
    const want = [_][]const u8{
        "item000",
        "item001",
        "item010",
        "item100",
        "item999",
    };

    try sortAndAssert(context, &items, &want);
}

test "dates" {
    const context = SortContext{};
    var items = [_][]const u8{
        "2000-1-10",
        "2000-1-2",
        "1999-12-25",
        "2000-3-23",
        "1999-3-3",
    };
    const want = [_][]const u8{
        "1999-3-3",
        "1999-12-25",
        "2000-1-2",
        "2000-1-10",
        "2000-3-23",
    };

    try sortAndAssert(context, &items, &want);
}

test "fractions" {
    const context = SortContext{};
    var items = [_][]const u8{
        "Fractional release numbers",
        "1.011.02",
        "1.010.12",
        "1.009.02",
        "1.009.20",
        "1.009.10",
        "1.002.08",
        "1.002.03",
        "1.002.01",
    };
    const want = [_][]const u8{
        "1.002.01",
        "1.002.03",
        "1.002.08",
        "1.009.02",
        "1.009.10",
        "1.009.20",
        "1.010.12",
        "1.011.02",
        "Fractional release numbers",
    };

    try sortAndAssert(context, &items, &want);
}

test "words" {
    const context = SortContext{};
    var items = [_][]const u8{
        "fred",
        "pic2",
        "pic100a",
        "pic120",
        "pic121",
        "jane",
        "tom",
        "pic02a",
        "pic3",
        "pic4",
        "1-20",
        "pic100",
        "pic02000",
        "10-20",
        "1-02",
        "1-2",
        "x2-y7",
        "x8-y8",
        "x2-y08",
        "x2-g8",
        "pic01",
        "pic02",
        "pic 6",
        "pic 7",
        "pic 5",
        "pic05",
        "pic 5 ",
        "pic 5 something",
        "pic 4 else",
    };
    const want = [_][]const u8{
        "1-02",
        "1-2",
        "1-20",
        "10-20",
        "fred",
        "jane",
        "pic01",
        "pic02",
        "pic02a",
        "pic02000",
        "pic05",
        "pic2",
        "pic3",
        "pic4",
        "pic 4 else",
        "pic 5",
        "pic 5 ",
        "pic 5 something",
        "pic 6",
        "pic 7",
        "pic100",
        "pic100a",
        "pic120",
        "pic121",
        "tom",
        "x2-g8",
        "x2-y08",
        "x2-y7",
        "x8-y8",
    };

    try sortAndAssert(context, &items, &want);
}

test "fuzz" {
    const Context = struct {
        fn testOne(context: @This(), input: []const u8) anyerror!void {
            _ = context;

            // Split the fuzz input in half and compare the halves; the only
            // expectation is that the comparison does not crash.
            const a = input[0..(input.len / 2)];
            const b = input[(input.len / 2)..];
            _ = order(a, b);
        }
    };

    try std.testing.fuzz(Context{}, Context.testOne, .{});
}
+7 -4
src/main.zig
··· 2 const builtin = @import("builtin"); 3 const ourio = @import("ourio"); 4 const zeit = @import("zeit"); 5 const build_options = @import("build_options"); 6 7 const posix = std.posix; ··· 289 290 if (cmd.entries.len == 0) return; 291 292 - if (cmd.opts.sort_by_mod_time) { 293 - std.sort.pdq(Entry, cmd.entries, cmd.opts, Entry.lessThan); 294 - } 295 296 if (cmd.opts.reverse_sort) { 297 std.mem.reverse(Entry, cmd.entries); ··· 695 return lhs.statx.mtime.sec > rhs.statx.mtime.sec; 696 } 697 698 - return std.ascii.lessThanIgnoreCase(lhs.name, rhs.name); 699 } 700 701 fn modeStr(self: Entry) [10]u8 { ··· 1197 if (std.mem.startsWith(u8, a, "-")) return .short; 1198 return .positional; 1199 }
··· 2 const builtin = @import("builtin"); 3 const ourio = @import("ourio"); 4 const zeit = @import("zeit"); 5 + const natord = @import("natord.zig"); 6 const build_options = @import("build_options"); 7 8 const posix = std.posix; ··· 290 291 if (cmd.entries.len == 0) return; 292 293 + std.sort.pdq(Entry, cmd.entries, cmd.opts, Entry.lessThan); 294 295 if (cmd.opts.reverse_sort) { 296 std.mem.reverse(Entry, cmd.entries); ··· 694 return lhs.statx.mtime.sec > rhs.statx.mtime.sec; 695 } 696 697 + return natord.orderIgnoreCase(lhs.name, rhs.name) == .lt; 698 } 699 700 fn modeStr(self: Entry) [10]u8 { ··· 1196 if (std.mem.startsWith(u8, a, "-")) return .short; 1197 return .positional; 1198 } 1199 + 1200 + test "ref" { 1201 + _ = natord; 1202 + }

History

1 round 3 comments
sign up or login to add to the discussion
2 commits
expand
Add natord.zig
Use natord in sorting
expand 3 comments

This is awesome! The only concern I had was that this would reduce perf a little bit, but it is consistently about 5% faster than the previous sorting.

Oh, and re: dependency - I prefer copying such a small dep, so thanks for that!

Wow, merged so quickly! 🎉

pull request successfully merged