Serenity Operating System
at master 268 lines 8.3 kB view raw
1/* 2 * Copyright (c) 2019-2020, Marios Prokopakis <mariosprokopakis@gmail.com> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/DeprecatedString.h> 8#include <AK/QuickSort.h> 9#include <AK/StdLibExtras.h> 10#include <AK/Vector.h> 11#include <LibCore/ArgsParser.h> 12#include <LibMain/Main.h> 13#include <stdio.h> 14#include <stdlib.h> 15 16struct Range { 17 size_t m_from { 1 }; 18 size_t m_to { SIZE_MAX }; 19 20 [[nodiscard]] bool intersects(Range const& other) const 21 { 22 return !(other.m_from > m_to || other.m_to < m_from); 23 } 24 25 void merge(Range const& other) 26 { 27 // Can't merge two ranges that are disjoint. 28 VERIFY(intersects(other)); 29 30 m_from = min(m_from, other.m_from); 31 m_to = max(m_to, other.m_to); 32 } 33}; 34 35static bool expand_list(DeprecatedString& list, Vector<Range>& ranges) 36{ 37 Vector<DeprecatedString> tokens = list.split(','); 38 39 for (auto& token : tokens) { 40 if (token.length() == 0) { 41 warnln("cut: byte/character positions are numbered from 1"); 42 return false; 43 } 44 45 if (token == "-") { 46 warnln("cut: invalid range with no endpoint: {}", token); 47 return false; 48 } 49 50 if (token[0] == '-') { 51 auto index = token.substring(1, token.length() - 1).to_uint(); 52 if (!index.has_value()) { 53 warnln("cut: invalid byte/character position '{}'", token); 54 return false; 55 } 56 57 if (index.value() == 0) { 58 warnln("cut: byte/character positions are numbered from 1"); 59 return false; 60 } 61 62 ranges.append({ 1, index.value() }); 63 } else if (token[token.length() - 1] == '-') { 64 auto index = token.substring(0, token.length() - 1).to_uint(); 65 if (!index.has_value()) { 66 warnln("cut: invalid byte/character position '{}'", token); 67 return false; 68 } 69 70 if (index.value() == 0) { 71 warnln("cut: byte/character positions are numbered from 1"); 72 return false; 73 } 74 75 ranges.append({ index.value(), SIZE_MAX }); 76 } else { 77 auto range = token.split('-'); 78 if (range.size() == 2) { 79 auto index1 = range[0].to_uint(); 80 if (!index1.has_value()) { 81 warnln("cut: invalid byte/character position '{}'", range[0]); 82 return false; 83 } 84 85 auto index2 = range[1].to_uint(); 86 if (!index2.has_value()) { 87 warnln("cut: invalid byte/character position '{}'", range[1]); 88 return false; 89 } 90 91 if (index1.value() > index2.value()) { 92 warnln("cut: invalid decreasing range"); 93 return false; 94 } else if (index1.value() == 0 || index2.value() == 0) { 95 warnln("cut: byte/character positions are numbered from 1"); 96 return false; 97 } 98 99 ranges.append({ index1.value(), index2.value() }); 100 } else if (range.size() == 1) { 101 auto index = range[0].to_uint(); 102 if (!index.has_value()) { 103 warnln("cut: invalid byte/character position '{}'", range[0]); 104 return false; 105 } 106 107 if (index.value() == 0) { 108 warnln("cut: byte/character positions are numbered from 1"); 109 return false; 110 } 111 112 ranges.append({ index.value(), index.value() }); 113 } else { 114 warnln("cut: invalid byte or character range"); 115 return false; 116 } 117 } 118 } 119 120 return true; 121} 122 123static void process_line_bytes(char* line, size_t length, Vector<Range> const& ranges) 124{ 125 for (auto& i : ranges) { 126 if (i.m_from >= length) 127 continue; 128 129 auto to = min(i.m_to, length); 130 auto sub_string = DeprecatedString(line).substring(i.m_from - 1, to - i.m_from + 1); 131 out("{}", sub_string); 132 } 133 outln(); 134} 135 136static void process_line_fields(char* line, size_t length, Vector<Range> const& ranges, char delimiter) 137{ 138 auto string_split = DeprecatedString(line, length).split(delimiter); 139 Vector<DeprecatedString> output_fields; 140 141 for (auto& range : ranges) { 142 for (size_t i = range.m_from - 1; i < min(range.m_to, string_split.size()); i++) { 143 output_fields.append(string_split[i]); 144 } 145 } 146 147 outln("{}", DeprecatedString::join(delimiter, output_fields)); 148} 149 150ErrorOr<int> serenity_main(Main::Arguments arguments) 151{ 152 DeprecatedString byte_list = ""; 153 DeprecatedString fields_list = ""; 154 DeprecatedString delimiter = "\t"; 155 156 Vector<StringView> files; 157 158 Core::ArgsParser args_parser; 159 args_parser.add_positional_argument(files, "file(s) to cut", "file", Core::ArgsParser::Required::No); 160 args_parser.add_option(byte_list, "select only these bytes", "bytes", 'b', "list"); 161 args_parser.add_option(fields_list, "select only these fields", "fields", 'f', "list"); 162 args_parser.add_option(delimiter, "set a custom delimiter", "delimiter", 'd', "delimiter"); 163 args_parser.parse(arguments); 164 165 bool selected_bytes = (byte_list != ""); 166 bool selected_fields = (fields_list != ""); 167 168 int selected_options_count = (selected_bytes ? 1 : 0) + (selected_fields ? 1 : 0); 169 170 if (selected_options_count == 0) { 171 warnln("cut: you must specify a list of bytes, or fields"); 172 args_parser.print_usage(stderr, arguments.strings[0]); 173 return 1; 174 } 175 176 if (selected_options_count > 1) { 177 warnln("cut: you must specify only one of bytes, or fields"); 178 args_parser.print_usage(stderr, arguments.strings[0]); 179 return 1; 180 } 181 182 if (delimiter.length() != 1) { 183 warnln("cut: the delimiter must be a single character"); 184 args_parser.print_usage(stderr, arguments.strings[0]); 185 return 1; 186 } 187 188 DeprecatedString ranges_list; 189 Vector<Range> ranges_vector; 190 191 if (selected_bytes) { 192 ranges_list = byte_list; 193 } else if (selected_fields) { 194 ranges_list = fields_list; 195 } else { 196 // This should never happen, since we already checked the options count above. 197 VERIFY_NOT_REACHED(); 198 } 199 200 auto expansion_successful = expand_list(ranges_list, ranges_vector); 201 202 if (!expansion_successful) { 203 args_parser.print_usage(stderr, arguments.strings[0]); 204 return 1; 205 } 206 207 quick_sort(ranges_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; }); 208 209 Vector<Range> disjoint_ranges; 210 for (auto& range : ranges_vector) { 211 if (disjoint_ranges.is_empty()) { 212 disjoint_ranges.append(range); 213 continue; 214 } 215 216 Range& last_range = disjoint_ranges.last(); 217 218 if (!last_range.intersects(range)) { 219 disjoint_ranges.append(range); 220 continue; 221 } 222 223 last_range.merge(range); 224 } 225 226 if (files.is_empty()) 227 files.append(DeprecatedString()); 228 229 /* Process each file */ 230 for (auto& file : files) { 231 FILE* fp = stdin; 232 if (!file.is_null()) { 233 fp = fopen(DeprecatedString(file).characters(), "r"); 234 if (!fp) { 235 warnln("cut: Could not open file '{}'", file); 236 continue; 237 } 238 } 239 240 char* line = nullptr; 241 ssize_t line_length = 0; 242 size_t line_capacity = 0; 243 while ((line_length = getline(&line, &line_capacity, fp)) != -1) { 244 if (line_length < 0) { 245 warnln("cut: Failed to read line from file '{}'", file); 246 break; 247 } 248 line[line_length - 1] = '\0'; 249 line_length--; 250 251 if (selected_bytes) { 252 process_line_bytes(line, line_length, disjoint_ranges); 253 } else if (selected_fields) { 254 process_line_fields(line, line_length, disjoint_ranges, delimiter[0]); 255 } else { 256 VERIFY_NOT_REACHED(); 257 } 258 } 259 260 if (line) 261 free(line); 262 263 if (!file.is_null()) 264 fclose(fp); 265 } 266 267 return 0; 268}