Serenity Operating System
1/*
2 * Copyright (c) 2019-2020, Marios Prokopakis <mariosprokopakis@gmail.com>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/DeprecatedString.h>
8#include <AK/QuickSort.h>
9#include <AK/StdLibExtras.h>
10#include <AK/Vector.h>
11#include <LibCore/ArgsParser.h>
12#include <LibMain/Main.h>
13#include <stdio.h>
14#include <stdlib.h>
15
// A span of positions delimited by m_from and m_to. Both endpoints are treated
// as part of the span: two ranges that merely share an endpoint count as
// intersecting. A default-constructed Range spans 1 through SIZE_MAX.
struct Range {
    size_t m_from { 1 };
    size_t m_to { SIZE_MAX };

    // Returns true when this range and `other` have at least one position in common.
    [[nodiscard]] bool intersects(Range const& other) const
    {
        return other.m_from <= m_to && other.m_to >= m_from;
    }

    // Grows this range so that it also covers `other`.
    void merge(Range const& other)
    {
        // Merging is only meaningful for overlapping ranges; disjoint input is a caller bug.
        VERIFY(intersects(other));

        if (other.m_from < m_from)
            m_from = other.m_from;
        if (other.m_to > m_to)
            m_to = other.m_to;
    }
};
34
35static bool expand_list(DeprecatedString& list, Vector<Range>& ranges)
36{
37 Vector<DeprecatedString> tokens = list.split(',');
38
39 for (auto& token : tokens) {
40 if (token.length() == 0) {
41 warnln("cut: byte/character positions are numbered from 1");
42 return false;
43 }
44
45 if (token == "-") {
46 warnln("cut: invalid range with no endpoint: {}", token);
47 return false;
48 }
49
50 if (token[0] == '-') {
51 auto index = token.substring(1, token.length() - 1).to_uint();
52 if (!index.has_value()) {
53 warnln("cut: invalid byte/character position '{}'", token);
54 return false;
55 }
56
57 if (index.value() == 0) {
58 warnln("cut: byte/character positions are numbered from 1");
59 return false;
60 }
61
62 ranges.append({ 1, index.value() });
63 } else if (token[token.length() - 1] == '-') {
64 auto index = token.substring(0, token.length() - 1).to_uint();
65 if (!index.has_value()) {
66 warnln("cut: invalid byte/character position '{}'", token);
67 return false;
68 }
69
70 if (index.value() == 0) {
71 warnln("cut: byte/character positions are numbered from 1");
72 return false;
73 }
74
75 ranges.append({ index.value(), SIZE_MAX });
76 } else {
77 auto range = token.split('-');
78 if (range.size() == 2) {
79 auto index1 = range[0].to_uint();
80 if (!index1.has_value()) {
81 warnln("cut: invalid byte/character position '{}'", range[0]);
82 return false;
83 }
84
85 auto index2 = range[1].to_uint();
86 if (!index2.has_value()) {
87 warnln("cut: invalid byte/character position '{}'", range[1]);
88 return false;
89 }
90
91 if (index1.value() > index2.value()) {
92 warnln("cut: invalid decreasing range");
93 return false;
94 } else if (index1.value() == 0 || index2.value() == 0) {
95 warnln("cut: byte/character positions are numbered from 1");
96 return false;
97 }
98
99 ranges.append({ index1.value(), index2.value() });
100 } else if (range.size() == 1) {
101 auto index = range[0].to_uint();
102 if (!index.has_value()) {
103 warnln("cut: invalid byte/character position '{}'", range[0]);
104 return false;
105 }
106
107 if (index.value() == 0) {
108 warnln("cut: byte/character positions are numbered from 1");
109 return false;
110 }
111
112 ranges.append({ index.value(), index.value() });
113 } else {
114 warnln("cut: invalid byte or character range");
115 return false;
116 }
117 }
118 }
119
120 return true;
121}
122
123static void process_line_bytes(char* line, size_t length, Vector<Range> const& ranges)
124{
125 for (auto& i : ranges) {
126 if (i.m_from >= length)
127 continue;
128
129 auto to = min(i.m_to, length);
130 auto sub_string = DeprecatedString(line).substring(i.m_from - 1, to - i.m_from + 1);
131 out("{}", sub_string);
132 }
133 outln();
134}
135
136static void process_line_fields(char* line, size_t length, Vector<Range> const& ranges, char delimiter)
137{
138 auto string_split = DeprecatedString(line, length).split(delimiter);
139 Vector<DeprecatedString> output_fields;
140
141 for (auto& range : ranges) {
142 for (size_t i = range.m_from - 1; i < min(range.m_to, string_split.size()); i++) {
143 output_fields.append(string_split[i]);
144 }
145 }
146
147 outln("{}", DeprecatedString::join(delimiter, output_fields));
148}
149
150ErrorOr<int> serenity_main(Main::Arguments arguments)
151{
152 DeprecatedString byte_list = "";
153 DeprecatedString fields_list = "";
154 DeprecatedString delimiter = "\t";
155
156 Vector<StringView> files;
157
158 Core::ArgsParser args_parser;
159 args_parser.add_positional_argument(files, "file(s) to cut", "file", Core::ArgsParser::Required::No);
160 args_parser.add_option(byte_list, "select only these bytes", "bytes", 'b', "list");
161 args_parser.add_option(fields_list, "select only these fields", "fields", 'f', "list");
162 args_parser.add_option(delimiter, "set a custom delimiter", "delimiter", 'd', "delimiter");
163 args_parser.parse(arguments);
164
165 bool selected_bytes = (byte_list != "");
166 bool selected_fields = (fields_list != "");
167
168 int selected_options_count = (selected_bytes ? 1 : 0) + (selected_fields ? 1 : 0);
169
170 if (selected_options_count == 0) {
171 warnln("cut: you must specify a list of bytes, or fields");
172 args_parser.print_usage(stderr, arguments.strings[0]);
173 return 1;
174 }
175
176 if (selected_options_count > 1) {
177 warnln("cut: you must specify only one of bytes, or fields");
178 args_parser.print_usage(stderr, arguments.strings[0]);
179 return 1;
180 }
181
182 if (delimiter.length() != 1) {
183 warnln("cut: the delimiter must be a single character");
184 args_parser.print_usage(stderr, arguments.strings[0]);
185 return 1;
186 }
187
188 DeprecatedString ranges_list;
189 Vector<Range> ranges_vector;
190
191 if (selected_bytes) {
192 ranges_list = byte_list;
193 } else if (selected_fields) {
194 ranges_list = fields_list;
195 } else {
196 // This should never happen, since we already checked the options count above.
197 VERIFY_NOT_REACHED();
198 }
199
200 auto expansion_successful = expand_list(ranges_list, ranges_vector);
201
202 if (!expansion_successful) {
203 args_parser.print_usage(stderr, arguments.strings[0]);
204 return 1;
205 }
206
207 quick_sort(ranges_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; });
208
209 Vector<Range> disjoint_ranges;
210 for (auto& range : ranges_vector) {
211 if (disjoint_ranges.is_empty()) {
212 disjoint_ranges.append(range);
213 continue;
214 }
215
216 Range& last_range = disjoint_ranges.last();
217
218 if (!last_range.intersects(range)) {
219 disjoint_ranges.append(range);
220 continue;
221 }
222
223 last_range.merge(range);
224 }
225
226 if (files.is_empty())
227 files.append(DeprecatedString());
228
229 /* Process each file */
230 for (auto& file : files) {
231 FILE* fp = stdin;
232 if (!file.is_null()) {
233 fp = fopen(DeprecatedString(file).characters(), "r");
234 if (!fp) {
235 warnln("cut: Could not open file '{}'", file);
236 continue;
237 }
238 }
239
240 char* line = nullptr;
241 ssize_t line_length = 0;
242 size_t line_capacity = 0;
243 while ((line_length = getline(&line, &line_capacity, fp)) != -1) {
244 if (line_length < 0) {
245 warnln("cut: Failed to read line from file '{}'", file);
246 break;
247 }
248 line[line_length - 1] = '\0';
249 line_length--;
250
251 if (selected_bytes) {
252 process_line_bytes(line, line_length, disjoint_ranges);
253 } else if (selected_fields) {
254 process_line_fields(line, line_length, disjoint_ranges, delimiter[0]);
255 } else {
256 VERIFY_NOT_REACHED();
257 }
258 }
259
260 if (line)
261 free(line);
262
263 if (!file.is_null())
264 fclose(fp);
265 }
266
267 return 0;
268}