Serenity Operating System
1/*
2 * Copyright (c) 2019-2020, Marios Prokopakis <mariosprokopakis@gmail.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/QuickSort.h>
28#include <AK/StdLibExtras.h>
29#include <AK/String.h>
30#include <AK/Vector.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34
/*
 * One entry of the byte list given to `cut -b`.
 *
 * m_from and m_to are 1-based positions. Which of them is meaningful depends
 * on m_type; expand_list() in this file fills them in as follows:
 *   SingleIndex: m_from == m_to == the selected position
 *   SliceIndex:  m_from is the first position, m_to is left at -1 ("N-")
 *   RangedIndex: m_from..m_to inclusive ("N-M" or "-M")
 */
struct Index {
    enum class Type {
        SingleIndex,
        SliceIndex,
        RangedIndex
    };
    ssize_t m_from { -1 };
    ssize_t m_to { -1 };
    Type m_type { Type::SingleIndex };

    /*
     * Returns true when `other` collides with this entry.
     *
     * For a RangedIndex this is a closed-interval overlap test against
     * other.m_from/other.m_to. For every other type only the start positions
     * are compared, so a single index or slice "intersects" exactly those
     * entries that begin at the same position.
     *
     * The method does not modify the object and is therefore const, which
     * allows it to be called through const references.
     */
    bool intersects(const Index& other) const
    {
        if (m_type != Type::RangedIndex)
            return m_from == other.m_from;

        return other.m_from <= m_to && other.m_to >= m_from;
    }
};
53
/*
 * Prints the one-line usage summary to stdout and terminates the process
 * with exit status `ret`.
 *
 * Marked [[noreturn]] because control never comes back from exit(); callers
 * in this file rely on that after reporting an error.
 */
[[noreturn]] static void print_usage_and_exit(int ret)
{
    printf("Usage: cut -b list [File]\n");
    exit(ret);
}
59
60static void add_if_not_exists(Vector<Index>& indexes, Index data)
61{
62 bool append_to_vector = true;
63 for (auto& index : indexes) {
64 if (index.intersects(data)) {
65 if (index.m_type == Index::Type::RangedIndex) {
66 index.m_from = AK::min(index.m_from, data.m_from);
67 index.m_to = AK::max(index.m_to, data.m_to);
68 }
69 append_to_vector = false;
70 }
71 }
72
73 if (append_to_vector) {
74 indexes.append(data);
75 }
76}
77
78static void expand_list(Vector<String>& tokens, Vector<Index>& indexes)
79{
80 for (auto& token : tokens) {
81 if (token.length() == 0) {
82 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
83 print_usage_and_exit(1);
84 }
85
86 if (token == "-") {
87 fprintf(stderr, "cut: invalid range with no endpoint: %s\n", token.characters());
88 print_usage_and_exit(1);
89 }
90
91 if (token[0] == '-') {
92 bool ok = true;
93 ssize_t index = token.substring(1, token.length() - 1).to_int(ok);
94 if (!ok) {
95 fprintf(stderr, "cut: invalid byte/character position '%s'\n", token.characters());
96 print_usage_and_exit(1);
97 }
98
99 if (index == 0) {
100 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
101 print_usage_and_exit(1);
102 }
103
104 Index tmp = { 1, index, Index::Type::RangedIndex };
105 add_if_not_exists(indexes, tmp);
106 } else if (token[token.length() - 1] == '-') {
107 bool ok = true;
108 ssize_t index = token.substring(0, token.length() - 1).to_int(ok);
109 if (!ok) {
110 fprintf(stderr, "cut: invalid byte/character position '%s'\n", token.characters());
111 print_usage_and_exit(1);
112 }
113
114 if (index == 0) {
115 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
116 print_usage_and_exit(1);
117 }
118 Index tmp = { index, -1, Index::Type::SliceIndex };
119 add_if_not_exists(indexes, tmp);
120 } else {
121 auto range = token.split('-');
122 if (range.size() == 2) {
123 bool ok = true;
124 ssize_t index1 = range[0].to_int(ok);
125 if (!ok) {
126 fprintf(stderr, "cut: invalid byte/character position '%s'\n", range[0].characters());
127 print_usage_and_exit(1);
128 }
129
130 ssize_t index2 = range[1].to_int(ok);
131 if (!ok) {
132 fprintf(stderr, "cut: invalid byte/character position '%s'\n", range[1].characters());
133 print_usage_and_exit(1);
134 }
135
136 if (index1 > index2) {
137 fprintf(stderr, "cut: invalid decreasing range\n");
138 print_usage_and_exit(1);
139 } else if (index1 == 0 || index2 == 0) {
140 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
141 print_usage_and_exit(1);
142 }
143
144 Index tmp = { index1, index2, Index::Type::RangedIndex };
145 add_if_not_exists(indexes, tmp);
146 } else if (range.size() == 1) {
147 bool ok = true;
148 ssize_t index = range[0].to_int(ok);
149 if (!ok) {
150 fprintf(stderr, "cut: invalid byte/character position '%s'\n", range[0].characters());
151 print_usage_and_exit(1);
152 }
153
154 if (index == 0) {
155 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
156 print_usage_and_exit(1);
157 }
158
159 Index tmp = { index, index, Index::Type::SingleIndex };
160 add_if_not_exists(indexes, tmp);
161 } else {
162 fprintf(stderr, "cut: invalid byte or character range\n");
163 print_usage_and_exit(1);
164 }
165 }
166 }
167}
168
169static void cut_file(const String& file, const Vector<Index>& byte_vector)
170{
171 FILE* fp = nullptr;
172 fp = fopen(file.characters(), "r");
173
174 if (!fp) {
175 fprintf(stderr, "cut: Could not open file '%s'\n", file.characters());
176 return;
177 }
178
179 char* line = nullptr;
180 ssize_t line_length = 0;
181 size_t line_capacity = 0;
182 while ((line_length = getline(&line, &line_capacity, fp)) != -1) {
183 line[line_length - 1] = '\0';
184 line_length--;
185 for (auto& i : byte_vector) {
186 if (i.m_type == Index::Type::SliceIndex && i.m_from < line_length)
187 printf("%s", line + i.m_from - 1);
188 else if (i.m_type == Index::Type::SingleIndex && i.m_from <= line_length)
189 printf("%c", line[i.m_from - 1]);
190 else if (i.m_type == Index::Type::RangedIndex && i.m_from <= line_length) {
191 auto to = i.m_to > line_length ? line_length : i.m_to;
192 auto sub_string = String(line).substring(i.m_from - 1, to - i.m_from + 1);
193 printf("%s", sub_string.characters());
194 } else
195 break;
196 }
197 printf("\n");
198 }
199
200 if (line)
201 free(line);
202 fclose(fp);
203}
204
205int main(int argc, char** argv)
206{
207 String byte_list = "";
208 Vector<String> tokens;
209 Vector<String> files;
210 if (argc == 1) {
211 print_usage_and_exit(1);
212 }
213
214 for (int i = 1; i < argc;) {
215 if (!strcmp(argv[i], "-b")) {
216 /* The next argument should be a list of bytes. */
217 byte_list = (i + 1 < argc) ? argv[i + 1] : "";
218
219 if (byte_list == "") {
220 print_usage_and_exit(1);
221 }
222 tokens = byte_list.split(',');
223 i += 2;
224 } else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) {
225 print_usage_and_exit(1);
226 } else if (argv[i][0] != '-') {
227 files.append(argv[i++]);
228 } else {
229 fprintf(stderr, "cut: invalid argument %s\n", argv[i]);
230 print_usage_and_exit(1);
231 }
232 }
233
234 if (files.is_empty() || byte_list == "")
235 print_usage_and_exit(1);
236
237 Vector<Index> byte_vector;
238 expand_list(tokens, byte_vector);
239 quick_sort(byte_vector, [](auto& a, auto& b) { return a.m_from < b.m_from; });
240 /* Process each file */
241 for (auto& file : files)
242 cut_file(file, byte_vector);
243
244 return 0;
245}