Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/QuickSort.h>
28#include <AK/String.h>
29#include <AK/Vector.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
/*
 * One entry of the expanded byte list.
 *
 * m_from is the 1-based position of the selected byte. For a SingleIndex the
 * list expansion stores the same value in m_to; for a RangedIndex ("N-",
 * meaning "from N to the end of the line") m_to is left at -1.
 *
 * The member order is part of the interface: callers aggregate-initialize
 * this struct as { from, to, type }.
 */
struct Index {
    enum class Type {
        SingleIndex, // exactly one byte position
        RangedIndex  // open-ended range starting at m_from
    };

    ssize_t m_from = -1;
    ssize_t m_to = -1;
    Type m_type = Type::SingleIndex;
};
43
/* Writes the one-line usage summary to stdout, then terminates the process
 * with `ret` as its exit status. Does not return. */
static void print_usage_and_exit(int ret)
{
    fputs("Usage: cut -b list [File]\n", stdout);
    exit(ret);
}
49
50static void add_if_not_exists(Vector<Index>& indexes, Index data)
51{
52 auto find = [data](auto& other) { return other.m_from == data.m_from && other.m_to == data.m_to; };
53 if (indexes.find(find) == indexes.end()) {
54 indexes.append(data);
55 }
56}
57
58static void expand_list(Vector<String>& tokens, Vector<Index>& indexes)
59{
60 for (auto& token : tokens) {
61 if (token.length() == 0) {
62 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
63 print_usage_and_exit(1);
64 }
65
66 if (token == "-") {
67 fprintf(stderr, "cut: invalid range with no endpoint: %s\n", token.characters());
68 print_usage_and_exit(1);
69 }
70
71 if (token[0] == '-') {
72 bool ok = true;
73 ssize_t index = token.substring(1, token.length() - 1).to_int(ok);
74 if (!ok) {
75 fprintf(stderr, "cut: invalid byte/character position '%s'\n", token.characters());
76 print_usage_and_exit(1);
77 }
78
79 if (index == 0) {
80 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
81 print_usage_and_exit(1);
82 }
83
84 for (ssize_t i = 1; i <= index; ++i) {
85 Index tmp = { i, i, Index::Type::SingleIndex };
86 add_if_not_exists(indexes, tmp);
87 }
88 } else if (token[token.length() - 1] == '-') {
89 bool ok = true;
90 ssize_t index = token.substring(0, token.length() - 1).to_int(ok);
91 if (!ok) {
92 fprintf(stderr, "cut: invalid byte/character position '%s'\n", token.characters());
93 print_usage_and_exit(1);
94 }
95
96 if (index == 0) {
97 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
98 print_usage_and_exit(1);
99 }
100 Index tmp = { index, -1, Index::Type::RangedIndex };
101 add_if_not_exists(indexes, tmp);
102 } else {
103 auto range = token.split('-');
104 if (range.size() == 2) {
105 bool ok = true;
106 ssize_t index1 = range[0].to_int(ok);
107 if (!ok) {
108 fprintf(stderr, "cut: invalid byte/character position '%s'\n", range[0].characters());
109 print_usage_and_exit(1);
110 }
111
112 ssize_t index2 = range[1].to_int(ok);
113 if (!ok) {
114 fprintf(stderr, "cut: invalid byte/character position '%s'\n", range[1].characters());
115 print_usage_and_exit(1);
116 }
117
118 if (index1 > index2) {
119 fprintf(stderr, "cut: invalid decreasing range\n");
120 print_usage_and_exit(1);
121 } else if (index1 == 0 || index2 == 0) {
122 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
123 print_usage_and_exit(1);
124 }
125
126 for (; index1 <= index2; ++index1) {
127 Index tmp = { index1, index1, Index::Type::SingleIndex };
128 add_if_not_exists(indexes, tmp);
129 }
130 } else if (range.size() == 1) {
131 bool ok = true;
132 ssize_t index = range[0].to_int(ok);
133 if (!ok) {
134 fprintf(stderr, "cut: invalid byte/character position '%s'\n", range[0].characters());
135 print_usage_and_exit(1);
136 }
137
138 if (index == 0) {
139 fprintf(stderr, "cut: byte/character positions are numbered from 1\n");
140 print_usage_and_exit(1);
141 }
142
143 Index tmp = { index, index, Index::Type::SingleIndex };
144 add_if_not_exists(indexes, tmp);
145 } else {
146 fprintf(stderr, "cut: invalid byte or character range\n");
147 print_usage_and_exit(1);
148 }
149 }
150 }
151}
152
153static void cut_file(const String& file, const Vector<Index>& byte_vector)
154{
155 FILE* fp = nullptr;
156 fp = fopen(file.characters(), "r");
157
158 if (!fp) {
159 fprintf(stderr, "cut: Could not open file '%s'\n", file.characters());
160 return;
161 }
162
163 char* line = nullptr;
164 ssize_t line_length = 0;
165 size_t line_capacity = 0;
166 while ((line_length = getline(&line, &line_capacity, fp)) != -1) {
167 line[line_length - 1] = '\0';
168 line_length--;
169 for (auto& i : byte_vector) {
170 if (i.m_type == Index::Type::RangedIndex && i.m_from < line_length) {
171 printf("%s", line + i.m_from - 1);
172 break;
173 }
174
175 if (i.m_from <= line_length)
176 printf("%c", line[i.m_from - 1]);
177 else
178 break;
179 }
180 printf("\n");
181 }
182
183 if (line)
184 free(line);
185 fclose(fp);
186}
187
188int main(int argc, char** argv)
189{
190 String byte_list = "";
191 Vector<String> tokens;
192 Vector<String> files;
193 if (argc == 1) {
194 print_usage_and_exit(1);
195 }
196
197 for (int i = 1; i < argc;) {
198 if (!strcmp(argv[i], "-b")) {
199 /* The next argument should be a list of bytes. */
200 byte_list = (i + 1 < argc) ? argv[i + 1] : "";
201
202 if (byte_list == "") {
203 print_usage_and_exit(1);
204 }
205 tokens = byte_list.split(',');
206 i += 2;
207 } else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) {
208 print_usage_and_exit(1);
209 } else if (argv[i][0] != '-') {
210 //file = argv[i++];
211 files.append(argv[i++]);
212 } else {
213 fprintf(stderr, "cut: invalid argument %s\n", argv[i]);
214 print_usage_and_exit(1);
215 }
216 }
217
218 if (files.is_empty() || byte_list == "") {
219 print_usage_and_exit(1);
220 }
221
222 Vector<Index> byte_vector;
223 expand_list(tokens, byte_vector);
224 quick_sort(byte_vector.begin(), byte_vector.end(), [](auto& a, auto& b) { return a.m_from < b.m_from; });
225 /* Process each file */
226 for (auto& file : files) {
227 cut_file(file, byte_vector);
228 }
229
230 return 0;
231}