Serenity Operating System
1/*
2 * Copyright (c) 2020, Matthew L. Curry <matthew.curry@gmail.com>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/CharacterTypes.h>
8#include <AK/RefPtr.h>
9#include <AK/StringView.h>
10#include <LibCore/ArgsParser.h>
11#include <LibCore/File.h>
12#include <LibCore/System.h>
13#include <unistd.h>
14
15static ErrorOr<void> write_line_content(StringView line, size_t count, bool duplicates_only, bool print_count, Core::File& outfile)
16{
17 if (duplicates_only && count <= 1)
18 return {};
19
20 if (print_count)
21 TRY(outfile.write_until_depleted(DeprecatedString::formatted("{} {}\n", count, line).bytes()));
22 else
23 TRY(outfile.write_until_depleted(DeprecatedString::formatted("{}\n", line).bytes()));
24 return {};
25}
26
27static StringView skip(StringView line, unsigned char_skip_count, unsigned field_skip_count)
28{
29 line = line.trim("\n"sv);
30 if (field_skip_count) {
31 bool in_field = false;
32 int field_index = 0;
33 unsigned current_field = 0;
34 for (size_t i = 0; i < line.length(); i++) {
35 char c = line[i];
36 if (is_ascii_space(c)) {
37 in_field = false;
38 field_index = i;
39 if (++current_field > field_skip_count)
40 break;
41 } else if (!in_field) {
42 in_field = true;
43 }
44 }
45 line = line.substring_view(field_index);
46 }
47 char_skip_count = min(char_skip_count, line.length());
48 return line.substring_view(char_skip_count);
49}
50
51ErrorOr<int> serenity_main(Main::Arguments arguments)
52{
53 TRY(Core::System::pledge("stdio rpath wpath cpath"));
54
55 StringView inpath;
56 StringView outpath;
57 bool duplicates_only = false;
58 bool unique_only = false;
59 bool ignore_case = false;
60 bool print_count = false;
61 unsigned skip_chars = 0;
62 unsigned skip_fields = 0;
63
64 Core::ArgsParser args_parser;
65 args_parser.add_option(duplicates_only, "Only print duplicated lines", "repeated", 'd');
66 args_parser.add_option(unique_only, "Only print unique lines (default)", "unique", 'u');
67 args_parser.add_option(ignore_case, "Ignore case when comparing lines", "ignore-case", 'i');
68 args_parser.add_option(print_count, "Prefix each line by its number of occurrences", "count", 'c');
69 args_parser.add_option(skip_chars, "Skip N chars", "skip-chars", 's', "N");
70 args_parser.add_option(skip_fields, "Skip N fields", "skip-fields", 'f', "N");
71 args_parser.add_positional_argument(inpath, "Input file", "input", Core::ArgsParser::Required::No);
72 args_parser.add_positional_argument(outpath, "Output file", "output", Core::ArgsParser::Required::No);
73 args_parser.parse(arguments);
74
75 if (!unique_only && !duplicates_only) {
76 unique_only = true;
77 } else if (unique_only && duplicates_only) {
78 // Printing duplicated and unique lines shouldn't print anything
79 return 0;
80 }
81
82 auto infile = TRY(Core::BufferedFile::create(TRY(Core::File::open_file_or_standard_stream(inpath, Core::File::OpenMode::Read))));
83 auto outfile = TRY(Core::File::open_file_or_standard_stream(outpath, Core::File::OpenMode::Write));
84
85 size_t count = 0;
86 ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024));
87 ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024));
88
89 StringView previous = TRY(infile->read_line(previous_buf));
90 StringView previous_to_compare = skip(previous, skip_chars, skip_fields);
91
92 while (TRY(infile->can_read_line())) {
93 // FIXME: The buffer does not automatically resize,
94 // and this will return EMSGSIZE if the read line
95 // is more than 1024 bytes.
96 StringView current = TRY(infile->read_line(current_buf));
97
98 StringView current_to_compare = skip(current, skip_chars, skip_fields);
99 bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_ascii_case(previous_to_compare) : current_to_compare == previous_to_compare;
100 if (!lines_equal) {
101 TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
102 count = 1;
103 } else {
104 count++;
105 }
106 swap(current_to_compare, previous_to_compare);
107 swap(current_buf, previous_buf);
108 swap(current, previous);
109 }
110
111 TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile));
112
113 return 0;
114}