···11-#include <stdio.h>
22-#include <ctype.h>
33-#include <glib.h>
44-#include <string.h>
55-#include <locale.h>
66-#include <archive.h>
77-#include <archive_entry.h>
/* Lower-case a NUL-terminated string in place, one byte at a time, using
 * tolower() under the current LC_CTYPE locale.
 *
 * str: writable, NUL-terminated string; modified in place.
 *
 * Returns str itself so the call can be nested inside an expression.
 */
static char * case_normalize(char * str) {
  for (char * iter = str; *iter; ++iter) {
    /* The <ctype.h> functions require a value representable as unsigned
     * char (or EOF); passing a negative plain char is undefined behaviour,
     * so convert before the call.
     */
    *iter = (char) tolower((unsigned char) *iter);
  }
  return str;
}
1515-1616-static gint compare_str(const void * a, const void * b, void * _) {
1717- return strcmp(a, b);
1818-}
1919-2020-int main(int argc, char ** argv) {
2121- if (argc != 3) {
2222- fprintf(stderr, "Usage: %s TARBALL OUTPUT\n", argv[0]);
2323- return 1;
2424- }
2525-2626- size_t output_len = strlen(argv[2]);
2727-2828- /* Switch to standard locale to ensure consistency in case-folding.
2929- */
3030- setlocale(LC_CTYPE, "C");
3131-3232- /* Map from case-normalized package name to a sorted sequence of
3333- * package names in the equivalence class defined by
3434- * case-normalization.
3535- */
3636- GHashTable * equivalence_classes =
3737- g_hash_table_new(g_str_hash, g_str_equal);
3838-3939- /* Open up the tarball.
4040- */
4141- struct archive * ar = archive_read_new();
4242- if (!ar) {
4343- perror("Allocating archive structure");
4444- return 1;
4545- }
4646- archive_read_support_filter_gzip(ar);
4747- archive_read_support_format_tar(ar);
4848- if (archive_read_open_filename( ar
4949- , argv[1]
5050- , 10240
5151- ) == ARCHIVE_FATAL) {
5252- fprintf( stderr
5353- , "Error opening %s: %s\n"
5454- , argv[0]
5555- , archive_error_string(ar)
5656- );
5757- return 1;
5858- }
5959-6060- /* Extract the length of the output directory that prefixes all
6161- * tarball entries from the first entry in the tarball.
6262- */
6363- struct archive_entry * ent;
6464- int err = archive_read_next_header(ar, &ent);
6565- if (err != ARCHIVE_OK) {
6666- if (err == ARCHIVE_EOF) {
6767- fprintf( stderr
6868- , "No entries in %s, surely this is an error!\n"
6969- , argv[1]
7070- );
7171- } else {
7272- fprintf( stderr
7373- , "Error reading entry from %s: %s\n"
7474- , argv[1]
7575- , archive_error_string(ar)
7676- );
7777- }
7878- return 1;
7979- }
8080- const char * path = archive_entry_pathname(ent);
8181- /* Number of characters from the start of the path name until after
8282- * the slash after the leading directory.
8383- */
8484- size_t prefix_len = strchr(path, '/') - path + 1;
8585-8686- /* Extract each entry to the right partition.
8787- */
8888- do {
8989- path = archive_entry_pathname(ent) + prefix_len;
9090- const char * pkg_end = strchr(path, '/');
9191- if (!pkg_end)
9292- /* If there is no second slash, then this is either just the entry
9393- * corresponding to the root or some non-package file (e.g.
9494- * travis.yml). In either case, we don't care.
9595- */
9696- continue;
9797-9898- /* Find our package in the equivalence class map.
9999- */
100100- char * pkg_name = g_strndup(path, pkg_end - path);
101101- char * pkg_normalized =
102102- case_normalize(g_strndup(path, pkg_end - path));
103103- GSequence * pkg_class =
104104- g_hash_table_lookup(equivalence_classes, pkg_normalized);
105105- gint partition_num;
106106- if (!pkg_class) {
107107- /* We haven't seen any packages with this normalized name yet,
108108- * so we need to initialize the sequence and add it to the map.
109109- */
110110- pkg_class = g_sequence_new(NULL);
111111- g_sequence_append(pkg_class, pkg_name);
112112- g_hash_table_insert( equivalence_classes
113113- , pkg_normalized
114114- , pkg_class
115115- );
116116- partition_num = 1;
117117- } else {
118118- g_free(pkg_normalized);
119119- /* Find the package name in the equivalence class */
120120- GSequenceIter * pkg_iter =
121121- g_sequence_search( pkg_class
122122- , pkg_name
123123- , compare_str
124124- , NULL
125125- );
126126- if (!g_sequence_iter_is_end(pkg_iter)) {
127127- /* If there are any packages after this one in the list, bail
128128- * out. In principle we could solve this by moving them up to
129129- * the next partition, but so far I've never seen any github
130130- * tarballs out of order so let's save ourselves the work
131131- * until we know we need it.
132132- */
133133- fprintf( stderr
134134- , "Out of order github tarball: %s is after %s\n"
135135- , pkg_name
136136- , (char *) g_sequence_get(pkg_iter)
137137- );
138138- return 1;
139139- }
140140- pkg_iter = g_sequence_iter_prev(pkg_iter);
141141- if (strcmp( g_sequence_get(pkg_iter)
142142- , pkg_name
143143- ) != 0) {
144144- /* This package doesn't have the same name as the one right
145145- * before where it should be in the sequence, which means it's
146146- * new and needs to be added to the sequence.
147147- *
148148- * !!! We need to change this to use g_sequence_insert_before
149149- * if we ever get an out-of-order github tarball, see comment
150150- * after the check for !g_sequence_iter_is_end(pkg_iter).
151151- */
152152- pkg_iter = g_sequence_append(pkg_class, pkg_name);
153153- } else {
154154- g_free(pkg_name);
155155- }
156156- /* Get the partition number, starting with 1.
157157- */
158158- partition_num = g_sequence_iter_get_position(pkg_iter) + 1;
159159- }
160160-161161- /* Set the destination path.
162162- * The 3 below is for the length of /#/, the partition number part
163163- * of the path. If we have more than 9 partitions, we deserve to
164164- * segfault. The 1 at the end is for the trailing null.
165165- */
166166- char * dest_path = g_malloc(output_len + 3 + strlen(path) + 1);
167167- sprintf(dest_path, "%s/%d/%s", argv[2], partition_num, path);
168168- archive_entry_set_pathname(ent, dest_path);
169169-170170- if (archive_read_extract(ar, ent, 0) != ARCHIVE_OK) {
171171- fprintf( stderr
172172- , "Error extracting entry %s from %s: %s\n"
173173- , dest_path
174174- , argv[1]
175175- , archive_error_string(ar)
176176- );
177177- return 1;
178178- }
179179- } while ((err = archive_read_next_header(ar, &ent)) == ARCHIVE_OK);
180180- if (err != ARCHIVE_EOF) {
181181- fprintf( stderr
182182- , "Error reading entry from %s: %s\n"
183183- , argv[1]
184184- , archive_error_string(ar)
185185- );
186186- return 1;
187187- }
188188-189189- return 0;
190190-}