+54
-24
src/lib/parsers/fileExtractor.ts
+54
-24
src/lib/parsers/fileExtractor.ts
···
62
62
}
63
63
64
64
/**
 * Check whether the input is a ZIP archive by inspecting its 4-byte signature.
 *
 * Only the first four bytes are read, so the cost does not depend on file size.
 * The full signature is compared (not just the leading "PK") so that ordinary
 * text or data files whose content merely begins with the letters "PK" are not
 * misclassified as archives.
 *
 * Recognized signatures (all begin with 0x50 0x4B, "PK"):
 *  - 0x03 0x04: local file header (regular archive)
 *  - 0x05 0x06: end of central directory (empty archive)
 *  - 0x07 0x08: spanned/split archive marker
 *  - 0x30 0x30: single-segment spanning marker ("PK00")
 *
 * @param file The uploaded data as a File, Blob, or raw ArrayBuffer.
 * @returns `true` when the data starts with a known ZIP signature; `false`
 *   otherwise, including when the data is shorter than four bytes or the
 *   header cannot be read.
 */
async function checkIfZipFile(
  file: File | ArrayBuffer | Blob,
): Promise<boolean> {
  try {
    // File is a subclass of Blob, so one instanceof check covers both and
    // avoids referencing the File global where it may not exist.
    const blob = file instanceof Blob ? file : new Blob([file]);
    const bytes = new Uint8Array(await blob.slice(0, 4).arrayBuffer());

    if (bytes.length < 4 || bytes[0] !== 0x50 || bytes[1] !== 0x4b) {
      return false;
    }

    const third = bytes[2];
    const fourth = bytes[3];
    return (
      (third === 0x03 && fourth === 0x04) ||
      (third === 0x05 && fourth === 0x06) ||
      (third === 0x07 && fourth === 0x08) ||
      (third === 0x30 && fourth === 0x30)
    );
  } catch {
    // Detection is best-effort: an unreadable header means "not a ZIP" so the
    // caller can fall through to its non-archive handling.
    return false;
  }
}
82
+
83
+
/**
65
84
* Public facing function handling both ZIP and single files.
66
85
* @param file A File object (or ArrayBuffer/Blob) representing the uploaded data.
67
86
* @param platform The platform name (e.g., 'instagram', 'tiktok').
···
78
97
return [];
79
98
}
80
99
81
-
// 1. --- ATTEMPT ZIP LOAD ---
82
-
try {
83
-
console.log("Attempting to load file as ZIP archive...");
84
-
const zip = await JSZip.loadAsync(file);
100
+
const isZipFile = await checkIfZipFile(file);
85
101
86
-
const extractor = new DataExtractor(file);
87
-
const results = await extractor.processZipArchive(zip, rules);
102
+
if (isZipFile) {
103
+
// 1. --- PROCESS AS ZIP ---
104
+
try {
105
+
console.log("Detected ZIP file, loading as archive...");
106
+
const zip = await JSZip.loadAsync(file);
88
107
89
-
console.log(
90
-
`Successfully extracted ${results.uniqueUsernames.length} usernames from ZIP archive.`,
91
-
);
92
-
return results.uniqueUsernames;
93
-
} catch (e) {
94
-
// 2. --- ZIP LOAD FAILED, ATTEMPT SINGLE FILE ---
95
-
console.warn(
96
-
"ZIP load failed. Attempting to parse file as a single data file...",
97
-
);
108
+
const extractor = new DataExtractor(file);
109
+
const results = await extractor.processZipArchive(zip, rules);
110
+
111
+
console.log(
112
+
`Successfully extracted ${results.uniqueUsernames.length} usernames from ZIP archive.`,
113
+
);
114
+
return results.uniqueUsernames;
115
+
} catch (e) {
116
+
console.error("ZIP processing failed:", e);
117
+
return [];
118
+
}
119
+
} else {
120
+
// 2. --- PROCESS AS SINGLE FILE ---
121
+
console.log("Processing as single file...");
98
122
99
123
// We need a File object to get the name and content easily
100
124
if (!(file instanceof File) && !(file instanceof Blob)) {
···
106
130
107
131
const singleFile = file as File;
108
132
109
-
// Find the rule that matches the uploaded file name
110
-
// We check if the uploaded filename ends with the final part of a rule's zipPath (e.g., "following.html")
111
-
const matchingRule = rules.find((rule) =>
112
-
singleFile.name
113
-
.toLowerCase()
114
-
.endsWith((rule.zipPath.split("/").pop() || "").toLowerCase()),
115
-
);
133
+
// Match rule based on file extension and format
134
+
const fileExt = singleFile.name.split(".").pop()?.toLowerCase();
135
+
136
+
const matchingRule = rules.find((rule) => {
137
+
// Match based on format type and file extension
138
+
if (rule.format === "TEXT" && fileExt === "txt") return true;
139
+
if (rule.format === "JSON" && fileExt === "json") return true;
140
+
if (rule.format === "HTML" && fileExt === "html") return true;
141
+
142
+
// Fallback: check if filename ends with the expected filename from rule
143
+
const ruleFilename = rule.zipPath.split("/").pop()?.toLowerCase();
144
+
return singleFile.name.toLowerCase().endsWith(ruleFilename || "");
145
+
});
116
146
117
147
if (!matchingRule) {
118
148
console.error(
119
-
`Could not match single file '${singleFile.name}' to any rule for platform ${platform}. Check rules in platformDefinitions.ts.`,
149
+
`Could not match single file '${singleFile.name}' (extension: ${fileExt}) to any rule for platform ${platform}. Available formats: ${rules.map((r) => r.format).join(", ")}`,
120
150
);
121
151
return [];
122
152
}
123
153
124
154
console.log(
125
-
`Matched single file '${singleFile.name}' to rule: ${matchingRule.zipPath}`,
155
+
`Matched single file '${singleFile.name}' to rule format: ${matchingRule.format}`,
126
156
);
127
157
128
158
// 3. Process as single file content