Skip to content

Commit 92dbf6f

Browse files
committed
feat: add full processFolder function in fileProcessors file
1 parent 002d8c9 commit 92dbf6f

1 file changed

Lines changed: 110 additions & 48 deletions

File tree

src/components/User/Dashboard/DatasetOrganizer/utils/fileProcessors.ts

Lines changed: 110 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,10 @@ export const processFile = async (file: File): Promise<FileItem> => {
8686
entry.content = text.slice(0, 5000);
8787
entry.contentType = "text";
8888
} else if (fileType === "office" && ext === "pdf") {
89-
// ✅ EXTRACT PDF - This was missing!
90-
console.log("Processing PDF file...");
89+
// Extract PDF
9190
const buffer = await file.arrayBuffer();
9291
entry.content = await extractPDFContent(buffer);
9392
entry.contentType = "office";
94-
console.log("PDF processed successfully");
9593
} else if (fileType === "office" && ext === "docx") {
9694
// DOCX placeholder
9795
entry.content = `DOCX file: ${file.name}\nSize: ${(
@@ -116,22 +114,6 @@ export const processFile = async (file: File): Promise<FileItem> => {
116114
entry.content = `Error reading file: ${e.message}`;
117115
}
118116

119-
// if (fileType === "text") {
120-
// try {
121-
// const text = await file.text();
122-
// entry.content = text.slice(0, 5000); // First 5000 chars
123-
// entry.contentType = "text";
124-
// } catch (e: any) {
125-
// entry.content = `Error reading file: ${e.message}`;
126-
// }
127-
// } else {
128-
// // For binary files, just store basic info
129-
// entry.content = `File: ${file.name}\nSize: ${(file.size / 1024).toFixed(
130-
// 2
131-
// )} KB\nType: ${file.type || "Unknown"}`;
132-
// entry.contentType = fileType;
133-
// }
134-
135117
return entry;
136118
};
137119

@@ -189,28 +171,6 @@ export const processZip = async (file: File): Promise<FileItem[]> => {
189171
sourcePath: `${zipName}/${path}`,
190172
};
191173

192-
// Only extract text files
193-
// if (fileType === "text") {
194-
// try {
195-
// const text = await zipEntry.async("text");
196-
// entry.content = text.slice(0, 5000);
197-
// entry.contentType = "text";
198-
// } catch (e: any) {
199-
// entry.content = `Error: ${e.message}`;
200-
// }
201-
// } else {
202-
// // For binary files, just store info
203-
// // entry.content = `ZIP Entry: ${fileName}\nCompressed Size: ${(
204-
// // zipEntry._data.compressedSize / 1024
205-
// // ).toFixed(2)} KB`;
206-
// // entry.contentType = fileType;
207-
// // ✅ FIX 1: Get file size from the ZIP entry properly
208-
// const arrayBuffer = await zipEntry.async("arraybuffer");
209-
// const sizeKB = (arrayBuffer.byteLength / 1024).toFixed(2);
210-
// entry.content = `ZIP Entry: ${fileName}\nSize: ${sizeKB} KB`;
211-
// entry.contentType = fileType;
212-
// }
213-
214174
// Extract content based on file type
215175
if (fileType === "text") {
216176
try {
@@ -221,17 +181,27 @@ export const processZip = async (file: File): Promise<FileItem[]> => {
221181
entry.content = `Error: ${e.message}`;
222182
}
223183
} else if (fileType === "office" && ext === "pdf") {
224-
// ✅ EXTRACT PDF FROM ZIP - This was missing!
184+
// Extract PDF
225185
try {
226-
console.log(`Extracting PDF from ZIP: ${fileName}`);
227186
const arrayBuffer = await zipEntry.async("arraybuffer");
228187
entry.content = await extractPDFContent(arrayBuffer);
229188
entry.contentType = "office";
230-
console.log("ZIP PDF extracted successfully");
231189
} catch (e: any) {
232190
console.error("ZIP PDF extraction error:", e);
233191
entry.content = `Error extracting PDF: ${e.message}`;
234192
}
193+
} else if (fileType === "office" && ext === "docx") {
194+
// ADD: DOCX placeholder
195+
const arrayBuffer = await zipEntry.async("arraybuffer");
196+
const sizeKB = (arrayBuffer.byteLength / 1024).toFixed(2);
197+
entry.content = `DOCX file: ${fileName}\nSize: ${sizeKB} KB\n\nNote: Install mammoth.js to extract DOCX content`;
198+
entry.contentType = "office";
199+
} else if (fileType === "office" && (ext === "xlsx" || ext === "xls")) {
200+
// ADD: Excel placeholder
201+
const arrayBuffer = await zipEntry.async("arraybuffer");
202+
const sizeKB = (arrayBuffer.byteLength / 1024).toFixed(2);
203+
entry.content = `Excel file: ${fileName}\nSize: ${sizeKB} KB\n\nNote: Install xlsx.js to extract Excel content`;
204+
entry.contentType = "office";
235205
} else {
236206
// For other binary files, just store info
237207
const arrayBuffer = await zipEntry.async("arraybuffer");
@@ -260,25 +230,117 @@ export const processZip = async (file: File): Promise<FileItem[]> => {
260230
};
261231

262232
// Process folder - recursively walks a dropped directory through the browser's FileSystemDirectoryEntry API
233+
// export const processFolder = async (
234+
// folderEntry: FileSystemDirectoryEntry,
235+
// parentId: string | null
236+
// ): Promise<FileItem[]> => {
237+
// const entries: FileItem[] = [];
238+
// const folderId = generateId();
239+
240+
// // Add the folder itself
241+
// entries.push({
242+
// id: folderId,
243+
// name: folderEntry.name,
244+
// type: "folder",
245+
// parentId: parentId,
246+
// sourcePath: folderEntry.fullPath,
247+
// });
248+
249+
// // Note: Full folder traversal requires complex recursive logic
250+
// // For MVP, just create the folder entry
251+
// // You can enhance this later
252+
253+
// return entries;
254+
// };
255+
256+
// src/components/User/Dashboard/DatasetOrganizer/utils/fileProcessors.ts
257+
263258
/**
 * Builds a flat list of FileItem entries for a directory and everything under it.
 *
 * The first item is the folder itself; it is followed by one item per
 * descendant file and subfolder, appended depth-first in the order the
 * directory reader returns them. Items are linked through `parentId`, and each
 * `sourcePath` starts with the root folder's name followed by `/`-separated
 * entry names.
 *
 * Files are converted with `processFile`, after which their `parentId` and
 * `sourcePath` are overwritten to place them inside this tree. Entries are
 * handled one at a time (each step is awaited before the next begins).
 *
 * @param folderEntry - Directory entry to walk.
 * @param parentId - Id stored as the root folder item's `parentId`, or `null`.
 * @returns The root folder item plus all descendant items.
 * @throws Nothing is caught here: an error reported by `readEntries()` or
 *   `file()` rejects the returned promise and the entries collected so far
 *   are not returned. NOTE(review): whether `processFile` itself can reject is
 *   not visible from this block — confirm.
 */
export const processFolder = async (
264259
folderEntry: FileSystemDirectoryEntry,
265260
parentId: string | null
266261
): Promise<FileItem[]> => {
267262
const entries: FileItem[] = [];
268263
const folderId = generateId();
264+
const basePath = folderEntry.name;
269265

270266
// Add the root folder itself, using its name as the root of all sourcePath values
271267
entries.push({
272268
id: folderId,
273269
name: folderEntry.name,
274270
type: "folder",
275271
parentId: parentId,
276-
sourcePath: folderEntry.fullPath,
272+
sourcePath: basePath,
277273
});
278274

279-
// Note: Full folder traversal requires complex recursive logic
280-
// For MVP, just create the folder entry
281-
// You can enhance this later
275+
// Helper: wrap the callback-based reader.readEntries() in a Promise
276+
const readEntries = (
277+
reader: FileSystemDirectoryReader
278+
): Promise<FileSystemEntry[]> => {
279+
return new Promise((resolve, reject) => {
280+
reader.readEntries(resolve, reject);
281+
});
282+
};
283+
284+
// Helper: wrap the callback-based FileSystemFileEntry.file() in a Promise
285+
const getFile = (fileEntry: FileSystemFileEntry): Promise<File> => {
286+
return new Promise((resolve, reject) => {
287+
fileEntry.file(resolve, reject);
288+
});
289+
};
290+
291+
// Recursively appends every file and subfolder found under dirEntry to `entries`
292+
async function traverseDirectory(
293+
dirEntry: FileSystemDirectoryEntry,
294+
currentParentId: string,
295+
currentPath: string
296+
): Promise<void> {
297+
const dirReader = dirEntry.createReader();
298+
let allEntries: FileSystemEntry[] = [];
299+
300+
// Drain the reader: keep requesting batches until one comes back empty
301+
const readBatch = async (): Promise<void> => {
302+
const batch = await readEntries(dirReader);
303+
if (batch.length > 0) {
304+
allEntries = allEntries.concat(Array.from(batch));
305+
await readBatch(); // Non-empty batch, so ask the same reader again
306+
}
307+
};
308+
309+
await readBatch();
310+
311+
// Handle entries sequentially, in the order the reader returned them
312+
for (const entry of allEntries) {
313+
const entryPath = `${currentPath}/${entry.name}`;
314+
315+
if (entry.isFile) {
316+
// File: run it through processFile, then override parentId/sourcePath to place it under this folder
317+
const fileEntry = entry as FileSystemFileEntry;
318+
const file = await getFile(fileEntry);
319+
const fileItem = await processFile(file);
320+
fileItem.parentId = currentParentId;
321+
fileItem.sourcePath = entryPath;
322+
entries.push(fileItem);
323+
} else if (entry.isDirectory) {
324+
// Subfolder: record it first, then descend using its new id as the parent
325+
const subFolderId = generateId();
326+
entries.push({
327+
id: subFolderId,
328+
name: entry.name,
329+
type: "folder",
330+
parentId: currentParentId,
331+
sourcePath: entryPath,
332+
});
333+
await traverseDirectory(
334+
entry as FileSystemDirectoryEntry,
335+
subFolderId,
336+
entryPath
337+
);
338+
}
339+
}
340+
}
341+
342+
// Start traversal at the root folder
343+
await traverseDirectory(folderEntry, folderId, basePath);
282344

283345
return entries;
284346
};

0 commit comments

Comments
 (0)