@@ -86,12 +86,10 @@ export const processFile = async (file: File): Promise<FileItem> => {
8686 entry . content = text . slice ( 0 , 5000 ) ;
8787 entry . contentType = "text" ;
8888 } else if ( fileType === "office" && ext === "pdf" ) {
89- // ✅ EXTRACT PDF - This was missing!
90- console . log ( "Processing PDF file..." ) ;
89+ // Extract PDF
9190 const buffer = await file . arrayBuffer ( ) ;
9291 entry . content = await extractPDFContent ( buffer ) ;
9392 entry . contentType = "office" ;
94- console . log ( "PDF processed successfully" ) ;
9593 } else if ( fileType === "office" && ext === "docx" ) {
9694 // DOCX placeholder
9795 entry . content = `DOCX file: ${ file . name } \nSize: ${ (
@@ -116,22 +114,6 @@ export const processFile = async (file: File): Promise<FileItem> => {
116114 entry . content = `Error reading file: ${ e . message } ` ;
117115 }
118116
119- // if (fileType === "text") {
120- // try {
121- // const text = await file.text();
122- // entry.content = text.slice(0, 5000); // First 5000 chars
123- // entry.contentType = "text";
124- // } catch (e: any) {
125- // entry.content = `Error reading file: ${e.message}`;
126- // }
127- // } else {
128- // // For binary files, just store basic info
129- // entry.content = `File: ${file.name}\nSize: ${(file.size / 1024).toFixed(
130- // 2
131- // )} KB\nType: ${file.type || "Unknown"}`;
132- // entry.contentType = fileType;
133- // }
134-
135117 return entry ;
136118} ;
137119
@@ -189,28 +171,6 @@ export const processZip = async (file: File): Promise<FileItem[]> => {
189171 sourcePath : `${ zipName } /${ path } ` ,
190172 } ;
191173
192- // Only extract text files
193- // if (fileType === "text") {
194- // try {
195- // const text = await zipEntry.async("text");
196- // entry.content = text.slice(0, 5000);
197- // entry.contentType = "text";
198- // } catch (e: any) {
199- // entry.content = `Error: ${e.message}`;
200- // }
201- // } else {
202- // // For binary files, just store info
203- // // entry.content = `ZIP Entry: ${fileName}\nCompressed Size: ${(
204- // // zipEntry._data.compressedSize / 1024
205- // // ).toFixed(2) } KB`;
206- // // entry.contentType = fileType;
207- // // ✅ FIX 1: Get file size from the ZIP entry properly
208- // const arrayBuffer = await zipEntry.async("arraybuffer");
209- // const sizeKB = (arrayBuffer.byteLength / 1024).toFixed(2);
210- // entry.content = `ZIP Entry: ${fileName}\nSize: ${sizeKB} KB`;
211- // entry.contentType = fileType;
212- // }
213-
214174 // Extract content based on file type
215175 if ( fileType === "text" ) {
216176 try {
@@ -221,17 +181,27 @@ export const processZip = async (file: File): Promise<FileItem[]> => {
221181 entry . content = `Error: ${ e . message } ` ;
222182 }
223183 } else if ( fileType === "office" && ext === "pdf" ) {
224- // ✅ EXTRACT PDF FROM ZIP - This was missing!
184+ // Extract PDF
225185 try {
226- console . log ( `Extracting PDF from ZIP: ${ fileName } ` ) ;
227186 const arrayBuffer = await zipEntry . async ( "arraybuffer" ) ;
228187 entry . content = await extractPDFContent ( arrayBuffer ) ;
229188 entry . contentType = "office" ;
230- console . log ( "ZIP PDF extracted successfully" ) ;
231189 } catch ( e : any ) {
232190 console . error ( "ZIP PDF extraction error:" , e ) ;
233191 entry . content = `Error extracting PDF: ${ e . message } ` ;
234192 }
193+ } else if ( fileType === "office" && ext === "docx" ) {
194+ // ADD: DOCX placeholder
195+ const arrayBuffer = await zipEntry . async ( "arraybuffer" ) ;
196+ const sizeKB = ( arrayBuffer . byteLength / 1024 ) . toFixed ( 2 ) ;
197+ entry . content = `DOCX file: ${ fileName } \nSize: ${ sizeKB } KB\n\nNote: Install mammoth.js to extract DOCX content` ;
198+ entry . contentType = "office" ;
199+ } else if ( fileType === "office" && ( ext === "xlsx" || ext === "xls" ) ) {
200+ // ADD: Excel placeholder
201+ const arrayBuffer = await zipEntry . async ( "arraybuffer" ) ;
202+ const sizeKB = ( arrayBuffer . byteLength / 1024 ) . toFixed ( 2 ) ;
203+ entry . content = `Excel file: ${ fileName } \nSize: ${ sizeKB } KB\n\nNote: Install xlsx.js to extract Excel content` ;
204+ entry . contentType = "office" ;
235205 } else {
236206 // For other binary files, just store info
237207 const arrayBuffer = await zipEntry . async ( "arraybuffer" ) ;
@@ -260,25 +230,117 @@ export const processZip = async (file: File): Promise<FileItem[]> => {
260230} ;
261231
262232// Process folder - recursively walks a dropped directory via the browser's File and Directory Entries API
233+ // export const processFolder = async (
234+ // folderEntry: FileSystemDirectoryEntry,
235+ // parentId: string | null
236+ // ): Promise<FileItem[]> => {
237+ // const entries: FileItem[] = [];
238+ // const folderId = generateId();
239+
240+ // // Add the folder itself
241+ // entries.push({
242+ // id: folderId,
243+ // name: folderEntry.name,
244+ // type: "folder",
245+ // parentId: parentId,
246+ // sourcePath: folderEntry.fullPath,
247+ // });
248+
249+ // // Note: Full folder traversal requires complex recursive logic
250+ // // For MVP, just create the folder entry
251+ // // You can enhance this later
252+
253+ // return entries;
254+ // };
255+
256+ // src/components/DatasetOrganizer/utils/fileProcessors.ts
257+
/**
 * Recursively converts a dropped directory into a flat list of `FileItem`s.
 *
 * The first element of the result is the folder itself; every descendant
 * follows in depth-first order, with `parentId` pointing at the generated id
 * of its containing folder and `sourcePath` built as
 * `<root folder name>/<sub>/<entry name>`.
 *
 * A file that cannot be read or processed is logged and skipped so that one
 * bad entry does not discard everything already collected. Failures while
 * listing a directory still reject the returned promise.
 *
 * @param folderEntry - Directory entry to walk (e.g. from a drag-and-drop item).
 * @param parentId - Id of the item the root folder should hang under, or `null`.
 * @returns The root folder item followed by all nested folder and file items.
 */
export const processFolder = async (
  folderEntry: FileSystemDirectoryEntry,
  parentId: string | null
): Promise<FileItem[]> => {
  const entries: FileItem[] = [];
  const folderId = generateId();
  const basePath = folderEntry.name;

  // Add the folder itself
  entries.push({
    id: folderId,
    name: folderEntry.name,
    type: "folder",
    parentId: parentId,
    sourcePath: basePath,
  });

  // Helper: Promise wrapper around the callback-based readEntries()
  const readEntries = (
    reader: FileSystemDirectoryReader
  ): Promise<FileSystemEntry[]> =>
    new Promise((resolve, reject) => {
      reader.readEntries(resolve, reject);
    });

  // Helper: Promise wrapper around the callback-based file()
  const getFile = (fileEntry: FileSystemFileEntry): Promise<File> =>
    new Promise((resolve, reject) => {
      fileEntry.file(resolve, reject);
    });

  // Helper: drain a directory reader. readEntries() hands results back in
  // batches, so keep calling it on the same reader until it returns none.
  const readAllEntries = async (
    dirEntry: FileSystemDirectoryEntry
  ): Promise<FileSystemEntry[]> => {
    const reader = dirEntry.createReader();
    const collected: FileSystemEntry[] = [];
    for (;;) {
      const batch = await readEntries(reader);
      if (batch.length === 0) {
        return collected;
      }
      collected.push(...batch);
    }
  };

  // Depth-first walk; appends to `entries` as it goes.
  const traverseDirectory = async (
    dirEntry: FileSystemDirectoryEntry,
    currentParentId: string,
    currentPath: string
  ): Promise<void> => {
    const children = await readAllEntries(dirEntry);

    for (const entry of children) {
      const entryPath = `${currentPath}/${entry.name}`;

      if (entry.isFile) {
        try {
          const file = await getFile(entry as FileSystemFileEntry);
          const fileItem = await processFile(file);
          fileItem.parentId = currentParentId;
          fileItem.sourcePath = entryPath;
          entries.push(fileItem);
        } catch (e: unknown) {
          // Skip this file only; the rest of the folder is still usable.
          console.error(`Folder file read error: ${entryPath}`, e);
        }
      } else if (entry.isDirectory) {
        // Register the subfolder before descending so children can reference it
        const subFolderId = generateId();
        entries.push({
          id: subFolderId,
          name: entry.name,
          type: "folder",
          parentId: currentParentId,
          sourcePath: entryPath,
        });
        await traverseDirectory(
          entry as FileSystemDirectoryEntry,
          subFolderId,
          entryPath
        );
      }
    }
  };

  // Start traversal
  await traverseDirectory(folderEntry, folderId, basePath);

  return entries;
};
0 commit comments