@@ -149,3 +149,198 @@ export const analyzeFilenamePatterns = (
149149 hasTaskNames,
150150 } ;
151151} ;
152+
153+ // add to fileAnalyzers.ts
154+
155+ // export interface SubjectRecord {
156+ // original_id: string;
157+ // numeric_id: string;
158+ // site: string | null;
159+ // pattern_name: string;
160+ // file_count: number;
161+ // }
162+
163+ // export interface SubjectAnalysis {
164+ // success: boolean;
165+ // method: string;
166+ // subject_records: SubjectRecord[];
167+ // subject_count: number;
168+ // has_site_info: boolean;
169+ // variants_by_subject: Record<string, any>;
170+ // python_generated_filename_rules: any[];
171+ // id_mapping: {
172+ // id_mapping: Record<string, string>;
173+ // reverse_mapping: Record<string, string>;
174+ // strategy_used: string;
175+ // metadata_columns: string[];
176+ // };
177+ // }
178+
179+ // // mirrors _extract_subjects_from_directory_structure
180+ // const extractFromDirectoryStructure = (
181+ // allFiles: string[]
182+ // ): Omit<SubjectAnalysis, "id_mapping"> | null => {
183+ // const patterns: Array<[RegExp, boolean, number, number | null, string]> = [
184+ // [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"],
185+ // [/^sub-(\d+)$/i, false, 1, null, "standard_bids"],
186+ // [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"],
187+ // [/^(\d{3,})$/, false, 1, null, "numeric_only"],
188+ // ];
189+
190+ // const subjectRecords: SubjectRecord[] = [];
191+ // const seenIds = new Set<string>();
192+
193+ // for (const filepath of allFiles) {
194+ // const parts = filepath.split("/");
195+ // for (const part of parts.slice(0, 2)) {
196+ // for (const [
197+ // regex,
198+ // hasSite,
199+ // idGroup,
200+ // siteGroup,
201+ // patternName,
202+ // ] of patterns) {
203+ // const match = part.match(regex);
204+ // if (match) {
205+ // const originalId = match[0];
206+ // if (seenIds.has(originalId)) break;
207+ // seenIds.add(originalId);
208+ // subjectRecords.push({
209+ // original_id: originalId,
210+ // numeric_id: match[idGroup],
211+ // site: hasSite && siteGroup ? match[siteGroup] : null,
212+ // pattern_name: patternName,
213+ // file_count: 0,
214+ // });
215+ // break;
216+ // }
217+ // }
218+ // }
219+ // }
220+
221+ // if (subjectRecords.length === 0) return null;
222+
223+ // subjectRecords.sort((a, b) => {
224+ // const na = parseInt(a.numeric_id) || 0;
225+ // const nb = parseInt(b.numeric_id) || 0;
226+ // return na - nb;
227+ // });
228+
229+ // return {
230+ // success: true,
231+ // method: "directory_structure",
232+ // subject_records: subjectRecords,
233+ // subject_count: subjectRecords.length,
234+ // has_site_info: subjectRecords.some((r) => r.site !== null),
235+ // variants_by_subject: {},
236+ // python_generated_filename_rules: [],
237+ // };
238+ // };
239+
240+ // // mirrors _extract_subjects_from_flat_filenames
241+ // const extractFromFlatFilenames = (
242+ // allFiles: string[]
243+ // ): Omit<SubjectAnalysis, "id_mapping"> | null => {
244+ // const identifierToFiles: Record<string, string[]> = {};
245+
246+ // for (const filepath of allFiles) {
247+ // const filename = filepath.split("/").pop() || "";
248+ // // strip a compound ".nii.gz" as one extension, otherwise only the last extension
249+ // const nameNoExt = filename
250+ // .replace(/\.nii\.gz$|\.[^/.]+$/, "");
251+ // const match = nameNoExt.match(/^([A-Za-z0-9\-]+)/);
252+ // if (match) {
253+ // const identifier = match[1];
254+ // if (!identifierToFiles[identifier]) identifierToFiles[identifier] = [];
255+ // identifierToFiles[identifier].push(filepath);
256+ // }
257+ // }
258+
259+ // if (Object.keys(identifierToFiles).length === 0) return null;
260+
261+ // const extractNumeric = (id: string): number => {
262+ // const nums = id.match(/\d+/g);
263+ // return nums ? parseInt(nums[nums.length - 1]) : 999999;
264+ // };
265+
266+ // const sortedIdentifiers = Object.keys(identifierToFiles).sort(
267+ // (a, b) => extractNumeric(a) - extractNumeric(b)
268+ // );
269+
270+ // const subjectRecords: SubjectRecord[] = sortedIdentifiers.map((id, i) => ({
271+ // original_id: id,
272+ // numeric_id: String(i + 1),
273+ // site: null,
274+ // pattern_name: "dominant_prefix",
275+ // file_count: identifierToFiles[id].length,
276+ // }));
277+
278+ // return {
279+ // success: true,
280+ // method: "dominant_prefix_fallback",
281+ // subject_records: subjectRecords,
282+ // subject_count: subjectRecords.length,
283+ // has_site_info: false,
284+ // variants_by_subject: {},
285+ // python_generated_filename_rules: [],
286+ // };
287+ // };
288+
289+ // // mirrors _generate_subject_id_mapping
290+ // const generateIdMapping = (
291+ // subjectInfo: Omit<SubjectAnalysis, "id_mapping">
292+ // ): SubjectAnalysis["id_mapping"] => {
293+ // const records = subjectInfo.subject_records;
294+ // const idMapping: Record<string, string> = {};
295+ // const reverseMapping: Record<string, string> = {};
296+
297+ // // detect already-BIDS format (sub-01, sub-02...)
298+ // const allAlreadyBids = records.every((r) => /^sub-\w+$/i.test(r.original_id));
299+
300+ // if (allAlreadyBids) {
301+ // for (const rec of records) {
302+ // const bidsId = rec.original_id.replace(/^sub-/i, "");
303+ // idMapping[rec.original_id] = bidsId;
304+ // reverseMapping[bidsId] = rec.original_id;
305+ // }
306+ // return {
307+ // id_mapping: idMapping,
308+ // reverse_mapping: reverseMapping,
309+ // strategy_used: "already_bids",
310+ // metadata_columns: [],
311+ // };
312+ // }
313+
314+ // // numeric strategy
315+ // for (let i = 0; i < records.length; i++) {
316+ // const orig = records[i].original_id;
317+ // const bidsId = String(i + 1);
318+ // idMapping[orig] = bidsId;
319+ // reverseMapping[bidsId] = orig;
320+ // }
321+
322+ // return {
323+ // id_mapping: idMapping,
324+ // reverse_mapping: reverseMapping,
325+ // strategy_used: "numeric",
326+ // metadata_columns: ["original_id"],
327+ // };
328+ // };
329+
330+ // // main export — call this from llmHelpers
331+ // export const extractSubjectAnalysis = (allFiles: string[]): SubjectAnalysis => {
332+ // const fromDir = extractFromDirectoryStructure(allFiles);
333+ // const base = fromDir ??
334+ // extractFromFlatFilenames(allFiles) ?? {
335+ // success: false,
336+ // method: "none",
337+ // subject_records: [],
338+ // subject_count: 0,
339+ // has_site_info: false,
340+ // variants_by_subject: {},
341+ // python_generated_filename_rules: [],
342+ // };
343+
344+ // const idMapping = generateIdMapping(base);
345+ // return { ...base, id_mapping: idMapping };
346+ // };
0 commit comments