@@ -392,90 +392,190 @@ def share_google_calendars(email: str, rank: str) -> bool:
392392 return success
393393
394394
# Location of the dedicated virtual environment used only for LLM work,
# kept under the user cache so it survives between runs.
LLM_VENV_DIR = Path.home() / ".cache" / "cdl" / "llm-venv"


def get_llm_venv_python() -> Optional[Path]:
    """Return the Python interpreter inside the LLM venv, or None if it is absent.

    The interpreter lives under ``Scripts/`` on Windows and ``bin/`` elsewhere.
    """
    subdir, exe = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    candidate = LLM_VENV_DIR / subdir / exe
    if candidate.exists():
        return candidate
    return None
405+
406+
def setup_llm_venv() -> Optional[Path]:
    """Create (once) an isolated venv for LLM work to avoid dependency conflicts.

    Returns:
        Path to the venv's Python interpreter, or None if any setup step
        failed.  All failures are reported as warnings rather than raised,
        since the caller has a non-LLM fallback path.
    """
    # Reuse an existing environment.  Cache the lookup result instead of
    # calling get_llm_venv_python() twice (the original discarded the first
    # result and re-resolved the path).
    venv_python = get_llm_venv_python()
    if venv_python:
        return venv_python

    print(" Creating isolated LLM environment (one-time setup)...")
    LLM_VENV_DIR.parent.mkdir(parents=True, exist_ok=True)

    try:
        subprocess.check_call(
            [sys.executable, "-m", "venv", str(LLM_VENV_DIR)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as e:
        print(f" Warning: Could not create venv: {e}")
        return None

    venv_python = get_llm_venv_python()
    if not venv_python:
        # venv module succeeded but the expected interpreter path is missing.
        print(" Warning: venv created but Python not found")
        return None

    print(" Installing LLM dependencies (this may take a few minutes)...")
    try:
        subprocess.check_call(
            [str(venv_python), "-m", "pip", "install", "-q", "--upgrade", "pip"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # NOTE(review): stderr intentionally not suppressed here so pip's
        # install errors remain visible — confirm this asymmetry is deliberate.
        subprocess.check_call(
            [str(venv_python), "-m", "pip", "install", "-q", "mlx-lm"],
            stdout=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as e:
        print(f" Warning: Could not install LLM dependencies: {e}")
        return None

    print(" LLM environment ready")
    return venv_python
454+
455+
def run_llm_in_venv(script: str, timeout: int = 600) -> Optional[str]:
    """Run *script* with the isolated venv's interpreter.

    Returns the stripped stdout on success, or None when the venv is
    unavailable, the script fails, times out, or raises unexpectedly.
    """
    interpreter = setup_llm_venv()
    if interpreter is None:
        return None

    try:
        completed = subprocess.run(
            [str(interpreter), "-c", script],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if completed.returncode != 0:
            # Show only the head of stderr to keep console output readable.
            print(f" LLM error: {completed.stderr[:200]}")
            return None
        return completed.stdout.strip()
    except subprocess.TimeoutExpired:
        print(" Warning: LLM operation timed out")
        return None
    except Exception as e:
        print(f" Warning: LLM operation failed: {e}")
        return None
413480
481+ if not ensure_llm_dependencies ():
482+ return None
414483
415- def get_llm_pipeline ():
416- if not hasattr (get_llm_pipeline , "_pipeline" ):
417- ensure_dependencies ()
418- from transformers import pipeline
484+ from transformers import pipeline
419485
420- print ("Loading gpt-oss-20b model (this may take a moment on first run)..." )
421- get_llm_pipeline ._pipeline = pipeline (
486+ print (" Loading gpt-oss-20b model (this may take a moment on first run)..." )
487+ try :
488+ _llm_pipeline_cache = pipeline (
422489 "text-generation" ,
423490 model = "openai/gpt-oss-20b" ,
424491 torch_dtype = "auto" ,
425492 device_map = "auto" ,
426493 )
427- return get_llm_pipeline ._pipeline
494+ except Exception as e :
495+ print (f" Warning: Could not load LLM model: { e } " )
496+ print (" Using fallback bio generation." )
497+ return None
428498
499+ return _llm_pipeline_cache
429500
# Model identifier passed to mlx_lm.load inside the generated scripts.
LLM_MODEL = "mlx-community/Qwen2.5-32B-Instruct-4bit"


def _escape_for_script(text: str) -> str:
    """Escape a value for safe interpolation into a generated Python script.

    Order matters: backslashes first, then quotes; newlines become spaces so
    the value stays on one source line.
    """
    return (
        text.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace('"', '\\"')
        .replace("\n", " ")
    )


def generate_bio_with_llm(first_name: str, year: str) -> str:
    """Generate a one-sentence bio for a new lab member via the local LLM.

    Args:
        first_name: Member's first name, inserted into the prompt.
        year: Year they joined the lab.

    Returns:
        The LLM-generated bio, or a generic fallback sentence if the LLM is
        unavailable or produces unusable output.
    """
    fallback = f"{first_name} joined the lab in {year} and is interested in how people learn and remember."

    # Escape user-supplied values before interpolating them into the script.
    # The sibling edit_bio_with_llm already does this for its bio text; a
    # quote or backslash in a name would otherwise break (or inject code
    # into) the generated script.
    name_escaped = _escape_for_script(first_name)
    year_escaped = _escape_for_script(str(year))

    script = f'''
import warnings
warnings.filterwarnings("ignore")
from mlx_lm import load, generate

model, tokenizer = load("{LLM_MODEL}")

prompt = tokenizer.apply_chat_template(
    [{{"role": "user", "content": "Write a single sentence professional bio for {name_escaped}, an undergraduate who joined a cognitive neuroscience memory research lab in {year_escaped}. Keep it generic. Do not use pronouns. Maximum 25 words. Output ONLY the bio sentence, nothing else."}}],
    tokenize=False,
    add_generation_prompt=True
)

bio = generate(model, tokenizer, prompt=prompt, max_tokens=60)
bio = bio.strip('"').strip()
if bio and len(bio) > 15 and not bio.lower().startswith("here") and not bio.lower().startswith("sure"):
    print(bio)
else:
    print("")
'''

    print(" Generating bio with LLM...")
    result = run_llm_in_venv(script)
    # Reject empty / too-short output and fall back to the template.
    if result and len(result) > 15:
        return result
    return fallback
def edit_bio_with_llm(bio: str, first_name: str) -> str:
    """Clean up a member-submitted bio with the local LLM.

    Args:
        bio: Raw bio text supplied by the member.
        first_name: Name the edited bio should use.

    Returns:
        The edited bio, or the original *bio* unchanged if the LLM is
        unavailable or its output looks unusable.
    """
    # Escape characters that would break the string literal inside the
    # generated script.  Order matters: backslashes first, then quotes;
    # newlines are flattened to spaces.
    bio_escaped = bio
    for target, replacement in (
        ("\\", "\\\\"),
        ("'", "\\'"),
        ('"', '\\"'),
        ("\n", " "),
    ):
        bio_escaped = bio_escaped.replace(target, replacement)

    script = f'''
import warnings
warnings.filterwarnings("ignore")
from mlx_lm import load, generate

model, tokenizer = load("{LLM_MODEL}")

prompt = tokenizer.apply_chat_template(
    [{{"role": "user", "content": """Edit this bio. Rules:
1. Use only first name "{first_name}" (remove last name)
2. Fix typos and grammar
3. Keep 1-3 sentences max
4. Remove dangerous personal info (SSN, addresses, phone numbers)
5. Keep professional and friendly tone

Original: "{bio_escaped}"

Output ONLY the edited bio, nothing else:"""}}],
    tokenize=False,
    add_generation_prompt=True
)

edited = generate(model, tokenizer, prompt=prompt, max_tokens=150)
edited = edited.strip('"').strip()
if edited and len(edited) > 15 and not edited.lower().startswith("here") and not edited.lower().startswith("edited"):
    print(edited)
else:
    print("")
'''

    print(" Editing bio with LLM...")
    result = run_llm_in_venv(script)
    if result and len(result) > 15:
        return result
    # LLM failed or produced junk — keep the member's original text.
    return bio
479579
480580
481581def parse_name (full_name : str ) -> Tuple [str , str ]:
@@ -604,14 +704,14 @@ def find_alumni_entry(xlsx_path: Path, name: str) -> Optional[Dict[str, Any]]:
604704 continue
605705 row_name = str (row [0 ]).lower ()
606706 if row_name == name_lower or row_name == name_title :
607- entry = {
707+ entry : Dict [ str , Any ] = {
608708 "sheet" : sheet_name ,
609709 "row_idx" : row_idx ,
610710 "name" : row [0 ],
611711 }
612712 for i , header in enumerate (headers ):
613713 if header and i < len (row ):
614- entry [header ] = row [i ]
714+ entry [str ( header ) ] = row [i ]
615715 wb .close ()
616716 return entry
617717
@@ -710,7 +810,7 @@ def add_to_spreadsheet(
710810 wb = openpyxl .load_workbook (xlsx_path )
711811 sheet = wb ["members" ]
712812
713- if exists :
813+ if exists and row_idx is not None :
714814 print (f" Updating existing entry for { name } at row { row_idx } " )
715815 if image :
716816 sheet .cell (row = row_idx , column = 1 , value = image )
0 commit comments