22"""Onboard new lab members.
33
44This script:
5- 1. Processes photo with hand-drawn border (if provided)
6- 2. Generates or edits bio using local LLM (gpt-oss-20b)
7- 3. Adds member to people.xlsx
8- 4. Adds member to JRM_CV.tex
9- 5. Rebuilds people.html
5+ 1. Checks if member is an alumnus (and reactivates them if so)
6+ 2. Processes photo with hand-drawn border (if provided)
7+ 3. Generates or edits bio using local LLM (gpt-oss-20b)
8+ 4. Adds member to people.xlsx
9+ 5. Adds member to JRM_CV.tex
10+ 6. Rebuilds people.html
1011
1112Idempotent: Running twice with same name will update existing entry.
13+ Reactivation: Running on an alumnus will move them back to active status.
1214
1315Usage:
1416 python onboard_member.py "First Last"
2123import re
2224import subprocess
2325import sys
26+ import tempfile
2427from datetime import datetime
2528from pathlib import Path
26- from typing import Optional , Tuple
29+ from typing import Optional , Tuple , Dict , Any
2730
2831import openpyxl
2932
@@ -152,6 +155,11 @@ def find_photo(photo_hint: str, project_root: Path) -> Optional[Path]:
152155 return None
153156
154157
def photo_already_processed(photo_base: str, project_root: Path) -> bool:
    """Report whether a processed photo for this member is already on disk.

    Args:
        photo_base: File stem of the member's photo (no extension).
        project_root: Root directory of the project checkout.

    Returns:
        True when ``images/people/<photo_base>.png`` exists under
        ``project_root``, False otherwise.
    """
    people_dir = project_root / "images" / "people"
    candidate = people_dir / f"{photo_base}.png"
    return candidate.exists()
161+
162+
155163def process_photo (
156164 photo_path : Path , output_name : str , project_root : Path
157165) -> Optional [str ]:
@@ -213,6 +221,126 @@ def member_exists_in_spreadsheet(
213221 return False , None
214222
215223
def find_alumni_entry(xlsx_path: Path, name: str) -> Optional[Dict[str, Any]]:
    """Find a person in the alumni sheets of the people workbook.

    The sheets ``alumni_postdocs``, ``alumni_grads``, ``alumni_managers`` and
    ``alumni_undergrads`` are scanned in that order (sheets missing from the
    workbook are skipped). The first column of each data row is compared
    case-insensitively against ``name``; the first match wins.

    Args:
        xlsx_path: Path to the workbook to search.
        name: Name of the person to look for.

    Returns:
        ``None`` when no row matches. Otherwise a dict holding every
        non-empty header of the sheet mapped to that row's value, plus:

        * ``"name"``: the first-column value of the row (a column whose
          header is literally ``name`` takes precedence),
        * ``"sheet"``: the name of the sheet the row was found in,
        * ``"row_idx"``: the 1-based worksheet row number.
    """
    wanted = {name.lower(), name.title().lower()}
    alumni_sheets = (
        "alumni_postdocs",
        "alumni_grads",
        "alumni_managers",
        "alumni_undergrads",
    )

    wb = openpyxl.load_workbook(xlsx_path)
    try:
        for sheet_name in alumni_sheets:
            if sheet_name not in wb.sheetnames:
                continue
            sheet = wb[sheet_name]
            headers = [cell.value for cell in sheet[1]]

            # Row 1 holds the headers, so data rows are numbered from 2.
            for row_idx, row in enumerate(
                sheet.iter_rows(min_row=2, values_only=True), start=2
            ):
                if not row[0] or str(row[0]).lower() not in wanted:
                    continue

                entry: Dict[str, Any] = {"name": row[0]}
                entry.update(
                    {header: value for header, value in zip(headers, row) if header}
                )
                # Bookkeeping keys are written last so that a column which
                # happens to be titled "sheet" or "row_idx" cannot clobber
                # the location that remove_from_alumni() relies on.
                entry["sheet"] = sheet_name
                entry["row_idx"] = row_idx
                return entry

        return None
    finally:
        # Release the workbook on every exit path, including errors.
        wb.close()
263+
264+
def remove_from_alumni(xlsx_path: Path, alumni_entry: Dict[str, Any]) -> None:
    """Delete one alumni row from the workbook and save it in place.

    Args:
        xlsx_path: Path to the workbook; it is overwritten on save.
        alumni_entry: Mapping with the keys ``"sheet"`` (worksheet name),
            ``"row_idx"`` (worksheet row number to delete) and ``"name"``
            (used only for the confirmation message).
    """
    workbook = openpyxl.load_workbook(xlsx_path)
    target_sheet = alumni_entry["sheet"]
    target_row = alumni_entry["row_idx"]

    workbook[target_sheet].delete_rows(target_row)
    workbook.save(xlsx_path)
    workbook.close()

    print(f" Removed {alumni_entry['name']} from {alumni_entry['sheet']}")
273+
274+
def get_bio_from_git_history(
    xlsx_path: Path, name: str, project_root: Path
) -> Optional[str]:
    """Search the git history of the workbook for a previous bio.

    The most recent 20 commits touching ``xlsx_path`` are inspected, newest
    first. For each one the historical workbook is extracted with
    ``git show`` and its ``members`` sheet is scanned for a row whose second
    column matches ``name`` case-insensitively. The fifth column of that row
    is taken as the bio and accepted only if it is longer than 10 characters.

    This is a best-effort lookup: any failure (git unavailable, path outside
    the repository, unreadable historical workbook) results in ``None``
    rather than an exception.

    Args:
        xlsx_path: Path to the workbook; must be located under
            ``project_root``.
        name: Name of the member whose bio is wanted.
        project_root: Working directory for the git commands.

    Returns:
        The first bio found, as a string, or ``None`` when nothing usable is
        found.
    """
    wanted = {name.lower(), name.title().lower()}

    try:
        # git addresses objects as "<commit>:<path>" using forward slashes,
        # regardless of the host OS path separator.
        rel_path = xlsx_path.relative_to(project_root).as_posix()

        result = subprocess.run(
            ["git", "log", "--oneline", "--follow", "--", rel_path],
            cwd=project_root,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            return None

        commits = [
            line.split()[0] for line in result.stdout.splitlines() if line.strip()
        ]

        for commit in commits[:20]:  # Limit to last 20 commits
            tmp_path: Optional[Path] = None
            try:
                extract_result = subprocess.run(
                    ["git", "show", f"{commit}:{rel_path}"],
                    cwd=project_root,
                    capture_output=True,
                )
                if extract_result.returncode != 0:
                    continue

                # The temp file is created only once there is content to
                # store, and is always removed by the ``finally`` below.
                with tempfile.NamedTemporaryFile(
                    suffix=".xlsx", delete=False
                ) as tmp:
                    tmp_path = Path(tmp.name)
                    tmp.write(extract_result.stdout)

                wb = openpyxl.load_workbook(tmp_path)
                try:
                    if "members" not in wb.sheetnames:
                        continue
                    for row in wb["members"].iter_rows(
                        min_row=2, values_only=True
                    ):
                        if len(row) < 2 or not row[1]:
                            continue
                        if str(row[1]).lower() not in wanted:
                            continue
                        bio = row[4] if len(row) > 4 else None
                        if bio and len(str(bio)) > 10:
                            print(
                                f" Found old bio in git history (commit {commit})"
                            )
                            return str(bio)
                finally:
                    wb.close()

            except Exception:
                # Deliberate best-effort: a commit whose workbook cannot be
                # read is skipped so that older commits are still tried.
                continue
            finally:
                if tmp_path is not None:
                    tmp_path.unlink(missing_ok=True)

    except Exception as e:
        print(f" Note: Could not search git history: {e}")

    return None
342+
343+
216344def add_to_spreadsheet (
217345 xlsx_path : Path , name : str , role : str , bio : str , image : str , website : str
218346) -> None :
@@ -263,11 +391,74 @@ def member_exists_in_cv(cv_path: Path, name: str) -> bool:
263391 return bool (re .search (pattern , content , re .IGNORECASE ))
264392
265393
def cv_entry_has_end_date(cv_path: Path, name: str) -> bool:
    r"""Check whether the member's CV entry is closed (has an end date).

    Three ``\item`` forms are recognised for ``name`` (case-insensitive,
    with an optional ``*`` after the name):

    * ``\item Name (YYYY -- YYYY)``: closed range,
    * ``\item Name (YYYY)``: single year, treated as closed,
    * ``\item Name (YYYY -- )``: open range.

    An open entry takes precedence: if the member has any open entry they
    are treated as currently active, even when an older closed or
    single-year entry also exists.

    Args:
        cv_path: Path to the LaTeX CV file (read as UTF-8).
        name: Member name to look for; it is regex-escaped before matching.

    Returns:
        True if a closed-range or single-year entry exists and no open entry
        exists for ``name``; False otherwise.
    """
    content = cv_path.read_text(encoding="utf-8")
    item_prefix = r"\\item\s+" + re.escape(name) + r"\*?\s*"

    def has_entry(date_pattern: str) -> bool:
        return bool(re.search(item_prefix + date_pattern, content, re.IGNORECASE))

    # An open entry means the member is already active; older closed entries
    # for the same person must not trigger a reopen.
    if has_entry(r"\(\d{4}\s*--\s*\)"):
        return False

    return has_entry(r"\(\d{4}\s*--\s*\d{4}\)") or has_entry(r"\(\d{4}\)")
412+
413+
def reopen_cv_entry(cv_path: Path, name: str) -> bool:
    r"""Turn a closed CV entry for ``name`` back into an open one.

    ``\item Name (YYYY -- YYYY)`` becomes ``\item Name (YYYY -- )``. Only if
    no such closed range is present, ``\item Name (YYYY)`` becomes
    ``\item Name (YYYY -- )``. Matching is case-insensitive and every
    matching entry of the chosen form is rewritten.

    Args:
        cv_path: Path to the LaTeX CV file (read and written as UTF-8).
        name: Member name; it is regex-escaped before matching.

    Returns:
        True if the file was modified, False if nothing matched (the file is
        then left untouched).
    """
    original_text = cv_path.read_text(encoding="utf-8")
    item_head = r"(\\item\s+" + re.escape(name) + r"\*?\s*\()"

    # (pattern, replacement template), tried in order; the first pattern that
    # matches anything decides the rewrite.
    rewrites = (
        # Closed range (YYYY -- YYYY): keep the start year, drop the end year.
        (item_head + r"(\d{4})(\s*--\s*)(\d{4})(\))", r"\g<1>\g<2> -- \g<5>"),
        # Single year (YYYY), not followed by "--": append the open dash.
        (item_head + r"(\d{4})(\))(?!\s*--)", r"\g<1>\g<2> -- \g<3>"),
    )

    for pattern, template in rewrites:
        updated_text, n_changed = re.subn(
            pattern, template, original_text, flags=re.IGNORECASE
        )
        if n_changed > 0:
            cv_path.write_text(updated_text, encoding="utf-8")
            print(f" Reopened CV entry for {name}")
            return True

    return False
452+
453+
266454def add_to_cv (cv_path : Path , name : str , role : str , year : str ) -> bool :
267455 content = cv_path .read_text (encoding = "utf-8" )
268456
269457 if member_exists_in_cv (cv_path , name ):
270- print (f" { name } already exists in CV, skipping" )
458+ if cv_entry_has_end_date (cv_path , name ):
459+ print (f" { name } exists in CV with end date, reopening..." )
460+ return reopen_cv_entry (cv_path , name )
461+ print (f" { name } already exists in CV with open date, skipping" )
271462 return True
272463
273464 role_lower = role .lower ()
@@ -346,22 +537,33 @@ def onboard_member(
346537
347538 print (f"\n Onboarding { name } as { rank } ..." )
348539
540+ alumni_entry = find_alumni_entry (xlsx_path , name )
541+ is_reactivation = alumni_entry is not None
542+
543+ if is_reactivation :
544+ print (f" Found { name } in { alumni_entry ['sheet' ]} - reactivating..." )
545+ remove_from_alumni (xlsx_path , alumni_entry )
546+
349547 image_filename = None
350- if photo is None :
351- photo = photo_base
352548
353- photo_path = find_photo (photo , project_root )
354- if photo_path :
355- print (f" Found photo: { photo_path } " )
356- image_filename = process_photo (photo_path , photo_base , project_root )
549+ if photo_already_processed (photo_base , project_root ):
550+ print (f" Using existing processed photo: { photo_base } .png" )
551+ image_filename = f"{ photo_base } .png"
357552 else :
358- existing_photo = project_root / "images" / "people" / f"{ photo_base } .png"
359- if existing_photo .exists ():
360- print (f" Using existing photo: { existing_photo } " )
361- image_filename = f"{ photo_base } .png"
553+ if photo is None :
554+ photo = photo_base
555+
556+ photo_path = find_photo (photo , project_root )
557+ if photo_path :
558+ print (f" Found photo: { photo_path } " )
559+ image_filename = process_photo (photo_path , photo_base , project_root )
362560 else :
363561 print (f" No photo found for { photo } " )
364562
563+ if bio is None and is_reactivation :
564+ print (" Searching git history for old bio..." )
565+ bio = get_bio_from_git_history (xlsx_path , name , project_root )
566+
365567 if not skip_llm :
366568 if bio :
367569 print (" Editing bio with LLM..." )
@@ -385,7 +587,8 @@ def onboard_member(
385587 if not skip_rebuild :
386588 rebuild_pages (project_root )
387589
388- print (f"\n Successfully onboarded { name } !" )
590+ action = "reactivated" if is_reactivation else "onboarded"
591+ print (f"\n Successfully { action } { name } !" )
389592 return True
390593
391594
0 commit comments