Skip to content

Commit 91746ae

Browse files
committed
add alumni reactivation support to onboard_member.py
When onboarding someone who is already in an alumni sheet, the script: removes them from the alumni sheet; reopens their CV entry (removes the end date); checks for an existing processed photo (and skips re-processing); and retrieves the old bio from git history if no bio is provided.
1 parent de0629f commit 91746ae

1 file changed

Lines changed: 221 additions & 18 deletions

File tree

scripts/onboard_member.py

Lines changed: 221 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
"""Onboard new lab members.
33
44
This script:
5-
1. Processes photo with hand-drawn border (if provided)
6-
2. Generates or edits bio using local LLM (gpt-oss-20b)
7-
3. Adds member to people.xlsx
8-
4. Adds member to JRM_CV.tex
9-
5. Rebuilds people.html
5+
1. Checks if member is listed in an alumni sheet (and reactivates them if so)
6+
2. Processes photo with hand-drawn border (if provided)
7+
3. Generates or edits bio using local LLM (gpt-oss-20b)
8+
4. Adds member to people.xlsx
9+
5. Adds member to JRM_CV.tex
10+
6. Rebuilds people.html
1011
1112
Idempotent: Running twice with same name will update existing entry.
13+
Reactivation: Running on a member listed in an alumni sheet will move them back to active status.
1214
1315
Usage:
1416
python onboard_member.py "First Last"
@@ -21,9 +23,10 @@
2123
import re
2224
import subprocess
2325
import sys
26+
import tempfile
2427
from datetime import datetime
2528
from pathlib import Path
26-
from typing import Optional, Tuple
29+
from typing import Optional, Tuple, Dict, Any
2730

2831
import openpyxl
2932

@@ -152,6 +155,11 @@ def find_photo(photo_hint: str, project_root: Path) -> Optional[Path]:
152155
return None
153156

154157

158+
def photo_already_processed(photo_base: str, project_root: Path) -> bool:
    """Return True when a processed PNG for *photo_base* is already on disk.

    The file is looked up at ``<project_root>/images/people/<photo_base>.png``.
    """
    people_dir = project_root / "images" / "people"
    return (people_dir / f"{photo_base}.png").exists()
161+
162+
155163
def process_photo(
156164
photo_path: Path, output_name: str, project_root: Path
157165
) -> Optional[str]:
@@ -213,6 +221,126 @@ def member_exists_in_spreadsheet(
213221
return False, None
214222

215223

224+
def find_alumni_entry(xlsx_path: Path, name: str) -> Optional[Dict[str, Any]]:
    """Find *name* in the alumni sheets of the workbook at *xlsx_path*.

    The sheets ``alumni_postdocs``, ``alumni_grads``, ``alumni_managers`` and
    ``alumni_undergrads`` are searched in that order; sheets missing from the
    workbook are skipped. The first column of each data row (row 2 onward) is
    compared to *name*, ignoring case and surrounding whitespace.

    Args:
        xlsx_path: Path to the workbook to search.
        name: Member name to look for.

    Returns:
        ``None`` when no row matches. Otherwise a dict holding one key per
        non-empty header cell (mapped to that row's value) plus:

        * ``"sheet"``   - name of the sheet the row was found in,
        * ``"row_idx"`` - 1-based row number inside that sheet,
        * ``"name"``    - the first-column value, unless a header called
          ``"name"`` already supplied one.
    """
    alumni_sheets = (
        "alumni_postdocs",
        "alumni_grads",
        "alumni_managers",
        "alumni_undergrads",
    )
    wanted = name.strip()
    candidates = {wanted.lower(), wanted.title().lower()}

    wb = openpyxl.load_workbook(xlsx_path)
    try:
        for sheet_name in alumni_sheets:
            if sheet_name not in wb.sheetnames:
                continue
            sheet = wb[sheet_name]
            headers = [cell.value for cell in sheet[1]]

            for row_idx, row in enumerate(
                sheet.iter_rows(min_row=2, values_only=True), start=2
            ):
                if not row or not row[0]:
                    continue
                if str(row[0]).strip().lower() not in candidates:
                    continue

                # Copy the spreadsheet columns first ...
                entry: Dict[str, Any] = {
                    header: value for header, value in zip(headers, row) if header
                }
                entry.setdefault("name", row[0])
                # ... then set the bookkeeping keys last, so a column that
                # happens to be titled "sheet" or "row_idx" cannot clobber the
                # location a later row deletion depends on.
                entry["sheet"] = sheet_name
                entry["row_idx"] = row_idx
                return entry

        return None
    finally:
        # Single exit point for closing the workbook, including error paths.
        wb.close()
263+
264+
265+
def remove_from_alumni(xlsx_path: Path, alumni_entry: Dict[str, Any]) -> None:
    """Delete one alumni row from the workbook and save it back in place.

    Args:
        xlsx_path: Workbook to modify; it is overwritten on save.
        alumni_entry: Mapping with ``"sheet"`` (sheet name), ``"row_idx"``
            (1-based row number to delete) and ``"name"`` (used only for the
            progress message).
    """
    workbook = openpyxl.load_workbook(xlsx_path)
    target_sheet = workbook[alumni_entry["sheet"]]
    target_sheet.delete_rows(alumni_entry["row_idx"])
    workbook.save(xlsx_path)
    workbook.close()

    removed_name = alumni_entry["name"]
    source_sheet = alumni_entry["sheet"]
    print(f" Removed {removed_name} from {source_sheet}")
273+
274+
275+
def get_bio_from_git_history(
    xlsx_path: Path, name: str, project_root: Path
) -> Optional[str]:
    """Search recent git history of the workbook for an old bio of *name*.

    Up to the 20 most recent commits touching *xlsx_path* are inspected,
    newest first. For each one the historical workbook is extracted with
    ``git show`` and its ``members`` sheet is scanned for a row whose name
    matches *name* (case-insensitive) and whose bio is longer than 10
    characters.

    Args:
        xlsx_path: Path to the workbook; must be located under *project_root*.
        name: Member name to look for.
        project_root: Repository root, used as the working directory for git.

    Returns:
        The first bio found, as a string, or ``None`` when nothing usable is
        found or git history cannot be searched. This function is best-effort
        and never raises.
    """
    wanted = name.strip()
    candidates = {wanted.lower(), wanted.title().lower()}

    try:
        # git addresses historical blobs as "<rev>:<path>" and expects forward
        # slashes on every platform, so normalise the path once up front.
        rel_path = xlsx_path.relative_to(project_root).as_posix()

        result = subprocess.run(
            ["git", "log", "--oneline", "--follow", "--", rel_path],
            cwd=project_root,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            return None

        commits = [
            line.split()[0] for line in result.stdout.splitlines() if line.strip()
        ]

        for commit in commits[:20]:  # Limit to last 20 commits
            try:
                extract_result = subprocess.run(
                    ["git", "show", f"{commit}:{rel_path}"],
                    cwd=project_root,
                    capture_output=True,
                )
                if extract_result.returncode != 0:
                    continue
                bio = _bio_from_workbook_bytes(extract_result.stdout, candidates)
            except Exception:
                # An unreadable historical revision should not stop the search.
                continue

            if bio is not None:
                print(f" Found old bio in git history (commit {commit})")
                return bio

    except Exception as e:
        print(f" Note: Could not search git history: {e}")

    return None


def _bio_from_workbook_bytes(data: bytes, candidates) -> Optional[str]:
    """Return the bio of a matching ``members`` row in an xlsx blob, or None."""
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
        tmp.write(data)
        tmp_path = Path(tmp.name)

    try:
        wb = openpyxl.load_workbook(tmp_path)
        try:
            if "members" not in wb.sheetnames:
                return None
            for row in wb["members"].iter_rows(min_row=2, values_only=True):
                # NOTE(review): assumes column 2 holds the name and column 5
                # the bio in the "members" sheet - confirm against the schema.
                if len(row) < 2 or not row[1]:
                    continue
                if str(row[1]).strip().lower() not in candidates:
                    continue
                bio = row[4] if len(row) > 4 else None
                if bio and len(str(bio)) > 10:
                    return str(bio)
            return None
        finally:
            wb.close()
    finally:
        # The temp file is created with delete=False, so remove it on every
        # path (success, no match, or parse failure) to avoid leaking files.
        tmp_path.unlink(missing_ok=True)
342+
343+
216344
def add_to_spreadsheet(
217345
xlsx_path: Path, name: str, role: str, bio: str, image: str, website: str
218346
) -> None:
@@ -263,11 +391,74 @@ def member_exists_in_cv(cv_path: Path, name: str) -> bool:
263391
return bool(re.search(pattern, content, re.IGNORECASE))
264392

265393

394+
def cv_entry_has_end_date(cv_path: Path, name: str) -> bool:
    """Report whether the CV lists *name* with a closed date range.

    An entry counts as closed when it looks like ``\\item Name (YYYY -- YYYY)``,
    or like ``\\item Name (YYYY)`` while no open ``\\item Name (YYYY -- )``
    entry exists for the same name. An optional ``*`` may follow the name and
    matching is case-insensitive.
    """
    text = cv_path.read_text(encoding="utf-8")
    item_prefix = r"\\item\s+" + re.escape(name) + r"\*?\s*"

    def found(date_part: str) -> bool:
        # True when an \item line for this name carries the given date form.
        return re.search(item_prefix + date_part, text, re.IGNORECASE) is not None

    if found(r"\(\d{4}\s*--\s*\d{4}\)"):
        return True
    # A lone year only counts as an end date when there is no open entry.
    return found(r"\(\d{4}\)") and not found(r"\(\d{4}\s*--\s*\)")
412+
413+
414+
def reopen_cv_entry(cv_path: Path, name: str) -> bool:
    """Rewrite the CV so that *name*'s entry has an open-ended date range.

    ``(YYYY -- YYYY)`` becomes ``(YYYY -- )``; if no such closed range exists
    for the name, a single-year ``(YYYY)`` becomes ``(YYYY -- )`` instead.
    Every matching entry of the chosen form is rewritten, matching is
    case-insensitive, and the file is only written when something changed.

    Returns:
        True when the file was updated, False when no entry matched.
    """
    original_text = cv_path.read_text(encoding="utf-8")
    item_open = r"(\\item\s+" + re.escape(name) + r"\*?\s*\()"

    # (pattern, replacement) pairs, tried in order: closed range first, then
    # single year. Replacements keep the start year and the closing paren.
    attempts = (
        (item_open + r"(\d{4})(\s*--\s*)(\d{4})(\))", r"\g<1>\g<2> -- \g<5>"),
        (item_open + r"(\d{4})(\))(?!\s*--)", r"\g<1>\g<2> -- \g<3>"),
    )

    for pattern, replacement in attempts:
        updated_text, hits = re.subn(
            pattern, replacement, original_text, flags=re.IGNORECASE
        )
        if hits:
            cv_path.write_text(updated_text, encoding="utf-8")
            print(f" Reopened CV entry for {name}")
            return True

    return False
452+
453+
266454
def add_to_cv(cv_path: Path, name: str, role: str, year: str) -> bool:
267455
content = cv_path.read_text(encoding="utf-8")
268456

269457
if member_exists_in_cv(cv_path, name):
270-
print(f" {name} already exists in CV, skipping")
458+
if cv_entry_has_end_date(cv_path, name):
459+
print(f" {name} exists in CV with end date, reopening...")
460+
return reopen_cv_entry(cv_path, name)
461+
print(f" {name} already exists in CV with open date, skipping")
271462
return True
272463

273464
role_lower = role.lower()
@@ -346,22 +537,33 @@ def onboard_member(
346537

347538
print(f"\nOnboarding {name} as {rank}...")
348539

540+
alumni_entry = find_alumni_entry(xlsx_path, name)
541+
is_reactivation = alumni_entry is not None
542+
543+
if is_reactivation:
544+
print(f" Found {name} in {alumni_entry['sheet']} - reactivating...")
545+
remove_from_alumni(xlsx_path, alumni_entry)
546+
349547
image_filename = None
350-
if photo is None:
351-
photo = photo_base
352548

353-
photo_path = find_photo(photo, project_root)
354-
if photo_path:
355-
print(f" Found photo: {photo_path}")
356-
image_filename = process_photo(photo_path, photo_base, project_root)
549+
if photo_already_processed(photo_base, project_root):
550+
print(f" Using existing processed photo: {photo_base}.png")
551+
image_filename = f"{photo_base}.png"
357552
else:
358-
existing_photo = project_root / "images" / "people" / f"{photo_base}.png"
359-
if existing_photo.exists():
360-
print(f" Using existing photo: {existing_photo}")
361-
image_filename = f"{photo_base}.png"
553+
if photo is None:
554+
photo = photo_base
555+
556+
photo_path = find_photo(photo, project_root)
557+
if photo_path:
558+
print(f" Found photo: {photo_path}")
559+
image_filename = process_photo(photo_path, photo_base, project_root)
362560
else:
363561
print(f" No photo found for {photo}")
364562

563+
if bio is None and is_reactivation:
564+
print(" Searching git history for old bio...")
565+
bio = get_bio_from_git_history(xlsx_path, name, project_root)
566+
365567
if not skip_llm:
366568
if bio:
367569
print(" Editing bio with LLM...")
@@ -385,7 +587,8 @@ def onboard_member(
385587
if not skip_rebuild:
386588
rebuild_pages(project_root)
387589

388-
print(f"\nSuccessfully onboarded {name}!")
590+
action = "reactivated" if is_reactivation else "onboarded"
591+
print(f"\nSuccessfully {action} {name}!")
389592
return True
390593

391594

0 commit comments

Comments
 (0)