@@ -66,19 +66,22 @@ <h1>Publications</h1>
6666 <h2 id="publications">2025</h2>
6767 <ul class="publications">
6868 <li>
69- <a target="_blank" href="#">Verifying Distributed Deep Learning Training via Parallelization Equivalence</a> <br>
70- <span class="authorlist"> <i> <a href="https://mercury-browser-ede.notion.site/yunchi" class="nodec">Yunchi Lu</a>, </i> <i> <a href="https://naizhengtan.github.io" class="nodec">Cheng Tan</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/yomia" class="nodec">Youshan Miao</a>, </i> <i> <a href="https://web.eecs.umich.edu/~ryanph" class="nodec">Peng Huang</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/yizhu1" class="nodec">Yi Zhu</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/zhxian" class="nodec">Xian Zhang</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/fanyang" class="nodec">Fan Yang</a> <br> </i> </span>
71- <a target="_blank" href="https://sigops.org/s/conferences/sosp/2025/" class="conf"> <b>SOSP 2025</b> </a>
69+ <a target="_blank" href="paper/trainverify-sosp25.pdf">TrainVerify: Equivalence-Based Verification for Distributed LLM Training</a> <br>
70+ <span class="authorlist"> <i> <a href="https://luyunchi.github.io" class="nodec">Yunchi Lu</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/yomia" class="nodec">Youshan Miao</a>, </i> <i> <a href="https://naizhengtan.github.io" class="nodec">Cheng Tan</a>, </i> <i> <a href="https://web.eecs.umich.edu/~ryanph" class="nodec">Peng Huang</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/yizhu1" class="nodec">Yi Zhu</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/zhxian" class="nodec">Xian Zhang</a>, </i> <i> <a href="https://www.microsoft.com/en-us/research/people/fanyang" class="nodec">Fan Yang</a> <br> </i> </span>
71+ <a target="_blank" href="https://sigops.org/s/conferences/sosp/2025/" class="conf"> <b>SOSP 2025</b> </a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="paper/trainverify.bib">citation</a>
72+ <a target="_blank" role="button" class="btn btn-outline-primary publinkitem" href="slides/trainverify_sosp25_slides.pdf">slides</a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="https://github.com/verify-llm/TrainVerify">software</a> <a target="_blank" role="button" class="btn btn-outline-primary publinkitem" href="https://arxiv.org/abs/2506.15961">arXiv</a>
7273 </li>
7374 <li>
74- <a target="_blank" href="#">Optimistic Recovery for High-Availability Software via Partial Process State Preservation</a> <br>
75+ <a target="_blank" href="paper/phoenix-sosp25.pdf">Optimistic Recovery for High-Availability Software via Partial Process State Preservation</a> <br>
7576 <span class="authorlist"> <i> <a href="https://osdi.dev" class="nodec">Yuzhuo Jing</a>, </i> <i>Yuqi Mai, </i> <i>Angting Cai, </i> <i> <a href="https://chenyi.world" class="nodec">Yi Chen</a>, </i> <i> <a href="https://hwanning.netlify.app" class="nodec">Wanning He</a>, </i> <i>Xiaoyang Qian, </i> <i> <a href="https://web.eecs.umich.edu/~pmchen" class="nodec">Peter M. Chen</a>, </i> <i> <a href="https://web.eecs.umich.edu/~ryanph" class="nodec">Peng Huang</a> <br> </i> </span>
76- <a target="_blank" href="https://sigops.org/s/conferences/sosp/2025/" class="conf"> <b>SOSP 2025</b> </a>
77+ <a target="_blank" href="https://sigops.org/s/conferences/sosp/2025/" class="conf"> <b>SOSP 2025</b> </a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="paper/phoenix.bib">citation</a>
78+ <a target="_blank" role="button" class="btn btn-outline-primary publinkitem" href="slides/phoenix_sosp25_slides.pdf">slides</a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="https://github.com/OrderLab/phoenix">software</a>
7779 </li>
7880 <li>
79- <a target="_blank" href="#">Mitigating Application Resource Overload with Targeted Task Cancellation</a> <br>
81+ <a target="_blank" href="paper/atropos-sosp25.pdf">Mitigating Application Resource Overload with Targeted Task Cancellation</a> <br>
8082 <span class="authorlist"> <i> <a href="https://yigonghu.github.io" class="nodec">Yigong Hu</a>, </i> <i>Zeyin Zhang, </i> <i>Yicheng Liu, </i> <i>Yile Gu, </i> <i>Shuangyu Lei, </i> <i> <a href="https://homes.cs.washington.edu/~baris" class="nodec">Baris Kasikci</a>, </i> <i> <a href="https://web.eecs.umich.edu/~ryanph" class="nodec">Peng Huang</a> <br> </i> </span>
81- <a target="_blank" href="https://sigops.org/s/conferences/sosp/2025/" class="conf"> <b>SOSP 2025</b> </a>
83+ <a target="_blank" href="https://sigops.org/s/conferences/sosp/2025/" class="conf"> <b>SOSP 2025</b> </a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="paper/atropos.bib">citation</a>
84+ <a target="_blank" role="button" class="btn btn-outline-primary publinkitem" href="slides/atropos_sosp25_slides.pdf">slides</a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="https://github.com/OrderLab/Atropos">software</a>
8285 </li>
8386 <li>
8487 <a target="_blank" href="paper/traincheck-osdi25-preprint.pdf">Training with Confidence: Catching Silent Errors in Deep Learning Training with Automated Proactive Checks</a> <br>
@@ -93,7 +96,7 @@ <h2 id="publications">2025</h2>
9396 </li>
9497 <li>
9598 <a target="_blank" href="paper/xinda-nsdi25-preprint.pdf">One-Size-Fits-None: Understanding and Enhancing Slow-Fault Tolerance in Modern Distributed Systems</a> <br>
96- <span class="authorlist"> <i> <a href="https://ruiming-lu.github.io" class="nodec">Ruiming Lu</a>, </i> <i> <a href="https://mercury-browser-ede.notion.site/yunchi" class="nodec">Yunchi Lu</a>, </i> <i> <a href="https://essoz.github.io" class="nodec">Yuxuan Jiang</a>, </i> <i>Guangtao Xue, </i> <i> <a href="https://web.eecs.umich.edu/~ryanph" class="nodec">Peng Huang</a> <br> </i> </span>
99+ <span class="authorlist"> <i> <a href="https://ruiming-lu.github.io" class="nodec">Ruiming Lu</a>, </i> <i> <a href="https://luyunchi.github.io" class="nodec">Yunchi Lu</a>, </i> <i> <a href="https://essoz.github.io" class="nodec">Yuxuan Jiang</a>, </i> <i>Guangtao Xue, </i> <i> <a href="https://web.eecs.umich.edu/~ryanph" class="nodec">Peng Huang</a> <br> </i> </span>
97100 <a target="_blank" href="https://www.usenix.org/conference/nsdi25" class="conf"> <b>NSDI 2025</b> </a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="paper/xinda-nsdi25.bib">citation</a>
98101 <a target="_blank" role="button" class="btn btn-outline-primary publinkitem" href="slides/xinda_nsdi25_slides.pdf">slides</a> <a target="_blank" class="btn btn-outline-primary publinkitem" href="https://github.com/OrderLab/xinda">software</a> <br> <div class="press"> <b>Coverage:</b> <a target="_blank" href="https://cse.engin.umich.edu/stories/a-new-tool-to-manage-slow-faults">CSE News</a>, <a target="_blank" href="https://techxplore.com/news/2025-05-tool-faults-real-adjustment.html">Tech Xplore</a> </div>
99102 </li>
@@ -357,15 +360,14 @@ <h2 id="publications">2010</h2>
357360 </div>
358361</section>
359362
360-
361363 <!-- ================ Start footer area ================= -->
362364<footer class="footer-area section-gap">
363365<div class="container">
364366 <div class="footer-bottom row align-items-center text-center text-lg-left">
365367 <p class="footer-text m-0 col-lg-8 col-md-12">
366368 Copyright © OrderLab 2017-<script>
367369 document.write(new Date().getFullYear());
368- </script> All rights reserved. | Last updated 2025-07-28 14:01:59 -0400 .
370+ </script> All rights reserved. | Last updated 2025-11-18 13:19:04 -0500 .
369371 </p>
370372 </div>
371373</div>
0 commit comments