@@ -309,6 +309,20 @@ <h4>Citation</h4>
309309 < td class ="align-middle text-center "> < b > 59.9</ b > </ td >
310310 </ tr >
311311
<!-- Leaderboard entry: "Grok4 + DP" baseline, dated Jul 09, 2025.
     Cells: date badge, method/affiliation, four per-column scores, then a bold
     final value (presumably the overall score; confirm against the table header). -->
<tr>
  <td class="align-middle text-center counter-cell" scope="row">
    <span class="badge badge-secondary">Jul 09, 2025</span>
  </td>
  <td class="align-middle text-center">
    Grok4 + DP<br>
    <span class="affiliation">baseline</span>
  </td>
  <td class="align-middle text-center">83.33</td>
  <td class="align-middle text-center">73.8</td>
  <td class="align-middle text-center">40.54</td>
  <td class="align-middle text-center">16.0</td>
  <td class="align-middle text-center"><b>57.8</b></td>
</tr>
325+
312326 < tr >
313327 < td scope ="row " class ="align-middle text-center counter-cell ">
314328 < span class ="badge badge-secondary "> Jan 20, 2025</ span >
@@ -323,6 +337,20 @@ <h4>Citation</h4>
323337 < td class ="align-middle text-center "> < b > 56.31</ b > </ td >
324338 </ tr >
325339
<!-- Leaderboard entry: "Claude4-Sonnet + DP" baseline, dated May 22, 2025.
     Cells: date badge, method/affiliation, four per-column scores, then a bold
     final value (presumably the overall score; confirm against the table header). -->
<tr>
  <td class="align-middle text-center counter-cell" scope="row">
    <span class="badge badge-secondary">May 22, 2025</span>
  </td>
  <td class="align-middle text-center">
    Claude4-Sonnet + DP<br>
    <span class="affiliation">baseline</span>
  </td>
  <td class="align-middle text-center">81.25</td>
  <td class="align-middle text-center">75.57</td>
  <td class="align-middle text-center">31.14</td>
  <td class="align-middle text-center">54.0</td>
  <td class="align-middle text-center"><b>54.75</b></td>
</tr>
353+
326354 < tr >
327355 < td scope ="row " class ="align-middle text-center counter-cell ">
328356 < span class ="badge badge-secondary "> Apr 05, 2025</ span >
@@ -733,6 +761,20 @@ <h4>Citation</h4>
733761 < td class ="align-middle text-center "> -</ td >
734762 </ tr >
735763
<!-- Model entry: Grok4 (xAI), dated Jul 09, 2025.
     The <code> cell reads UNK (presumably "model size unknown"; confirm against
     the table header). Only the first score column is filled; the rest are "-". -->
<tr>
  <td class="align-middle text-center counter-cell" scope="row">
    <span class="badge badge-secondary">Jul 09, 2025</span>
  </td>
  <td class="model-cell align-middle text-center">
    Grok4<br>
    <span class="affiliation">xAI</span>
  </td>
  <td class="align-middle text-center"><code>UNK</code></td>
  <td class="align-middle text-center"><b>57.8</b></td>
  <td class="align-middle text-center">-</td>
  <td class="align-middle text-center">-</td>
  <td class="align-middle text-center">-</td>
</tr>
777+
736778 < tr >
737779 < td scope ="row " class ="align-middle text-center counter-cell ">
738780 < span class ="badge badge-secondary "> Jan 20, 2025</ span >
@@ -748,6 +790,20 @@ <h4>Citation</h4>
748790 < td class ="align-middle text-center "> -</ td >
749791 </ tr >
750792
<!-- Model entry: Claude4-Sonnet (Anthropic), dated May 22, 2025.
     The size cell is UNK, in the same plain <code> form the Grok4 row uses:
     the previous "685B" with a colored inline style appears to have been copied
     from another row, and no parameter count for this model is known to be public.
     Only the first score column is filled; the rest are "-". -->
<tr>
  <td class="align-middle text-center counter-cell" scope="row">
    <span class="badge badge-secondary">May 22, 2025</span>
  </td>
  <td class="model-cell align-middle text-center">
    Claude4-Sonnet<br>
    <span class="affiliation">Anthropic</span>
  </td>
  <td class="align-middle text-center"><code>UNK</code></td>
  <td class="align-middle text-center"><b>54.75</b></td>
  <td class="align-middle text-center">-</td>
  <td class="align-middle text-center">-</td>
  <td class="align-middle text-center">-</td>
</tr>
806+
751807 < tr >
752808 < td scope ="row " class ="align-middle text-center counter-cell ">
753809 < span class ="badge badge-secondary "> Apr 05, 2025</ span >
0 commit comments