@@ -281,6 +281,34 @@ <h4>Citation</h4>
281281 < td class ="align-middle text-center "> < b > 85.91</ b > </ td >
282282 </ tr >
283283
284+ < tr >
285+ < td scope ="row " class ="align-middle text-center counter-cell ">
286+ < span class ="badge badge-secondary "> Apr 16, 2025</ span >
287+ </ td >
288+ < td class ="align-middle text-center "> o4-mini-high + DP< br >
289+ < span class ="affiliation "> baseline</ span >
290+ </ td >
291+ < td class ="align-middle text-center "> 82.29</ td >
292+ < td class ="align-middle text-center "> 81.11</ td >
293+ < td class ="align-middle text-center "> 42.3</ td >
294+ < td class ="align-middle text-center "> 54.0</ td >
295+ < td class ="align-middle text-center "> < b > 61.69</ b > </ td >
296+ </ tr >
297+
298+ < tr >
299+ < td scope ="row " class ="align-middle text-center counter-cell ">
300+ < span class ="badge badge-secondary "> Apr 16, 2025</ span >
301+ </ td >
302+ < td class ="align-middle text-center "> o4-mini + DP< br >
303+ < span class ="affiliation "> baseline</ span >
304+ </ td >
305+ < td class ="align-middle text-center "> 83.33</ td >
306+ < td class ="align-middle text-center "> 80.35</ td >
307+ < td class ="align-middle text-center "> 40.5</ td >
308+ < td class ="align-middle text-center "> 50.0</ td >
309+ < td class ="align-middle text-center "> < b > 60.75</ b > </ td >
310+ </ tr >
311+
284312 < tr >
285313 < td scope ="row " class ="align-middle text-center counter-cell ">
286314 < span class ="badge badge-secondary "> Jul 22, 2025</ span >
@@ -295,18 +323,32 @@ <h4>Citation</h4>
295323 < td class ="align-middle text-center "> < b > 60.1</ b > </ td >
296324 </ tr >
297325
326+ < tr >
327+ < td scope ="row " class ="align-middle text-center counter-cell ">
328+ < span class ="badge badge-secondary "> Aug 07, 2025</ span >
329+ </ td >
330+ < td class ="align-middle text-center "> GPT-5 + DP< br >
331+ < span class ="affiliation "> baseline</ span >
332+ </ td >
333+ < td class ="align-middle text-center "> 83.33</ td >
334+ < td class ="align-middle text-center "> 82.37</ td >
335+ < td class ="align-middle text-center "> 36.04</ td >
336+ < td class ="align-middle text-center "> 58.0</ td >
337+ < td class ="align-middle text-center "> < b > 59.94</ b > </ td >
338+ </ tr >
339+
298340 < tr >
299341 < td scope ="row " class ="align-middle text-center counter-cell ">
300342 < span class ="badge badge-secondary "> Jan 31, 2025</ span >
301343 </ td >
302- < td class ="align-middle text-center "> o3-mini-2025-01-31 + DP< br >
344+ < td class ="align-middle text-center "> o3-mini + DP< br >
303345 < span class ="affiliation "> baseline</ span >
304346 </ td >
305347 < td class ="align-middle text-center "> 86.46</ td >
306348 < td class ="align-middle text-center "> 82.07</ td >
307349 < td class ="align-middle text-center "> 35.56</ td >
308350 < td class ="align-middle text-center "> 32.0</ td >
309- < td class ="align-middle text-center "> < b > 59.9 </ b > </ td >
351+ < td class ="align-middle text-center "> < b > 59.90 </ b > </ td >
310352 </ tr >
311353
312354 < tr >
@@ -320,7 +362,21 @@ <h4>Citation</h4>
320362 < td class ="align-middle text-center "> 73.8</ td >
321363 < td class ="align-middle text-center "> 40.54</ td >
322364 < td class ="align-middle text-center "> 16.0</ td >
323- < td class ="align-middle text-center "> < b > 57.8</ b > </ td >
365+ < td class ="align-middle text-center "> < b > 57.80</ b > </ td >
366+ </ tr >
367+
368+ < tr >
369+ < td scope ="row " class ="align-middle text-center counter-cell ">
370+ < span class ="badge badge-secondary "> Jun 17, 2025</ span >
371+ </ td >
372+ < td class ="align-middle text-center "> Gemini-2.5-Pro + DP< br >
373+ < span class ="affiliation "> baseline</ span >
374+ </ td >
375+ < td class ="align-middle text-center "> 84.38</ td >
376+ < td class ="align-middle text-center "> 79.6</ td >
377+ < td class ="align-middle text-center "> 31.86</ td >
378+ < td class ="align-middle text-center "> 66.0</ td >
379+ < td class ="align-middle text-center "> < b > 57.18</ b > </ td >
324380 </ tr >
325381
326382 < tr >
@@ -365,6 +421,20 @@ <h4>Citation</h4>
365421 < td class ="align-middle text-center "> < b > 52.73</ b > </ td >
366422 </ tr >
367423
424+ < tr >
425+ < td scope ="row " class ="align-middle text-center counter-cell ">
426+ < span class ="badge badge-secondary "> Apr 29, 2025</ span >
427+ </ td >
428+ < td class ="align-middle text-center "> Qwen3-32B< br >
429+ < span class ="affiliation "> baseline</ span >
430+ </ td >
431+ < td class ="align-middle text-center "> 83.33</ td >
432+ < td class ="align-middle text-center "> 72.54</ td >
433+ < td class ="align-middle text-center "> 28.16</ td >
434+ < td class ="align-middle text-center "> 18.0</ td >
435+ < td class ="align-middle text-center "> < b > 52.45</ b > </ td >
436+ </ tr >
437+
368438 < tr >
369439 < td scope ="row " class ="align-middle text-center counter-cell ">
370440 < span class ="badge badge-secondary "> May 13, 2024</ span >
@@ -745,6 +815,50 @@ <h4>Citation</h4>
745815 </ thead >
746816 < tbody >
747817
818+ < tr >
819+ < td scope ="row " class ="align-middle text-center counter-cell ">
820+ < span class ="badge badge-secondary "> Apr 16, 2025</ span >
821+ </ td >
822+ < td class ="model-cell align-middle text-center "> o4-mini-high 🤔< br >
823+ < span class ="affiliation "> (2025-04-16)</ span > < br >
824+ < span class ="affiliation "> OpenAI</ span >
825+ </ td >
826+ < td class ="align-middle text-center "> < code > UNK</ code > </ td >
827+ < td class =" align-middle text-center "> < b > 61.69</ b > </ td >
828+ < td class =" align-middle text-center "> -</ td >
829+ < td class ="align-middle text-center "> -</ td >
830+ < td class ="align-middle text-center "> -</ td >
831+ </ tr >
832+
833+ < tr >
834+ < td scope ="row " class ="align-middle text-center counter-cell ">
835+ < span class ="badge badge-secondary "> Apr 16, 2025</ span >
836+ </ td >
837+ < td class ="model-cell align-middle text-center "> o4-mini 🤔< br >
838+ < span class ="affiliation "> (2025-04-16)</ span > < br >
839+ < span class ="affiliation "> OpenAI</ span >
840+ </ td >
841+ < td class ="align-middle text-center "> < code > UNK</ code > </ td >
842+ < td class =" align-middle text-center "> < b > 60.75</ b > </ td >
843+ < td class =" align-middle text-center "> -</ td >
844+ < td class ="align-middle text-center "> -</ td >
845+ < td class ="align-middle text-center "> -</ td >
846+ </ tr >
847+
848+ < tr >
849+ < td scope ="row " class ="align-middle text-center counter-cell ">
850+ < span class ="badge badge-secondary "> Aug 07, 2025</ span >
851+ </ td >
852+ < td class ="model-cell align-middle text-center "> GPT-5< br >
853+ < span class ="affiliation "> (2025-08-07)</ span > < br >
854+ < span class ="affiliation "> OpenAI</ span >
855+ </ td >
856+ < td class ="align-middle text-center "> < code > UNK</ code > </ td >
857+ < td class =" align-middle text-center "> < b > 59.94</ b > </ td >
858+ < td class =" align-middle text-center "> -</ td >
859+ < td class ="align-middle text-center "> -</ td >
860+ < td class ="align-middle text-center "> -</ td >
861+ </ tr >
748862
749863 < tr >
750864 < td scope ="row " class ="align-middle text-center counter-cell ">
@@ -755,7 +869,7 @@ <h4>Citation</h4>
755869 < span class ="affiliation "> OpenAI</ span >
756870 </ td >
757871 < td class ="align-middle text-center "> < code > UNK</ code > </ td >
758- < td class =" align-middle text-center "> < b > 59.9 </ b > </ td >
872+ < td class =" align-middle text-center "> < b > 59.90 </ b > </ td >
759873 < td class =" align-middle text-center "> -</ td >
760874 < td class ="align-middle text-center "> -</ td >
761875 < td class ="align-middle text-center "> -</ td >
@@ -769,7 +883,22 @@ <h4>Citation</h4>
769883 < span class ="affiliation "> xAI</ span >
770884 </ td >
771885 < td class ="align-middle text-center "> < code style ="color: #207872; "> 314B</ code > </ td >
772- < td class =" align-middle text-center "> < b > 57.8</ b > </ td >
886+ < td class =" align-middle text-center "> < b > 57.80</ b > </ td >
887+ < td class =" align-middle text-center "> -</ td >
888+ < td class ="align-middle text-center "> -</ td >
889+ < td class ="align-middle text-center "> -</ td >
890+ </ tr >
891+
892+ < tr >
893+ < td scope ="row " class ="align-middle text-center counter-cell ">
894+ < span class ="badge badge-secondary "> Jun 17, 2025</ span >
895+ </ td >
896+ < td class ="model-cell align-middle text-center "> Gemini-2.5-Pro< br >
897+ < span class ="affiliation "> (2025-06-17)</ span > < br >
898+ < span class ="affiliation "> Google</ span >
899+ </ td >
900+ < td class ="align-middle text-center "> < code > UNK</ code > </ td >
901+ < td class =" align-middle text-center "> < b > 57.18</ b > </ td >
773902 < td class =" align-middle text-center "> -</ td >
774903 < td class ="align-middle text-center "> -</ td >
775904 < td class ="align-middle text-center "> -</ td >
@@ -818,6 +947,21 @@ <h4>Citation</h4>
818947 < td class ="align-middle text-center "> 34.3</ td >
819948 </ tr >
820949
950+ < tr >
951+ < td scope ="row " class ="align-middle text-center counter-cell ">
952+ < span class ="badge badge-secondary "> Apr 29, 2025</ span >
953+ </ td >
954+ < td class ="model-cell align-middle text-center "> Qwen3-32B< br >
955+ < span class ="affiliation "> (2025-04-29)</ span > < br >
956+ < span class ="affiliation "> Alibaba</ span >
957+ </ td >
958+ < td class ="align-middle text-center "> < code > UNK</ code > </ td >
959+ < td class =" align-middle text-center "> < b > 52.45</ b > </ td >
960+ < td class =" align-middle text-center "> -</ td >
961+ < td class ="align-middle text-center "> -</ td >
962+ < td class ="align-middle text-center "> -</ td >
963+ </ tr >
964+
821965 < tr >
822966 < td scope ="row " class ="align-middle text-center counter-cell ">
823967 < span class ="badge badge-secondary "> May 13, 2024</ span >
0 commit comments