Skip to content

Commit 8c53a09

Browse files
author
BlackCatXJ
committed
Update o4-mini, GPT5, Gemini-2.5-pro, Qwen3-32B performance
1 parent 0366929 commit 8c53a09

1 file changed

Lines changed: 149 additions & 5 deletions

File tree

index.html

Lines changed: 149 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,34 @@ <h4>Citation</h4>
281281
<td class="align-middle text-center"><b>85.91</b></td>
282282
</tr>
283283

284+
<tr>
285+
<td scope="row" class="align-middle text-center counter-cell">
286+
<span class="badge badge-secondary">Apr 16, 2025</span>
287+
</td>
288+
<td class="align-middle text-center">o4-mini-high + DP<br>
289+
<span class="affiliation">baseline</span>
290+
</td>
291+
<td class="align-middle text-center">82.29</td>
292+
<td class="align-middle text-center">81.11</td>
293+
<td class="align-middle text-center">42.3</td>
294+
<td class="align-middle text-center">54.0</td>
295+
<td class="align-middle text-center"><b>61.69</b></td>
296+
</tr>
297+
298+
<tr>
299+
<td scope="row" class="align-middle text-center counter-cell">
300+
<span class="badge badge-secondary">Apr 16, 2025</span>
301+
</td>
302+
<td class="align-middle text-center">o4-mini + DP<br>
303+
<span class="affiliation">baseline</span>
304+
</td>
305+
<td class="align-middle text-center">83.33</td>
306+
<td class="align-middle text-center">80.35</td>
307+
<td class="align-middle text-center">40.5</td>
308+
<td class="align-middle text-center">50.0</td>
309+
<td class="align-middle text-center"><b>60.75</b></td>
310+
</tr>
311+
284312
<tr>
285313
<td scope="row" class="align-middle text-center counter-cell">
286314
<span class="badge badge-secondary">Jul 22, 2025</span>
@@ -295,18 +323,32 @@ <h4>Citation</h4>
295323
<td class="align-middle text-center"><b>60.10</b></td>
296324
</tr>
297325

326+
<tr>
327+
<td scope="row" class="align-middle text-center counter-cell">
328+
<span class="badge badge-secondary">Aug 07, 2025</span>
329+
</td>
330+
<td class="align-middle text-center">GPT-5 + DP<br>
331+
<span class="affiliation">baseline</span>
332+
</td>
333+
<td class="align-middle text-center">83.33</td>
334+
<td class="align-middle text-center">82.37</td>
335+
<td class="align-middle text-center">36.04</td>
336+
<td class="align-middle text-center">58.0</td>
337+
<td class="align-middle text-center"><b>59.94</b></td>
338+
</tr>
339+
298340
<tr>
299341
<td scope="row" class="align-middle text-center counter-cell">
300342
<span class="badge badge-secondary">Jan 31, 2025</span>
301343
</td>
302-
<td class="align-middle text-center">o3-mini-2025-01-31 + DP<br>
344+
<td class="align-middle text-center">o3-mini + DP<br>
303345
<span class="affiliation">baseline</span>
304346
</td>
305347
<td class="align-middle text-center">86.46</td>
306348
<td class="align-middle text-center">82.07</td>
307349
<td class="align-middle text-center">35.56</td>
308350
<td class="align-middle text-center">32.0</td>
309-
<td class="align-middle text-center"><b>59.9</b></td>
351+
<td class="align-middle text-center"><b>59.90</b></td>
310352
</tr>
311353

312354
<tr>
@@ -320,7 +362,21 @@ <h4>Citation</h4>
320362
<td class="align-middle text-center">73.8</td>
321363
<td class="align-middle text-center">40.54</td>
322364
<td class="align-middle text-center">16.0</td>
323-
<td class="align-middle text-center"><b>57.8</b></td>
365+
<td class="align-middle text-center"><b>57.80</b></td>
366+
</tr>
367+
368+
<tr>
369+
<td scope="row" class="align-middle text-center counter-cell">
370+
<span class="badge badge-secondary">Jun 17, 2025</span>
371+
</td>
372+
<td class="align-middle text-center">Gemini-2.5-Pro + DP<br>
373+
<span class="affiliation">baseline</span>
374+
</td>
375+
<td class="align-middle text-center">84.38</td>
376+
<td class="align-middle text-center">79.6</td>
377+
<td class="align-middle text-center">31.86</td>
378+
<td class="align-middle text-center">66.0</td>
379+
<td class="align-middle text-center"><b>57.18</b></td>
324380
</tr>
325381

326382
<tr>
@@ -365,6 +421,20 @@ <h4>Citation</h4>
365421
<td class="align-middle text-center"><b>52.73</b></td>
366422
</tr>
367423

424+
<tr>
425+
<td scope="row" class="align-middle text-center counter-cell">
426+
<span class="badge badge-secondary">Apr 29, 2025</span>
427+
</td>
428+
<td class="align-middle text-center">Qwen3-32B<br>
429+
<span class="affiliation">baseline</span>
430+
</td>
431+
<td class="align-middle text-center">83.33</td>
432+
<td class="align-middle text-center">72.54</td>
433+
<td class="align-middle text-center">28.16</td>
434+
<td class="align-middle text-center">18.0</td>
435+
<td class="align-middle text-center"><b>52.45</b></td>
436+
</tr>
437+
368438
<tr>
369439
<td scope="row" class="align-middle text-center counter-cell">
370440
<span class="badge badge-secondary">May 13, 2024</span>
@@ -745,6 +815,50 @@ <h4>Citation</h4>
745815
</thead>
746816
<tbody>
747817

818+
<tr>
819+
<td scope="row" class="align-middle text-center counter-cell">
820+
<span class="badge badge-secondary">Apr 16, 2025</span>
821+
</td>
822+
<td class="model-cell align-middle text-center">o4-mini-high 🤔<br>
823+
<span class="affiliation">(2025-04-16)</span><br>
824+
<span class="affiliation">OpenAI</span>
825+
</td>
826+
<td class="align-middle text-center"><code>UNK</code></td>
827+
<td class=" align-middle text-center"><b>61.69</b></td>
828+
<td class=" align-middle text-center">-</td>
829+
<td class="align-middle text-center">-</td>
830+
<td class="align-middle text-center">-</td>
831+
</tr>
832+
833+
<tr>
834+
<td scope="row" class="align-middle text-center counter-cell">
835+
<span class="badge badge-secondary">Apr 16, 2025</span>
836+
</td>
837+
<td class="model-cell align-middle text-center">o4-mini 🤔<br>
838+
<span class="affiliation">(2025-04-16)</span><br>
839+
<span class="affiliation">OpenAI</span>
840+
</td>
841+
<td class="align-middle text-center"><code>UNK</code></td>
842+
<td class=" align-middle text-center"><b>60.75</b></td>
843+
<td class=" align-middle text-center">-</td>
844+
<td class="align-middle text-center">-</td>
845+
<td class="align-middle text-center">-</td>
846+
</tr>
847+
848+
<tr>
849+
<td scope="row" class="align-middle text-center counter-cell">
850+
<span class="badge badge-secondary">Aug 07, 2025</span>
851+
</td>
852+
<td class="model-cell align-middle text-center">GPT-5<br>
853+
<span class="affiliation">(2025-08-07)</span><br>
854+
<span class="affiliation">OpenAI</span>
855+
</td>
856+
<td class="align-middle text-center"><code>UNK</code></td>
857+
<td class=" align-middle text-center"><b>59.94</b></td>
858+
<td class=" align-middle text-center">-</td>
859+
<td class="align-middle text-center">-</td>
860+
<td class="align-middle text-center">-</td>
861+
</tr>
748862

749863
<tr>
750864
<td scope="row" class="align-middle text-center counter-cell">
@@ -755,7 +869,7 @@ <h4>Citation</h4>
755869
<span class="affiliation">OpenAI</span>
756870
</td>
757871
<td class="align-middle text-center"><code>UNK</code></td>
758-
<td class=" align-middle text-center"><b>59.9</b></td>
872+
<td class=" align-middle text-center"><b>59.90</b></td>
759873
<td class=" align-middle text-center">-</td>
760874
<td class="align-middle text-center">-</td>
761875
<td class="align-middle text-center">-</td>
@@ -769,7 +883,22 @@ <h4>Citation</h4>
769883
<span class="affiliation">xAI</span>
770884
</td>
771885
<td class="align-middle text-center"><code style="color: #207872;">314B</code></td>
772-
<td class=" align-middle text-center"><b>57.8</b></td>
886+
<td class=" align-middle text-center"><b>57.80</b></td>
887+
<td class=" align-middle text-center">-</td>
888+
<td class="align-middle text-center">-</td>
889+
<td class="align-middle text-center">-</td>
890+
</tr>
891+
892+
<tr>
893+
<td scope="row" class="align-middle text-center counter-cell">
894+
<span class="badge badge-secondary">Jun 17, 2025</span>
895+
</td>
896+
<td class="model-cell align-middle text-center">Gemini-2.5-Pro<br>
897+
<span class="affiliation">(2025-06-17)</span><br>
898+
<span class="affiliation">Google</span>
899+
</td>
900+
<td class="align-middle text-center"><code>UNK</code></td>
901+
<td class=" align-middle text-center"><b>57.18</b></td>
773902
<td class=" align-middle text-center">-</td>
774903
<td class="align-middle text-center">-</td>
775904
<td class="align-middle text-center">-</td>
@@ -818,6 +947,21 @@ <h4>Citation</h4>
818947
<td class="align-middle text-center">34.3</td>
819948
</tr>
820949

950+
<tr>
951+
<td scope="row" class="align-middle text-center counter-cell">
952+
<span class="badge badge-secondary">Apr 29, 2025</span>
953+
</td>
954+
<td class="model-cell align-middle text-center">Qwen3-32B<br>
955+
<span class="affiliation">(2025-04-29)</span><br>
956+
<span class="affiliation">Alibaba</span>
957+
</td>
958+
<td class="align-middle text-center"><code>UNK</code></td>
959+
<td class=" align-middle text-center"><b>52.45</b></td>
960+
<td class=" align-middle text-center">-</td>
961+
<td class="align-middle text-center">-</td>
962+
<td class="align-middle text-center">-</td>
963+
</tr>
964+
821965
<tr>
822966
<td scope="row" class="align-middle text-center counter-cell">
823967
<span class="badge badge-secondary">May 13, 2024</span>

0 commit comments

Comments
 (0)