|
1 | | -random positive reward 2000 frames score: 135 |
2 | | -qLearn(reduceStateAndActionSeeAll)(learn) positive reward 2000 frames score: 476 |
3 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage) positive reward 2000 frames score: 470 |
4 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage2) positive reward 2000 frames score: 231 |
5 | | -qLearn(reduceStateAndActionSeeAllDistance)(learn) positive reward 2000 frames score: 289 |
6 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) positive reward 2000 frames score: 203 |
7 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage2) positive reward 2000 frames score: 231 |
8 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learn) positive reward 2000 frames score: 222 |
9 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) positive reward 2000 frames score: 127 |
10 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage2) positive reward 2000 frames score: 231 |
11 | | -random negative reward 2000 frames score: -119 |
12 | | -qLearn(reduceStateAndActionSeeAll)(learn) negative reward 2000 frames score: -18 |
13 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage) negative reward 2000 frames score: -23 |
14 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage2) negative reward 2000 frames score: -231 |
15 | | -qLearn(reduceStateAndActionSeeAllDistance)(learn) negative reward 2000 frames score: -79 |
16 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) negative reward 2000 frames score: -109 |
17 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage2) negative reward 2000 frames score: -231 |
18 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learn) negative reward 2000 frames score: -58 |
19 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) negative reward 2000 frames score: -61 |
20 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage2) negative reward 2000 frames score: -231 |
21 | | -random positive reward 20000 frames score: 1256 |
22 | | -qLearn(reduceStateAndActionSeeAll)(learn) positive reward 20000 frames score: 4976 |
23 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage) positive reward 20000 frames score: 4970 |
24 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage2) positive reward 20000 frames score: 2331 |
25 | | -qLearn(reduceStateAndActionSeeAllDistance)(learn) positive reward 20000 frames score: 3289 |
26 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) positive reward 20000 frames score: 4497 |
27 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage2) positive reward 20000 frames score: 2331 |
28 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learn) positive reward 20000 frames score: 2322 |
29 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) positive reward 20000 frames score: 1377 |
30 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage2) positive reward 20000 frames score: 2331 |
31 | | -random negative reward 20000 frames score: -1275 |
32 | | -qLearn(reduceStateAndActionSeeAll)(learn) negative reward 20000 frames score: -189 |
33 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage) negative reward 20000 frames score: -252 |
34 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage2) negative reward 20000 frames score: -2331 |
35 | | -qLearn(reduceStateAndActionSeeAllDistance)(learn) negative reward 20000 frames score: -497 |
36 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) negative reward 20000 frames score: -971 |
37 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage2) negative reward 20000 frames score: -2331 |
38 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learn) negative reward 20000 frames score: -586 |
39 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) negative reward 20000 frames score: -617 |
40 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage2) negative reward 20000 frames score: -2331 |
41 | | -random positive reward 200000 frames score: 12420 |
42 | | -qLearn(reduceStateAndActionSeeAll)(learn) positive reward 200000 frames score: 49976 |
43 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage) positive reward 200000 frames score: 49970 |
44 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage2) positive reward 200000 frames score: 23331 |
45 | | -qLearn(reduceStateAndActionSeeAllDistance)(learn) positive reward 200000 frames score: 33289 |
46 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) positive reward 200000 frames score: 49497 |
47 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage2) positive reward 200000 frames score: 23331 |
48 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learn) positive reward 200000 frames score: 23322 |
49 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) positive reward 200000 frames score: 13898 |
50 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage2) positive reward 200000 frames score: 23331 |
51 | | -random negative reward 200000 frames score: -12459 |
52 | | -qLearn(reduceStateAndActionSeeAll)(learn) negative reward 200000 frames score: -1892 |
53 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage) negative reward 200000 frames score: -2579 |
54 | | -qLearn(reduceStateAndActionSeeAll)(learnWithAverage2) negative reward 200000 frames score: -23331 |
55 | | -qLearn(reduceStateAndActionSeeAllDistance)(learn) negative reward 200000 frames score: -4868 |
56 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) negative reward 200000 frames score: -9505 |
57 | | -qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage2) negative reward 200000 frames score: -23331 |
58 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learn) negative reward 200000 frames score: -5690 |
59 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) negative reward 200000 frames score: -6272 |
60 | | -qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage2) negative reward 200000 frames score: -23331 |
| 1 | +random positive reward 2000 frames score: 123 |
| 2 | +qLearn(reduceStateAndActionSeeAll)(learn) positive reward 2000 frames score: 463 |
| 3 | +qLearn(reduceStateAndActionSeeAll)(learnWithAverage) positive reward 2000 frames score: 465 |
| 4 | +qLearn(reduceStateAndActionSeeAllDistance)(learn) positive reward 2000 frames score: 420 |
| 5 | +qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) positive reward 2000 frames score: 214 |
| 6 | +qLearn(reduceStateAndActionSeeNearestOnly)(learn) positive reward 2000 frames score: 233 |
| 7 | +qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) positive reward 2000 frames score: 129 |
| 8 | +random negative reward 2000 frames score: -120 |
| 9 | +qLearn(reduceStateAndActionSeeAll)(learn) negative reward 2000 frames score: -18 |
| 10 | +qLearn(reduceStateAndActionSeeAll)(learnWithAverage) negative reward 2000 frames score: -23 |
| 11 | +qLearn(reduceStateAndActionSeeAllDistance)(learn) negative reward 2000 frames score: -83 |
| 12 | +qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) negative reward 2000 frames score: -128 |
| 13 | +qLearn(reduceStateAndActionSeeNearestOnly)(learn) negative reward 2000 frames score: -49 |
| 14 | +qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) negative reward 2000 frames score: -60 |
| 15 | +random positive reward 20000 frames score: 1241 |
| 16 | +qLearn(reduceStateAndActionSeeAll)(learn) positive reward 20000 frames score: 4963 |
| 17 | +qLearn(reduceStateAndActionSeeAll)(learnWithAverage) positive reward 20000 frames score: 4965 |
| 18 | +qLearn(reduceStateAndActionSeeAllDistance)(learn) positive reward 20000 frames score: 4920 |
| 19 | +qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) positive reward 20000 frames score: 4168 |
| 20 | +qLearn(reduceStateAndActionSeeNearestOnly)(learn) positive reward 20000 frames score: 2333 |
| 21 | +qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) positive reward 20000 frames score: 1438 |
| 22 | +random negative reward 20000 frames score: -1235 |
| 23 | +qLearn(reduceStateAndActionSeeAll)(learn) negative reward 20000 frames score: -180 |
| 24 | +qLearn(reduceStateAndActionSeeAll)(learnWithAverage) negative reward 20000 frames score: -255 |
| 25 | +qLearn(reduceStateAndActionSeeAllDistance)(learn) negative reward 20000 frames score: -554 |
| 26 | +qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) negative reward 20000 frames score: -983 |
| 27 | +qLearn(reduceStateAndActionSeeNearestOnly)(learn) negative reward 20000 frames score: -549 |
| 28 | +qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) negative reward 20000 frames score: -621 |
| 29 | +random positive reward 200000 frames score: 12442 |
| 30 | +qLearn(reduceStateAndActionSeeAll)(learn) positive reward 200000 frames score: 49963 |
| 31 | +qLearn(reduceStateAndActionSeeAll)(learnWithAverage) positive reward 200000 frames score: 49965 |
| 32 | +qLearn(reduceStateAndActionSeeAllDistance)(learn) positive reward 200000 frames score: 49920 |
| 33 | +qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) positive reward 200000 frames score: 46168 |
| 34 | +qLearn(reduceStateAndActionSeeNearestOnly)(learn) positive reward 200000 frames score: 23333 |
| 35 | +qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) positive reward 200000 frames score: 14382 |
| 36 | +random negative reward 200000 frames score: -12370 |
| 37 | +qLearn(reduceStateAndActionSeeAll)(learn) negative reward 200000 frames score: -1884 |
| 38 | +qLearn(reduceStateAndActionSeeAll)(learnWithAverage) negative reward 200000 frames score: -2587 |
| 39 | +qLearn(reduceStateAndActionSeeAllDistance)(learn) negative reward 200000 frames score: -4823 |
| 40 | +qLearn(reduceStateAndActionSeeAllDistance)(learnWithAverage) negative reward 200000 frames score: -9479 |
| 41 | +qLearn(reduceStateAndActionSeeNearestOnly)(learn) negative reward 200000 frames score: -5707 |
| 42 | +qLearn(reduceStateAndActionSeeNearestOnly)(learnWithAverage) negative reward 200000 frames score: -6299 |
0 commit comments