Merge pull request #1 from toolpath/alambe/debugging_helpers

lambe · web-flow · commit df3df64e1567 · 2023-09-26T12:13:13.000-04:00
Debugging helpers
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,5 @@ sessions
 archive
 
 TODO.md
+
+.vscode
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "AlphaZero"
 uuid = "8ed9eb0b-7496-408d-8c8b-2119aeea02cd"
-authors = ["Jonathan Laurent <jonathan.laurent@cs.cmu.edu>"]
-version = "0.5.4"
+authors = ["Jonathan Laurent <jonathan.laurent@cs.cmu.edu>, forked by Andrew Lambe <andrew.b.lambe@gmail.com>"]
+version = "0.5.5"
 
 [deps]
 ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
diff --git a/src/mcts.jl b/src/mcts.jl
@@ -208,6 +208,12 @@ function run_simulation!(env::Env, game; η, root=true)
     else
       ϵ = root ? env.noise_ϵ : 0.
       scores = uct_scores(info, env.cpuct, ϵ, η)
+      if length(scores) != length(actions)
+        @warn "Scores array has length $(length(scores)) but Actions array has length $(length(actions))"
+        @show scores
+        @show state
+        @show actions
+      end
       action_id = argmax(scores)
       action = actions[action_id]
       wp = GI.white_playing(game)
diff --git a/src/play.jl b/src/play.jl
@@ -308,6 +308,11 @@ function play_game(gspec, player; flip_probability=0.)
     actions, π_target = think(player, game)
     τ = player_temperature(player, game, length(trace))
     π_sample = apply_temperature(π_target, τ)
+    if length(actions) != length(π_target)
+      @warn "Actions array has length $(length(actions)) but Policy array has length $(length(π_target))"
+      @show actions
+      @show π_target
+    end
     a = actions[Util.rand_categorical(π_sample)]
     GI.play!(game, a)
     push!(trace, π_target, GI.white_reward(game), GI.current_state(game))
diff --git a/src/ui/plots.jl b/src/ui/plots.jl
@@ -59,14 +59,15 @@ function performances_plot(rep::Report.Iteration)
   push!(global_content, rep.perfs_learning.time)
   glob = Plots.pie(global_labels, global_content,
     title="Global",
-    legend=:right)
+    legend=:bottomright)
   # Self-play details
   self_play =
     let gcratio = rep.perfs_self_play.gc_time / rep.perfs_self_play.time
       Plots.pie(
         ["MCTS and Inference", "GC"],
         [1 - gcratio, gcratio],
-        title="Self Play")
+        title="Self Play",
+        legend=:topright)
     end
   # Learning details
   learning = Plots.pie(
@@ -75,7 +76,8 @@ function performances_plot(rep::Report.Iteration)
       rep.learning.time_loss,
       rep.learning.time_train,
       rep.learning.time_eval],
-    title="Learning")
+    title="Learning",
+    legend=:bottomright)
   return Plots.plot(glob, self_play, learning)
 end
 

-Original file line number
+Diff line change
 archive
 TODO.md
++
 +.vscode