Bugfix: mutate state in Nego::apply, plus some cleanups and tests

thomasmarsh · Jan 25, 2024 · db13116 · db13116
1 parent ddb73c4
commit db13116
Show file tree

Hide file tree

Showing 27 changed files with 32,185 additions and 114 deletions.
diff --git a/bin/demo.rs b/bin/demo.rs
@@ -1,13 +1,44 @@
-use nego::{agent, core};
+use nego::{agent, core::game, core::ray::Rays};
+
+use minimax::Game;
 
 #[macro_use]
 extern crate log;
 
+pub fn demo_minimax() {
+    let mut state = game::State::new();
+    let mut s = 0;
+
+    let timeout = std::time::Duration::from_secs(60);
+    loop {
+        state.dump();
+        let new_state_opt = if s == 0 {
+            agent::AIPlayer::Random.step(&state, timeout)
+        } else {
+            agent::AIPlayer::Iterative.step(&state, timeout)
+        };
+        s = 1 - s;
+
+        if let Some(new_state) = new_state_opt {
+            state = new_state;
+        } else {
+            break;
+        }
+    }
+    println!(
+        "Winner: {:?} (b={}, w={})",
+        agent::Nego::get_winner(&state),
+        state.board.black.points(),
+        state.board.white.points()
+    );
+    state.dump();
+}
+
 fn main() {
     pretty_env_logger::init();
 
     info!("initializing ray LUT");
-    core::ray::Rays::build_lut();
+    Rays::build_lut();
 
-    agent::demo_minimax();
+    demo_minimax();
 }
diff --git a/bin/konego.rs b/bin/konego.rs
@@ -143,7 +143,7 @@ impl MyGame {
             let timeout = std::time::Duration::from_secs(60);
             let new_state_opt = match work.current {
                 Black => Parallel.step(&work, timeout),
-                White => Random.step(&work, timeout),
+                White => Iterative.step(&work, timeout),
             };
 
             if let Some(new_state) = new_state_opt {

diff --git a/src/agent/mcts.rs b/src/agent/mcts.rs
@@ -39,5 +39,7 @@ pub fn step(state: &State, timeout: std::time::Duration) -> Option<State> {
     let mut new_state = state.clone();
     strategy
         .choose_move(&new_state)
-        .and_then(|m| Nego::apply(&mut new_state, m))
+        .and_then(|m| Nego::apply(&mut new_state, m));
+
+    Some(new_state)
 }
diff --git a/src/agent/mod.rs b/src/agent/mod.rs
@@ -1,5 +1,5 @@
-mod mcts;
-mod negamax;
+pub mod mcts;
+pub mod negamax;
 
 use crate::core::{
     game::{Color, State},
@@ -61,12 +61,9 @@ impl minimax::Game for Nego {
     }
 
     #[inline]
-    fn apply(s: &mut State, m: Move) -> Option<State> {
-        let mut state = s.clone();
-        state.place(m);
-        state.current = state.current.next();
-        state.update_hash(m);
-        Some(state)
+    fn apply(state: &mut State, m: Move) -> Option<State> {
+        state.apply(m);
+        None
     }
 
     #[inline]
@@ -84,12 +81,11 @@ impl minimax::Game for Nego {
 
     fn max_table_index() -> u16 {
         let p = PieceTypeId::Kunoji4.def();
-        ((p.lut_offset + p.moves) * 2) as u16
+        (p.lut_offset + p.moves) as u16
     }
 }
 
-#[allow(unused)]
-fn step_random(state: &State) -> Option<State> {
+pub fn step_random(state: &State) -> Option<State> {
     use rand::Rng;
     let mut rng = rand::thread_rng();
 
@@ -100,48 +96,6 @@ fn step_random(state: &State) -> Option<State> {
     }
     let idx: usize = rng.gen_range(0..ms.len());
     let mut new_state = state.clone();
-    new_state.place(ms[idx]);
-    new_state.current = new_state.current.next();
-
-    new_state.board.print_color_map();
+    new_state.apply(ms[idx]);
     Some(new_state)
 }
-
-#[allow(unused)]
-pub fn demo_rnd() {
-    let mut state = State::new();
-
-    loop {
-        if let Some(new_state) = step_random(&state) {
-            state = new_state;
-        }
-    }
-}
-
-#[allow(unused)]
-pub fn demo_minimax() {
-    let mut state = State::new();
-    let mut s = 0;
-    loop {
-        state.dump();
-        let new_state_opt = if s == 0 {
-            step_random(&state)
-        } else {
-            negamax::step_iterative(&state, std::time::Duration::from_secs(5))
-        };
-        s = 1 - s;
-
-        if let Some(new_state) = new_state_opt {
-            state = new_state;
-        } else {
-            break;
-        }
-    }
-    println!(
-        "Winner: {:?} (b={}, w={})",
-        Nego::get_winner(&state),
-        state.board.black.points(),
-        state.board.white.points()
-    );
-    state.dump();
-}
diff --git a/src/agent/negamax.rs b/src/agent/negamax.rs
@@ -3,7 +3,9 @@ use crate::{
     core::game::{Color, State},
 };
 
-use minimax::{Game, IterativeOptions, ParallelOptions, ParallelSearch, Strategy};
+use minimax::{Game, IterativeOptions, IterativeSearch, ParallelOptions, ParallelSearch, Strategy};
+
+use std::sync::{Mutex, MutexGuard, OnceLock};
 
 #[derive(Clone)]
 struct Eval;
@@ -28,37 +30,61 @@ impl minimax::Evaluator for Eval {
 fn iterative_opts() -> IterativeOptions {
     IterativeOptions::new()
         .with_table_byte_size(64_000)
+        .with_mtdf()
+        .with_singular_extension()
+        // TODO: adding countermoves triggers a panic.
+        //
+        // Message:  index out of bounds: the len is 1725 but the index is 1889
+        // Location: /Users/tmarsh/.cargo/registry/src/index.crates.io-6f17d22bba15001f/minimax-0.5.3/src/strategies/table.rs:407
+        //
+        // .with_countermoves()
+        // .with_countermove_history()
         .verbose()
 }
 
-pub fn step_parallel(state: &State, timeout: std::time::Duration) -> Option<State> {
-    let mut strategy = ParallelSearch::new(Eval, iterative_opts(), ParallelOptions::new());
-    strategy.set_timeout(timeout);
+fn parallel_opts() -> ParallelOptions {
+    ParallelOptions::new()
+}
 
-    let mut new_state = state.clone();
-    strategy
-        .choose_move(&new_state)
-        .and_then(|m| Nego::apply(&mut new_state, m))
+static PARALLEL_CELL: OnceLock<Mutex<ParallelSearch<Eval>>> = OnceLock::new();
+static ITERATIVE_CELL: OnceLock<Mutex<IterativeSearch<Eval>>> = OnceLock::new();
+
+fn get_parallel_agent() -> MutexGuard<'static, ParallelSearch<Eval>> {
+    PARALLEL_CELL
+        .get_or_init(|| Mutex::new(ParallelSearch::new(Eval, iterative_opts(), parallel_opts())))
+        .lock()
+        .unwrap()
 }
 
-pub fn step_negamax(state: &State) -> Option<State> {
-    let mut strategy = minimax::Negamax::new(Eval, 4);
-    if Nego::get_winner(state).is_some() {
-        return None;
-    }
+fn get_iterative_agent() -> MutexGuard<'static, IterativeSearch<Eval>> {
+    ITERATIVE_CELL
+        .get_or_init(|| Mutex::new(IterativeSearch::new(Eval, iterative_opts())))
+        .lock()
+        .unwrap()
+}
+
+pub fn step<S>(
+    state: &State,
+    timeout: std::time::Duration,
+    strategy: &mut MutexGuard<'static, S>,
+) -> Option<State>
+where
+    S: Strategy<Nego>,
+{
+    strategy.set_timeout(timeout);
 
     let mut new_state = state.clone();
     strategy
         .choose_move(&new_state)
-        .and_then(|m| Nego::apply(&mut new_state, m))
+        .and_then(|m| Nego::apply(&mut new_state, m));
+
+    Some(new_state)
 }
 
 pub fn step_iterative(state: &State, timeout: std::time::Duration) -> Option<State> {
-    let mut strategy = minimax::IterativeSearch::new(Eval, iterative_opts());
-    strategy.set_timeout(timeout);
+    step(state, timeout, &mut get_iterative_agent())
+}
 
-    let mut new_state = state.clone();
-    strategy
-        .choose_move(&new_state)
-        .and_then(|m| Nego::apply(&mut new_state, m))
+pub fn step_parallel(state: &State, timeout: std::time::Duration) -> Option<State> {
+    step(state, timeout, &mut get_parallel_agent())
 }
diff --git a/src/core/game.rs b/src/core/game.rs
@@ -379,6 +379,13 @@ impl State {
         moves.append(&mut ma.0)
     }
 
+    #[inline]
+    pub fn apply(&mut self, m: Move) {
+        self.place(m);
+        self.update_hash(m);
+        self.current = self.current.next();
+    }
+
     #[inline]
     pub fn place(&mut self, m: Move) {
         self.capture_flag = self.board.place(self.current, m);

diff --git a/tests/game.rs b/tests/game.rs
@@ -54,21 +54,22 @@ fn no_capture_boss() {
 fn snapshot() {
     use rand::{seq::SliceRandom, SeedableRng};
     let mut rng = rand::rngs::StdRng::seed_from_u64(123);
-    let mut state = State::new();
-    let mut moves = Vec::new();
-    let mut history = Vec::new();
-    loop {
-        history.push(state.clone());
-        moves.truncate(0);
-        state.get_moves(&mut moves);
-        if moves.is_empty() {
-            break;
+
+    for _ in 0..20 {
+        let mut state = State::new();
+        let mut moves = Vec::new();
+        let mut history = Vec::new();
+        loop {
+            history.push(state.clone());
+            moves.truncate(0);
+            state.get_moves(&mut moves);
+            if moves.is_empty() {
+                break;
+            }
+            let m = *moves.choose(&mut rng).unwrap();
+            state.apply(m);
         }
-        let m = *moves.choose(&mut rng).unwrap();
-        state.place(m);
-        state.current = state.current.next();
-        state.update_hash(m);
-    }
 
-    insta::assert_debug_snapshot!(history);
+        insta::assert_debug_snapshot!(history);
+    }
 }