Skip to content

Commit

Permalink
Merge pull request #17 from axioma-ai-labs/feature/issue-6-add_settings_to_planning_module
Browse files Browse the repository at this point in the history

Update config.py and planning_module.py with new settings
  • Loading branch information
gromdimon authored Dec 25, 2024
2 parents 24fd49f + 4fecbc9 commit 5959dc6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 15 deletions.
5 changes: 5 additions & 0 deletions src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ class Settings(BaseSettings):
#: Path to the persistent Q-table file
PERSISTENT_Q_TABLE_PATH: str = "persistent_q_table.json"

#: PlanningModule parameters
PLANNING_ALPHA: float = 0.1 # Default learning rate
PLANNING_GAMMA: float = 0.95 # Default discount factor
PLANNING_EPSILON: float = 0.1 # Default exploration rate

# === OpenAI settings ===

#: OpenAI API key
Expand Down
41 changes: 26 additions & 15 deletions src/planning/planning_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,29 @@
class PlanningModule:
"""A simple Q-learning planning module for high-level autonomous decisions."""

def __init__(
self,
actions=None,
alpha=0.1,
gamma=0.95,
epsilon=0.1,
q_table_path=settings.PERSISTENT_Q_TABLE_PATH,
):
def __init__(self, actions=None, q_table_path=None):
"""
Args:
actions (List[str]): A list of strings representing possible actions
(e.g., ['idle', 'analyze_signal', 'research_news']).
alpha (float): Learning rate for Q-learning.
gamma (float): Discount factor for future rewards.
epsilon (float): Probability for exploration in epsilon-greedy policy.
q_table_path (str): Path to the file where the Q-table is saved.
## Planning Module Parameters
- **PLANNING_ALPHA**: The learning rate for the Q-learning algorithm. Controls how quickly the agent adapts to new information. Default: `0.1`.
- **PLANNING_GAMMA**: The discount factor for future rewards. Determines how much importance is given to long-term rewards. Default: `0.95`.
- **PLANNING_EPSILON**: The exploration rate for the epsilon-greedy strategy. Higher values encourage exploration, while lower values favor exploitation. Default: `0.1`.
### Tuning Tips
- **PLANNING_ALPHA**:
- Increase for faster adaptation but risk instability.
- Decrease for more stable but slower learning.
- **PLANNING_GAMMA**:
- Set closer to 1 for long-term planning.
- Set lower (e.g., 0.5) for short-term rewards.
- **PLANNING_EPSILON**:
- Increase to encourage exploration in unpredictable environments.
- Decrease for environments where optimal actions are well-known.
"""
if actions is None:
actions = [
Expand All @@ -35,10 +42,14 @@ def __init__(
]

self.actions = actions
self.alpha = alpha # Learning rate
self.gamma = gamma # Discount factor
self.epsilon = epsilon # Exploration rate
self.q_table_path = Path(q_table_path)

# Fetch Q-learning parameters from settings
self.alpha = settings.PLANNING_ALPHA # Learning rate for Q-learning (float)
self.gamma = settings.PLANNING_GAMMA # Discount factor for future rewards (float)
self.epsilon = (
settings.PLANNING_EPSILON
) # Probability for exploration in epsilon-greedy policy (float)
self.q_table_path = Path(q_table_path or settings.PERSISTENT_Q_TABLE_PATH)

# Load Q-table from file if it exists, otherwise initialize an empty table
self.q_table = self._load_q_table()
Expand Down

0 comments on commit 5959dc6

Please sign in to comment.