Skip to content

Commit

Permalink
Merge pull request #17 from axioma-ai-labs/feature/issue-6-add_settings_to_planning_module
Browse files Browse the repository at this point in the history

Update config.py and planning_module.py with new settings
  • Loading branch information
gromdimon authored Dec 25, 2024
2 parents 24fd49f + 4fecbc9 commit 5959dc6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 15 deletions.
5 changes: 5 additions & 0 deletions src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ class Settings(BaseSettings):
#: Path to the persistent Q-table file
PERSISTENT_Q_TABLE_PATH: str = "persistent_q_table.json"

#: PlanningModule parameters
PLANNING_ALPHA: float = 0.1 # Default learning rate
PLANNING_GAMMA: float = 0.95 # Default discount factor
PLANNING_EPSILON: float = 0.1 # Default exploration rate

# === OpenAI settings ===

#: OpenAI API key
Expand Down
41 changes: 26 additions & 15 deletions src/planning/planning_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,29 @@
class PlanningModule:
"""A simple Q-learning planning module for high-level autonomous decisions."""

def __init__(
self,
actions=None,
alpha=0.1,
gamma=0.95,
epsilon=0.1,
q_table_path=settings.PERSISTENT_Q_TABLE_PATH,
):
def __init__(self, actions=None, q_table_path=None):
"""
Args:
actions (List[str]): A list of strings representing possible actions
(e.g., ['idle', 'analyze_signal', 'research_news']).
alpha (float): Learning rate for Q-learning.
gamma (float): Discount factor for future rewards.
epsilon (float): Probability for exploration in epsilon-greedy policy.
q_table_path (str): Path to the file where the Q-table is saved.
## Planning Module Parameters
- **PLANNING_ALPHA**: The learning rate for the Q-learning algorithm. Controls how quickly the agent adapts to new information. Default: `0.1`.
- **PLANNING_GAMMA**: The discount factor for future rewards. Determines how much importance is given to long-term rewards. Default: `0.95`.
- **PLANNING_EPSILON**: The exploration rate for the epsilon-greedy strategy. Higher values encourage exploration, while lower values favor exploitation. Default: `0.1`.
### Tuning Tips
- **PLANNING_ALPHA**:
- Increase for faster adaptation but risk instability.
- Decrease for more stable but slower learning.
- **PLANNING_GAMMA**:
- Set closer to 1 for long-term planning.
- Set lower (e.g., 0.5) for short-term rewards.
- **PLANNING_EPSILON**:
- Increase to encourage exploration in unpredictable environments.
- Decrease for environments where optimal actions are well-known.
"""
if actions is None:
actions = [
Expand All @@ -35,10 +42,14 @@ def __init__(
]

self.actions = actions
self.alpha = alpha # Learning rate
self.gamma = gamma # Discount factor
self.epsilon = epsilon # Exploration rate
self.q_table_path = Path(q_table_path)

# Fetch Q-learning parameters from settings
self.alpha = settings.PLANNING_ALPHA # Learning rate for Q-learning (float)
self.gamma = settings.PLANNING_GAMMA # Discount factor for future rewards (float)
self.epsilon = (
settings.PLANNING_EPSILON
) # Probability for exploration in epsilon-greedy policy (float)
self.q_table_path = Path(q_table_path or settings.PERSISTENT_Q_TABLE_PATH)

# Load Q-table from file if it exists, otherwise initialize an empty table
self.q_table = self._load_q_table()
Expand Down

0 comments on commit 5959dc6

Please sign in to comment.