-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from cmungall/program-learning-claude
Program learning claude
- Loading branch information
Showing
343 changed files
with
20,870 additions
and
0 deletions.
There are no files selected for viewing
55 changes: 55 additions & 0 deletions
55
learned/claude-3-sonnet/11_12_saturated_fatty_acyl_CoA_4__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
""" | ||
Classifies: CHEBI:84948 11,12-saturated fatty acyl-CoA(4-) | ||
""" | ||
""" | ||
Classifies: CHEBI:84948 11,12-saturated fatty acyl-CoA(4-) | ||
""" | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
from rdkit.Chem import rdMolDescriptors | ||
|
||
def _carbon_only_paths(mol, start_idx):
    """
    Enumerate every maximal simple path of carbon atoms starting at start_idx.

    A path is extended through carbon neighbours that are not already on it and
    is recorded once it cannot be extended any further.

    Args:
        mol: RDKit molecule to walk.
        start_idx (int): Index of the atom every path starts from.

    Returns:
        list[list[int]]: Atom-index paths; each begins with start_idx.
    """
    finished = []
    pending = [[start_idx]]
    while pending:
        path = pending.pop()
        tail = mol.GetAtomWithIdx(path[-1])
        next_carbons = [
            nbr.GetIdx()
            for nbr in tail.GetNeighbors()
            if nbr.GetAtomicNum() == 6 and nbr.GetIdx() not in path
        ]
        if next_carbons:
            pending.extend(path + [idx] for idx in next_carbons)
        else:
            finished.append(path)
    return finished


def is_11_12_saturated_fatty_acyl_CoA_4__(smiles: str):
    """
    Determines if a molecule is a 11,12-saturated fatty acyl-CoA(4-) based on its SMILES string.
    A 11,12-saturated fatty acyl-CoA(4-) is a fatty acyl-CoA(4-) where the 11-12 bond
    of the fatty acyl group is saturated.

    The acyl chain is numbered from the thioester carbonyl carbon (C1) along the
    longest carbon-only path. Chains with fewer than 12 carbons have no 11-12
    bond and are rejected.

    Args:
        smiles (str): SMILES string of the molecule

    Returns:
        bool: True if molecule is a 11,12-saturated fatty acyl-CoA(4-), False otherwise
        str: Reason for classification
    """
    # Parse SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, "Invalid SMILES string"

    # CoA(4-) fragment written from the thiol sulfur outwards:
    # cysteamine - beta-alanine - pantoate (including its CH(OH) carbon) -
    # diphosphate - 3'-phospho-adenosine. Stereo marks are omitted because
    # substructure matching ignores chirality unless explicitly requested.
    coa_pattern = Chem.MolFromSmarts(
        "[SX2]CCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)([O-])OP(=O)([O-])"
        "OCC1OC(C(O)C1OP(=O)([O-])[O-])n1cnc2c(N)ncnc12"
    )
    # The sulfur is the first query atom, hence index 0 of every match.
    coa_sulfurs = {match[0] for match in mol.GetSubstructMatches(coa_pattern)}
    if not coa_sulfurs:
        return False, "No CoA substructure found"

    # The acyl group is the carbonyl carbon thioesterified to the CoA sulfur.
    # Query atom order: 0 = carbonyl C, 1 = carbonyl O, 2 = S, 3 = alpha carbon.
    thioester_pattern = Chem.MolFromSmarts("[CX3](=[OX1])([SX2])[#6]")
    carbonyl_carbons = sorted(
        {
            match[0]
            for match in mol.GetSubstructMatches(thioester_pattern)
            if match[2] in coa_sulfurs
        }
    )
    if not carbonyl_carbons:
        return False, "No fatty acyl chain found"

    # Check for saturation at 11-12 position
    for c1_idx in carbonyl_carbons:
        # Only carbons are followed, so the walk cannot cross the sulfur into CoA.
        paths = _carbon_only_paths(mol, c1_idx)
        longest = max(len(path) for path in paths)
        if longest < 12:
            continue  # Acyl chain too short

        for path in paths:
            if len(path) != longest:
                continue
            idx11, idx12 = path[10], path[11]  # C11 and C12 (C1 is path[0])
            atom11 = mol.GetAtomWithIdx(idx11)
            atom12 = mol.GetAtomWithIdx(idx12)
            if atom11.GetIsAromatic() or atom12.GetIsAromatic():
                continue  # Aromatic atoms

            bond = mol.GetBondBetweenAtoms(idx11, idx12)
            if bond is not None and bond.GetBondType() == Chem.BondType.SINGLE:
                return True, "Fatty acyl chain has a saturated bond at position 11-12"

    return False, "Fatty acyl chain does not have a saturated bond at position 11-12"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
""" | ||
Classifies: CHEBI:47787 11-oxo steroid | ||
""" | ||
""" | ||
Classifies: 11-oxo steroid | ||
Definition: Any oxo steroid that has an oxo substituent at position 11 | ||
""" | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
|
||
def is_11_oxo_steroid(smiles: str):
    """
    Determines if a molecule is an 11-oxo steroid based on its SMILES string.

    The molecule must contain the 17-carbon 6-6-6-5 steroid nucleus with a
    carbonyl oxygen on C11, and fall inside the carbon-count, molecular-weight
    and ring-count windows used below.

    Args:
        smiles (str): SMILES string of the molecule
    Returns:
        tuple: (bool, str) - (True if molecule is an 11-oxo steroid, reason for classification)
    """
    # Imported locally: ``from rdkit import Chem`` does not by itself make the
    # ``Chem.Descriptors`` attribute available.
    from rdkit.Chem import Descriptors

    # Parse SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, "Invalid SMILES string"

    # Steroid nucleus written in a fixed atom order so that single positions
    # can be constrained. Any bond type (~) is allowed so that unsaturated and
    # aromatic-ring steroids match as well.
    # Atom order: C3 C2 C1 C10 C5(C4) C6 C7 C8 C9 | C11 | C12 C13 C17 C16 C15 C14
    rings_ab = "[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~1~[#6]~2"
    rings_cd = "~[#6]~[#6]~2~[#6]~[#6]~[#6]~[#6]~1~2"

    # Check for basic steroid core (four fused rings, 6-6-6-5)
    steroid_core = Chem.MolFromSmarts(rings_ab + "~[#6]" + rings_cd)
    if not mol.HasSubstructMatch(steroid_core):
        return False, "No steroid core structure found"

    # Same skeleton, but C11 must carry a double-bonded terminal oxygen
    oxo_11_pattern = Chem.MolFromSmarts(rings_ab + "~[#6](=[OX1])" + rings_cd)
    if not mol.HasSubstructMatch(oxo_11_pattern):
        return False, "No ketone group at position 11"

    # Count carbons to verify it's in the typical steroid range
    carbon_count = sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
    if carbon_count < 19 or carbon_count > 30:
        return False, f"Carbon count ({carbon_count}) outside typical steroid range (19-30)"

    # Additional check for reasonable molecular weight
    mol_wt = Descriptors.ExactMolWt(mol)
    if mol_wt < 250 or mol_wt > 500:
        return False, f"Molecular weight ({mol_wt:.1f}) outside typical steroid range (250-500)"

    # Count rings to ensure we have the right ring system
    ring_info = mol.GetRingInfo()
    if ring_info.NumRings() < 4:
        return False, "Insufficient number of rings for steroid structure"

    return True, "Molecule contains steroid core with ketone group at position 11"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
""" | ||
Classifies: CHEBI:35346 11beta-hydroxy steroid | ||
""" | ||
""" | ||
Classifies: 11beta-hydroxy steroids | ||
""" | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
|
||
def is_11beta_hydroxy_steroid(smiles: str):
    """
    Determines if a molecule is an 11beta-hydroxy steroid based on its SMILES string.

    The molecule must contain the 6-6-6-5 steroid nucleus with a hydroxy group
    on C11 whose configuration is beta. Configuration is checked with a chiral
    SMARTS and ``useChirality=True``, so structures without defined
    stereochemistry at C11 are rejected.

    Args:
        smiles (str): SMILES string of the molecule
    Returns:
        bool: True if molecule is an 11beta-hydroxy steroid, False otherwise
        str: Reason for classification
    """
    # Parse SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, "Invalid SMILES string"

    # Steroid nucleus in a fixed atom order; ~ allows any bond type.
    # Atom order: C3 C2 C1 C10 C5(C4) C6 C7 C8 C9 | C11 | C12 C13 C17 C16 C15 C14
    rings_ab = "[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~1~[#6]~2"
    rings_cd = "~[#6]~[#6]~2~[#6]~[#6]~[#6]~[#6]~1~2"

    # Check for basic steroid core (4 fused rings, 6-6-6-5)
    steroid_core = Chem.MolFromSmarts(rings_ab + "~[#6]" + rings_cd)
    if not mol.HasSubstructMatch(steroid_core):
        return False, "No steroid core structure found"

    # Check for 11-beta hydroxy group.
    # C11 is written with four explicit neighbours in the order
    # (C9, other substituent, OH, C12); with that order '@@' places the OH on
    # the beta face. Hydrogens are made explicit so that the "other
    # substituent" slot can match H as well as a heavy atom.
    # NOTE(review): parity derived from the standard steroid orientation and
    # cross-checked against the usual SMILES of cortisol - confirm with a test
    # in an environment where RDKit is installed.
    hydroxy_11beta = Chem.MolFromSmarts(
        rings_ab + "~[C@@](~[*])([OX2H1])" + rings_cd
    )
    mol_with_hs = Chem.AddHs(mol)
    if not mol_with_hs.HasSubstructMatch(hydroxy_11beta, useChirality=True):
        return False, "No 11-beta hydroxy group found"

    # Additional check for common substituents often found in 11beta-hydroxy steroids
    # Look for common functional groups like ketones, other hydroxyls, etc.
    ketone_pattern = Chem.MolFromSmarts("C(=O)")
    other_hydroxy = Chem.MolFromSmarts("[CH]O")
    if not (mol.HasSubstructMatch(ketone_pattern) or mol.HasSubstructMatch(other_hydroxy)):
        return False, "Missing typical steroid substituents"

    # Count carbons to ensure reasonable size for a steroid
    carbon_count = sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
    if carbon_count < 19 or carbon_count > 30:  # Most steroids have 19-30 carbons
        return False, f"Carbon count ({carbon_count}) outside typical steroid range"

    # Check for reasonable number of rings
    ring_info = mol.GetRingInfo()
    if ring_info.NumRings() < 4:
        return False, "Too few rings for steroid structure"

    return True, "Contains steroid core with 11-beta hydroxy group and appropriate substituents"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
""" | ||
Classifies: CHEBI:17354 16beta-hydroxy steroid | ||
""" | ||
""" | ||
Classifies: CHEBI:17354 16beta-hydroxy steroid | ||
A 16-hydroxy steroid in which the hydroxy group at position 16 has a beta-configuration. | ||
""" | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
|
||
def is_16beta_hydroxy_steroid(smiles: str):
    """
    Determines if a molecule is a 16beta-hydroxy steroid based on its SMILES string.

    The molecule must contain the 6-6-6-5 steroid nucleus with a hydroxy group
    on C16 whose configuration is beta. Configuration is checked with a chiral
    SMARTS and ``useChirality=True``, so structures without defined
    stereochemistry at C16 are rejected.

    Args:
        smiles (str): SMILES string of the molecule
    Returns:
        bool: True if molecule is a 16beta-hydroxy steroid, False otherwise
        str: Reason for classification
    """
    # Imported locally: ``from rdkit import Chem`` does not by itself make the
    # ``Chem.Descriptors`` attribute available.
    from rdkit.Chem import Descriptors

    # Parse SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, "Invalid SMILES string"

    # Steroid nucleus in a fixed atom order; ~ allows any bond type.
    # Atom order: C3 C2 C1 C10 C5(C4) C6 C7 C8 C9 C11 C12 C13 C17 | C16 | C15 C14
    up_to_c17 = (
        "[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~1~[#6]~2"
        "~[#6]~[#6]~[#6]~2~[#6]"
    )
    c15_c14 = "~[#6]~[#6]~1~2"

    # Basic steroid core pattern (four fused rings, 6-6-6-5)
    steroid_core = Chem.MolFromSmarts(up_to_c17 + "~[#6]" + c15_c14)
    if not mol.HasSubstructMatch(steroid_core):
        return False, "No steroid core structure found"

    # Pattern for 16-beta-hydroxy group.
    # C16 is written with four explicit neighbours in the order
    # (C17, other substituent, OH, C15); with that order '@@' places the OH on
    # the beta face. Hydrogens are made explicit so that the "other
    # substituent" slot can match H as well as a heavy atom.
    # NOTE(review): parity derived from the standard steroid orientation and
    # cross-checked against the usual SMILES of estriol (16alpha) - confirm
    # with a test in an environment where RDKit is installed.
    beta_oh_pattern = Chem.MolFromSmarts(
        up_to_c17 + "~[C@@](~[*])([OX2H1])" + c15_c14
    )
    mol_with_hs = Chem.AddHs(mol)
    if not mol_with_hs.HasSubstructMatch(beta_oh_pattern, useChirality=True):
        return False, "No 16-beta-hydroxy group found"

    # Additional checks for reasonable molecular weight and atom counts
    # Steroids typically have molecular weights between 250-1000
    mol_weight = Descriptors.ExactMolWt(mol)
    if mol_weight < 250 or mol_weight > 1000:
        return False, f"Molecular weight {mol_weight} outside typical steroid range (250-1000)"

    # Count carbons (steroids typically have 17+ carbons)
    carbon_count = sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
    if carbon_count < 17:
        return False, f"Too few carbons ({carbon_count}) for a steroid structure"

    # No separate oxygen count is needed: the hydroxy match above already
    # guarantees at least one oxygen atom.
    return True, "Contains steroid core with 16-beta-hydroxy group"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
Classifies: CHEBI:35342 17alpha-hydroxy steroid | ||
""" | ||
""" | ||
Classifies: 17alpha-hydroxy steroid | ||
""" | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
|
||
def is_17alpha_hydroxy_steroid(smiles: str):
    """
    Determines if a molecule is a 17alpha-hydroxy steroid based on its SMILES string.

    The molecule must contain the 6-6-6-5 steroid nucleus with a hydroxy group
    on C17 whose configuration is alpha. Configuration is checked with a chiral
    SMARTS and ``useChirality=True``, so structures without defined
    stereochemistry at C17 are rejected.

    Args:
        smiles (str): SMILES string of the molecule
    Returns:
        bool: True if molecule is a 17alpha-hydroxy steroid, False otherwise
        str: Reason for classification
    """
    # Parse SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, "Invalid SMILES string"

    # Steroid nucleus in a fixed atom order; ~ allows any bond type.
    # Atom order: C3 C2 C1 C10 C5(C4) C6 C7 C8 C9 C11 C12 C13 | C17 | C16 C15 C14
    up_to_c13 = (
        "[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~1~[#6]~2"
        "~[#6]~[#6]~[#6]~2"
    )
    c16_to_c14 = "~[#6]~[#6]~[#6]~1~2"

    # Check for basic steroid core (4 fused rings, 6-6-6-5)
    steroid_core = Chem.MolFromSmarts(up_to_c13 + "~[#6]" + c16_to_c14)
    if not mol.HasSubstructMatch(steroid_core):
        return False, "No steroid core structure found"

    # Check for 17-OH with alpha stereochemistry.
    # C17 is written with four explicit neighbours in the order
    # (C13, other substituent, OH, C16); with that order '@' places the OH on
    # the alpha face. Hydrogens are made explicit so that the "other
    # substituent" slot can match H as well as a side chain.
    # NOTE(review): parity derived from the standard steroid orientation and
    # cross-checked against the usual SMILES of 17alpha-hydroxyprogesterone -
    # confirm with a test in an environment where RDKit is installed.
    oh_17_alpha_pattern = Chem.MolFromSmarts(
        up_to_c13 + "~[C@](~[*])([OX2H1])" + c16_to_c14
    )
    mol_with_hs = Chem.AddHs(mol)
    if not mol_with_hs.HasSubstructMatch(oh_17_alpha_pattern, useChirality=True):
        return False, "No hydroxyl group at C17 position found"

    # Additional checks to verify it's a steroid structure:
    # Count rings
    ri = mol.GetRingInfo()
    if ri.NumRings() < 4:
        return False, "Insufficient number of rings for steroid structure"

    # Basic size check - steroids typically have at least 19 carbons
    c_count = sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
    if c_count < 19:
        return False, "Too few carbons for steroid structure"

    # Check for sp3 hybridized carbons typical in steroid core
    sp3_carbons = sum(
        1
        for atom in mol.GetAtoms()
        if atom.GetAtomicNum() == 6
        and atom.GetHybridization() == Chem.HybridizationType.SP3
    )
    if sp3_carbons < 10:
        return False, "Insufficient sp3 carbons for steroid structure"

    return True, "Contains steroid core with 17-alpha hydroxyl group"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
""" | ||
Classifies: CHEBI:35343 17beta-hydroxy steroid | ||
""" | ||
""" | ||
Classifies: 17beta-hydroxy steroid | ||
A 17-hydroxy steroid in which the hydroxy group at position 17 has a beta-configuration. | ||
""" | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
|
||
def is_17beta_hydroxy_steroid(smiles: str):
    """
    Determines if a molecule is a 17beta-hydroxy steroid based on its SMILES string.

    The molecule must contain the 6-6-6-5 steroid nucleus with a hydroxy group
    on C17 whose configuration is beta. Configuration is checked with a chiral
    SMARTS and ``useChirality=True`` rather than by reading the raw chiral tag,
    because the CW/CCW tag of an atom depends on how its neighbours happen to
    be ordered. Structures without defined stereochemistry at C17 are rejected.

    Args:
        smiles (str): SMILES string of the molecule
    Returns:
        bool: True if molecule is a 17beta-hydroxy steroid, False otherwise
        str: Reason for classification
    """
    # Parse SMILES
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, "Invalid SMILES string"

    # Steroid nucleus in a fixed atom order; ~ allows any bond type.
    # Atom order: C3 C2 C1 C10 C5(C4) C6 C7 C8 C9 C11 C12 C13 | C17 | C16 C15 C14
    up_to_c13 = (
        "[#6]~1~[#6]~[#6]~[#6]~2~[#6](~[#6]~1)~[#6]~[#6]~[#6]~1~[#6]~2"
        "~[#6]~[#6]~[#6]~2"
    )
    c16_to_c14 = "~[#6]~[#6]~[#6]~1~2"

    # Check for basic steroid core (four fused rings, 6-6-6-5)
    steroid_core = Chem.MolFromSmarts(up_to_c13 + "~[#6]" + c16_to_c14)
    if not mol.HasSubstructMatch(steroid_core):
        return False, "No steroid core structure found"

    mol_with_hs = Chem.AddHs(mol)

    # Any saturated C17 carrying a hydroxy group, regardless of configuration
    oh_17_any = Chem.MolFromSmarts(
        up_to_c13 + "~[CX4]([OX2H1])" + c16_to_c14
    )
    if not mol_with_hs.HasSubstructMatch(oh_17_any):
        return False, "No hydroxyl group with correct connectivity found"

    # Check for 17-OH group in beta configuration.
    # C17 is written with four explicit neighbours in the order
    # (C13, other substituent, OH, C16); with that order '@@' places the OH on
    # the beta face. Hydrogens are made explicit so that the "other
    # substituent" slot can match H as well as a heavy atom.
    # NOTE(review): parity derived from the standard steroid orientation and
    # cross-checked against the usual SMILES of testosterone - confirm with a
    # test in an environment where RDKit is installed.
    oh_17_beta = Chem.MolFromSmarts(
        up_to_c13 + "~[C@@](~[*])([OX2H1])" + c16_to_c14
    )
    if not mol_with_hs.HasSubstructMatch(oh_17_beta, useChirality=True):
        return False, "No 17-beta hydroxyl group found"

    # Additional validation: molecule should have reasonable size for a steroid
    # (counted on the hydrogen-suppressed molecule, as before)
    num_atoms = mol.GetNumAtoms()
    if num_atoms < 20 or num_atoms > 100:
        return False, "Molecule size not consistent with steroid structure"

    # Count rings
    ring_info = mol.GetRingInfo()
    if ring_info.NumRings() < 4:
        return False, "Insufficient number of rings for steroid structure"

    return True, "Contains steroid core with 17-beta hydroxyl group"
Oops, something went wrong.