cleanlab · anishathalye · Jan 30, 2025
diff --git a/.editorconfig b/.editorconfig
@@ -12,3 +12,6 @@ indent_size = 4
 
 [*.toml]
 indent_size = 2
+
+[*.md]
+indent_size = 4
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -10,13 +10,16 @@ on:
 
 jobs:
   typecheck:
-    name: Type check
+    name: "Type check: Python ${{ matrix.python }}"
     runs-on: ubuntu-24.04
+    strategy:
+      matrix:
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: "3.13"
+          python-version: ${{ matrix.python }}
       - uses: pypa/hatch@install
       - run: hatch run types:check
   fmt:
@@ -30,7 +33,7 @@ jobs:
       - uses: pypa/hatch@install
       - run: hatch fmt --check
   test:
-    name: Test
+    name: "Test: Python ${{ matrix.python }}"
     runs-on: ubuntu-22.04
     strategy:
       matrix:

diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -1,5 +1,30 @@
 # Development
 
+## Guidelines
+
+### Typing
+
+This project uses [mypy][mypy] for static type checking as well as [beartype][beartype] for runtime type checking.
+
+The combination of using beartype and supporting Python 3.8+ leads to some [challenges][beartype-pep585] related to [PEP 585][pep-585] deprecations. For this reason, this package:
+
+- Imports from `beartype.typing` all types that are deprecated in PEP 585 (e.g., `List` and `Callable`)
+- Imports directly from `typing` all other types (e.g., `Optional` and `Literal`)
+    - These symbols are also available in `beartype.typing`, but we import them directly from `typing` because Ruff (our [linter](#formatting-and-linting)) has special treatment of these imports. For example, Ruff will complain about `Literal["foo"]` if we import `Literal` from `beartype.typing`.
+
+Relatedly, this package also cannot use [PEP 604][pep-604] syntax:
+
+- Instead of using types like `A | B`, use `Union[A, B]`
+- Instead of using types like `A | None`, use `Optional[A]`
+
+[mypy]: https://mypy-lang.org/
+[beartype]: https://github.com/beartype/beartype
+[beartype-pep585]: https://beartype.readthedocs.io/en/latest/api_roar/#pep-585-deprecations
+[pep-585]: https://peps.python.org/pep-0585/
+[pep-604]: https://peps.python.org/pep-0604/
+
+## Tooling
+
 This project uses the [Hatch] project manager ([installation instructions][hatch-install]).
 
 Hatch automatically manages dependencies and runs testing, type checking, and other operations in isolated [environments][hatch-environments].
@@ -8,7 +33,7 @@ Hatch automatically manages dependencies and runs testing, type checking, and ot
 [hatch-install]: https://hatch.pypa.io/latest/install/
 [hatch-environments]: https://hatch.pypa.io/latest/environment/
 
-## Testing
+### Testing
 
 You can run the tests on your local machine with:
 
@@ -20,17 +45,15 @@ The [`test` command][hatch-test] supports options such as `-c` for measuring tes
 
 [hatch-test]: https://hatch.pypa.io/latest/tutorials/testing/overview/
 
-## Type checking
+### Type checking
 
 You can run the [mypy static type checker][mypy] with:
 
 ```bash
 hatch run types:check
 ```
 
-[mypy]: https://mypy-lang.org/
-
-## Formatting and linting
+### Formatting and linting
 
 You can run the [Ruff][ruff] formatter and linter with:
 
@@ -43,7 +66,7 @@ This will automatically make [safe fixes][fix-safety] to your code. If you want
 [ruff]: https://github.com/astral-sh/ruff
 [fix-safety]: https://docs.astral.sh/ruff/linter/#fix-safety
 
-## Pre-commit
+### Pre-commit
 
 You can install the pre-commit hooks to automatically run type checking, formatting, and linting on every commit.
 
@@ -61,7 +84,7 @@ pre-commit install
 
 [pipx]: https://pipx.pypa.io/
 
-## Packaging
+### Packaging
 
 You can use [`hatch build`][hatch-build] to create build artifacts, a [source distribution ("sdist")][sdist] and a [built distribution ("wheel")][bdist].
 
@@ -73,7 +96,7 @@ You can use [`hatch publish`][hatch-publish] if you want to manually publish bui
 [hatch-publish]: https://hatch.pypa.io/latest/publish/
 [pypi]: https://pypi.org/
 
-### Automated releases
+#### Automated releases
 
 Automated releases are handled by the [release workflow][release-workflow] which is triggered by pushing a new tag to the repository. To create a new release:
 
@@ -88,7 +111,7 @@ Automated releases are handled by the [release workflow][release-workflow] which
 [hatch-version]: https://hatch.pypa.io/latest/version/#updating
 [changelog]: CHANGELOG.md
 
-## Continuous integration
+### Continuous integration
 
 Testing, type checking, and formatting/linting are [checked in CI][ci].
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -27,6 +27,7 @@ classifiers = [
 dependencies = [
   "codex-sdk==0.1.0a9",
   "pydantic>=1.9.0, <3",
+  "beartype>=0.17.0",
 ]
 
 [project.urls]
@@ -42,7 +43,6 @@ extra-dependencies = [
   "mypy>=1.0.0",
   "pytest",
   "llama-index-core",
-  "smolagents",
 ]
 [tool.hatch.envs.types.scripts]
 check = "mypy --strict --install-types --non-interactive {args:src/cleanlab_codex tests}"
@@ -98,4 +98,9 @@ html = "coverage html"
 xml = "coverage xml"
 
 [tool.ruff.lint]
-ignore = ["FA100", "UP007", "UP006"]
+ignore = [
+  "TCH001", # this package does runtime type checking
+  "TCH002",
+  "TCH003",
+  "UP007", # we cannot use the PEP 604 syntax because we support Python 3.8 and do runtime type checking
+]
diff --git a/src/cleanlab_codex/__init__.py b/src/cleanlab_codex/__init__.py
@@ -1,4 +1,11 @@
 # SPDX-License-Identifier: MIT
+
+from beartype.claw import beartype_this_package
+
+# this must run before any other imports from the cleanlab_codex package
+beartype_this_package()
+
+# ruff: noqa: E402
 from cleanlab_codex.codex import Codex
 from cleanlab_codex.codex_tool import CodexTool
 

diff --git a/src/cleanlab_codex/codex.py b/src/cleanlab_codex/codex.py
@@ -1,21 +1,21 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Optional
+from typing import Optional
+
+from beartype.typing import List, Tuple
 
 from cleanlab_codex.internal.project import create_project, query_project
 from cleanlab_codex.internal.utils import init_codex_client
-
-if TYPE_CHECKING:
-    from cleanlab_codex.types.entry import Entry, EntryCreate
-    from cleanlab_codex.types.organization import Organization
+from cleanlab_codex.types.entry import Entry, EntryCreate
+from cleanlab_codex.types.organization import Organization
 
 
 class Codex:
     """
     A client to interact with Cleanlab Codex.
     """
 
-    def __init__(self, key: str | None = None):
+    def __init__(self, key: Optional[str] = None):
         """Initialize the Codex client.
 
         Args:
@@ -30,11 +30,11 @@ def __init__(self, key: str | None = None):
         self.key = key
         self._client = init_codex_client(key)
 
-    def list_organizations(self) -> list[Organization]:
+    def list_organizations(self) -> List[Organization]:
         """List the organizations the authenticated user is a member of.
 
         Returns:
-            list[Organization]: A list of organizations the authenticated user is a member of.
+            List[Organization]: A list of organizations the authenticated user is a member of.
 
         Raises:
             AuthenticationError: If the client is not authenticated with a user-level API Key.
@@ -59,11 +59,11 @@ def create_project(self, name: str, organization_id: str, description: Optional[
             description=description,
         )
 
-    def add_entries(self, entries: list[EntryCreate], project_id: str) -> None:
+    def add_entries(self, entries: List[EntryCreate], project_id: str) -> None:
         """Add a list of entries to the Codex project.
 
         Args:
-            entries (list[EntryCreate]): The entries to add to the Codex project.
+            entries (List[EntryCreate]): The entries to add to the Codex project.
             project_id (str): The ID of the project to add the entries to.
 
         Raises:
@@ -102,20 +102,20 @@ def query(
         project_id: Optional[str] = None,  # TODO: update to uuid once project IDs are changed to UUIDs
         fallback_answer: Optional[str] = None,
         read_only: bool = False,
-    ) -> tuple[Optional[str], Optional[Entry]]:
+    ) -> Tuple[Optional[str], Optional[Entry]]:
         """Query Codex to check if the Codex project contains an answer to this question and add the question to the Codex project for SME review if it does not.
 
         Args:
             question (str): The question to ask the Codex API.
-            project_id (:obj:`int`, optional): The ID of the project to query.
+            project_id (:obj:`str`, optional): The ID of the project to query.
                 If the client is authenticated with a user-level API Key, this is required.
                 If the client is authenticated with a project-level Access Key, this is optional. The client will use the Access Key's project ID by default.
             fallback_answer (:obj:`str`, optional): Optional fallback answer to return if Codex is unable to answer the question.
             read_only (:obj:`bool`, optional): Whether to query the Codex API in read-only mode. If True, the question will not be added to the Codex project for SME review.
                 This can be useful for testing purposes when setting up your project configuration.
 
         Returns:
-            tuple[Optional[str], Optional[Entry]]: A tuple representing the answer for the query and the existing or new entry in the Codex project.
+            Tuple[Optional[str], Optional[Entry]]: A tuple representing the answer for the query and the existing or new entry in the Codex project.
                 If Codex is able to answer the question, the first element will be the answer returned by Codex and the second element will be the existing entry in the Codex project.
                 If Codex is unable to answer the question, the first element will be `fallback_answer` if provided, otherwise None, and the second element will be a new entry in the Codex project.
         """

diff --git a/src/cleanlab_codex/codex_tool.py b/src/cleanlab_codex/codex_tool.py
@@ -2,6 +2,8 @@
 
 from typing import Any, ClassVar, Optional
 
+from beartype.typing import Dict, List
+
 from cleanlab_codex.codex import Codex
 
 
@@ -10,13 +12,13 @@ class CodexTool:
 
     _tool_name = "ask_advisor"
     _tool_description = "Asks an all-knowing advisor this query in cases where it cannot be answered from the provided Context. If the answer is avalible, this returns None."
-    _tool_properties: ClassVar[dict[str, Any]] = {
+    _tool_properties: ClassVar[Dict[str, Any]] = {
         "question": {
             "type": "string",
             "description": "The question to ask the advisor. This should be the same as the original user question, except in cases where the user question is missing information that could be additionally clarified.",
         }
     }
-    _tool_requirements: ClassVar[list[str]] = ["question"]
+    _tool_requirements: ClassVar[List[str]] = ["question"]
     DEFAULT_FALLBACK_ANSWER = "Based on the available information, I cannot provide a complete answer to this question."
 
     def __init__(
@@ -94,7 +96,7 @@ def query(self, question: str) -> Optional[str]:
         """
         return self._codex_client.query(question, project_id=self._project_id, fallback_answer=self._fallback_answer)[0]
 
-    def to_openai_tool(self) -> dict[str, Any]:
+    def to_openai_tool(self) -> Dict[str, Any]:
         """Converts the tool to an OpenAI tool."""
         from cleanlab_codex.utils import format_as_openai_tool
 

diff --git a/src/cleanlab_codex/internal/project.py b/src/cleanlab_codex/internal/project.py
@@ -1,12 +1,9 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Optional
-
-if TYPE_CHECKING:
-    from codex import Codex as _Codex
-
-    from cleanlab_codex.types.entry import Entry
+from beartype.typing import Optional, Tuple
+from codex import Codex as _Codex
 
+from cleanlab_codex.types.entry import Entry
 from cleanlab_codex.types.project import ProjectConfig
 
 
@@ -34,7 +31,7 @@ def query_project(
     project_id: Optional[str] = None,
     fallback_answer: Optional[str] = None,
     read_only: bool = False,
-) -> tuple[Optional[str], Optional[Entry]]:
+) -> Tuple[Optional[str], Optional[Entry]]:
     if client.access_key is not None:
         project_id = client.projects.access_keys.retrieve_project_id().project_id
     elif project_id is None:

diff --git a/src/cleanlab_codex/internal/utils.py b/src/cleanlab_codex/internal/utils.py
@@ -2,6 +2,7 @@
 
 import os
 import re
+from typing import Optional
 
 from codex import Codex as _Codex
 
@@ -19,7 +20,7 @@ def is_access_key(key: str) -> bool:
     return re.match(ACCESS_KEY_PATTERN, key) is not None
 
 
-def init_codex_client(key: str | None = None) -> _Codex:
+def init_codex_client(key: Optional[str] = None) -> _Codex:
     if key is None:
         if api_key := os.getenv("CODEX_API_KEY"):
             return _client_from_api_key(api_key)

diff --git a/src/cleanlab_codex/utils/llamaindex.py b/src/cleanlab_codex/utils/llamaindex.py
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
 from inspect import signature
-from typing import Any, Callable
+from typing import Any
 
+from beartype.typing import Callable, Dict, Type
 from llama_index.core.bridge.pydantic import BaseModel, FieldInfo, create_model
 
 
-def get_function_schema(name: str, func: Callable[..., Any], tool_properties: dict[str, Any]) -> type[BaseModel]:
+def get_function_schema(name: str, func: Callable[..., Any], tool_properties: Dict[str, Any]) -> Type[BaseModel]:
     fields = {}
     params = signature(func).parameters
     for param_name in params:

diff --git a/src/cleanlab_codex/utils/openai.py b/src/cleanlab_codex/utils/openai.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
-from typing import Any, Dict, List, Literal
+from typing import Any, Literal
 
+from beartype.typing import Dict, List
 from pydantic import BaseModel
 
 

diff --git a/src/cleanlab_codex/utils/smolagents.py b/src/cleanlab_codex/utils/smolagents.py
@@ -1,5 +1,8 @@
-from typing import Callable, Dict, Optional
+from __future__ import annotations
 
+from typing import Optional
+
+from beartype.typing import Callable, Dict
 from smolagents import Tool  # type: ignore
 
 

diff --git a/tests/fixtures/client.py b/tests/fixtures/client.py
@@ -1,12 +1,16 @@
-from typing import Generator
+from __future__ import annotations
+
 from unittest.mock import MagicMock, patch
 
 import pytest
+from beartype.typing import Generator
+from codex import Codex as _Codex
 
 
 @pytest.fixture
 def mock_client() -> Generator[MagicMock, None, None]:
     with patch("cleanlab_codex.codex.init_codex_client") as mock_init:
         mock_client = MagicMock()
+        mock_client.__class__ = _Codex  # type: ignore
         mock_init.return_value = mock_client
         yield mock_client