diff --git a/.stats.yml b/.stats.yml
index 2740b98..d3b4ca5 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 7
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-1f0d266ba97b03672f10d33a6dc6e324af9a95646f978ffbff6a31f3907bbfe7.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-e832198c20514199804ae71f3213d3747cfea8dbae37304e852fd58912abb2c1.yml
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c384b8b..c04763a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,9 +2,13 @@
 
 ### With Rye
 
-We use [Rye](https://rye.astral.sh/) to manage dependencies so we highly recommend [installing it](https://rye.astral.sh/guide/installation/) as it will automatically provision a Python environment with the expected Python version.
+We use [Rye](https://rye.astral.sh/) to manage dependencies because it will automatically provision a Python environment with the expected Python version. To set it up, run:
 
-After installing Rye, you'll just have to run this command:
+```sh
+$ ./scripts/bootstrap
+```
+
+Or [install Rye manually](https://rye.astral.sh/guide/installation/) and run:
 
 ```sh
 $ rye sync --all-features
@@ -31,25 +35,25 @@ $ pip install -r requirements-dev.lock
 
 ## Modifying/Adding code
 
-Most of the SDK is generated code, and any modified code will be overridden on the next generation. The
-`src/groq/lib/` and `examples/` directories are exceptions and will never be overridden.
+Most of the SDK is generated code. Modifications to code will be persisted between generations, but may
+result in merge conflicts between manual patches and changes from the generator. The generator will never
+modify the contents of the `src/groq/lib/` and `examples/` directories.
 
 ## Adding and running examples
 
-All files in the `examples/` directory are not modified by the Stainless generator and can be freely edited or
-added to.
+All files in the `examples/` directory are not modified by the generator and can be freely edited or added to.
 
-```bash
+```py
 # add an example to examples/<your-example>.py
 
 #!/usr/bin/env -S rye run python
 …
 ```
 
-```
-chmod +x examples/<your-example>.py
+```sh
+$ chmod +x examples/<your-example>.py
 # run the example against your api
-./examples/<your-example>.py
+$ ./examples/<your-example>.py
 ```
 
 ## Using the repository from source
@@ -58,8 +62,8 @@ If you’d like to use the repository from source, you can either install from g
 
 To install via git:
 
-```bash
-pip install git+ssh://git@github.com/groq/groq-python#main.git
+```sh
+$ pip install git+ssh://git@github.com/groq/groq-python#main.git
 ```
 
 Alternatively, you can build from source and install the wheel file:
@@ -68,29 +72,29 @@ Building this package will create two files in the `dist/` directory, a `.tar.gz
 
 To create a distributable version of the library, all you have to do is run this command:
 
-```bash
-rye build
+```sh
+$ rye build
 # or
-python -m build
+$ python -m build
 ```
 
 Then to install:
 
 ```sh
-pip install ./path-to-wheel-file.whl
+$ pip install ./path-to-wheel-file.whl
 ```
 
 ## Running tests
 
 Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests.
 
-```bash
+```sh
 # you will need npm installed
-npx prism mock path/to/your/openapi.yml
+$ npx prism mock path/to/your/openapi.yml
 ```
 
-```bash
-rye run pytest
+```sh
+$ ./scripts/test
 ```
 
 ## Linting and formatting
@@ -100,14 +104,14 @@ This repository uses [ruff](https://github.com/astral-sh/ruff) and
 
 To lint:
 
-```bash
-rye run lint
+```sh
+$ ./scripts/lint
 ```
 
 To format and fix all ruff issues automatically:
 
-```bash
-rye run format
+```sh
+$ ./scripts/format
 ```
 
 ## Publishing and releases
diff --git a/README.md b/README.md
index b2e1e15..ff3a18c 100644
--- a/README.md
+++ b/README.md
@@ -374,6 +374,21 @@ We take backwards-compatibility seriously and work hard to ensure you can rely o
 
 We are keen for your feedback; please open an [issue](https://www.github.com/groq/groq-python/issues) with questions, bugs, or suggestions.
 
+### Determining the installed version
+
+If you've upgraded to the latest version but aren't seeing any new features you were expecting then your python environment is likely still using an older version.
+
+You can determine the version that is being used at runtime with:
+
+```py
+import groq
+print(groq.__version__)
+```
+
 ## Requirements
 
 Python 3.7 or higher.
+
+## Contributing
+
+See [the contributing documentation](./CONTRIBUTING.md).
diff --git a/pyproject.toml b/pyproject.toml
index 9196a48..d550585 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,6 @@ dependencies = [
     "distro>=1.7.0, <2",
     "sniffio",
     "cached-property; python_version < '3.8'",
-
 ]
 requires-python = ">= 3.7"
 classifiers = [
@@ -36,8 +35,6 @@ classifiers = [
   "License :: OSI Approved :: Apache Software License"
 ]
 
-
-
 [project.urls]
 Homepage = "https://github.com/groq/groq-python"
 Repository = "https://github.com/groq/groq-python"
@@ -59,7 +56,6 @@ dev-dependencies = [
     "dirty-equals>=0.6.0",
     "importlib-metadata>=6.7.0",
     "rich>=13.7.1",
-
 ]
 
 [tool.rye.scripts]
@@ -67,11 +63,11 @@ format = { chain = [
   "format:ruff",
   "format:docs",
   "fix:ruff",
+  # run formatting again to fix any inconsistencies when imports are stripped
+  "format:ruff",
 ]}
-"format:black" = "black ."
 "format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md"
 "format:ruff" = "ruff format"
-"format:isort" = "isort ."
 
 "lint" = { chain = [
   "check:ruff",
@@ -129,10 +125,6 @@ path = "README.md"
 pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)'
 replacement = '[\1](https://github.com/groq/groq-python/tree/main/\g<2>)'
 
-[tool.black]
-line-length = 120
-target-version = ["py37"]
-
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 addopts = "--tb=short"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 6b7d864..943e6bc 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -16,8 +16,6 @@ anyio==4.4.0
     # via httpx
 argcomplete==3.1.2
     # via nox
-attrs==23.1.0
-    # via pytest
 certifi==2023.7.22
     # via httpcore
     # via httpx
@@ -28,8 +26,9 @@ distlib==0.3.7
     # via virtualenv
 distro==1.8.0
     # via groq
-exceptiongroup==1.1.3
+exceptiongroup==1.2.2
     # via anyio
+    # via pytest
 filelock==3.12.4
     # via virtualenv
 h11==0.14.0
@@ -49,7 +48,7 @@ markdown-it-py==3.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py
-mypy==1.10.1
+mypy==1.11.2
 mypy-extensions==1.0.0
     # via mypy
 nodeenv==1.8.0
@@ -60,27 +59,25 @@ packaging==23.2
     # via pytest
 platformdirs==3.11.0
     # via virtualenv
-pluggy==1.3.0
-    # via pytest
-py==1.11.0
+pluggy==1.5.0
     # via pytest
-pydantic==2.7.1
+pydantic==2.9.2
     # via groq
-pydantic-core==2.18.2
+pydantic-core==2.23.4
     # via pydantic
 pygments==2.18.0
     # via rich
-pyright==1.1.374
-pytest==7.1.1
+pyright==1.1.380
+pytest==8.3.3
     # via pytest-asyncio
-pytest-asyncio==0.21.1
+pytest-asyncio==0.24.0
 python-dateutil==2.8.2
     # via time-machine
 pytz==2023.3.post1
     # via dirty-equals
 respx==0.20.2
 rich==13.7.1
-ruff==0.5.6
+ruff==0.6.9
 setuptools==68.2.2
     # via nodeenv
 six==1.16.0
@@ -90,10 +87,10 @@ sniffio==1.3.0
     # via groq
     # via httpx
 time-machine==2.9.0
-tomli==2.0.1
+tomli==2.0.2
     # via mypy
     # via pytest
-typing-extensions==4.8.0
+typing-extensions==4.12.2
     # via anyio
     # via groq
     # via mypy
diff --git a/requirements.lock b/requirements.lock
index 874a3cb..d2a8ddb 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -19,7 +19,7 @@ certifi==2023.7.22
     # via httpx
 distro==1.8.0
     # via groq
-exceptiongroup==1.1.3
+exceptiongroup==1.2.2
     # via anyio
 h11==0.14.0
     # via httpcore
@@ -30,15 +30,15 @@ httpx==0.25.2
 idna==3.4
     # via anyio
     # via httpx
-pydantic==2.7.1
+pydantic==2.9.2
     # via groq
-pydantic-core==2.18.2
+pydantic-core==2.23.4
     # via pydantic
 sniffio==1.3.0
     # via anyio
     # via groq
     # via httpx
-typing-extensions==4.8.0
+typing-extensions==4.12.2
     # via anyio
     # via groq
     # via pydantic
diff --git a/src/groq/_base_client.py b/src/groq/_base_client.py
index 2311063..f94212c 100644
--- a/src/groq/_base_client.py
+++ b/src/groq/_base_client.py
@@ -143,6 +143,12 @@ def __init__(
         self.url = url
         self.params = params
 
+    @override
+    def __repr__(self) -> str:
+        if self.url:
+            return f"{self.__class__.__name__}(url={self.url})"
+        return f"{self.__class__.__name__}(params={self.params})"
+
 
 class BasePage(GenericModel, Generic[_T]):
     """
@@ -400,14 +406,7 @@ def _make_status_error(
     ) -> _exceptions.APIStatusError:
         raise NotImplementedError()
 
-    def _remaining_retries(
-        self,
-        remaining_retries: Optional[int],
-        options: FinalRequestOptions,
-    ) -> int:
-        return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries)
-
-    def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers:
+    def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0) -> httpx.Headers:
         custom_headers = options.headers or {}
         headers_dict = _merge_mappings(self.default_headers, custom_headers)
         self._validate_headers(headers_dict, custom_headers)
@@ -419,6 +418,11 @@ def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers:
         if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
             headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
 
+        # Don't set the retry count header if it was already set or removed by the caller. We check
+        # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
+        if "x-stainless-retry-count" not in (header.lower() for header in custom_headers):
+            headers["x-stainless-retry-count"] = str(retries_taken)
+
         return headers
 
     def _prepare_url(self, url: str) -> URL:
@@ -440,6 +444,8 @@ def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder:
     def _build_request(
         self,
         options: FinalRequestOptions,
+        *,
+        retries_taken: int = 0,
     ) -> httpx.Request:
         if log.isEnabledFor(logging.DEBUG):
             log.debug("Request options: %s", model_dump(options, exclude_unset=True))
@@ -455,7 +461,7 @@ def _build_request(
             else:
                 raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
 
-        headers = self._build_headers(options)
+        headers = self._build_headers(options, retries_taken=retries_taken)
         params = _merge_mappings(self.default_query, options.params)
         content_type = headers.get("Content-Type")
         files = options.files
@@ -489,12 +495,17 @@ def _build_request(
             if not files:
                 files = cast(HttpxRequestFiles, ForceMultipartDict())
 
+        prepared_url = self._prepare_url(options.url)
+        if "_" in prepared_url.host:
+            # work around https://github.com/encode/httpx/discussions/2880
+            kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")}
+
         # TODO: report this error to httpx
         return self._client.build_request(  # pyright: ignore[reportUnknownMemberType]
             headers=headers,
             timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout,
             method=options.method,
-            url=self._prepare_url(options.url),
+            url=prepared_url,
             # the `Query` type that we use is incompatible with qs'
             # `Params` type as it needs to be typed as `Mapping[str, object]`
             # so that passing a `TypedDict` doesn't cause an error.
@@ -684,7 +695,8 @@ def _calculate_retry_timeout(
         if retry_after is not None and 0 < retry_after <= 60:
             return retry_after
 
-        nb_retries = max_retries - remaining_retries
+        # Also cap retry count to 1000 to avoid any potential overflows with `pow`
+        nb_retries = min(max_retries - remaining_retries, 1000)
 
         # Apply exponential backoff, but not more than the max.
         sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
@@ -933,12 +945,17 @@ def request(
         stream: bool = False,
         stream_cls: type[_StreamT] | None = None,
     ) -> ResponseT | _StreamT:
+        if remaining_retries is not None:
+            retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
+        else:
+            retries_taken = 0
+
         return self._request(
             cast_to=cast_to,
             options=options,
             stream=stream,
             stream_cls=stream_cls,
-            remaining_retries=remaining_retries,
+            retries_taken=retries_taken,
         )
 
     def _request(
@@ -946,7 +963,7 @@ def _request(
         *,
         cast_to: Type[ResponseT],
         options: FinalRequestOptions,
-        remaining_retries: int | None,
+        retries_taken: int,
         stream: bool,
         stream_cls: type[_StreamT] | None,
     ) -> ResponseT | _StreamT:
@@ -958,8 +975,8 @@ def _request(
         cast_to = self._maybe_override_cast_to(cast_to, options)
         options = self._prepare_options(options)
 
-        retries = self._remaining_retries(remaining_retries, options)
-        request = self._build_request(options)
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        request = self._build_request(options, retries_taken=retries_taken)
         self._prepare_request(request)
 
         kwargs: HttpxSendArgs = {}
@@ -977,11 +994,11 @@ def _request(
         except httpx.TimeoutException as err:
             log.debug("Encountered httpx.TimeoutException", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -992,11 +1009,11 @@ def _request(
         except Exception as err:
             log.debug("Encountered Exception", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -1019,13 +1036,13 @@ def _request(
         except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
             log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
 
-            if retries > 0 and self._should_retry(err.response):
+            if remaining_retries > 0 and self._should_retry(err.response):
                 err.response.close()
                 return self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
-                    err.response.headers,
+                    retries_taken=retries_taken,
+                    response_headers=err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
                 )
@@ -1044,26 +1061,26 @@ def _request(
             response=response,
             stream=stream,
             stream_cls=stream_cls,
-            retries_taken=options.get_max_retries(self.max_retries) - retries,
+            retries_taken=retries_taken,
         )
 
     def _retry_request(
         self,
         options: FinalRequestOptions,
         cast_to: Type[ResponseT],
-        remaining_retries: int,
-        response_headers: httpx.Headers | None,
         *,
+        retries_taken: int,
+        response_headers: httpx.Headers | None,
         stream: bool,
         stream_cls: type[_StreamT] | None,
     ) -> ResponseT | _StreamT:
-        remaining = remaining_retries - 1
-        if remaining == 1:
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        if remaining_retries == 1:
             log.debug("1 retry left")
         else:
-            log.debug("%i retries left", remaining)
+            log.debug("%i retries left", remaining_retries)
 
-        timeout = self._calculate_retry_timeout(remaining, options, response_headers)
+        timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
         log.info("Retrying request to %s in %f seconds", options.url, timeout)
 
         # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
@@ -1073,7 +1090,7 @@ def _retry_request(
         return self._request(
             options=options,
             cast_to=cast_to,
-            remaining_retries=remaining,
+            retries_taken=retries_taken + 1,
             stream=stream,
             stream_cls=stream_cls,
         )
@@ -1491,12 +1508,17 @@ async def request(
         stream_cls: type[_AsyncStreamT] | None = None,
         remaining_retries: Optional[int] = None,
     ) -> ResponseT | _AsyncStreamT:
+        if remaining_retries is not None:
+            retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
+        else:
+            retries_taken = 0
+
         return await self._request(
             cast_to=cast_to,
             options=options,
             stream=stream,
             stream_cls=stream_cls,
-            remaining_retries=remaining_retries,
+            retries_taken=retries_taken,
         )
 
     async def _request(
@@ -1506,7 +1528,7 @@ async def _request(
         *,
         stream: bool,
         stream_cls: type[_AsyncStreamT] | None,
-        remaining_retries: int | None,
+        retries_taken: int,
     ) -> ResponseT | _AsyncStreamT:
         if self._platform is None:
             # `get_platform` can make blocking IO calls so we
@@ -1521,8 +1543,8 @@ async def _request(
         cast_to = self._maybe_override_cast_to(cast_to, options)
         options = await self._prepare_options(options)
 
-        retries = self._remaining_retries(remaining_retries, options)
-        request = self._build_request(options)
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        request = self._build_request(options, retries_taken=retries_taken)
         await self._prepare_request(request)
 
         kwargs: HttpxSendArgs = {}
@@ -1538,11 +1560,11 @@ async def _request(
         except httpx.TimeoutException as err:
             log.debug("Encountered httpx.TimeoutException", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return await self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -1553,11 +1575,11 @@ async def _request(
         except Exception as err:
             log.debug("Encountered Exception", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return await self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -1575,13 +1597,13 @@ async def _request(
         except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
             log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
 
-            if retries > 0 and self._should_retry(err.response):
+            if remaining_retries > 0 and self._should_retry(err.response):
                 await err.response.aclose()
                 return await self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
-                    err.response.headers,
+                    retries_taken=retries_taken,
+                    response_headers=err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
                 )
@@ -1600,26 +1622,26 @@ async def _request(
             response=response,
             stream=stream,
             stream_cls=stream_cls,
-            retries_taken=options.get_max_retries(self.max_retries) - retries,
+            retries_taken=retries_taken,
         )
 
     async def _retry_request(
         self,
         options: FinalRequestOptions,
         cast_to: Type[ResponseT],
-        remaining_retries: int,
-        response_headers: httpx.Headers | None,
         *,
+        retries_taken: int,
+        response_headers: httpx.Headers | None,
         stream: bool,
         stream_cls: type[_AsyncStreamT] | None,
     ) -> ResponseT | _AsyncStreamT:
-        remaining = remaining_retries - 1
-        if remaining == 1:
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        if remaining_retries == 1:
             log.debug("1 retry left")
         else:
-            log.debug("%i retries left", remaining)
+            log.debug("%i retries left", remaining_retries)
 
-        timeout = self._calculate_retry_timeout(remaining, options, response_headers)
+        timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
         log.info("Retrying request to %s in %f seconds", options.url, timeout)
 
         await anyio.sleep(timeout)
@@ -1627,7 +1649,7 @@ async def _retry_request(
         return await self._request(
             options=options,
             cast_to=cast_to,
-            remaining_retries=remaining,
+            retries_taken=retries_taken + 1,
             stream=stream,
             stream_cls=stream_cls,
         )
diff --git a/src/groq/_compat.py b/src/groq/_compat.py
index 21fe694..d89920d 100644
--- a/src/groq/_compat.py
+++ b/src/groq/_compat.py
@@ -133,15 +133,17 @@ def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str:
 def model_dump(
     model: pydantic.BaseModel,
     *,
-    exclude: IncEx = None,
+    exclude: IncEx | None = None,
     exclude_unset: bool = False,
     exclude_defaults: bool = False,
+    warnings: bool = True,
 ) -> dict[str, Any]:
     if PYDANTIC_V2:
         return model.model_dump(
             exclude=exclude,
             exclude_unset=exclude_unset,
             exclude_defaults=exclude_defaults,
+            warnings=warnings,
         )
     return cast(
         "dict[str, Any]",
diff --git a/src/groq/_models.py b/src/groq/_models.py
index d386eaa..42551b7 100644
--- a/src/groq/_models.py
+++ b/src/groq/_models.py
@@ -176,7 +176,7 @@ def __str__(self) -> str:
     # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836.
     @classmethod
     @override
-    def construct(
+    def construct(  # pyright: ignore[reportIncompatibleMethodOverride]
         cls: Type[ModelT],
         _fields_set: set[str] | None = None,
         **values: object,
@@ -248,8 +248,8 @@ def model_dump(
             self,
             *,
             mode: Literal["json", "python"] | str = "python",
-            include: IncEx = None,
-            exclude: IncEx = None,
+            include: IncEx | None = None,
+            exclude: IncEx | None = None,
             by_alias: bool = False,
             exclude_unset: bool = False,
             exclude_defaults: bool = False,
@@ -303,8 +303,8 @@ def model_dump_json(
             self,
             *,
             indent: int | None = None,
-            include: IncEx = None,
-            exclude: IncEx = None,
+            include: IncEx | None = None,
+            exclude: IncEx | None = None,
             by_alias: bool = False,
             exclude_unset: bool = False,
             exclude_defaults: bool = False,
diff --git a/src/groq/_response.py b/src/groq/_response.py
index d7ae9cd..95a0361 100644
--- a/src/groq/_response.py
+++ b/src/groq/_response.py
@@ -192,6 +192,9 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
         if cast_to == float:
             return cast(R, float(response.text))
 
+        if cast_to == bool:
+            return cast(R, response.text.lower() == "true")
+
         origin = get_origin(cast_to) or cast_to
 
         if origin == APIResponse:
diff --git a/src/groq/_types.py b/src/groq/_types.py
index f85d73b..453e1a0 100644
--- a/src/groq/_types.py
+++ b/src/groq/_types.py
@@ -16,7 +16,7 @@
     Optional,
     Sequence,
 )
-from typing_extensions import Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
+from typing_extensions import Set, Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
 
 import httpx
 import pydantic
@@ -193,7 +193,9 @@ def get(self, __key: str) -> str | None: ...
 
 # Note: copied from Pydantic
 # https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49
-IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None"
+IncEx: TypeAlias = Union[
+    Set[int], Set[str], Mapping[int, Union["IncEx", Literal[True]]], Mapping[str, Union["IncEx", Literal[True]]]
+]
 
 PostParser = Callable[[Any], Any]
 
diff --git a/src/groq/_utils/_utils.py b/src/groq/_utils/_utils.py
index 2fc5a1c..0bba17c 100644
--- a/src/groq/_utils/_utils.py
+++ b/src/groq/_utils/_utils.py
@@ -363,12 +363,13 @@ def file_from_path(path: str) -> FileTypes:
 
 def get_required_header(headers: HeadersLike, header: str) -> str:
     lower_header = header.lower()
-    if isinstance(headers, Mapping):
-        for k, v in headers.items():
+    if is_mapping_t(headers):
+        # mypy doesn't understand the type narrowing here
+        for k, v in headers.items():  # type: ignore
             if k.lower() == lower_header and isinstance(v, str):
                 return v
 
-    """ to deal with the case where the header looks like Stainless-Event-Id """
+    # to deal with the case where the header looks like Stainless-Event-Id
     intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize())
 
     for normalized_header in [header, lower_header, header.upper(), intercaps_header]:
diff --git a/src/groq/resources/audio/audio.py b/src/groq/resources/audio/audio.py
index 728ba29..f33b8c2 100644
--- a/src/groq/resources/audio/audio.py
+++ b/src/groq/resources/audio/audio.py
@@ -35,10 +35,21 @@ def translations(self) -> Translations:
 
     @cached_property
     def with_raw_response(self) -> AudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AudioWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AudioWithStreamingResponse(self)
 
 
@@ -53,10 +64,21 @@ def translations(self) -> AsyncTranslations:
 
     @cached_property
     def with_raw_response(self) -> AsyncAudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncAudioWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncAudioWithStreamingResponse(self)
 
 
diff --git a/src/groq/resources/audio/transcriptions.py b/src/groq/resources/audio/transcriptions.py
index f39363c..df3d9e9 100644
--- a/src/groq/resources/audio/transcriptions.py
+++ b/src/groq/resources/audio/transcriptions.py
@@ -32,10 +32,21 @@
 class Transcriptions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> TranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return TranscriptionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return TranscriptionsWithStreamingResponse(self)
 
     def create(
@@ -230,10 +241,21 @@ def create(
 class AsyncTranscriptions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncTranscriptionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncTranscriptionsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/audio/translations.py b/src/groq/resources/audio/translations.py
index 101a797..e689512 100644
--- a/src/groq/resources/audio/translations.py
+++ b/src/groq/resources/audio/translations.py
@@ -32,10 +32,21 @@
 class Translations(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> TranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return TranslationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return TranslationsWithStreamingResponse(self)
 
     def create(
@@ -111,10 +122,21 @@ def create(
 class AsyncTranslations(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncTranslationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncTranslationsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/chat/chat.py b/src/groq/resources/chat/chat.py
index d14d055..1b2ea5a 100644
--- a/src/groq/resources/chat/chat.py
+++ b/src/groq/resources/chat/chat.py
@@ -23,10 +23,21 @@ def completions(self) -> Completions:
 
     @cached_property
     def with_raw_response(self) -> ChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return ChatWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return ChatWithStreamingResponse(self)
 
 
@@ -37,10 +48,21 @@ def completions(self) -> AsyncCompletions:
 
     @cached_property
     def with_raw_response(self) -> AsyncChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncChatWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncChatWithStreamingResponse(self)
 
 
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
index 3bd69b5..63b0fe8 100644
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -33,10 +33,21 @@
 class Completions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return CompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return CompletionsWithStreamingResponse(self)
 
     def create(
@@ -218,10 +229,21 @@ def create(
 class AsyncCompletions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncCompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncCompletionsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/embeddings.py b/src/groq/resources/embeddings.py
index d52fd5e..8e439dd 100644
--- a/src/groq/resources/embeddings.py
+++ b/src/groq/resources/embeddings.py
@@ -30,10 +30,21 @@
 class Embeddings(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> EmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return EmbeddingsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return EmbeddingsWithStreamingResponse(self)
 
     def create(
@@ -95,10 +106,21 @@ def create(
 class AsyncEmbeddings(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncEmbeddingsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncEmbeddingsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/models.py b/src/groq/resources/models.py
index e81a21c..0cb9c71 100644
--- a/src/groq/resources/models.py
+++ b/src/groq/resources/models.py
@@ -24,10 +24,21 @@
 class Models(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return ModelsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return ModelsWithStreamingResponse(self)
 
     def retrieve(
@@ -119,10 +130,21 @@ def delete(
 class AsyncModels(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return the
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncModelsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncModelsWithStreamingResponse(self)
 
     async def retrieve(
diff --git a/src/groq/types/audio/transcription.py b/src/groq/types/audio/transcription.py
index 0b6ab39..edb5f22 100644
--- a/src/groq/types/audio/transcription.py
+++ b/src/groq/types/audio/transcription.py
@@ -1,7 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 
-
 from ..._models import BaseModel
 
 __all__ = ["Transcription"]
diff --git a/src/groq/types/audio/translation.py b/src/groq/types/audio/translation.py
index 3d9ede2..7c0e905 100644
--- a/src/groq/types/audio/translation.py
+++ b/src/groq/types/audio/translation.py
@@ -1,7 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 
-
 from ..._models import BaseModel
 
 __all__ = ["Translation"]
diff --git a/src/groq/types/model_deleted.py b/src/groq/types/model_deleted.py
index d9a48bb..7f81e1b 100644
--- a/src/groq/types/model_deleted.py
+++ b/src/groq/types/model_deleted.py
@@ -1,7 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 
-
 from .._models import BaseModel
 
 __all__ = ["ModelDeleted"]
diff --git a/tests/conftest.py b/tests/conftest.py
index 21b8c75..3fad581 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
 import os
-import asyncio
 import logging
 from typing import TYPE_CHECKING, Iterator, AsyncIterator
 
 import pytest
+from pytest_asyncio import is_async_test
 
 from groq import Groq, AsyncGroq
 
@@ -17,11 +17,13 @@
 logging.getLogger("groq").setLevel(logging.DEBUG)
 
 
-@pytest.fixture(scope="session")
-def event_loop() -> Iterator[asyncio.AbstractEventLoop]:
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
+# automatically add `pytest.mark.asyncio()` to all of our async tests
+# so we don't have to add that boilerplate everywhere
+def pytest_collection_modifyitems(items: list[pytest.Function]) -> None:
+    pytest_asyncio_tests = (item for item in items if is_async_test(item))
+    session_scope_marker = pytest.mark.asyncio(loop_scope="session")
+    for async_test in pytest_asyncio_tests:
+        async_test.add_marker(session_scope_marker, append=False)
 
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
diff --git a/tests/test_client.py b/tests/test_client.py
index fe06746..0dd4280 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -10,6 +10,7 @@
 import tracemalloc
 from typing import Any, Union, cast
 from unittest import mock
+from typing_extensions import Literal
 
 import httpx
 import pytest
@@ -679,6 +680,7 @@ class Model(BaseModel):
             [3, "", 0.5],
             [2, "", 0.5 * 2.0],
             [1, "", 0.5 * 4.0],
+            [-1100, "", 7.8],  # test large number potentially overflowing
         ],
     )
     @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
@@ -753,7 +755,14 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
-    def test_retries_taken(self, client: Groq, failures_before_success: int, respx_mock: MockRouter) -> None:
+    @pytest.mark.parametrize("failure_mode", ["status", "exception"])
+    def test_retries_taken(
+        self,
+        client: Groq,
+        failures_before_success: int,
+        failure_mode: Literal["status", "exception"],
+        respx_mock: MockRouter,
+    ) -> None:
         client = client.with_options(max_retries=4)
 
         nb_retries = 0
@@ -762,6 +771,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
             nonlocal nb_retries
             if nb_retries < failures_before_success:
                 nb_retries += 1
+                if failure_mode == "exception":
+                    raise RuntimeError("oops")
                 return httpx.Response(500)
             return httpx.Response(200)
 
@@ -778,6 +789,69 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
         )
 
         assert response.retries_taken == failures_before_success
+        assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_omit_retry_count_header(self, client: Groq, failures_before_success: int, respx_mock: MockRouter) -> None:
+        client = client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": Omit()},
+        )
+
+        assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_overwrite_retry_count_header(
+        self, client: Groq, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": "42"},
+        )
+
+        assert response.http_request.headers.get("x-stainless-retry-count") == "42"
 
 
 class TestAsyncGroq:
@@ -1425,6 +1499,7 @@ class Model(BaseModel):
             [3, "", 0.5],
             [2, "", 0.5 * 2.0],
             [1, "", 0.5 * 4.0],
+            [-1100, "", 7.8],  # test large number potentially overflowing
         ],
     )
     @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
@@ -1501,8 +1576,13 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
     @pytest.mark.asyncio
+    @pytest.mark.parametrize("failure_mode", ["status", "exception"])
     async def test_retries_taken(
-        self, async_client: AsyncGroq, failures_before_success: int, respx_mock: MockRouter
+        self,
+        async_client: AsyncGroq,
+        failures_before_success: int,
+        failure_mode: Literal["status", "exception"],
+        respx_mock: MockRouter,
     ) -> None:
         client = async_client.with_options(max_retries=4)
 
@@ -1512,6 +1592,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
             nonlocal nb_retries
             if nb_retries < failures_before_success:
                 nb_retries += 1
+                if failure_mode == "exception":
+                    raise RuntimeError("oops")
                 return httpx.Response(500)
             return httpx.Response(200)
 
@@ -1528,3 +1610,70 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
         )
 
         assert response.retries_taken == failures_before_success
+        assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_omit_retry_count_header(
+        self, async_client: AsyncGroq, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": Omit()},
+        )
+
+        assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_overwrite_retry_count_header(
+        self, async_client: AsyncGroq, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": "42"},
+        )
+
+        assert response.http_request.headers.get("x-stainless-retry-count") == "42"
diff --git a/tests/test_models.py b/tests/test_models.py
index bd5c305..d2c5a1d 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -245,7 +245,7 @@ class Model(BaseModel):
     assert m.foo is True
 
     m = Model.construct(foo="CARD_HOLDER")
-    assert m.foo is "CARD_HOLDER"
+    assert m.foo == "CARD_HOLDER"
 
     m = Model.construct(foo={"bar": False})
     assert isinstance(m.foo, Submodel1)
diff --git a/tests/test_response.py b/tests/test_response.py
index b33a361..0111eb5 100644
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -190,6 +190,56 @@ async def test_async_response_parse_annotated_type(async_client: AsyncGroq) -> N
     assert obj.bar == 2
 
 
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+def test_response_parse_bool(client: Groq, content: str, expected: bool) -> None:
+    response = APIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = response.parse(to=bool)
+    assert result is expected
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+async def test_async_response_parse_bool(client: AsyncGroq, content: str, expected: bool) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = await response.parse(to=bool)
+    assert result is expected
+
+
 class OtherModel(BaseModel):
     a: str