From 7b2cbc097cfe41ce53945e2ca843d4ea5eb6e961 Mon Sep 17 00:00:00 2001
From: Dirk Brand <dirkbrnd@gmail.com>
Date: Sat, 25 Jan 2025 07:02:09 +0200
Subject: [PATCH 1/6] Expand Exa capabilities

---
 cookbook/tools/exa_tools.py |  13 ++-
 phi/tools/exa.py            | 181 ++++++++++++++++++++++++++++++------
 2 files changed, 167 insertions(+), 27 deletions(-)

diff --git a/cookbook/tools/exa_tools.py b/cookbook/tools/exa_tools.py
index c69ec4136b..faef536763 100644
--- a/cookbook/tools/exa_tools.py
+++ b/cookbook/tools/exa_tools.py
@@ -1,5 +1,16 @@
 from phi.agent import Agent
 from phi.tools.exa import ExaTools
 
-agent = Agent(tools=[ExaTools(include_domains=["cnbc.com", "reuters.com", "bloomberg.com"])], show_tool_calls=True)
+agent = Agent(
+    tools=[ExaTools(include_domains=["cnbc.com", "reuters.com", "bloomberg.com"], show_results=True)],
+    show_tool_calls=True,
+)
+
 agent.print_response("Search for AAPL news", markdown=True)
+
+agent.print_response("What is the paper at https://arxiv.org/pdf/2307.06435 about?", markdown=True)
+
+agent.print_response(
+    "Find me similar papers to https://arxiv.org/pdf/2307.06435 and provide a summary of what they contain",
+    markdown=True,
+)
diff --git a/phi/tools/exa.py b/phi/tools/exa.py
index f177a18a10..75093c95e4 100644
--- a/phi/tools/exa.py
+++ b/phi/tools/exa.py
@@ -1,7 +1,10 @@
 import json
+from enum import Enum
 from os import getenv
 from typing import Optional, Dict, Any, List
 
+from exa_py.api import SearchResponse
+
 from phi.tools import Toolkit
 from phi.utils.log import logger
 
@@ -11,12 +14,47 @@
     raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`")
 
 
+class Category(Enum):
+    COMPANY = "company"
+    RESEARCH_PAPER = "research paper"
+    NEWS = "news"
+    PDF = "pdf"
+    GITHUB = "github"
+    TWEET = "tweet"
+    PERSONAL_SITE = "personal site"
+    LINKEDIN_PROFILE = "linkedin profile"
+    FINANCIAL_REPORT = "financial report"
+
+
 class ExaTools(Toolkit):
+    """
+    ExaTools is a toolkit for interfacing with the Exa web search engine, providing
+    functionalities to perform categorized searches and retrieve structured results.
+
+    Args:
+        text (bool): Retrieve text content from results. Default is True.
+        text_length_limit (int): Max length of text content per result. Default is 1000.
+        highlights (bool): Include highlighted snippets. Default is True.
+        api_key (Optional[str]): Exa API key. Retrieved from `EXA_API_KEY` env variable if not provided.
+        num_results (Optional[int]): Default number of search results. Overrides individual searches if set.
+        start_crawl_date (Optional[str]): Include results crawled on/after this date (`YYYY-MM-DD`).
+        end_crawl_date (Optional[str]): Include results crawled on/before this date (`YYYY-MM-DD`).
+        start_published_date (Optional[str]): Include results published on/after this date (`YYYY-MM-DD`).
+        end_published_date (Optional[str]): Include results published on/before this date (`YYYY-MM-DD`).
+        use_autoprompt (Optional[bool]): Enable autoprompt features in queries.
+        type (Optional[str]): Specify content type (e.g., article, blog, video).
+        category (Optional[Category]): Filter results by category.
+        include_domains (Optional[List[str]]): Restrict results to these domains.
+        exclude_domains (Optional[List[str]]): Exclude results from these domains.
+        show_results (bool): Log search results for debugging. Default is False.
+    """
+
     def __init__(
         self,
         text: bool = True,
         text_length_limit: int = 1000,
         highlights: bool = True,
+        summary: bool = False,
         api_key: Optional[str] = None,
         num_results: Optional[int] = None,
         start_crawl_date: Optional[str] = None,
@@ -25,8 +63,9 @@ def __init__(
         end_published_date: Optional[str] = None,
         use_autoprompt: Optional[bool] = None,
         type: Optional[str] = None,
-        category: Optional[str] = None,
+        category: Optional[Category] = None,
         include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
         show_results: bool = False,
     ):
         super().__init__(name="exa")
@@ -40,6 +79,7 @@ def __init__(
         self.text: bool = text
         self.text_length_limit: int = text_length_limit
         self.highlights: bool = highlights
+        self.summary: bool = summary
         self.num_results: Optional[int] = num_results
         self.start_crawl_date: Optional[str] = start_crawl_date
         self.end_crawl_date: Optional[str] = end_crawl_date
@@ -47,17 +87,47 @@ def __init__(
         self.end_published_date: Optional[str] = end_published_date
         self.use_autoprompt: Optional[bool] = use_autoprompt
         self.type: Optional[str] = type
+        self.category: Optional[str] = category.value() if category else None
         self.include_domains: Optional[List[str]] = include_domains
-        self.category: Optional[str] = category
+        self.exclude_domains: Optional[List[str]] = exclude_domains
 
         self.register(self.search_exa)
+        self.register(self.get_contents)
+        self.register(self.find_similar)
+
+    def _parse_results(self, exa_results: SearchResponse) -> str:
+        exa_results_parsed = []
+        for result in exa_results.results:
+            result_dict = {"url": result.url}
+            if result.title:
+                result_dict["title"] = result.title
+            if result.author and result.author != "":
+                result_dict["author"] = result.author
+            if result.published_date:
+                result_dict["published_date"] = result.published_date
+            if result.text:
+                _text = result.text
+                if self.text_length_limit:
+                    _text = _text[: self.text_length_limit]
+                result_dict["text"] = _text
+            if self.highlights:
+                try:
+                    if result.highlights:  # type: ignore
+                        result_dict["highlights"] = result.highlights  # type: ignore
+                except Exception as e:
+                    logger.debug(f"Failed to get highlights {e}")
+            exa_results_parsed.append(result_dict)
+        return json.dumps(exa_results_parsed, indent=4)
 
-    def search_exa(self, query: str, num_results: int = 5) -> str:
+    def search_exa(self, query: str, num_results: int = 5, category: Optional[str] = None) -> str:
         """Use this function to search Exa (a web search engine) for a query.
 
         Args:
             query (str): The query to search for.
             num_results (int): Number of results to return. Defaults to 5.
+            category (Optional[str]): The category to filter search results.
+                Options are "company", "research paper", "news", "pdf", "github",
+                "tweet", "personal site", "linkedin profile", "financial report".
 
         Returns:
             str: The search results in JSON format.
@@ -71,6 +141,7 @@ def search_exa(self, query: str, num_results: int = 5) -> str:
             search_kwargs: Dict[str, Any] = {
                 "text": self.text,
                 "highlights": self.highlights,
+                "summary": self.summary,
                 "num_results": self.num_results or num_results,
                 "start_crawl_date": self.start_crawl_date,
                 "end_crawl_date": self.end_crawl_date,
@@ -78,37 +149,95 @@ def search_exa(self, query: str, num_results: int = 5) -> str:
                 "end_published_date": self.end_published_date,
                 "use_autoprompt": self.use_autoprompt,
                 "type": self.type,
-                "category": self.category,
+                "category": self.category or category,  # Prefer a user-set category
                 "include_domains": self.include_domains,
+                "exclude_domains": self.exclude_domains,
             }
             # Clean up the kwargs
             search_kwargs = {k: v for k, v in search_kwargs.items() if v is not None}
             exa_results = exa.search_and_contents(query, **search_kwargs)
-            exa_results_parsed = []
-            for result in exa_results.results:
-                result_dict = {"url": result.url}
-                if result.title:
-                    result_dict["title"] = result.title
-                if result.author and result.author != "":
-                    result_dict["author"] = result.author
-                if result.published_date:
-                    result_dict["published_date"] = result.published_date
-                if result.text:
-                    _text = result.text
-                    if self.text_length_limit:
-                        _text = _text[: self.text_length_limit]
-                    result_dict["text"] = _text
-                if self.highlights:
-                    try:
-                        if result.highlights:  # type: ignore
-                            result_dict["highlights"] = result.highlights  # type: ignore
-                    except Exception as e:
-                        logger.debug(f"Failed to get highlights {e}")
-                exa_results_parsed.append(result_dict)
-            parsed_results = json.dumps(exa_results_parsed, indent=4)
+
+            parsed_results = self._parse_results(exa_results)
+            # Log the parsed results when show_results is enabled
             if self.show_results:
                 logger.info(parsed_results)
             return parsed_results
         except Exception as e:
             logger.error(f"Failed to search exa {e}")
             return f"Error: {e}"
+
+    def get_contents(self, urls: list[str]) -> str:
+        """
+        Retrieve detailed content from specific URLs using the Exa API.
+
+        Args:
+            urls (list[str]): A list of URLs from which to fetch content.
+
+        Returns:
+            str: The search results in JSON format.
+        """
+        if not self.api_key:
+            return "Please set the EXA_API_KEY"
+
+        query_kwargs: Dict[str, Any] = {
+            "text": self.text,
+            "highlights": self.highlights,
+            "summary": self.summary,
+        }
+
+        try:
+            exa = Exa(self.api_key)
+            logger.info(f"Fetching contents for URLs: {urls}")
+
+            exa_results = exa.get_contents(urls=urls, **query_kwargs)
+
+            parsed_results = self._parse_results(exa_results)
+            if self.show_results:
+                logger.info(parsed_results)
+
+            return parsed_results
+        except Exception as e:
+            logger.error(f"Failed to get contents from Exa: {e}")
+            return f"Error: {e}"
+
+    def find_similar(self, url: str, num_results: int = 5) -> str:
+        """
+        Find similar links to a given URL using the Exa API.
+
+        Args:
+            url (str): The URL for which to find similar links.
+            num_results (int, optional): The number of similar links to return. Defaults to 5.
+
+        Returns:
+            str: The search results in JSON format.
+        """
+        if not self.api_key:
+            return "Please set the EXA_API_KEY"
+
+        query_kwargs: Dict[str, Any] = {
+            "text": self.text,
+            "highlights": self.highlights,
+            "summary": self.summary,
+            "include_domains": self.include_domains,
+            "exclude_domains": self.exclude_domains,
+            "start_crawl_date": self.start_crawl_date,
+            "end_crawl_date": self.end_crawl_date,
+            "start_published_date": self.start_published_date,
+            "end_published_date": self.end_published_date,
+            "num_results": self.num_results or num_results,
+        }
+
+        try:
+            exa = Exa(self.api_key)
+            logger.info(f"Finding similar links to: {url}")
+
+            exa_results = exa.find_similar_and_contents(url=url, **query_kwargs)
+
+            parsed_results = self._parse_results(exa_results)
+            if self.show_results:
+                logger.info(parsed_results)
+
+            return parsed_results
+        except Exception as e:
+            logger.error(f"Failed to get similar links from Exa: {e}")
+            return f"Error: {e}"

From 983feeb2c73e3f1ed7ab0cb91dd79578acff8852 Mon Sep 17 00:00:00 2001
From: Dirk Brand <dirkbrnd@gmail.com>
Date: Sat, 25 Jan 2025 07:05:42 +0200
Subject: [PATCH 2/6] Style

---
 phi/tools/exa.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/phi/tools/exa.py b/phi/tools/exa.py
index 75093c95e4..5f93b60081 100644
--- a/phi/tools/exa.py
+++ b/phi/tools/exa.py
@@ -14,18 +14,6 @@
     raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`")
 
 
-class Category(Enum):
-    COMPANY = "company"
-    RESEARCH_PAPER = "research paper"
-    NEWS = "news"
-    PDF = "pdf"
-    GITHUB = "github"
-    TWEET = "tweet"
-    PERSONAL_SITE = "personal site"
-    LINKEDIN_PROFILE = "linkedin profile"
-    FINANCIAL_REPORT = "financial report"
-
-
 class ExaTools(Toolkit):
     """
     ExaTools is a toolkit for interfacing with the Exa web search engine, providing
@@ -43,7 +31,7 @@ class ExaTools(Toolkit):
         end_published_date (Optional[str]): Include results published on/before this date (`YYYY-MM-DD`).
         use_autoprompt (Optional[bool]): Enable autoprompt features in queries.
         type (Optional[str]): Specify content type (e.g., article, blog, video).
-        category (Optional[Category]): Filter results by category.
+        category (Optional[str]): Filter results by category. Options are "company", "research paper", "news", "pdf", "github", "tweet", "personal site", "linkedin profile", "financial report".
         include_domains (Optional[List[str]]): Restrict results to these domains.
         exclude_domains (Optional[List[str]]): Exclude results from these domains.
         show_results (bool): Log search results for debugging. Default is False.
@@ -63,7 +51,7 @@ def __init__(
         end_published_date: Optional[str] = None,
         use_autoprompt: Optional[bool] = None,
         type: Optional[str] = None,
-        category: Optional[Category] = None,
+        category: Optional[str] = None,
         include_domains: Optional[List[str]] = None,
         exclude_domains: Optional[List[str]] = None,
         show_results: bool = False,
@@ -87,7 +75,7 @@ def __init__(
         self.end_published_date: Optional[str] = end_published_date
         self.use_autoprompt: Optional[bool] = use_autoprompt
         self.type: Optional[str] = type
-        self.category: Optional[str] = category.value() if category else None
+        self.category: Optional[str] = category
         self.include_domains: Optional[List[str]] = include_domains
         self.exclude_domains: Optional[List[str]] = exclude_domains
 

From ffe65efd9f4653ebf60096fd208754d1c6a66dd4 Mon Sep 17 00:00:00 2001
From: Dirk Brand <dirkbrnd@gmail.com>
Date: Mon, 27 Jan 2025 14:57:34 +0200
Subject: [PATCH 3/6] Update

---
 cookbook/examples/streamlit/llm_os/app.py | 4 ++--
 phi/tools/exa.py                          | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/cookbook/examples/streamlit/llm_os/app.py b/cookbook/examples/streamlit/llm_os/app.py
index dde3fa2919..4e19b0dbad 100644
--- a/cookbook/examples/streamlit/llm_os/app.py
+++ b/cookbook/examples/streamlit/llm_os/app.py
@@ -197,7 +197,7 @@ def main() -> None:
         # Display user message first
         with st.chat_message("user"):
             st.write(prompt)
-        
+
         # Then display agent response
         with st.chat_message("agent"):
             # Create an empty container for the streaming response
@@ -209,7 +209,7 @@ def main() -> None:
                         response += chunk.content
                         # Update the response in real-time
                         response_container.markdown(response)
-            
+
         # Add messages to session state after completion
         st.session_state["messages"].append({"role": "user", "content": prompt})
         st.session_state["messages"].append({"role": "agent", "content": response})
diff --git a/phi/tools/exa.py b/phi/tools/exa.py
index 5f93b60081..7ac7d58504 100644
--- a/phi/tools/exa.py
+++ b/phi/tools/exa.py
@@ -1,5 +1,4 @@
 import json
-from enum import Enum
 from os import getenv
 from typing import Optional, Dict, Any, List
 

From 5cf10fe536061b40cf2b672d0b50d970a1536e7c Mon Sep 17 00:00:00 2001
From: Dirk Brand <dirkbrnd@gmail.com>
Date: Tue, 28 Jan 2025 09:16:06 +0200
Subject: [PATCH 4/6] Update

---
 phi/tools/exa.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/phi/tools/exa.py b/phi/tools/exa.py
index 7ac7d58504..1fa05eb214 100644
--- a/phi/tools/exa.py
+++ b/phi/tools/exa.py
@@ -2,13 +2,12 @@
 from os import getenv
 from typing import Optional, Dict, Any, List
 
-from exa_py.api import SearchResponse
-
 from phi.tools import Toolkit
 from phi.utils.log import logger
 
 try:
     from exa_py import Exa
+    from exa_py.api import SearchResponse
 except ImportError:
     raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`")
 
@@ -103,6 +102,7 @@ def _parse_results(self, exa_results: SearchResponse) -> str:
                         result_dict["highlights"] = result.highlights  # type: ignore
                 except Exception as e:
                     logger.debug(f"Failed to get highlights {e}")
+                    result_dict["highlights"] = f"Failed to get highlights {e}"
             exa_results_parsed.append(result_dict)
         return json.dumps(exa_results_parsed, indent=4)
 

From 1d692b03cc46443fc830b164799b806200886816 Mon Sep 17 00:00:00 2001
From: Dirk Brand <dirkbrnd@gmail.com>
Date: Thu, 30 Jan 2025 17:20:05 +0200
Subject: [PATCH 5/6] Remove agno.code-workspace

---
 .gitignore          |  1 +
 agno.code-workspace | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 agno.code-workspace

diff --git a/.gitignore b/.gitignore
index 440782ecf9..15fb20a3e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@
 # Machine specific
 .idea
 .vscode
+*.code-workspace
 
 # Ignore .env files
 .env
diff --git a/agno.code-workspace b/agno.code-workspace
new file mode 100644
index 0000000000..bdd1ddb30a
--- /dev/null
+++ b/agno.code-workspace
@@ -0,0 +1,11 @@
+{
+	"folders": [
+		{
+			"path": "."
+		},
+		{
+			"path": "../agno-docs"
+		}
+	],
+	"settings": {}
+}
\ No newline at end of file

From cc1c1318a8b82403816db05a3a81d12623fc06e2 Mon Sep 17 00:00:00 2001
From: Dirk Brand <dirkbrnd@gmail.com>
Date: Thu, 30 Jan 2025 17:21:41 +0200
Subject: [PATCH 6/6] Update

---
 agno.code-workspace | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 agno.code-workspace

diff --git a/agno.code-workspace b/agno.code-workspace
deleted file mode 100644
index bdd1ddb30a..0000000000
--- a/agno.code-workspace
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-	"folders": [
-		{
-			"path": "."
-		},
-		{
-			"path": "../agno-docs"
-		}
-	],
-	"settings": {}
-}
\ No newline at end of file