From 7b2cbc097cfe41ce53945e2ca843d4ea5eb6e961 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Sat, 25 Jan 2025 07:02:09 +0200 Subject: [PATCH 1/6] Expand Exa capabilities --- cookbook/tools/exa_tools.py | 13 ++- phi/tools/exa.py | 181 ++++++++++++++++++++++++++++++------ 2 files changed, 167 insertions(+), 27 deletions(-) diff --git a/cookbook/tools/exa_tools.py b/cookbook/tools/exa_tools.py index c69ec4136b..faef536763 100644 --- a/cookbook/tools/exa_tools.py +++ b/cookbook/tools/exa_tools.py @@ -1,5 +1,16 @@ from phi.agent import Agent from phi.tools.exa import ExaTools -agent = Agent(tools=[ExaTools(include_domains=["cnbc.com", "reuters.com", "bloomberg.com"])], show_tool_calls=True) +agent = Agent( + tools=[ExaTools(include_domains=["cnbc.com", "reuters.com", "bloomberg.com"], show_results=True)], + show_tool_calls=True, +) + agent.print_response("Search for AAPL news", markdown=True) + +agent.print_response("What is the paper at https://arxiv.org/pdf/2307.06435 about?", markdown=True) + +agent.print_response( + "Find me similar papers to https://arxiv.org/pdf/2307.06435 and provide a summary of what they contain", + markdown=True, +) diff --git a/phi/tools/exa.py b/phi/tools/exa.py index f177a18a10..75093c95e4 100644 --- a/phi/tools/exa.py +++ b/phi/tools/exa.py @@ -1,7 +1,10 @@ import json +from enum import Enum from os import getenv from typing import Optional, Dict, Any, List +from exa_py.api import SearchResponse + from phi.tools import Toolkit from phi.utils.log import logger @@ -11,12 +14,47 @@ raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`") +class Category(Enum): + COMPANY = "company" + RESEARCH_PAPER = "research paper" + NEWS = "news" + PDF = "pdf" + GITHUB = "github" + TWEET = "tweet" + PERSONAL_SITE = "personal site" + LINKEDIN_PROFILE = "linkedin profile" + FINANCIAL_REPORT = "financial report" + + class ExaTools(Toolkit): + """ + ExaTools is a toolkit for interfacing with the Exa web search engine, providing + functionalities to perform categorized searches and retrieve structured results. + + Args: + text (bool): Retrieve text content from results. Default is True. + text_length_limit (int): Max length of text content per result. Default is 1000. + highlights (bool): Include highlighted snippets. Default is True. + api_key (Optional[str]): Exa API key. Retrieved from `EXA_API_KEY` env variable if not provided. + num_results (Optional[int]): Default number of search results. Overrides individual searches if set. + start_crawl_date (Optional[str]): Include results crawled on/after this date (`YYYY-MM-DD`). + end_crawl_date (Optional[str]): Include results crawled on/before this date (`YYYY-MM-DD`). + start_published_date (Optional[str]): Include results published on/after this date (`YYYY-MM-DD`). + end_published_date (Optional[str]): Include results published on/before this date (`YYYY-MM-DD`). + use_autoprompt (Optional[bool]): Enable autoprompt features in queries. + type (Optional[str]): Specify content type (e.g., article, blog, video). + category (Optional[Category]): Filter results by category. + include_domains (Optional[List[str]]): Restrict results to these domains. + exclude_domains (Optional[List[str]]): Exclude results from these domains. + show_results (bool): Log search results for debugging. Default is False. + """ + def __init__( self, text: bool = True, text_length_limit: int = 1000, highlights: bool = True, + summary: bool = False, api_key: Optional[str] = None, num_results: Optional[int] = None, start_crawl_date: Optional[str] = None, @@ -25,8 +63,9 @@ def __init__( end_published_date: Optional[str] = None, use_autoprompt: Optional[bool] = None, type: Optional[str] = None, - category: Optional[str] = None, + category: Optional[Category] = None, include_domains: Optional[List[str]] = None, + exclude_domains: Optional[List[str]] = None, show_results: bool = False, ): super().__init__(name="exa") @@ -40,6 +79,7 @@ def __init__( self.text: bool = text self.text_length_limit: int = text_length_limit self.highlights: bool = highlights + self.summary: bool = summary self.num_results: Optional[int] = num_results self.start_crawl_date: Optional[str] = start_crawl_date self.end_crawl_date: Optional[str] = end_crawl_date @@ -47,17 +87,47 @@ def __init__( self.end_published_date: Optional[str] = end_published_date self.use_autoprompt: Optional[bool] = use_autoprompt self.type: Optional[str] = type + self.category: Optional[str] = category.value() if category else None self.include_domains: Optional[List[str]] = include_domains - self.category: Optional[str] = category + self.exclude_domains: Optional[List[str]] = exclude_domains self.register(self.search_exa) + self.register(self.get_contents) + self.register(self.find_similar) + + def _parse_results(self, exa_results: SearchResponse) -> str: + exa_results_parsed = [] + for result in exa_results.results: + result_dict = {"url": result.url} + if result.title: + result_dict["title"] = result.title + if result.author and result.author != "": + result_dict["author"] = result.author + if result.published_date: + result_dict["published_date"] = result.published_date + if result.text: + _text = result.text + if self.text_length_limit: + _text = _text[: self.text_length_limit] + result_dict["text"] = _text + if self.highlights: + try: + if result.highlights: # type: ignore + result_dict["highlights"] = result.highlights # type: ignore + except Exception as e: + logger.debug(f"Failed to get highlights {e}") + exa_results_parsed.append(result_dict) + return json.dumps(exa_results_parsed, indent=4) - def search_exa(self, query: str, num_results: int = 5) -> str: + def search_exa(self, query: str, num_results: int = 5, category: Optional[str] = None) -> str: """Use this function to search Exa (a web search engine) for a query. Args: query (str): The query to search for. num_results (int): Number of results to return. Defaults to 5. + category (Optional[str]): The category to filter search results. + Options are "company", "research paper", "news", "pdf", "github", + "tweet", "personal site", "linkedin profile", "financial report". Returns: str: The search results in JSON format. @@ -71,6 +141,7 @@ def search_exa(self, query: str, num_results: int = 5) -> str: search_kwargs: Dict[str, Any] = { "text": self.text, "highlights": self.highlights, + "summary": self.summary, "num_results": self.num_results or num_results, "start_crawl_date": self.start_crawl_date, "end_crawl_date": self.end_crawl_date, @@ -78,37 +149,95 @@ def search_exa(self, query: str, num_results: int = 5) -> str: "end_published_date": self.end_published_date, "use_autoprompt": self.use_autoprompt, "type": self.type, - "category": self.category, + "category": self.category or category, # Prefer a user-set category "include_domains": self.include_domains, + "exclude_domains": self.exclude_domains, } # Clean up the kwargs search_kwargs = {k: v for k, v in search_kwargs.items() if v is not None} exa_results = exa.search_and_contents(query, **search_kwargs) - exa_results_parsed = [] - for result in exa_results.results: - result_dict = {"url": result.url} - if result.title: - result_dict["title"] = result.title - if result.author and result.author != "": - result_dict["author"] = result.author - if result.published_date: - result_dict["published_date"] = result.published_date - if result.text: - _text = result.text - if self.text_length_limit: - _text = _text[: self.text_length_limit] - result_dict["text"] = _text - if self.highlights: - try: - if result.highlights: # type: ignore - result_dict["highlights"] = result.highlights # type: ignore - except Exception as e: - logger.debug(f"Failed to get highlights {e}") - exa_results_parsed.append(result_dict) - parsed_results = json.dumps(exa_results_parsed, indent=4) + + parsed_results = self._parse_results(exa_results) + # Extract search results if self.show_results: logger.info(parsed_results) return parsed_results except Exception as e: logger.error(f"Failed to search exa {e}") return f"Error: {e}" + + def get_contents(self, urls: list[str]) -> str: + """ + Retrieve detailed content from specific URLs using the Exa API. + + Args: + urls (list(str)): A list of URLs from which to fetch content. + + Returns: + str: The search results in JSON format. + """ + if not self.api_key: + return "Please set the EXA_API_KEY" + + query_kwargs: Dict[str, Any] = { + "text": self.text, + "highlights": self.highlights, + "summary": self.summary, + } + + try: + exa = Exa(self.api_key) + logger.info(f"Fetching contents for URLs: {urls}") + + exa_results = exa.get_contents(urls=urls, **query_kwargs) + + parsed_results = self._parse_results(exa_results) + if self.show_results: + logger.info(parsed_results) + + return parsed_results + except Exception as e: + logger.error(f"Failed to get contents from Exa: {e}") + return f"Error: {e}" + + def find_similar(self, url: str, num_results: int = 5) -> str: + """ + Find similar links to a given URL using the Exa API. + + Args: + url (str): The URL for which to find similar links. + num_results (int, optional): The number of similar links to return. Defaults to 5. + + Returns: + str: The search results in JSON format. + """ + if not self.api_key: + return "Please set the EXA_API_KEY" + + query_kwargs: Dict[str, Any] = { + "text": self.text, + "highlights": self.highlights, + "summary": self.summary, + "include_domains": self.include_domains, + "exclude_domains": self.exclude_domains, + "start_crawl_date": self.start_crawl_date, + "end_crawl_date": self.end_crawl_date, + "start_published_date": self.start_published_date, + "end_published_date": self.end_published_date, + "num_results": self.num_results or num_results, + } + + try: + exa = Exa(self.api_key) + logger.info(f"Finding similar links to: {url}") + + exa_results = exa.find_similar_and_contents(url=url, **query_kwargs) + + parsed_results = self._parse_results(exa_results) + if self.show_results: + logger.info(parsed_results) + + return parsed_results + except Exception as e: + logger.error(f"Failed to get similar links from Exa: {e}") + return f"Error: {e}" From 983feeb2c73e3f1ed7ab0cb91dd79578acff8852 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Sat, 25 Jan 2025 07:05:42 +0200 Subject: [PATCH 2/6] Style --- phi/tools/exa.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/phi/tools/exa.py b/phi/tools/exa.py index 75093c95e4..5f93b60081 100644 --- a/phi/tools/exa.py +++ b/phi/tools/exa.py @@ -14,18 +14,6 @@ raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`") -class Category(Enum): - COMPANY = "company" - RESEARCH_PAPER = "research paper" - NEWS = "news" - PDF = "pdf" - GITHUB = "github" - TWEET = "tweet" - PERSONAL_SITE = "personal site" - LINKEDIN_PROFILE = "linkedin profile" - FINANCIAL_REPORT = "financial report" - - class ExaTools(Toolkit): """ ExaTools is a toolkit for interfacing with the Exa web search engine, providing @@ -43,7 +31,7 @@ class ExaTools(Toolkit): end_published_date (Optional[str]): Include results published on/before this date (`YYYY-MM-DD`). use_autoprompt (Optional[bool]): Enable autoprompt features in queries. type (Optional[str]): Specify content type (e.g., article, blog, video). - category (Optional[Category]): Filter results by category. + category (Optional[str]): Filter results by category. Options are "company", "research paper", "news", "pdf", "github", "tweet", "personal site", "linkedin profile", "financial report". include_domains (Optional[List[str]]): Restrict results to these domains. exclude_domains (Optional[List[str]]): Exclude results from these domains. show_results (bool): Log search results for debugging. Default is False. @@ -63,7 +51,7 @@ def __init__( end_published_date: Optional[str] = None, use_autoprompt: Optional[bool] = None, type: Optional[str] = None, - category: Optional[Category] = None, + category: Optional[str] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, show_results: bool = False, @@ -87,7 +75,7 @@ def __init__( self.end_published_date: Optional[str] = end_published_date self.use_autoprompt: Optional[bool] = use_autoprompt self.type: Optional[str] = type - self.category: Optional[str] = category.value() if category else None + self.category: Optional[str] = category self.include_domains: Optional[List[str]] = include_domains self.exclude_domains: Optional[List[str]] = exclude_domains From ffe65efd9f4653ebf60096fd208754d1c6a66dd4 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Mon, 27 Jan 2025 14:57:34 +0200 Subject: [PATCH 3/6] Update --- cookbook/examples/streamlit/llm_os/app.py | 4 ++-- phi/tools/exa.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cookbook/examples/streamlit/llm_os/app.py b/cookbook/examples/streamlit/llm_os/app.py index dde3fa2919..4e19b0dbad 100644 --- a/cookbook/examples/streamlit/llm_os/app.py +++ b/cookbook/examples/streamlit/llm_os/app.py @@ -197,7 +197,7 @@ def main() -> None: # Display user message first with st.chat_message("user"): st.write(prompt) - + # Then display agent response with st.chat_message("agent"): # Create an empty container for the streaming response @@ -209,7 +209,7 @@ def main() -> None: response += chunk.content # Update the response in real-time response_container.markdown(response) - + # Add messages to session state after completion st.session_state["messages"].append({"role": "user", "content": prompt}) st.session_state["messages"].append({"role": "agent", "content": response}) diff --git a/phi/tools/exa.py b/phi/tools/exa.py index 5f93b60081..7ac7d58504 100644 --- a/phi/tools/exa.py +++ b/phi/tools/exa.py @@ -1,5 +1,4 @@ import json -from enum import Enum from os import getenv from typing import Optional, Dict, Any, List From 5cf10fe536061b40cf2b672d0b50d970a1536e7c Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 28 Jan 2025 09:16:06 +0200 Subject: [PATCH 4/6] Update --- phi/tools/exa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phi/tools/exa.py b/phi/tools/exa.py index 7ac7d58504..1fa05eb214 100644 --- a/phi/tools/exa.py +++ b/phi/tools/exa.py @@ -2,13 +2,12 @@ from os import getenv from typing import Optional, Dict, Any, List -from exa_py.api import SearchResponse - from phi.tools import Toolkit from phi.utils.log import logger try: from exa_py import Exa + from exa_py.api import SearchResponse except ImportError: raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`") @@ -103,6 +102,7 @@ def _parse_results(self, exa_results: SearchResponse) -> str: result_dict["highlights"] = result.highlights # type: ignore except Exception as e: logger.debug(f"Failed to get highlights {e}") + result_dict["highlights"] = f"Failed to get highlights {e}" exa_results_parsed.append(result_dict) return json.dumps(exa_results_parsed, indent=4) From 1d692b03cc46443fc830b164799b806200886816 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Thu, 30 Jan 2025 17:20:05 +0200 Subject: [PATCH 5/6] Remove agno.code-workspace --- .gitignore | 1 + agno.code-workspace | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 agno.code-workspace diff --git a/.gitignore b/.gitignore index 440782ecf9..15fb20a3e7 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ # Machine specific .idea .vscode +*.code-workspace # Ignore .env files .env diff --git a/agno.code-workspace b/agno.code-workspace new file mode 100644 index 0000000000..bdd1ddb30a --- /dev/null +++ b/agno.code-workspace @@ -0,0 +1,11 @@ +{ + "folders": [ + { + "path": "." + }, + { + "path": "../agno-docs" + } + ], + "settings": {} +} \ No newline at end of file From cc1c1318a8b82403816db05a3a81d12623fc06e2 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Thu, 30 Jan 2025 17:21:41 +0200 Subject: [PATCH 6/6] Update --- agno.code-workspace | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 agno.code-workspace diff --git a/agno.code-workspace b/agno.code-workspace deleted file mode 100644 index bdd1ddb30a..0000000000 --- a/agno.code-workspace +++ /dev/null @@ -1,11 +0,0 @@ -{ - "folders": [ - { - "path": "." - }, - { - "path": "../agno-docs" - } - ], - "settings": {} -} \ No newline at end of file