Expand Exa capabilities #1888

Merged 15 commits on Feb 3, 2025
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@
# Machine specific
.idea
.vscode
*.code-workspace

# Ignore .env files
.env
10 changes: 9 additions & 1 deletion cookbook/tools/exa_tools.py
@@ -2,7 +2,15 @@
from agno.tools.exa import ExaTools

agent = Agent(
tools=[ExaTools(include_domains=["cnbc.com", "reuters.com", "bloomberg.com"], show_results=True)],
show_tool_calls=True,
)

agent.print_response("Search for AAPL news", markdown=True)

agent.print_response("What is the paper at https://arxiv.org/pdf/2307.06435 about?", markdown=True)

agent.print_response(
"Find me similar papers to https://arxiv.org/pdf/2307.06435 and provide a summary of what they contain",
markdown=True,
)
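
A minimal sketch (not part of this diff) of exercising the new toolkit methods directly, without going through an Agent; it assumes EXA_API_KEY is set in the environment and reuses the arXiv URL from the prompts above.

from agno.tools.exa import ExaTools

exa_tools = ExaTools(summary=True, show_results=True)

# Plain web search, optionally narrowed to a category at call time
print(exa_tools.search_exa("AAPL news", num_results=3, category="news"))

# Fetch full contents for known URLs
print(exa_tools.get_contents(["https://arxiv.org/pdf/2307.06435"]))

# Find pages similar to a given URL
print(exa_tools.find_similar("https://arxiv.org/pdf/2307.06435", num_results=3))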
166 changes: 141 additions & 25 deletions libs/agno/agno/tools/exa.py
@@ -7,16 +7,40 @@

try:
from exa_py import Exa
from exa_py.api import SearchResponse
except ImportError:
raise ImportError("`exa_py` not installed. Please install using `pip install exa_py`")


class ExaTools(Toolkit):
"""
ExaTools is a toolkit for interfacing with the Exa web search engine, providing
functionalities to perform categorized searches and retrieve structured results.

Args:
text (bool): Retrieve text content from results. Default is True.
text_length_limit (int): Max length of text content per result. Default is 1000.
highlights (bool): Include highlighted snippets. Default is True.
summary (bool): Request a summary of each result's content. Default is False.
api_key (Optional[str]): Exa API key. Retrieved from the `EXA_API_KEY` environment variable if not provided.
num_results (Optional[int]): Default number of search results. Overrides the per-call value when set.
start_crawl_date (Optional[str]): Include results crawled on/after this date (`YYYY-MM-DD`).
end_crawl_date (Optional[str]): Include results crawled on/before this date (`YYYY-MM-DD`).
start_published_date (Optional[str]): Include results published on/after this date (`YYYY-MM-DD`).
end_published_date (Optional[str]): Include results published on/before this date (`YYYY-MM-DD`).
use_autoprompt (Optional[bool]): Enable autoprompt features in queries.
type (Optional[str]): The search type to run (e.g., "keyword" or "neural").
category (Optional[str]): Filter results by category. Options are "company", "research paper", "news", "pdf", "github", "tweet", "personal site", "linkedin profile", "financial report".
include_domains (Optional[List[str]]): Restrict results to these domains.
exclude_domains (Optional[List[str]]): Exclude results from these domains.
show_results (bool): Log search results for debugging. Default is False.
"""

def __init__(
self,
text: bool = True,
text_length_limit: int = 1000,
highlights: bool = True,
summary: bool = False,
api_key: Optional[str] = None,
num_results: Optional[int] = None,
start_crawl_date: Optional[str] = None,
@@ -27,6 +51,7 @@ def __init__(
type: Optional[str] = None,
category: Optional[str] = None,
include_domains: Optional[List[str]] = None,
exclude_domains: Optional[List[str]] = None,
show_results: bool = False,
):
super().__init__(name="exa")
@@ -40,24 +65,56 @@ def __init__(
self.text: bool = text
self.text_length_limit: int = text_length_limit
self.highlights: bool = highlights
self.summary: bool = summary
self.num_results: Optional[int] = num_results
self.start_crawl_date: Optional[str] = start_crawl_date
self.end_crawl_date: Optional[str] = end_crawl_date
self.start_published_date: Optional[str] = start_published_date
self.end_published_date: Optional[str] = end_published_date
self.use_autoprompt: Optional[bool] = use_autoprompt
self.type: Optional[str] = type
self.category: Optional[str] = category
self.include_domains: Optional[List[str]] = include_domains
self.exclude_domains: Optional[List[str]] = exclude_domains

self.register(self.search_exa)
self.register(self.get_contents)
self.register(self.find_similar)
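
The expanded constructor now centralizes date, domain, summary, and logging controls; a hypothetical configuration combining several of them (values invented for illustration):

from agno.tools.exa import ExaTools

research_tools = ExaTools(
    text_length_limit=500,
    summary=True,
    category="research paper",
    start_published_date="2024-01-01",
    exclude_domains=["reddit.com"],
    show_results=True,
)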

def _parse_results(self, exa_results: SearchResponse) -> str:
exa_results_parsed = []
for result in exa_results.results:
result_dict = {"url": result.url}
if result.title:
result_dict["title"] = result.title
if result.author and result.author != "":
result_dict["author"] = result.author
if result.published_date:
result_dict["published_date"] = result.published_date
if result.text:
_text = result.text
if self.text_length_limit:
_text = _text[: self.text_length_limit]
result_dict["text"] = _text
if self.highlights:
try:
if result.highlights: # type: ignore
result_dict["highlights"] = result.highlights # type: ignore
except Exception as e:
logger.debug(f"Failed to get highlights {e}")
result_dict["highlights"] = f"Failed to get highlights {e}"
exa_results_parsed.append(result_dict)
return json.dumps(exa_results_parsed, indent=4)
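
For context, _parse_results flattens each hit into a plain dict, truncates text to text_length_limit, and serializes the list; a hypothetical parsed result (all field values invented for illustration) has this shape:

# Hypothetical output of _parse_results; values are illustrative only
[
    {
        "url": "https://example.com/article",
        "title": "Example article title",
        "author": "Jane Doe",
        "published_date": "2024-06-01",
        "text": "First text_length_limit characters of the page text...",
        "highlights": ["A highlighted snippet from the page."]
    }
]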

def search_exa(self, query: str, num_results: int = 5, category: Optional[str] = None) -> str:
"""Use this function to search Exa (a web search engine) for a query.

Args:
query (str): The query to search for.
num_results (int): Number of results to return. Defaults to 5.
category (Optional[str]): The category to filter search results.
Options are "company", "research paper", "news", "pdf", "github",
"tweet", "personal site", "linkedin profile", "financial report".

Returns:
str: The search results in JSON format.
@@ -71,44 +128,103 @@ def search_exa(self, query: str, num_results: int = 5) -> str:
search_kwargs: Dict[str, Any] = {
"text": self.text,
"highlights": self.highlights,
"summary": self.summary,
"num_results": self.num_results or num_results,
"start_crawl_date": self.start_crawl_date,
"end_crawl_date": self.end_crawl_date,
"start_published_date": self.start_published_date,
"end_published_date": self.end_published_date,
"use_autoprompt": self.use_autoprompt,
"type": self.type,
"category": self.category,
"category": self.category or category, # Prefer a user-set category
"include_domains": self.include_domains,
"exclude_domains": self.exclude_domains,
}
# Clean up the kwargs
search_kwargs = {k: v for k, v in search_kwargs.items() if v is not None}
exa_results = exa.search_and_contents(query, **search_kwargs)

parsed_results = self._parse_results(exa_results)
# Optionally log the parsed results for debugging
if self.show_results:
logger.info(parsed_results)
return parsed_results
except Exception as e:
logger.error(f"Failed to search exa {e}")
return f"Error: {e}"

def get_contents(self, urls: list[str]) -> str:
"""
Retrieve detailed content from specific URLs using the Exa API.

Args:
urls (list[str]): A list of URLs from which to fetch content.

Returns:
str: The fetched contents in JSON format.
"""
if not self.api_key:
return "Please set the EXA_API_KEY"

query_kwargs: Dict[str, Any] = {
"text": self.text,
"highlights": self.highlights,
"summary": self.summary,
}

try:
exa = Exa(self.api_key)
logger.info(f"Fetching contents for URLs: {urls}")

exa_results = exa.get_contents(urls=urls, **query_kwargs)

parsed_results = self._parse_results(exa_results)
if self.show_results:
logger.info(parsed_results)

return parsed_results
except Exception as e:
logger.error(f"Failed to get contents from Exa: {e}")
return f"Error: {e}"

def find_similar(self, url: str, num_results: int = 5) -> str:
"""
Find similar links to a given URL using the Exa API.

Args:
url (str): The URL for which to find similar links.
num_results (int, optional): The number of similar links to return. Defaults to 5.

Returns:
str: The search results in JSON format.
"""
if not self.api_key:
return "Please set the EXA_API_KEY"

query_kwargs: Dict[str, Any] = {
"text": self.text,
"highlights": self.highlights,
"summary": self.summary,
"include_domains": self.include_domains,
"exclude_domains": self.exclude_domains,
"start_crawl_date": self.start_crawl_date,
"end_crawl_date": self.end_crawl_date,
"start_published_date": self.start_published_date,
"end_published_date": self.end_published_date,
"num_results": self.num_results or num_results,
}

try:
exa = Exa(self.api_key)
logger.info(f"Finding similar links to: {url}")

exa_results = exa.find_similar_and_contents(url=url, **query_kwargs)

parsed_results = self._parse_results(exa_results)
if self.show_results:
logger.info(parsed_results)

return parsed_results
except Exception as e:
logger.error(f"Failed to get similar links from Exa: {e}")
return f"Error: {e}"