Pre/beta #963
Conversation
tune scraper
## [1.44.0-beta.1](v1.43.1-beta.1...v1.44.0-beta.1) (2025-04-14)

### Features

* add new proxy rotation ([8913d8d](8913d8d))
I opened a Pull Request with the following: 🔄 2 test files added. 🔄 Test Updates: I've added 2 tests. They all pass ☑️
No existing tests required updates. 🐛 Bug Detection: Potential issues:
Test Error Log
tests.utils.test_proxy_rotation#test_parse_or_search_proxy_success: def test_parse_or_search_proxy_success():
proxy = {
"server": "192.168.1.1:8080",
"username": "username",
"password": "password",
}
> parsed_proxy = parse_or_search_proxy(proxy)
tests/utils/test_proxy_rotation.py:82:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
proxy = {'password': 'password', 'server': '192.168.1.1:8080', 'username': 'username'}
def parse_or_search_proxy(proxy: Proxy) -> ProxySettings:
"""
Parses a proxy configuration or searches for a matching one via broker.
"""
assert "server" in proxy, "Missing 'server' field in the proxy configuration."
parsed_url = urlparse(proxy["server"])
server_address = parsed_url.hostname
if server_address is None:
> raise ValueError(f"Invalid proxy server format: {proxy['server']}")
E ValueError: Invalid proxy server format: 192.168.1.1:8080
scrapegraphai/utils/proxy_rotation.py:200: ValueError
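The first failure comes from how the proxy server string is parsed: without a scheme (or a `//` netloc marker), `urlparse` leaves `hostname` as `None` for a bare `host:port` value, which is what triggers the `ValueError` above. Below is a minimal sketch of one way to normalize such values before parsing; `_normalize_proxy_server` is a hypothetical helper, not something that exists in the repository.

```python
from urllib.parse import urlparse

def _normalize_proxy_server(server: str) -> str:
    # Hypothetical helper: prepend a default scheme so urlparse() can split
    # a bare "host:port" string into hostname and port.
    return server if "://" in server else f"http://{server}"

parsed = urlparse(_normalize_proxy_server("192.168.1.1:8080"))
assert parsed.hostname == "192.168.1.1"
assert parsed.port == 8080
```

Whether the implementation should accept scheme-less servers or the test should pass `http://192.168.1.1:8080` instead is a maintainer call; the sketch only illustrates the parsing behaviour behind the failure.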
tests.utils.test_proxy_rotation#test_parse_or_search_proxy_exception: def test_parse_or_search_proxy_exception():
proxy = {
"username": "username",
"password": "password",
}
with pytest.raises(AssertionError) as error_info:
parse_or_search_proxy(proxy)
> assert "missing server in the proxy configuration" in str(error_info.value)
E assert 'missing server in the proxy configuration' in "Missing 'server' field in the proxy configuration."
E + where "Missing 'server' field in the proxy configuration." = str(AssertionError("Missing 'server' field in the proxy configuration."))
E + where AssertionError("Missing 'server' field in the proxy configuration.") = <ExceptionInfo AssertionError("Missing 'server' field in the proxy configuration.") tblen=2>.value
tests/utils/test_proxy_rotation.py:110: AssertionError
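The second failure is only a message mismatch: the implementation raises `AssertionError("Missing 'server' field in the proxy configuration.")` while the test searches for a different, lower-case phrase. A minimal sketch of a less wording-sensitive assertion, assuming the current message is the intended one:

```python
import pytest

from scrapegraphai.utils.proxy_rotation import parse_or_search_proxy

def test_parse_or_search_proxy_exception():
    proxy = {"username": "username", "password": "password"}
    # Match the stable parts of the message instead of its exact casing.
    with pytest.raises(AssertionError, match=r"(?i)missing .*server.* proxy configuration"):
        parse_or_search_proxy(proxy)
```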
tests.utils.test_proxy_rotation#test_parse_or_search_proxy_unknown_server: def test_parse_or_search_proxy_unknown_server():
proxy = {
"server": "unknown",
}
with pytest.raises(AssertionError) as error_info:
> parse_or_search_proxy(proxy)
tests/utils/test_proxy_rotation.py:119:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
proxy = {'server': 'unknown'}
def parse_or_search_proxy(proxy: Proxy) -> ProxySettings:
"""
Parses a proxy configuration or searches for a matching one via broker.
"""
assert "server" in proxy, "Missing 'server' field in the proxy configuration."
parsed_url = urlparse(proxy["server"])
server_address = parsed_url.hostname
if server_address is None:
> raise ValueError(f"Invalid proxy server format: {proxy['server']}")
E ValueError: Invalid proxy server format: unknown
scrapegraphai/utils/proxy_rotation.py:200: ValueError
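The third failure follows the same parsing path: `"unknown"` yields no hostname, so the function raises `ValueError` rather than the `AssertionError` the test expects. If the current `ValueError` behaviour is the intended one (an assumption, not something stated in this PR), the test could be aligned like this:

```python
import pytest

from scrapegraphai.utils.proxy_rotation import parse_or_search_proxy

def test_parse_or_search_proxy_unknown_server():
    # "unknown" cannot be parsed into a hostname, so a ValueError is raised.
    with pytest.raises(ValueError, match="Invalid proxy server format"):
        parse_or_search_proxy({"server": "unknown"})
```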
☂️ Coverage Improvements: Coverage improvements by file:
🎨 Final Touches
Settings | Logs | CodeBeaver
Pre/beta - Unit Tests
I opened a Pull Request with the following: 🔄 2 test files added. 🔄 Test Updates: I've added 2 tests. They all pass ☑️
No existing tests required updates. 🐛 Bug Detection: Potential issues:
if llm_params["model_provider"] == "bedrock":
    llm_params["model_kwargs"] = {
        "temperature": llm_params.pop("temperature", None)  # Use None as default if not provided
    }
This change would allow the code to work correctly even when the 'temperature' parameter is not provided in the test configuration (see the sketch after the error log below).
Test Error Log
tests.graphs.abstract_graph_test.TestAbstractGraph#test_create_llm[llm_config5-ChatBedrock]: self = <abstract_graph_test.TestGraph object at 0x7fa2b6a70d90>
llm_config = {'model': 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0', 'region_name': 'IDK'}
def _create_llm(self, llm_config: dict) -> object:
"""
Create a large language model instance based on the configuration provided.
Args:
llm_config (dict): Configuration parameters for the language model.
Returns:
object: An instance of the language model client.
Raises:
KeyError: If the model is not supported.
"""
llm_defaults = {"streaming": False}
llm_params = {**llm_defaults, **llm_config}
rate_limit_params = llm_params.pop("rate_limit", {})
if rate_limit_params:
requests_per_second = rate_limit_params.get("requests_per_second")
max_retries = rate_limit_params.get("max_retries")
if requests_per_second is not None:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
llm_params["rate_limiter"] = InMemoryRateLimiter(
requests_per_second=requests_per_second
)
if max_retries is not None:
llm_params["max_retries"] = max_retries
if "model_instance" in llm_params:
try:
self.model_token = llm_params["model_tokens"]
except KeyError as exc:
raise KeyError("model_tokens not specified") from exc
return llm_params["model_instance"]
known_providers = {
"openai",
"azure_openai",
"google_genai",
"google_vertexai",
"ollama",
"oneapi",
"nvidia",
"groq",
"anthropic",
"bedrock",
"mistralai",
"hugging_face",
"deepseek",
"ernie",
"fireworks",
"clod",
"togetherai",
}
if "/" in llm_params["model"]:
split_model_provider = llm_params["model"].split("/", 1)
llm_params["model_provider"] = split_model_provider[0]
llm_params["model"] = split_model_provider[1]
else:
possible_providers = [
provider
for provider, models_d in models_tokens.items()
if llm_params["model"] in models_d
]
if len(possible_providers) <= 0:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
llm_params["model_provider"] = possible_providers[0]
print(
(
f"Found providers {possible_providers} for model {llm_params['model']}, using {llm_params['model_provider']}.\n"
"If it was not intended please specify the model provider in the graph configuration"
)
)
if llm_params["model_provider"] not in known_providers:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
if llm_params.get("model_tokens", None) is None:
try:
self.model_token = models_tokens[llm_params["model_provider"]][
llm_params["model"]
]
except KeyError:
print(
f"""Max input tokens for model {llm_params["model_provider"]}/{llm_params["model"]} not found,
please specify the model_tokens parameter in the llm section of the graph configuration.
Using default token size: 8192"""
)
self.model_token = 8192
else:
self.model_token = llm_params["model_tokens"]
try:
if llm_params["model_provider"] not in {
"oneapi",
"nvidia",
"ernie",
"deepseek",
"togetherai",
"clod",
}:
if llm_params["model_provider"] == "bedrock":
llm_params["model_kwargs"] = {
> "temperature": llm_params.pop("temperature")
}
E KeyError: 'temperature'
scrapegraphai/graphs/abstract_graph.py:223: KeyError
During handling of the above exception, another exception occurred:
self = <abstract_graph_test.TestAbstractGraph object at 0x7fa2b6be8210>
llm_config = {'model': 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0', 'region_name': 'IDK'}
expected_model = <class 'langchain_aws.chat_models.bedrock.ChatBedrock'>
@pytest.mark.parametrize(
"llm_config, expected_model",
[
(
{"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-randomtest001"},
ChatOpenAI,
),
(
{
"model": "azure_openai/gpt-3.5-turbo",
"api_key": "random-api-key",
"api_version": "no version",
"azure_endpoint": "https://www.example.com/",
},
AzureChatOpenAI,
),
({"model": "ollama/llama2"}, ChatOllama),
({"model": "oneapi/qwen-turbo", "api_key": "oneapi-api-key"}, OneApi),
(
{"model": "deepseek/deepseek-coder", "api_key": "deepseek-api-key"},
DeepSeek,
),
(
{
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
"region_name": "IDK",
},
ChatBedrock,
),
],
)
def test_create_llm(self, llm_config, expected_model):
> graph = TestGraph("Test prompt", {"llm": llm_config})
tests/graphs/abstract_graph_test.py:87:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/graphs/abstract_graph_test.py:19: in __init__
super().__init__(prompt, config)
scrapegraphai/graphs/abstract_graph.py:60: in __init__
self.llm_model = self._create_llm(config["llm"])
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <abstract_graph_test.TestGraph object at 0x7fa2b6a70d90>
llm_config = {'model': 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0', 'region_name': 'IDK'}
def _create_llm(self, llm_config: dict) -> object:
"""
Create a large language model instance based on the configuration provided.
Args:
llm_config (dict): Configuration parameters for the language model.
Returns:
object: An instance of the language model client.
Raises:
KeyError: If the model is not supported.
"""
llm_defaults = {"streaming": False}
llm_params = {**llm_defaults, **llm_config}
rate_limit_params = llm_params.pop("rate_limit", {})
if rate_limit_params:
requests_per_second = rate_limit_params.get("requests_per_second")
max_retries = rate_limit_params.get("max_retries")
if requests_per_second is not None:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
llm_params["rate_limiter"] = InMemoryRateLimiter(
requests_per_second=requests_per_second
)
if max_retries is not None:
llm_params["max_retries"] = max_retries
if "model_instance" in llm_params:
try:
self.model_token = llm_params["model_tokens"]
except KeyError as exc:
raise KeyError("model_tokens not specified") from exc
return llm_params["model_instance"]
known_providers = {
"openai",
"azure_openai",
"google_genai",
"google_vertexai",
"ollama",
"oneapi",
"nvidia",
"groq",
"anthropic",
"bedrock",
"mistralai",
"hugging_face",
"deepseek",
"ernie",
"fireworks",
"clod",
"togetherai",
}
if "/" in llm_params["model"]:
split_model_provider = llm_params["model"].split("/", 1)
llm_params["model_provider"] = split_model_provider[0]
llm_params["model"] = split_model_provider[1]
else:
possible_providers = [
provider
for provider, models_d in models_tokens.items()
if llm_params["model"] in models_d
]
if len(possible_providers) <= 0:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
llm_params["model_provider"] = possible_providers[0]
print(
(
f"Found providers {possible_providers} for model {llm_params['model']}, using {llm_params['model_provider']}.\n"
"If it was not intended please specify the model provider in the graph configuration"
)
)
if llm_params["model_provider"] not in known_providers:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
if llm_params.get("model_tokens", None) is None:
try:
self.model_token = models_tokens[llm_params["model_provider"]][
llm_params["model"]
]
except KeyError:
print(
f"""Max input tokens for model {llm_params["model_provider"]}/{llm_params["model"]} not found,
please specify the model_tokens parameter in the llm section of the graph configuration.
Using default token size: 8192"""
)
self.model_token = 8192
else:
self.model_token = llm_params["model_tokens"]
try:
if llm_params["model_provider"] not in {
"oneapi",
"nvidia",
"ernie",
"deepseek",
"togetherai",
"clod",
}:
if llm_params["model_provider"] == "bedrock":
llm_params["model_kwargs"] = {
"temperature": llm_params.pop("temperature")
}
with warnings.catch_warnings():
warnings.simplefilter("ignore")
return init_chat_model(**llm_params)
else:
model_provider = llm_params.pop("model_provider")
if model_provider == "clod":
return CLoD(**llm_params)
if model_provider == "deepseek":
return DeepSeek(**llm_params)
if model_provider == "ernie":
from langchain_community.chat_models import ErnieBotChat
return ErnieBotChat(**llm_params)
elif model_provider == "oneapi":
return OneApi(**llm_params)
elif model_provider == "togetherai":
try:
from langchain_together import ChatTogether
except ImportError:
raise ImportError(
"""The langchain_together module is not installed.
Please install it using 'pip install langchain-together'."""
)
return ChatTogether(**llm_params)
elif model_provider == "nvidia":
try:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
except ImportError:
raise ImportError(
"""The langchain_nvidia_ai_endpoints module is not installed.
Please install it using 'pip install langchain-nvidia-ai-endpoints'."""
)
return ChatNVIDIA(**llm_params)
except Exception as e:
> raise Exception(f"Error instancing model: {e}")
E Exception: Error instancing model: 'temperature'
scrapegraphai/graphs/abstract_graph.py:266: Exception
tests.graphs.abstract_graph_test.TestAbstractGraph#test_create_llm_with_rate_limit[llm_config5-ChatBedrock]: self = <abstract_graph_test.TestGraph object at 0x7fa2b6a27810>
llm_config = {'model': 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0', 'rate_limit': {'requests_per_second': 1}, 'region_name': 'IDK'}
def _create_llm(self, llm_config: dict) -> object:
"""
Create a large language model instance based on the configuration provided.
Args:
llm_config (dict): Configuration parameters for the language model.
Returns:
object: An instance of the language model client.
Raises:
KeyError: If the model is not supported.
"""
llm_defaults = {"streaming": False}
llm_params = {**llm_defaults, **llm_config}
rate_limit_params = llm_params.pop("rate_limit", {})
if rate_limit_params:
requests_per_second = rate_limit_params.get("requests_per_second")
max_retries = rate_limit_params.get("max_retries")
if requests_per_second is not None:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
llm_params["rate_limiter"] = InMemoryRateLimiter(
requests_per_second=requests_per_second
)
if max_retries is not None:
llm_params["max_retries"] = max_retries
if "model_instance" in llm_params:
try:
self.model_token = llm_params["model_tokens"]
except KeyError as exc:
raise KeyError("model_tokens not specified") from exc
return llm_params["model_instance"]
known_providers = {
"openai",
"azure_openai",
"google_genai",
"google_vertexai",
"ollama",
"oneapi",
"nvidia",
"groq",
"anthropic",
"bedrock",
"mistralai",
"hugging_face",
"deepseek",
"ernie",
"fireworks",
"clod",
"togetherai",
}
if "/" in llm_params["model"]:
split_model_provider = llm_params["model"].split("/", 1)
llm_params["model_provider"] = split_model_provider[0]
llm_params["model"] = split_model_provider[1]
else:
possible_providers = [
provider
for provider, models_d in models_tokens.items()
if llm_params["model"] in models_d
]
if len(possible_providers) <= 0:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
llm_params["model_provider"] = possible_providers[0]
print(
(
f"Found providers {possible_providers} for model {llm_params['model']}, using {llm_params['model_provider']}.\n"
"If it was not intended please specify the model provider in the graph configuration"
)
)
if llm_params["model_provider"] not in known_providers:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
if llm_params.get("model_tokens", None) is None:
try:
self.model_token = models_tokens[llm_params["model_provider"]][
llm_params["model"]
]
except KeyError:
print(
f"""Max input tokens for model {llm_params["model_provider"]}/{llm_params["model"]} not found,
please specify the model_tokens parameter in the llm section of the graph configuration.
Using default token size: 8192"""
)
self.model_token = 8192
else:
self.model_token = llm_params["model_tokens"]
try:
if llm_params["model_provider"] not in {
"oneapi",
"nvidia",
"ernie",
"deepseek",
"togetherai",
"clod",
}:
if llm_params["model_provider"] == "bedrock":
llm_params["model_kwargs"] = {
> "temperature": llm_params.pop("temperature")
}
E KeyError: 'temperature'
scrapegraphai/graphs/abstract_graph.py:223: KeyError
During handling of the above exception, another exception occurred:
self = <abstract_graph_test.TestAbstractGraph object at 0x7fa2b6bea3d0>
llm_config = {'model': 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0', 'rate_limit': {'requests_per_second': 1}, 'region_name': 'IDK'}
expected_model = <class 'langchain_aws.chat_models.bedrock.ChatBedrock'>
@pytest.mark.parametrize(
"llm_config, expected_model",
[
(
{
"model": "openai/gpt-3.5-turbo",
"openai_api_key": "sk-randomtest001",
"rate_limit": {"requests_per_second": 1},
},
ChatOpenAI,
),
(
{
"model": "azure_openai/gpt-3.5-turbo",
"api_key": "random-api-key",
"api_version": "no version",
"azure_endpoint": "https://www.example.com/",
"rate_limit": {"requests_per_second": 1},
},
AzureChatOpenAI,
),
(
{"model": "ollama/llama2", "rate_limit": {"requests_per_second": 1}},
ChatOllama,
),
(
{
"model": "oneapi/qwen-turbo",
"api_key": "oneapi-api-key",
"rate_limit": {"requests_per_second": 1},
},
OneApi,
),
(
{
"model": "deepseek/deepseek-coder",
"api_key": "deepseek-api-key",
"rate_limit": {"requests_per_second": 1},
},
DeepSeek,
),
(
{
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
"region_name": "IDK",
"rate_limit": {"requests_per_second": 1},
},
ChatBedrock,
),
],
)
def test_create_llm_with_rate_limit(self, llm_config, expected_model):
> graph = TestGraph("Test prompt", {"llm": llm_config})
tests/graphs/abstract_graph_test.py:146:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/graphs/abstract_graph_test.py:19: in __init__
super().__init__(prompt, config)
scrapegraphai/graphs/abstract_graph.py:60: in __init__
self.llm_model = self._create_llm(config["llm"])
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <abstract_graph_test.TestGraph object at 0x7fa2b6a27810>
llm_config = {'model': 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0', 'rate_limit': {'requests_per_second': 1}, 'region_name': 'IDK'}
def _create_llm(self, llm_config: dict) -> object:
"""
Create a large language model instance based on the configuration provided.
Args:
llm_config (dict): Configuration parameters for the language model.
Returns:
object: An instance of the language model client.
Raises:
KeyError: If the model is not supported.
"""
llm_defaults = {"streaming": False}
llm_params = {**llm_defaults, **llm_config}
rate_limit_params = llm_params.pop("rate_limit", {})
if rate_limit_params:
requests_per_second = rate_limit_params.get("requests_per_second")
max_retries = rate_limit_params.get("max_retries")
if requests_per_second is not None:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
llm_params["rate_limiter"] = InMemoryRateLimiter(
requests_per_second=requests_per_second
)
if max_retries is not None:
llm_params["max_retries"] = max_retries
if "model_instance" in llm_params:
try:
self.model_token = llm_params["model_tokens"]
except KeyError as exc:
raise KeyError("model_tokens not specified") from exc
return llm_params["model_instance"]
known_providers = {
"openai",
"azure_openai",
"google_genai",
"google_vertexai",
"ollama",
"oneapi",
"nvidia",
"groq",
"anthropic",
"bedrock",
"mistralai",
"hugging_face",
"deepseek",
"ernie",
"fireworks",
"clod",
"togetherai",
}
if "/" in llm_params["model"]:
split_model_provider = llm_params["model"].split("/", 1)
llm_params["model_provider"] = split_model_provider[0]
llm_params["model"] = split_model_provider[1]
else:
possible_providers = [
provider
for provider, models_d in models_tokens.items()
if llm_params["model"] in models_d
]
if len(possible_providers) <= 0:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
llm_params["model_provider"] = possible_providers[0]
print(
(
f"Found providers {possible_providers} for model {llm_params['model']}, using {llm_params['model_provider']}.\n"
"If it was not intended please specify the model provider in the graph configuration"
)
)
if llm_params["model_provider"] not in known_providers:
raise ValueError(
f"""Provider {llm_params["model_provider"]} is not supported.
If possible, try to use a model instance instead."""
)
if llm_params.get("model_tokens", None) is None:
try:
self.model_token = models_tokens[llm_params["model_provider"]][
llm_params["model"]
]
except KeyError:
print(
f"""Max input tokens for model {llm_params["model_provider"]}/{llm_params["model"]} not found,
please specify the model_tokens parameter in the llm section of the graph configuration.
Using default token size: 8192"""
)
self.model_token = 8192
else:
self.model_token = llm_params["model_tokens"]
try:
if llm_params["model_provider"] not in {
"oneapi",
"nvidia",
"ernie",
"deepseek",
"togetherai",
"clod",
}:
if llm_params["model_provider"] == "bedrock":
llm_params["model_kwargs"] = {
"temperature": llm_params.pop("temperature")
}
with warnings.catch_warnings():
warnings.simplefilter("ignore")
return init_chat_model(**llm_params)
else:
model_provider = llm_params.pop("model_provider")
if model_provider == "clod":
return CLoD(**llm_params)
if model_provider == "deepseek":
return DeepSeek(**llm_params)
if model_provider == "ernie":
from langchain_community.chat_models import ErnieBotChat
return ErnieBotChat(**llm_params)
elif model_provider == "oneapi":
return OneApi(**llm_params)
elif model_provider == "togetherai":
try:
from langchain_together import ChatTogether
except ImportError:
raise ImportError(
"""The langchain_together module is not installed.
Please install it using 'pip install langchain-together'."""
)
return ChatTogether(**llm_params)
elif model_provider == "nvidia":
try:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
except ImportError:
raise ImportError(
"""The langchain_nvidia_ai_endpoints module is not installed.
Please install it using 'pip install langchain-nvidia-ai-endpoints'."""
)
return ChatNVIDIA(**llm_params)
except Exception as e:
> raise Exception(f"Error instancing model: {e}")
E Exception: Error instancing model: 'temperature'
scrapegraphai/graphs/abstract_graph.py:266: Exception
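A minimal, self-contained sketch of the change suggested at the top of this report (an assumption about the eventual fix, not the merged code): pop `temperature` with a default so Bedrock configurations that omit it no longer raise `KeyError`.

```python
# Illustrative llm_params, mirroring the failing ChatBedrock test config.
llm_params = {
    "model_provider": "bedrock",
    "model": "anthropic.claude-3-sonnet-20240229-v1:0",
    "region_name": "IDK",
    "streaming": False,
}

if llm_params["model_provider"] == "bedrock":
    temperature = llm_params.pop("temperature", None)  # default avoids KeyError
    if temperature is not None:
        # Only build model_kwargs when the caller actually set a temperature.
        llm_params["model_kwargs"] = {"temperature": temperature}
```

Forwarding `{"temperature": None}` might itself be rejected by the provider, which is why this sketch skips the key entirely when no temperature is given.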
☂️ Coverage Improvements: Coverage improvements by file:
🎨 Final Touches
Settings | Logs | CodeBeaver
Pre/beta - Unit Tests
I opened a Pull Request with the following: 🔄 2 test files added. 🔄 Test Updates: I've added 2 tests. They all pass ☑️
No existing tests required updates. 🐛 Bug Detection: Potential issues: the same two ChatBedrock failures reported above (`Error instancing model: 'temperature'` in `test_create_llm[llm_config5-ChatBedrock]` and `test_create_llm_with_rate_limit[llm_config5-ChatBedrock]`), with an identical error log.
☂️ Coverage Improvements: Coverage improvements by file:
🎨 Final Touches
Settings | Logs | CodeBeaver
Pre/beta - Unit Tests
I opened a Pull Request with the following: 🔄 4 test files added and 2 test files updated to reflect recent changes. 🔄 Test Updates: I've added or updated 5 tests. They all pass ☑️
New Tests:
🐛 Bug Detection: No bugs detected in your changes. Good job! ☂️ Coverage Improvements: Coverage improvements by file:
🎨 Final Touches
Settings | Logs | CodeBeaver
Pre/beta - Unit Tests
I opened a Pull Request with the following: 🔄 8 test files added and 6 test files updated to reflect recent changes. 🔄 Test Updates: I've added or updated 12 tests. They all pass ☑️
New Tests:
🐛 Bug Detection: Potential issues:
def set_common_params(self, params: dict, overwrite=False):
    for node in self.graph.nodes:
        node.update_config(params, overwrite)
This method looks correct: it iterates over all nodes in the graph and calls update_config on each one with the given parameters.
Test Error Log
tests.graphs.abstract_graph_test#test_set_common_params: def test_set_common_params():
"""
Test that the set_common_params method correctly updates the configuration
of all nodes in the graph.
"""
# Create a mock graph with mock nodes
mock_graph = Mock()
mock_node1 = Mock()
mock_node2 = Mock()
mock_graph.nodes = [mock_node1, mock_node2]
# Create a TestGraph instance with the mock graph
with patch(
"scrapegraphai.graphs.abstract_graph.AbstractGraph._create_graph",
return_value=mock_graph,
):
graph = TestGraph(
"Test prompt",
{"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}},
)
# Call set_common_params with test parameters
test_params = {"param1": "value1", "param2": "value2"}
graph.set_common_params(test_params)
# Assert that update_config was called on each node with the correct parameters
> mock_node1.update_config.assert_called_once_with(test_params, False)
tests/graphs/abstract_graph_test.py:74:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <Mock name='mock.update_config' id='140173980922640'>
args = ({'param1': 'value1', 'param2': 'value2'}, False), kwargs = {}
msg = "Expected 'update_config' to be called once. Called 0 times."
def assert_called_once_with(self, /, *args, **kwargs):
"""assert that the mock was called exactly once and that that call was
with the specified arguments."""
if not self.call_count == 1:
msg = ("Expected '%s' to be called once. Called %s times.%s"
% (self._mock_name or 'mock',
self.call_count,
self._calls_repr()))
> raise AssertionError(msg)
E AssertionError: Expected 'update_config' to be called once. Called 0 times.
/usr/local/lib/python3.11/unittest/mock.py:950: AssertionError
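One plausible reading of the "Called 0 times" failure is that `TestGraph` overrides `_create_graph`, so patching `AbstractGraph._create_graph` never injects the mocked graph. That is an assumption, not something confirmed by this log; the sketch below (intended for `tests/graphs/abstract_graph_test.py`, where `TestGraph` is already defined) patches the subclass hook instead:

```python
from unittest.mock import MagicMock, patch

def test_set_common_params():
    mock_graph = MagicMock()
    mock_node1, mock_node2 = MagicMock(), MagicMock()
    mock_graph.nodes = [mock_node1, mock_node2]

    # Patch the hook on the subclass so the mocked graph is the one the
    # instance actually stores and iterates over in set_common_params.
    with patch.object(TestGraph, "_create_graph", return_value=mock_graph):
        graph = TestGraph(
            "Test prompt",
            {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}},
        )
        test_params = {"param1": "value1", "param2": "value2"}
        graph.set_common_params(test_params)

    mock_node1.update_config.assert_called_once_with(test_params, False)
    mock_node2.update_config.assert_called_once_with(test_params, False)
```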
☂️ Coverage Improvements: Coverage improvements by file:
🎨 Final Touches
Settings | Logs | CodeBeaver
Dependency Review: ✅ No vulnerabilities, license issues, or OpenSSF Scorecard issues found. Scanned Files: None
## [1.47.0-beta.1](v1.46.0...v1.47.0-beta.1) (2025-04-15)

### Features

* add new proxy rotation ([8913d8d](8913d8d))

### CI

* **release:** 1.44.0-beta.1 [skip ci] ([5e944cc](5e944cc))
🎉 This PR is included in version 1.47.0-beta.1 🎉 The release is available on:
Your semantic-release bot 📦🚀
🎉 This PR is included in version 1.47.0 🎉 The release is available on:
Your semantic-release bot 📦🚀
No description provided.