Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡️ Speed up method AstraDBVectorStoreComponent.reset_collection_list by 12% in PR #6048 (bugfix-dev-astradb) #6085

Closed
105 changes: 62 additions & 43 deletions src/backend/base/langflow/components/vectorstores/astradb.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class NewCollectionInput:
display_name="Environment",
info="The environment for the Astra DB API Endpoint.",
advanced=True,
real_time_refresh=True,
),
DropdownInput(
name="api_endpoint",
Expand Down Expand Up @@ -315,11 +316,16 @@ def get_database_list_static(cls, token: str, environment: str | None = None):
# Get the list of databases
db_list = list(admin_client.list_databases())

# Set the environment properly
env_string = ""
if environment and environment != "prod":
env_string = f"-{environment}"

# Generate the api endpoint for each database
db_info_dict = {}
for db in db_list:
try:
api_endpoint = f"https://{db.info.id}-{db.info.region}.apps.astra.datastax.com"
api_endpoint = f"https://{db.info.id}-{db.info.region}.apps.astra{env_string}.datastax.com"
db_info_dict[db.info.name] = {
"api_endpoint": api_endpoint,
"collections": len(
Expand Down Expand Up @@ -453,26 +459,57 @@ def _initialize_database_options(self):
def _initialize_collection_options(self, api_endpoint: str | None = None):
# Retrieve the database object
database = self.get_database_object(api_endpoint=api_endpoint)
keyspace = self.get_keyspace() # Cache the keyspace

# Get the list of collections
collection_list = list(database.list_collections(keyspace=self.get_keyspace()))
collection_list = list(database.list_collections(keyspace=keyspace))

# Return the list of collections and metadata associated
return [
{
def get_collection_metadata(col):
service = col.options.vector.service if col.options.vector else None
return {
"name": col.name,
"records": self.collection_data(collection_name=col.name, database=database),
"provider": (
col.options.vector.service.provider if col.options.vector and col.options.vector.service else None
),
"provider": service.provider if service else None,
"icon": "",
"model": (
col.options.vector.service.model_name if col.options.vector and col.options.vector.service else None
),
"model": service.model_name if service else None,
}
for col in collection_list

# Return the list of collections and their metadata
return [get_collection_metadata(col) for col in collection_list]

def reset_collection_list(self, build_config: dict):
    """Repopulate the ``collection_name`` field of ``build_config``.

    The collection entries returned by ``_initialize_collection_options`` are
    split into a list of names (``options``) and a parallel list holding every
    other key of each entry (``options_metadata``). The currently selected
    value is cleared.

    Args:
        build_config: Build configuration mapping; its ``"collection_name"``
            entry is mutated in place.

    Returns:
        The same ``build_config`` object that was passed in.
    """
    # NOTE(review): no api_endpoint is forwarded here, so the callee receives
    # its default of None - confirm it resolves the intended database.
    collections = self._initialize_collection_options()

    # Walk the collection entries once, pairing each name with its metadata so
    # the two output lists stay index-aligned.
    pairs = [
        (entry["name"], {key: val for key, val in entry.items() if key != "name"})
        for entry in collections
    ]
    names = [name for name, _ in pairs]
    metadata = [meta for _, meta in pairs]

    # Publish the new options and drop any previous selection.
    field = build_config["collection_name"]
    field["options"] = names
    field["options_metadata"] = metadata
    field["value"] = ""

    return build_config

def reset_database_list(self, build_config: dict):
    """Repopulate the ``api_endpoint`` field of ``build_config``.

    The database entries returned by ``_initialize_database_options`` supply
    the option names; every other key of each entry is kept as that option's
    metadata. The currently selected value is cleared.

    Args:
        build_config: Build configuration mapping; its ``"api_endpoint"``
            entry is mutated in place.

    Returns:
        The same ``build_config`` object that was passed in.
    """
    databases = self._initialize_database_options()

    endpoint_field = build_config["api_endpoint"]

    # Names feed the dropdown; the remaining keys ride along as metadata at
    # the same index.
    endpoint_field["options"] = [entry["name"] for entry in databases]
    endpoint_field["options_metadata"] = [
        {key: val for key, val in entry.items() if key != "name"} for entry in databases
    ]

    # Drop any previously selected database.
    endpoint_field["value"] = ""

    return build_config

def reset_build_config(self, build_config: dict):
# Reset the list of databases we have based on the token provided
build_config["api_endpoint"]["options"] = []
Expand All @@ -489,25 +526,17 @@ def reset_build_config(self, build_config: dict):

def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None):
# When the component first executes, this is the update refresh call
first_run = field_name == "collection_name" and not field_value
first_run = field_name == "collection_name" and not field_value and not build_config["api_endpoint"]["options"]

# If the token has not been provided, simply return
if not self.token or field_name == "environment":
if not self.token:
return self.reset_build_config(build_config)

# Refresh the database name options
if first_run or field_name == "token":
# If this is the first execution of the component, reset and build database list
if first_run or field_name in ["token", "environment"]:
# Reset the build config to ensure we are starting fresh
build_config = self.reset_build_config(build_config)

# Get the list of options we have based on the token provided
database_options = self._initialize_database_options()

# If we retrieved options based on the token, show the dropdown
build_config["api_endpoint"]["options"] = [db["name"] for db in database_options]
build_config["api_endpoint"]["options_metadata"] = [
{k: v for k, v in db.items() if k not in ["name"]} for db in database_options
]
build_config = self.reset_database_list(build_config)

# Get list of regions for a given cloud provider
"""
Expand All @@ -526,8 +555,9 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:

# Refresh the collection name options
if field_name == "api_endpoint":
# Reset the selected collection
build_config["collection_name"]["value"] = ""
# If missing, refresh the database options
if not build_config["api_endpoint"]["options"] or not field_value:
return self.update_build_config(build_config, field_value=self.token, field_name="token")

# Set the underlying api endpoint value of the database
if field_value in build_config["api_endpoint"]["options"]:
Expand All @@ -538,21 +568,14 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:
else:
build_config["d_api_endpoint"]["value"] = ""

# Reload the list of collections and metadata associated
collection_options = self._initialize_collection_options(
api_endpoint=build_config["d_api_endpoint"]["value"]
)

# If we have collections, show the dropdown
build_config["collection_name"]["options"] = [col["name"] for col in collection_options]
build_config["collection_name"]["options_metadata"] = [
{k: v for k, v in col.items() if k not in ["name"]} for col in collection_options
]

return build_config
# Reset the list of collections we have based on the token provided
return self.reset_collection_list(build_config)

# Hide embedding model option if options_metadata provider is not null
if field_name == "collection_name" and field_value:
# Assume we will be autodetecting the collection:
build_config["autodetect_collection"]["value"] = True

# Set the options for collection name to be the field value if its a new collection
if field_value not in build_config["collection_name"]["options"]:
# Add the new collection to the list of options
Expand All @@ -563,13 +586,9 @@ def update_build_config(self, build_config: dict, field_value: str, field_name:

# Ensure that autodetect collection is set to False, since its a new collection
build_config["autodetect_collection"]["value"] = False
else:
build_config["autodetect_collection"]["value"] = True

# Find the position of the selected collection to align with metadata
index_of_name = build_config["collection_name"]["options"].index(field_value)

# Get the provider value of the selected collection
value_of_provider = build_config["collection_name"]["options_metadata"][index_of_name]["provider"]

# If we were able to determine the Vectorize provider, set it accordingly
Expand Down
Loading
Loading