diff --git a/memgpt/client/client.py b/memgpt/client/client.py
index 71469c3011..4324bdd108 100644
--- a/memgpt/client/client.py
+++ b/memgpt/client/client.py
@@ -3,9 +3,10 @@
 from requests.exceptions import RequestException
 import uuid
 from typing import Dict, List, Union, Optional, Tuple
+import time

 from memgpt.data_types import AgentState, User, Preset, LLMConfig, EmbeddingConfig, Source
-from memgpt.models.pydantic_models import HumanModel, PersonaModel, PresetModel, SourceModel
+from memgpt.models.pydantic_models import HumanModel, PersonaModel, PresetModel, SourceModel, JobModel, JobStatus
 from memgpt.cli.cli import QuickstartChoice
 from memgpt.cli.cli import set_config_with_dict, quickstart as quickstart_func, str_to_quickstart_choice
 from memgpt.config import MemGPTConfig
@@ -436,18 +437,36 @@ def delete_source(self, source_id: uuid.UUID):
         response = requests.delete(f"{self.base_url}/api/sources/{str(source_id)}", headers=self.headers)
         assert response.status_code == 200, f"Failed to delete source: {response.text}"

-    def load_file_into_source(self, filename: str, source_id: uuid.UUID):
+    def get_job_status(self, job_id: uuid.UUID):
+        response = requests.get(f"{self.base_url}/api/sources/status/{str(job_id)}", headers=self.headers)
+        return JobModel(**response.json())
+
+    def load_file_into_source(self, filename: str, source_id: uuid.UUID, blocking=True):
         """Load {filename} and insert into source"""
         files = {"file": open(filename, "rb")}
+
+        # create job
         response = requests.post(f"{self.base_url}/api/sources/{source_id}/upload", files=files, headers=self.headers)
-        return UploadFileToSourceResponse(**response.json())
+        if response.status_code != 200:
+            raise ValueError(f"Failed to upload file to source: {response.text}")
+
+        job = JobModel(**response.json())
+        if blocking:
+            # wait until job is completed
+            while True:
+                job = self.get_job_status(job.id)
+                if job.status == JobStatus.completed:
+                    break
+                elif job.status == JobStatus.failed:
+                    raise ValueError(f"Job failed: {job.metadata_}")
+                time.sleep(1)
+        return job

     def create_source(self, name: str) -> Source:
         """Create a new source"""
         payload = {"name": name}
         response = requests.post(f"{self.base_url}/api/sources", json=payload, headers=self.headers)
         response_json = response.json()
-        print("CREATE SOURCE", response_json, response.text)
         response_obj = SourceModel(**response_json)
         return Source(
             id=uuid.UUID(response_obj.id),
diff --git a/memgpt/config.py b/memgpt/config.py
index 1df21fff42..50406b6b00 100644
--- a/memgpt/config.py
+++ b/memgpt/config.py
@@ -341,10 +341,9 @@ def __init__(
         # functions
         functions=None,  # schema definitions ONLY (linked at runtime)
     ):
-        if name is None:
-            self.name = f"agent_{self.generate_agent_id()}"
-        else:
-            self.name = name
+
+        assert name, "Agent name must be provided"
+        self.name = name

         config = MemGPTConfig.load()  # get default values
         self.persona = config.persona if persona is None else persona
@@ -397,15 +396,6 @@ def __init__(
             os.path.join(MEMGPT_DIR, "agents", self.name, "config.json") if agent_config_path is None else agent_config_path
         )

-    def generate_agent_id(self, length=6):
-        ## random character based
-        # characters = string.ascii_lowercase + string.digits
-        # return ''.join(random.choices(characters, k=length))
-
-        # count based
-        agent_count = len(utils.list_agent_config_files())
-        return str(agent_count + 1)
-
     def attach_data_source(self, data_source: str):
         # TODO: add warning that only one source can be attached
         # i.e. previous source will be overridden
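The client's upload call now returns a job instead of a synchronous upload response. A minimal usage sketch of the new polling contract (the method names come from the diff above; the client class and its constructor arguments are assumptions, since they are not shown here):

    from memgpt.client.client import RESTClient  # assumed import path

    client = RESTClient(base_url="http://localhost:8283", token="...")  # assumed constructor
    source = client.create_source(name="my_docs")

    # blocking=True (the default) polls get_job_status() once per second and returns
    # only when the job reaches JobStatus.completed (raising if it fails)
    job = client.load_file_into_source(filename="CONTRIBUTING.md", source_id=source.id)

    # blocking=False returns the freshly created job immediately; poll manually instead
    job = client.load_file_into_source(filename="CONTRIBUTING.md", source_id=source.id, blocking=False)
    job = client.get_job_status(job.id)
    print(job.status, job.metadata_)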
diff --git a/memgpt/data_sources/connectors.py b/memgpt/data_sources/connectors.py
index 3611563693..2128115a0f 100644
--- a/memgpt/data_sources/connectors.py
+++ b/memgpt/data_sources/connectors.py
@@ -97,7 +97,7 @@ def load_data(
             passages.append(passage)
             embedding_to_document_name[hashable_embedding] = document_name
-            if len(passages) >= embedding_config.embedding_chunk_size:
+            if len(passages) >= 100:

                 # insert passages into passage store
                 passage_store.insert_many(passages)
diff --git a/memgpt/metadata.py b/memgpt/metadata.py
index f0b83c4255..34f2c663c9 100644
--- a/memgpt/metadata.py
+++ b/memgpt/metadata.py
@@ -8,12 +8,12 @@
 from memgpt.settings import settings
 from memgpt.constants import DEFAULT_HUMAN, DEFAULT_MEMGPT_MODEL, DEFAULT_PERSONA, DEFAULT_PRESET, LLM_MAX_TOKENS
-from memgpt.utils import enforce_types, printd
+from memgpt.utils import enforce_types, printd, get_utc_time
 from memgpt.data_types import AgentState, Source, User, LLMConfig, EmbeddingConfig, Token, Preset
 from memgpt.config import MemGPTConfig
 from memgpt.functions.functions import load_all_function_sets

-from memgpt.models.pydantic_models import PersonaModel, HumanModel, ToolModel
+from memgpt.models.pydantic_models import PersonaModel, HumanModel, ToolModel, JobModel, JobStatus

 from sqlalchemy import create_engine, Column, String, BIGINT, select, inspect, text, JSON, BLOB, BINARY, ARRAY, Boolean
 from sqlalchemy import func
@@ -334,6 +334,7 @@ def __init__(self, config: MemGPTConfig):
                 HumanModel.__table__,
                 PersonaModel.__table__,
                 ToolModel.__table__,
+                JobModel.__table__,
             ],
         )
         self.session_maker = sessionmaker(bind=self.engine)
@@ -754,3 +755,31 @@ def delete_preset(self, name: str, user_id: uuid.UUID):
         with self.session_maker() as session:
             session.query(PresetModel).filter(PresetModel.name == name).filter(PresetModel.user_id == user_id).delete()
             session.commit()
+
+    # job related functions
+    def create_job(self, job: JobModel):
+        with self.session_maker() as session:
+            session.add(job)
+            session.commit()
+            session.expunge_all()
+
+    def update_job_status(self, job_id: uuid.UUID, status: JobStatus):
+        with self.session_maker() as session:
+            session.query(JobModel).filter(JobModel.id == job_id).update({"status": status})
+            if status == JobStatus.completed:
+                session.query(JobModel).filter(JobModel.id == job_id).update({"completed_at": get_utc_time()})
+            session.commit()
+
+    def update_job(self, job: JobModel):
+        with self.session_maker() as session:
+            session.add(job)
+            session.commit()
+            session.refresh(job)
+
+    def get_job(self, job_id: uuid.UUID) -> Optional[JobModel]:
+        with self.session_maker() as session:
+            results = session.query(JobModel).filter(JobModel.id == job_id).all()
+            if len(results) == 0:
+                return None
+            assert len(results) == 1, f"Expected 1 result, got {len(results)}"
+            return results[0]
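Together these MetadataStore helpers give the server a minimal job table. A sketch of the intended lifecycle (the MetadataStore class name and constructor are assumed from the surrounding code; the method names are from the diff above):

    import uuid

    from memgpt.config import MemGPTConfig
    from memgpt.metadata import MetadataStore
    from memgpt.models.pydantic_models import JobModel, JobStatus

    ms = MetadataStore(MemGPTConfig.load())  # assumed constructor

    job = JobModel(user_id=uuid.uuid4(), metadata_={"type": "embedding"})
    job_id = job.id  # capture before create_job(), which expunges the instance from its session

    ms.create_job(job)  # persisted with status "created"
    ms.update_job_status(job_id=job_id, status=JobStatus.running)
    ms.update_job_status(job_id=job_id, status=JobStatus.completed)  # also stamps completed_at

    assert ms.get_job(job_id=job_id).status == JobStatus.completed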
diff --git a/memgpt/models/pydantic_models.py b/memgpt/models/pydantic_models.py
index f23952d7c4..4d0ac71afd 100644
--- a/memgpt/models/pydantic_models.py
+++ b/memgpt/models/pydantic_models.py
@@ -1,11 +1,13 @@
 from typing import List, Optional, Dict, Literal, Type
 from pydantic import BaseModel, Field, Json, ConfigDict
+from enum import StrEnum
 import uuid
 import base64
 import numpy as np
 from datetime import datetime
 from sqlmodel import Field, SQLModel
-from sqlalchemy import JSON, Column, BINARY, TypeDecorator
+from sqlalchemy_utils import ChoiceType
+from sqlalchemy import JSON, Column, BINARY, TypeDecorator, String

 from memgpt.constants import DEFAULT_HUMAN, DEFAULT_MEMGPT_MODEL, DEFAULT_PERSONA, DEFAULT_PRESET, LLM_MAX_TOKENS, MAX_EMBEDDING_DIM
 from memgpt.utils import get_human_text, get_persona_text, printd, get_utc_time
@@ -132,6 +134,23 @@ class SourceModel(SQLModel, table=True):
     metadata_: Optional[dict] = Field(None, sa_column=Column(JSON), description="Metadata associated with the source.")


+class JobStatus(StrEnum):
+    created = "created"
+    running = "running"
+    completed = "completed"
+    failed = "failed"
+
+
+class JobModel(SQLModel, table=True):
+    id: uuid.UUID = Field(default_factory=uuid.uuid4, description="The unique identifier of the job.", primary_key=True)
+    # status: str = Field(default="created", description="The status of the job.")
+    status: JobStatus = Field(default=JobStatus.created, description="The status of the job.", sa_column=Column(ChoiceType(JobStatus)))
+    created_at: datetime = Field(default_factory=get_utc_time, description="The unix timestamp of when the job was created.")
+    completed_at: Optional[datetime] = Field(None, description="The unix timestamp of when the job was completed.")
+    user_id: uuid.UUID = Field(..., description="The unique identifier of the user associated with the job.")
+    metadata_: Optional[dict] = Field({}, sa_column=Column(JSON), description="The metadata of the job.")
+
+
 class PassageModel(BaseModel):
     user_id: Optional[uuid.UUID] = Field(None, description="The unique identifier of the user associated with the passage.")
     agent_id: Optional[uuid.UUID] = Field(None, description="The unique identifier of the agent associated with the passage.")
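Note: JobStatus subclasses enum.StrEnum, which only exists in the stdlib as of Python 3.11 (the project's declared floor is 3.10, so this import effectively raises it). StrEnum members are themselves strings, which is what lets statuses round-trip through JSON and compare directly against string literals:

    from enum import StrEnum  # Python 3.11+

    class JobStatus(StrEnum):
        created = "created"
        running = "running"
        completed = "completed"
        failed = "failed"

    assert JobStatus.completed == "completed"         # members are real strings
    assert JobStatus("running") is JobStatus.running  # round-trip from a stored value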
diff --git a/memgpt/server/rest_api/admin/users.py b/memgpt/server/rest_api/admin/users.py
index 0f6f575ca0..b4c485f89e 100644
--- a/memgpt/server/rest_api/admin/users.py
+++ b/memgpt/server/rest_api/admin/users.py
@@ -136,7 +136,6 @@ def get_api_keys(
     """
     Get a list of all API keys for a user
     """
-    print("GET USERS", user_id)
     try:
         tokens = server.ms.get_all_api_keys_for_user(user_id=user_id)
         processed_tokens = [t.token for t in tokens]
diff --git a/memgpt/server/rest_api/server.py b/memgpt/server/rest_api/server.py
index fce26567c6..21a5c3c8c4 100644
--- a/memgpt/server/rest_api/server.py
+++ b/memgpt/server/rest_api/server.py
@@ -39,13 +39,11 @@
 cd memgpt/server/rest_api
 poetry run uvicorn server:app --reload
 """
-
 config = MemGPTConfig.load()

 for memory_type in ("archival", "recall", "metadata"):
     setattr(config, f"{memory_type}_storage_uri", settings.pg_uri)
 config.save()
-
 interface: QueuingInterface = QueuingInterface()
 server: SyncServer = SyncServer(default_interface=interface)
diff --git a/memgpt/server/rest_api/sources/index.py b/memgpt/server/rest_api/sources/index.py
index c5ce812648..76eda13103 100644
--- a/memgpt/server/rest_api/sources/index.py
+++ b/memgpt/server/rest_api/sources/index.py
@@ -5,11 +5,12 @@
 from functools import partial
 from typing import List, Optional

-from fastapi import APIRouter, Body, Depends, Query, HTTPException, status, UploadFile
+from fastapi import APIRouter, Body, Depends, Query, HTTPException, status, UploadFile, BackgroundTasks
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
+from starlette.responses import StreamingResponse

-from memgpt.models.pydantic_models import SourceModel, PassageModel, DocumentModel
+from memgpt.models.pydantic_models import SourceModel, PassageModel, DocumentModel, JobModel, JobStatus
 from memgpt.server.rest_api.auth_token import get_current_user
 from memgpt.server.rest_api.interface import QueuingInterface
 from memgpt.server.server import SyncServer
@@ -57,6 +58,44 @@ class GetSourceDocumentsResponse(BaseModel):
     documents: List[DocumentModel] = Field(..., description="List of documents from the source.")


+def load_file_to_source(server: SyncServer, user_id: uuid.UUID, source: Source, job_id: uuid.UUID, file: UploadFile):
+    # update job status
+    job = server.ms.get_job(job_id=job_id)
+    job.status = JobStatus.running
+    server.ms.update_job(job)
+
+    try:
+        # write the file to a temporary directory (deleted after the context manager exits)
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            file_path = os.path.join(tmpdirname, file.filename)
+            with open(file_path, "wb") as buffer:
+                buffer.write(file.file.read())
+
+            # read the file
+            connector = DirectoryConnector(input_files=[file_path])
+
+            # TODO: pre-compute total number of passages?
+
+            # load the data into the source via the connector
+            num_passages, num_documents = server.load_data(user_id=user_id, source_name=source.name, connector=connector)
+    except Exception as e:
+        # job failed with error
+        error = str(e)
+        print(error)
+        job.status = JobStatus.failed
+        job.metadata_["error"] = error
+        server.ms.update_job(job)
+        # TODO: delete any associated passages/documents?
+        return 0, 0
+
+    # update job status
+    job.status = JobStatus.completed
+    job.metadata_["num_passages"] = num_passages
+    job.metadata_["num_documents"] = num_documents
+    print("job completed", job.metadata_, job.id)
+    server.ms.update_job(job)
+
+
 def setup_sources_index_router(server: SyncServer, interface: QueuingInterface, password: str):
     get_current_user_with_server = partial(partial(get_current_user, server), password)

@@ -64,7 +103,9 @@ async def list_sources(
         user_id: uuid.UUID = Depends(get_current_user_with_server),
     ):
-        """List all data sources created by a user."""
+        """
+        List all data sources created by a user.
+        """
         # Clear the interface
         interface.clear()
@@ -81,7 +122,9 @@ async def create_source(
         request: CreateSourceRequest = Body(...),
         user_id: uuid.UUID = Depends(get_current_user_with_server),
     ):
-        """Create a new data source."""
+        """
+        Create a new data source.
+        """
         interface.clear()
         try:
             # TODO: don't use Source and just use SourceModel once pydantic migration is complete
@@ -104,7 +147,9 @@ async def delete_source(
         source_id: uuid.UUID,
         user_id: uuid.UUID = Depends(get_current_user_with_server),
     ):
-        """Delete a data source."""
+        """
+        Delete a data source.
+        """
         interface.clear()
         try:
             server.delete_source(source_id=source_id, user_id=user_id)
@@ -120,7 +165,9 @@ async def attach_source_to_agent(
         agent_id: uuid.UUID = Query(..., description="The unique identifier of the agent to attach the source to."),
         user_id: uuid.UUID = Depends(get_current_user_with_server),
     ):
-        """Attach a data source to an existing agent."""
+        """
+        Attach a data source to an existing agent.
+        """
         interface.clear()
         assert isinstance(agent_id, uuid.UUID), f"Expected agent_id to be a UUID, got {agent_id}"
         assert isinstance(user_id, uuid.UUID), f"Expected user_id to be a UUID, got {user_id}"
@@ -141,41 +188,58 @@ async def detach_source_from_agent(
         agent_id: uuid.UUID = Query(..., description="The unique identifier of the agent to detach the source from."),
         user_id: uuid.UUID = Depends(get_current_user_with_server),
     ):
-        """Detach a data source from an existing agent."""
+        """
+        Detach a data source from an existing agent.
+ """ server.detach_source_from_agent(source_id=source_id, agent_id=agent_id, user_id=user_id) - @router.post("/sources/{source_id}/upload", tags=["sources"], response_model=UploadFileToSourceResponse) + @router.get("/sources/status/{job_id}", tags=["sources"], response_model=JobModel) + async def get_job_status( + job_id: uuid.UUID, + user_id: uuid.UUID = Depends(get_current_user_with_server), + ): + """ + Get the status of a job. + """ + job = server.ms.get_job(job_id=job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Job with id={job_id} not found.") + return job + + @router.post("/sources/{source_id}/upload", tags=["sources"], response_model=JobModel) async def upload_file_to_source( # file: UploadFile = UploadFile(..., description="The file to upload."), file: UploadFile, source_id: uuid.UUID, + background_tasks: BackgroundTasks, user_id: uuid.UUID = Depends(get_current_user_with_server), ): - """Upload a file to a data source.""" + """ + Upload a file to a data source. + """ interface.clear() source = server.ms.get_source(source_id=source_id, user_id=user_id) - # write the file to a temporary directory (deleted after the context manager exits) - with tempfile.TemporaryDirectory() as tmpdirname: - file_path = os.path.join(tmpdirname, file.filename) - with open(file_path, "wb") as buffer: - buffer.write(file.file.read()) - - # read the file - connector = DirectoryConnector(input_files=[file_path]) + # create job + job = JobModel(user_id=user_id, metadata={"type": "embedding", "filename": file.filename, "source_id": source_id}) + job_id = job.id + server.ms.create_job(job) - # load the data into the source via the connector - passage_count, document_count = server.load_data(user_id=user_id, source_name=source.name, connector=connector) + # create background task + background_tasks.add_task(load_file_to_source, server, user_id, source, job_id, file) - # TODO: actually return added passages/documents - return UploadFileToSourceResponse(source=source, added_passages=passage_count, added_documents=document_count) + # return job information + job = server.ms.get_job(job_id=job_id) + return job @router.get("/sources/{source_id}/passages ", tags=["sources"], response_model=GetSourcePassagesResponse) async def list_passages( source_id: uuid.UUID, user_id: uuid.UUID = Depends(get_current_user_with_server), ): - """List all passages associated with a data source.""" + """ + List all passages associated with a data source. + """ passages = server.list_data_source_passages(user_id=user_id, source_id=source_id) return GetSourcePassagesResponse(passages=passages) @@ -184,7 +248,9 @@ async def list_documents( source_id: uuid.UUID, user_id: uuid.UUID = Depends(get_current_user_with_server), ): - """List all documents associated with a data source.""" + """ + List all documents associated with a data source. 
+ """ documents = server.list_data_source_documents(user_id=user_id, source_id=source_id) return GetSourceDocumentsResponse(documents=documents) diff --git a/poetry.lock b/poetry.lock index a7670983d7..272b14ce6c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -801,21 +801,21 @@ typing-inspect = ">=0.4.0,<1" [[package]] name = "datasets" -version = "2.18.0" +version = "2.19.0" description = "HuggingFace community-driven open-source library of datasets" optional = true python-versions = ">=3.8.0" files = [ - {file = "datasets-2.18.0-py3-none-any.whl", hash = "sha256:f1bbf0e2896917a914de01cbd37075b14deea3837af87ad0d9f697388ccaeb50"}, - {file = "datasets-2.18.0.tar.gz", hash = "sha256:cdf8b8c6abf7316377ba4f49f9589a4c74556d6b481afd0abd2284f3d69185cb"}, + {file = "datasets-2.19.0-py3-none-any.whl", hash = "sha256:f57c5316e123d4721b970c68c1cb856505f289cda58f5557ffe745b49c011a8e"}, + {file = "datasets-2.19.0.tar.gz", hash = "sha256:0b47e08cc7af2c6800a42cadc4657b22a0afc7197786c8986d703c08d90886a6"}, ] [package.dependencies] aiohttp = "*" dill = ">=0.3.0,<0.3.9" filelock = "*" -fsspec = {version = ">=2023.1.0,<=2024.2.0", extras = ["http"]} -huggingface-hub = ">=0.19.4" +fsspec = {version = ">=2023.1.0,<=2024.3.1", extras = ["http"]} +huggingface-hub = ">=0.21.2" multiprocess = "*" numpy = ">=1.17" packaging = "*" @@ -831,15 +831,15 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] -docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] -tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"] -tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu 
(>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tensorflow = ["tensorflow (>=2.6.0)"] +tensorflow-gpu = ["tensorflow (>=2.6.0)"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -1196,13 +1196,13 @@ files = [ [[package]] name = "fsspec" -version = "2024.2.0" +version = "2024.3.1" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2024.2.0-py3-none-any.whl", hash = "sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8"}, - {file = "fsspec-2024.2.0.tar.gz", hash = "sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84"}, + {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"}, + {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"}, ] [package.dependencies] @@ -1933,13 +1933,13 @@ sentence-transformers = ">=2.6.1,<3.0.0" [[package]] name = "llama-index-embeddings-openai" -version = "0.1.7" +version = "0.1.8" description = "llama-index embeddings openai integration" optional = false -python-versions = ">=3.8.1,<4.0" +python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_embeddings_openai-0.1.7-py3-none-any.whl", hash = "sha256:6023925ed1487b0688323d21711efbf8880e82ed3b87ef413255c3dc63a2f2fe"}, - {file = "llama_index_embeddings_openai-0.1.7.tar.gz", hash = "sha256:c71cc9820680c4cedfc9845dc87b94f6851d1ccce1e486fc91298f8fa8d9f27d"}, + {file = "llama_index_embeddings_openai-0.1.8-py3-none-any.whl", hash = "sha256:186be52c29a8c82a39ff6e033e3a61b9c5a17cf6177a5507abe4f5c79fea3a53"}, + {file = "llama_index_embeddings_openai-0.1.8.tar.gz", hash = "sha256:aae4ce3ec2fe2cb604d944646982c71663a8b99ed5f8c370823ee82eef4ddd3b"}, ] [package.dependencies] @@ -2018,13 +2018,13 @@ llama-index-llms-openai = ">=0.1.1,<0.2.0" [[package]] name = "llama-index-llms-openai" -version = "0.1.15" +version = "0.1.16" description = "llama-index llms openai integration" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_llms_openai-0.1.15-py3-none-any.whl", hash = "sha256:401ba9db1549e4287b73749dee57b11db1e0ffa814bb1464475aea1ff84442c7"}, - {file = "llama_index_llms_openai-0.1.15.tar.gz", hash = "sha256:6bdbf307b1d43a9a7c2a52f72ba7db61cb96d904f99e7ea5d889dd7818f10814"}, + {file = "llama_index_llms_openai-0.1.16-py3-none-any.whl", hash = "sha256:4a9c0fe969302731907c8fa31631812397637e114a44ebbad11fd6c59def9315"}, + {file = "llama_index_llms_openai-0.1.16.tar.gz", hash = "sha256:313bbc17c241992430a6bf686a1b1edc4276c8256ad6b0550aa1bea1e0fed1a6"}, ] [package.dependencies] @@ -2901,13 +2901,13 
@@ sympy = "*" [[package]] name = "openai" -version = "1.23.1" +version = "1.23.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.23.1-py3-none-any.whl", hash = "sha256:7941c1bc6fcdb1b6b889dfcfabff775ca52558a79d57dd1b9e15b463de1b3a4c"}, - {file = "openai-1.23.1.tar.gz", hash = "sha256:6df937e2a1ad64494951ea3614f5516db4d67c3fcc0b751b8e5edf1bc57e2d3d"}, + {file = "openai-1.23.2-py3-none-any.whl", hash = "sha256:293a36effde29946eb221040c89c46a4850f2f2e30b37ef09ff6d75226d71b42"}, + {file = "openai-1.23.2.tar.gz", hash = "sha256:b84aa3005357ceb38f22a269e0e22ee58ce103897f447032d021906f18178a8e"}, ] [package.dependencies] @@ -4877,6 +4877,34 @@ sqlalchemy = ">=0.7" [package.extras] dev = ["pytest"] +[[package]] +name = "sqlalchemy-utils" +version = "0.41.2" +description = "Various utility functions for SQLAlchemy." +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-Utils-0.41.2.tar.gz", hash = "sha256:bc599c8c3b3319e53ce6c5c3c471120bd325d0071fb6f38a10e924e3d07b9990"}, + {file = "SQLAlchemy_Utils-0.41.2-py3-none-any.whl", hash = "sha256:85cf3842da2bf060760f955f8467b87983fb2e30f1764fd0e24a48307dc8ec6e"}, +] + +[package.dependencies] +SQLAlchemy = ">=1.3" + +[package.extras] +arrow = ["arrow (>=0.3.4)"] +babel = ["Babel (>=1.3)"] +color = ["colour (>=0.0.4)"] +encrypted = ["cryptography (>=0.6)"] +intervals = ["intervals (>=0.7.1)"] +password = ["passlib (>=1.6,<2.0)"] +pendulum = ["pendulum (>=2.0.5)"] +phone = ["phonenumbers (>=5.9.2)"] +test = ["Jinja2 (>=2.3)", "Pygments (>=1.2)", "backports.zoneinfo", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "isort (>=4.2.2)", "pg8000 (>=1.12.4)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (==7.4.4)", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] +test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3.4)", "backports.zoneinfo", "colour (>=0.0.4)", "cryptography (>=0.6)", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "furl (>=0.4.1)", "intervals (>=0.7.1)", "isort (>=4.2.2)", "passlib (>=1.6,<2.0)", "pendulum (>=2.0.5)", "pg8000 (>=1.12.4)", "phonenumbers (>=5.9.2)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (==7.4.4)", "python-dateutil", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] +timezone = ["python-dateutil"] +url = ["furl (>=0.4.1)"] + [[package]] name = "sqlmodel" version = "0.0.16" @@ -5263,18 +5291,18 @@ telegram = ["requests"] [[package]] name = "traitlets" -version = "5.14.2" +version = "5.14.3" description = "Traitlets Python configuration system" optional = false python-versions = ">=3.8" files = [ - {file = "traitlets-5.14.2-py3-none-any.whl", hash = "sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80"}, - {file = "traitlets-5.14.2.tar.gz", hash = "sha256:8cdd83c040dab7d1dee822678e5f5d100b514f7b72b01615b26fc5718916fdf9"}, + {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, + {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.1)", "pytest-mock", "pytest-mypy-testing"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest 
(>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "transformers" @@ -6054,4 +6082,4 @@ server = ["fastapi", "uvicorn", "websockets"] [metadata] lock-version = "2.0" python-versions = "<3.13,>=3.10" -content-hash = "591fab811980666e62dc6b33ed4248fbab38bf3fb6b63f9caa85f1a2c00ea1fa" +content-hash = "81e487963aaa7b393d7bf71d603481b9fd515002f23f9aed9ebb9581b3fe9729" diff --git a/pyproject.toml b/pyproject.toml index 79c6173cfd..ba4d0beb39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ llama-index-embeddings-openai = "^0.1.1" llama-index-embeddings-huggingface = {version = "^0.2.0", optional = true} llama-index-embeddings-azure-openai = "^0.1.6" python-multipart = "^0.0.9" +sqlalchemy-utils = "^0.41.2" pytest-order = {version = "^1.2.0", optional = true} pytest-asyncio = {version = "^0.23.2", optional = true} pytest = { version = "^7.4.4", optional = true } diff --git a/tests/test_client.py b/tests/test_client.py index ac09de3a70..87192d34b6 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -38,8 +38,6 @@ def run_server(): import uvicorn - from memgpt.server.rest_api.server import app - from memgpt.server.rest_api.server import start_server load_dotenv() @@ -98,7 +96,10 @@ def run_server(): config.save() credentials.save() - print("Starting server...") + from memgpt.server.rest_api.server import app + from memgpt.server.rest_api.server import start_server + + print("Starting server...", config.config_path) start_server(debug=True) @@ -272,7 +273,8 @@ def test_sources(client, agent): # load a file into a source filename = "CONTRIBUTING.md" - response = client.load_file_into_source(filename=filename, source_id=source.id) + upload_job = client.load_file_into_source(filename=filename, source_id=source.id) + print("Upload job", upload_job, upload_job.status, upload_job.metadata) # TODO: make sure things run in the right order archival_memories = client.get_agent_archival_memory(agent_id=agent.id).archival_memory