Adjust wording & black format #431

Draft · wants to merge 4 commits into base: master
62 changes: 31 additions & 31 deletions docs/langchain-retrieval-agent.ipynb
@@ -354,7 +354,7 @@
],
"source": [
"# we drop sparse_values as they are not needed for this example\n",
"dataset.documents.drop(['sparse_values', 'blob'], axis=1, inplace=True)\n",
"dataset.documents.drop([\"sparse_values\", \"blob\"], axis=1, inplace=True)\n",
"\n",
"dataset.head()"
]
@@ -369,7 +369,7 @@
"\n",
"print(\"Here are some example topics in our Knowledge Base:\\n\")\n",
"for r in dataset.documents.iloc[:].to_dict(orient=\"records\"):\n",
" topics.add(r['metadata']['title'])\n",
" topics.add(r[\"metadata\"][\"title\"])\n",
"\n",
"for topic in sorted(topics)[50:75]:\n",
" print(f\"- {topic}\")"
@@ -396,6 +396,7 @@
"\n",
"if not os.environ.get(\"PINECONE_API_KEY\"):\n",
" from pinecone_notebooks.colab import Authenticate\n",
"\n",
" Authenticate()"
]
},
@@ -421,7 +422,7 @@
"\n",
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model.\n",
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
"\n",
"There are more configurations available, but this minimal set will get us started."
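Review note: the bullets above mention `describe_index` and `delete_index` without showing them. A minimal sketch of those lifecycle calls, assuming the `pc` client created earlier in the notebook and the index name used in this PR:

```python
import os

from pinecone import Pinecone

# Assumes PINECONE_API_KEY is set; `pc` mirrors the client used in the notebook.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

print(pc.list_indexes())  # every index in the project
print(pc.describe_index(name="langchain-retrieval-agent-fast"))  # config and status
# pc.delete_index(name="langchain-retrieval-agent-fast")  # clean up when finished
```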
@@ -464,18 +465,15 @@
"source": [
"from pinecone import ServerlessSpec\n",
"\n",
"index_name = 'langchain-retrieval-agent-fast'\n",
"index_name = \"langchain-retrieval-agent-fast\"\n",
"\n",
"if not pc.has_index(name=index_name):\n",
" # Create a new index\n",
" pc.create_index(\n",
" name=index_name,\n",
" dimension=1536, # dimensionality of text-embedding-ada-002\n",
" metric='dotproduct',\n",
" spec=ServerlessSpec(\n",
" cloud='aws',\n",
" region='us-east-1'\n",
" )\n",
" metric=\"dotproduct\",\n",
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
" )\n",
"\n",
"pc.describe_index(name=index_name)"
@@ -651,12 +649,9 @@
"source": [
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"openai_api_key = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'\n",
"openai_api_key = os.environ.get(\"OPENAI_API_KEY\") or \"OPENAI_API_KEY\"\n",
"\n",
"embed = OpenAIEmbeddings(\n",
" model='text-embedding-ada-002',\n",
" openai_api_key=openai_api_key\n",
")"
"embed = OpenAIEmbeddings(model=\"text-embedding-ada-002\", openai_api_key=openai_api_key)"
]
},
{
@@ -670,9 +665,7 @@
"from langchain_pinecone import PineconeVectorStore\n",
"\n",
"pinecone_vectorstore = PineconeVectorStore(\n",
" index_name=index_name, \n",
" embedding=embed, \n",
" text_key=\"text\"\n",
" index_name=index_name, embedding=embed, text_key=\"text\"\n",
")"
]
},
@@ -759,11 +752,12 @@
"source": [
"from pprint import pprint\n",
"\n",
"query = \"When was the college of engineering in the University of Notre Dame established?\"\n",
"query = (\n",
" \"When was the college of engineering in the University of Notre Dame established?\"\n",
")\n",
"\n",
"documents = pinecone_vectorstore.similarity_search(\n",
" query=query,\n",
" k=3 # return 3 most relevant docs\n",
" query=query, k=3 # return 3 most relevant docs\n",
")\n",
"\n",
"for doc in documents:\n",
@@ -815,9 +809,7 @@
"\n",
"# Chat completion LLM\n",
"llm = ChatOpenAI(\n",
" openai_api_key=openai_api_key,\n",
" model_name='gpt-3.5-turbo',\n",
" temperature=0.0\n",
" openai_api_key=openai_api_key, model_name=\"gpt-3.5-turbo\", temperature=0.0\n",
")"
]
},
@@ -839,17 +831,19 @@
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"# Based on the RAG template from https://smith.langchain.com/hub/rlm/rag-prompt\n",
"template=(\n",
"template = (\n",
" \"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\"\n",
" \"Question: {question}\"\n",
" \"Context: {context}\"\n",
" \"Answer:\"\n",
")\n",
"prompt = PromptTemplate(input_variables=[\"question\", \"context\"], template=template)\n",
"\n",
"\n",
"def format_docs(docs):\n",
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
"\n",
"# Retrieval Question-Answer chain\n",
"qa_chain = (\n",
" {\n",
@@ -896,7 +890,9 @@
}
],
"source": [
"qa_chain.invoke(\"When was the college of engineering in the University of Notre Dame established?\")"
"qa_chain.invoke(\n",
" \"When was the college of engineering in the University of Notre Dame established?\"\n",
")"
]
},
{
@@ -920,11 +916,11 @@
"outputs": [],
"source": [
"knowledge_base_tool = qa_chain.as_tool(\n",
" name='knowledge-base',\n",
" description=(\n",
" 'use this tool when answering general knowledge queries to get '\n",
" 'more information about the topic'\n",
" )\n",
" name=\"knowledge-base\",\n",
" description=(\n",
" \"use this tool when answering general knowledge queries to get \"\n",
" \"more information about the topic\"\n",
" ),\n",
")"
]
},
@@ -966,9 +962,11 @@
"from langgraph.graph import StateGraph\n",
"from langgraph.graph.message import add_messages\n",
"\n",
"\n",
"class State(TypedDict):\n",
" messages: Annotated[list, add_messages]\n",
"\n",
"\n",
"graph_builder = StateGraph(State)"
]
},
@@ -1001,9 +999,11 @@
"tools = [knowledge_base_tool]\n",
"llm_with_tools = llm.bind_tools(tools)\n",
"\n",
"\n",
"def chatbot(state: State):\n",
" return {\"messages\": [llm_with_tools.invoke(state[\"messages\"])]}\n",
"\n",
"\n",
"graph_builder.add_node(\"chatbot\", chatbot)\n",
"\n",
"tool_node = ToolNode(tools=tools)\n",
@@ -1054,7 +1054,7 @@
"source": [
"def agent(user_message):\n",
" config = {\"configurable\": {\"thread_id\": \"1\"}}\n",
" \n",
"\n",
" # The config is the **second positional argument** to stream() or invoke()!\n",
" events = graph.stream(\n",
" {\"messages\": [{\"role\": \"user\", \"content\": user_message}]},\n",
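Review note: to reproduce the mechanical part of this PR locally, Black formats notebooks when installed with the Jupyter extra (`pip install "black[jupyter]"`). A small sketch of the quote normalization it applies to these cells:

```python
# Sketch: the kind of rewrite Black performs on the cells in this diff.
import black

src = "dataset.documents.drop(['sparse_values', 'blob'], axis=1, inplace=True)\n"
print(black.format_str(src, mode=black.Mode()), end="")
# dataset.documents.drop(["sparse_values", "blob"], axis=1, inplace=True)
```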
53 changes: 22 additions & 31 deletions docs/langchain-retrieval-augmentation.ipynb
@@ -17,7 +17,7 @@
"id": "dQRA1HWOJYbU"
},
"source": [
"#### [LangChain Handbook](https://pinecone.io/learn/langchain)\n",
"#### [LangChain Handbook](https://www.pinecone.io/learn/langchain)\n",
"\n",
"# Retrieval Augmentation\n",
"\n",
@@ -180,11 +180,11 @@
"source": [
"from pinecone_datasets import load_dataset\n",
"\n",
"dataset = load_dataset('wikipedia-simple-text-embedding-ada-002-50K')\n",
"dataset = load_dataset(\"wikipedia-simple-text-embedding-ada-002-50K\")\n",
"\n",
"# We drop sparse_values and blob keys as they are not needed for this example\n",
"dataset.documents.drop(['sparse_values'], axis=1, inplace=True)\n",
"dataset.documents.drop(['blob'], axis=1, inplace=True)\n",
"dataset.documents.drop([\"sparse_values\"], axis=1, inplace=True)\n",
"dataset.documents.drop([\"blob\"], axis=1, inplace=True)\n",
"\n",
"dataset.head()"
]
@@ -220,6 +220,7 @@
"\n",
"if not os.environ.get(\"PINECONE_API_KEY\"):\n",
" from pinecone_notebooks.colab import Authenticate\n",
"\n",
" Authenticate()"
]
},
@@ -254,8 +255,8 @@
"\n",
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model.\n",
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/projects/understanding-projects).\n",
"\n",
"There are more configurations available, but this minimal set will get us started."
]
@@ -301,17 +302,14 @@
"source": [
"from pinecone import ServerlessSpec\n",
"\n",
"index_name = 'langchain-retrieval-augmentation-fast'\n",
"index_name = \"langchain-retrieval-augmentation-fast\"\n",
"\n",
"if not pc.has_index(name=index_name):\n",
" pc.create_index(\n",
" name=index_name,\n",
" dimension=1536, # dimensionality of text-embedding-ada-002\n",
" metric='dotproduct',\n",
" spec=ServerlessSpec(\n",
" cloud='aws',\n",
" region='us-east-1'\n",
" )\n",
" metric=\"dotproduct\",\n",
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
" )\n",
"\n",
"pc.describe_index(name=index_name)"
@@ -420,8 +418,10 @@
"\n",
"batch_size = 100\n",
"\n",
"for start in tqdm(range(0, len(dataset.documents), batch_size), \"Upserting records batch\"):\n",
" batch = dataset.documents.iloc[start:start + batch_size].to_dict(orient=\"records\")\n",
"for start in tqdm(\n",
" range(0, len(dataset.documents), batch_size), \"Upserting records batch\"\n",
"):\n",
" batch = dataset.documents.iloc[start : start + batch_size].to_dict(orient=\"records\")\n",
" index.upsert(vectors=batch)"
]
},
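Review note: after the upsert loop it is worth confirming the records actually landed. A sketch using `describe_index_stats`; counts can lag briefly, since upserts are eventually consistent:

```python
# Sketch: verify the upserted vector count.
stats = index.describe_index_stats()
print(stats.total_vector_count)  # should approach len(dataset.documents)
```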
@@ -487,14 +487,11 @@
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"# Get openai api key from platform.openai.com\n",
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or 'OPENAI_API_KEY'\n",
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\") or \"OPENAI_API_KEY\"\n",
"\n",
"model_name = 'text-embedding-ada-002'\n",
"model_name = \"text-embedding-ada-002\"\n",
"\n",
"embed = OpenAIEmbeddings(\n",
" model=model_name,\n",
" openai_api_key=OPENAI_API_KEY\n",
")"
"embed = OpenAIEmbeddings(model=model_name, openai_api_key=OPENAI_API_KEY)"
]
},
{
@@ -518,9 +515,7 @@
"from langchain_pinecone import PineconeVectorStore\n",
"\n",
"pinecone_vectorstore = PineconeVectorStore(\n",
" index_name=index_name, \n",
" embedding=embed, \n",
" text_key=\"text\"\n",
" index_name=index_name, embedding=embed, text_key=\"text\"\n",
")"
]
},
@@ -670,7 +665,7 @@
"\n",
"documents = pinecone_vectorstore.similarity_search(\n",
" query=\"Who was Benito Mussolini?\", # our search query\n",
" k=3 # return 3 most relevant docs\n",
" k=3, # return 3 most relevant docs\n",
")\n",
"\n",
"for doc in documents:\n",
Expand Down Expand Up @@ -707,15 +702,11 @@
"\n",
"# Chat Completion LLM\n",
"llm = ChatOpenAI(\n",
" openai_api_key=OPENAI_API_KEY,\n",
" model_name='gpt-4.5-preview',\n",
" temperature=0.0\n",
" openai_api_key=OPENAI_API_KEY, model_name=\"gpt-4.5-preview\", temperature=0.0\n",
")\n",
"\n",
"qa = RetrievalQA.from_chain_type(\n",
" llm=llm,\n",
" chain_type=\"stuff\",\n",
" retriever=pinecone_vectorstore.as_retriever()\n",
" llm=llm, chain_type=\"stuff\", retriever=pinecone_vectorstore.as_retriever()\n",
")"
]
},
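Review note: for completeness, a sketch of invoking the chain built above; the query mirrors the earlier similarity-search example:

```python
# Sketch: run the RetrievalQA chain end to end.
result = qa.invoke({"query": "Who was Benito Mussolini?"})
print(result["result"])
```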
@@ -771,7 +762,7 @@
" llm=llm,\n",
" chain_type=\"stuff\",\n",
" retriever=pinecone_vectorstore.as_retriever(),\n",
" return_source_documents=True\n",
" return_source_documents=True,\n",
")"
]
},
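Review note: with `return_source_documents=True` the response carries the retrieved passages alongside the answer. The chain's variable name is collapsed in this diff, so `qa_with_sources` below is an assumed name:

```python
# Sketch: read the answer and its supporting passages.
# `qa_with_sources` is a placeholder name; the real one is collapsed in the diff.
response = qa_with_sources.invoke({"query": "Who was Benito Mussolini?"})
print(response["result"])
for doc in response["source_documents"]:
    print("-", doc.metadata.get("title"))
```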