Adjust wording & black format #431

Draft · wants to merge 4 commits into base: master
62 changes: 31 additions & 31 deletions docs/langchain-retrieval-agent.ipynb
@@ -354,7 +354,7 @@
],
"source": [
"# we drop sparse_values as they are not needed for this example\n",
"dataset.documents.drop(['sparse_values', 'blob'], axis=1, inplace=True)\n",
"dataset.documents.drop([\"sparse_values\", \"blob\"], axis=1, inplace=True)\n",
"\n",
"dataset.head()"
]
@@ -369,7 +369,7 @@
"\n",
"print(\"Here are some example topics in our Knowledge Base:\\n\")\n",
"for r in dataset.documents.iloc[:].to_dict(orient=\"records\"):\n",
" topics.add(r['metadata']['title'])\n",
" topics.add(r[\"metadata\"][\"title\"])\n",
"\n",
"for topic in sorted(topics)[50:75]:\n",
" print(f\"- {topic}\")"
@@ -396,6 +396,7 @@
"\n",
"if not os.environ.get(\"PINECONE_API_KEY\"):\n",
" from pinecone_notebooks.colab import Authenticate\n",
"\n",
" Authenticate()"
]
},
@@ -421,7 +422,7 @@
"\n",
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model.\n",
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
"\n",
"There are more configurations available, but this minimal set will get us started."
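Review note: the bullets above mention `describe_index` and `delete_index` without showing them. A minimal sketch of those lifecycle calls, assuming the `pc` client created earlier in the notebook and the index name used in this PR:

```python
import os

from pinecone import Pinecone

# Assumes PINECONE_API_KEY is set; `pc` mirrors the client used in the notebook.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

print(pc.list_indexes())  # every index in the project
print(pc.describe_index(name="langchain-retrieval-agent-fast"))  # config and status
# pc.delete_index(name="langchain-retrieval-agent-fast")  # clean up when finished
```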
@@ -464,18 +465,15 @@
"source": [
"from pinecone import ServerlessSpec\n",
"\n",
"index_name = 'langchain-retrieval-agent-fast'\n",
"index_name = \"langchain-retrieval-agent-fast\"\n",
"\n",
"if not pc.has_index(name=index_name):\n",
" # Create a new index\n",
" pc.create_index(\n",
" name=index_name,\n",
" dimension=1536, # dimensionality of text-embedding-ada-002\n",
" metric='dotproduct',\n",
" spec=ServerlessSpec(\n",
" cloud='aws',\n",
" region='us-east-1'\n",
" )\n",
" metric=\"dotproduct\",\n",
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
" )\n",
"\n",
"pc.describe_index(name=index_name)"
@@ -651,12 +649,9 @@
"source": [
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"openai_api_key = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'\n",
"openai_api_key = os.environ.get(\"OPENAI_API_KEY\") or \"OPENAI_API_KEY\"\n",
"\n",
"embed = OpenAIEmbeddings(\n",
" model='text-embedding-ada-002',\n",
" openai_api_key=openai_api_key\n",
")"
"embed = OpenAIEmbeddings(model=\"text-embedding-ada-002\", openai_api_key=openai_api_key)"
]
},
{
@@ -670,9 +665,7 @@
"from langchain_pinecone import PineconeVectorStore\n",
"\n",
"pinecone_vectorstore = PineconeVectorStore(\n",
" index_name=index_name, \n",
" embedding=embed, \n",
" text_key=\"text\"\n",
" index_name=index_name, embedding=embed, text_key=\"text\"\n",
")"
]
},
@@ -759,11 +752,12 @@
"source": [
"from pprint import pprint\n",
"\n",
"query = \"When was the college of engineering in the University of Notre Dame established?\"\n",
"query = (\n",
" \"When was the college of engineering in the University of Notre Dame established?\"\n",
")\n",
"\n",
"documents = pinecone_vectorstore.similarity_search(\n",
" query=query,\n",
" k=3 # return 3 most relevant docs\n",
" query=query, k=3 # return 3 most relevant docs\n",
")\n",
"\n",
"for doc in documents:\n",
@@ -815,9 +809,7 @@
"\n",
"# Chat completion LLM\n",
"llm = ChatOpenAI(\n",
" openai_api_key=openai_api_key,\n",
" model_name='gpt-3.5-turbo',\n",
" temperature=0.0\n",
" openai_api_key=openai_api_key, model_name=\"gpt-3.5-turbo\", temperature=0.0\n",
")"
]
},
@@ -839,17 +831,19 @@
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"# Based on the RAG template from https://smith.langchain.com/hub/rlm/rag-prompt\n",
"template=(\n",
"template = (\n",
" \"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\"\n",
" \"Question: {question}\"\n",
" \"Context: {context}\"\n",
" \"Answer:\"\n",
")\n",
"prompt = PromptTemplate(input_variables=[\"question\", \"context\"], template=template)\n",
"\n",
"\n",
"def format_docs(docs):\n",
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
"\n",
"# Retrieval Question-Answer chain\n",
"qa_chain = (\n",
" {\n",
@@ -896,7 +890,9 @@
}
],
"source": [
"qa_chain.invoke(\"When was the college of engineering in the University of Notre Dame established?\")"
"qa_chain.invoke(\n",
" \"When was the college of engineering in the University of Notre Dame established?\"\n",
")"
]
},
{
@@ -920,11 +916,11 @@
"outputs": [],
"source": [
"knowledge_base_tool = qa_chain.as_tool(\n",
" name='knowledge-base',\n",
" description=(\n",
" 'use this tool when answering general knowledge queries to get '\n",
" 'more information about the topic'\n",
" )\n",
" name=\"knowledge-base\",\n",
" description=(\n",
" \"use this tool when answering general knowledge queries to get \"\n",
" \"more information about the topic\"\n",
" ),\n",
")"
]
},
@@ -966,9 +962,11 @@
"from langgraph.graph import StateGraph\n",
"from langgraph.graph.message import add_messages\n",
"\n",
"\n",
"class State(TypedDict):\n",
" messages: Annotated[list, add_messages]\n",
"\n",
"\n",
"graph_builder = StateGraph(State)"
]
},
@@ -1001,9 +999,11 @@
"tools = [knowledge_base_tool]\n",
"llm_with_tools = llm.bind_tools(tools)\n",
"\n",
"\n",
"def chatbot(state: State):\n",
" return {\"messages\": [llm_with_tools.invoke(state[\"messages\"])]}\n",
"\n",
"\n",
"graph_builder.add_node(\"chatbot\", chatbot)\n",
"\n",
"tool_node = ToolNode(tools=tools)\n",
@@ -1054,7 +1054,7 @@
"source": [
"def agent(user_message):\n",
" config = {\"configurable\": {\"thread_id\": \"1\"}}\n",
" \n",
"\n",
" # The config is the **second positional argument** to stream() or invoke()!\n",
" events = graph.stream(\n",
" {\"messages\": [{\"role\": \"user\", \"content\": user_message}]},\n",
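Review note: to reproduce the mechanical part of this PR locally, Black formats notebooks when installed with the Jupyter extra (`pip install "black[jupyter]"`). A small sketch of the quote normalization it applies to these cells:

```python
# Sketch: the kind of rewrite Black performs on the cells in this diff.
import black

src = "dataset.documents.drop(['sparse_values', 'blob'], axis=1, inplace=True)\n"
print(black.format_str(src, mode=black.Mode()), end="")
# dataset.documents.drop(["sparse_values", "blob"], axis=1, inplace=True)
```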
53 changes: 22 additions & 31 deletions docs/langchain-retrieval-augmentation.ipynb
@@ -17,7 +17,7 @@
"id": "dQRA1HWOJYbU"
},
"source": [
"#### [LangChain Handbook](https://pinecone.io/learn/langchain)\n",
"#### [LangChain Handbook](https://www.pinecone.io/learn/langchain)\n",
"\n",
"# Retrieval Augmentation\n",
"\n",
@@ -180,11 +180,11 @@
"source": [
"from pinecone_datasets import load_dataset\n",
"\n",
"dataset = load_dataset('wikipedia-simple-text-embedding-ada-002-50K')\n",
"dataset = load_dataset(\"wikipedia-simple-text-embedding-ada-002-50K\")\n",
"\n",
"# We drop sparse_values and blob keys as they are not needed for this example\n",
"dataset.documents.drop(['sparse_values'], axis=1, inplace=True)\n",
"dataset.documents.drop(['blob'], axis=1, inplace=True)\n",
"dataset.documents.drop([\"sparse_values\"], axis=1, inplace=True)\n",
"dataset.documents.drop([\"blob\"], axis=1, inplace=True)\n",
"\n",
"dataset.head()"
]
@@ -220,6 +220,7 @@
"\n",
"if not os.environ.get(\"PINECONE_API_KEY\"):\n",
" from pinecone_notebooks.colab import Authenticate\n",
"\n",
" Authenticate()"
]
},
@@ -254,8 +255,8 @@
"\n",
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model.\n",
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/guides/projects/understanding-projects).\n",
"\n",
"There are more configurations available, but this minimal set will get us started."
]
@@ -301,17 +302,14 @@
"source": [
"from pinecone import ServerlessSpec\n",
"\n",
"index_name = 'langchain-retrieval-augmentation-fast'\n",
"index_name = \"langchain-retrieval-augmentation-fast\"\n",
"\n",
"if not pc.has_index(name=index_name):\n",
" pc.create_index(\n",
" name=index_name,\n",
" dimension=1536, # dimensionality of text-embedding-ada-002\n",
" metric='dotproduct',\n",
" spec=ServerlessSpec(\n",
" cloud='aws',\n",
" region='us-east-1'\n",
" )\n",
" metric=\"dotproduct\",\n",
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
" )\n",
"\n",
"pc.describe_index(name=index_name)"
@@ -420,8 +418,10 @@
"\n",
"batch_size = 100\n",
"\n",
"for start in tqdm(range(0, len(dataset.documents), batch_size), \"Upserting records batch\"):\n",
" batch = dataset.documents.iloc[start:start + batch_size].to_dict(orient=\"records\")\n",
"for start in tqdm(\n",
" range(0, len(dataset.documents), batch_size), \"Upserting records batch\"\n",
"):\n",
" batch = dataset.documents.iloc[start : start + batch_size].to_dict(orient=\"records\")\n",
" index.upsert(vectors=batch)"
]
},
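Review note: after the upsert loop it is worth confirming the records actually landed. A sketch using `describe_index_stats`; counts can lag briefly, since upserts are eventually consistent:

```python
# Sketch: verify the upserted vector count.
stats = index.describe_index_stats()
print(stats.total_vector_count)  # should approach len(dataset.documents)
```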
@@ -487,14 +487,11 @@
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"# Get openai api key from platform.openai.com\n",
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or 'OPENAI_API_KEY'\n",
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\") or \"OPENAI_API_KEY\"\n",
"\n",
"model_name = 'text-embedding-ada-002'\n",
"model_name = \"text-embedding-ada-002\"\n",
"\n",
"embed = OpenAIEmbeddings(\n",
" model=model_name,\n",
" openai_api_key=OPENAI_API_KEY\n",
")"
"embed = OpenAIEmbeddings(model=model_name, openai_api_key=OPENAI_API_KEY)"
]
},
{
@@ -518,9 +515,7 @@
"from langchain_pinecone import PineconeVectorStore\n",
"\n",
"pinecone_vectorstore = PineconeVectorStore(\n",
" index_name=index_name, \n",
" embedding=embed, \n",
" text_key=\"text\"\n",
" index_name=index_name, embedding=embed, text_key=\"text\"\n",
")"
]
},
@@ -670,7 +665,7 @@
"\n",
"documents = pinecone_vectorstore.similarity_search(\n",
" query=\"Who was Benito Mussolini?\", # our search query\n",
" k=3 # return 3 most relevant docs\n",
" k=3, # return 3 most relevant docs\n",
")\n",
"\n",
"for doc in documents:\n",
Expand Down Expand Up @@ -707,15 +702,11 @@
"\n",
"# Chat Completion LLM\n",
"llm = ChatOpenAI(\n",
" openai_api_key=OPENAI_API_KEY,\n",
" model_name='gpt-4.5-preview',\n",
" temperature=0.0\n",
" openai_api_key=OPENAI_API_KEY, model_name=\"gpt-4.5-preview\", temperature=0.0\n",
")\n",
"\n",
"qa = RetrievalQA.from_chain_type(\n",
" llm=llm,\n",
" chain_type=\"stuff\",\n",
" retriever=pinecone_vectorstore.as_retriever()\n",
" llm=llm, chain_type=\"stuff\", retriever=pinecone_vectorstore.as_retriever()\n",
")"
]
},
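Review note: for completeness, a sketch of invoking the chain built above; the query mirrors the earlier similarity-search example:

```python
# Sketch: run the RetrievalQA chain end to end.
result = qa.invoke({"query": "Who was Benito Mussolini?"})
print(result["result"])
```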
@@ -771,7 +762,7 @@
" llm=llm,\n",
" chain_type=\"stuff\",\n",
" retriever=pinecone_vectorstore.as_retriever(),\n",
" return_source_documents=True\n",
" return_source_documents=True,\n",
")"
]
},
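Review note: with `return_source_documents=True` the response carries the retrieved passages alongside the answer. The chain's variable name is collapsed in this diff, so `qa_with_sources` below is an assumed name:

```python
# Sketch: read the answer and its supporting passages.
# `qa_with_sources` is a placeholder name; the real one is collapsed in the diff.
response = qa_with_sources.invoke({"query": "Who was Benito Mussolini?"})
print(response["result"])
for doc in response["source_documents"]:
    print("-", doc.metadata.get("title"))
```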