Updates evadb apps #1123

Merged · 3 commits · Sep 18, 2023

Changes from 1 commit
39 changes: 20 additions & 19 deletions apps/privategpt/ingest.py
@@ -23,38 +23,39 @@ def load_data(source_folder_path: str):
     cursor = evadb.connect(path).cursor()

     # Drop function if it already exists
-    cursor.drop_function("embedding").execute()
-
+    cursor.query("DROP FUNCTION IF EXISTS embedding;").execute()
     # Create function from Python file
     # This function is a sentence feature extractor
-    embedding_udf = cursor.create_function(
-        udf_name="embedding",
-        if_not_exists=True,
-        impl_path=f"{path}/udfs/sentence_feature_extractor.py",
-    )
-    embedding_udf.execute()
+    text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS embedding
+        IMPL '{path}/functions/sentence_feature_extractor.py';
+        """
+    print(text_feat_function_query)
+    cursor.query(text_feat_function_query).execute()

     print("🧹 Dropping existing tables in EvaDB")
-    cursor.drop_table("data_table").execute()
-    cursor.drop_table("embedding_table").execute()
+    cursor.query("DROP TABLE IF EXISTS data_table;").execute()
+    cursor.query("DROP TABLE IF EXISTS embedding_table;").execute()

     print("📄 Loading PDFs into EvaDB")
-    cursor.load(
-        file_regex=f"{source_folder_path}/*.pdf", format="PDF", table_name="data_table"
-    ).execute()
+    text_load_query = (
+        f"""LOAD PDF '{source_folder_path}/*.pdf' INTO data_table;"""
+    )
+    print(text_load_query)
+    cursor.query(text_load_query).execute()

     print("🤖 Extracting Feature Embeddings. This may take some time ...")
     cursor.query(
         "CREATE TABLE IF NOT EXISTS embedding_table AS SELECT embedding(data), data FROM data_table;"
     ).execute()

     print("🔍 Building FAISS Index ...")
-    cursor.create_vector_index(
-        index_name="embedding_index",
-        table_name="embedding_table",
-        expr="features",
-        using="FAISS",
-    )
+    cursor.query(
+        """
+        CREATE INDEX embedding_index
+        ON embedding_table (features)
+        USING FAISS;
+        """
+    ).execute()


 def main():
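Every Pythonic helper in this file (`drop_function`, `create_function`, `drop_table`, `load`, `create_vector_index`) is replaced by a SQL string passed to `cursor.query(...).execute()`. A quick way to confirm the ingest step worked is a sketch like the one below; it assumes the `data_table` and `embedding_table` names from the diff, a connection to the same EvaDB data directory that `load_data()` uses, and the table-prefixed column naming seen in privateGPT.py.

```python
import evadb

# Sanity-check sketch, not part of the PR. evadb.connect() with no argument uses
# the default data directory; pass the same path ingest.py uses if it differs.
cursor = evadb.connect().cursor()

# Confirm the PDFs were chunked into rows and that embeddings were materialized.
loaded = cursor.query("SELECT data FROM data_table LIMIT 3;").df()
embedded = cursor.query("SELECT data FROM embedding_table LIMIT 3;").df()

# Result columns follow the table-prefixed convention, e.g. "embedding_table.data".
print(loaded["data_table.data"])
print(embedded["embedding_table.data"])
```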
20 changes: 12 additions & 8 deletions apps/privategpt/privateGPT.py
@@ -23,13 +23,15 @@


 def query(question):
-    context_docs = (
-        cursor.table("embedding_table")
-        .order(f"Similarity(embedding('{question}'), features)")
-        .limit(3)
-        .select("data")
-        .df()
-    )
+    context_docs = cursor.query(
+        f"""
+        SELECT data
+        FROM embedding_table
+        ORDER BY Similarity(embedding('{question}'), features)
+        ASC LIMIT 3;
+        """
+    ).df()
+
     # Merge all context information.
     context = "; \n".join(context_docs["embedding_table.data"])

@@ -51,8 +53,10 @@ def query(question):
     print("\n>> Context: ")
     print(context)

+
 print(
-    "🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before running this file."
+    "🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before"
+    " running this file."
 )

 ## Take input of queries from user in a loop
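The `query()` rewrite trades the chained relational calls (`table`, `order`, `limit`, `select`, `df`) for a single `ORDER BY Similarity(...) ... LIMIT` statement. Pulled out of the app, the retrieval step looks roughly like the sketch below; `embedding_table`, the `embedding` function, and the `embedding_table.data` column come from the diff, while the helper name and the `k` parameter are illustrative.

```python
import evadb

# Sketch, not part of the PR. Assumes the embedding function and embedding_table
# created by ingest.py, and a cursor connected to the same data directory.
cursor = evadb.connect().cursor()


def top_k_context(question: str, k: int = 3) -> str:
    """Return the k most similar chunks joined into a single context string."""
    # The question is interpolated directly into the SQL string, mirroring the app;
    # quotes inside the question are not escaped here.
    df = cursor.query(
        f"""
        SELECT data FROM embedding_table
        ORDER BY Similarity(embedding('{question}'), features) ASC
        LIMIT {k};
        """
    ).df()
    return "; \n".join(df["embedding_table.data"])


# Example: context = top_k_context("What topics do these documents cover?")
```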
37 changes: 22 additions & 15 deletions apps/story_qa/evadb_qa.py
@@ -12,19 +12,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from time import perf_counter

 from gpt4all import GPT4All
 from unidecode import unidecode
-from util import download_story, read_text_line, try_execute
+from util import download_story, read_text_line

 import evadb


-def ask_question(path):
+def ask_question(story_path: str):
     # Initialize early to exclude download time.
     llm = GPT4All("ggml-gpt4all-j-v1.3-groovy")

+    path = os.path.dirname(evadb.__file__)
     cursor = evadb.connect().cursor()

     story_table = "TablePPText"
@@ -35,28 +37,30 @@ def ask_question(path):
     t_i = 0

     timestamps[t_i] = perf_counter()
-    print("Setup UDF")
+    print("Setup Function")

-    Text_feat_udf_query = """CREATE UDF IF NOT EXISTS SentenceFeatureExtractor
-        IMPL 'evadb/udfs/sentence_feature_extractor.py';
+    Text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
+        IMPL '{path}/functions/sentence_feature_extractor.py';
         """

-    cursor.query("DROP UDF IF EXISTS SentenceFeatureExtractor;").execute()
-    cursor.query(Text_feat_udf_query).execute()
+    cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
+    cursor.query(Text_feat_function_query).execute()

-    try_execute(cursor, f"DROP TABLE IF EXISTS {story_table};")
-    try_execute(cursor, f"DROP TABLE IF EXISTS {story_feat_table};")
+    cursor.query(f"DROP TABLE IF EXISTS {story_table};").execute()
+    cursor.query(f"DROP TABLE IF EXISTS {story_feat_table};").execute()

     t_i = t_i + 1
     timestamps[t_i] = perf_counter()
     print(f"Time: {(timestamps[t_i] - timestamps[t_i - 1]) * 1000:.3f} ms")

     print("Create table")

-    cursor.query(f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));").execute()
+    cursor.query(
+        f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));"
+    ).execute()

     # Insert text chunk by chunk.
-    for i, text in enumerate(read_text_line(path)):
+    for i, text in enumerate(read_text_line(story_path)):
         print("text: --" + text + "--")
         ascii_text = unidecode(text)
         cursor.query(
@@ -84,7 +88,8 @@ def ask_question(path):

     # Create search index on extracted features.
     cursor.query(
-        f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING FAISS;"
+        f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING"
+        " FAISS;"
     ).execute()

     t_i = t_i + 1
@@ -112,7 +117,9 @@ def ask_question(path):
     # Merge all context information.
     context_list = []
     for i in range(len(res_batch)):
-        context_list.append(res_batch.frames[f"{story_feat_table.lower()}.data"][i])
+        context_list.append(
+            res_batch.frames[f"{story_feat_table.lower()}.data"][i]
+        )
     context = "; \n".join(context_list)

     t_i = t_i + 1
@@ -139,9 +146,9 @@ def ask_question(path):


 def main():
-    path = download_story()
+    story_path = download_story()

-    ask_question(path)
+    ask_question(story_path)


 if __name__ == "__main__":
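Beyond the black-style reflows, the substantive changes in this file are the `UDF` to `FUNCTION` rename, loading the extractor from the installed `evadb` package (`os.path.dirname(evadb.__file__)`), and dropping the `try_execute` helper in favor of plain `DROP ... IF EXISTS` statements. A condensed sketch of that setup step, reusing the names from the diff but not intended as a drop-in replacement:

```python
import os
from time import perf_counter

import evadb

cursor = evadb.connect().cursor()
path = os.path.dirname(evadb.__file__)  # bundled functions live inside the package

start = perf_counter()

# IF EXISTS / IF NOT EXISTS make the statements idempotent, which is why the
# try_execute() wrapper is no longer needed.
cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
cursor.query(
    f"""
    CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
    IMPL '{path}/functions/sentence_feature_extractor.py';
    """
).execute()

cursor.query("DROP TABLE IF EXISTS TablePPText;").execute()
cursor.query("CREATE TABLE TablePPText (id INTEGER, data TEXT(1000));").execute()

print(f"Setup time: {(perf_counter() - start) * 1000:.3f} ms")
```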
2 changes: 1 addition & 1 deletion apps/youtube_channel_qa/README.md
@@ -28,6 +28,6 @@ pip install -r requirements.txt
 ## Usage
 Run script:
 ```bat
-python multi_youtube_video_qa.py
+python youtube_channel_qa.py
 ```
