Updates evadb apps #1123

Merged · 3 commits · Sep 18, 2023

Changes from 1 commit
39 changes: 20 additions & 19 deletions apps/privategpt/ingest.py
@@ -23,38 +23,39 @@ def load_data(source_folder_path: str):
     cursor = evadb.connect(path).cursor()

     # Drop function if it already exists
-    cursor.drop_function("embedding").execute()
-
+    cursor.query("DROP FUNCTION IF EXISTS embedding;").execute()
     # Create function from Python file
     # This function is a sentence feature extractor
-    embedding_udf = cursor.create_function(
-        udf_name="embedding",
-        if_not_exists=True,
-        impl_path=f"{path}/udfs/sentence_feature_extractor.py",
-    )
-    embedding_udf.execute()
+    text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS embedding
+        IMPL '{path}/functions/sentence_feature_extractor.py';
+        """
+    print(text_feat_function_query)
+    cursor.query(text_feat_function_query).execute()

     print("🧹 Dropping existing tables in EvaDB")
-    cursor.drop_table("data_table").execute()
-    cursor.drop_table("embedding_table").execute()
+    cursor.query("DROP TABLE IF EXISTS data_table;").execute()
+    cursor.query("DROP TABLE IF EXISTS embedding_table;").execute()

     print("📄 Loading PDFs into EvaDB")
-    cursor.load(
-        file_regex=f"{source_folder_path}/*.pdf", format="PDF", table_name="data_table"
-    ).execute()
+    text_load_query = (
+        f"""LOAD PDF '{source_folder_path}/*.pdf' INTO data_table;"""
+    )
+    print(text_load_query)
+    cursor.query(text_load_query).execute()

     print("🤖 Extracting Feature Embeddings. This may take some time ...")
     cursor.query(
         "CREATE TABLE IF NOT EXISTS embedding_table AS SELECT embedding(data), data FROM data_table;"
     ).execute()

     print("🔍 Building FAISS Index ...")
-    cursor.create_vector_index(
-        index_name="embedding_index",
-        table_name="embedding_table",
-        expr="features",
-        using="FAISS",
-    )
+    cursor.query(
+        """
+        CREATE INDEX embedding_index
+        ON embedding_table (features)
+        USING FAISS;
+        """
+    ).execute()


 def main():
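Every Pythonic helper in this file (`drop_function`, `create_function`, `drop_table`, `load`, `create_vector_index`) is replaced by a SQL string passed to `cursor.query(...).execute()`. A quick way to confirm the ingest step worked is a sketch like the one below; it assumes the `data_table` and `embedding_table` names from the diff, a connection to the same EvaDB data directory that `load_data()` uses, and the table-prefixed column naming seen in privateGPT.py.

```python
import evadb

# Sanity-check sketch, not part of the PR. evadb.connect() with no argument uses
# the default data directory; pass the same path ingest.py uses if it differs.
cursor = evadb.connect().cursor()

# Confirm the PDFs were chunked into rows and that embeddings were materialized.
loaded = cursor.query("SELECT data FROM data_table LIMIT 3;").df()
embedded = cursor.query("SELECT data FROM embedding_table LIMIT 3;").df()

# Result columns follow the table-prefixed convention, e.g. "embedding_table.data".
print(loaded["data_table.data"])
print(embedded["embedding_table.data"])
```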
20 changes: 12 additions & 8 deletions apps/privategpt/privateGPT.py
@@ -23,13 +23,15 @@


 def query(question):
-    context_docs = (
-        cursor.table("embedding_table")
-        .order(f"Similarity(embedding('{question}'), features)")
-        .limit(3)
-        .select("data")
-        .df()
-    )
+    context_docs = cursor.query(
+        f"""
+        SELECT data
+        FROM embedding_table
+        ORDER BY Similarity(embedding('{question}'), features)
+        ASC LIMIT 3;
+        """
+    ).df()
+
     # Merge all context information.
     context = "; \n".join(context_docs["embedding_table.data"])

@@ -51,8 +53,10 @@ def query(question):
     print("\n>> Context: ")
     print(context)

+
 print(
-    "🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before running this file."
+    "🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before"
+    " running this file."
 )

 ## Take input of queries from user in a loop
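The `query()` rewrite trades the chained relational calls (`table`, `order`, `limit`, `select`, `df`) for a single `ORDER BY Similarity(...) ... LIMIT` statement. Pulled out of the app, the retrieval step looks roughly like the sketch below; `embedding_table`, the `embedding` function, and the `embedding_table.data` column come from the diff, while the helper name and the `k` parameter are illustrative.

```python
import evadb

# Sketch, not part of the PR. Assumes the embedding function and embedding_table
# created by ingest.py, and a cursor connected to the same data directory.
cursor = evadb.connect().cursor()


def top_k_context(question: str, k: int = 3) -> str:
    """Return the k most similar chunks joined into a single context string."""
    # The question is interpolated directly into the SQL string, mirroring the app;
    # quotes inside the question are not escaped here.
    df = cursor.query(
        f"""
        SELECT data FROM embedding_table
        ORDER BY Similarity(embedding('{question}'), features) ASC
        LIMIT {k};
        """
    ).df()
    return "; \n".join(df["embedding_table.data"])


# Example: context = top_k_context("What topics do these documents cover?")
```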
37 changes: 22 additions & 15 deletions apps/story_qa/evadb_qa.py
@@ -12,19 +12,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from time import perf_counter

 from gpt4all import GPT4All
 from unidecode import unidecode
-from util import download_story, read_text_line, try_execute
+from util import download_story, read_text_line

 import evadb


-def ask_question(path):
+def ask_question(story_path: str):
     # Initialize early to exclude download time.
     llm = GPT4All("ggml-gpt4all-j-v1.3-groovy")

+    path = os.path.dirname(evadb.__file__)
     cursor = evadb.connect().cursor()

     story_table = "TablePPText"
@@ -35,28 +37,30 @@ def ask_question(path):
     t_i = 0

     timestamps[t_i] = perf_counter()
-    print("Setup UDF")
+    print("Setup Function")

-    Text_feat_udf_query = """CREATE UDF IF NOT EXISTS SentenceFeatureExtractor
-        IMPL 'evadb/udfs/sentence_feature_extractor.py';
+    Text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
+        IMPL '{path}/functions/sentence_feature_extractor.py';
         """

-    cursor.query("DROP UDF IF EXISTS SentenceFeatureExtractor;").execute()
-    cursor.query(Text_feat_udf_query).execute()
+    cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
+    cursor.query(Text_feat_function_query).execute()

-    try_execute(cursor, f"DROP TABLE IF EXISTS {story_table};")
-    try_execute(cursor, f"DROP TABLE IF EXISTS {story_feat_table};")
+    cursor.query(f"DROP TABLE IF EXISTS {story_table};").execute()
+    cursor.query(f"DROP TABLE IF EXISTS {story_feat_table};").execute()

     t_i = t_i + 1
     timestamps[t_i] = perf_counter()
     print(f"Time: {(timestamps[t_i] - timestamps[t_i - 1]) * 1000:.3f} ms")

     print("Create table")

-    cursor.query(f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));").execute()
+    cursor.query(
+        f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));"
+    ).execute()

     # Insert text chunk by chunk.
-    for i, text in enumerate(read_text_line(path)):
+    for i, text in enumerate(read_text_line(story_path)):
         print("text: --" + text + "--")
         ascii_text = unidecode(text)
         cursor.query(
@@ -84,7 +88,8 @@ def ask_question(path):

     # Create search index on extracted features.
     cursor.query(
-        f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING FAISS;"
+        f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING"
+        " FAISS;"
     ).execute()

     t_i = t_i + 1
@@ -112,7 +117,9 @@ def ask_question(path):
     # Merge all context information.
     context_list = []
     for i in range(len(res_batch)):
-        context_list.append(res_batch.frames[f"{story_feat_table.lower()}.data"][i])
+        context_list.append(
+            res_batch.frames[f"{story_feat_table.lower()}.data"][i]
+        )
     context = "; \n".join(context_list)

     t_i = t_i + 1
@@ -139,9 +146,9 @@ def ask_question(path):


 def main():
-    path = download_story()
+    story_path = download_story()

-    ask_question(path)
+    ask_question(story_path)


 if __name__ == "__main__":
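Beyond the black-style reflows, the substantive changes in this file are the `UDF` to `FUNCTION` rename, loading the extractor from the installed `evadb` package (`os.path.dirname(evadb.__file__)`), and dropping the `try_execute` helper in favor of plain `DROP ... IF EXISTS` statements. A condensed sketch of that setup step, reusing the names from the diff but not intended as a drop-in replacement:

```python
import os
from time import perf_counter

import evadb

cursor = evadb.connect().cursor()
path = os.path.dirname(evadb.__file__)  # bundled functions live inside the package

start = perf_counter()

# IF EXISTS / IF NOT EXISTS make the statements idempotent, which is why the
# try_execute() wrapper is no longer needed.
cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
cursor.query(
    f"""
    CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
    IMPL '{path}/functions/sentence_feature_extractor.py';
    """
).execute()

cursor.query("DROP TABLE IF EXISTS TablePPText;").execute()
cursor.query("CREATE TABLE TablePPText (id INTEGER, data TEXT(1000));").execute()

print(f"Setup time: {(perf_counter() - start) * 1000:.3f} ms")
```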
2 changes: 1 addition & 1 deletion apps/youtube_channel_qa/README.md
@@ -28,6 +28,6 @@ pip install -r requirements.txt
 ## Usage
 Run script:
 ```bat
-python multi_youtube_video_qa.py
+python youtube_channel_qa.py
 ```
