Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fixed backend issues: issue #212 #213

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/Generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class MCQGenerator:
def __init__(self):
self.tokenizer = T5Tokenizer.from_pretrained('t5-large')
self.model = T5ForConditionalGeneration.from_pretrained('Roasters/Question-Generator')
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.device = torch.device("cpu")
self.model.to(self.device)
self.nlp = spacy.load('en_core_web_sm')
self.s2v = Sense2Vec().from_disk('s2v_old')
Expand Down
27 changes: 17 additions & 10 deletions backend/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,34 @@

REPO_URL="https://github.com/AOSSIE-Org/EduAid.git"
S2V_URL="https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz"
REPO_DIR="EduAid"
S2V_ARCHIVE="s2v_reddit_2015_md.tar.gz"
S2V_DIR="s2v_old"

# Create a venv if it doesn't exist
if [ ! -d "venv" ]; then
python3 -m venv venv
fi
source venv/bin/activate

if [ ! -d "$REPO_DIR" ]; then
git clone $REPO_URL
fi

# Download sense2vec model if it doesn't exist
if [ ! -f "$S2V_ARCHIVE" ]; then
wget $S2V_URL -O $S2V_ARCHIVE
fi

if [ ! -d "$REPO_DIR/$S2V_DIR" ]; then
mkdir -p $REPO_DIR/$S2V_DIR
tar -xzvf $S2V_ARCHIVE -C $REPO_DIR/$S2V_DIR --strip-components=1
# Extract Sense2Vec model if the directory doesn't exist
if [ ! -d "$S2V_DIR" ]; then
tar -xzvf $S2V_ARCHIVE -C . --strip-components=1
fi

# Deactivate virtual environment after completion
source deactivate
# Install dependencies
# If you're using 'python' instead of 'python3', replace 'python3' with 'python' here
python3 -m pip install -r ../requirements.txt

# python3 -m pip install -r requirements.txt
# I don't know whether or not to install this, because it is already doing everything here
#
# Start Flask server
python server.py

# To Deactivate virtual environment
deactivate
6 changes: 6 additions & 0 deletions backend/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
from pprint import pprint
import nltk

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import torch
torch.cuda.empty_cache()


from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
nltk.download("stopwords")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from editdistance import eval as get_edit_distance
from jellyfish import jaro_winkler_similarity as get_similarity

def convert_distance_function_to_score_function(function=get_edit_distance):
    """Wrap a string-distance function as a normalized similarity score.

    Args:
        function: Callable taking two strings and returning a numeric
            distance. Defaults to ``get_edit_distance``.

    Returns:
        A callable ``score_func(s1, s2)`` returning
        ``1 - distance / max(len(s1), len(s2))``, clamped so that it is
        never negative. Two empty strings score ``1.0``.
    """
    def score_func(s1, s2):
        longest = max(len(s1), len(s2))
        if longest == 0:
            # Both inputs are empty: there is no length to normalize by.
            # They are identical, so report a perfect score instead of
            # raising ZeroDivisionError.
            return 1.0
        distance = function(s1, s2)
        # Normalize by the longer input and clamp at zero in case the
        # distance exceeds that length.
        score = max(0, 1 - float(distance) / longest)
        return score
    return score_func

def create_weighted_similarity_function(function=get_similarity, first_letter_weight=1):
    """Build a similarity function that also rewards a matching first character.

    Args:
        function: Callable taking two strings and returning a numeric
            similarity. Defaults to ``get_similarity``.
        first_letter_weight: Weight given to the first-character match
            relative to the base similarity (which has weight 1).

    Returns:
        A callable ``new_score_func(s1, s2)`` returning the weighted average
        ``(similarity + match * weight) / (1 + weight)``, where ``match`` is
        1.0 when both strings are non-empty and start with the same
        character, and 0.0 otherwise.
    """
    def new_score_func(s1, s2):
        original_result = function(s1, s2)
        # An empty string has no first character; guard before indexing so
        # empty input yields "no match" instead of raising IndexError.
        first_letter_result = float(bool(s1) and bool(s2) and s1[0] == s2[0])
        return (original_result + first_letter_result * first_letter_weight) / (1.0 + first_letter_weight)
    return new_score_func
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from jellyfish import jaro_winkler_similarity as _get_jaro_winkler_similarity


def get_jaro_winkler_similarity(s1, s2, case_sensitive=True):
    """Return the Jaro-Winkler similarity of ``s1`` and ``s2``.

    When ``case_sensitive`` is false, both strings are lower-cased before
    being compared; otherwise they are compared as given.
    """
    if case_sensitive:
        return _get_jaro_winkler_similarity(s1, s2)
    # Case-insensitive comparison: normalize both sides to lower case first.
    return _get_jaro_winkler_similarity(s1.lower(), s2.lower())


def get_jaro_winkler_distance(s1, s2, case_sensitive=True):
    """Return a length-scaled Jaro-Winkler distance between ``s1`` and ``s2``.

    The result is ``(1 - similarity)`` multiplied by the length of the longer
    input string, where the similarity comes from
    ``get_jaro_winkler_similarity`` with the same ``case_sensitive`` setting.
    """
    longest = max(len(s1), len(s2))
    similarity = get_jaro_winkler_similarity(s1, s2, case_sensitive=case_sensitive)
    return longest * (1 - similarity)