-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze.py
108 lines (79 loc) · 3.71 KB
/
analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
File: analyze.py
Author: Batuhan Erden
"""
import sys
import pickle
from texttable import Texttable
from src.corpus import Corpus
from src.segmentation.spacy_segmenter import SpacySegmenter
from src.segmentation.luima_law_segmenter import LuimaLawSegmenter
from src.embeddings import Embeddings
from src.featurization.embeddings_featurizer import EmbeddingsFeaturizer
from src.utils.sys_utils import create_dir
from src.utils.logging_utils import log
# Filesystem locations shared by this script.
# OUT_DIR keeps its trailing slash, so artifact names are appended directly to it.
OUT_DIR = "./out/"
EMBEDDINGS_MODEL_FILEPATH = f"{OUT_DIR}_embeddings_model.bin"
BEST_CLASSIFIER_FILEPATH = f"{OUT_DIR}_best_classifier.pkl"
def log_results(sentences, predicted_labels):
    """
    Renders every sentence next to its predicted label as a text table and logs it
    :param sentences: Sentence records; the "txt" entry of each one is displayed
    :param predicted_labels: Predicted labels, looked up by the position of the sentence
    """
    results_table = Texttable()
    results_table.set_cols_valign(["m", "m"])

    # The first row acts as the header, followed by one row per sentence
    header_row = ["Sentence", "Predicted Label"]
    body_rows = []
    for position, record in enumerate(sentences):
        body_rows.append([record["txt"], predicted_labels[position]])
    results_table.add_rows([header_row] + body_rows)

    rendered_table = results_table.draw()
    log(f"The resulting splits and the predicted labels for them:\n{rendered_table}")
def analyze(bva_decision_filepath):
    """
    Makes a prediction using the given BVA decision

    Reads the decision text, splits it into sentences, featurizes the sentences with
    the saved embeddings model, classifies them with the saved best classifier and
    logs one predicted label per sentence. Nothing is returned.
    :param bva_decision_filepath: Path to the text file of the BVA decision to be predicted
    """
    log(f"Loading the BVA decision from {bva_decision_filepath}..")

    # Load the BVA decision (the file is decoded as latin-1)
    with open(bva_decision_filepath, encoding="latin-1") as data:
        bva_decision_plain_text = data.read()

    log(f"The BVA decision successfully loaded from {bva_decision_filepath}!")
    log("Sentence-segmenting the BVA decision loaded..")

    # Sentence-segment BVA decision using Luima segmenter
    sentences = LuimaLawSegmenter(corpus=None).generate_sentences(bva_decision_plain_text)

    # Build one span per generated sentence from its text and character offsets;
    # the spans are only kept in this local list
    spans = [
        Corpus.create_span(
            plainText=bva_decision_plain_text, txt=sentence["txt"],
            start=sentence["start_char"], end=sentence["end_char"])
        for sentence in sentences
    ]

    log("The BVA decision successfully split into sentences!")

    # Load the embeddings model and initialize word embedding featurization
    embeddings = Embeddings(model_filepath=EMBEDDINGS_MODEL_FILEPATH)
    embeddings_featurizer = EmbeddingsFeaturizer(corpus=None,
                                                 tokenization_segmenter=SpacySegmenter(corpus=None, improved=True),
                                                 embeddings_model=embeddings.model)

    log("Creating the inputs to be fed into the network..")

    # Create the inputs from the sentence-segmented BVA decision
    # (the second returned value, the labels, is not needed for prediction)
    X, _ = embeddings_featurizer.create_inputs_and_labels_for_spans(dataset_name="analyzed",
                                                                    spans=spans, tokenize=True)

    log("The inputs to be fed into the network successfully created!")
    log(f"Loading the best classifier from {BEST_CLASSIFIER_FILEPATH}..")

    # Load the best classifier saved; the context manager closes the file handle
    # even when unpickling fails
    # NOTE(review): unpickling can execute arbitrary code - only load a classifier
    # file that comes from a trusted source
    with open(BEST_CLASSIFIER_FILEPATH, "rb") as classifier_file:
        classifier = pickle.load(classifier_file)

    log(f"The best classifier successfully loaded from {BEST_CLASSIFIER_FILEPATH}!")
    log("Classifying the given BVA decision..")

    # Make prediction
    predicted_labels = classifier.predict(X)

    # Log the results
    log_results(sentences, predicted_labels)
    log("The given BVA decision successfully classified!")
if __name__ == "__main__":
    # Validate the command line explicitly: an `assert` is removed when Python runs
    # with -O, which would let a missing argument surface later as an IndexError
    if len(sys.argv) != 2:
        sys.exit("Pass the path to the text file containing a BVA decision => "
                 "$ python analyze.py <path to txt file>")

    # Create the out directory
    create_dir(OUT_DIR)

    # Run analyze
    analyze(bva_decision_filepath=sys.argv[1])