legacy.py
#!/usr/bin/env python3
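"""Legacy end-to-end pipeline for audio-based topic segmentation of videos.

Extracts the audio track from each video, splits it into speech segments
with VAD, transcribes each segment, computes prosodic features (pitch,
volume, pause), wraps each segment in a Shot, and runs a genetic algorithm
over the shots' word vectors to locate topic boundaries.
"""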

import glob
import os
import os.path as path
from typing import List, Tuple

import click
from gensim.models.keyedvectors import KeyedVectors

from lib.asr import transcribe
from lib.extract_audio import extract_audio
from lib.features import extract_features
from lib.genetic_algo import GeneticAlgorithm as GA
from lib.segmentation import DocSim, Shot
from lib.vad import process


def init_word2vec(model_path: str, stopwords_file: str) -> Tuple[DocSim, List[str]]:
    """Load the pretrained word2vec model and the comma-separated stopword list."""
    with open(stopwords_file, "r") as f:
        stopwords = f.read().split(",")
    # Cap the vocabulary at 1M entries to keep memory usage manageable.
    model = KeyedVectors.load_word2vec_format(
        model_path, binary=True, limit=1000000
    )
    docSim = DocSim(model, stopwords=stopwords)
    return docSim, stopwords


def process_video(video_file: str, docSim: DocSim, stopwords: List[str]) -> None:
    # Extract the audio track and split it into speech segments via VAD.
    audio_data = extract_audio(video_file)
    segments = process(audio_data)

    feature_arr = []
    transcript_arr = []
    chunks = []
    previous_end_ts = 0.0
    wordvecs = []
    for idx, segment in enumerate(segments):
        # Get the transcript for this speech segment
        result = transcribe(segment["bytes"])
        transcript = result["hypotheses"][0]["utterance"]
        print(transcript)
        transcript_arr.append(transcript)

        # Get prosodic features: pitch, volume, and the pause since the previous segment
        pitch, volume = extract_features(segment["bytes"])
        pause_time = float(segment["timestamp"]) - previous_end_ts
        feature = {"pause": pause_time, "pitch": pitch, "volume": volume}
        previous_end_ts = float(segment["timestamp"]) + float(segment["duration"])
        feature_arr.append(feature)

        # Create a shot for this segment and attach its transcript and concepts
        shot = Shot(
            idx,
            pitch,
            volume,
            pause_time,
            [],
            init_time=segment["timestamp"],
            end_time=0,
        )
        shot.extractTranscriptAndConcepts(transcript, False, docSim=docSim)
        chunks.append(shot)

        # Get the word vector, if the shot produced a valid one
        wv = shot.word2vec if shot.valid_vector else None
        wordvecs.append(wv)
        if wv is not None:
            print(type(wv))
            print(len(wv))

    # Drop shots without a valid word vector; the GA needs at least two shots.
    chunks = [s for s in chunks if s.valid_vector]
    if len(chunks) < 2:
        boundaries = [0]
    else:
        # Run the genetic algorithm to find topic boundaries
        ga = GA(
            population_size=100,
            constructiveHeuristic_percent=0.3,
            mutation_rate=0.05,
            cross_over_rate=0.4,
            docSim=docSim,
            shots=chunks,
            n_chunks=len(chunks),
            generations=500,
            local_search_percent=0.3,
            video_length=100,
            stopwords=stopwords,
            ocr_on=False,
        )
        boundaries = ga.run()
    print(boundaries, flush=True)
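

# CLI entry point. For the "easytopic" source, input_path is expected to
# contain one subdirectory per video; this command currently only lists
# those directories.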
@click.command()
@click.option("-i", "--input-path", type=str, help="ABSOLUTE path to video folders")
@click.option("-s", "--source", default="easytopic", type=str, help="Data source")
def main(input_path: str, source: str) -> None:
    if source == "easytopic":
        videos = next(os.walk(input_path))[1]
        print(videos)
    else:
        raise ValueError(f"Unsupported source: {source}")
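

# Legacy entry point: bypasses the click CLI above and processes every .mp4
# under data/ directly, with hard-coded model and stopword paths.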
if __name__ == "__main__":
    video_files = glob.glob("data/*.mp4")
    GOOGLE_MODEL_PATH = "/media/word2vec/GoogleNews-vectors-negative300.bin"
    STOPWORD_PATH = "data/stopwords_en.txt"
    docsim_model, stopwords = init_word2vec(GOOGLE_MODEL_PATH, STOPWORD_PATH)
    for video_file in video_files:
        # process_video returns None, so there is nothing to collect here.
        process_video(path.abspath(video_file), docsim_model, stopwords)