-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy path6_prepare_mfa.py
42 lines (37 loc) · 1.53 KB
/
6_prepare_mfa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os.path
import shutil
from pypinyin import lazy_pinyin
# Syllables accepted by the target dictionary: the first tab-separated field
# of every line in the dictionary file. Read inside a `with` block so the
# file handle is closed as soon as the list is built.
with open("assets/opencpop-strict.dict") as dict_file:
    all_pinyins = [entry.split("\t")[0] for entry in dict_file]

# Characters stripped from the text before it is converted to pinyin.
# NOTE(review): '!', '?', '…' and ',' each appear twice. The duplicates are
# harmless to str.maketrans, but one set was presumably meant to be the
# full-width forms - confirm against the upstream file.
punc = ['!', '?', "…", ",", "。", '!', '?', "…", ",", ".", " ", "、", "~"]
def to_pinyin(s):
    """Convert text to a space-separated string of pinyin syllables.

    Every character listed in ``punc`` is removed first, the remainder is
    passed to ``lazy_pinyin``, and each resulting syllable must be present
    in ``all_pinyins``.

    Args:
        s: Text to convert.

    Returns:
        The syllables joined by single spaces.

    Raises:
        AssertionError: If a syllable is not in ``all_pinyins``. The
            exception argument is the tuple
            ``(syllable, stripped text, syllable list)``.
    """
    # Deletion-only translation table: every punctuation character maps to
    # None, so translate() simply drops it.
    table = str.maketrans('', '', "".join(punc))
    s = s.translate(table)

    pinyin_list = lazy_pinyin(s)
    for i, pinyin in enumerate(pinyin_list):
        if pinyin == 'n':
            # A bare 'n' is rewritten to 'en' before the dictionary check.
            pinyin = pinyin_list[i] = 'en'
        if pinyin not in all_pinyins:
            # Raised explicitly rather than through `assert` so the check
            # still runs under `python -O`; the exception type and payload
            # are the same as the assert statement produced.
            raise AssertionError((pinyin, s, pinyin_list))
    return ' '.join(pinyin_list)
# For every speaker directory under output/: write a .lab transcript next to
# each labelled wav, then delete the wavs that ended up without a transcript.
for spk in os.listdir("output"):
    spk_dir = os.path.join("output", spk)
    if not os.path.isdir(spk_dir):
        continue

    label_path = f"labels/{spk}_label.txt"
    # NOTE(review): the label file is read with the platform default encoding,
    # as before; confirm it matches whatever step wrote the file.
    with open(label_path) as label_file:
        for line in label_file:
            line = line.strip()
            if not line:
                # A blank line has no "wavpath|text" pair to unpack.
                continue
            wavpath, text = line.split("|")
            if not os.path.exists(wavpath):
                print(wavpath, "not exist, skip")
                continue
            pinyin = to_pinyin(text)
            print(pinyin)
            # Swap only the extension. A substring replace of ".wav" would
            # leave a path without ".wav" unchanged and overwrite the audio
            # file itself with the transcript.
            lab_path = os.path.splitext(wavpath)[0] + ".lab"
            with open(lab_path, "w") as f:
                f.write(pinyin + "\n")

    # Delete audio files that have no transcript.
    print("正在删除没有标注的音频...")
    for wavname in os.listdir(spk_dir):
        if not wavname.endswith(".wav"):
            continue
        # Derive the transcript name from the extension only, so a "wav"
        # elsewhere in the file name (e.g. "wave_1.wav") is left alone.
        labname = os.path.splitext(wavname)[0] + ".lab"
        if os.path.exists(os.path.join(spk_dir, labname)):
            continue
        print(wavname)
        wav_file = os.path.join(spk_dir, wavname)
        try:
            # os.remove avoids building a shell command from file names,
            # which breaks on spaces/metacharacters and needs a Unix `rm`.
            os.remove(wav_file)
        except OSError as err:
            # The old `rm` call never aborted the run on failure; keep
            # going, but report the problem.
            print(f"failed to remove {wav_file}: {err}")