-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
100 lines (86 loc) · 3.71 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
import torch
import glob
# from text import load_text_encoder
import os
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
import yaml
from models.model import Downstream, Featurizer
import torchaudio
# from textgrid import TextGrid
# bucket_path = '/home/b07502072/cs_ssl/cs_asr_ssl/data/len_for_bucket/splitted-seame'
# data_path = ''
# out_path = './data/valid_names/splitted-seame/'
# load_valid = out_path
# splits = [ 'train', 'dev', 'dev-man', 'dev-sge' ]
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# sample_rate = 16000
# def load_wav(wav_path):
# wav, sr = torchaudio.load(wav_path)
# assert sr == sample_rate, f'Sample rate mismatch: real {sr}, config {sample_rate}'
# return wav.view(-1)
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# upstream = torch.hub.load('s3prl/s3prl', 'wav2vec2_large_960').to(device)
# with torch.no_grad():
# for split in splits:
# split_valid_names = np.load(os.path.join(out_path, f'{split}.npy'))
# lid = []
# wavs = []
# for valid_name in tqdm(split_valid_names[0:5], total=len(split_valid_names[0:5]), desc=f'checking split: {split}'):
# # lid_a = torch.LongTensor(torch.load(f'{valid_name}_lid.pt'))
# # print(lid_a)
# # print(lid_a.size())
# lid.append(torch.load(f'{valid_name}_balanced_lid.pt'))
# # print(lid_b)
# # print(lid_b.size())
# # if lid_a.size()[-1] != lid_b.size()[0]:
# wavs.append(torch.FloatTensor(load_wav(f'{valid_name}.wav')).to(device))
# # wavs = [ torch.FloatTensor(wav).to(device) ]
# features = upstream(wavs)['default']
# for i in range(len(lid)):
# print( lid[i].size()[0], len(features[i]))
# if lid[i].size()[0] != len(features[i]):
# print(features[i][-1].sum().item(), features[i][100].sum().item())
# dict_path = '/home/b07502072/cs_ssl/cs_asr_ssl/dicts/dict_9k.model'
# out_path = './dicts/dict_9k_id_to_text.txt'
# dictionary = load_text_encoder('subword', dict_path)
# print(dictionary.decode([112, 113]))
# # with open(out_path, 'w') as outf:
# # for i in range(9000):
# # outf.write(dictionary.decode([i]) + '\n')
# for param in upstream.model.feature_extractor.parameters():
# param.requires_grad = False
# print(list(upstream.model.feature_extractor.parameters())[0:10])
# print(list(upstream.model.))
# inputs = torch.FloatTensor(np.zeros(3000))
# outs = upstream([ inputs ])['default']
# outs = outs.view(-1).sum() ** 2
# outs.backward()
# print(outs)
# Inspect a saved checkpoint: load it and print the keys stored under its
# 'Featurizer' entry.
# NOTE: despite its name, `config_path` points at a checkpoint (.ckpt) file,
# not a config file; the name is kept so any code relying on it still works.
config_path = '/home/b07502072/cs_ssl/model/fbank.ckpt'
# map_location='cpu' remaps every stored tensor onto the CPU, so the checkpoint
# can be inspected on a machine without a GPU even if it was saved from CUDA.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
ckpt = torch.load(config_path, map_location='cpu')
print(list(ckpt['Featurizer'].keys()))
# print(ckpt['Featurizer']['weights'])
# with open(config_path, 'r') as yml_f:
# config = yaml.safe_load(yml_f)
# config_asr = config.get('ASR')
# config_asr['DOWNSTREAM']['RNNs']['output_size'] = 5000
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# upstream_asr = torch.hub.load('s3prl/s3prl', config_asr['UPSTREAM']['name']).to(device)
# featurizer_asr = Featurizer(upstream_asr, device, **config_asr['FEATURIZER']).to(device)
# downstream_asr = Downstream(featurizer_asr.upstream_dim, **config_asr['DOWNSTREAM']).to(device)
# featurizer_asr.train()
# downstream_asr.train()
# for param in featurizer_asr.parameters():
# sum += 1
# for param in downstream_asr.parameters():
# sum += 1
# trainable_params = list(featurizer_asr.parameters()) + list(downstream_asr.parameters())
# total = sum(p.numel() for p in trainable_params)
# for param_containing in trainable_params:
# for param in
# print(total)
# print(ckpt['Downstream'].keys())
# print(ckpt['CTC_Featurizer'].keys())