# noise_functions.py
# Forked from artetxem/undreamt.
import numpy as np
import random
def remove_one_noise(input_ids, lengths, pad, to_remove=None):
    """Delete one token from every sequence of a time-major batch.

    ``input_ids`` is indexed as ``input_ids[position][sequence]``: each inner
    list is one position and each column is one sequence.  For column ``i``
    the token at the chosen position is dropped, the tokens after it move up
    by one position, and the last row of that column is set to ``pad``.

    Args:
        input_ids: list of lists of integers (rows are positions, columns
            are sequences).  It is copied into an array; the argument itself
            is not modified.
        lengths: length of each column, excluding padding.
        pad: ID of the PAD token written into the freed last position.
        to_remove: position to delete in each column (one entry per column).
            When ``None``, a position is drawn at random from
            ``[0, length - 2]``, so the last unpadded token of a column is
            never the one removed.  Columns with ``length < 2`` have no such
            position and are left unchanged in random mode.

    Returns:
        A new nested list with the same shape as ``input_ids``.
    """
    if to_remove is not None:
        assert len(to_remove) == len(lengths)
    input_arr = np.array(input_ids)
    for i, length in enumerate(lengths):
        if to_remove is None:
            if length < 2:
                # random.randint(0, length - 2) would raise ValueError on an
                # empty range; there is nothing removable in this column.
                continue
            r = random.randint(0, length - 2)
        else:
            r = to_remove[i]
        # Shift everything after position r up by one row within column i,
        # then pad the slot that became free at the bottom.
        input_arr[r:-1, i] = input_arr[r + 1:, i]
        input_arr[-1, i] = pad
    return input_arr.tolist()
def unit_test():
    """Check remove_one_noise against hand-computed results.

    Every output is printed.  The explicit-index cases are asserted against
    their expected batches; the final random-mode call is only printed.
    """
    pad_id = 0
    lengths = [4, 3, 3]

    # Four-row batch: each entry pairs the positions to delete (one per
    # column) with the batch expected after deletion.
    batch = [[100, 200, 300], [200, 300, 400], [300, 4, 4], [4, 0, 0]]
    cases = (
        ([2, 1, 1], [[100, 200, 300], [200, 4, 4], [4, 0, 0], [0, 0, 0]]),
        ([2, 0, 0], [[100, 300, 400], [200, 4, 4], [4, 0, 0], [0, 0, 0]]),
        ([0, 0, 1], [[200, 300, 300], [300, 4, 4], [4, 0, 0], [0, 0, 0]]),
    )
    for positions, expected in cases:
        actual = remove_one_noise(batch, lengths, pad_id, positions)
        print(actual)
        assert expected == actual

    # Five-row batch, checked with the same lengths as above.
    batch = [
        [200, 300, 500],
        [100, 200, 300],
        [200, 100, 200],
        [400, 5, 4],
        [4, 4, 0],
    ]
    expected = [
        [200, 300, 500],
        [100, 200, 200],
        [200, 100, 4],
        [4, 4, 0],
        [0, 0, 0],
    ]
    actual = remove_one_noise(batch, lengths, pad_id, [3, 3, 1])
    print(actual)
    assert expected == actual

    # Random mode: nothing to assert, so show the input next to the output.
    noisy = remove_one_noise(batch, lengths, pad_id)
    print(batch)
    print(noisy)
# Run the self-checks when this file is executed as a script.
if __name__ == "__main__":
    unit_test()