-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPIITokenizer.ts
48 lines (37 loc) · 1.64 KB
/
PIITokenizer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import { DateSanitizer, GenderSanitizer, StringSanitizer } from "./Sanitizer";
import { QGramExpander, DateExpander, GenderExpander } from "./Expander";
import { PrivacyPreservingTokenizer } from "./PrivacyPreservingTokenizer";
/**
 * Turns a person's identifying fields (first name, last name, date of birth,
 * gender) into a single `Uint8Array` token.
 *
 * The pipeline is: sanitize the text fields, expand every field into a list of
 * strings, then pass the combined list to a `PrivacyPreservingTokenizer`.
 */
export class PIITokenizer {
  /** Sanitizer applied to `firstName` and `lastName` in `tokenize`. */
  stringSanitizer: StringSanitizer = new StringSanitizer();
  /** NOTE(review): constructed but never invoked by `tokenize` — confirm whether `dateOfBirth` should be sanitized. */
  dateSanitizer: DateSanitizer = new DateSanitizer();
  /** Sanitizer applied to `gender` in `tokenize`. */
  genderSanitizer: GenderSanitizer = new GenderSanitizer();
  /** Expander applied to `dateOfBirth` in `tokenize`. */
  dateExpander: DateExpander = new DateExpander();
  /** Expander applied to both sanitized name fields in `tokenize`. */
  qGramExpander: QGramExpander = new QGramExpander();
  /** Expander applied to the sanitized `gender` in `tokenize`. */
  genderExpander: GenderExpander = new GenderExpander();
  /** Receives the combined expanded fields and produces the final token. */
  tokenizer: PrivacyPreservingTokenizer;

  /**
   * Builds the sanitizers and expanders (via the field initializers above) and
   * then the underlying tokenizer.
   *
   * All three arguments are forwarded unchanged, in this order, to the
   * `PrivacyPreservingTokenizer` constructor.
   *
   * @param bloomFilterLength First tokenizer argument; defaults to 500.
   * @param numberOfHashFunctions Second tokenizer argument; defaults to 20.
   * @param privacyBudget Third tokenizer argument; defaults to 3.0.
   */
  constructor(bloomFilterLength: number = 500, numberOfHashFunctions: number = 20, privacyBudget: number = 3.0) {
    this.tokenizer = new PrivacyPreservingTokenizer(bloomFilterLength, numberOfHashFunctions, privacyBudget);
  }

  /**
   * Produces the token for one person record.
   *
   * @param firstName Sanitized with `stringSanitizer`, then expanded with `qGramExpander`.
   * @param lastName Sanitized with `stringSanitizer`, then expanded with `qGramExpander`.
   * @param dateOfBirth Passed directly to `dateExpander` (no sanitizer is applied).
   * @param gender Sanitized with `genderSanitizer`, then expanded with `genderExpander`.
   * @param other NOTE(review): accepted but currently never read, so it has no
   *   effect on the result — confirm the intended handling.
   * @returns Whatever `tokenizer.tokenize` yields for the combined field list.
   */
  tokenize(firstName: string, lastName: string, dateOfBirth: Date, gender: string, other: string[] = []): Uint8Array {
    // Sanitize the text inputs up front, in the order first name, last name, gender.
    const cleanFirstName = this.stringSanitizer.sanitize(firstName);
    const cleanLastName = this.stringSanitizer.sanitize(lastName);
    const cleanGender = this.genderSanitizer.sanitize(gender);

    // Each expansion is spread into the list before the next expander runs;
    // the order first name, last name, date of birth, gender is what the
    // tokenizer receives.
    const expandedFields: string[] = [
      ...this.qGramExpander.expand(cleanFirstName),
      ...this.qGramExpander.expand(cleanLastName),
      ...this.dateExpander.expand(dateOfBirth),
      ...this.genderExpander.expand(cleanGender),
    ];

    // Hand the combined fields to the tokenizer (the Bloom-filter step).
    return this.tokenizer.tokenize(expandedFields);
  }
}