-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhelpers.py
29 lines (23 loc) · 843 Bytes
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# -*- coding: utf-8 -*-
import difflib
############################################
### HELPER/UTIL FUNCTIONS FOR DATAFRAMES ###
############################################
def get_unique_vals(data, col="type"):
    """
    Return the unique values of one column as a plain list.

    Equivalent to (e.g.) ``data["type"].unique().tolist()``.

    The column is looked up by key first, so that a column whose name
    collides with an attribute of ``data`` itself (for a pandas DataFrame
    e.g. "size", "shape" or "count") resolves to the column and not to
    that attribute. Attribute access is kept as a fallback for objects
    that do not support key lookup.

    Args:
        data: object holding the column; expected to be DataFrame-like.
        col: name of the column to inspect (default: "type").

    Returns:
        The result of the column's ``unique()`` converted with ``tolist()``.

    Raises:
        AttributeError: if ``col`` is found neither as a key nor as an
            attribute of ``data``, or the column offers no ``unique()``.
    """
    try:
        column = data[col]
    except (KeyError, TypeError):
        # No such key, or data is not subscriptable: fall back to the
        # attribute lookup, which raises AttributeError for a missing column.
        column = getattr(data, col)
    return column.unique().tolist()
def find_best_match(outlier, targetlist):
    """
    Return the best match (= most similar word) for outlier from targetlist.

    Similarity is the ``difflib.SequenceMatcher`` ratio between the
    lower-cased outlier and each lower-cased candidate, so the comparison
    is case-insensitive. The candidate is returned in its original casing.
    If several candidates share the highest ratio, the first one wins.

    Args:
        outlier: the string to find a match for.
        targetlist: iterable of candidate strings.

    Returns:
        The element of targetlist with the highest similarity to outlier.

    Raises:
        ValueError: if targetlist contains no candidates.
    """
    # Materialise once so any iterable (list, tuple, generator, ...) works
    # and the emptiness check does not depend on the container type.
    candidates = list(targetlist)
    if not candidates:
        raise ValueError("targetlist must contain at least one word")

    # Loop-invariant: lower-case the outlier only once.
    needle = outlier.lower()

    def similarity(word):
        # Keep outlier as first and candidate as second sequence; the
        # ratio is not guaranteed to be symmetric in its arguments.
        return difflib.SequenceMatcher(None, needle, word.lower()).ratio()

    # max() returns the first of several equally good candidates.
    return max(candidates, key=similarity)