-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit.py
131 lines (119 loc) · 5.37 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import csv
import os
import sys
from pywell.entry_points import run_from_cli
# Command-line metadata handed to ``run_from_cli`` in the ``__main__`` guard.

# One-line summary of what the script does.
DESCRIPTION = (
    'Split a donation import into separate files '
    'to avoid erasing data with empty columns.'
)

# Argument name -> help text. ``main`` reads these as attributes of ``args``.
ARG_DEFINITIONS = {
    'BASE_DIRECTORY': 'Path to where files are located.',
    'CSV': 'CSV file to split.',
}

# Names from ARG_DEFINITIONS listed as required.
REQUIRED_ARGS = [
    'BASE_DIRECTORY',
    'CSV',
]
# Columns copied verbatim from an input row into each donation record.
_DONATION_COLUMNS = (
    'donation_amount', 'donation_import_id', 'donation_date',
    'donation_currency', 'donation_payment_account',
    'action_occupation', 'action_employer',
)


def _has_positive_donation(row):
    """Return True when the row's ``donation_amount`` is a number above zero.

    A missing or blank amount means "no donation" rather than an error, so
    rows that only carry profile or address data do not abort the run.
    A non-blank value that is not numeric still raises ``ValueError``.
    """
    raw_amount = row.get('donation_amount')
    if raw_amount is None or not str(raw_amount).strip():
        return False
    return float(raw_amount) > 0


def _identity_fields(row):
    """Return ``(suffix, fields)`` identifying the row's owner, or ``None``.

    ``suffix`` is ``'user'`` when the row has a non-empty ``user_id`` and
    ``'email'`` when it only has a non-empty email. ``fields`` holds the
    identifying column plus ``source`` and starts every output record.
    """
    source = row.get('source')
    user_id = row.get('user_id')
    if user_id:
        return 'user', {'user_id': user_id, 'source': source}
    # NOTE(review): the email header is read as 'Email' (capitalised) while
    # every other column is lower-case - confirm against the import format.
    email = row.get('Email')
    if email:
        return 'email', {'email': email, 'source': source}
    return None


def _record(id_fields, extra):
    """Return a new dict with the identity fields followed by ``extra``."""
    record = dict(id_fields)
    record.update(extra)
    return record


def main(args):
    """Split one donation CSV into per-purpose CSV files.

    Each input row is routed into up to three kinds of output file, each in
    a ``-user`` flavour (row has a ``user_id``) or an ``-email`` flavour (row
    only has an email):

    * ``donations-*``: rows whose ``donation_amount`` is greater than zero.
    * ``<column>-*``: one file per "set only" column, holding just the rows
      where that column is non-empty.
    * ``invalid-*`` / ``address-*``: rows whose ``address1`` is the literal
      ``'Invalid'`` (address fields replaced by ``'-'``), or rows with any
      other non-empty ``address1`` (address fields copied through).

    Rows with neither a ``user_id`` nor an email are skipped. Output files
    that would be empty are not created.

    Args:
        args: Object with ``BASE_DIRECTORY`` (directory holding the input
            and receiving the output) and ``CSV`` (input file name).

    Returns:
        List of the file names written, relative to ``BASE_DIRECTORY``.

    Raises:
        ValueError: If a non-blank ``donation_amount`` is not numeric.
        OSError: If the input cannot be read or an output cannot be written.
    """
    # splitext copes with any extension length (or none), unlike a fixed
    # four-character slice.
    prefix = os.path.splitext(args.CSV)[0]
    set_only_columns = [
        'user_do_not_mail', 'user_sms_subscribed', 'home_phone',
        'mobile_phone', 'first_name', 'last_name', 'prefix'
    ]
    address_columns = [
        'address1', 'address2', 'city', 'state', 'zip'
    ]

    # Output name -> accumulated records. Insertion order decides the order
    # of the returned file names.
    buckets = {
        'donations-user': [],
        'donations-email': [],
        'invalid-user': [],
        'invalid-email': [],
        'address-user': [],
        'address-email': []
    }
    for column in set_only_columns:
        buckets['%s-user' % column] = []
        buckets['%s-email' % column] = []

    # os.path.join works whether or not BASE_DIRECTORY ends with a separator.
    # newline='' is what the csv module requires of files it reads and writes.
    source_path = os.path.join(args.BASE_DIRECTORY, args.CSV)
    with open(source_path, 'rt', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            identity = _identity_fields(row)
            if identity is None:
                # Nothing to key the record on, so no output can use it.
                continue
            suffix, id_fields = identity

            if _has_positive_donation(row):
                buckets['donations-%s' % suffix].append(_record(
                    id_fields,
                    {name: row.get(name) for name in _DONATION_COLUMNS},
                ))

            # NOTE(review): the per-column and address splits below run for
            # every identified row, not only rows with a positive donation -
            # confirm this matches the intended nesting.
            for column in set_only_columns:
                value = row.get(column, '')
                if not value:
                    # Empty (or missing) values are left out so they cannot
                    # overwrite existing data on import.
                    continue
                buckets['%s-%s' % (column, suffix)].append(
                    _record(id_fields, {column: value})
                )

            address1 = row.get('address1')
            if address1 == 'Invalid':
                buckets['invalid-%s' % suffix].append(_record(
                    id_fields,
                    {name: '-' for name in address_columns},
                ))
            elif address1:
                buckets['address-%s' % suffix].append(_record(
                    id_fields,
                    {name: row.get(name, '') for name in address_columns},
                ))

    filenames = []
    for bucket_name, records in buckets.items():
        if not records:
            continue
        filename = prefix + '-' + bucket_name + '.csv'
        filenames.append(filename)
        target_path = os.path.join(args.BASE_DIRECTORY, filename)
        with open(target_path, 'w', newline='') as csvfile:
            # Every record in a bucket has the same keys; the first one
            # supplies the header.
            writer = csv.DictWriter(csvfile, fieldnames=list(records[0]))
            writer.writeheader()
            writer.writerows(records)
    return filenames
# Script entry point: only runs when the file is executed directly, handing
# ``main`` and the command-line metadata to pywell's ``run_from_cli``.
if __name__ == '__main__':
    run_from_cli(
        main,
        DESCRIPTION,
        ARG_DEFINITIONS,
        REQUIRED_ARGS,
    )