-
Notifications
You must be signed in to change notification settings - Fork 61
/
processing_script.py
50 lines (37 loc) · 1.72 KB
/
processing_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import sys
from datetime import datetime
# Directory that main() scans for the '.txt' input files to be counted.
input_data_path = '/opt/ml/processing/input_data/'
# Directory into which main() writes the timestamped 'total_words_*.txt' result.
processed_data_path = '/opt/ml/processing/processed_data'
def _count_words(path):
    """Return the number of whitespace-separated words in the file at *path*."""
    # The context manager guarantees the handle is closed, even if read() fails.
    with open(path, 'r') as handle:
        return len(handle.read().split())


def main():
    """Count the words in every ``.txt`` input file and write the total.

    Arguments are taken from ``sys.argv[1:]`` as alternating key/value
    tokens, e.g. ``job-type word-count``. Only the ``word-count`` job type is
    implemented: every file ending in ``.txt`` directly inside
    ``input_data_path`` is read, its words are counted, and the grand total
    is appended to a timestamped ``total_words_<ddmmYYYY_HH_MM_SS>.txt``
    file inside ``processed_data_path``.

    For any other job type, or when ``job-type`` is not supplied at all, a
    message is printed and the function returns without writing an output
    file.

    Raises:
        OSError: if the input or output directory cannot be listed, or a
            file cannot be read or written.
    """
    print("Processing Started")

    # Convert command line args into a map of args: consecutive tokens are
    # paired as key/value; zip() silently drops a trailing unpaired token.
    args_iter = iter(sys.argv[1:])
    args = dict(zip(args_iter, args_iter))
    print('Received arguments {}'.format(args))
    print('Reading input data from {}'.format(input_data_path))
    print("Got Args: {}".format(args))

    input_files = [name for name in os.listdir(input_data_path) if name.endswith('.txt')]
    print('Available input text files: {}'.format(input_files))

    # .get() so that a missing 'job-type' is reported as unsupported instead
    # of surfacing as a bare KeyError.
    job_type = args.get('job-type')
    if job_type != 'word-count':
        print('{} job-type not supported! Doing Nothing'.format(job_type))
        # Nothing was counted, so there is no total to write; returning here
        # avoids referencing an unassigned total below.
        return

    print('Word Count Job Type Started')
    total_words = 0
    for input_file in input_files:
        words = _count_words(os.path.join(input_data_path, input_file))
        print('Detected {} words in {} file'.format(words, input_file))
        total_words += words
    print('Total words in {} files detected: {}'.format(len(input_files), total_words))

    output_file = os.path.join(
        processed_data_path,
        'total_words_' + datetime.now().strftime("%d%m%Y_%H_%M_%S") + '.txt',
    )
    print('Writing output file: {}'.format(output_file))

    # Append mode is kept from the original: two runs within the same second
    # share a file name and must not overwrite each other.
    with open(output_file, "a") as out:
        out.write('Total Words: {}'.format(total_words))

    output_files = [name for name in os.listdir(processed_data_path) if name.endswith('.txt')]
    print('Available output text files: {}'.format(output_files))

    print("Processing Complete")
# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()