|
| 1 | +import functions_framework |
| 2 | +import os |
| 3 | +import re |
| 4 | +import json |
| 5 | +import hashlib |
| 6 | +import csv |
| 7 | +import pandas as pd |
| 8 | +from google.cloud import storage |
| 9 | +from io import StringIO |
| 10 | + |
| 11 | + |
def _sum_counts_per_file(words, groupby_dict):
    """Return per-file totals for each word assigned to a reducer.

    Args:
        words: Iterable of keys (words) this reducer is responsible for.
        groupby_dict: Mapping of word -> {filename -> iterable of numbers},
            as loaded from ``groupby/groupby.json``.

    Returns:
        A dict of word -> {filename -> sum of that file's numbers}.

    Raises:
        KeyError: If a word is absent from ``groupby_dict``. This is left to
            propagate because it means the two group-by files disagree.
    """
    return {
        word: {
            filename: sum(counts)
            for filename, counts in groupby_dict[word].items()
        }
        for word in words
    }


@functions_framework.http
def reducer(request):
    """HTTP entry point: reduce the grouped counts assigned to one reducer.

    Reads ``groupby/keys_to_reducer.json`` and ``groupby/groupby.json`` from
    the ``groupby_bucket`` bucket, sums the values stored per word and file
    for every key assigned to the requested reducer, and uploads the result
    to ``<reducer_name>/<reducer_name>.json`` in the ``reducer_bucket`` bucket.

    Args:
        request: The incoming HTTP request. Its JSON body must be an object
            with a string ``reducer_name`` field.

    Returns:
        ``"<reducer_name> OK"`` on success. For a malformed request, a
        ``(message, 400)`` tuple; for a reducer name with no key assignment,
        a ``(message, 404)`` tuple.
    """
    # get_json(silent=True) yields None for a missing or invalid JSON body,
    # so validate before indexing and before doing any storage calls.
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict):
        return "Request body must be a JSON object", 400
    reducer_name = request_json.get("reducer_name")  # parameter
    if not isinstance(reducer_name, str):
        return "Missing or invalid 'reducer_name' in request body", 400

    # NOTE(review): credentials come from a key file shipped next to the
    # source; consider default credentials instead of a bundled key.
    client = storage.Client.from_service_account_json('piyush-chaudhari-fall2023-9ae1ed20a7f3.json')
    reducer_bucket = client.get_bucket("reducer_bucket")
    groupby_bucket = client.get_bucket("groupby_bucket")

    folder_name = "groupby"  # groupby folder name

    # Mapping of reducer name -> keys that reducer must process.
    keys_blob = groupby_bucket.blob(f"{folder_name}/keys_to_reducer.json")
    keys_to_reducer = json.loads(keys_blob.download_as_text())
    if reducer_name not in keys_to_reducer:
        return f"No keys assigned to reducer '{reducer_name}'", 404

    # Mapping of word -> filename -> values to be summed.
    groupby_blob = groupby_bucket.blob(f"{folder_name}/groupby.json")
    groupby_dict = json.loads(groupby_blob.download_as_text())

    output_dict = _sum_counts_per_file(keys_to_reducer[reducer_name], groupby_dict)

    # Save the reducer's output as <reducer_name>/<reducer_name>.json.
    reducer_blob = reducer_bucket.blob(f"{reducer_name}/{reducer_name}.json")
    reducer_blob.upload_from_string(
        json.dumps(output_dict, indent=4),
        content_type="application/json",
    )
    print(f"{reducer_name} OK")
    return f"{reducer_name} OK"
| 52 | + |
| 53 | + |
0 commit comments