@@ -49,16 +49,16 @@ def collect_file_size_metrics(file_dicts, size_key):
     mean_file_size = statistics.mean(list_of_file_sizes)
     total_export_size = sum(list_of_file_sizes)
     return human_readable_size(largest_file), \
-        human_readable_size(mean_file_size), \
-        human_readable_size(total_export_size), \
-        len(list_of_file_sizes)
+           human_readable_size(mean_file_size), \
+           human_readable_size(total_export_size), \
+           len(list_of_file_sizes)
 
 
 def validate_export_buckets(csv_dicts, request_util):
     for row in csv_dicts:
         workspace_client = TerraWorkspace(request_util=request_util,
-            billing_project=row['destination_billing_project'],
-            workspace_name=row['destination_workspace_name'])
+                                          billing_project=row['destination_billing_project'],
+                                          workspace_name=row['destination_workspace_name'])
         workspace_bucket = workspace_client.get_workspace_bucket()
         if workspace_bucket != row['export_bucket']:
             logging.error(f"Export bucket {row['export_bucket']} does not match workspace bucket {workspace_bucket}")
@@ -76,12 +76,12 @@ def validate_export_buckets(csv_dicts, request_util):
         case "dataset":
             for row in csv_dicts:
                 file_list = tdr_client.get_data_set_files(dataset_id=row['source_dataset_id'])
-                largest_file, mean_file_size, total_export_size, number_of_files = collect_file_size_metrics(file_list, 'size')
-                collected_size_metrics.append({'DATASET_ID': row['DATASET_ID'],
+                largest_file, mean_file_size, total_export_size, number_of_files = collect_file_size_metrics(file_list, 'size')  # noqa: E501
+                collected_size_metrics.append({'DATASET_ID': row['DATASET_ID'],
                                                'LARGEST_FILE_SIZE': largest_file,
-                'MEAN_FILE_SIZE': mean_file_size,
-                'TOTAL_EXPORT_SIZE': total_export_size,
-                'FILE_COUNT': number_of_files})
+                                               'MEAN_FILE_SIZE': mean_file_size,
+                                               'TOTAL_EXPORT_SIZE': total_export_size,
+                                               'FILE_COUNT': number_of_files})
         case "workspace":
             print('looping through input tsv')
             for row in csv_dicts:
@@ -95,12 +95,12 @@ def validate_export_buckets(csv_dicts, request_util):
                                                   sas_token=sas_token,
                                                   container_name=workspace_client.storage_container)
                 az_blobs = az_blob_client.get_blob_details(max_per_page=1000)
-                largest_file, mean_file_size, total_export_size, number_of_files = collect_file_size_metrics(az_blobs, 'size_in_bytes')
-                collected_size_metrics.append({'DATASET_ID': row['DATASET_ID'],
-                'LARGEST_FILE_SIZE': largest_file,
-                'MEAN_FILE_SIZE': mean_file_size,
-                'TOTAL_EXPORT_SIZE': total_export_size,
-                'FILE_COUNT': number_of_files})
+                largest_file, mean_file_size, total_export_size, number_of_files = collect_file_size_metrics(az_blobs, 'size_in_bytes')  # noqa: E501
+                collected_size_metrics.append({'DATASET_ID': row['DATASET_ID'],
+                                               'LARGEST_FILE_SIZE': largest_file,
+                                               'MEAN_FILE_SIZE': mean_file_size,
+                                               'TOTAL_EXPORT_SIZE': total_export_size,
+                                               'FILE_COUNT': number_of_files})
     report_path = f'{args.target}_metrics.csv'
     Csv(file_path=report_path, delimiter=',').create_tsv_from_list_of_dicts(collected_size_metrics)
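For review context, here is a minimal sketch of the full `collect_file_size_metrics` helper, reconstructed from the diff context above; it is not the repo's authoritative code, and the `human_readable_size` formatter shown is an assumption included only so the sketch is self-contained and runnable.

```python
# Sketch only: reconstructed from the diff context above, not the repo's
# authoritative implementation.
import statistics


def human_readable_size(size_in_bytes):
    # Hypothetical formatter assumed by the diff; shown so the sketch runs.
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if size_in_bytes < 1024 or unit == 'TB':
            return f"{size_in_bytes:.2f} {unit}"
        size_in_bytes /= 1024


def collect_file_size_metrics(file_dicts, size_key):
    # Pull the byte count out of each file dict, then summarize the export:
    # largest file, mean size, total size (all human-readable), and count.
    list_of_file_sizes = [file_dict[size_key] for file_dict in file_dicts]
    largest_file = max(list_of_file_sizes)
    mean_file_size = statistics.mean(list_of_file_sizes)
    total_export_size = sum(list_of_file_sizes)
    return human_readable_size(largest_file), \
           human_readable_size(mean_file_size), \
           human_readable_size(total_export_size), \
           len(list_of_file_sizes)


# Example: metrics for two files keyed by 'size', as in the "dataset" branch.
print(collect_file_size_metrics([{'size': 1024}, {'size': 3 * 1024 ** 2}], 'size'))
```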