# ---------------------------------------------------------------------------
# ROMN_Soils_ETL_To_SoilsDB
# Description: Routine to Extract, Transform, and Load (ETL) the CSU Soils lab Electronic Data Deliverable (EDD) to the Soils Database - tbl_SoilChemistry_Dataset
# Code performs the following routines:
# Extracts the data records from the CSU Soils lab EDD. Defines matching metadata for Uplands Vegetation (VCSS) and Wetlands (WEI) events in the Soils database.
# The VCSS and Wetlands tables must be linked to the most current databases in the Soils database. Defines the matching parameter name and units as defined in the 'tlu_NameUnitCrossWalk'
# lookup table. Appends the transformed data (i.e. ETL) to the Master Soils dataset 'tbl_SoilChemistry_Dataset' via the '.to_sql' functionality for dataframes in the sqlalchemy package.
# Notes - ETL Routine was for the pre-2022 CSU Soils, Water and Plant Testing Laboratory EDD, prior to the lab's move to Denver - 20230501 - KRS
# Dependencies:
# Python version 3.9
# Pandas
# sqlalchemy-access - used for pandas dataframe '.to_sql' functionality: install via 'pip install sqlalchemy-access'
# Issues with Numpy in PyCharm - copying sqlite3.dll from the 'C:\Users\KSherrill\.conda\envs\py39_sqlAlchemy\Library\bin' folder to 'C:\Users\KSherrill\.conda\envs\py39_sqlAlchemy\DLLs' resolved the issue.
# Conda environment - py39_sqlAlchemy
# Created by: Kirk Sherrill - Data Manager, Rocky Mountain Network - I&M National Park Service
# Date Created: August 23rd, 2022

#######################################
## Below are paths which are hard coded
#######################################

#Import Required Libraries
import os
import traceback
import pyodbc
import numpy as np
import pandas as pd
import sys
from datetime import date
import sqlalchemy as sa

##################################

###################################################
# Start of Parameters requiring set up.
###################################################
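# Expected EDD layout (inferred from the parsing logic in main() below - verify against the
# actual deliverable): 'Sheet1' holds two report tables stacked vertically, each keyed by the
# lab IDs in the first column (e.g. 'R62' ... 'R136'); the first table uses the columns listed
# in 'fieldCrossWalk1' and the second table the columns listed in 'fieldCrossWalk2'.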
#Define Input Parameters
inputFile = r'C:\ROMN\Monitoring\Soils\DataGathering\2021\CSU Soil report 2021 - R62-R136_v5_wVCSSEventName.xlsx'  # Excel EDD from CSU Soils lab
rawDataSheet = "Sheet1"  # Name of the Raw Data Sheet in the inputFile
firstLabID = "R62"  # Define the first 'Lab#' id to facilitate selection of records to be retained
lastLabID = "R136"  # Define the last 'Lab#' id to facilitate selection of records to be retained

# Directory Information
workspace = r'C:\ROMN\Monitoring\Soils\DataGathering\2021\workspace'  # Workspace Folder

#List defining the first table deliverable field names - 'Texture_Categorical' is the last column in the 2021 deliverable
fieldCrossWalk1 = ['SampleName_Lab', 'SampleName_ROMN', 'pH', 'EC_mmhos/cm', 'Lime_estimate', 'Organic_Matter_20cm', 'NO3-N_ppm', 'P_ppm', 'K_ppm', 'Zn_ppm', 'Fe_ppm', 'Mn_ppm', 'Cu_ppm', 'S_ppm', 'Texture_Categorical']

#List defining the second table deliverable field names - 'BulkDensity_g/cm3' is the last column in the 2021 deliverable
fieldCrossWalk2 = ['SampleName_Lab', 'SampleName_ROMN', 'Ca_meq/L', 'Mg_meq/L', 'K_meq/L', 'Na_meq/L', 'SAR', 'Mg_ppm', 'NH4-N_ppm', 'BulkDensity_g/cm3']

#Soils Access Database location
soilsDB = r'C:\ROMN\Monitoring\Soils\Certified\Soil_ROMN_AllYears_MASTER_20220822v3.accdb'

#Soils Dataset Table in the Soils database - this is the table data will be appended to
soilsDatasetTable = "tbl_SoilChemistry_Dataset"

#Get Current Date
dateString = date.today().strftime("%Y%m%d")

#Define Output Name for log file
outName = "Soils_CSU_FieldSeason_2021_Preprocessed_" + dateString  # Name given to the exported pre-processed output

#Logfile name
logFileName = workspace + "\\" + outName + "_logfile.txt"

##################################
# Checking for directories and create Logfile
##################################
if os.path.exists(workspace):
    pass
else:
    os.makedirs(workspace)

# Check for logfile
if os.path.exists(logFileName):
    pass
else:
    logFile = open(logFileName, "w")  # Creating index file if it doesn't exist
    logFile.close()

#################################################
##
def main():
    try:
        #####################
        #Process the Raw Data
        #####################
        rawDataDf = pd.read_excel(inputFile, sheet_name=rawDataSheet)

        # Find Record Index values with the firstLabID - This will be used to subset datasets one and two
        indexDf = rawDataDf[rawDataDf.iloc[:, 0] == firstLabID]

        # Define first Index Value - This is the first data row of dataset one
        indexFirst = indexDf.index.values[0]

        ##############################################################
        # Create Data Frame with Header Columns Removed - This will be Dataset One
        ##############################################################
        rawDataDfOneNoHeader = rawDataDf[indexFirst:]

        # Rename Header Columns
        rawDataDfOneNoHeader.columns = fieldCrossWalk1

        # Retain Records with a defined 'SampleName_Lab' as defined in the 'recordList'
        # Define the List of Records to be retained
        firstRec = int(firstLabID.replace("R", ""))
        lastRec = int(lastLabID.replace("R", ""))
        rangeList = range(firstRec, lastRec + 1)
        recordList = []
        for rec in rangeList:
            newRec = 'R' + str(rec)
            recordList.append(newRec)

        # Subset to only Records with Data
        rawDataRecordOnly = rawDataDfOneNoHeader[rawDataDfOneNoHeader['SampleName_Lab'].isin(recordList)]

        # Reset Index
        rawDataRecordOnly.reset_index(drop=True, inplace=True)
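        # Worked example (illustration only): with firstLabID = "R62" and lastLabID = "R136",
        # recordList is ['R62', 'R63', ..., 'R136'], so only EDD rows whose 'SampleName_Lab'
        # falls in that range are retained above.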
        # Find the second occurrence of the firstLabID - this is the row where the Second Dataset starts
        indexDf = rawDataRecordOnly.loc[rawDataRecordOnly['SampleName_Lab'] == firstLabID]

        # Define Second first Record Value - Index Value
        indexSecondFirst = indexDf.index.values[1]

        #List to hold all the processed dataframes
        datasetList = []
        crossWalkList = []

        #####################
        # Define First Dataset
        #####################
        df_FirstDataset = rawDataRecordOnly[:indexSecondFirst]
        datasetList.append(df_FirstDataset)
        crossWalkList.append(fieldCrossWalk1)

        ######################
        # Define Second Dataset
        ######################
        df_SecondDatasetWork = rawDataRecordOnly[indexSecondFirst:]

        #############################
        # Remove columns without Data
        #############################
        lenFieldCross2 = len(fieldCrossWalk2)
        df_SecondDataset = df_SecondDatasetWork.drop(df_SecondDatasetWork.iloc[:, lenFieldCross2:], axis=1)
        datasetList.append(df_SecondDataset)
        crossWalkList.append(fieldCrossWalk2)

        #Define Header for Second Dataset
        df_SecondDataset.columns = fieldCrossWalk2

        # Reset Index
        df_SecondDataset.reset_index(drop=True, inplace=True)

        ###############################
        # Get Metadata for all Events - Must Check WEI and VCSS metadata
        ###############################

        ####################################################
        # Get distinct dataframe Lab and ROMN Sample Numbers
        # Get Unique Dataframe with Lab and ROMN sample combinations - likely not necessary but ensures uniqueness
        df_unique = df_FirstDataset[['SampleName_Lab', 'SampleName_ROMN']]
        df_uniqueGB = df_unique.groupby(['SampleName_Lab', 'SampleName_ROMN'], as_index=False).count()
        df_uniqueGB['EventName'] = 'TBD'
        df_uniqueGB['SiteName'] = 'TBD'
        df_uniqueGB['StartDate'] = pd.NaT
        df_uniqueGB['YearSample'] = None

        #Define SiteName
        df_uniqueGB['SiteName'] = df_uniqueGB['SampleName_ROMN'].str[:8]

        #Define EventName
        #df_uniqueGB['EventName'] = df_uniqueGB['SampleName_ROMN'].str[:15]
        # Define EventName as everything prior to the third '_' - this logic will not work for WEI - only being used for VCSS
        df_uniqueGB['EventName'] = ['_'.join(x.split('_')[:3]) for x in df_uniqueGB['SampleName_ROMN']]

        #Find metadata Information - VCSS DB - Join on the Site Name prefix in 'SampleName_ROMN' and by the year being processed
        outVal = defineMetadata_VCSS(df_uniqueGB)
        if outVal[0].lower() != "success function":
            messageTime = timeFun()
            print("WARNING - Function 'defineMetadata_VCSS' - " + str(messageTime) + " - Failed - Exiting Script")
            exit()
        else:
            #Return dataframe with VCSS Sites defined
            df_wVCSS_noWEI = outVal[1]
            messageTime = timeFun()
            scriptMsg = ("Success - Function 'defineMetadata_VCSS' - " + messageTime)
            print(scriptMsg)
            logFile = open(logFileName, "a")
            logFile.write(scriptMsg + "\n")

        #Find metadata Information - WEI DB
        outVal = defineMetadata_WEI(df_wVCSS_noWEI)
        if outVal[0].lower() != "success function":
            messageTime = timeFun()
            print("WARNING - Function 'defineMetadata_WEI' - " + str(messageTime) + " - Failed - Exiting Script")
            exit()
        else:
            # Return dataframe with WEI Sites defined
            df_wVCSS_wWEI = outVal[1]
            messageTime = timeFun()
            scriptMsg = ("Success - Function 'defineMetadata_WEI' - " + messageTime)
            print(scriptMsg)
            logFile = open(logFileName, "a")
            logFile.write(scriptMsg + "\n")

        del(df_wVCSS_noWEI)
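        # Note (summary of the two matching passes above): VCSS samples are matched on the
        # EventName parsed from 'SampleName_ROMN', while WEI (wetland) samples are matched in
        # defineMetadata_WEI on the 'Chem' field of tbl_Soil. 'df_wVCSS_wWEI' should now hold
        # Protocol_ROMN, SampleName_Lab, SampleName_ROMN, EventName, SiteName, and StartDate.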
        #Check if the output metadata dataframe has undefined 'Events'
        df_noEvent = df_wVCSS_wWEI.loc[df_wVCSS_wWEI['EventName'] == 'TBD']  #Undefined Events
        recCountNoEvent = df_noEvent.shape[0]
        if recCountNoEvent > 0:
            messageTime = timeFun()
            scriptMsg = "WARNING - there are: " + str(recCountNoEvent) + " records with Undefined Events - Exiting Script - " + messageTime
            print(scriptMsg)
            logFile = open(logFileName, "a")
            logFile.write(scriptMsg + "\n")
            traceback.print_exc(file=sys.stdout)
            logFile.close()
            print("Printing dataframe 'df_noEvent' with the undefined events:")
            print(df_noEvent)
            exit()

        ##########################################
        #Join the metadata dataframe 'df_wVCSS_wWEI' with the data dataframes (i.e. df_FirstDataset and df_SecondDataset) and append to the Soils Dataset Table
        ##########################################
        loopCount = 0
        for dataset in datasetList:

            #Define the list of fields to be stacked
            fieldCrossWalkToStack = crossWalkList[loopCount]

            ################################################
            #Define the Field List to be Stacked via pandas melt
            fieldCrossWalkToStack.remove("SampleName_Lab")
            fieldCrossWalkToStack.remove("SampleName_ROMN")

            #Create Stacked Data Frame
            df_melt = pd.melt(dataset, id_vars="SampleName_ROMN", var_name="ParameterRaw", value_vars=fieldCrossWalkToStack, value_name="Value")

            #Remove Records with a null value in 'df_melt'
            df_melt2 = df_melt.dropna(subset=['Value'])
            df_melt2.reset_index(drop=True, inplace=True)
            del(df_melt)

            #################################################
            #Join (via merge) the stacked output (i.e. 'df_melt2') with the metadata dataframe
            df_stack_wMetadata = pd.merge(df_melt2, df_wVCSS_wWEI, how='left', left_on='SampleName_ROMN', right_on='SampleName_ROMN', suffixes=("_data", "_metadata"))

            #Subset to the desired fields to be appended to 'tbl_SoilChemistry_Dataset'
            df_ToAppend = df_stack_wMetadata[["Protocol_ROMN", "SiteName", "EventName", "StartDate", "ParameterRaw", "Value"]]
            del(df_stack_wMetadata)

            #Add Year Sampled Field
            df_ToAppend.insert(4, 'YearSampled', None)

            #Define Year Sampled
            df_ToAppend['YearSampled'] = df_ToAppend['StartDate'].dt.strftime('%Y')

            #Format Start Date to 'm/d/yyyy' as Date Time
            #df_ToAppend['StartDate'] = df_ToAppend['StartDate'].dt.strftime('%m/%d/%Y')
            df_ToAppend['StartDate'] = pd.to_datetime(df_ToAppend['StartDate'], format='%m/%d/%Y')

            ########################################################################################
            # Verify fields in the dataset have been defined in the 'tlu_NameUnitCrossWalk' lookup table - pass the Stacked Dataframe
            outVal = checkFieldNameCrossWalk(df_ToAppend)
            if outVal[0].lower() != "success function":
                messageTime = timeFun()
                print("WARNING - Function 'checkFieldNameCrossWalk' - " + str(messageTime) + " - Failed - loopCount:" + str(loopCount) + " - Exiting Script")
                exit()
            else:
                # Return dataframe with the parameter/unit lookup fields defined
                df_wFieldCrossWalk = outVal[1]
                messageTime = timeFun()
                scriptMsg = ("Success - Function 'checkFieldNameCrossWalk' - loopCount: " + str(loopCount) + " - " + messageTime)
                print(scriptMsg)
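            # Illustration only (hypothetical values): after the melt/merge steps above, each
            # sample contributes one row per parameter, e.g.
            #   SampleName_ROMN       ParameterRaw   Value
            #   <ROMN sample name>    pH             6.5
            #   <ROMN sample name>    NO3-N_ppm      3.0
            # 'df_wFieldCrossWalk' supplies the matching UnitRaw, ParameterDataset, and
            # UnitDataset fields from 'tlu_NameUnitCrossWalk' in the merge below.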
            ######################################################################################
            #Join the Parameter Name and Unit fields (i.e. UnitRaw, ParameterDataset and UnitDataset) dataframe (i.e. df_wFieldCrossWalk) with the 'df_ToAppend' dataframe
            df_ToAppend_wLookup = pd.merge(df_ToAppend, df_wFieldCrossWalk, how='left', left_on='ParameterRaw', right_on='ParameterRaw', suffixes=("_data", "_lookup"))

            #Cleanup 'df_ToAppend_wLookup' for the Append - Match fields in tbl_SoilChemistry_Dataset
            # Return Dataframe with the Lookup fields
            df_ToAppendFinal = df_ToAppend_wLookup[["Protocol_ROMN", "SiteName", "EventName", "StartDate", "YearSampled", "ParameterRaw", "UnitRaw", "ParameterDataset", "UnitDataset", "Value"]]  #With StartDate

            #Add Field - QC_Status
            df_ToAppendFinal.insert(9, 'QC_Status', 0)

            # Add Field - QC_Flag
            df_ToAppendFinal.insert(10, 'QC_Flag', "")

            #Add Field - QC_Notes
            df_ToAppendFinal.insert(11, 'QC_Notes', "")

            # Add Field - DataFlag
            df_ToAppendFinal.insert(12, 'DataFlag', "Null")

            # Add Field - Count
            df_ToAppendFinal.insert(13, 'Count', 1)

            # Add Field - StDev - All records are from one sample
            df_ToAppendFinal.insert(14, 'StDev', -999)

            # Add Field - StErr
            df_ToAppendFinal.insert(15, 'STErr', -999)

            #Add Field - Min
            #df_ToAppendFinal.insert(14, 'Min', df_ToAppendFinal["Value"])
            #If Lime, Texture or Peat - set Min and Max to -999 - categorical
            inStr = ("Lime_estimate", "Texture_Categorical", "Peat_Thickness_cm")
            df_ToAppendFinal["Min"] = np.where(df_ToAppendFinal["ParameterRaw"].str.startswith(inStr), -999, df_ToAppendFinal["Value"])

            # Add Field - Max
            #df_ToAppendFinal.insert(15, 'Max', df_ToAppendFinal["Value"])
            df_ToAppendFinal["Max"] = np.where(df_ToAppendFinal["ParameterRaw"].str.startswith(inStr), -999, df_ToAppendFinal["Value"])

            #Convert Value field to text
            df_ToAppendFinal['Value'] = df_ToAppendFinal['Value'].apply(str)

            #Convert 'YearSampled' to Integer
            df_ToAppendFinal["YearSampled"] = pd.to_numeric(df_ToAppendFinal["YearSampled"], downcast="integer")

            #Set the Index field to the 'SiteName' field - the append to the Soils dataset will fail if a default Index column is present - SiteName is not unique, but that is not relevant in this context
            df_ToAppendFinal.set_index("SiteName", inplace=True)

            ###################################
            #Append df_ToAppendFinal to the Dataset - appending one record at a time - unable to get a single append for the full dataset to work
            ###################################
            connStr = (r"DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + soilsDB + ";ExtendedAnsiSQL=1;")  # sqlAlchemy-access connection
            # cnxn = pyodbc.connect(connStr)  #PYODBC Connection
            cnxn = sa.engine.URL.create("access+pyodbc", query={"odbc_connect": connStr})
            engine = sa.create_engine(cnxn)

            # Create the iteration range for records to be appended
            shapeDf = df_ToAppendFinal.shape
            lenRows = shapeDf[0]
            rowRange = range(0, lenRows)
            try:
                for row in rowRange:
                    df3 = df_ToAppendFinal[row:row + 1]
                    recordIdSeries = df3.iloc[0]
                    recordId = recordIdSeries.get('EventName')
                    parameterRaw = recordIdSeries.get('ParameterRaw')
                    appendOut = df3.to_sql(soilsDatasetTable, con=engine, if_exists='append')
                    print(appendOut)
                    messageTime = timeFun()
                    scriptMsg = "Successfully Appended RecordID - " + recordId + " - Parameter - " + parameterRaw + " - for Dataset: " + str(loopCount) + " - " + messageTime
                    print(scriptMsg)
                    logFile = open(logFileName, "a")
                    logFile.write(scriptMsg + "\n")
                    logFile.close()
            except:
                messageTime = timeFun()
                scriptMsg = "WARNING Failed to Append RecordID - " + recordId + " - " + parameterRaw + " - for Dataset: " + str(loopCount) + " - " + messageTime
                print(scriptMsg)
                logFile = open(logFileName, "a")
                logFile.write(scriptMsg + "\n")
                logFile.close()

            loopCount += 1

        messageTime = timeFun()
        print("Successfully Finished Processing - " + messageTime)
    except:
        messageTime = timeFun()
        scriptMsg = "Error - Soils_ETL_To_SoilsDB.py - " + messageTime
        print(scriptMsg)
        logFile = open(logFileName, "a")
        logFile.write(scriptMsg + "\n")
        traceback.print_exc(file=sys.stdout)
        logFile.close()


# Function to Get the Date/Time
def timeFun():
    from datetime import datetime
    b = datetime.now()
    messageTime = b.isoformat()
    return messageTime


#Function to check that each parameter is defined in the 'tlu_NameUnitCrossWalk' table
def checkFieldNameCrossWalk(inDf):
    try:
        #Import the 'tlu_NameUnitCrossWalk' table
        inQuery = "SELECT tlu_NameUnitCrossWalk.* FROM tlu_NameUnitCrossWalk;"
        outVal = connect_to_AcessDB(inQuery, soilsDB)
        if outVal[0].lower() != "success function":
            messageTime = timeFun()
            print("WARNING - Function connect_to_AcessDB - " + messageTime + " - Failed - Exiting Script")
            exit()
        else:
            #Evaluate if each parameter is defined in the 'ParameterNative' field
            outDfCrossWalk = outVal[1]

            #Group By on the input dataframe (i.e. the stacked output) on the 'ParameterRaw' field
            inDfGB = inDf.groupby(['ParameterRaw'], axis=0, as_index=False).count()

            # Join (via merge) the cross-walk lookup table to the grouped parameters in the input dataset
            df_mergeCWDfGB = pd.merge(inDfGB, outDfCrossWalk, how='left', left_on='ParameterRaw', right_on='ParameterNative', suffixes=("_data", "_lookup"))

            #Identify Records without a 'ParameterNative' lookup value
            df_noCrossWalk = df_mergeCWDfGB[df_mergeCWDfGB['ParameterNative'].isna()]
            rowCount = df_noCrossWalk.shape[0]
            if rowCount > 0:  #No Cross-walk defined
                messageTime = timeFun()
                scriptMsg = ("WARNING - Printing Dataframe 'df_noCrossWalk' with the Parameters without a defined value in 'tlu_NameUnitCrossWalk' - please define these in the lookup table and reprocess - " + messageTime)
                print(scriptMsg)
                print(df_noCrossWalk)
                logFile = open(logFileName, "a")
                logFile.write(scriptMsg + "\n")
                logFile.close()
                exit()
            else:
                print(df_noCrossWalk)

            #Return Dataframe with the Lookup fields
            df_lookupFields = df_mergeCWDfGB[["ParameterRaw", "UnitNative", "ParameterDataset", "UnitDataset"]]

            #Rename fields:
            outFieldList = ["ParameterRaw", "UnitRaw", "ParameterDataset", "UnitDataset"]
            df_lookupFields.columns = outFieldList

            messageTime = timeFun()
            scriptMsg = ("Success - Function 'checkFieldNameCrossWalk' - " + messageTime)
            print(scriptMsg)
            logFile = open(logFileName, "a")
            logFile.write(scriptMsg + "\n")
            logFile.close()
            return "success function", df_lookupFields

    except:
        messageTime = timeFun()
        scriptMsg = "Error checkFieldNameCrossWalk - " + messageTime
        print(scriptMsg)
        logFile = open(logFileName, "a")
        logFile.write(scriptMsg + "\n")
        traceback.print_exc(file=sys.stdout)
        logFile.close()


#Connect to the Access DB and perform the defined query - return the query results in a dataframe
def connect_to_AcessDB(query, inDB):
    try:
        connStr = (r"DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + inDB + ";")
        cnxn = pyodbc.connect(connStr)
        dataf = pd.read_sql(query, cnxn)
        cnxn.close()
        return "success function", dataf
    except:
        messageTime = timeFun()
        scriptMsg = "Error function: connect_to_AcessDB - " + messageTime
        print(scriptMsg)
        logFile = open(logFileName, "a")
        logFile.write(scriptMsg + "\n")
        traceback.print_exc(file=sys.stdout)
        logFile.close()
        return "failed function"
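
# Example usage (sketch): connect_to_AcessDB returns the tuple ("success function", DataFrame)
# on success; the calling functions check element [0] before using the dataframe, e.g.
#   outVal = connect_to_AcessDB("SELECT tlu_NameUnitCrossWalk.* FROM tlu_NameUnitCrossWalk;", soilsDB)
#   if outVal[0].lower() == "success function":
#       df_lookup = outVal[1]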

#Define VCSS Event Metadata via a join on EventName, assuming a single year of processing.
def defineMetadata_VCSS(df_uniqueGB):
    try:
        #Pull the event table from the VCSS table via the Soils DB
        inQuery = "SELECT tbl_Events1.* FROM tbl_Events1;"
        outVal = connect_to_AcessDB(inQuery, soilsDB)
        if outVal[0].lower() != "success function":
            messageTime = timeFun()
            print("WARNING - Function connect_to_AcessDB - " + messageTime + " - Failed - Exiting Script")
            exit()
        else:
            #VCSS events dataframe
            outDf = outVal[1]

            #Define Year field
            outDf['Year'] = pd.DatetimeIndex(outDf['StartDate']).year

            #Join (via merge) the VCSS events to 'df_uniqueGB' (i.e. the input dataset with records) on the EventName field
            df_mergeVCSS = pd.merge(df_uniqueGB, outDf, how='left', left_on='EventName', right_on='EventName', suffixes=("_data", "_metadata"))

            #Return new dataframe
            df_wVCSS_noWEI = df_mergeVCSS[["SampleName_Lab", "SampleName_ROMN", "EventName", "SiteName_data", "StartDate_metadata", "Year"]]

            #Rename fields:
            fieldList_VCSS = ["SampleName_Lab", "SampleName_ROMN", "EventName", "SiteName", "StartDate", "YearSample"]
            df_wVCSS_noWEI.columns = fieldList_VCSS

            #Add 'Protocol_ROMN' field - default to 'VCSS'
            df_wVCSS_noWEI.insert(0, 'Protocol_ROMN', "VCSS")

            #Update the EventName field to 'TBD' for records that aren't VCSS (i.e. WEI) or that had no match
            df_wVCSS_noWEI['EventName'] = np.where((df_wVCSS_noWEI['YearSample'].isnull()), "TBD", df_wVCSS_noWEI['EventName'])

            # Update the Protocol_ROMN field to 'TBD' for records that aren't VCSS (i.e. WEI) or that had no match
            df_wVCSS_noWEI['Protocol_ROMN'] = np.where((df_wVCSS_noWEI['YearSample'].isnull()), "TBD", df_wVCSS_noWEI['Protocol_ROMN'])

            return "success function", df_wVCSS_noWEI

    except:
        messageTime = timeFun()
        scriptMsg = "Error function: defineMetadata_VCSS - " + messageTime
        print(scriptMsg)
        logFile = open(logFileName, "a")
        logFile.write(scriptMsg + "\n")
        traceback.print_exc(file=sys.stdout)
        logFile.close()
        return "failed function", "Null"


#Define WEI Event Metadata - assuming a single year of processing.
def defineMetadata_WEI(inDf):
    try:
        #Pull the event and soil tables from the WEI database via the Soils DB
        inQuery = "SELECT tbl_Events.EventName, tbl_Events.StartDate, tbl_Soil.Chem, tbl_Soil.Comments_Soil, tbl_Soil.Comments_Sample FROM tbl_Events INNER JOIN tbl_Soil ON tbl_Events.EventName = tbl_Soil.EventName;"
        outVal = connect_to_AcessDB(inQuery, soilsDB)
        if outVal[0].lower() != "success function":
            messageTime = timeFun()
            print("WARNING - Function connect_to_AcessDB - " + messageTime + " - Failed - Exiting Script")
            exit()
        else:
            #WEI events dataframe
            outDf = outVal[1]

            #Join (via merge) the WEI events to the input dataframe on the 'SampleName_ROMN' / 'Chem' fields
            df_mergeWEI = pd.merge(inDf, outDf, how='left', left_on='SampleName_ROMN', right_on='Chem', suffixes=("_data", "_metadata"))

            # Populate the 'Protocol_ROMN' field with 'WEI' values where the join matched a WEI record
            df_mergeWEI['Protocol_ROMN'] = np.where((df_mergeWEI['EventName_metadata'].isnull()), df_mergeWEI['Protocol_ROMN'], "WEI")

            #Populate the 'EventName_data' field with the 'EventName_metadata' field values where the join matched a WEI record
            df_mergeWEI['EventName_data'] = np.where((df_mergeWEI['EventName_metadata'].isnull()), df_mergeWEI['EventName_data'], df_mergeWEI['EventName_metadata'])

            #Populate the 'StartDate_data' field with the 'StartDate_metadata' field values where the join matched a WEI record
            df_mergeWEI['StartDate_data'] = np.where((df_mergeWEI['EventName_metadata'].isnull()), df_mergeWEI['StartDate_data'], df_mergeWEI['StartDate_metadata'])

            #Return new dataframe
            df_wVCSS_wWEI = df_mergeWEI[["Protocol_ROMN", "SampleName_Lab", "SampleName_ROMN", "EventName_data", "SiteName", "StartDate_data"]]

            #Rename fields:
            fieldList_WEI = ["Protocol_ROMN", "SampleName_Lab", "SampleName_ROMN", "EventName", "SiteName", "StartDate"]
            df_wVCSS_wWEI.columns = fieldList_WEI

            return "success function", df_wVCSS_wWEI

    except:
        messageTime = timeFun()
        scriptMsg = "Error function: defineMetadata_WEI - " + messageTime
        print(scriptMsg)
        logFile = open(logFileName, "a")
        logFile.write(scriptMsg + "\n")
        traceback.print_exc(file=sys.stdout)
        logFile.close()
        return "failed function", "Null"


if __name__ == '__main__':

    # Write parameters to log file ---------------------------------------------

    ##################################
    # Checking for working directories
    ##################################
    if os.path.exists(workspace):
        pass
    else:
        os.makedirs(workspace)

    #Check for logfile
    if os.path.exists(logFileName):
        pass
    else:
        logFile = open(logFileName, "w")  #Creating index file if it doesn't exist
        logFile.close()

    # Analyses routine ---------------------------------------------------------
    main()
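    # Optional post-run check (sketch, not part of the original routine): confirm the appended
    # rows are present in the dataset table before certifying - adjust the query as needed.
    # outVal = connect_to_AcessDB("SELECT Count(*) AS RecCount FROM " + soilsDatasetTable + ";", soilsDB)
    # if outVal[0].lower() == "success function":
    #     print(outVal[1])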