Skip to content

Commit

Permalink
fixed necessary columns in extraction ts and text without format master table
Browse files Browse the repository at this point in the history
  • Loading branch information
Sari27 committed Dec 20, 2023
1 parent 8a5d3eb commit c9800ae
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 23 deletions.
6 changes: 2 additions & 4 deletions pythonCode/modules/extraction_text/BioBERT_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,12 @@ def generate_biobert_notes_embeddings(self, dataframe, identifiers_list, frequen
df_row = pd.DataFrame(row).transpose()
df_row_embeddings = pd.DataFrame(
[self.get_biobert_embeddings_from_event_list(df_row[column_text])])
# Insert time in the dataframe
df_row_embeddings.insert(0, column_time, df_row[column_time].item())
# Insert patient_id in the dataframe
df_row_embeddings.insert(0, column_id, df_row[column_id].item())
df_notes_embeddings = pd.concat([df_notes_embeddings, df_row_embeddings], ignore_index=True)
# Rename columns
col_number = len(df_notes_embeddings.columns) - 2
df_notes_embeddings.columns = [column_id, column_time] + [column_prefix + str(i) for i in range(col_number)]
col_number = len(df_notes_embeddings.columns) - 1
df_notes_embeddings.columns = [column_id] + [column_prefix + str(i) for i in range(col_number)]

elif column_time != "":
# Iterate over patients
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,22 @@ def _custom_process(self, json_config: dict) -> dict:
"""
go_print(json.dumps(json_config, indent=4))

# Check if the process is necessary
# Get frequency
frequency = json_config["relativeToExtractionType"]["frequency"]
if frequency != "Patient" and frequency != "Admission" and frequency != "HourRange":
return self.results

# Initialize data
extracted_data_file = json_config["csvResultsPath"]
extracted_data = pd.read_csv(extracted_data_file)
selected_columns = json_config["relativeToExtractionType"]["selectedColumns"]

# Set master table format depending on frequency (for notes there is nothing to do)
if frequency == "Patient":
if frequency == "Patient" or frequency == "Note":
df_notes = pd.read_csv(json_config["csvPath"])
df_notes[selected_columns["time"]] = pd.to_datetime(df_notes[selected_columns["time"]])
df_notes = df_notes[[selected_columns["patientIdentifier"], selected_columns["time"]]]
idx_min_date = df_notes.groupby(selected_columns["patientIdentifier"])[selected_columns["time"]].idxmin()
df_notes = df_notes.loc[idx_min_date]
if frequency == "Patient":
idx_min_date = df_notes.groupby(selected_columns["patientIdentifier"])[selected_columns["time"]].idxmin()
df_notes = df_notes.loc[idx_min_date]
df_tmp = extracted_data.merge(df_notes, on=[selected_columns["patientIdentifier"]])
extracted_data.insert(1, selected_columns["time"], df_tmp[selected_columns["time"]])
elif frequency == "Admission":
Expand Down
4 changes: 1 addition & 3 deletions pythonCode/modules/extraction_ts/TSfresh_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ def generate_TSfresh_embeddings(self, dataframe, identifiers_list, frequency, co
columns = list(df_patient_embeddings.columns)
new_columns = [column_prefix + col for col in columns]
df_patient_embeddings.columns = new_columns
# Insert time in the dataframe
df_patient_embeddings.insert(0, column_time, df_patient[column_time].iloc[0])
# Insert patient_id in the dataframe
df_patient_embeddings.insert(0, column_id, patient_id)
df_ts_embeddings = pd.concat([df_ts_embeddings, df_patient_embeddings], ignore_index=True)
Expand Down Expand Up @@ -119,7 +117,7 @@ def generate_TSfresh_embeddings(self, dataframe, identifiers_list, frequency, co
columns = list(df_time_embeddings.columns)
new_columns = [column_prefix + col for col in columns]
df_time_embeddings.columns = new_columns
# Insert time in the dataframe (only start_date if the dataframe must respect submaster table format)
# Insert time in the dataframe
df_time_embeddings.insert(0, "end_date", end_date)
df_time_embeddings.insert(0, "start_date", start_date)
# Insert patient_id in the dataframe
Expand Down
13 changes: 10 additions & 3 deletions pythonCode/modules/extraction_ts/to_master_TSfresh_extraction.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dask.dataframe as dd
import json
import os
import pandas as pd
Expand Down Expand Up @@ -35,10 +36,8 @@ def _custom_process(self, json_config: dict) -> dict:
"""
go_print(json.dumps(json_config, indent=4))

# Check if the process is necessary
# Get frequency
frequency = json_config["relativeToExtractionType"]["frequency"]
if frequency != "Admission" and frequency != "HourRange":
return self.results

# Initialize data
extracted_data_file = json_config["csvResultsPath"]
Expand All @@ -48,6 +47,14 @@ def _custom_process(self, json_config: dict) -> dict:
# Set master table format depending on frequency (for notes there is nothing to do)
if frequency == "Admission":
extracted_data.drop(columns=[selected_columns["admissionIdentifier"]], inplace=True)
elif frequency == "Patient":
df_ts = pd.read_csv(json_config["csvPath"])
df_ts[selected_columns["time"]] = pd.to_datetime(df_ts[selected_columns["time"]])
df_ts = df_ts[[selected_columns["patientIdentifier"], selected_columns["time"]]]
idx_min_date = df_ts.groupby(selected_columns["patientIdentifier"])[selected_columns["time"]].idxmin()
df_ts = df_ts.loc[idx_min_date]
df_tmp = extracted_data.merge(df_ts, on=[selected_columns["patientIdentifier"]])
extracted_data.insert(1, selected_columns["time"], df_tmp[selected_columns["time"]])
elif frequency == "HourRange":
extracted_data.drop(columns=["end_date"], inplace=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ const ExtractionBioBERT = ({ dataframe, setExtractionJsonData, setMayProceed })
*
*/
useEffect(() => {
if (frequency == "Patient") {
if (frequency == "Patient" && masterTableCompatible) {
setMayProceed(biobertPath && selectedColumns.patientIdentifier !== "" && selectedColumns.notes !== "" && selectedColumns.time !== "")
setExtractionJsonData({ biobertPath: biobertPath, selectedColumns: selectedColumns, columnPrefix: columnPrefix, frequency: frequency, masterTableCompatible: masterTableCompatible })
} else if (frequency == "Patient") {
setMayProceed(biobertPath && selectedColumns.patientIdentifier !== "" && selectedColumns.notes !== "")
setExtractionJsonData({ biobertPath: biobertPath, selectedColumns: selectedColumns, columnPrefix: columnPrefix, frequency: frequency, masterTableCompatible: masterTableCompatible })
} else if (frequency == "Admission") {
Expand All @@ -135,9 +138,12 @@ const ExtractionBioBERT = ({ dataframe, setExtractionJsonData, setMayProceed })
} else if (frequency == "HourRange") {
setMayProceed(biobertPath && selectedColumns.patientIdentifier !== "" && selectedColumns.notes !== "" && selectedColumns.time !== "")
setExtractionJsonData({ biobertPath: biobertPath, selectedColumns: selectedColumns, columnPrefix: columnPrefix, frequency: frequency, hourRange: hourRange, masterTableCompatible: masterTableCompatible })
} else if (frequency == "Note") {
} else if (frequency == "Note" && masterTableCompatible) {
setMayProceed(biobertPath && selectedColumns.patientIdentifier !== "" && selectedColumns.notes !== "" && selectedColumns.time !== "")
setExtractionJsonData({ biobertPath: biobertPath, selectedColumns: selectedColumns, columnPrefix: columnPrefix, frequency: frequency, masterTableCompatible: masterTableCompatible })
} else if (frequency == "Note") {
setMayProceed(biobertPath && selectedColumns.patientIdentifier !== "" && selectedColumns.notes !== "")
setExtractionJsonData({ biobertPath: biobertPath, selectedColumns: selectedColumns, columnPrefix: columnPrefix, frequency: frequency, masterTableCompatible: masterTableCompatible })
}
}, [selectedColumns, frequency, hourRange, masterTableCompatible, columnPrefix])

Expand Down Expand Up @@ -199,7 +205,7 @@ const ExtractionBioBERT = ({ dataframe, setExtractionJsonData, setMayProceed })
</Card>
</div>
<div className="margin-top-30">
<InputSwitch inputId="masterTableCompatible" disabled={frequency === "Note"} checked={masterTableCompatible} onChange={(e) => setMasterTableCompatible(e.value)} tooltip="The master table format may contain less columns in order to enter the MEDprofiles' process." />
<InputSwitch inputId="masterTableCompatible" checked={masterTableCompatible} onChange={(e) => setMasterTableCompatible(e.value)} tooltip="The master table format may contain less columns in order to enter the MEDprofiles' process." />
<label htmlFor="masterTableCompatible">&nbsp; Master Table Compatible &nbsp;</label>
</div>
</div>
Expand Down Expand Up @@ -228,7 +234,7 @@ const ExtractionBioBERT = ({ dataframe, setExtractionJsonData, setMayProceed })
</div>
</div>
)}
{(frequency == "HourRange" || frequency == "Note" || (frequency == "Patient" && masterTableCompatible)) && (
{(frequency == "HourRange" || (frequency == "Note" && masterTableCompatible) || (frequency == "Patient" && masterTableCompatible)) && (
<div className="margin-top-15">
Time : &nbsp;
{dataframe.$data ? <Dropdown value={selectedColumns.time} onChange={(event) => handleColumnSelect("time", event)} options={dataframe.$columns.filter((column, index) => dataframe.$dtypes[index] == "string" && dataframe[column].dt.$dateObjectArray[0] != "Invalid Date")} placeholder="Time" /> : <Dropdown placeholder="Time" disabled />}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,12 @@ const ExtractionTSfresh = ({ dataframe, setExtractionJsonData, setMayProceed, se
*
*/
useEffect(() => {
if (frequency == "Patient") {
if (frequency == "Patient" && masterTableCompatible) {
setMayProceed(selectedColumns.patientIdentifier !== "" && selectedColumns.time !== "" && selectedColumns.measuredItemIdentifier !== "" && selectedColumns.measurementWeight !== "" && selectedColumns.measurementValue !== "")
setExtractionJsonData({ columnPrefix: columnPrefix, selectedColumns: selectedColumns, featuresOption: featuresOption, frequency: frequency, masterTableCompatible: masterTableCompatible })
} else if (frequency == "Patient") {
setMayProceed(selectedColumns.patientIdentifier !== "" && selectedColumns.measuredItemIdentifier !== "" && selectedColumns.measurementWeight !== "" && selectedColumns.measurementValue !== "")
setExtractionJsonData({ columnPrefix: columnPrefix, selectedColumns: selectedColumns, featuresOption: featuresOption, frequency: frequency, masterTableCompatible: masterTableCompatible })
} else if (frequency == "Admission") {
setMayProceed(selectedColumns.patientIdentifier !== "" && selectedColumns.admissionIdentifier !== "" && selectedColumns.admissionTime !== "" && selectedColumns.measuredItemIdentifier !== "" && selectedColumns.measurementWeight !== "" && selectedColumns.measurementValue !== "")
setExtractionJsonData({ columnPrefix: columnPrefix, selectedColumns: selectedColumns, featuresOption: featuresOption, frequency: frequency, masterTableCompatible: masterTableCompatible })
Expand Down Expand Up @@ -167,7 +170,7 @@ const ExtractionTSfresh = ({ dataframe, setExtractionJsonData, setMayProceed, se
{frequency == "HourRange" && <InputNumber value={hourRange} onValueChange={(e) => setHourRange(e.value)} size={1} showButtons min={1} />}
</div>
<div className="margin-top-30">
<InputSwitch inputId="masterTableCompatible" disabled={frequency === "Patient"} checked={masterTableCompatible} onChange={(e) => setMasterTableCompatible(e.value)} tooltip="The master table format may contain less columns in order to enter the MEDprofiles' process." />
<InputSwitch inputId="masterTableCompatible" checked={masterTableCompatible} onChange={(e) => setMasterTableCompatible(e.value)} tooltip="The master table format may contain less columns in order to enter the MEDprofiles' process." />
<label htmlFor="masterTableCompatible">&nbsp; Master Table Compatible &nbsp;</label>
</div>
</div>
Expand Down Expand Up @@ -197,7 +200,7 @@ const ExtractionTSfresh = ({ dataframe, setExtractionJsonData, setMayProceed, se
</div>
</div>
)}
{frequency != "Admission" && (
{((frequency == "Patient" && masterTableCompatible) || frequency == "HourRange") && (
<div className="margin-top-15">
Time : &nbsp;
{dataframe.$data ? <Dropdown value={selectedColumns.time} onChange={(event) => handleColumnSelect("time", event)} options={dataframe.$columns.filter((column, index) => dataframe.$dtypes[index] == "string" && dataframe[column].dt.$dateObjectArray[0] != "Invalid Date")} placeholder="Time" /> : <Dropdown placeholder="Time" disabled />}
Expand Down

0 comments on commit c9800ae

Please sign in to comment.