From 0395a83ddcd9683648c04d1c9d1a9159c9911054 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 11:10:07 +0100 Subject: [PATCH 01/11] added function to create a colour scheme lookup table --- src/usal_echo/d02_intermediate/clean_dcm.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/usal_echo/d02_intermediate/clean_dcm.py b/src/usal_echo/d02_intermediate/clean_dcm.py index 94c57d5..12f0796 100644 --- a/src/usal_echo/d02_intermediate/clean_dcm.py +++ b/src/usal_echo/d02_intermediate/clean_dcm.py @@ -41,4 +41,14 @@ def clean_dcm_meta(): meta_lite = metadata[metadata["tags"].isin(dicom_tags.values())] io_clean.save_to_db(meta_lite, "meta_lite") + + #create a colour scheme lookup + #Create a colour scheme lookup for filenames + colour_scheme_lookup = meta_lite[(meta_lite['tag1'] == '0028') & (meta_lite['tag2'] == '0004')].copy() + colour_scheme_lookup = colour_scheme_lookup.drop_duplicates() + colour_scheme_lookup = colour_scheme_lookup.drop_duplicates(subset='filename', keep='first') + colour_scheme_lookup = colour_scheme_lookup.rename(columns={'value':'colour_scheme'}) + + io_clean.save_to_db(colour_scheme_lookup, "colour_scheme_lookup") + logger.info("Metadata filtered.") From bb36665ad73d30e0d8aa3e2f2d09f5d4403b7e63 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 11:21:22 +0100 Subject: [PATCH 02/11] little bug fix on the union of tables' --- src/usal_echo/d02_intermediate/instance_filters/filter_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/usal_echo/d02_intermediate/instance_filters/filter_views.py b/src/usal_echo/d02_intermediate/instance_filters/filter_views.py index 38cd44c..18c01ef 100644 --- a/src/usal_echo/d02_intermediate/instance_filters/filter_views.py +++ b/src/usal_echo/d02_intermediate/instance_filters/filter_views.py @@ -256,7 +256,7 @@ def filter_by_views(): inst_3 = df_ultra_color_filt["instanceidk"].tolist() # Get instances that passed all filtering steps - inst_final = list(set(inst_1) & set(inst_2) & set(inst_3)) + inst_final = list(set().union(inst_1, inst_2, inst_3)) # Filter out instances that do not meet the dicom metadata criteria df = df_inst_all From e74055cfa129aab0e8c8edb96ee335dcc1ee7888 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 11:31:17 +0100 Subject: [PATCH 03/11] added a couple of lines of code to filter out any blank masks from the ground truth table --- .../d04_segmentation/generate_masks.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/usal_echo/d04_segmentation/generate_masks.py b/src/usal_echo/d04_segmentation/generate_masks.py index cac42f6..cf28a7f 100644 --- a/src/usal_echo/d04_segmentation/generate_masks.py +++ b/src/usal_echo/d04_segmentation/generate_masks.py @@ -38,24 +38,26 @@ def generate_masks(dcm_path): "view_name", "numpy_array", ] + counter = 0 for index, mask in masks_df.iterrows(): resized_mask = imresize(mask["mask"], (384, 384)) - d = [ - int(mask["studyidk"]), - mask["instanceidk"], - mask["instancefilename"], - int(mask["frame"]), - mask["chamber"], - mask["view"], - resized_mask, - ] - - io_segmentation.save_ground_truth_numpy_array_to_db(d, gt_table_column_names) + if resized_mask.sum != 0: #removes blank masks from ground_truths + d = [ + int(mask["studyidk"]), + mask["instanceidk"], + mask["instancefilename"], + int(mask["frame"]), + mask["chamber"], + mask["view"], + resized_mask, + ] + io_segmentation.save_ground_truth_numpy_array_to_db(d, gt_table_column_names) + counter = counter + 1 logger.info( "{} ground truth masks written to the segmentation.ground_truths table".format( - masks_df.shape[0] + counter ) ) From 2a0cbe75b9ffc52dc5776b2d3e1c7a41901c4c3b Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 11:36:36 +0100 Subject: [PATCH 04/11] =?UTF-8?q?removed=20the=20resnet=20features=20from?= =?UTF-8?q?=20the=20Zhang=20code=20that=20aren=E2=80=99t=20required?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../d04_segmentation/segment_utils.py | 531 ------------------ 1 file changed, 531 deletions(-) diff --git a/src/usal_echo/d04_segmentation/segment_utils.py b/src/usal_echo/d04_segmentation/segment_utils.py index cafecbf..deeff27 100644 --- a/src/usal_echo/d04_segmentation/segment_utils.py +++ b/src/usal_echo/d04_segmentation/segment_utils.py @@ -241,534 +241,3 @@ def iou(gt, pred, seg): overlap = np.minimum(gt_seg, pred_seg) return 2 * np.sum(overlap) / (np.sum(gt_seg) + np.sum(pred_seg)) - -############################ -# Resnet Functions # -############################ - -_BATCH_NORM_DECAY = 0.95 -_BATCH_NORM_EPSILON = 1e-5 - - -def batch_norm_relu(inputs, is_training, data_format): - """Performs a batch normalization followed by a ReLU.""" - # We set fused=True for a significant performance boost. See - # https://www.tensorflow.org/performance/performance_guide#common_fused_ops - inputs = tf.layers.batch_normalization( - inputs=inputs, - axis=1 if data_format == "channels_first" else 3, - momentum=_BATCH_NORM_DECAY, - epsilon=_BATCH_NORM_EPSILON, - center=True, - scale=True, - training=is_training, - ) - inputs = tf.nn.relu(inputs) - return inputs - - -def fixed_padding(inputs, kernel_size, data_format): - """Pads the input along the spatial dimensions independently of input size. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - kernel_size: The kernel to be used in the conv2d or max_pool2d operation. - Should be a positive integer. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - A tensor with the same format as the input with the data either intact - (if kernel_size == 1) or padded (if kernel_size > 1). - """ - pad_total = kernel_size - 1 - pad_beg = pad_total // 2 - pad_end = pad_total - pad_beg - - if data_format == "channels_first": - padded_inputs = tf.pad( - inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]] - ) - else: - padded_inputs = tf.pad( - inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]] - ) - return padded_inputs - - -def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format): - """Strided 2-D convolution with explicit padding.""" - # The padding is consistent and is based only on `kernel_size`, not on the - # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). - if strides > 1: - inputs = fixed_padding(inputs, kernel_size, data_format) - - return tf.layers.conv2d( - inputs=inputs, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=("SAME" if strides == 1 else "VALID"), - use_bias=False, - kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=1.0), - data_format=data_format, - ) - - -def dilated_conv2d_fixed_padding( - inputs, num_filters, kernel_size, rate, data_format, name -): - """Strided 2-D convolution with explicit padding.""" - # The padding is consistent and is based only on `kernel_size`, not on the - # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). - with tf.variable_scope(name): - input_channels = int(inputs.get_shape()[-1]) - filter_size = kernel_size - weights = tf.get_variable( - "W", - shape=[filter_size, filter_size, input_channels, num_filters], - initializer=tf.contrib.layers.xavier_initializer(), - collections=["variables"], - ) - conv_out = tf.nn.atrous_conv2d( - value=inputs, filters=weights, rate=rate, padding="SAME" - ) - return conv_out - - -def building_block( - inputs, filters, is_training, projection_shortcut, strides, data_format -): - """Standard building block for residual networks with BN before convolutions. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - filters: The number of filters for the convolutions. - is_training: A Boolean for whether the model is in training or inference - mode. Needed for batch normalization. - projection_shortcut: The function to use for projection shortcuts (typically - a 1x1 convolution when downsampling the input). - strides: The block's stride. If greater than 1, this block will ultimately - downsample the input. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - The output tensor of the block. - """ - shortcut = inputs - inputs = batch_norm_relu(inputs, is_training, data_format) - - # The projection shortcut should come after the first batch norm and ReLU - # since it performs a 1x1 convolution. - if projection_shortcut is not None: - shortcut = projection_shortcut(inputs) - - inputs = conv2d_fixed_padding( - inputs=inputs, - filters=filters, - kernel_size=3, - strides=strides, - data_format=data_format, - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = conv2d_fixed_padding( - inputs=inputs, - filters=filters, - kernel_size=3, - strides=1, - data_format=data_format, - ) - - return inputs + shortcut - - -def building_block_dilated( - inputs, - num_filters, - is_training, - projection_shortcut, - rate, - data_format, - name, - skip=1, -): - """Standard building block for residual networks with BN before convolutions. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - filters: The number of filters for the convolutions. - is_training: A Boolean for whether the model is in training or inference - mode. Needed for batch normalization. - projection_shortcut: The function to use for projection shortcuts (typically - a 1x1 convolution when downsampling the input). - strides: The block's stride. If greater than 1, this block will ultimately - downsample the input. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - The output tensor of the block. - """ - shortcut = inputs - inputs = batch_norm_relu(inputs, is_training, data_format) - - # The projection shortcut should come after the first batch norm and ReLU - # since it performs a 1x1 convolution. - if projection_shortcut is not None: - shortcut = projection_shortcut(inputs) - - inputs = dilated_conv2d_fixed_padding( - inputs=inputs, - num_filters=num_filters, - kernel_size=3, - rate=rate, - data_format=data_format, - name=name + "_1", - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = dilated_conv2d_fixed_padding( - inputs=inputs, - num_filters=num_filters, - kernel_size=3, - rate=rate, - data_format=data_format, - name=name + "_2", - ) - if skip: - return inputs + shortcut - else: - return inputs - - -def bottleneck_block( - inputs, filters, is_training, projection_shortcut, strides, data_format -): - """Bottleneck block variant for residual networks with BN before convolutions. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - filters: The number of filters for the first two convolutions. Note that the - third and final convolution will use 4 times as many filters. - is_training: A Boolean for whether the model is in training or inference - mode. Needed for batch normalization. - projection_shortcut: The function to use for projection shortcuts (typically - a 1x1 convolution when downsampling the input). - strides: The block's stride. If greater than 1, this block will ultimately - downsample the input. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - The output tensor of the block. - """ - shortcut = inputs - inputs = batch_norm_relu(inputs, is_training, data_format) - - # The projection shortcut should come after the first batch norm and ReLU - # since it performs a 1x1 convolution. - if projection_shortcut is not None: - shortcut = projection_shortcut(inputs) - - inputs = conv2d_fixed_padding( - inputs=inputs, - filters=filters, - kernel_size=1, - strides=1, - data_format=data_format, - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = conv2d_fixed_padding( - inputs=inputs, - filters=filters, - kernel_size=3, - strides=strides, - data_format=data_format, - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = conv2d_fixed_padding( - inputs=inputs, - filters=4 * filters, - kernel_size=1, - strides=1, - data_format=data_format, - ) - - return inputs + shortcut - - -def bottleneck_block_dilated( - inputs, num_filters, is_training, projection_shortcut, rate, data_format, skip=1 -): - """Bottleneck block variant for residual networks with BN before convolutions. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - filters: The number of filters for the first two convolutions. Note that the - third and final convolution will use 4 times as many filters. - is_training: A Boolean for whether the model is in training or inference - mode. Needed for batch normalization. - projection_shortcut: The function to use for projection shortcuts (typically - a 1x1 convolution when downsampling the input). - strides: The block's stride. If greater than 1, this block will ultimately - downsample the input. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - The output tensor of the block. - """ - shortcut = inputs - inputs = batch_norm_relu(inputs, is_training, data_format) - - # The projection shortcut should come after the first batch norm and ReLU - # since it performs a 1x1 convolution. - if projection_shortcut is not None: - shortcut = projection_shortcut(inputs) - - inputs = dilated_conv2d_fixed_padding( - inputs=inputs, - num_filters=num_filters, - kernel_size=1, - rate=rate, - data_format=data_format, - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = dilated_conv2d_fixed_padding( - inputs=inputs, - num_filters=num_filters, - kernel_size=3, - rate=rate, - data_format=data_format, - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = dilated_conv2d_fixed_padding( - inputs=inputs, - num_filters=4 * num_filters, - kernel_size=1, - rate=rate, - data_format=data_format, - ) - if skip: - return inputs + shortcut - else: - return inputs - - -def block_layer( - inputs, filters, block_fn, blocks, strides, is_training, name, data_format -): - """Creates one layer of blocks for the ResNet model. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - filters: The number of filters for the first convolution of the layer. - block_fn: The block to use within the model, either `building_block` or - `bottleneck_block`. - blocks: The number of blocks contained in the layer. - strides: The stride to use for the first convolution of the layer. If - greater than 1, this layer will ultimately downsample the input. - is_training: Either True or False, whether we are currently training the - model. Needed for batch norm. - name: A string name for the tensor output of the block layer. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - The output tensor of the block layer. - """ - # Bottleneck blocks end with 4x the number of filters as they start with - filters_out = 4 * filters if block_fn is bottleneck_block else filters - - def projection_shortcut(inputs): - return conv2d_fixed_padding( - inputs=inputs, - filters=filters_out, - kernel_size=1, - strides=strides, - data_format=data_format, - ) - - # Only the first block per block_layer uses projection_shortcut and strides - inputs = block_fn( - inputs, filters, is_training, projection_shortcut, strides, data_format - ) - - for _ in range(1, blocks): - inputs = block_fn(inputs, filters, is_training, None, 1, data_format) - - return tf.identity(inputs, name) - - -def block_layer_dilated( - inputs, num_filters, block_fn, blocks, rate, is_training, name, data_format, skip=1 -): - """Creates one layer of blocks for the ResNet model. - Args: - inputs: A tensor of size [batch, channels, height_in, width_in] or - [batch, height_in, width_in, channels] depending on data_format. - filters: The number of filters for the first convolution of the layer. - block_fn: The block to use within the model, either `building_block` or - `bottleneck_block`. - blocks: The number of blocks contained in the layer. - strides: The stride to use for the first convolution of the layer. If - greater than 1, this layer will ultimately downsample the input. - is_training: Either True or False, whether we are currently training the - model. Needed for batch norm. - name: A string name for the tensor output of the block layer. - data_format: The input format ('channels_last' or 'channels_first'). - Returns: - The output tensor of the block layer. - """ - # Bottleneck blocks end with 4x the number of filters as they start with - filters_out = 4 * num_filters if block_fn is bottleneck_block else num_filters - inputs = tf.transpose(inputs, [0, 2, 3, 1]) - - def projection_shortcut(inputs): - return dilated_conv2d_fixed_padding( - inputs=inputs, - num_filters=filters_out, - kernel_size=1, - rate=rate, - data_format=data_format, - name=name, - ) - - # Only the first block per block_layer uses projection_shortcut and strides - inputs = block_fn( - inputs, - num_filters, - is_training, - projection_shortcut, - rate, - data_format, - name, - skip, - ) - - for _ in range(1, blocks): - inputs = block_fn( - inputs, - num_filters, - is_training, - None, - rate, - data_format, - name + "_" + str(_), - skip, - ) - - inputs = tf.transpose(inputs, [0, 3, 1, 2]) - return tf.identity(inputs, name) - - -def imagenet_resnet_v2_generator(block_fn, layers, num_classes, data_format=None): - """Generator for ImageNet ResNet v2 models. - Args: - block_fn: The block to use within the model, either `building_block` or - `bottleneck_block`. - layers: A length-4 array denoting the number of blocks to include in each - layer. Each layer consists of blocks that take inputs of the same size. - num_classes: The number of possible classes for image classification. - data_format: The input format ('channels_last', 'channels_first', or None). - If set to None, the format is dependent on whether a GPU is available. - Returns: - The model function that takes in `inputs` and `is_training` and - returns the output tensor of the ResNet model. - """ - if data_format is None: - data_format = ( - "channels_first" if tf.test.is_built_with_cuda() else "channels_last" - ) - - def model(inputs, is_training): - """Constructs the ResNet model given the inputs.""" - if data_format == "channels_first": - # Convert the inputs from channels_last (NHWC) to channels_first (NCHW). - # This provides a large performance boost on GPU. See - # https://www.tensorflow.org/performance/performance_guide#data_formats - inputs = tf.transpose(inputs, [0, 3, 1, 2]) - - inputs = conv2d_fixed_padding( - inputs=inputs, filters=64, kernel_size=7, strides=2, data_format=data_format - ) - inputs = tf.identity(inputs, "initial_conv") - inputs = tf.layers.max_pooling2d( - inputs=inputs, - pool_size=3, - strides=2, - padding="SAME", - data_format=data_format, - ) - inputs = tf.identity(inputs, "initial_max_pool") - - inputs = block_layer( - inputs=inputs, - filters=64, - block_fn=block_fn, - blocks=layers[0], - strides=1, - is_training=is_training, - name="block_layer1", - data_format=data_format, - ) - inputs = block_layer( - inputs=inputs, - filters=128, - block_fn=block_fn, - blocks=layers[1], - strides=2, - is_training=is_training, - name="block_layer2", - data_format=data_format, - ) - inputs = block_layer( - inputs=inputs, - filters=256, - block_fn=block_fn, - blocks=layers[2], - strides=2, - is_training=is_training, - name="block_layer3", - data_format=data_format, - ) - inputs = block_layer( - inputs=inputs, - filters=512, - block_fn=block_fn, - blocks=layers[3], - strides=2, - is_training=is_training, - name="block_layer4", - data_format=data_format, - ) - - inputs = batch_norm_relu(inputs, is_training, data_format) - inputs = tf.layers.average_pooling2d( - inputs=inputs, - pool_size=7, - strides=1, - padding="VALID", - data_format=data_format, - ) - inputs = tf.identity(inputs, "final_avg_pool") - inputs = tf.reshape(inputs, [-1, 512 if block_fn is building_block else 2048]) - inputs = tf.layers.dense(inputs=inputs, units=num_classes) - inputs = tf.identity(inputs, "final_dense") - return inputs - - return model - - -def imagenet_resnet_v2(resnet_size, num_classes, data_format=None): - """Returns the ResNet model for a given size and number of output classes.""" - model_params = { - 18: {"block": building_block, "layers": [2, 2, 2, 2]}, - 34: {"block": building_block, "layers": [3, 4, 6, 3]}, - 50: {"block": bottleneck_block, "layers": [3, 4, 6, 3]}, - 101: {"block": bottleneck_block, "layers": [3, 4, 23, 3]}, - 152: {"block": bottleneck_block, "layers": [3, 8, 36, 3]}, - 200: {"block": bottleneck_block, "layers": [3, 24, 36, 3]}, - } - - if resnet_size not in model_params: - raise ValueError("Not a valid resnet_size:", resnet_size) - - params = model_params[resnet_size] - return imagenet_resnet_v2_generator( - params["block"], params["layers"], num_classes, data_format - ) From 82f8135f9c0ef18e6d3338bf487edef3871e8060 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 14:45:46 +0100 Subject: [PATCH 05/11] updated download_dcm file to use just the first colour channel; overcoming rbg, ybr conversion issues --- .../d02_intermediate/download_dcm.py | 57 ++++--------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/src/usal_echo/d02_intermediate/download_dcm.py b/src/usal_echo/d02_intermediate/download_dcm.py index d066c2b..32d74ac 100644 --- a/src/usal_echo/d02_intermediate/download_dcm.py +++ b/src/usal_echo/d02_intermediate/download_dcm.py @@ -6,10 +6,10 @@ import numpy as np import subprocess -from scipy.misc import imresize +from skimage.transform import resize +from PIL import Image import cv2 import pydicom -from skimage.color import rgb2gray from usal_echo.d00_utils.db_utils import dbReadWriteViews from usal_echo.d00_utils.s3_utils import download_s3_objects @@ -18,13 +18,7 @@ logger = setup_logging(__name__, __name__) -def _ybr2gray(y, u, v): - r = y + 1.402 * (v - 128) - g = y - 0.34414 * (u - 128) - 0.71414 * (v - 128) - b = y + 1.772 * (u - 128) - gray = 0.2989 * r + 0.5870 * g + 0.1140 * b - return np.array(gray, dtype="int8") def decompress_dcm(dcm_filepath, dcmraw_filepath): @@ -60,7 +54,7 @@ def _split_train_test(ratio, table_name): df = io_views.get_table(table_name) np.random.seed(0) - msk = np.random.rand(len(df)) < ratio + msk = np.random.rand(len(df)) > ratio df_train = df[msk].reset_index(drop=True) df_test = df[~msk].reset_index(drop=True) @@ -110,7 +104,7 @@ def s3_download_decomp_dcm( :param downsample_ratio (float): percentage by which to downsample dataset e.g. if ratio=0.1, will downsample by a factor of 10 :param train_test_ratio (float): ratio for splitting into train/test - :param table_name (str): name of views.table with master instancest + :param table_name (str): name of views.table with master instances :param train (bool): download train set instead of test set, default=False """ @@ -192,46 +186,18 @@ def _dcmraw_to_np(dcmraw_obj): if len(pxl_array.shape) == 4: # format 3, nframes, nrow, ncol nframes = pxl_array.shape[1] - maxframes = nframes * 3 elif len(pxl_array.shape) == 3: # format nframes, nrow, ncol nframes = pxl_array.shape[0] - maxframes = nframes * 1 nrow = int(dcmraw_obj.Rows) ncol = int(dcmraw_obj.Columns) ArrayDicom = np.zeros((nrow, ncol), dtype=pxl_array.dtype) framedict = {} - - for counter in range(0, maxframes, 3): # iterate through all subframes - k = counter % nframes - j = (counter) // nframes - m = (counter + 1) % nframes - l = (counter + 1) // nframes - o = (counter + 2) % nframes - n = (counter + 2) // nframes - - if len(pxl_array.shape) == 4: - a = pxl_array[j, k, :, :] - b = pxl_array[l, m, :, :] - c = pxl_array[n, o, :, :] - ArrayDicom[:, :] = _ybr2gray(a, b, c) - ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0 # blanks out name - counter = counter + 1 - ArrayDicom.clip(0) - nrowout = nrow - ncolout = ncol - x = int(counter / 3) - framedict[x] = imresize(ArrayDicom, (nrowout, ncolout)) - elif len(pxl_array.shape) == 3: - ArrayDicom[:, :] = pxl_array[counter, :, :] - ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0 # blanks out name - counter = counter + 1 - ArrayDicom.clip(0) - nrowout = nrow - ncolout = ncol - x = int(counter / 3) - framedict[x] = imresize(ArrayDicom, (nrowout, ncolout)) - + + for i in range(nframes): + ArrayDicom[:, :] = pxl_array[0, i, :, :].copy() + ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0 + framedict[i] = np.resize(ArrayDicom, (nrow,ncol)) return framedict @@ -332,8 +298,9 @@ def dcm_to_segmentation_arrays(dcm_dir, filename): for key in list(framedict.keys()): image = np.zeros((384, 384)) - image[:, :] = imresize(rgb2gray(framedict[key]), (384, 384, 1)) - images.append(image) + image[:, :] = resize(framedict[key], (384, 384), anti_aliasing=True) + image = 255 * image + images.append(image.astype(np.uint8)) orig_images.append(framedict[key]) images = np.array(images).reshape((len(images), 384, 384, 1)) From cf9a6e34d9f2a38783a9342e113cac4dc28e9677 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 15:12:29 +0100 Subject: [PATCH 06/11] the evaluate mask function updated to calculate a range of evaluation measures --- .../d04_segmentation/evaluate_masks.py | 157 ++++++++++++++---- 1 file changed, 122 insertions(+), 35 deletions(-) diff --git a/src/usal_echo/d04_segmentation/evaluate_masks.py b/src/usal_echo/d04_segmentation/evaluate_masks.py index 7c2fa2c..aa84cda 100644 --- a/src/usal_echo/d04_segmentation/evaluate_masks.py +++ b/src/usal_echo/d04_segmentation/evaluate_masks.py @@ -6,15 +6,17 @@ @author: court """ +import os import numpy as np +from medpy.metric.binary import dc, jc, hd, precision, recall, sensitivity, specificity -from usal_echo.d00_utils.db_utils import dbReadWriteSegmentation +from usal_echo.d00_utils.db_utils import dbReadWriteClean, dbReadWriteSegmentation from usal_echo.d00_utils.log_utils import setup_logging logger = setup_logging(__name__, __name__) -def evaluate_masks(): +def evaluate_masks(dcm_dir_path): # Go through the ground truth table and write IOUS # Prediction Table: "instance_id","study_id", "view_name", "frame", "output_np_lv", "output_np_la", @@ -22,20 +24,64 @@ def evaluate_masks(): # "file_name" # Ground truth table: ground_truth_id, instance_id, frame, chamber, study_id, view_name, numpy_array # Evaluation Table: evaluation_id, instance_id, frame, chamber, study_id, score_type, score_value + + #just run eval for dcm_dir_path + path = dcm_dir_path + dataset_name = str(dcm_dir_path).split('/')[-1] - io_segmentation = dbReadWriteSegmentation() - ground_truths = io_segmentation.get_segmentation_table("ground_truths") + file_path = [] + filenames = [] - # Go through the ground truth table and write IOUS + for r, d, f in os.walk(path): + for file in f: + if file.endswith("dcm_raw"): + file_path.append(os.path.join(r, file)) + fullfilename = os.path.basename(os.path.join(r, file)) + filenames.append(str(fullfilename).split(".")[0].split("_")[-1]) + + logger.info("Number of files in the directory: {}".format(len(file_path))) + logger.info("example filename: {}".format(filenames[1])) - for index, gt in ground_truths.iterrows(): + io_segmentation = dbReadWriteSegmentation() + ground_truths = io_segmentation.get_segmentation_table("ground_truths") + + #match ground truth with filenames + ground_truth_files = ground_truths[ground_truths['file_name'].isin(filenames)] + + #get table of voxel spacing values + voxel_spacing_df = get_voxel_spacing_for_instances(ground_truth_files) + #set index to be filename + voxel_spacing_df = voxel_spacing_df.set_index('file_name') + + # Go through the ground truth table and write IOUS, DICE and Hausdorff distance + + for index, gt in ground_truth_files.iterrows(): #only run for files in directory! # match the gt to the prediction table gt_instance_id = gt["instance_id"] gt_study_id = gt["study_id"] gt_chamber = gt["chamber"] gt_view_name = gt["view_name"] gt_frame_no = gt["frame"] - + gt_file_name = gt["file_name"] + + #get voxel spacing for the gt image + logger.info('filename im looking for: {}'.format(gt['file_name'])) + logger.info('format of filenames in voxel_spacing_df: {}'.format(voxel_spacing_df.index[0])) + + #take the min of x or y scale spacing (appear to be the same for all files) + try: + voxel_spacing = float(voxel_spacing_df.loc[gt['file_name']]['value']) + logger.info('what the voxel_spacing looks like {}'.format(voxel_spacing)) + except TypeError: + logger.info('voxel spacing can not be converted to a float') + voxel_spacing = 0.013 + + #min distance of 0.012 + if voxel_spacing > 0.012: + pass + else: + voxel_spacing = 0.013 + pred = io_segmentation.get_instance_from_segementation_table( "predictions", gt_instance_id ) @@ -47,7 +93,9 @@ def evaluate_masks(): ) if len(pred.index) > 0: + pred_last = pred.head(1) pred_view_name = gt["view_name"] + pred_seg_model = pred_last['model_name'] # retrieve gt numpy array gt_numpy_array = io_segmentation.convert_to_np( gt["numpy_array"], 1 @@ -70,33 +118,62 @@ def evaluate_masks(): # get the frame of the prediction, that corresponds to the frame of the ground thruth pred_numpy_array_frame = pred_numpy_array[gt_frame_no, :, :] - # calculate iou - reported_iou = iou(gt_numpy_array, pred_numpy_array_frame) - logger.info("IOU of: {}".format(reported_iou)) - - # write to db + # calculate measures + reported_iou = jc(gt_numpy_array, pred_numpy_array_frame) + reported_dice = dc(gt_numpy_array, pred_numpy_array_frame) + reported_precision = precision(gt_numpy_array, pred_numpy_array_frame) + reported_recall = recall(gt_numpy_array, pred_numpy_array_frame) + reported_sensitivity = sensitivity(gt_numpy_array, pred_numpy_array_frame) + reported_specificity = specificity(gt_numpy_array, pred_numpy_array_frame) + + zhang_dice = zhang_modified_dice(gt_numpy_array, pred_numpy_array_frame) + + try: + reported_hausdorff = hd(gt_numpy_array, pred_numpy_array_frame, voxelspacing=voxel_spacing) + except: + reported_hausdorff = 0; + logger.error('hausdorf distance function fails when array equals zero') + + # write evaluation metrics to db # Evaluation Table: evaluation_id, instance_id, frame, chamber, study_id, score_type, score_value d_columns = [ "instance_id", "frame", + "file_name", "chamber", "study_id", "score_type", "score_value", "gt_view_name", "pred_view_name", - ] - d = [ - gt_instance_id, - gt["frame"], - gt_chamber, - gt_study_id, - "iou", - reported_iou, - gt_view_name, - pred_view_name, - ] - io_segmentation.save_seg_evaluation_to_db(d, d_columns) + "dataset", + "model_name"] + + metric_list = {"Jaccard": reported_iou + , "Dice": reported_dice + , "Hausdorff": reported_hausdorff + , "Precison" : reported_precision + , "Recall" : reported_recall + , "Sensitivity" : reported_sensitivity + , "Specificity": reported_specificity + , "Zhang modified dice": zhang_dice + } + + for label, value in metric_list.items(): + d = [gt_instance_id + , gt["frame"] + , gt_file_name + , gt_chamber + , gt_study_id + , label + , value + , gt_view_name + , pred_view_name + , dataset_name + , pred_seg_model] + io_segmentation.save_seg_evaluation_to_db(d, d_columns) + logger.info("{} metric record, with value of {}".format(label, value)) + else: logger.error( "No record exists for study id {} & instance id {}".format( @@ -104,14 +181,24 @@ def evaluate_masks(): ) ) - -def iou(gt, pred): - gt_bool = np.array(gt, dtype=bool) - pred_bool = np.array(pred, dtype=bool) - - overlap = gt_bool * pred_bool # Logical AND - union = gt_bool + pred_bool # Logical OR - - IOU = float(overlap.sum()) / float(union.sum()) - - return IOU +def zhang_modified_dice(gt, pred): #, seg): + #gt_seg = create_seg(gt, seg) + #pred_seg = create_seg(pred, seg) + overlap = np.minimum(gt, pred) + + return 2*np.sum(overlap)/(np.sum(gt) + np.sum(pred)) + +def get_voxel_spacing_for_instances(df): + io_clean = dbReadWriteClean() + df_dcm = io_clean.get_table("meta_lite") #organised by filename + + df_dcm.rename(columns={"filename": "file_name"}, inplace=True) + df_dcm["file_name"] = df_dcm["file_name"].str.rstrip() + df_dcm["file_name"] = df_dcm["file_name"].str.replace("a_", "") #cut the a_ from the string + df_dcm["value"] = df_dcm["value"].str.replace(".", "0.") + + df_dcm["tag1"] = df_dcm["tag1"].astype(str) # consistency with tag2 + voxel_spacing_df = df_dcm.loc[(df_dcm["tag1"] == "18") & (df_dcm["tag2"] == "602c")] #just return x spacing + + logger.info('voxel spacing table obtained with {} rows'.format(voxel_spacing_df.shape)) + return voxel_spacing_df From 12209d242edafdf16e930e7dfaaf75529fb14a74 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 15:16:33 +0100 Subject: [PATCH 07/11] created additional database tables for identifing studies with segmentation labels --- .../d04_segmentation/create_seg_view.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/usal_echo/d04_segmentation/create_seg_view.py b/src/usal_echo/d04_segmentation/create_seg_view.py index dbe2c6a..7c341b0 100644 --- a/src/usal_echo/d04_segmentation/create_seg_view.py +++ b/src/usal_echo/d04_segmentation/create_seg_view.py @@ -6,11 +6,14 @@ @author: court """ +from usal_echo.d00_utils.log_utils import setup_logging from usal_echo.d00_utils.db_utils import dbReadWriteClean, dbReadWriteViews from usal_echo.d04_segmentation.segment_utils import * import pandas as pd +logger = setup_logging(__name__, __name__) + def create_seg_view(): """ @@ -191,3 +194,53 @@ def create_seg_view(): del a_modvolume_df io_views.save_to_db(df_9, "chords_by_volume_mask") + + # create studies_w_segmentation_labels table + df_10 = df_9.groupby( + ["studyidk", "instanceidk", "indexinmglist"]).agg( + { + "x1coordinate": list, + "y1coordinate": list, + "x2coordinate": list, + "y2coordinate": list, + "chamber": pd.Series.unique, + "frame": pd.Series.unique, + "view": pd.Series.unique, + "instancefilename": pd.Series.unique, + } + ) + df_10 = df_10.reset_index() + df_11 = df_10[df_10['chamber'] != ""] + + #get unique study ids + df_12 = pd.DataFrame(df_11['studyidk'].unique()) + df_12.columns = ['studyidk'] + io_views.save_to_db(df_12, "studies_w_segmentation_labels") + + #get study id that have a pair of segmentation masks (lv and la) on the same frame + + gt_LA = df_11[df_11['chamber'] == 'la'] + gt_LV = df_11[df_11['chamber'] == 'lv'] + + logger.info('gt_LA Shape : {} rows {} columns'.format(gt_LA.shape[0], gt_LA.shape[1])) + logger.info('gt_LV Shape : {} rows {} columns'.format(gt_LV.shape[0], gt_LV.shape[1])) + + #inner join only includes rows where there is a match on the stated columns + gt_lv_la_pairs = pd.merge(gt_LA, gt_LV, how='inner', on=['studyidk', 'instanceidk', 'instancefilename', + 'frame', 'view']) + + gt_lv_la_pairs = gt_lv_la_pairs.rename(columns={'ground_truth_id_x':'ground_truth_id_la', + 'chamber_x':'chamber_la', + 'numpy_array_x':'numpy_array_la', + 'ground_truth_id_y':'ground_truth_id_lv', + 'chamber_y':'chamber_lv', + 'numpy_array_y':'numpy_array_lv'}) + + instances_w_lv_la_segmentation_pairs = gt_lv_la_pairs[['studyidk']].copy() + instances_w_lv_la_segmentation_pairs = instances_w_lv_la_segmentation_pairs.drop_duplicates() + + logger.info('instances_w_lv_la_segmentation_pairs, shape: {}'.format(instances_w_lv_la_segmentation_pairs.shape)) + logger.info('instances_w_lv_la_segmentation_pairs format is {}'.format(",".join(instances_w_lv_la_segmentation_pairs.columns))) + + io_views.save_to_db(instances_w_lv_la_segmentation_pairs, "instances_w_lv_la_segmentation_pairs") + From 824d8d3d79dcb8b8c6916ff73f11f45cb841e6a4 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Sun, 10 May 2020 15:44:08 +0100 Subject: [PATCH 08/11] updated segment views to use the model specified in path parameters; and to record additional details of np arrays in db --- .../d04_segmentation/segment_view.py | 86 +++++++++++++------ 1 file changed, 60 insertions(+), 26 deletions(-) diff --git a/src/usal_echo/d04_segmentation/segment_view.py b/src/usal_echo/d04_segmentation/segment_view.py index 33408bc..cd84286 100644 --- a/src/usal_echo/d04_segmentation/segment_view.py +++ b/src/usal_echo/d04_segmentation/segment_view.py @@ -15,15 +15,21 @@ import tensorflow as tf from PIL import Image from scipy.misc import imresize -import datetime import hashlib +import datetime + +from usal_echo import ( + model_dir, + a4c_segmentation_model, + a2c_segmentation_model +) from usal_echo.d00_utils.log_utils import setup_logging from usal_echo.d02_intermediate.download_dcm import dcm_to_segmentation_arrays from usal_echo.d00_utils.db_utils import ( + dbReadWriteClean, dbReadWriteViews, - dbReadWriteClassification, - dbReadWriteSegmentation, + dbReadWriteSegmentation ) from usal_echo.d03_classification.evaluate_views import _groundtruth_views from usal_echo.d04_segmentation.model_unet import Unet @@ -34,7 +40,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0" -def segmentChamber(videofile, dicomdir, view, model_path): +def segmentChamber(videofile, dicomdir, view, model_path, seg_model, colour_scheme_lookup): """ """ @@ -56,7 +62,7 @@ def segmentChamber(videofile, dicomdir, view, model_path): with g_1.as_default(): saver = tf.train.Saver() saver.restore( - sess1, os.path.join(modeldir, "a4c_45_20_all_model.ckpt-9000") + sess1, os.path.join(modeldir, seg_model) ) elif view == "a2c": g_2 = tf.Graph() @@ -70,37 +76,47 @@ def segmentChamber(videofile, dicomdir, view, model_path): with g_2.as_default(): saver = tf.train.Saver() saver.restore( - sess2, os.path.join(modeldir, "a2c_45_20_all_model.ckpt-10600") + sess2, os.path.join(modeldir, seg_model) ) outpath = "/home/ubuntu/data/04_segmentation/" + view + "/" if not os.path.exists(outpath): os.makedirs(outpath) + + #look up colour scheme + match_filename = 'a_' + str(videofile).split('_')[2].split('.')[0] + instance_colour_scheme = colour_scheme_lookup[colour_scheme_lookup['filename'] == match_filename]['colour_scheme'].item() - images, orig_images = dcm_to_segmentation_arrays(dicomdir, videofile) + images, orig_images = dcm_to_segmentation_arrays(dicomdir, videofile, instance_colour_scheme) np_arrays_x3 = [] images_uuid_x3 = [] + np_in_min = images.min() + np_in_max = images.max() + if view == "a4c": + logger.info('predicitng a4c view') a4c_lv_segs, a4c_la_segs, a4c_lvo_segs, preds = extract_segs( images, orig_images, model, sess, 2, 4, 1 ) + np_total = np.sum(a4c_lv_segs) + np.sum(a4c_la_segs) + np.sum(a4c_lvo_segs) np_arrays_x3.append(np.array(a4c_lv_segs).astype("uint8")) np_arrays_x3.append(np.array(a4c_la_segs).astype("uint8")) np_arrays_x3.append(np.array(a4c_lvo_segs).astype("uint8")) number_frames = (np.array(a4c_lvo_segs).astype("uint8").shape)[0] - model_name = "a4c_45_20_all_model.ckpt-9000" - elif view == "a2c": + model_name = a4c_segmentation_model + if view == "a2c": + logger.info('predicitng a2c view') a2c_lv_segs, a2c_la_segs, a2c_lvo_segs, preds = extract_segs( images, orig_images, model, sess, 2, 3, 1 ) + np_total = np.sum(a2c_lv_segs) + np.sum(a2c_la_segs) + np.sum(a2c_lvo_segs) np_arrays_x3.append(np.array(a2c_lv_segs).astype("uint8")) np_arrays_x3.append(np.array(a2c_la_segs).astype("uint8")) np_arrays_x3.append(np.array(a2c_lvo_segs).astype("uint8")) number_frames = (np.array(a2c_lvo_segs).astype("uint8").shape)[0] - model_name = "a2c_45_20_all_model.ckpt-10600" - - j = 0 + model_name = a2c_segmentation_model + j = 0 nrow = orig_images[0].shape[0] ncol = orig_images[0].shape[1] plt.figure(figsize=(5, 5)) @@ -142,11 +158,13 @@ def segmentChamber(videofile, dicomdir, view, model_path): (outpath + "/" + videofile + "_" + str(j) + "_" + "overlay.png").encode() ).hexdigest() ) + + return [number_frames, model_name, np_arrays_x3, images_uuid_x3, np_in_min, np_in_max, np_total] - return [number_frames, model_name, np_arrays_x3, images_uuid_x3] -def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path): +def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path, + a4c_seg_model, a2c_seg_model): # set up for writing to segmentation schema io_views = dbReadWriteViews() @@ -165,17 +183,25 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path): "output_image_seg", "output_image_orig", "output_image_overlay", - ] + "min_pixel_intensity", + "max_pixel_intensity", + "np_prediction_total" + ] instances_unique_master_list = io_views.get_table("instances_unique_master_list") # below cleans the filename field to remove whitespace instances_unique_master_list["instancefilename"] = instances_unique_master_list[ "instancefilename" ].apply(lambda x: str(x).strip()) + + #below gets the colour scheme lookup + #Get colour_scheme_lookup_table + io_clean = dbReadWriteClean() + colour_scheme_lookup = io_clean.get_table('colour_scheme_lookup') for video in viewlist_a4c: - [number_frames, model_name, np_arrays_x3, images_uuid_x3] = segmentChamber( - video, dcm_path, "a4c", model_path + [number_frames, model_name, np_arrays_x3, images_uuid_x3, np_in_min, np_in_max, np_pred_total] = segmentChamber( + video, dcm_path, "a4c", model_path, a4c_seg_model, colour_scheme_lookup ) instancefilename = video.split("_")[2].split(".")[ 0 @@ -188,9 +214,7 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path): ] df = df.reset_index() instance_id = df.at[0, "instanceidk"] - # Columns names are:prediction_id study_id instance_id file_name - # num_frames model_name date_run output_np_lv output_np_la - # output_np_lvo output_image_seg output_image_orig output_image_overlay + d = [ studyidk, instance_id, @@ -204,12 +228,16 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path): images_uuid_x3[0], images_uuid_x3[1], images_uuid_x3[2], - ] + np_in_min, + np_in_max, + np_pred_total + ] io_segmentation.save_prediction_numpy_array_to_db(d, column_names) + logger.info('Saved an a4c predition') for video in viewlist_a2c: - [number_frames, model_name, np_arrays_x3, images_uuid_x3] = segmentChamber( - video, dcm_path, "a2c", model_path + [number_frames, model_name, np_arrays_x3, images_uuid_x3, np_in_min, np_in_max, np_pred_total] = segmentChamber( + video, dcm_path, "a2c", model_path, a2c_seg_model, colour_scheme_lookup ) instancefilename = video.split("_")[2].split(".")[ 0 @@ -235,9 +263,14 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path): images_uuid_x3[0], images_uuid_x3[1], images_uuid_x3[2], - ] + np_in_min, + np_in_max, + np_pred_total + ] + io_segmentation.save_prediction_numpy_array_to_db(d, column_names) - + logger.info('Saved an a2c predition') + return 1 @@ -278,6 +311,7 @@ def run_segment( model_path, img_dir, classification_model_name, + a4c_seg_model, a2c_seg_model, date_run=datetime.date.today(), ): @@ -326,7 +360,7 @@ def run_segment( viewlist_a2c = viewlist_a2c.to_list() logger.info("{} a2c files added to the view list".format(len(viewlist_a2c))) - segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path) + segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path, a4c_seg_model, a2c_seg_model) end = time.time() viewlist = viewlist_a2c + viewlist_a4c logger.info( From ed7356d28c4fb872d1f5d2f187b134c5d7fe72f3 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Tue, 26 May 2020 19:13:50 +0100 Subject: [PATCH 09/11] added additional rows to the db for seg evaluation and prediction --- conf/infra/models_schema.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conf/infra/models_schema.sql b/conf/infra/models_schema.sql index 127f0b5..6d7e38c 100644 --- a/conf/infra/models_schema.sql +++ b/conf/infra/models_schema.sql @@ -164,6 +164,9 @@ create table segmentation.predictions( output_image_seg varchar, output_image_orig varchar, output_image_overlay varchar, + min_pixel_intensity float, + max_pixel_intensity float, + np_prediction_total float, primary key(prediction_id) ); @@ -182,6 +185,8 @@ create table segmentation.evaluations( score_value float, gt_view_name varchar, pred_view_name varchar, + dataset varchar, + seg_model varchar, primary key(evaluation_id) ); From f7406e573f19f3cf1acdc07f49c72ca4a9b9d0c8 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Tue, 26 May 2020 19:14:14 +0100 Subject: [PATCH 10/11] added additional rows to the db for seg evaluation and prediction --- src/usal_echo/d00_utils/db_utils.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/usal_echo/d00_utils/db_utils.py b/src/usal_echo/d00_utils/db_utils.py index aa7f1f3..fcc6f12 100644 --- a/src/usal_echo/d00_utils/db_utils.py +++ b/src/usal_echo/d00_utils/db_utils.py @@ -183,10 +183,15 @@ def __init__(self): self.engine.execute(CreateSchema(self.schema)) def save_prediction_numpy_array_to_db(self, binary_data_array, column_names): - # Columns names are:prediction_id study_id instance_id file_name - # num_frames model_name date_run output_np_lv output_np_la - # output_np_lvo output_image_seg output_image_orig output_image_overlay - sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}')".format( + # Columns names are: + # prediction_id serial, study_id integer, instance_id integer, file_name varchar, + # num_frames integer, model_name varchar, date_run timestamp with time zone, + # output_np_lv bytea, output_np_la bytea, output_np_lvo bytea, output_image_seg varchar, + # output_image_orig varchar, output_image_overlay varchar, min_pixel_intensity float, + # max_pixel_intensity float, np_prediction_total float + + + sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}', '{}', '{}', '{}')".format( self.schema, "predictions", ",".join(column_names), @@ -202,6 +207,9 @@ def save_prediction_numpy_array_to_db(self, binary_data_array, column_names): binary_data_array[9], binary_data_array[10], binary_data_array[11], + binary_data_array[12], + binary_data_array[13], + binary_data_array[14] ) self.cursor.execute(sql) self.raw_conn.commit() @@ -266,7 +274,7 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"): # Create new database table from empty dataframe # df.to_sql('evaluation', self.engine, self.schema, if_exists, index=False) - sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format( + sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format( self.schema, "evaluations", ",".join(column_names), @@ -278,6 +286,9 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"): df[5], df[6], df[7], + df[8], + df[9], + df[10] ) self.cursor.execute(sql) self.raw_conn.commit() From ae88cb4eae6717cd1bd1a15eae054f17bc32a968 Mon Sep 17 00:00:00 2001 From: courtneyjean84 Date: Tue, 26 May 2020 19:49:03 +0100 Subject: [PATCH 11/11] cleaning up extra logging statements --- requirements.txt | 1 + src/usal_echo/d04_segmentation/evaluate_masks.py | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index f805dd1..83efdbd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ black>=19.3b0 shapely>=1.6 pyinquirer>=1.0 tqdm>=4.34 +medpy>=0.4.0 \ No newline at end of file diff --git a/src/usal_echo/d04_segmentation/evaluate_masks.py b/src/usal_echo/d04_segmentation/evaluate_masks.py index aa84cda..631d8c7 100644 --- a/src/usal_echo/d04_segmentation/evaluate_masks.py +++ b/src/usal_echo/d04_segmentation/evaluate_masks.py @@ -40,7 +40,6 @@ def evaluate_masks(dcm_dir_path): filenames.append(str(fullfilename).split(".")[0].split("_")[-1]) logger.info("Number of files in the directory: {}".format(len(file_path))) - logger.info("example filename: {}".format(filenames[1])) io_segmentation = dbReadWriteSegmentation() ground_truths = io_segmentation.get_segmentation_table("ground_truths") @@ -64,14 +63,10 @@ def evaluate_masks(dcm_dir_path): gt_frame_no = gt["frame"] gt_file_name = gt["file_name"] - #get voxel spacing for the gt image - logger.info('filename im looking for: {}'.format(gt['file_name'])) - logger.info('format of filenames in voxel_spacing_df: {}'.format(voxel_spacing_df.index[0])) #take the min of x or y scale spacing (appear to be the same for all files) try: voxel_spacing = float(voxel_spacing_df.loc[gt['file_name']]['value']) - logger.info('what the voxel_spacing looks like {}'.format(voxel_spacing)) except TypeError: logger.info('voxel spacing can not be converted to a float') voxel_spacing = 0.013 @@ -200,5 +195,4 @@ def get_voxel_spacing_for_instances(df): df_dcm["tag1"] = df_dcm["tag1"].astype(str) # consistency with tag2 voxel_spacing_df = df_dcm.loc[(df_dcm["tag1"] == "18") & (df_dcm["tag2"] == "602c")] #just return x spacing - logger.info('voxel spacing table obtained with {} rows'.format(voxel_spacing_df.shape)) return voxel_spacing_df