From 0395a83ddcd9683648c04d1c9d1a9159c9911054 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 11:10:07 +0100
Subject: [PATCH 01/11] added function to create a colour scheme lookup table

---
 src/usal_echo/d02_intermediate/clean_dcm.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/usal_echo/d02_intermediate/clean_dcm.py b/src/usal_echo/d02_intermediate/clean_dcm.py
index 94c57d5..12f0796 100644
--- a/src/usal_echo/d02_intermediate/clean_dcm.py
+++ b/src/usal_echo/d02_intermediate/clean_dcm.py
@@ -41,4 +41,14 @@ def clean_dcm_meta():
     meta_lite = metadata[metadata["tags"].isin(dicom_tags.values())]
 
     io_clean.save_to_db(meta_lite, "meta_lite")
+    
+    # Build a colour scheme lookup keyed by filename from DICOM tag
+    # (0028,0004), Photometric Interpretation; one row is kept per filename.
+    colour_scheme_lookup =  meta_lite[(meta_lite['tag1'] == '0028') & (meta_lite['tag2'] == '0004')].copy()
+    colour_scheme_lookup =  colour_scheme_lookup.drop_duplicates()
+    colour_scheme_lookup =  colour_scheme_lookup.drop_duplicates(subset='filename', keep='first')
+    colour_scheme_lookup =  colour_scheme_lookup.rename(columns={'value':'colour_scheme'})
+    
+    io_clean.save_to_db(colour_scheme_lookup, "colour_scheme_lookup")
+    
     logger.info("Metadata filtered.")

From bb36665ad73d30e0d8aa3e2f2d09f5d4403b7e63 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 11:21:22 +0100
Subject: [PATCH 02/11] little bug fix on the union of tables

---
 src/usal_echo/d02_intermediate/instance_filters/filter_views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/usal_echo/d02_intermediate/instance_filters/filter_views.py b/src/usal_echo/d02_intermediate/instance_filters/filter_views.py
index 38cd44c..18c01ef 100644
--- a/src/usal_echo/d02_intermediate/instance_filters/filter_views.py
+++ b/src/usal_echo/d02_intermediate/instance_filters/filter_views.py
@@ -256,7 +256,7 @@ def filter_by_views():
     inst_3 = df_ultra_color_filt["instanceidk"].tolist()
 
     # Get instances that passed all filtering steps
-    inst_final = list(set(inst_1) & set(inst_2) & set(inst_3))
+    inst_final = list(set().union(inst_1, inst_2, inst_3))
 
     # Filter out instances that do not meet the dicom metadata criteria
     df = df_inst_all

From e74055cfa129aab0e8c8edb96ee335dcc1ee7888 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 11:31:17 +0100
Subject: [PATCH 03/11] added a couple of lines of code to filter out any blank
 masks from the ground truth table

---
 .../d04_segmentation/generate_masks.py        | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/usal_echo/d04_segmentation/generate_masks.py b/src/usal_echo/d04_segmentation/generate_masks.py
index cac42f6..cf28a7f 100644
--- a/src/usal_echo/d04_segmentation/generate_masks.py
+++ b/src/usal_echo/d04_segmentation/generate_masks.py
@@ -38,24 +38,26 @@ def generate_masks(dcm_path):
         "view_name",
         "numpy_array",
     ]
+    counter = 0
 
     for index, mask in masks_df.iterrows():
         resized_mask = imresize(mask["mask"], (384, 384))
-        d = [
-            int(mask["studyidk"]),
-            mask["instanceidk"],
-            mask["instancefilename"],
-            int(mask["frame"]),
-            mask["chamber"],
-            mask["view"],
-            resized_mask,
-        ]
-
-        io_segmentation.save_ground_truth_numpy_array_to_db(d, gt_table_column_names)
+        if resized_mask.sum() != 0:  # skip blank (all-zero) masks; sum must be *called*, the bare method is never 0
+            d = [  # row values, in the same order as gt_table_column_names
+                int(mask["studyidk"]),
+                mask["instanceidk"],
+                mask["instancefilename"],
+                int(mask["frame"]),
+                mask["chamber"],
+                mask["view"],
+                resized_mask,
+            ]
+            io_segmentation.save_ground_truth_numpy_array_to_db(d, gt_table_column_names)
+            counter += 1  # count only the masks actually written, for the log message below
 
     logger.info(
         "{} ground truth masks written to the segmentation.ground_truths table".format(
-            masks_df.shape[0]
+            counter
         )
     )
 

From 2a0cbe75b9ffc52dc5776b2d3e1c7a41901c4c3b Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 11:36:36 +0100
Subject: [PATCH 04/11] =?UTF-8?q?removed=20the=20resnet=20features=20from?=
 =?UTF-8?q?=20the=20Zhang=20code=20that=20aren=E2=80=99t=20required?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../d04_segmentation/segment_utils.py         | 531 ------------------
 1 file changed, 531 deletions(-)

diff --git a/src/usal_echo/d04_segmentation/segment_utils.py b/src/usal_echo/d04_segmentation/segment_utils.py
index cafecbf..deeff27 100644
--- a/src/usal_echo/d04_segmentation/segment_utils.py
+++ b/src/usal_echo/d04_segmentation/segment_utils.py
@@ -241,534 +241,3 @@ def iou(gt, pred, seg):
     overlap = np.minimum(gt_seg, pred_seg)
     return 2 * np.sum(overlap) / (np.sum(gt_seg) + np.sum(pred_seg))
 
-
-############################
-# Resnet Functions #
-############################
-
-_BATCH_NORM_DECAY = 0.95
-_BATCH_NORM_EPSILON = 1e-5
-
-
-def batch_norm_relu(inputs, is_training, data_format):
-    """Performs a batch normalization followed by a ReLU."""
-    # We set fused=True for a significant performance boost. See
-    # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
-    inputs = tf.layers.batch_normalization(
-        inputs=inputs,
-        axis=1 if data_format == "channels_first" else 3,
-        momentum=_BATCH_NORM_DECAY,
-        epsilon=_BATCH_NORM_EPSILON,
-        center=True,
-        scale=True,
-        training=is_training,
-    )
-    inputs = tf.nn.relu(inputs)
-    return inputs
-
-
-def fixed_padding(inputs, kernel_size, data_format):
-    """Pads the input along the spatial dimensions independently of input size.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
-                 Should be a positive integer.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    A tensor with the same format as the input with the data either intact
-    (if kernel_size == 1) or padded (if kernel_size > 1).
-  """
-    pad_total = kernel_size - 1
-    pad_beg = pad_total // 2
-    pad_end = pad_total - pad_beg
-
-    if data_format == "channels_first":
-        padded_inputs = tf.pad(
-            inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]]
-        )
-    else:
-        padded_inputs = tf.pad(
-            inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]
-        )
-    return padded_inputs
-
-
-def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
-    """Strided 2-D convolution with explicit padding."""
-    # The padding is consistent and is based only on `kernel_size`, not on the
-    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
-    if strides > 1:
-        inputs = fixed_padding(inputs, kernel_size, data_format)
-
-    return tf.layers.conv2d(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=kernel_size,
-        strides=strides,
-        padding=("SAME" if strides == 1 else "VALID"),
-        use_bias=False,
-        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(factor=1.0),
-        data_format=data_format,
-    )
-
-
-def dilated_conv2d_fixed_padding(
-    inputs, num_filters, kernel_size, rate, data_format, name
-):
-    """Strided 2-D convolution with explicit padding."""
-    # The padding is consistent and is based only on `kernel_size`, not on the
-    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
-    with tf.variable_scope(name):
-        input_channels = int(inputs.get_shape()[-1])
-        filter_size = kernel_size
-        weights = tf.get_variable(
-            "W",
-            shape=[filter_size, filter_size, input_channels, num_filters],
-            initializer=tf.contrib.layers.xavier_initializer(),
-            collections=["variables"],
-        )
-        conv_out = tf.nn.atrous_conv2d(
-            value=inputs, filters=weights, rate=rate, padding="SAME"
-        )
-    return conv_out
-
-
-def building_block(
-    inputs, filters, is_training, projection_shortcut, strides, data_format
-):
-    """Standard building block for residual networks with BN before convolutions.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the convolutions.
-    is_training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts (typically
-      a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    The output tensor of the block.
-  """
-    shortcut = inputs
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-
-    # The projection shortcut should come after the first batch norm and ReLU
-    # since it performs a 1x1 convolution.
-    if projection_shortcut is not None:
-        shortcut = projection_shortcut(inputs)
-
-    inputs = conv2d_fixed_padding(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=3,
-        strides=strides,
-        data_format=data_format,
-    )
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = conv2d_fixed_padding(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=3,
-        strides=1,
-        data_format=data_format,
-    )
-
-    return inputs + shortcut
-
-
-def building_block_dilated(
-    inputs,
-    num_filters,
-    is_training,
-    projection_shortcut,
-    rate,
-    data_format,
-    name,
-    skip=1,
-):
-    """Standard building block for residual networks with BN before convolutions.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the convolutions.
-    is_training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts (typically
-      a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    The output tensor of the block.
-  """
-    shortcut = inputs
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-
-    # The projection shortcut should come after the first batch norm and ReLU
-    # since it performs a 1x1 convolution.
-    if projection_shortcut is not None:
-        shortcut = projection_shortcut(inputs)
-
-    inputs = dilated_conv2d_fixed_padding(
-        inputs=inputs,
-        num_filters=num_filters,
-        kernel_size=3,
-        rate=rate,
-        data_format=data_format,
-        name=name + "_1",
-    )
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = dilated_conv2d_fixed_padding(
-        inputs=inputs,
-        num_filters=num_filters,
-        kernel_size=3,
-        rate=rate,
-        data_format=data_format,
-        name=name + "_2",
-    )
-    if skip:
-        return inputs + shortcut
-    else:
-        return inputs
-
-
-def bottleneck_block(
-    inputs, filters, is_training, projection_shortcut, strides, data_format
-):
-    """Bottleneck block variant for residual networks with BN before convolutions.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the first two convolutions. Note that the
-      third and final convolution will use 4 times as many filters.
-    is_training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts (typically
-      a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    The output tensor of the block.
-  """
-    shortcut = inputs
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-
-    # The projection shortcut should come after the first batch norm and ReLU
-    # since it performs a 1x1 convolution.
-    if projection_shortcut is not None:
-        shortcut = projection_shortcut(inputs)
-
-    inputs = conv2d_fixed_padding(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=1,
-        strides=1,
-        data_format=data_format,
-    )
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = conv2d_fixed_padding(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=3,
-        strides=strides,
-        data_format=data_format,
-    )
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = conv2d_fixed_padding(
-        inputs=inputs,
-        filters=4 * filters,
-        kernel_size=1,
-        strides=1,
-        data_format=data_format,
-    )
-
-    return inputs + shortcut
-
-
-def bottleneck_block_dilated(
-    inputs, num_filters, is_training, projection_shortcut, rate, data_format, skip=1
-):
-    """Bottleneck block variant for residual networks with BN before convolutions.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the first two convolutions. Note that the
-      third and final convolution will use 4 times as many filters.
-    is_training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts (typically
-      a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    The output tensor of the block.
-  """
-    shortcut = inputs
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-
-    # The projection shortcut should come after the first batch norm and ReLU
-    # since it performs a 1x1 convolution.
-    if projection_shortcut is not None:
-        shortcut = projection_shortcut(inputs)
-
-    inputs = dilated_conv2d_fixed_padding(
-        inputs=inputs,
-        num_filters=num_filters,
-        kernel_size=1,
-        rate=rate,
-        data_format=data_format,
-    )
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = dilated_conv2d_fixed_padding(
-        inputs=inputs,
-        num_filters=num_filters,
-        kernel_size=3,
-        rate=rate,
-        data_format=data_format,
-    )
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = dilated_conv2d_fixed_padding(
-        inputs=inputs,
-        num_filters=4 * num_filters,
-        kernel_size=1,
-        rate=rate,
-        data_format=data_format,
-    )
-    if skip:
-        return inputs + shortcut
-    else:
-        return inputs
-
-
-def block_layer(
-    inputs, filters, block_fn, blocks, strides, is_training, name, data_format
-):
-    """Creates one layer of blocks for the ResNet model.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the first convolution of the layer.
-    block_fn: The block to use within the model, either `building_block` or
-      `bottleneck_block`.
-    blocks: The number of blocks contained in the layer.
-    strides: The stride to use for the first convolution of the layer. If
-      greater than 1, this layer will ultimately downsample the input.
-    is_training: Either True or False, whether we are currently training the
-      model. Needed for batch norm.
-    name: A string name for the tensor output of the block layer.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    The output tensor of the block layer.
-  """
-    # Bottleneck blocks end with 4x the number of filters as they start with
-    filters_out = 4 * filters if block_fn is bottleneck_block else filters
-
-    def projection_shortcut(inputs):
-        return conv2d_fixed_padding(
-            inputs=inputs,
-            filters=filters_out,
-            kernel_size=1,
-            strides=strides,
-            data_format=data_format,
-        )
-
-    # Only the first block per block_layer uses projection_shortcut and strides
-    inputs = block_fn(
-        inputs, filters, is_training, projection_shortcut, strides, data_format
-    )
-
-    for _ in range(1, blocks):
-        inputs = block_fn(inputs, filters, is_training, None, 1, data_format)
-
-    return tf.identity(inputs, name)
-
-
-def block_layer_dilated(
-    inputs, num_filters, block_fn, blocks, rate, is_training, name, data_format, skip=1
-):
-    """Creates one layer of blocks for the ResNet model.
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the first convolution of the layer.
-    block_fn: The block to use within the model, either `building_block` or
-      `bottleneck_block`.
-    blocks: The number of blocks contained in the layer.
-    strides: The stride to use for the first convolution of the layer. If
-      greater than 1, this layer will ultimately downsample the input.
-    is_training: Either True or False, whether we are currently training the
-      model. Needed for batch norm.
-    name: A string name for the tensor output of the block layer.
-    data_format: The input format ('channels_last' or 'channels_first').
-  Returns:
-    The output tensor of the block layer.
-  """
-    # Bottleneck blocks end with 4x the number of filters as they start with
-    filters_out = 4 * num_filters if block_fn is bottleneck_block else num_filters
-    inputs = tf.transpose(inputs, [0, 2, 3, 1])
-
-    def projection_shortcut(inputs):
-        return dilated_conv2d_fixed_padding(
-            inputs=inputs,
-            num_filters=filters_out,
-            kernel_size=1,
-            rate=rate,
-            data_format=data_format,
-            name=name,
-        )
-
-    # Only the first block per block_layer uses projection_shortcut and strides
-    inputs = block_fn(
-        inputs,
-        num_filters,
-        is_training,
-        projection_shortcut,
-        rate,
-        data_format,
-        name,
-        skip,
-    )
-
-    for _ in range(1, blocks):
-        inputs = block_fn(
-            inputs,
-            num_filters,
-            is_training,
-            None,
-            rate,
-            data_format,
-            name + "_" + str(_),
-            skip,
-        )
-
-    inputs = tf.transpose(inputs, [0, 3, 1, 2])
-    return tf.identity(inputs, name)
-
-
-def imagenet_resnet_v2_generator(block_fn, layers, num_classes, data_format=None):
-    """Generator for ImageNet ResNet v2 models.
-  Args:
-    block_fn: The block to use within the model, either `building_block` or
-      `bottleneck_block`.
-    layers: A length-4 array denoting the number of blocks to include in each
-      layer. Each layer consists of blocks that take inputs of the same size.
-    num_classes: The number of possible classes for image classification.
-    data_format: The input format ('channels_last', 'channels_first', or None).
-      If set to None, the format is dependent on whether a GPU is available.
-  Returns:
-    The model function that takes in `inputs` and `is_training` and
-    returns the output tensor of the ResNet model.
-  """
-    if data_format is None:
-        data_format = (
-            "channels_first" if tf.test.is_built_with_cuda() else "channels_last"
-        )
-
-    def model(inputs, is_training):
-        """Constructs the ResNet model given the inputs."""
-        if data_format == "channels_first":
-            # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
-            # This provides a large performance boost on GPU. See
-            # https://www.tensorflow.org/performance/performance_guide#data_formats
-            inputs = tf.transpose(inputs, [0, 3, 1, 2])
-
-        inputs = conv2d_fixed_padding(
-            inputs=inputs, filters=64, kernel_size=7, strides=2, data_format=data_format
-        )
-        inputs = tf.identity(inputs, "initial_conv")
-        inputs = tf.layers.max_pooling2d(
-            inputs=inputs,
-            pool_size=3,
-            strides=2,
-            padding="SAME",
-            data_format=data_format,
-        )
-        inputs = tf.identity(inputs, "initial_max_pool")
-
-        inputs = block_layer(
-            inputs=inputs,
-            filters=64,
-            block_fn=block_fn,
-            blocks=layers[0],
-            strides=1,
-            is_training=is_training,
-            name="block_layer1",
-            data_format=data_format,
-        )
-        inputs = block_layer(
-            inputs=inputs,
-            filters=128,
-            block_fn=block_fn,
-            blocks=layers[1],
-            strides=2,
-            is_training=is_training,
-            name="block_layer2",
-            data_format=data_format,
-        )
-        inputs = block_layer(
-            inputs=inputs,
-            filters=256,
-            block_fn=block_fn,
-            blocks=layers[2],
-            strides=2,
-            is_training=is_training,
-            name="block_layer3",
-            data_format=data_format,
-        )
-        inputs = block_layer(
-            inputs=inputs,
-            filters=512,
-            block_fn=block_fn,
-            blocks=layers[3],
-            strides=2,
-            is_training=is_training,
-            name="block_layer4",
-            data_format=data_format,
-        )
-
-        inputs = batch_norm_relu(inputs, is_training, data_format)
-        inputs = tf.layers.average_pooling2d(
-            inputs=inputs,
-            pool_size=7,
-            strides=1,
-            padding="VALID",
-            data_format=data_format,
-        )
-        inputs = tf.identity(inputs, "final_avg_pool")
-        inputs = tf.reshape(inputs, [-1, 512 if block_fn is building_block else 2048])
-        inputs = tf.layers.dense(inputs=inputs, units=num_classes)
-        inputs = tf.identity(inputs, "final_dense")
-        return inputs
-
-    return model
-
-
-def imagenet_resnet_v2(resnet_size, num_classes, data_format=None):
-    """Returns the ResNet model for a given size and number of output classes."""
-    model_params = {
-        18: {"block": building_block, "layers": [2, 2, 2, 2]},
-        34: {"block": building_block, "layers": [3, 4, 6, 3]},
-        50: {"block": bottleneck_block, "layers": [3, 4, 6, 3]},
-        101: {"block": bottleneck_block, "layers": [3, 4, 23, 3]},
-        152: {"block": bottleneck_block, "layers": [3, 8, 36, 3]},
-        200: {"block": bottleneck_block, "layers": [3, 24, 36, 3]},
-    }
-
-    if resnet_size not in model_params:
-        raise ValueError("Not a valid resnet_size:", resnet_size)
-
-    params = model_params[resnet_size]
-    return imagenet_resnet_v2_generator(
-        params["block"], params["layers"], num_classes, data_format
-    )

From 82f8135f9c0ef18e6d3338bf487edef3871e8060 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 14:45:46 +0100
Subject: [PATCH 05/11] updated download_dcm file to use just the first colour
 channel; overcoming rgb, ybr conversion issues

---
 .../d02_intermediate/download_dcm.py          | 57 ++++---------------
 1 file changed, 12 insertions(+), 45 deletions(-)

diff --git a/src/usal_echo/d02_intermediate/download_dcm.py b/src/usal_echo/d02_intermediate/download_dcm.py
index d066c2b..32d74ac 100644
--- a/src/usal_echo/d02_intermediate/download_dcm.py
+++ b/src/usal_echo/d02_intermediate/download_dcm.py
@@ -6,10 +6,10 @@
 import numpy as np
 import subprocess
 
-from scipy.misc import imresize
+from skimage.transform import resize
+from PIL import Image
 import cv2
 import pydicom
-from skimage.color import rgb2gray
 
 from usal_echo.d00_utils.db_utils import dbReadWriteViews
 from usal_echo.d00_utils.s3_utils import download_s3_objects
@@ -18,13 +18,7 @@
 logger = setup_logging(__name__, __name__)
 
 
-def _ybr2gray(y, u, v):
-    r = y + 1.402 * (v - 128)
-    g = y - 0.34414 * (u - 128) - 0.71414 * (v - 128)
-    b = y + 1.772 * (u - 128)
-    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
 
-    return np.array(gray, dtype="int8")
 
 
 def decompress_dcm(dcm_filepath, dcmraw_filepath):
@@ -60,7 +54,7 @@ def _split_train_test(ratio, table_name):
     df = io_views.get_table(table_name)
 
     np.random.seed(0)
-    msk = np.random.rand(len(df)) < ratio
+    msk = np.random.rand(len(df)) > ratio
     df_train = df[msk].reset_index(drop=True)
     df_test = df[~msk].reset_index(drop=True)
 
@@ -110,7 +104,7 @@ def s3_download_decomp_dcm(
     :param downsample_ratio (float): percentage by which to downsample dataset
                          e.g. if ratio=0.1, will downsample by a factor of 10
     :param train_test_ratio (float): ratio for splitting into train/test
-    :param table_name (str): name of views.table with master instancest
+    :param table_name (str): name of views.table with master instances
     :param train (bool): download train set instead of test set, default=False
     
     """
@@ -192,46 +186,18 @@ def _dcmraw_to_np(dcmraw_obj):
 
     if len(pxl_array.shape) == 4:  # format 3, nframes, nrow, ncol
         nframes = pxl_array.shape[1]
-        maxframes = nframes * 3
     elif len(pxl_array.shape) == 3:  # format nframes, nrow, ncol
         nframes = pxl_array.shape[0]
-        maxframes = nframes * 1
 
     nrow = int(dcmraw_obj.Rows)
     ncol = int(dcmraw_obj.Columns)
     ArrayDicom = np.zeros((nrow, ncol), dtype=pxl_array.dtype)
     framedict = {}
-
-    for counter in range(0, maxframes, 3):  # iterate through all subframes
-        k = counter % nframes
-        j = (counter) // nframes
-        m = (counter + 1) % nframes
-        l = (counter + 1) // nframes
-        o = (counter + 2) % nframes
-        n = (counter + 2) // nframes
-
-        if len(pxl_array.shape) == 4:
-            a = pxl_array[j, k, :, :]
-            b = pxl_array[l, m, :, :]
-            c = pxl_array[n, o, :, :]
-            ArrayDicom[:, :] = _ybr2gray(a, b, c)
-            ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0  # blanks out name
-            counter = counter + 1
-            ArrayDicom.clip(0)
-            nrowout = nrow
-            ncolout = ncol
-            x = int(counter / 3)
-            framedict[x] = imresize(ArrayDicom, (nrowout, ncolout))
-        elif len(pxl_array.shape) == 3:
-            ArrayDicom[:, :] = pxl_array[counter, :, :]
-            ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0  # blanks out name
-            counter = counter + 1
-            ArrayDicom.clip(0)
-            nrowout = nrow
-            ncolout = ncol
-            x = int(counter / 3)
-            framedict[x] = imresize(ArrayDicom, (nrowout, ncolout))
-
+    
+    for i in range(nframes):  # one 2-D image per frame, keyed by frame index
+        ArrayDicom[:, :] = pxl_array[0, i] if pxl_array.ndim == 4 else pxl_array[i]  # 4-D: first colour channel only; 3-D: frame as-is
+        ArrayDicom[0 : int(nrow / 10), 0 : int(ncol)] = 0  # blanks out name
+        framedict[i] = ArrayDicom.copy()  # copy, because ArrayDicom is reused as scratch on the next iteration
     return framedict
 
 
@@ -332,8 +298,9 @@ def dcm_to_segmentation_arrays(dcm_dir, filename):
 
         for key in list(framedict.keys()):
             image = np.zeros((384, 384))
-            image[:, :] = imresize(rgb2gray(framedict[key]), (384, 384, 1))
-            images.append(image)
+            image[:, :] = resize(framedict[key], (384, 384), anti_aliasing=True)
+            image = 255 * image
+            images.append(image.astype(np.uint8))
             orig_images.append(framedict[key])
 
         images = np.array(images).reshape((len(images), 384, 384, 1))

From cf9a6e34d9f2a38783a9342e113cac4dc28e9677 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 15:12:29 +0100
Subject: [PATCH 06/11] the evaluate mask function updated to calculate a range
 of evaluation measures

---
 .../d04_segmentation/evaluate_masks.py        | 157 ++++++++++++++----
 1 file changed, 122 insertions(+), 35 deletions(-)

diff --git a/src/usal_echo/d04_segmentation/evaluate_masks.py b/src/usal_echo/d04_segmentation/evaluate_masks.py
index 7c2fa2c..aa84cda 100644
--- a/src/usal_echo/d04_segmentation/evaluate_masks.py
+++ b/src/usal_echo/d04_segmentation/evaluate_masks.py
@@ -6,15 +6,17 @@
 
 @author: court
 """
+import os
 import numpy as np
+from medpy.metric.binary import dc, jc, hd, precision, recall, sensitivity, specificity
 
-from usal_echo.d00_utils.db_utils import dbReadWriteSegmentation
+from usal_echo.d00_utils.db_utils import dbReadWriteClean, dbReadWriteSegmentation
 from usal_echo.d00_utils.log_utils import setup_logging
 
 logger = setup_logging(__name__, __name__)
 
 
-def evaluate_masks():
+def evaluate_masks(dcm_dir_path):
     # Go through the ground truth table and write IOUS
 
     # Prediction Table: "instance_id","study_id", "view_name", "frame", "output_np_lv", "output_np_la",
@@ -22,20 +24,64 @@ def evaluate_masks():
     #        "file_name"
     # Ground truth table: ground_truth_id, instance_id, frame, chamber, study_id, view_name, numpy_array
     # Evaluation Table: evaluation_id, instance_id, frame, chamber, study_id, score_type, score_value
+    
+    #just run eval for dcm_dir_path
+    path = dcm_dir_path
+    dataset_name = str(dcm_dir_path).split('/')[-1]
 
-    io_segmentation = dbReadWriteSegmentation()
-    ground_truths = io_segmentation.get_segmentation_table("ground_truths")
+    file_path = []
+    filenames = []
 
-    # Go through the ground truth table and write IOUS
+    for r, d, f in os.walk(path):  # collect every *dcm_raw file below dcm_dir_path
+        for file in f:
+            if file.endswith("dcm_raw"):
+                file_path.append(os.path.join(r, file))
+                fullfilename = os.path.basename(os.path.join(r, file))
+                filenames.append(str(fullfilename).split(".")[0].split("_")[-1])  # text before first "." -> last "_"-separated token
+
+    logger.info("Number of files in the directory: {}".format(len(file_path)))
+    logger.info("example filename: {}".format(filenames[0] if filenames else None))  # guarded: the list may hold fewer than two names
 
-    for index, gt in ground_truths.iterrows():
+    io_segmentation = dbReadWriteSegmentation()
+    ground_truths = io_segmentation.get_segmentation_table("ground_truths")
+    
+    #match ground truth with filenames
+    ground_truth_files = ground_truths[ground_truths['file_name'].isin(filenames)]
+    
+    #get table of voxel spacing values
+    voxel_spacing_df = get_voxel_spacing_for_instances(ground_truth_files)
+    #set index to be filename
+    voxel_spacing_df = voxel_spacing_df.set_index('file_name')
+
+    # Go through the ground truth table and write IOUS, DICE and Hausdorff distance
+
+    for index, gt in ground_truth_files.iterrows(): #only run for files in directory!
         # match the gt to the prediction table
         gt_instance_id = gt["instance_id"]
         gt_study_id = gt["study_id"]
         gt_chamber = gt["chamber"]
         gt_view_name = gt["view_name"]
         gt_frame_no = gt["frame"]
-
+        gt_file_name = gt["file_name"]
+        
+        #get voxel spacing for the gt image
+        logger.info('filename im looking for: {}'.format(gt['file_name']))
+        logger.info('format of filenames in voxel_spacing_df: {}'.format(voxel_spacing_df.index[0]))
+        
+        #take the min of x or y scale spacing (appear to be the same for all files)
+        try:
+            voxel_spacing = float(voxel_spacing_df.loc[gt['file_name']]['value'])
+            logger.info('what the voxel_spacing looks like {}'.format(voxel_spacing))
+        except (TypeError, ValueError):
+            # float() raises TypeError for non-scalar/None values and ValueError for non-numeric strings
+            logger.info('voxel spacing can not be converted to a float')
+            voxel_spacing = 0.013
+
+        # Anything not strictly greater than 0.012 (including NaN) is
+        # replaced by the default spacing of 0.013.
+        if not voxel_spacing > 0.012:
+            voxel_spacing = 0.013
+            
         pred = io_segmentation.get_instance_from_segementation_table(
             "predictions", gt_instance_id
         )
@@ -47,7 +93,9 @@ def evaluate_masks():
         )
 
         if len(pred.index) > 0:
+            pred_last = pred.head(1)
             pred_view_name = gt["view_name"]
+            pred_seg_model = pred_last['model_name']
             # retrieve gt numpy array
             gt_numpy_array = io_segmentation.convert_to_np(
                 gt["numpy_array"], 1
@@ -70,33 +118,62 @@ def evaluate_masks():
             # get the frame of the prediction, that corresponds to the frame of the ground thruth
             pred_numpy_array_frame = pred_numpy_array[gt_frame_no, :, :]
 
-            # calculate iou
-            reported_iou = iou(gt_numpy_array, pred_numpy_array_frame)
-            logger.info("IOU of: {}".format(reported_iou))
-
-            # write to db
+            # calculate measures
+            reported_iou = jc(gt_numpy_array, pred_numpy_array_frame)            
+            reported_dice = dc(gt_numpy_array, pred_numpy_array_frame)    
+            reported_precision = precision(gt_numpy_array, pred_numpy_array_frame)
+            reported_recall = recall(gt_numpy_array, pred_numpy_array_frame)
+            reported_sensitivity = sensitivity(gt_numpy_array, pred_numpy_array_frame)
+            reported_specificity = specificity(gt_numpy_array, pred_numpy_array_frame)
+            
+            zhang_dice = zhang_modified_dice(gt_numpy_array, pred_numpy_array_frame)
+            
+            try:
+                reported_hausdorff = hd(gt_numpy_array, pred_numpy_array_frame, voxelspacing=voxel_spacing)
+            except:
+                reported_hausdorff = 0;
+                logger.error('hausdorf distance function fails when array equals zero')    
+            
+            # write evaluation metrics to db
             # Evaluation Table: evaluation_id, instance_id, frame, chamber, study_id, score_type, score_value
             d_columns = [
                 "instance_id",
                 "frame",
+                "file_name",
                 "chamber",
                 "study_id",
                 "score_type",
                 "score_value",
                 "gt_view_name",
                 "pred_view_name",
-            ]
-            d = [
-                gt_instance_id,
-                gt["frame"],
-                gt_chamber,
-                gt_study_id,
-                "iou",
-                reported_iou,
-                gt_view_name,
-                pred_view_name,
-            ]
-            io_segmentation.save_seg_evaluation_to_db(d, d_columns)
+                "dataset",
+                "model_name"]
+            
+            metric_list = {"Jaccard": reported_iou
+                           , "Dice": reported_dice
+                           , "Hausdorff": reported_hausdorff
+                           , "Precison" : reported_precision
+                           , "Recall" : reported_recall
+                           , "Sensitivity" : reported_sensitivity
+                           , "Specificity": reported_specificity
+                           , "Zhang modified dice": zhang_dice
+                           }
+            
+            for label, value in metric_list.items():
+                d = [gt_instance_id
+                     , gt["frame"]
+                     , gt_file_name
+                     , gt_chamber
+                     , gt_study_id
+                     , label
+                     , value
+                     , gt_view_name
+                     , pred_view_name
+                     , dataset_name
+                     , pred_seg_model]
+                io_segmentation.save_seg_evaluation_to_db(d, d_columns)
+                logger.info("{} metric record, with value of {}".format(label, value))
+        
         else:
             logger.error(
                 "No record exists for study id {} & instance id {}".format(
@@ -104,14 +181,24 @@ def evaluate_masks():
                 )
             )
 
-
-def iou(gt, pred):
-    gt_bool = np.array(gt, dtype=bool)
-    pred_bool = np.array(pred, dtype=bool)
-
-    overlap = gt_bool * pred_bool  # Logical AND
-    union = gt_bool + pred_bool  # Logical OR
-
-    IOU = float(overlap.sum()) / float(union.sum())
-
-    return IOU
+def zhang_modified_dice(gt, pred): #, seg):
+    # Dice-style overlap score: 2 * sum(min(gt, pred)) / (sum(gt) + sum(pred)).
+    # NOTE(review): two all-zero masks give 0/0 here; the per-`seg` variant is not implemented.
+    overlap = np.minimum(gt, pred)
+    
+    return 2*np.sum(overlap)/(np.sum(gt) + np.sum(pred))
+
+def get_voxel_spacing_for_instances(df):  # NOTE(review): `df` is never read; the result is not restricted to its files
+    io_clean = dbReadWriteClean()
+    df_dcm = io_clean.get_table("meta_lite")  # organised by filename
+    # Normalise the filename column before the tag filter below.
+    df_dcm.rename(columns={"filename": "file_name"}, inplace=True)
+    df_dcm["file_name"] = df_dcm["file_name"].str.rstrip()
+    df_dcm["file_name"] = df_dcm["file_name"].str.replace("a_", "")  # removes every "a_" occurrence, not only a leading prefix
+    df_dcm["value"] = df_dcm["value"].str.replace(".", "0.")  # NOTE(review): presumably turns ".013" into "0.013"; relies on "." being matched literally - confirm
+        
+    df_dcm["tag1"] = df_dcm["tag1"].astype(str)  # consistency with tag2
+    voxel_spacing_df = df_dcm.loc[(df_dcm["tag1"] == "18") & (df_dcm["tag2"] == "602c")] #just return x spacing
+    
+    logger.info('voxel spacing table obtained with {} rows'.format(voxel_spacing_df.shape))
+    return voxel_spacing_df   

From 12209d242edafdf16e930e7dfaaf75529fb14a74 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 15:16:33 +0100
Subject: [PATCH 07/11] created additional database tables for identifying
 studies with segmentation labels

---
 .../d04_segmentation/create_seg_view.py       | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/usal_echo/d04_segmentation/create_seg_view.py b/src/usal_echo/d04_segmentation/create_seg_view.py
index dbe2c6a..7c341b0 100644
--- a/src/usal_echo/d04_segmentation/create_seg_view.py
+++ b/src/usal_echo/d04_segmentation/create_seg_view.py
@@ -6,11 +6,14 @@
 
 @author: court
 """
+from usal_echo.d00_utils.log_utils import setup_logging
 from usal_echo.d00_utils.db_utils import dbReadWriteClean, dbReadWriteViews
 from usal_echo.d04_segmentation.segment_utils import *
 
 import pandas as pd
 
+logger = setup_logging(__name__, __name__)
+
 
 def create_seg_view():
     """
@@ -191,3 +194,53 @@ def create_seg_view():
     del a_modvolume_df
 
     io_views.save_to_db(df_9, "chords_by_volume_mask")
+    
+    # create studies_w_segmentation_labels table
+    df_10 = df_9.groupby(
+        ["studyidk", "instanceidk", "indexinmglist"]).agg(
+        {
+            "x1coordinate": list,
+            "y1coordinate": list,
+            "x2coordinate": list,
+            "y2coordinate": list,
+            "chamber": pd.Series.unique,
+            "frame": pd.Series.unique,
+            "view": pd.Series.unique,
+            "instancefilename": pd.Series.unique,
+        }
+    )
+    df_10 = df_10.reset_index()
+    df_11 = df_10[df_10['chamber'] != ""]
+        
+    #get unique study ids
+    df_12 = pd.DataFrame(df_11['studyidk'].unique())
+    df_12.columns = ['studyidk']
+    io_views.save_to_db(df_12, "studies_w_segmentation_labels")
+    
+    #get study id that have a pair of segmentation masks (lv and la) on the same frame
+    
+    gt_LA = df_11[df_11['chamber'] == 'la']
+    gt_LV = df_11[df_11['chamber'] == 'lv']
+    
+    logger.info('gt_LA Shape : {} rows {} columns'.format(gt_LA.shape[0], gt_LA.shape[1]))
+    logger.info('gt_LV Shape : {} rows {} columns'.format(gt_LV.shape[0], gt_LV.shape[1]))
+    
+    #inner join only includes rows where there is a match on the stated columns
+    gt_lv_la_pairs = pd.merge(gt_LA, gt_LV, how='inner', on=['studyidk', 'instanceidk', 'instancefilename', 
+                                                             'frame', 'view'])
+    
+    gt_lv_la_pairs = gt_lv_la_pairs.rename(columns={'ground_truth_id_x':'ground_truth_id_la',
+                                   'chamber_x':'chamber_la',
+                                   'numpy_array_x':'numpy_array_la',
+                                   'ground_truth_id_y':'ground_truth_id_lv',
+                                   'chamber_y':'chamber_lv',
+                                   'numpy_array_y':'numpy_array_lv'})
+    
+    instances_w_lv_la_segmentation_pairs = gt_lv_la_pairs[['studyidk']].copy()  
+    instances_w_lv_la_segmentation_pairs = instances_w_lv_la_segmentation_pairs.drop_duplicates()  
+        
+    logger.info('instances_w_lv_la_segmentation_pairs, shape: {}'.format(instances_w_lv_la_segmentation_pairs.shape))
+    logger.info('instances_w_lv_la_segmentation_pairs format is {}'.format(",".join(instances_w_lv_la_segmentation_pairs.columns)))
+    
+    io_views.save_to_db(instances_w_lv_la_segmentation_pairs, "instances_w_lv_la_segmentation_pairs")
+

From 824d8d3d79dcb8b8c6916ff73f11f45cb841e6a4 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Sun, 10 May 2020 15:44:08 +0100
Subject: [PATCH 08/11] updated segment views to use the model specified in
 path parameters; and to record additional details of np arrays in db

---
 .../d04_segmentation/segment_view.py          | 86 +++++++++++++------
 1 file changed, 60 insertions(+), 26 deletions(-)

diff --git a/src/usal_echo/d04_segmentation/segment_view.py b/src/usal_echo/d04_segmentation/segment_view.py
index 33408bc..cd84286 100644
--- a/src/usal_echo/d04_segmentation/segment_view.py
+++ b/src/usal_echo/d04_segmentation/segment_view.py
@@ -15,15 +15,21 @@
 import tensorflow as tf
 from PIL import Image
 from scipy.misc import imresize
-import datetime
 import hashlib
+import datetime
+
+from usal_echo import (
+    model_dir,
+    a4c_segmentation_model,
+    a2c_segmentation_model
+)
 
 from usal_echo.d00_utils.log_utils import setup_logging
 from usal_echo.d02_intermediate.download_dcm import dcm_to_segmentation_arrays
 from usal_echo.d00_utils.db_utils import (
+    dbReadWriteClean,
     dbReadWriteViews,
-    dbReadWriteClassification,
-    dbReadWriteSegmentation,
+    dbReadWriteSegmentation
 )
 from usal_echo.d03_classification.evaluate_views import _groundtruth_views
 from usal_echo.d04_segmentation.model_unet import Unet
@@ -34,7 +40,7 @@
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
 
-def segmentChamber(videofile, dicomdir, view, model_path):
+def segmentChamber(videofile, dicomdir, view, model_path, seg_model, colour_scheme_lookup):
     """
     
     """
@@ -56,7 +62,7 @@ def segmentChamber(videofile, dicomdir, view, model_path):
         with g_1.as_default():
             saver = tf.train.Saver()
             saver.restore(
-                sess1, os.path.join(modeldir, "a4c_45_20_all_model.ckpt-9000")
+                sess1, os.path.join(modeldir, seg_model)
             )
     elif view == "a2c":
         g_2 = tf.Graph()
@@ -70,37 +76,47 @@ def segmentChamber(videofile, dicomdir, view, model_path):
         with g_2.as_default():
             saver = tf.train.Saver()
             saver.restore(
-                sess2, os.path.join(modeldir, "a2c_45_20_all_model.ckpt-10600")
+                sess2, os.path.join(modeldir, seg_model)
             )
 
     outpath = "/home/ubuntu/data/04_segmentation/" + view + "/"
     if not os.path.exists(outpath):
         os.makedirs(outpath)
+        
+    #look up colour scheme
+    match_filename = 'a_' + str(videofile).split('_')[2].split('.')[0]
+    instance_colour_scheme = colour_scheme_lookup[colour_scheme_lookup['filename'] == match_filename]['colour_scheme'].item()
 
-    images, orig_images = dcm_to_segmentation_arrays(dicomdir, videofile)
+    images, orig_images = dcm_to_segmentation_arrays(dicomdir, videofile, instance_colour_scheme)
     np_arrays_x3 = []
     images_uuid_x3 = []
+    np_in_min = images.min()
+    np_in_max = images.max()
+    
 
     if view == "a4c":
+        logger.info('predicitng a4c view')
         a4c_lv_segs, a4c_la_segs, a4c_lvo_segs, preds = extract_segs(
             images, orig_images, model, sess, 2, 4, 1
         )
+        np_total = np.sum(a4c_lv_segs) + np.sum(a4c_la_segs) + np.sum(a4c_lvo_segs)
         np_arrays_x3.append(np.array(a4c_lv_segs).astype("uint8"))
         np_arrays_x3.append(np.array(a4c_la_segs).astype("uint8"))
         np_arrays_x3.append(np.array(a4c_lvo_segs).astype("uint8"))
         number_frames = (np.array(a4c_lvo_segs).astype("uint8").shape)[0]
-        model_name = "a4c_45_20_all_model.ckpt-9000"
-    elif view == "a2c":
+        model_name = a4c_segmentation_model        
+    if view == "a2c":
+        logger.info('predicitng a2c view')
         a2c_lv_segs, a2c_la_segs, a2c_lvo_segs, preds = extract_segs(
             images, orig_images, model, sess, 2, 3, 1
         )
+        np_total = np.sum(a2c_lv_segs) + np.sum(a2c_la_segs) + np.sum(a2c_lvo_segs)
         np_arrays_x3.append(np.array(a2c_lv_segs).astype("uint8"))
         np_arrays_x3.append(np.array(a2c_la_segs).astype("uint8"))
         np_arrays_x3.append(np.array(a2c_lvo_segs).astype("uint8"))
         number_frames = (np.array(a2c_lvo_segs).astype("uint8").shape)[0]
-        model_name = "a2c_45_20_all_model.ckpt-10600"
-
-    j = 0
+        model_name = a2c_segmentation_model
+    j = 0    
     nrow = orig_images[0].shape[0]
     ncol = orig_images[0].shape[1]
     plt.figure(figsize=(5, 5))
@@ -142,11 +158,13 @@ def segmentChamber(videofile, dicomdir, view, model_path):
             (outpath + "/" + videofile + "_" + str(j) + "_" + "overlay.png").encode()
         ).hexdigest()
     )
+    
+    return [number_frames, model_name, np_arrays_x3, images_uuid_x3, np_in_min, np_in_max, np_total]
 
-    return [number_frames, model_name, np_arrays_x3, images_uuid_x3]
 
 
-def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path):
+def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path, 
+                 a4c_seg_model, a2c_seg_model):
 
     # set up for writing to segmentation schema
     io_views = dbReadWriteViews()
@@ -165,17 +183,25 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path):
         "output_image_seg",
         "output_image_orig",
         "output_image_overlay",
-    ]
+        "min_pixel_intensity",
+        "max_pixel_intensity",
+        "np_prediction_total"
+        ]
 
     instances_unique_master_list = io_views.get_table("instances_unique_master_list")
     # below cleans the filename field to remove whitespace
     instances_unique_master_list["instancefilename"] = instances_unique_master_list[
         "instancefilename"
     ].apply(lambda x: str(x).strip())
+    
+    #below gets the colour scheme lookup
+    #Get colour_scheme_lookup_table
+    io_clean = dbReadWriteClean()
+    colour_scheme_lookup = io_clean.get_table('colour_scheme_lookup')
 
     for video in viewlist_a4c:
-        [number_frames, model_name, np_arrays_x3, images_uuid_x3] = segmentChamber(
-            video, dcm_path, "a4c", model_path
+        [number_frames, model_name, np_arrays_x3, images_uuid_x3, np_in_min, np_in_max, np_pred_total] = segmentChamber(
+            video, dcm_path, "a4c", model_path, a4c_seg_model, colour_scheme_lookup
         )
         instancefilename = video.split("_")[2].split(".")[
             0
@@ -188,9 +214,7 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path):
         ]
         df = df.reset_index()
         instance_id = df.at[0, "instanceidk"]
-        # Columns names are:prediction_id	study_id	instance_id	file_name
-        # num_frames	model_name	date_run	output_np_lv	output_np_la
-        # output_np_lvo	output_image_seg	output_image_orig	output_image_overlay
+        
         d = [
             studyidk,
             instance_id,
@@ -204,12 +228,16 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path):
             images_uuid_x3[0],
             images_uuid_x3[1],
             images_uuid_x3[2],
-        ]
+            np_in_min, 
+            np_in_max,
+            np_pred_total
+            ]
         io_segmentation.save_prediction_numpy_array_to_db(d, column_names)
+        logger.info('Saved an a4c predition')
 
     for video in viewlist_a2c:
-        [number_frames, model_name, np_arrays_x3, images_uuid_x3] = segmentChamber(
-            video, dcm_path, "a2c", model_path
+        [number_frames, model_name, np_arrays_x3, images_uuid_x3, np_in_min, np_in_max, np_pred_total] = segmentChamber(
+            video, dcm_path, "a2c", model_path, a2c_seg_model, colour_scheme_lookup
         )
         instancefilename = video.split("_")[2].split(".")[
             0
@@ -235,9 +263,14 @@ def segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path):
             images_uuid_x3[0],
             images_uuid_x3[1],
             images_uuid_x3[2],
-        ]
+            np_in_min, 
+            np_in_max,
+            np_pred_total
+            ]
+            
         io_segmentation.save_prediction_numpy_array_to_db(d, column_names)
-
+        logger.info('Saved an a2c predition')
+    
     return 1
 
 
@@ -278,6 +311,7 @@ def run_segment(
     model_path,
     img_dir,
     classification_model_name,
+    a4c_seg_model, a2c_seg_model,
     date_run=datetime.date.today(),
 ):
 
@@ -326,7 +360,7 @@ def run_segment(
     viewlist_a2c = viewlist_a2c.to_list()
     logger.info("{} a2c files added to the view list".format(len(viewlist_a2c)))
 
-    segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path)
+    segmentstudy(viewlist_a2c, viewlist_a4c, dcm_path, model_path, a4c_seg_model, a2c_seg_model)
     end = time.time()
     viewlist = viewlist_a2c + viewlist_a4c
     logger.info(

From ed7356d28c4fb872d1f5d2f187b134c5d7fe72f3 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Tue, 26 May 2020 19:13:50 +0100
Subject: [PATCH 09/11] added additional columns to the db for seg evaluation and
 prediction

---
 conf/infra/models_schema.sql | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/conf/infra/models_schema.sql b/conf/infra/models_schema.sql
index 127f0b5..6d7e38c 100644
--- a/conf/infra/models_schema.sql
+++ b/conf/infra/models_schema.sql
@@ -164,6 +164,9 @@ create table segmentation.predictions(
    output_image_seg varchar,
    output_image_orig varchar,
    output_image_overlay varchar,
+   min_pixel_intensity float,
+   max_pixel_intensity float,
+   np_prediction_total float,
    primary key(prediction_id)
 );
 
@@ -182,6 +185,8 @@ create table segmentation.evaluations(
     score_value float,
     gt_view_name varchar,
     pred_view_name varchar,
+    dataset varchar,
+    seg_model varchar,
     primary key(evaluation_id)
 );
 

From f7406e573f19f3cf1acdc07f49c72ca4a9b9d0c8 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Tue, 26 May 2020 19:14:14 +0100
Subject: [PATCH 10/11] added additional columns to the db for seg evaluation and
 prediction

---
 src/usal_echo/d00_utils/db_utils.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/usal_echo/d00_utils/db_utils.py b/src/usal_echo/d00_utils/db_utils.py
index aa7f1f3..fcc6f12 100644
--- a/src/usal_echo/d00_utils/db_utils.py
+++ b/src/usal_echo/d00_utils/db_utils.py
@@ -183,10 +183,15 @@ def __init__(self):
             self.engine.execute(CreateSchema(self.schema))
 
     def save_prediction_numpy_array_to_db(self, binary_data_array, column_names):
-        # Columns names are:prediction_id	study_id	instance_id	file_name
-        # num_frames	model_name	date_run	output_np_lv	output_np_la
-        # output_np_lvo	output_image_seg	output_image_orig	output_image_overlay
-        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}')".format(
+        # Columns names are:
+        # prediction_id serial, study_id integer, instance_id integer, file_name varchar,
+        # num_frames integer, model_name varchar, date_run timestamp with time zone,
+        # output_np_lv bytea, output_np_la bytea, output_np_lvo bytea, output_image_seg varchar,
+        # output_image_orig varchar, output_image_overlay varchar, min_pixel_intensity float,
+        # max_pixel_intensity float, np_prediction_total float
+        
+        
+        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {}, {}, '{}', '{}', '{}', '{}', '{}', '{}')".format(
             self.schema,
             "predictions",
             ",".join(column_names),
@@ -202,6 +207,9 @@ def save_prediction_numpy_array_to_db(self, binary_data_array, column_names):
             binary_data_array[9],
             binary_data_array[10],
             binary_data_array[11],
+            binary_data_array[12],
+            binary_data_array[13],
+            binary_data_array[14]
         )
         self.cursor.execute(sql)
         self.raw_conn.commit()
@@ -266,7 +274,7 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"):
 
         # Create new database table from empty dataframe
         # df.to_sql('evaluation', self.engine, self.schema, if_exists, index=False)
-        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(
+        sql = "insert into {}.{} ({}) values ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(
             self.schema,
             "evaluations",
             ",".join(column_names),
@@ -278,6 +286,9 @@ def save_seg_evaluation_to_db(self, df, column_names, if_exists="append"):
             df[5],
             df[6],
             df[7],
+            df[8],
+            df[9],
+            df[10]
         )
         self.cursor.execute(sql)
         self.raw_conn.commit()

From ae88cb4eae6717cd1bd1a15eae054f17bc32a968 Mon Sep 17 00:00:00 2001
From: courtneyjean84 <courtney_irwin@hotmail.com>
Date: Tue, 26 May 2020 19:49:03 +0100
Subject: [PATCH 11/11] cleaning up extra logging statements

---
 requirements.txt                                 | 1 +
 src/usal_echo/d04_segmentation/evaluate_masks.py | 6 ------
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f805dd1..83efdbd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ black>=19.3b0
 shapely>=1.6
 pyinquirer>=1.0
 tqdm>=4.34
+medpy>=0.4.0
\ No newline at end of file
diff --git a/src/usal_echo/d04_segmentation/evaluate_masks.py b/src/usal_echo/d04_segmentation/evaluate_masks.py
index aa84cda..631d8c7 100644
--- a/src/usal_echo/d04_segmentation/evaluate_masks.py
+++ b/src/usal_echo/d04_segmentation/evaluate_masks.py
@@ -40,7 +40,6 @@ def evaluate_masks(dcm_dir_path):
                 filenames.append(str(fullfilename).split(".")[0].split("_")[-1])
 
     logger.info("Number of files in the directory: {}".format(len(file_path)))
-    logger.info("example filename: {}".format(filenames[1]))
 
     io_segmentation = dbReadWriteSegmentation()
     ground_truths = io_segmentation.get_segmentation_table("ground_truths")
@@ -64,14 +63,10 @@ def evaluate_masks(dcm_dir_path):
         gt_frame_no = gt["frame"]
         gt_file_name = gt["file_name"]
         
-        #get voxel spacing for the gt image
-        logger.info('filename im looking for: {}'.format(gt['file_name']))
-        logger.info('format of filenames in voxel_spacing_df: {}'.format(voxel_spacing_df.index[0]))
         
         #take the min of x or y scale spacing (appear to be the same for all files)
         try:
             voxel_spacing = float(voxel_spacing_df.loc[gt['file_name']]['value'])
-            logger.info('what the voxel_spacing looks like {}'.format(voxel_spacing))
         except TypeError:
             logger.info('voxel spacing can not be converted to a float')
             voxel_spacing = 0.013
@@ -200,5 +195,4 @@ def get_voxel_spacing_for_instances(df):
     df_dcm["tag1"] = df_dcm["tag1"].astype(str)  # consistency with tag2
     voxel_spacing_df = df_dcm.loc[(df_dcm["tag1"] == "18") & (df_dcm["tag2"] == "602c")] #just return x spacing
     
-    logger.info('voxel spacing table obtained with {} rows'.format(voxel_spacing_df.shape))
     return voxel_spacing_df