From 6c9b4a6d4d166f5da375ad18b7952bde099383de Mon Sep 17 00:00:00 2001 From: JWDebelius Date: Tue, 19 Jan 2016 13:36:58 -0800 Subject: [PATCH] Updates to address reviewer comments --- americangut/per_sample.py | 51 ++++++++++++++--------------------- tests/test_per_sample.py | 57 +++++++++++++-------------------------- 2 files changed, 39 insertions(+), 69 deletions(-) diff --git a/americangut/per_sample.py b/americangut/per_sample.py index 449b020..0043685 100644 --- a/americangut/per_sample.py +++ b/americangut/per_sample.py @@ -281,11 +281,20 @@ def alpha_plot(opts, sample_ids): sep='\t', dtype=str, ) - alpha_map[['shannon_1k', 'PD_whole_tree_1k']] = \ - alpha_map[['shannon_1k', 'PD_whole_tree_1k']].astype(float) - alpha_map.set_index('#SampleID', inplace=True) - results['index'] = alpha_map.index + alpha_metrics = ['shannon_1k', 'PD_whole_tree_1k'] + + # Checks the alpha_field is in the mapping file + for metric in alpha_metrics: + if metric not in alpha_map.columns: + raise ValueError('%s is not a valid alpha diversity field name.' + % metric) + # Checks the group_field is in the mapping file + if 'SIMPLE_BODY_SITE' not in alpha_map.columns: + raise ValueError('SIMPLE_BODY_SITE is not a valid field name.') + + alpha_map[alpha_metrics] = alpha_map[alpha_metrics].astype(float) + alpha_map.set_index('#SampleID', inplace=True) results = {} for id_ in sample_ids: @@ -587,9 +596,9 @@ def _plot_alpha(sample, alpha_map, alpha_field, group_field='SIMPLE_BODY_SITE', ---------- sample : str The sample ID to be plotted - alpha_map_fp : str, dataframe - The filepath of a comma-seperated file or the pandas dataframe where - the sample ID are given in the `'#SampleID'` column, a column with + alpha_map_fp : pandas DataFrame + A pandas dataframe containing the sample metadata. The sample ID + should be given in the `'#SampleID'` column, a column with the name given by `alpha_field` contains alpha diversity values, and the `group_field` column specifying the groups which should be used to seperate the data for making the distribution plot. @@ -606,43 +615,23 @@ def _plot_alpha(sample, alpha_map, alpha_field, group_field='SIMPLE_BODY_SITE', Returns ------- - If the sample is not included in the the mapping file, a string is returned - stating this fact. - If the sample is present, a matplotlib figure with the alpha diversity - distribution and a line indicating the sample value is returned. + distribution and a line indicating the sample value is returned. If a + file path is specified, the figure will be saved at the filepath instead + of returning. If debug is passed, the following parameters are returned: group : str The value of the `group_field` for the sample group_alpha : ndarray - The alpha diversity values assoicated with the group + The alpha diversity values associated with the group sample_alpha : float The alpha diversity for the sample xlabel : str The label used for the x-axis of the plot. - Raises - ------ - ValueError - If the alpha_field is not in alpha_map - ValueError - If the group_field is not in alpha_map - """ - # Checks the alpha_field is in the mapping file - if alpha_field not in alpha_map.columns: - raise ValueError('%s is not a valid alpha diversity field name.' - % alpha_field) - # Checks the group_field is in the mapping file - if group_field not in alpha_map.columns: - raise ValueError('%s is not a valid field name.' % group_field) - # Checks the same is in the mapping file - if sample not in alpha_map.index: - return ('%s does not have an alpha diversity value for %s.' - % (alpha_field, sample)) - # Explicitly casts the alpha diversity to a float alpha_map[alpha_field] = alpha_map[alpha_field].astype(float) diff --git a/tests/test_per_sample.py b/tests/test_per_sample.py index 6af3fd9..8211399 100644 --- a/tests/test_per_sample.py +++ b/tests/test_per_sample.py @@ -266,46 +266,27 @@ def test_plot_alpha_no_sample(self): alpha_field='alpha') self.assertEqual(tvalue, kvalue) - def test_plot_alpha_alpha_field_error(self): - map_ = pd.DataFrame( - data=np.array([ - ['skin', '1990', 'female', 'Verity', 'US', 12.5], - ['fecal', '1990', 'female', 'Verity', 'US', 8.6], - ['fecal', '1987', 'male', 'Alex', 'US', 7.9], - ['fecal', '1993', 'female', 'Annie', 'US', 7.5], - ['skin', '1989', 'male', 'Dominic', 'UK', 14.0], - ['fecal', '1986', 'female', 'Sarah', 'US', 15.0], - ['oral', '1988', 'female', 'Shelby', 'AUS', 4.2], - ]), - index=['VeP0', 'VeP1', 'AxP0', 'AnP0', 'DoD0', 'SaZ0', 'ShT0'], - columns=['SIMPLE_BODY_SITE', 'BIRTH_YEAR', 'SEX', - 'HOST_SUBJECT_ID', 'NATIONALITY', 'alpha'], - ) + def test_alpha_plts_metric_error_field_error(self): + opts = {'collapsed': { + '100nt': { + 'alpha_map': + os.path.join(ag.WORKING_DIR.split('American-Gut')[0], + 'American-Gut/tests/files/' + 'test_mapping.txt')}}, + 'sample_type': 'fecal'} with self.assertRaises(ValueError): - agps._plot_alpha(sample='VeP0', - alpha_map=map_, - alpha_field='InCryptid') - - def test_plot_alpha_group_field_error(self): - map_ = pd.DataFrame( - data=np.array([ - ['skin', '1990', 'female', 'Verity', 'US', 12.5], - ['fecal', '1990', 'female', 'Verity', 'US', 8.6], - ['fecal', '1987', 'male', 'Alex', 'US', 7.9], - ['fecal', '1993', 'female', 'Annie', 'US', 7.5], - ['skin', '1989', 'male', 'Dominic', 'UK', 14.0], - ['fecal', '1986', 'female', 'Sarah', 'US', 15.0], - ['oral', '1988', 'female', 'Shelby', 'AUS', 4.2], - ]), - index=['VeP0', 'VeP1', 'AxP0', 'AnP0', 'DoD0', 'SaZ0', 'ShT0'], - columns=['SIMPLE_BODY_SITE', 'BIRTH_YEAR', 'SEX', - 'HOST_SUBJECT_ID', 'NATIONALITY', 'alpha'], - ) + agps.alpha_plot(opts, ['sample_a', 'sample_b']) + + def test_alpha_plot_group_field_error(self): + opts = {'collapsed': { + '100nt': { + 'alpha_map': + os.path.join(ag.WORKING_DIR.split('American-Gut')[0], + 'American-Gut/tests/files/' + 'test_mapping_alpha.txt')}}, + 'sample_type': 'fecal'} with self.assertRaises(ValueError): - agps._plot_alpha(sample='VeP0', - alpha_map=map_, - alpha_field='alpha', - group_field='BODY_HABITAT') + agps.alpha_plot(opts, ['sample_a', 'sample_b']) if __name__ == '__main__': main()