-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplot_2matrices_together.py
102 lines (73 loc) · 4.2 KB
/
plot_2matrices_together.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
# Description: This script takes two matrix files and outputs a heatmap with each matrix plotted in a triangle
# Usage: python plot_2matrices_together.py -m1 matrix_file1 -m2 matrix_file2 -o output_file -w window_file
# Input: matrix_file1 = matrix file with genomic windows as row and column names
# matrix_file2 = matrix file with genomic windows as row and column names
# window_file = file with genomic window positions
# Output: output_file = heatmap plot
# Modules required: argparse, pandas, numpy, matplotlib, seaborn
# Date: 27 September 2023
# Author: Anna Orteu
#########################################################################################################################
import argparse
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
#########################################################################################################################
### parse arguments
def build_parser():
    """Return the command-line parser for this script.

    All four options are required: without any one of them the script
    cannot load its inputs or write its output, so argparse reports the
    missing option up front instead of a later, less clear failure.
    """
    parser = argparse.ArgumentParser(
        description="Plot two matrices together, one per triangle of a heatmap."
    )
    #input and output files
    parser.add_argument("-m1", "--matrix1", help="Input first matrix", action="store", required=True)
    parser.add_argument("-m2", "--matrix2", help="Input second matrix", action="store", required=True)
    parser.add_argument("-o", "--outFile", help="Output heatmap file", action="store", required=True)
    parser.add_argument("-w", "--windowFile", help="Input genomic windows file", action="store", required=True)
    return parser


#########################################################################################################################
def join_triangles(matrix1, matrix2, window_positions=None):
    """Combine two matrices into one by adding the transpose of the first to the second.

    Parameters:
        matrix1: DataFrame; it is transposed before the addition.
        matrix2: DataFrame; added to the transposed matrix1 (pandas aligns
            the two operands on their row/column labels).
        window_positions: optional dict-like/Series mapping the existing
            integer labels to the labels to display on both axes. When
            None, the labels are left unchanged.

    Returns:
        A new DataFrame holding the element-wise sum with its last row and
        last column removed and, if requested, both axes relabelled.
    """
    #Transpose one of the matrices and join the two triangles
    combined = matrix1.transpose().add(matrix2)
    #Drop the last column and last row, which the original author notes are full of NaNs
    combined = combined.iloc[:-1, :-1]
    if window_positions is not None:
        #rename axis based on genomic window positions (non-inplace: safe on a slice)
        combined = combined.rename(index=window_positions, columns=window_positions)
    return combined


#########################################################################################################################
def tick_positions(n_labels, labels_per_tick=60, min_ticks=10):
    """Return evenly spaced integer tick positions for an axis with n_labels entries.

    Parameters:
        n_labels: number of rows (or columns) along the axis.
        labels_per_tick: roughly one tick is requested per this many labels.
        min_ticks: lower bound on the number of ticks requested.

    Returns:
        A sorted 1D integer array of unique positions between 0 and
        n_labels - 1 (empty when n_labels is not positive).
    """
    if n_labels <= 0:
        # Nothing to label; avoids producing negative positions from linspace.
        return np.array([], dtype=int)
    n_ticks = max(min_ticks, round(n_labels / labels_per_tick))
    positions = np.around(np.linspace(0, n_labels - 1, n_ticks)).astype(int)
    # On short axes several requested ticks round to the same position; keep one of each.
    return np.unique(positions)


#########################################################################################################################
def plot_heatmap(matrix, output):
    """Draw the log-scaled heatmap of matrix and save it to output.

    Parameters:
        matrix: DataFrame to plot; its index/columns supply the tick labels.
        output: path of the image file to write.

    Raises:
        ValueError: if matrix has no cells to plot.
    """
    if matrix.empty:
        raise ValueError("Nothing to plot: the combined matrix is empty")

    plt.rcParams['figure.figsize'] = [30, 30]
    sns.set(font_scale=3)
    # The small offset keeps log() finite for zero-valued cells.
    data = np.log(matrix + 0.0001) * 100
    heatmap_plot = sns.heatmap(
        data,
        cmap="YlGnBu",
        square=True,
        cbar_kws={'label': 'Barcode sharing %', 'shrink': 0.5},
    )

    # Tick locations and labels for the y-axis (rows)
    tick_locs_y = tick_positions(len(data.index))
    heatmap_plot.set_yticks(tick_locs_y)
    heatmap_plot.set_yticklabels(data.index[tick_locs_y])

    # Tick locations and labels for the x-axis (columns)
    tick_locs_x = tick_positions(len(data.columns))
    heatmap_plot.set_xticks(tick_locs_x)
    heatmap_plot.set_xticklabels(data.columns[tick_locs_x])

    # Colorbar: one tick at each end of the plotted (log-scaled) range.
    # The two ticks are labelled '0' and '100' regardless of the underlying values.
    cbar = heatmap_plot.collections[0].colorbar
    values = data.to_numpy()
    cbar.set_ticks(np.linspace(np.nanmin(values), np.nanmax(values), 2))
    cbar.set_ticklabels(['0', '100'])
    # Style the colorbar through its own axes rather than by position in fig.axes.
    cbar.ax.yaxis.label.set_size(50)
    cbar.ax.tick_params(labelsize=40)

    # Save the figure
    fig = heatmap_plot.get_figure()
    fig.savefig(output)


#########################################################################################################################
def main(argv=None):
    """Parse the command line, load the inputs, merge the matrices and save the plot.

    Parameters:
        argv: optional list of argument strings; None means use sys.argv[1:].
    """
    args = build_parser().parse_args(argv)

    #open files (matrices are comma-separated, the window file is tab-separated; none has a header row)
    matrix_file1 = pd.read_csv(args.matrix1, sep=',', lineterminator='\n', header=None)
    matrix_file2 = pd.read_csv(args.matrix2, sep=',', lineterminator='\n', header=None)
    window_file = pd.read_csv(args.windowFile, sep='\t', lineterminator='\n', header=None)

    # The second column of the window file provides the axis labels.
    matrix_file3 = join_triangles(matrix_file1, matrix_file2, window_file[1])

    #plot and save output
    plot_heatmap(matrix_file3, args.outFile)


if __name__ == "__main__":
    main()