{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "import scvelo as scv\n",
    "import numpy as np\n",
    "import collections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(23914, 5)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>cell</th>\n",
       "      <th>treatment</th>\n",
       "      <th>replicate</th>\n",
       "      <th>barcodes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>SCC1</td>\n",
       "      <td>CTX</td>\n",
       "      <td>R1</td>\n",
       "      <td>AAACCTGAGTTGTCGT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>SCC1</td>\n",
       "      <td>CTX</td>\n",
       "      <td>R1</td>\n",
       "      <td>AAACCTGCACAGCGTC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>SCC1</td>\n",
       "      <td>CTX</td>\n",
       "      <td>R1</td>\n",
       "      <td>AAACCTGGTACTTCTT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>SCC1</td>\n",
       "      <td>CTX</td>\n",
       "      <td>R1</td>\n",
       "      <td>AAAGATGGTGTGAATA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>SCC1</td>\n",
       "      <td>CTX</td>\n",
       "      <td>R1</td>\n",
       "      <td>AAAGCAAGTAGCTGCC</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  cell treatment replicate          barcodes\n",
       "0           0  SCC1       CTX        R1  AAACCTGAGTTGTCGT\n",
       "1           1  SCC1       CTX        R1  AAACCTGCACAGCGTC\n",
       "2           2  SCC1       CTX        R1  AAACCTGGTACTTCTT\n",
       "3           3  SCC1       CTX        R1  AAAGATGGTGTGAATA\n",
       "4           4  SCC1       CTX        R1  AAAGCAAGTAGCTGCC"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "phenoDat = pd.read_csv('phenoSCCall.csv')\n",
    "print(phenoDat.shape)\n",
    "phenoDat.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "phenoDat.drop('Unnamed: 0',axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "ename": "MemoryError",
     "evalue": "Unable to allocate array with shape (396045101,) and data type float32",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mMemoryError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-34-dc4042396d9a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0madata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_h5ad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'processed_adata.h5ad'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0madata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/conda/envs/Fertig_Python_3_7/lib/python3.7/site-packages/anndata/readwrite/read.py\u001b[0m in \u001b[0;36mread_h5ad\u001b[0;34m(filename, backed, chunk_size)\u001b[0m\n\u001b[1;32m    450\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mX\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    451\u001b[0m             \u001b[0mdtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m  \u001b[0;31m# maintain dtype, since 0.7\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 452\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mAnnData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0m_read_args_from_h5ad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunk_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunk_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    453\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    454\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/conda/envs/Fertig_Python_3_7/lib/python3.7/site-packages/anndata/readwrite/read.py\u001b[0m in \u001b[0;36m_read_args_from_h5ad\u001b[0;34m(adata, filename, mode, chunk_size)\u001b[0m\n\u001b[1;32m    484\u001b[0m             \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    485\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 486\u001b[0;31m             \u001b[0m_read_key_value_from_h5\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunk_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunk_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    487\u001b[0m     \u001b[0;31m# backwards compat: save X with the correct name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    488\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;34m'X'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/conda/envs/Fertig_Python_3_7/lib/python3.7/site-packages/anndata/readwrite/read.py\u001b[0m in \u001b[0;36m_read_key_value_from_h5\u001b[0;34m(f, d, key, key_write, chunk_size)\u001b[0m\n\u001b[1;32m    520\u001b[0m             \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_direct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    521\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mh5py\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSparseDataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 522\u001b[0;31m         \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    523\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    524\u001b[0m         \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/opt/conda/envs/Fertig_Python_3_7/lib/python3.7/site-packages/anndata/h5py/h5sparse.py\u001b[0m in \u001b[0;36mvalue\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    313\u001b[0m         \u001b[0mobject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mh5py_group\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    314\u001b[0m         \u001b[0mdata_array\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mformat_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 315\u001b[0;31m         \u001b[0mdata_array\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    316\u001b[0m         \u001b[0mdata_array\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindices\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'indices'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'indices'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    317\u001b[0m         \u001b[0mdata_array\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindptr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'indptr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'indptr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mMemoryError\u001b[0m: Unable to allocate array with shape (396045101,) and data type float32"
     ]
    }
   ],
   "source": [
    "adata = sc.read_h5ad('adataWithVelocity.h5ad')\n",
    "adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AAACCTGAGCGATTCT',\n",
       " 'AAACCTGAGCTTTGGT',\n",
       " 'AAACCTGAGGCCCTCA',\n",
       " 'AAACCTGAGTAGCCGA',\n",
       " 'AAACCTGGTAGCGCTC']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indices = []\n",
    "for i,idx in enumerate(adata.obs.index):\n",
    "    indices.append(idx[:16])\n",
    "indices[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>folder</th>\n",
       "      <th>n_genes</th>\n",
       "      <th>initial_size_spliced</th>\n",
       "      <th>initial_size_unspliced</th>\n",
       "      <th>initial_size</th>\n",
       "      <th>n_counts</th>\n",
       "      <th>velocity_self_transition</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAACCTGAGCGATTCT-0</th>\n",
       "      <td>S4_L007</td>\n",
       "      <td>2596</td>\n",
       "      <td>9959.0</td>\n",
       "      <td>2476.0</td>\n",
       "      <td>9959.0</td>\n",
       "      <td>4728.473145</td>\n",
       "      <td>0.380558</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACCTGAGCTTTGGT-0</th>\n",
       "      <td>S4_L007</td>\n",
       "      <td>329</td>\n",
       "      <td>561.0</td>\n",
       "      <td>189.0</td>\n",
       "      <td>561.0</td>\n",
       "      <td>4725.589844</td>\n",
       "      <td>0.338164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACCTGAGGCCCTCA-0</th>\n",
       "      <td>S4_L007</td>\n",
       "      <td>2178</td>\n",
       "      <td>7789.0</td>\n",
       "      <td>1995.0</td>\n",
       "      <td>7789.0</td>\n",
       "      <td>4761.850586</td>\n",
       "      <td>0.306395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACCTGAGTAGCCGA-0</th>\n",
       "      <td>S4_L007</td>\n",
       "      <td>253</td>\n",
       "      <td>539.0</td>\n",
       "      <td>597.0</td>\n",
       "      <td>539.0</td>\n",
       "      <td>3514.505371</td>\n",
       "      <td>0.153943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACCTGGTAGCGCTC-0</th>\n",
       "      <td>S4_L007</td>\n",
       "      <td>420</td>\n",
       "      <td>878.0</td>\n",
       "      <td>1053.0</td>\n",
       "      <td>878.0</td>\n",
       "      <td>3543.319092</td>\n",
       "      <td>0.207334</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     folder  n_genes  initial_size_spliced  \\\n",
       "index                                                        \n",
       "AAACCTGAGCGATTCT-0  S4_L007     2596                9959.0   \n",
       "AAACCTGAGCTTTGGT-0  S4_L007      329                 561.0   \n",
       "AAACCTGAGGCCCTCA-0  S4_L007     2178                7789.0   \n",
       "AAACCTGAGTAGCCGA-0  S4_L007      253                 539.0   \n",
       "AAACCTGGTAGCGCTC-0  S4_L007      420                 878.0   \n",
       "\n",
       "                    initial_size_unspliced  initial_size     n_counts  \\\n",
       "index                                                                   \n",
       "AAACCTGAGCGATTCT-0                  2476.0        9959.0  4728.473145   \n",
       "AAACCTGAGCTTTGGT-0                   189.0         561.0  4725.589844   \n",
       "AAACCTGAGGCCCTCA-0                  1995.0        7789.0  4761.850586   \n",
       "AAACCTGAGTAGCCGA-0                   597.0         539.0  3514.505371   \n",
       "AAACCTGGTAGCGCTC-0                  1053.0         878.0  3543.319092   \n",
       "\n",
       "                    velocity_self_transition  \n",
       "index                                         \n",
       "AAACCTGAGCGATTCT-0                  0.380558  \n",
       "AAACCTGAGCTTTGGT-0                  0.338164  \n",
       "AAACCTGAGGCCCTCA-0                  0.306395  \n",
       "AAACCTGAGTAGCCGA-0                  0.153943  \n",
       "AAACCTGGTAGCGCTC-0                  0.207334  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.obs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "adata.obs.set_index(pd.Index(indices),inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(177151, 11)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = adata.obs.join(phenoDat.set_index('barcodes'),how='inner')\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "adata2 = adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23585"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(set(df.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "172419"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "common = []\n",
    "for x in indices:\n",
    "    common.append(x in phenoDat['barcodes'].values)\n",
    "sum(common)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "33209"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(set(indices))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('AAACCTGAGCGATTCT', 'AAACCTGAGTTGTCGT')"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indices[0],phenoDat['barcodes']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "(Fertig) Python 3.7",
   "language": "python",
   "name": "fertig_python_3_7"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}