diff --git a/AliMehroze_lhr_assignment_1_Python.ipynb b/AliMehroze_lhr_assignment_1_Python.ipynb
new file mode 100644
index 0000000..cad99c5
--- /dev/null
+++ b/AliMehroze_lhr_assignment_1_Python.ipynb
@@ -0,0 +1,420 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['age' 'bp' 'sg' 'al' 'su' 'rbc' 'pc' 'pcc' 'ba' 'bgr' 'bu' 'sc' 'sod'\n",
+ " 'pot' 'hemo' 'pcv' 'wbcc' 'rbcc' 'htn' 'dm' 'cad' 'appet' 'pe' 'ane'\n",
+ " 'class']\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib\n",
+ "import csv\n",
+ "\n",
+ "# Load the raw dataset; the path is resolved relative to the working directory\n",
+ "df = pd.read_csv(\"chronic_kidney_disease_updated.csv\")\n",
+ "\n",
+ "# Show the column labels parsed from the header row\n",
+ "print(df.columns.values)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " bp | \n",
+ " sg | \n",
+ " al | \n",
+ " su | \n",
+ " rbc | \n",
+ " pc | \n",
+ " pcc | \n",
+ " ba | \n",
+ " bgr | \n",
+ " ... | \n",
+ " pcv | \n",
+ " wbcc | \n",
+ " rbcc | \n",
+ " htn | \n",
+ " dm | \n",
+ " cad | \n",
+ " appet | \n",
+ " pe | \n",
+ " ane | \n",
+ " class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 48 | \n",
+ " 80 | \n",
+ " 1.020 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " ? | \n",
+ " normal | \n",
+ " notpresent | \n",
+ " notpresent | \n",
+ " 121 | \n",
+ " ... | \n",
+ " 44 | \n",
+ " 7800 | \n",
+ " 5.2 | \n",
+ " yes | \n",
+ " yes | \n",
+ " no | \n",
+ " good | \n",
+ " no | \n",
+ " no | \n",
+ " ckd | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 7 | \n",
+ " 50 | \n",
+ " 1.020 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " ? | \n",
+ " normal | \n",
+ " notpresent | \n",
+ " notpresent | \n",
+ " ? | \n",
+ " ... | \n",
+ " 38 | \n",
+ " 6000 | \n",
+ " ? | \n",
+ " no | \n",
+ " no | \n",
+ " no | \n",
+ " good | \n",
+ " no | \n",
+ " no | \n",
+ " ckd | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 62 | \n",
+ " 80 | \n",
+ " 1.010 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " normal | \n",
+ " normal | \n",
+ " notpresent | \n",
+ " notpresent | \n",
+ " 423 | \n",
+ " ... | \n",
+ " 31 | \n",
+ " 7500 | \n",
+ " ? | \n",
+ " no | \n",
+ " yes | \n",
+ " no | \n",
+ " poor | \n",
+ " no | \n",
+ " yes | \n",
+ " ckd | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 48 | \n",
+ " 70 | \n",
+ " 1.005 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " normal | \n",
+ " abnormal | \n",
+ " present | \n",
+ " notpresent | \n",
+ " 117 | \n",
+ " ... | \n",
+ " 32 | \n",
+ " 6700 | \n",
+ " 3.9 | \n",
+ " yes | \n",
+ " no | \n",
+ " no | \n",
+ " poor | \n",
+ " yes | \n",
+ " yes | \n",
+ " ckd | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age bp sg al su rbc pc pcc ba bgr \\\n",
+ "0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
+ "1 48 80 1.020 1 0 ? normal notpresent notpresent 121 \n",
+ "2 7 50 1.020 4 0 ? normal notpresent notpresent ? \n",
+ "3 62 80 1.010 2 3 normal normal notpresent notpresent 423 \n",
+ "4 48 70 1.005 4 0 normal abnormal present notpresent 117 \n",
+ "\n",
+ " ... pcv wbcc rbcc htn dm cad appet pe ane class \n",
+ "0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
+ "1 ... 44 7800 5.2 yes yes no good no no ckd \n",
+ "2 ... 38 6000 ? no no no good no no ckd \n",
+ "3 ... 31 7500 ? no yes no poor no yes ckd \n",
+ "4 ... 32 6700 3.9 yes no no poor yes yes ckd \n",
+ "\n",
+ "[5 rows x 25 columns]"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the top of the frame (head() returns 5 rows by default)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "## Strip stray whitespace from every cell, then map \"?\" placeholders to NaN\n",
+ "def cleanse(df):\n",
+ "    \"\"\"Clean *df* in place and return the same DataFrame.\n",
+ "\n",
+ "    Tabs and spaces are removed from string cells first, so that a padded\n",
+ "    placeholder (for example a tab followed by '?') collapses to a bare '?'\n",
+ "    before the exact-match replacement with NaN runs. Doing the steps in\n",
+ "    the opposite order leaves such cells behind as a literal '?'.\n",
+ "    \"\"\"\n",
+ "    # One regex pass removes both tab and space characters\n",
+ "    df.replace(regex=True, inplace=True, to_replace=r'[\\t ]', value=r'')\n",
+ "    # Non-regex replace only matches cells that are exactly \"?\"\n",
+ "    df.replace(\"?\", np.nan, inplace=True)\n",
+ "    return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([nan, 'yes', 'no'], dtype=object)"
+ ]
+ },
+ "execution_count": 103,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean the frame in place, then list the distinct values left in 'dm'\n",
+ "df = cleanse(df)\n",
+ "df['dm'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[nan 'yes' 'no']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Call form of print gives the same output on Python 2 and also runs on Python 3\n",
+ "print(df.dm.unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "no 261\n",
+ "yes 137\n",
+ "Name: dm, dtype: int64"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Frequency of each non-null value in the 'dm' column\n",
+ "df['dm'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "('Unable to parse string', u'occurred at index age')",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0mTraceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'age'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'bp'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'bgr'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'bu'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sod'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'pot'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'hemo'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'pcv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wbcc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rbcc'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'age'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'bp'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'bgr'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'bu'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sod'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'pot'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'hemo'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'pcv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wbcc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rbcc'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[1;32m 4059\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4060\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4061\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4062\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4063\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_apply_standard\u001b[0;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[1;32m 4155\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4156\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4157\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4158\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4159\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/tools/util.pyc\u001b[0m in \u001b[0;36mto_numeric\u001b[0;34m(arg, errors)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m values = lib.maybe_convert_numeric(values, set(),\n\u001b[0;32m--> 115\u001b[0;31m coerce_numeric=coerce_numeric)\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32mpandas/src/inference.pyx\u001b[0m in \u001b[0;36mpandas.lib.maybe_convert_numeric (pandas/lib.c:53558)\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/src/inference.pyx\u001b[0m in \u001b[0;36mpandas.lib.maybe_convert_numeric (pandas/lib.c:53344)\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: ('Unable to parse string', u'occurred at index age')"
+ ]
+ }
+ ],
+ "source": [
+ "# Columns that should hold numbers but may contain unparsable text\n",
+ "numeric_cols = ['age', 'bp', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wbcc', 'rbcc']\n",
+ "# errors='coerce' turns unparsable strings into NaN instead of raising ValueError\n",
+ "df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "#df\n",
+ "##matplotlib.bar(df['rbcc'],df['class'], width = 0.5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0mTraceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'bp'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'bp'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'ckd'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/usr/local/lib/python2.7/dist-packages/pandas/core/generic.pyc\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 890\u001b[0m raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m 891\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 892\u001b[0;31m .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m 893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
+ ]
+ }
+ ],
+ "source": [
+ "# Boolean mask: rows whose 'bp' equals the column maximum AND whose class is 'ckd'.\n",
+ "# Series must be combined element-wise with & (each comparison parenthesised);\n",
+ "# Python's `and` asks for a single truth value and raises ValueError on a Series.\n",
+ "# Series.max() skips NaN; assumes 'bp' has already been converted to numeric.\n",
+ "(df['bp'] == df['bp'].max()) & (df['class'] == 'ckd')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# index=False keeps the output schema identical to the input file\n",
+ "# (otherwise the row index is written as an extra unnamed first column)\n",
+ "df.to_csv(\"clean_chronic_kidney_disease.csv\", index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.12"
+ },
+ "widgets": {
+ "state": {},
+ "version": "1.1.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/DIH b/DIH
new file mode 160000
index 0000000..c089def
--- /dev/null
+++ b/DIH
@@ -0,0 +1 @@
+Subproject commit c089defeacf966815508d06dec5fe5cab65c2b0d