diff --git a/analysis.py b/analysis.py new file mode 100644 index 0000000..1b5a6e7 --- /dev/null +++ b/analysis.py @@ -0,0 +1,13 @@ +import statistics +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns +import pandas as pd + + + + + + +if __name__ == '__main__': + complaints = pd.read_csv("complaints_dec_2014.csv") diff --git a/consumer-complaints.ipynb b/consumer-complaints.ipynb new file mode 100644 index 0000000..eae0f65 --- /dev/null +++ b/consumer-complaints.ipynb @@ -0,0 +1,1554 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:441914b7ee7ca184479d88fdbd35965f6f428792b30184a8ad9db333e44d6203" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import statistics\n", + "from collections import Counter\n", + "from datetime import datetime\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "complaints = pd.read_csv(\"complaints_dec_2014.csv\")\n", + "population = pd.read_csv(\"population.csv\")\n", + "pd.set_option('display.mpl_style', 'default') \n", + "pd.set_option('display.line_width', 5000) \n", + "pd.set_option('display.max_columns', 60)\n", + "%matplotlib inline" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "line_width has been deprecated, use display.width instead (currently both are\n", + "identical)\n", + "\n" + ] + } + ], + "prompt_number": 1140 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "complaints.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Complaint IDProductSub-productIssueSub-issueStateZIP codeSubmitted viaDate receivedDate sent to companyCompanyCompany responseTimely response?Consumer disputed?
0 1177167 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Ad Astra Recovery Services Inc Closed with explanation Yes NaN
1 1177166 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Unique Management Services, Inc Closed with explanation Yes NaN
2 1177165 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 CL Holdings, LLC Closed with monetary relief Yes NaN
3 1177164 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Enhanced Recovery Company, LLC Closed with non-monetary relief Yes NaN
4 1177163 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Enhanced Acquisitions, LLC Closed with explanation Yes NaN
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1141, + "text": [ + " Complaint ID Product Sub-product Issue Sub-issue State ZIP code Submitted via Date received Date sent to company Company Company response Timely response? Consumer disputed?\n", + "0 1177167 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Ad Astra Recovery Services Inc Closed with explanation Yes NaN\n", + "1 1177166 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Unique Management Services, Inc Closed with explanation Yes NaN\n", + "2 1177165 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 CL Holdings, LLC Closed with monetary relief Yes NaN\n", + "3 1177164 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Enhanced Recovery Company, LLC Closed with non-monetary relief Yes NaN\n", + "4 1177163 Debt collection NaN Cont'd attempts collect debt not owed Debt is not mine TX 77068 Web 12/31/2014 12/31/2014 Enhanced Acquisitions, LLC Closed with explanation Yes NaN" + ] + } + ], + "prompt_number": 1141 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Number of Complaints by Product." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "by_product = complaints['Product'].value_counts()" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1142 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(by_product)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Mortgage 3002\n", + "Debt collection 2942\n", + "Credit reporting 2113\n", + "Bank account or service 1136\n", + "Credit card 1100\n", + "Consumer loan 578\n", + "Student loan 340\n", + "Payday loan 141\n", + "Money transfers 107\n", + "Prepaid card 70\n", + "Other financial service 14\n", + "dtype: int64\n" + ] + } + ], + "prompt_number": 1143 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Number of Complaints by Company (Top Ten Companies Only)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "top_companies = complaints['Company'].value_counts().head(10)\n", + "top_companies" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1144, + "text": [ + "Bank of America 766\n", + "Equifax 737\n", + "Experian 675\n", + "TransUnion 604\n", + "Wells Fargo 598\n", + "JPMorgan Chase 545\n", + "Ocwen 408\n", + "Citibank 403\n", + "Nationstar Mortgage 357\n", + "Capital One 252\n", + "dtype: int64" + ] + } + ], + "prompt_number": 1144 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Number of Complaints by Company Response" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "complaints_per_response = complaints['Company response'].value_counts()\n", + "complaints_per_response" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1145, + "text": [ + "Closed with explanation 8185\n", + "Closed with non-monetary relief 1253\n", + "In progress 1056\n", + "Closed with monetary relief 643\n", + "Closed 239\n", + "Untimely response 167\n", + "dtype: int64" + ] + } + ], + "prompt_number": 1145 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Weight the Days of Week" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "complaints['days'] = pd.to_datetime(complaints.pop(\"Date received\"), format='%m/%d/%Y')" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1146 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "num_of_complaints = max(complaints.count())\n", + "timespan = len(complaints['days'].unique())\n", + "dates = complaints['days'].unique()\n", + "days = []\n", + "for day in dates:\n", + " dt64 = np.datetime64(day)\n", + " ts = (dt64 - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')\n", + " days.append(datetime.utcfromtimestamp(ts).weekday())\n", + "count_length = len(days)\n", + "days_count = Counter(days)\n", + "day_ratio = []\n", + "print(\"Monday - Friday Count\")\n", + "print(count_length)\n", + "print(days_count)\n", + "for n in range(7):\n", + " day_ratio.append(31/(days_count[n]))\n", + "#day_ratio = list(days_count.values())\n", + "print(day_ratio)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Monday - Friday Count\n", + "31\n", + "Counter({0: 5, 1: 5, 2: 5, 3: 4, 4: 4, 5: 4, 6: 4})\n", + "[6.2, 6.2, 6.2, 7.75, 7.75, 7.75, 7.75]\n" + ] + } + ], + "prompt_number": 1147 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Number of Complaints per Day of Week" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "complaints['Days'] = complaints.days.dt.dayofweek" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1148 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1148 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday',]\n", + "day_map = lambda x: days[x]\n", + "complaints_by_day = complaints['Days'].map(day_map)\n", + "complaints_by_day = complaints_by_day.value_counts()\n", + "print(complaints_by_day)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Tuesday 2580\n", + "Monday 2544\n", + "Wednesday 2289\n", + "Thursday 1634\n", + "Friday 1446\n", + "Saturday 543\n", + "Sunday 507\n", + "dtype: int64\n" + ] + } + ], + "prompt_number": 1149 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Mean Number of Complaints per Day of Week" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "complaints_by_day = complaints['Days'].value_counts()/timespan\n", + "complaints_by_day" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1150, + "text": [ + "1 83.225806\n", + "0 82.064516\n", + "2 73.838710\n", + "3 52.709677\n", + "4 46.645161\n", + "5 17.516129\n", + "6 16.354839\n", + "dtype: float64" + ] + } + ], + "prompt_number": 1150 + }, + { + "cell_type": "code", + "collapsed": true, + "input": [ + "complaints_by_day *= day_ratio \n", + "complaints_by_day" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1151, + "text": [ + "1 516.00\n", + "0 508.80\n", + "2 457.80\n", + "3 408.50\n", + "4 361.50\n", + "5 135.75\n", + "6 126.75\n", + "dtype: float64" + ] + } + ], + "prompt_number": 1151 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "complaints_by_day.plot(kind='bar')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1152, + "text": [ + "" + ] + }, + { + "metadata": {}, + "output_type": "display_data", + "png": "iVBORw0KGgoAAAANSUhEUgAAAXMAAAD8CAYAAACFK0QrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADnBJREFUeJzt3W+IZfddx/H37E6aqMTuzSqJKD6oM1MQBIlUSmtb7G4j\nBoOo+G2eSBMFDWtMKXUNamV2oFBromu0XY0Fa1LQ+u2jIkQzm52QygapJIhP2uzOFGz9k7CbzK6r\nTrIzm/XBOaOzkztzz7m5Z+/cX94vGHLvd3787gfCfvbwu/fsnbp69SqSpMm2b9wBJElvnmUuSQWw\nzCWpAJa5JBXAMpekAljmklSA6UELIuL7gC/Ua7+amR+PiMPAfL1kPjOX6rV955Kkbg0sc+Bh4Lcz\n81mAiNgHLACH698/CSz1m0fE05npB9klqWO7lnlE7Ad+YLPIa7PAmcxcq9esRMQs1ZHNNXNgBjjb\nSXJJ0v+Z2u0O0Ii4DTgJrADfCfwx8CIQW/cAvlj/9w3zzPyHEWeWJG0z6JjlZeAi8HPAfuA08EvA\nAeAIVWGfAM5TXZn3m/d16tQpj18kaQiHDh2a2j7btcwzcz0ivgXclpn/FhGvAcvA3JZls5m5XB/J\nvGG+2/6333578/Qt9Xo9VldXO9u/a5Ocf5Kzg/nHzfy7e/755/vOm3w08UHgcxFxGvhSZv4P1Rud\nJ4FF4BhAZl7pN5ckdW/gp1ky85vAndtmi1SFvX1t37kkqVveNCRJBbDMJakAlrkkFcAyl6QCWOaS\nVADLXJIKYJlLUgEsc0kqgGUuSQWwzCWpAJa5JBXAMpekAljmklQAy1ySCmCZS1IBLHNJKoBlLkkF\nsMwlqQCWuSQVwDKXpAJY5pJUAMtckgpgmUtSASxzSSqAZS5JBbDMJakAlrkkFWB63AGaevHSa7x0\n6XLj9dPn1thY32i8/tab38ZtN984TDRJGruJKfOXLl3m6BPLne3/0J0zlrmkiTWwzCPiL4B3Aq8C\nn8/MxyPiMDBfL5nPzKV6bd+5JKlbTa7MrwIfzsxvAkTEPmABOFz//klgqd88Ip7OzKsjzjyRPCaS\n1KWmxyxTWx7PAmcycw0gIlYiYpbqzdRr5sAMcHaEeSeWx0SSutSkzC8BfxkRrwAfA24BLkTE8fr3\nF4GDVIXfb26ZS1LHBpZ5Zj4AEBE/DDwEPAgcAI5QFfgJ4DzVlXm/+Y56vV7zoOfWGq8dxvQN063y\ntN5/wvO3tZeyDMP842X+9tp8muVVYB1YBua2zGczczki9veb77bh6upq4xdvc348jI31jVZ5htm/\nS13nb6PX6+2ZLMMw/3iZfzhNPs3yReB7gP8CjmTm6xGxAJyslxwDyMwr/eaSpO41OWa5u89sEVhs\nOpckdcvb+SWpAJa5JBXAMpekAljmklQAy1ySCmCZS1IBLHNJKoBlLkkFsMwlqQCWuSQVwDKXpAJY\n5pJUgIn5QmeNj195J+19lrkG8ivvpL3PYxZJKoBlLkkFsMwlqQCWuSQVwDKXpAJY5pJUAMtckgpg\nmUtSASxzSSqAZS5JBbDMJakAlrkkFcAyl6QCWOaSVADLXJIKYJlLUgEafTlFRNwInAF+LzM/GxGH\ngfn61/OZuVSv6zuXxslvStJbQdNvGroPeA64GhFTwAJwuP7dk8BSROzbPo+IpzPz6igDS235TUl6\nKxh4zBIR3w58CPgyMAXMAmcycy0z14CViJjtNwdmuosuSdrU5Mr8AeAzwK3184PAhYg4Xj+/WM+m\ndpif3WnjXq/XPOi5tcZrhzF9w3SrPK33n+D8k5wdJj//MPZanrbM396uZR4Rbwd+LDN/NyLuqccv\nAweAI1QFfgI4T3WV32++o9XV1cZB25xhDmNjfaNVnmH271KX+Sc5++b+Xeo6f1u9Xm9P5WnL/MMZ\ndMzyXuCmiPgrqnPze4GbgLkta2Yzc5nqWKXfXJLUsV2vzDPzCeAJgIj4CPAdmfnPEbEAnKyXHavX\nXuk3lyR1r+mnWcjMx7Y8XgQW+6zpO5ckdcubhiSpAJa5JBXAMpekAljmklQAy1ySCmCZS1IBLHNJ\nKoBlLkkFsMwlqQCWuSQVoPHt/JLGw29KUhOWubTH+U1JasJjFkkqgGUuSQWwzCWpAJa5JBXAMpek\nAljmklQAy1ySCmCZS1IBLHNJKoBlLkkFsMwlqQCWuSQVwDKXpAJY5pJUAMtckgpgmUtSASxzSSrA\nwG8aiohPAu8BXgd+OTO/ERGHgfl6yXxmLtVr+84lSd0aeGWemZ/IzA9SlfSDETEFLAB31D/HACJi\n3/Z5vVaS1LE23wH6buBrwCxwJjPXACJiJSJmqf5iuGYOzABnRxtZkrRdozKPiK8A3wW8D5gDLkTE\n8frXF4GDwNQOc8tckjrWqMwz8/0R8aPA48DHgAPAEaoCPwGcp7oy7zffUa/Xax703FrjtcOYvmG6\nVZ7W+09w/knODuYfuH/H+Yex1/K0NY78bY5ZXqzXL1NdnW+azczliNjfb77bhqurq41ffGN9o0XU\n9jbWN1rlGWb/LnWZf5Kzb+7fJfOPVq/X21N52hpX/iafZvlrqiOWy8D9mfl6RCwAJ+slxwAy80q/\nuSSpewPLPDM/3Ge2CCw2nUuSuuVNQ5JUAMtckgpgmUtSASxzSSqAZS5JBbDMJakAlrkkFcAyl6QC\nWOaSVADLXJIKYJlLUgEsc0kqgGUuSQWwzCWpAJa5JBXAMpekAljmklQAy1ySCmCZS1IBLHNJKoBl\nLkkFsMwlqQCWuSQVwDKXpAJY5pJUAMtckgpgmUtSASxzSSqAZS5JBZgetCAi/hR4J1Xx35uZ34iI\nw8B8vWQ+M5fqtX3nkqRuDbwyz8z7MvPHgQXgaERM1Y/vqH+OAUTEvu3zeq0kqWMDr8y3uARcBmaB\nM5m5BhARKxExS/UXwzVzYAY4O9rIkqTt2pT5LwKPAAeBCxFxvJ5frGdTO8wtc0nqWKMyj4i7gBcy\n8+sRMQccAI5QFfgJ4DzVlXm/+Y56vV7zoOfWGq8dxvQN063ytN5/gvNPcnYw/8D9O84/jL2Wp61x\n5G/yBuiPAB/IzF+vRyvA3JYls5m5HBH7+81323t1dbVx0I31jcZrh7GxvtEqzzD7d6nL/JOcfXP/\nLpl/tHq93p7K09a48jf5aOKXgHdFxNMR8UhmXqF6o/MksEj9BuhOc0lS9wZemWfmO/rMFqkKu9Fc\nktQtbxqSpAJY5pJUAMtckgpgmUtSASxzSSqAZS5JBbDMJakAlrkkFcAyl6QCWOaSVADLXJIKYJlL\nUgEsc0kqgGUuSQWwzCWpAJa5JBXAMpekAljmklQAy1ySCmCZS1IBBn6hsyS9GS9eeo2XLl1uvH76\n3Bob6xuN199689u47eYbh4lWFMtcUqdeunSZo08sd7b/Q3fOWOZ4zCJJRfDKXJJ2MSnHRJa5JO1i\nUo6JPGaRpAJY5pJUAMtckgpgmUtSAQa+ARoR7wN+H3gmM4/Ws8PAfL1kPjOXdptLkrrV5Mr8RuBT\nm08iYh+wANxR/xzbaR4RUyPOK0nqY2CZZ+ZTwCtbRrPAmcxcy8w1YCUiZvvNgZkuQkuSrjXM58xv\nAS5ExPH6+UXgIDC1w/zsm04pSdrVMGX+MnAAOEJV4CeA81RX+f3mO+r1es2DnlsbImpz0zdMt8rT\nev8Jzj/J2cH8A/c3/+77T0j+pmW+9ex7BZjb8nw2M5cjYn+/+W6brq6uNnx5Wt0eO4yN9Y1WeYbZ\nv0td5p/k7Jv7d8n8g/fvkvkrA8/MI+JBqjc574qIRzPzCtUbnSeBxfp37DSXJHVv4JV5Zn4a+PS2\n2SJVYW9f23cuSeqWNw1JUgEsc0kqgGUuSQWwzCWpAJa5JBXAMpekAljmklQAy1ySCmCZS1IBLHNJ\nKoBlLkkFsMwlqQCWuSQVwDKXpAJY5pJUAMtckgpgmUtSASxzSSqAZS5JBbDMJakAlrkkFcAyl6QC\nWOaSVADLXJIKYJlLUgEsc0kqgGUuSQWwzCWpAJa5JBVgetQbRsRhYL5+Op+ZS6N+DUnStUZ6ZR4R\n+4AF4I7651hETI3yNSRJbzTqY5ZZ4ExmrmXmGrACzIz4NSRJ24z6mOUW4EJEHK+fXwQOAmdH/DqS\npC2mrl69OrLNImIO+E3gCDAFnAA+mZnL29eeOnVqdC8sSW8hhw4desPx9aivzFeAuS3PZ/sV+U5h\nJEnDGemZeWZeoXoD9CSwCBwb5f6SpP5GeswiSRoPbxqSpAJY5pJUgJHfAar26hurZqg+xnl+pzeN\nJe1dEfFtAPU9NtedV+ZjFhEfBJ4Bfgu4G/hERJyOiA+NN5kmVUT84LgzvBVExB9sefwzwFeAkxHx\n0XHkKfbKPCIezcxfGXeOBhaAOzJzrb7Z6kg9X6T6VNCeFhHvAI4Cl4E/ycyv1/M/yswHxhqugTr/\ng8DXgC8Aj1P9ubg/M/f8zW4R8avAVar7OjbdGxF/npknxhSrkYj4SGY+Vj+eAR4C1oGjmfkvYw3X\nzLu2PP414L2ZeTkiTgOPXO8wE1/mEfHQDr/6wHUNMjqT9vGiz1P9hXQJ+GhEvJCZfwj80HhjNfZn\nVP8w3LuBvwfuAV4EHgV+cnyxGvsN4Dngy/XzKWAD+O+xJWruHuCx+vFx4HeAV4HPAj81pkxtbETE\nezLzWeA/gSsR8b1Ud75fdxNf5lSl/SnglS2zKa79W3MvWwCeiogzwAWqu2bn6vkkeH3Lv4z5jxFx\nV0Q8zOQc4e3LzNPA6Yj46cz8KkBEvG3MuZqaA+4Dfh74TGb+XUT8xOYV7x53U0R8P9Wf11sy858A\nIuLm8cZq7BeAhyPiEeAmquPSb1H9/7juSijzjwM3ZuYzW4cR8bdjytNKZj4VEUtU/0jZLcDLwEp9\nA9YkeGHrk8z8m4j4d+Bnx5SnrWc3H2Tm+7fMz48hS2uZ+RrwSER8DnggIu6nKpZJ8AL/f9Hy3Jb5\nf4whS2uZ+a/A3RGxH/hu4OXMXB9XHm8akgoSEW+nOrt9YtxZdH1Z5pJUgEk515Qk7cIyl6QCWOaS\nVADLXJIK8L9YXAgM1R+ruQAAAABJRU5ErkJggg==\n", + "text": [ + "" + ] + } + ], + "prompt_number": 1152 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Population Import" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "population.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUMLEVREGIONDIVISIONSTATENAMECENSUS2010POPESTIMATESBASE2010POPESTIMATE2010POPESTIMATE2011POPESTIMATE2012POPESTIMATE2013POPESTIMATE2014NPOPCHG_2010NPOPCHG_2011NPOPCHG_2012NPOPCHG_2013NPOPCHG_2014BIRTHS2010BIRTHS2011BIRTHS2012BIRTHS2013BIRTHS2014DEATHS2010DEATHS2011DEATHS2012DEATHS2013DEATHS2014NATURALINC2010NATURALINC2011NATURALINC2012...NETMIG2014RESIDUAL2010RESIDUAL2011RESIDUAL2012RESIDUAL2013RESIDUAL2014RBIRTH2011RBIRTH2012RBIRTH2013RBIRTH2014RDEATH2011RDEATH2012RDEATH2013RDEATH2014RNATURALINC2011RNATURALINC2012RNATURALINC2013RNATURALINC2014RINTERNATIONALMIG2011RINTERNATIONALMIG2012RINTERNATIONALMIG2013RINTERNATIONALMIG2014RDOMESTICMIG2011RDOMESTICMIG2012RDOMESTICMIG2013RDOMESTICMIG2014RNETMIG2011RNETMIG2012RNETMIG2013RNETMIG2014
0 10 0 0 0 United States 308745538 308758105 309347057 311721632 314112078 316497531 318857056 588952 2374575 2390446 2385453 2359525 987836 3973485 3936976 3955128 3957577 598716 2512492 2501531 2568627 2593996 389120 1460993 1435445... 995944 0 0 0 0 0 12.795638 12.581540 12.543824 12.457853 8.090867 7.994235 8.146489 8.165506 4.704771 4.587305 4.397335 4.292346 2.941968 3.051932 3.168211 3.135081 X X X X 2.941968 3.051932 3.168211 3.135081
1 20 1 0 0 Northeast Region 55317240 55318348 55381690 55635670 55832038 56028220 56152333 63342 253980 196368 196182 124113 158640 646248 637883 638177 637853 110781 470859 460981 473411 478007 47859 175389 176902... -24492-2273 -9005-3141-16931-11241 11.642287 11.445162 11.410254 11.371900 8.482619 8.271113 8.464329 8.522101 3.159668 3.174049 2.945926 2.849799 4.469517 4.393219 4.692605 4.674678 -2.891457696 -3.987594326 -3.82818713 -5.111331551 1.578060 0.405624 0.864418-0.436653
2 20 2 0 0 Midwest Region 66927001 66929898 66972390 67149657 67331458 67567871 67745108 42492 177267 181801 236413 177237 208629 834908 830703 831117 829620 140802 586908 584851 584006 586099 67827 248000 245852... -54450 -951 -1475 -432 -7258-11834 12.449974 12.354196 12.322033 12.262238 8.751850 8.697890 8.658397 8.662864 3.698124 3.656305 3.663636 3.599374 1.695247 1.783328 1.891826 1.886101 -2.728007872 -2.729468744 -1.942826565 -2.690902253-1.032761-0.946140-0.051001-0.804801
3 20 3 0 0 South Region 114555744 114562951 114871231 116089908 117346322 118522802 119771934 308280 1218677 1256414 1176480 1249132 377272 1509622 1505000 1508186 1511280 228750 962677 960469 999068 1007640 148522 546945 544531... 724245 1874 16042 5124 14387 21247 13.072520 12.894314 12.788329 12.684124 8.336268 8.228963 8.471376 8.457090 4.736251 4.665351 4.316953 4.227034 2.870855 3.052174 3.064411 3.012706 2.807069634 3.003089966 2.472345639 3.065858744 5.677925 6.055264 5.536757 6.078565
4 20 4 0 0 West Region 71945553 71946908 72121746 72846397 73602260 74378638 75187681 174838 724651 755863 776378 809043 243295 982707 963390 977648 978824 118383 492048 495230 512142 522250 124912 490659 468160... 350641 1350 -5562-1551 9802 1828 13.557558 13.156693 13.213165 13.088829 6.788360 6.763189 6.921731 6.983524 6.769198 6.393503 6.291434 6.105305 3.038902 3.195577 3.344905 3.305249 0.266017066 0.754667214 0.724134003 1.383520042 3.304919 3.950244 4.069039 4.688770
\n", + "

5 rows \u00d7 76 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1153, + "text": [ + " SUMLEV REGION DIVISION STATE NAME CENSUS2010POP ESTIMATESBASE2010 POPESTIMATE2010 POPESTIMATE2011 POPESTIMATE2012 POPESTIMATE2013 POPESTIMATE2014 NPOPCHG_2010 NPOPCHG_2011 NPOPCHG_2012 NPOPCHG_2013 NPOPCHG_2014 BIRTHS2010 BIRTHS2011 BIRTHS2012 BIRTHS2013 BIRTHS2014 DEATHS2010 DEATHS2011 DEATHS2012 DEATHS2013 DEATHS2014 NATURALINC2010 NATURALINC2011 NATURALINC2012 ... NETMIG2014 RESIDUAL2010 RESIDUAL2011 RESIDUAL2012 RESIDUAL2013 RESIDUAL2014 RBIRTH2011 RBIRTH2012 RBIRTH2013 RBIRTH2014 RDEATH2011 RDEATH2012 RDEATH2013 RDEATH2014 RNATURALINC2011 RNATURALINC2012 RNATURALINC2013 RNATURALINC2014 RINTERNATIONALMIG2011 RINTERNATIONALMIG2012 RINTERNATIONALMIG2013 RINTERNATIONALMIG2014 RDOMESTICMIG2011 RDOMESTICMIG2012 RDOMESTICMIG2013 RDOMESTICMIG2014 RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014\n", + "0 10 0 0 0 United States 308745538 308758105 309347057 311721632 314112078 316497531 318857056 588952 2374575 2390446 2385453 2359525 987836 3973485 3936976 3955128 3957577 598716 2512492 2501531 2568627 2593996 389120 1460993 1435445 ... 995944 0 0 0 0 0 12.795638 12.581540 12.543824 12.457853 8.090867 7.994235 8.146489 8.165506 4.704771 4.587305 4.397335 4.292346 2.941968 3.051932 3.168211 3.135081 X X X X 2.941968 3.051932 3.168211 3.135081\n", + "1 20 1 0 0 Northeast Region 55317240 55318348 55381690 55635670 55832038 56028220 56152333 63342 253980 196368 196182 124113 158640 646248 637883 638177 637853 110781 470859 460981 473411 478007 47859 175389 176902 ... -24492 -2273 -9005 -3141 -16931 -11241 11.642287 11.445162 11.410254 11.371900 8.482619 8.271113 8.464329 8.522101 3.159668 3.174049 2.945926 2.849799 4.469517 4.393219 4.692605 4.674678 -2.891457696 -3.987594326 -3.82818713 -5.111331551 1.578060 0.405624 0.864418 -0.436653\n", + "2 20 2 0 0 Midwest Region 66927001 66929898 66972390 67149657 67331458 67567871 67745108 42492 177267 181801 236413 177237 208629 834908 830703 831117 829620 140802 586908 584851 584006 586099 67827 248000 245852 ... -54450 -951 -1475 -432 -7258 -11834 12.449974 12.354196 12.322033 12.262238 8.751850 8.697890 8.658397 8.662864 3.698124 3.656305 3.663636 3.599374 1.695247 1.783328 1.891826 1.886101 -2.728007872 -2.729468744 -1.942826565 -2.690902253 -1.032761 -0.946140 -0.051001 -0.804801\n", + "3 20 3 0 0 South Region 114555744 114562951 114871231 116089908 117346322 118522802 119771934 308280 1218677 1256414 1176480 1249132 377272 1509622 1505000 1508186 1511280 228750 962677 960469 999068 1007640 148522 546945 544531 ... 724245 1874 16042 5124 14387 21247 13.072520 12.894314 12.788329 12.684124 8.336268 8.228963 8.471376 8.457090 4.736251 4.665351 4.316953 4.227034 2.870855 3.052174 3.064411 3.012706 2.807069634 3.003089966 2.472345639 3.065858744 5.677925 6.055264 5.536757 6.078565\n", + "4 20 4 0 0 West Region 71945553 71946908 72121746 72846397 73602260 74378638 75187681 174838 724651 755863 776378 809043 243295 982707 963390 977648 978824 118383 492048 495230 512142 522250 124912 490659 468160 ... 350641 1350 -5562 -1551 9802 1828 13.557558 13.156693 13.213165 13.088829 6.788360 6.763189 6.921731 6.983524 6.769198 6.393503 6.291434 6.105305 3.038902 3.195577 3.344905 3.305249 0.266017066 0.754667214 0.724134003 1.383520042 3.304919 3.950244 4.069039 4.688770\n", + "\n", + "[5 rows x 76 columns]" + ] + } + ], + "prompt_number": 1153 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "state_pop = population[[\"NAME\",\"POPESTIMATE2014\"]]\n", + "state_pop = state_pop.rename(columns={'NAME':'Name','POPESTIMATE2014':'Population'})\n", + "complaints_by_state = complaints[\"State\"].value_counts()\n", + "def switch_state(state):\n", + " try:\n", + " return states[state]\n", + " except:\n", + " return 'NONE'\n", + " \n", + "state_map = lambda x: switch_state(x)\n", + "complaints_by_state = complaints['State'].map(state_map)\n", + "complaint_dict = dict(complaints_by_state.value_counts())" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1154 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "states = {\n", + " 'AK': 'Alaska','AL': 'Alabama','AR': 'Arkansas','AS': 'American Samoa','AZ': 'Arizona','CA': 'California','CO': 'Colorado','CT': 'Connecticut','DC': 'District of Columbia','DE': 'Delaware',\n", + " 'FL': 'Florida','GA': 'Georgia','GU': 'Guam','HI': 'Hawaii','IA': 'Iowa','ID': 'Idaho','IL': 'Illinois','IN': 'Indiana',\n", + " 'KS': 'Kansas','KY': 'Kentucky','LA': 'Louisiana','MA': 'Massachusetts','MD': 'Maryland','ME': 'Maine','MI': 'Michigan','MN': 'Minnesota','MO': 'Missouri','MP': 'Northern Mariana Islands','MS': 'Mississippi','MT': 'Montana','NA': 'National','NC': 'North Carolina','ND': 'North Dakota','NE': 'Nebraska','NH': 'New Hampshire','NJ': 'New Jersey','NM': 'New Mexico','NV': 'Nevada','NY': 'New York','OH': 'Ohio','OK': 'Oklahoma','OR': 'Oregon','PA': 'Pennsylvania','PR': 'Puerto Rico','RI': 'Rhode Island','SC': 'South Carolina','SD': 'South Dakota','TN': 'Tennessee','TX': 'Texas','UT': 'Utah','VA': 'Virginia','VI': 'Virgin Islands','VT': 'Vermont','WA': 'Washington','WI': 'Wisconsin','WV': 'West Virginia','WY': 'Wyoming'\n", + "}" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1155 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def complaints(state):\n", + " try:\n", + " \n", + " return complaint_dict[state]\n", + " except:\n", + " return 'NONE'\n", + "complaint_map = lambda x: complaints(x)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1156 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1156 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "state_pop['Complaints'] = state_pop['Name'].map(complaint_map)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1157 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1157 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "state_pop" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePopulationComplaints
0 United States 318857056 NONE
1 Northeast Region 56152333 NONE
2 Midwest Region 67745108 NONE
3 South Region 119771934 NONE
4 West Region 75187681 NONE
5 Alabama 4849377 147
6 Alaska 736732 15
7 Arizona 6731484 213
8 Arkansas 2966369 59
9 California 38802500 1591
10 Colorado 5355866 180
11 Connecticut 3596677 109
12 Delaware 935614 44
13 District of Columbia 658893 82
14 Florida 19893297 1093
15 Georgia 10097343 512
16 Hawaii 1419561 48
17 Idaho 1634464 39
18 Illinois 12880580 427
19 Indiana 6596855 132
20 Iowa 3107126 51
21 Kansas 2904021 56
22 Kentucky 4413457 59
23 Louisiana 4649676 127
24 Maine 1330089 39
25 Maryland 5976407 342
26 Massachusetts 6745408 200
27 Michigan 9909877 287
28 Minnesota 5457173 135
29 Mississippi 2994079 57
30 Missouri 6063589 119
31 Montana 1023579 14
32 Nebraska 1881503 37
33 Nevada 2839099 159
34 New Hampshire 1326813 46
35 New Jersey 8938175 465
36 New Mexico 2085572 55
37 New York 19746227 733
38 North Carolina 9943964 287
39 North Dakota 739482 8
40 Ohio 11594163 348
41 Oklahoma 3878051 93
42 Oregon 3970239 120
43 Pennsylvania 12787209 418
44 Rhode Island 1055173 40
45 South Carolina 4832482 130
46 South Dakota 853175 22
47 Tennessee 6549352 192
48 Texas 26956958 1099
49 Utah 2942902 70
50 Vermont 626562 18
51 Virginia 8326289 373
52 Washington 7061530 231
53 West Virginia 1850326 26
54 Wisconsin 5757564 143
55 Wyoming 584153 8
56 Puerto Rico 3548397 27
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1158, + "text": [ + " Name Population Complaints\n", + "0 United States 318857056 NONE\n", + "1 Northeast Region 56152333 NONE\n", + "2 Midwest Region 67745108 NONE\n", + "3 South Region 119771934 NONE\n", + "4 West Region 75187681 NONE\n", + "5 Alabama 4849377 147\n", + "6 Alaska 736732 15\n", + "7 Arizona 6731484 213\n", + "8 Arkansas 2966369 59\n", + "9 California 38802500 1591\n", + "10 Colorado 5355866 180\n", + "11 Connecticut 3596677 109\n", + "12 Delaware 935614 44\n", + "13 District of Columbia 658893 82\n", + "14 Florida 19893297 1093\n", + "15 Georgia 10097343 512\n", + "16 Hawaii 1419561 48\n", + "17 Idaho 1634464 39\n", + "18 Illinois 12880580 427\n", + "19 Indiana 6596855 132\n", + "20 Iowa 3107126 51\n", + "21 Kansas 2904021 56\n", + "22 Kentucky 4413457 59\n", + "23 Louisiana 4649676 127\n", + "24 Maine 1330089 39\n", + "25 Maryland 5976407 342\n", + "26 Massachusetts 6745408 200\n", + "27 Michigan 9909877 287\n", + "28 Minnesota 5457173 135\n", + "29 Mississippi 2994079 57\n", + "30 Missouri 6063589 119\n", + "31 Montana 1023579 14\n", + "32 Nebraska 1881503 37\n", + "33 Nevada 2839099 159\n", + "34 New Hampshire 1326813 46\n", + "35 New Jersey 8938175 465\n", + "36 New Mexico 2085572 55\n", + "37 New York 19746227 733\n", + "38 North Carolina 9943964 287\n", + "39 North Dakota 739482 8\n", + "40 Ohio 11594163 348\n", + "41 Oklahoma 3878051 93\n", + "42 Oregon 3970239 120\n", + "43 Pennsylvania 12787209 418\n", + "44 Rhode Island 1055173 40\n", + "45 South Carolina 4832482 130\n", + "46 South Dakota 853175 22\n", + "47 Tennessee 6549352 192\n", + "48 Texas 26956958 1099\n", + "49 Utah 2942902 70\n", + "50 Vermont 626562 18\n", + "51 Virginia 8326289 373\n", + "52 Washington 7061530 231\n", + "53 West Virginia 1850326 26\n", + "54 Wisconsin 5757564 143\n", + "55 Wyoming 584153 8\n", + "56 Puerto Rico 3548397 27" + ] + } + ], + "prompt_number": 1158 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "new_table = state_pop[5:]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1159 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "new_table['Whining Ratio'] = new_table['Complaints'] / new_table['Population']" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1160 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "max(new_table['Whining Ratio'])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1161, + "text": [ + "0.00012445116278363862" + ] + } + ], + "prompt_number": 1161 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "new_table['Whining Ratio'].mean()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1162, + "text": [ + "3.1289281006838172e-05" + ] + } + ], + "prompt_number": 1162 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "New Population" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "population = population[['NAME','POPESTIMATE2014']]\n" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1163 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1163 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..849746c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +numpy +seaborn +pandas