From e7d318cc89f413abaa5576ea71ed86debc45ba60 Mon Sep 17 00:00:00 2001 From: Sanders Kleinfeld Date: Mon, 7 Oct 2024 14:39:22 -0400 Subject: [PATCH] Pin TF versions --- ml/cc/exercises/intro_to_ml_fairness.ipynb | 230 +++++++-------------- ml/cc/exercises/intro_to_neural_nets.ipynb | 7 +- 2 files changed, 81 insertions(+), 156 deletions(-) diff --git a/ml/cc/exercises/intro_to_ml_fairness.ipynb b/ml/cc/exercises/intro_to_ml_fairness.ipynb index d72c88e..dd2ba18 100644 --- a/ml/cc/exercises/intro_to_ml_fairness.ipynb +++ b/ml/cc/exercises/intro_to_ml_fairness.ipynb @@ -28,9 +28,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "y5T8lbpLd1sr", - "colab": {} + "id": "y5T8lbpLd1sr" }, "source": [ "#@title Copyright 2020 Google LLC. Double-click here for license information.\n", @@ -46,7 +44,7 @@ "# See the License for the specific language governing permissions and\n", "# limitations under the License." ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { @@ -63,7 +61,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "84x4Fxc5lzFv" }, "source": [ @@ -74,7 +71,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "J8daw3YOIAXH" }, "source": [ @@ -85,7 +81,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xFxZOg55lWJE" }, "source": [ @@ -99,7 +94,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "l-K-xqksm-X3" }, "source": [ @@ -113,7 +107,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "TXkkHYyJ98_k" }, "source": [ @@ -138,7 +131,7 @@ "* `occupation`: The occupation of the individual. Example include: `tech-support`, `Craft-repair`, `Other-service`, `Sales`, `Exec-managerial` and more.\n", "* `relationship`: The relationship of each individual in a household. Examples include: `Wife`, `Own-child`, `Husband`, `Not-in-family`, `Other-relative`, and `Unmarried`.\n", "* `gender`: Gender of the individual available only in binary choices: `Female` or `Male`.\n", - "* `race`: `White`, `Asian-Pac-Islander`, `Amer-Indian-Eskimo`, `Black`, and `Other`. \n", + "* `race`: `White`, `Asian-Pac-Islander`, `Amer-Indian-Eskimo`, `Black`, and `Other`.\n", "* `native_country`: Country of origin of the individual. Examples include: `United-States`, `Cambodia`, `England`, `Puerto-Rico`, `Canada`, `Germany`, `Outlying-US(Guam-USVI-etc)`, `India`, `Japan`, and more.\n", "\n", "### Prediction Task\n", @@ -149,7 +142,7 @@ "\n", "### Notes on Data Collection\n", "\n", - "All the examples extracted for this dataset meet the following conditions: \n", + "All the examples extracted for this dataset meet the following conditions:\n", "* `age` is 16 years or older.\n", "* The adjusted gross income (used to calculate `income_bracket`) is greater than $100 USD annually.\n", "* `fnlwgt` is greater than 0.\n", @@ -163,12 +156,11 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "jUsgiVsUeKRR" }, "source": [ "## Setup\n", - "We'll import the necessary modules to run the code in the rest of this Colaboratory notebook. 
\n", + "We'll import the necessary modules to run the code in the rest of this Colaboratory notebook.\n", "\n", "In addition to importing the usual libraries, this setup code cell also installs [Facets](https://pair-code.github.io/facets/), an open-source tool created by [PAIR](https://research.google/teams/brain/pair/) that contains two robust visualizations we'll be using to aid in understanding and analyzing ML datasets." ] @@ -176,10 +168,7 @@ { "cell_type": "code", "metadata": { - "cellView": "form", - "colab_type": "code", - "id": "2e_0DJJ8zE29", - "colab": {} + "id": "2e_0DJJ8zE29" }, "source": [ "#@title Import relevant modules and install Facets\n", @@ -191,7 +180,7 @@ "from matplotlib import rcParams\n", "import seaborn as sns\n", "\n", - "# The following lines adjust the granularity of reporting. \n", + "# The following lines adjust the granularity of reporting.\n", "pd.options.display.max_rows = 10\n", "pd.options.display.float_format = \"{:.1f}\".format\n", "\n", @@ -199,16 +188,16 @@ "# For facets\n", "from IPython.core.display import display, HTML\n", "import base64\n", + "!pip install tensorflow==2.15.1\n", "!pip install facets-overview==1.1.1\n", "from facets_overview.feature_statistics_generator import FeatureStatisticsGenerator" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "-xgIRapb5LaQ" }, "source": [ @@ -220,9 +209,7 @@ { "cell_type": "code", "metadata": { - "colab_type": "code", - "id": "TeCNVvVUVS0P", - "colab": {} + "id": "TeCNVvVUVS0P" }, "source": [ "COLUMNS = [\"age\", \"workclass\", \"fnlwgt\", \"education\", \"education_num\",\n", @@ -230,33 +217,30 @@ " \"capital_gain\", \"capital_loss\", \"hours_per_week\", \"native_country\",\n", " \"income_bracket\"]\n", "\n", - "train_csv = tf.keras.utils.get_file('adult.data', \n", + "train_csv = tf.keras.utils.get_file('adult.data',\n", " 'https://download.mlcc.google.com/mledu-datasets/adult_census_train.csv')\n", - "test_csv = tf.keras.utils.get_file('adult.test' , \n", + "test_csv = tf.keras.utils.get_file('adult.test' ,\n", " 'https://download.mlcc.google.com/mledu-datasets/adult_census_test.csv')\n", "\n", - "train_df = pd.read_csv(train_csv, names=COLUMNS, sep=r'\\s*,\\s*', \n", + "train_df = pd.read_csv(train_csv, names=COLUMNS, sep=r'\\s*,\\s*',\n", " engine='python', na_values=\"?\")\n", "test_df = pd.read_csv(test_csv, names=COLUMNS, sep=r'\\s*,\\s*', skiprows=[0],\n", " engine='python', na_values=\"?\")\n", "# Strip trailing periods mistakenly included only in UCI test dataset.\n", "test_df['income_bracket'] = test_df.income_bracket.str.rstrip('.')" - - ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "coilRN-hooja" }, "source": [ "## Analyzing the Adult Dataset with Facets\n", "\n", - "As mentioned in MLCC, it is important to understand your dataset *before* diving straight into the prediction task. 
\n", + "As mentioned in MLCC, it is important to understand your dataset *before* diving straight into the prediction task.\n", "\n", "Some important questions to investigate when auditing a dataset for fairness:\n", "\n", @@ -269,7 +253,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "9yCIuAqWA1Pm" }, "source": [ @@ -280,9 +263,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "MW-qryqs1gig", - "colab": {} + "id": "MW-qryqs1gig" }, "source": [ "#@title Visualize the Data in Facets\n", @@ -302,13 +283,12 @@ "html = HTML_TEMPLATE.format(protostr=protostr)\n", "display(HTML(html))" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "91wjnZFpPWw-" }, "source": [ @@ -327,7 +307,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "KlF-lQ8yQ69b" }, "source": [ @@ -339,7 +318,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xX_qjj5AQ_Hb" }, "source": [ @@ -348,7 +326,7 @@ "* workclass\n", "* occupation\n", "\n", - "Now, because it's only a small percentage of samples that contain either a missing workclass value or occupation value, we can safely drop those rows from the data set. If that percentage was much higher, then we would have to consider using a different data set that is more complete. \n", + "Now, because it's only a small percentage of samples that contain either a missing workclass value or occupation value, we can safely drop those rows from the data set. If that percentage was much higher, then we would have to consider using a different data set that is more complete.\n", "\n", "Luckily, in Pandas, there is a convenient way to drop any row containing a missing value in the data set:\n", "\n", @@ -357,7 +335,7 @@ "```\n", "We will use this method prior to training the model when we convert a Pandas DataFrame to a Numpy array.\n", "\n", - "As for the remaining data that does not contain any missing values: if we look at the min/max values and histograms for each numeric feature, then we can pinpoint any extreme outliers in our data set. \n", + "As for the remaining data that does not contain any missing values: if we look at the min/max values and histograms for each numeric feature, then we can pinpoint any extreme outliers in our data set.\n", "\n", "For `hours_per_week`, we can see that the minimum is 1, which might be a bit surprising, given that most jobs typically require multiple hours of work per week. For `capital_gain` and `capital_loss`, we can see that over 90% of values are 0. Given that capital gains/losses are only registered by individuals who make investments, it's certainly plausible that less than 10% of examples would have nonzero values for these feature, but we may want to take a closer look to verify the values for these features are valid.\n", "\n", @@ -367,7 +345,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "hKj2hz-Sql7V" }, "source": [ @@ -376,22 +353,20 @@ "To futher explore the dataset, we can use [Facets Dive](https://pair-code.github.io/facets/), a tool that provides an interactive interface where each individual item in the visualization represents a data point. But to use Facets Dive, we need to convert the data to a JSON array.\n", "Thankfully the DataFrame method `to_json()` takes care of this for us.\n", "\n", - "Run the cell below to perform the data transform to JSON and also load Facets Dive. 
" + "Run the cell below to perform the data transform to JSON and also load Facets Dive." ] }, { "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "stlklrG_xssF", - "colab": {} + "id": "stlklrG_xssF" }, "source": [ "#@title Set the Number of Data Points to Visualize in Facets Dive\n", "\n", "SAMPLE_SIZE = 5000 #@param\n", - " \n", + "\n", "train_dive = train_df.sample(SAMPLE_SIZE).to_json(orient='records')\n", "\n", "HTML_TEMPLATE = \"\"\"\n", @@ -404,13 +379,12 @@ "html = HTML_TEMPLATE.format(jsonstr=train_dive)\n", "display(HTML(html))" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "LxqAPDcRDFB2" }, "source": [ @@ -434,7 +408,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "qZ-9vJgSEpHj" }, "source": [ @@ -446,7 +419,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "uYpbgdATEx8L" }, "source": [ @@ -458,13 +430,12 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "7YVH8hYfSjer" }, "source": [ "### Summary\n", "\n", - "Plotting histograms, ranking most-to-least common examples, identifying duplicate or missing examples, making sure the training and test sets are similar, computing feature quantiles—**these are all critical analyses to perform on your data.** \n", + "Plotting histograms, ranking most-to-least common examples, identifying duplicate or missing examples, making sure the training and test sets are similar, computing feature quantiles—**these are all critical analyses to perform on your data.**\n", "\n", "**The better you know what's going on in your data, the more insight you'll have as to where unfairness might creep in!**" ] @@ -472,7 +443,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "2ivWw9Wpj67m" }, "source": [ @@ -489,9 +459,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "8bFDVCV1sxiX", - "colab": {} + "id": "8bFDVCV1sxiX" }, "source": [ "feature = 'fnlwgt' #@param [\"\", \"hours_per_week\", \"fnlwgt\", \"gender\", \"capital_gain / capital_loss\", \"age\"] {allow-input: false}\n", @@ -509,8 +477,8 @@ "if feature == \"fnlwgt\":\n", " print(\n", "\"\"\"'fnlwgt' represents the weight of the observations. After fitting the model\n", - "to this data set, if certain group of individuals end up performing poorly \n", - "compared to other groups, then we could explore ways of reweighting each data \n", + "to this data set, if certain group of individuals end up performing poorly\n", + "compared to other groups, then we could explore ways of reweighting each data\n", "point using this feature.\"\"\")\n", "if feature == \"gender\":\n", " print(\n", @@ -521,16 +489,16 @@ "case, women).\"\"\")\n", "if feature == \"capital_gain / capital_loss\":\n", " print(\n", - "\"\"\"As alluded to in Task #1, both 'capital_gain' and 'capital_loss' could be \n", - "indicative of income status as only individuals who make investments register \n", - "their capital gains and losses. The caveat is that over 90% of the values in \n", - "both 'capital_gain' and 'capital_loss' are 0, and it's not entirely clear from \n", - "the description of the data set why that is the case. That is, we don't know \n", + "\"\"\"As alluded to in Task #1, both 'capital_gain' and 'capital_loss' could be\n", + "indicative of income status as only individuals who make investments register\n", + "their capital gains and losses. 
The caveat is that over 90% of the values in\n", + "both 'capital_gain' and 'capital_loss' are 0, and it's not entirely clear from\n", + "the description of the data set why that is the case. That is, we don't know\n", "whether we should interpret all these 0s as \"no investment gain/loss or \"\n", - "investment gain/loss is unknown.\" Lack of context is always a flag for concern, \n", - "and one that could trigger fairness-related issues later on. For now, we are \n", - "going to omit these features from the model, but you are more than welcome to \n", - "experiment with them if you come up with an idea on how capital gains and \n", + "investment gain/loss is unknown.\" Lack of context is always a flag for concern,\n", + "and one that could trigger fairness-related issues later on. For now, we are\n", + "going to omit these features from the model, but you are more than welcome to\n", + "experiment with them if you come up with an idea on how capital gains and\n", "losses should be handled.\"\"\")\n", "if feature == \"age\":\n", " print(\n", @@ -538,17 +506,16 @@ "fine-grained correlations between income and age, as well as to prevent\n", "overfitting.''')\n" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "n3OT-YVpftEI" }, "source": [ - "## Predicting income using the Keras API \n", + "## Predicting income using the Keras API\n", "\n", "Now that we have a better sense of the Adult dataset, we can now begin with creating a neural network to predict income. In this section, as with previous exercises, we will be using TensorFlow's Keras API (specifically, `tf.keras.Sequential`) to construct our neural network model." ] @@ -556,12 +523,11 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "ECBRATBVG4rn" }, "source": [ "### Convert Adult Dataset into Tensors\n", - "We first have to define our input function, which will take the Adult dataset that is in a pandas DataFrame and convert it to a Numpy array. \n", + "We first have to define our input function, which will take the Adult dataset that is in a pandas DataFrame and convert it to a Numpy array.\n", "\n", "While a pandas DataFrame is great — especially when working with Facets and other Python modules that visualize data — `tf.keras.Sequential` doesn't accept a pandas DataFrame as a data type. Luckily for us, it's quite trivial to convert a pandas DataFrame into a Numpy array, which is an accepted data type." 
] @@ -569,9 +535,7 @@ { "cell_type": "code", "metadata": { - "colab_type": "code", - "id": "Bt-rQvJLx4Hm", - "colab": {} + "id": "Bt-rQvJLx4Hm" }, "source": [ "def pandas_to_numpy(data):\n", @@ -583,16 +547,15 @@ " labels = np.array(data['income_bracket'] == \">50K\")\n", " features = data.drop('income_bracket', axis=1)\n", " features = {name:np.array(value) for name, value in features.items()}\n", - " \n", + "\n", " return features, labels" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "0mz2sts6IjBO" }, "source": [ @@ -604,9 +567,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "tAG5hUJwx725", - "colab": {} + "id": "tAG5hUJwx725" }, "source": [ "#@title Create categorical feature columns\n", @@ -650,16 +611,14 @@ " \"Local-gov\", \"?\", \"Self-emp-inc\", \"Without-pay\", \"Never-worked\"\n", " ])" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "Jwtuu8MmyKCJ", - "colab": {} + "id": "Jwtuu8MmyKCJ" }, "source": [ "#@title Create numeric feature columns\n", @@ -672,13 +631,12 @@ "capital_loss = tf.feature_column.numeric_column(\"capital_loss\")\n", "hours_per_week = tf.feature_column.numeric_column(\"hours_per_week\")" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "3WqAbug6jePb" }, "source": [ @@ -690,21 +648,18 @@ { "cell_type": "code", "metadata": { - "colab_type": "code", - "id": "HxVm8X15yLR7", - "colab": {} + "id": "HxVm8X15yLR7" }, "source": [ "age_buckets = tf.feature_column.bucketized_column(\n", " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "2lx4JuLdi7jw" }, "source": [ @@ -722,7 +677,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "5aD1OM8egad9" }, "source": [ @@ -736,24 +690,21 @@ { "cell_type": "code", "metadata": { - "colab_type": "code", - "id": "O68xV_24gbnD", - "colab": {} + "id": "O68xV_24gbnD" }, "source": [ "# List of variables, with special handling for gender subgroup.\n", - "variables = [native_country, education, occupation, workclass, \n", + "variables = [native_country, education, occupation, workclass,\n", " relationship, age_buckets]\n", "subgroup_variables = [gender]\n", "feature_columns = variables + subgroup_variables" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "3nYSMg67jWaA" }, "source": [ @@ -763,7 +714,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "_kRL5rScH1F7" }, "source": [ @@ -791,9 +741,7 @@ "cell_type": "code", "metadata": { "cellView": "code", - "colab_type": "code", - "id": "bnyw4cyLTSUB", - "colab": {} + "id": "bnyw4cyLTSUB" }, "source": [ "deep_columns = [\n", @@ -805,13 +753,12 @@ " tf.feature_column.embedding_column(occupation, dimension=8),\n", "]" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "lBaCn_Z1PshC" }, "source": [ @@ -823,9 +770,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "tQZ1kumWk8XO", - "colab": {} + "id": "tQZ1kumWk8XO" }, "source": [ "#@title 
Define Deep Neural Net Model\n", @@ -845,7 +790,7 @@ " tf.keras.metrics.TruePositives(name='tp'),\n", " tf.keras.metrics.FalsePositives(name='fp'),\n", " tf.keras.metrics.TrueNegatives(name='tn'),\n", - " tf.keras.metrics.FalseNegatives(name='fn'), \n", + " tf.keras.metrics.FalseNegatives(name='fn'),\n", " tf.keras.metrics.BinaryAccuracy(name='accuracy'),\n", " tf.keras.metrics.Precision(name='precision'),\n", " tf.keras.metrics.Recall(name='recall'),\n", @@ -865,17 +810,16 @@ " 1, activation='sigmoid', kernel_regularizer=regularizer)\n", "])\n", "\n", - "model.compile(optimizer=tf.keras.optimizers.Adagrad(LEARNING_RATE), \n", + "model.compile(optimizer=tf.keras.optimizers.Adagrad(LEARNING_RATE),\n", " loss=tf.keras.losses.BinaryCrossentropy(),\n", " metrics=METRICS)" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Tjhqo9XOP2VV" }, "source": [ @@ -886,9 +830,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "UtrhAXwvqtVD", - "colab": {} + "id": "UtrhAXwvqtVD" }, "source": [ "#@title Fit Deep Neural Net Model to the Adult Training Dataset\n", @@ -899,13 +841,12 @@ "features, labels = pandas_to_numpy(train_df)\n", "model.fit(x=features, y=labels, epochs=EPOCHS, batch_size=BATCH_SIZE)" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "m0UHu5t-P7G7" }, "source": [ @@ -916,9 +857,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "HDV8hYqvncCy", - "colab": {} + "id": "HDV8hYqvncCy" }, "source": [ "#@title Evaluate Deep Neural Net Performance\n", @@ -926,17 +865,16 @@ "features, labels = pandas_to_numpy(test_df)\n", "model.evaluate(x=features, y=labels);" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "7j0LrXMGlTDl" }, "source": [ - "You can try retraining the model using different parameters. If you leave the parameters as is, then you see that this relatively simple deep neural net does a decent job in predicting income with an **overall accuracy of 0.8317** and an **AUC of 0.8817**. \n", + "You can try retraining the model using different parameters. If you leave the parameters as is, then you see that this relatively simple deep neural net does a decent job in predicting income with an **overall accuracy of 0.8317** and an **AUC of 0.8817**.\n", "\n", "**But evaluation metrics with respect to subgroups are missing.** We will cover some of the ways you can evaluate at the subgroup level in the next section." ] @@ -944,7 +882,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "sbwmbnUUU1kY" }, "source": [ @@ -952,9 +889,9 @@ "\n", "While evaluating the overall performance of the model gives us some insight into its quality, it doesn't give us much insight into how well our model performs for different subgroups. \n", "\n", - "When evaluating a model for fairness, it's important to determine whether prediction errors are uniform across subgroups or whether certain subgroups are more susceptible to certain prediction errors than others. 
\n", + "When evaluating a model for fairness, it's important to determine whether prediction errors are uniform across subgroups or whether certain subgroups are more susceptible to certain prediction errors than others.\n", "\n", - "A key tool for comparing the prevalence of different types of model errors is a *confusion matrix*. Recall from the [Classification module of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/classification/true-false-positive-negative) that a confusion matrix is a grid that plots predictions vs. ground truth for your model, and tabulates statistics summarizing how often your model made the correct prediction and how often it made the wrong prediction. \n", + "A key tool for comparing the prevalence of different types of model errors is a *confusion matrix*. Recall from the [Classification module of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/classification/true-false-positive-negative) that a confusion matrix is a grid that plots predictions vs. ground truth for your model, and tabulates statistics summarizing how often your model made the correct prediction and how often it made the wrong prediction.\n", "\n", "Let's start by creating a binary confusion matrix for our income-prediction model—binary because our label (`income_bracket`) has only two possible values (`<50K` or `>50K`). We'll define an income of `>50K` as our **positive label**, and an income of `<50k` as our **negative label**.\n", "\n", @@ -975,7 +912,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "nsUj_XZHU_mI" }, "source": [ @@ -993,27 +929,25 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "ouE72GWSxu1j", - "colab": {} + "id": "ouE72GWSxu1j" }, "source": [ "#@title Define Function to Visualize Binary Confusion Matrix\n", "def plot_confusion_matrix(\n", " confusion_matrix, class_names, subgroup, figsize = (8,6)):\n", - " # We're taking our calculated binary confusion matrix that's already in the \n", - " # form of an array and turning it into a pandas DataFrame because it's a lot \n", - " # easier to work with a pandas DataFrame when visualizing a heat map in \n", + " # We're taking our calculated binary confusion matrix that's already in the\n", + " # form of an array and turning it into a pandas DataFrame because it's a lot\n", + " # easier to work with a pandas DataFrame when visualizing a heat map in\n", " # Seaborn.\n", " df_cm = pd.DataFrame(\n", - " confusion_matrix, index=class_names, columns=class_names, \n", + " confusion_matrix, index=class_names, columns=class_names,\n", " )\n", "\n", " rcParams.update({\n", " 'font.family':'sans-serif',\n", " 'font.sans-serif':['Liberation Sans'],\n", " })\n", - " \n", + "\n", " sns.set_context(\"notebook\", font_scale=1.25)\n", "\n", " fig = plt.figure(figsize=figsize)\n", @@ -1027,7 +961,7 @@ " [\"{0:g}\\n{1}\".format(value, string) for string, value in zip(\n", " strings.flatten(), confusion_matrix.flatten())])).reshape(2, 2)\n", "\n", - " heatmap = sns.heatmap(df_cm, annot=labels, fmt=\"\", \n", + " heatmap = sns.heatmap(df_cm, annot=labels, fmt=\"\",\n", " linewidths=2.0, cmap=sns.color_palette(\"GnBu_d\"));\n", " heatmap.yaxis.set_ticklabels(\n", " heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')\n", @@ -1037,13 +971,12 @@ " plt.xlabel('Predictions')\n", " return fig" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": 
{ - "colab_type": "text", "id": "hUvBYtwXVzlQ" }, "source": [ @@ -1053,7 +986,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "9enf_Jfi-AVS" }, "source": [ @@ -1066,9 +998,7 @@ "cell_type": "code", "metadata": { "cellView": "form", - "colab_type": "code", - "id": "5TBzaWs1VKTa", - "colab": {} + "id": "5TBzaWs1VKTa" }, "source": [ "#@title Visualize Binary Confusion Matrix and Compute Evaluation Metrics Per Subgroup\n", @@ -1078,17 +1008,17 @@ "# Labels for annotating axes in plot.\n", "classes = ['Over $50K', 'Less than $50K']\n", "\n", - "# Given define subgroup, generate predictions and obtain its corresponding \n", + "# Given define subgroup, generate predictions and obtain its corresponding\n", "# ground truth.\n", "subgroup_filter = test_df.loc[test_df[CATEGORY] == SUBGROUP]\n", "features, labels = pandas_to_numpy(subgroup_filter)\n", "subgroup_results = model.evaluate(x=features, y=labels, verbose=0)\n", - "confusion_matrix = np.array([[subgroup_results[1], subgroup_results[4]], \n", + "confusion_matrix = np.array([[subgroup_results[1], subgroup_results[4]],\n", " [subgroup_results[2], subgroup_results[3]]])\n", "\n", "subgroup_performance_metrics = {\n", " 'ACCURACY': subgroup_results[5],\n", - " 'PRECISION': subgroup_results[6], \n", + " 'PRECISION': subgroup_results[6],\n", " 'RECALL': subgroup_results[7],\n", " 'AUC': subgroup_results[8]\n", "}\n", @@ -1098,13 +1028,12 @@ "plot_confusion_matrix(confusion_matrix, classes, SUBGROUP);\n", "performance_df" ], - "execution_count": 0, + "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "TF3B5h3c-7Fb" }, "source": [ @@ -1116,13 +1045,12 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "dhKR49AT_5ZK" }, "source": [ "Using default parameters, you may find that the model performs better for female than male. Specifically, in our run, we found that both accuracy and AUC for female (0.9137 and 0.9089, respectively) outperformed male (0.7923 and 0.8549, respectively). What is going on here?\n", "\n", - "Notice the number of true positives (top-left corner) for female is way lower compared to male (479 to 3822). Recall that in Task #1 we noticed a disproportionately high representation of male in the data set (almost 2-to-1). If you further explore the data set using Facets Dive in Task #2 by setting the color to `income_bracket` and one of the axes to `gender`, then you will also find a disproportionately small number of female examples in the higher income bracket, our positive label. \n", + "Notice the number of true positives (top-left corner) for female is way lower compared to male (479 to 3822). Recall that in Task #1 we noticed a disproportionately high representation of male in the data set (almost 2-to-1). If you further explore the data set using Facets Dive in Task #2 by setting the color to `income_bracket` and one of the axes to `gender`, then you will also find a disproportionately small number of female examples in the higher income bracket, our positive label.\n", "\n", "What this is all suggesting is that the model is **overfitting, particuarly with respect to female and lower income bracket**. In other words, this model will not generalize well, particularly with female data, as it does not have enough positive examples for the model to learn from. 
It is **not doing that much better with male, either, as there is a disproportionately small number of high income bracket compared to low income bracket** — though not nearly as poorly represented as with female.\n", "\n", @@ -1134,4 +1062,4 @@ ] } ] -} +} \ No newline at end of file diff --git a/ml/cc/exercises/intro_to_neural_nets.ipynb b/ml/cc/exercises/intro_to_neural_nets.ipynb index e5a2999..aec2e59 100644 --- a/ml/cc/exercises/intro_to_neural_nets.ipynb +++ b/ml/cc/exercises/intro_to_neural_nets.ipynb @@ -90,6 +90,7 @@ "outputs": [], "source": [ "#@title Import relevant modules\n", + "!pip install tensorflow==2.15.1\n", "import numpy as np\n", "import pandas as pd\n", "import tensorflow as tf\n", @@ -604,10 +605,6 @@ ], "metadata": { "colab": { - "last_runtime": { - "build_target": "//learning/grp/tools/ml_python:ml_notebook", - "kind": "private" - }, "name": "Intro to Neural Nets.ipynb", "private_outputs": true, "provenance": [] @@ -619,4 +616,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file
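The patch pins TensorFlow to 2.15.1 by adding a `!pip install tensorflow==2.15.1` line to the setup cell of each notebook. As a minimal illustrative sketch (not part of the patch itself), the pin can be sanity-checked in a fresh Colab/IPython runtime along these lines — installing before importing so the pinned wheel is the version that actually gets loaded:

!pip install tensorflow==2.15.1

# Import only after the install so the pinned 2.15.1 wheel is the module
# version loaded into this runtime.
import tensorflow as tf
assert tf.__version__ == "2.15.1", f"unexpected TensorFlow version: {tf.__version__}"
print(tf.__version__)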