diff --git a/.gitignore b/.gitignore
index a0ba551..2276e64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,4 @@
-processedData/
-unprocessedData/
-experiment.py
+.ipynb_checkpoints/
meta.csv
imdb_crop/
wiki_crop/
diff --git a/Age and Gender Distribution.ipynb b/Age and Gender Distribution.ipynb
new file mode 100644
index 0000000..06b457f
--- /dev/null
+++ b/Age and Gender Distribution.ipynb
@@ -0,0 +1,233 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Importing dependencies\n",
+ "\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = pd.read_csv('meta.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " gender | \n",
+ " path | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 42 | \n",
+ " male | \n",
+ " imdb_crop/82/nm0005282_rm878739712_1966-10-31_... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 32 | \n",
+ " female | \n",
+ " imdb_crop/76/nm1411676_rm2490280960_1979-6-24_... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 29 | \n",
+ " male | \n",
+ " imdb_crop/83/nm1145983_rm1406371840_1979-7-31_... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 22 | \n",
+ " female | \n",
+ " imdb_crop/02/nm3240202_rm2934473728_1986-2-28_... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 78 | \n",
+ " female | \n",
+ " imdb_crop/12/nm0326412_rm1823643648_1926-9-7_2... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age gender path\n",
+ "0 42 male imdb_crop/82/nm0005282_rm878739712_1966-10-31_...\n",
+ "1 32 female imdb_crop/76/nm1411676_rm2490280960_1979-6-24_...\n",
+ "2 29 male imdb_crop/83/nm1145983_rm1406371840_1979-7-31_...\n",
+ "3 22 female imdb_crop/02/nm3240202_rm2934473728_1986-2-28_...\n",
+ "4 78 female imdb_crop/12/nm0326412_rm1823643648_1926-9-7_2..."
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['age', 'gender', 'path'], dtype='object')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(224840, 3)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "gender = []\n",
+ "for g in data['gender'].values:\n",
+ " if g == 'male':\n",
+ " gender.append(1)\n",
+ " else:\n",
+ " gender.append(0)\n",
+ " \n",
+ "\n",
+ "plt.hist(gender, range(3))\n",
+ "plt.title('There are total ' + str(len(gender) - sum(gender)) + ' female images and ' + str(sum(gender)) + ' male images')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.hist(data['age'], range(101))\n",
+ "plt.title('Age Distribution')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/README.md b/README.md
index 67b1588..793572f 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ The dataset is great for research purposes. It contains more than `500 thousand+
In this project, I filter all the images, resize them to `128x128`, remove all the images with an invalid age, fix the gender distribution problem, and save them in the proper format. Along with that, I’ve also processed the `.mat` files and converted them to `.csv` files.
## File Structure
-This repository contains 4 files
+This repository contains 3 files
- `mat.py`
- `gender.py`
- `age.py`