#9 cross rater reliability

DisasterMasters · Mar 5, 2019 · a11a818 · a11a818
1 parent 0b397e7
commit a11a818
Show file tree

Hide file tree

Showing 3 changed files with 201 additions and 0 deletions.
diff --git a/src/Coding_Reliability.ipynb b/src/Coding_Reliability.ipynb
@@ -0,0 +1,201 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "df_s = pd.read_excel(\"syd_emotion.xlsx\")\n",
+    "df_f = pd.read_excel(\"faiza_emotion.xlsx\")\n",
+    "\n",
+    "# Blank \"Relevant\" / \"Emotion?\" cells load as NaN; code them as 0.\n",
+    "# Assign the filled column back instead of calling\n",
+    "# `df[col].fillna(0, inplace=True)`: that form mutates an intermediate\n",
+    "# Series, is deprecated by pandas, and stops updating the frame once\n",
+    "# copy-on-write is enabled.\n",
+    "for frame in (df_s, df_f):\n",
+    "    for column in (\"Relevant\", \"Emotion?\"):\n",
+    "        frame[column] = frame[column].fillna(0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "            Source   categories                       _id created_at  \\\n",
+      "0  Statuses_Irma_C          NaN  5c409973ec4ee50898175e81  12/3/2017   \n",
+      "1  Statuses_Irma_A  ['utility']  5c11a746ec4ee522100a5b3b  9/15/2017   \n",
+      "2  Statuses_Irma_A      ['gov']  5c0c5cf2ec4ee5221008263d   9/8/2017   \n",
+      "3  Statuses_Irma_A          NaN  5c598cc9ec4ee5929fd1e356  5/11/2017   \n",
+      "4  Statuses_Irma_C          NaN  5c4099ecec4ee508981c3249  1/22/2018   \n",
+      "\n",
+      "   emotion_ml  opinion_ml                                               text  \\\n",
+      "0           0           1  We will add a few more warmer-than-average day...   \n",
+      "1           0           1  I am happy to speak with someone at @dukeenerg...   \n",
+      "2           0           1  Consider it done! Great work! #9PMRoutine http...   \n",
+      "3           0           1    @WPLGLocal10 Waste of time and taxpayers mon ey   \n",
+      "4           0           1  I feel like Derek Sheppard could’ve removed th...   \n",
+      "\n",
+      "  Relevant  Opinion? Emotion? Which emotion? Confusion  Sarcasm? Ps/Ng/Nt?  \\\n",
+      "0        0       NaN        0            NaN       NaN       NaN       NaN   \n",
+      "1        1       NaN        1              D       NaN       NaN        Ng   \n",
+      "2        0       1.0        0            NaN       NaN       NaN        Ps   \n",
+      "3        0       1.0        1              A       NaN       NaN        Ng   \n",
+      "4        0       1.0        0            NaN       NaN       NaN        Nt   \n",
+      "\n",
+      "    Unnamed: 14  \n",
+      "0           NaN  \n",
+      "1  two emotions  \n",
+      "2           NaN  \n",
+      "3           NaN  \n",
+      "4           NaN  \n",
+      "                        _id  \\\n",
+      "0  5c409973ec4ee50898175e81   \n",
+      "1  5c11a746ec4ee522100a5b3b   \n",
+      "2  5c0c5cf2ec4ee5221008263d   \n",
+      "3  5c598cc9ec4ee5929fd1e356   \n",
+      "4  5c4099ecec4ee508981c3249   \n",
+      "\n",
+      "                                                text  emotion_ml  opinion_ml  \\\n",
+      "0  We will add a few more warmer-than-average day...           0           1   \n",
+      "1  I am happy to speak with someone at @dukeenerg...           0           1   \n",
+      "2  Consider it done! Great work! #9PMRoutine http...           0           1   \n",
+      "3    @WPLGLocal10 Waste of time and taxpayers mon ey           0           1   \n",
+      "4  I feel like Derek Sheppard could’ve removed th...           0           1   \n",
+      "\n",
+      "   Relevant  Opinion? Emotion? Which emotion?  Confusion  Sarcasm? Ps/Ng/Nt?  \n",
+      "0       0.0       NaN        0            NaN        NaN       NaN       NaN  \n",
+      "1       1.0       NaN        1              D        NaN       NaN        Nt  \n",
+      "2       1.0       1.0        1             HJ        NaN       NaN        Ps  \n",
+      "3       1.0       1.0        1              A        NaN       NaN        Ng  \n",
+      "4       0.0       NaN        0            NaN        NaN       NaN       NaN  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the first rows of each rater's sheet: df_s first, then df_f.\n",
+    "for sheet in (df_s, df_f):\n",
+    "    print(sheet.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "699\n",
+      "699\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Extract each rater's \"Relevant\" codes as plain lists, then print both lengths.\n",
+    "relevant_s, relevant_f = (sheet[\"Relevant\"].tolist() for sheet in (df_s, df_f))\n",
+    "for codes in (relevant_s, relevant_f):\n",
+    "    print(len(codes))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Relevant Accuracy: 0.8125894134477826\n"
+     ]
+    }
+   ],
+   "source": [
+    "text_s = df_s[\"text\"].tolist()\n",
+    "text_f = df_f[\"text\"].tolist()\n",
+    "# Rows are compared by position. To inspect rows whose text differs between\n",
+    "# the two sheets, print i, text_s[i] and text_f[i] wherever text_s[i] != text_f[i].\n",
+    "\n",
+    "# Number of rows on which both raters gave the same \"Relevant\" code.\n",
+    "same = sum(1 for i, code in enumerate(relevant_s) if code == relevant_f[i])\n",
+    "\n",
+    "print(\"Relevant Accuracy: \" + str(same / len(relevant_s)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Emotion Accuracy: 0.9577464788732394\n"
+     ]
+    }
+   ],
+   "source": [
+    "e_s = df_s[\"Emotion?\"].tolist()\n",
+    "e_f = df_f[\"Emotion?\"].tolist()\n",
+    "\n",
+    "# Count rows where the raters agree on both \"Emotion?\" and \"Relevant\".\n",
+    "# The share is reported relative to `same`, the relevance-agreement count.\n",
+    "s = sum(\n",
+    "    1\n",
+    "    for i in range(len(e_s))\n",
+    "    if e_s[i] == e_f[i] and relevant_s[i] == relevant_f[i]\n",
+    ")\n",
+    "print(\"Emotion Accuracy: \" + str(s / same))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Specific Emotion Accuracy: 0.05330882352941176\n"
+     ]
+    }
+   ],
+   "source": [
+    "sa = 0\n",
+    "we_s = df_s[\"Which emotion?\"].tolist()\n",
+    "we_f = df_f[\"Which emotion?\"].tolist()\n",
+    "\n",
+    "for i in range(len(we_s)):\n",
+    "    if(e_s[i] == e_f[i] and relevant_s[i] == relevant_f[i] and we_s[i] == we_f[i]):\n",
+    "        sa = sa + 1\n",
+    "print(\"Specific Emotion Accuracy: \"+str(sa/s))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/faiza_emotion.xlsx b/src/faiza_emotion.xlsx
diff --git a/src/syd_emotion.xlsx b/src/syd_emotion.xlsx