Skip to content

Commit

Permalink
#9 cross rater reliability
Browse files Browse the repository at this point in the history
  • Loading branch information
syd-shelby authored Mar 5, 2019
1 parent 0b397e7 commit a11a818
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 0 deletions.
201 changes: 201 additions & 0 deletions src/Coding_Reliability.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df_s = pd.read_excel(\"syd_emotion.xlsx\")\n",
"df_f = pd.read_excel(\"faiza_emotion.xlsx\")\n",
"df_s[\"Relevant\"].fillna(0, inplace=True)\n",
"df_f[\"Relevant\"].fillna(0, inplace=True)\n",
"df_s[\"Emotion?\"].fillna(0, inplace=True)\n",
"df_f[\"Emotion?\"].fillna(0, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Source categories _id created_at \\\n",
"0 Statuses_Irma_C NaN 5c409973ec4ee50898175e81 12/3/2017 \n",
"1 Statuses_Irma_A ['utility'] 5c11a746ec4ee522100a5b3b 9/15/2017 \n",
"2 Statuses_Irma_A ['gov'] 5c0c5cf2ec4ee5221008263d 9/8/2017 \n",
"3 Statuses_Irma_A NaN 5c598cc9ec4ee5929fd1e356 5/11/2017 \n",
"4 Statuses_Irma_C NaN 5c4099ecec4ee508981c3249 1/22/2018 \n",
"\n",
" emotion_ml opinion_ml text \\\n",
"0 0 1 We will add a few more warmer-than-average day... \n",
"1 0 1 I am happy to speak with someone at @dukeenerg... \n",
"2 0 1 Consider it done! Great work! #9PMRoutine http... \n",
"3 0 1 @WPLGLocal10 Waste of time and taxpayers mon ey \n",
"4 0 1 I feel like Derek Sheppard could’ve removed th... \n",
"\n",
" Relevant Opinion? Emotion? Which emotion? Confusion Sarcasm? Ps/Ng/Nt? \\\n",
"0 0 NaN 0 NaN NaN NaN NaN \n",
"1 1 NaN 1 D NaN NaN Ng \n",
"2 0 1.0 0 NaN NaN NaN Ps \n",
"3 0 1.0 1 A NaN NaN Ng \n",
"4 0 1.0 0 NaN NaN NaN Nt \n",
"\n",
" Unnamed: 14 \n",
"0 NaN \n",
"1 two emotions \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
" _id \\\n",
"0 5c409973ec4ee50898175e81 \n",
"1 5c11a746ec4ee522100a5b3b \n",
"2 5c0c5cf2ec4ee5221008263d \n",
"3 5c598cc9ec4ee5929fd1e356 \n",
"4 5c4099ecec4ee508981c3249 \n",
"\n",
" text emotion_ml opinion_ml \\\n",
"0 We will add a few more warmer-than-average day... 0 1 \n",
"1 I am happy to speak with someone at @dukeenerg... 0 1 \n",
"2 Consider it done! Great work! #9PMRoutine http... 0 1 \n",
"3 @WPLGLocal10 Waste of time and taxpayers mon ey 0 1 \n",
"4 I feel like Derek Sheppard could’ve removed th... 0 1 \n",
"\n",
" Relevant Opinion? Emotion? Which emotion? Confusion Sarcasm? Ps/Ng/Nt? \n",
"0 0.0 NaN 0 NaN NaN NaN NaN \n",
"1 1.0 NaN 1 D NaN NaN Nt \n",
"2 1.0 1.0 1 HJ NaN NaN Ps \n",
"3 1.0 1.0 1 A NaN NaN Ng \n",
"4 0.0 NaN 0 NaN NaN NaN NaN \n"
]
}
],
"source": [
"print(df_s.head())\n",
"print(df_f.head())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"699\n",
"699\n"
]
}
],
"source": [
"relevant_s = df_s[\"Relevant\"].tolist()\n",
"relevant_f = df_f[\"Relevant\"].tolist()\n",
"print(len(relevant_s))\n",
"print(len(relevant_f))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Relevant Accuracy: 0.8125894134477826\n"
]
}
],
"source": [
"same = 0\n",
"text_s = df_s[\"text\"].tolist()\n",
"text_f = df_f[\"text\"].tolist()\n",
"#for i in range(len(text_s)):\n",
"# if(text_s[i] != text_f[i]):\n",
"# print(str(i)+\": \"+text_s[i]+\" | \"+text_f[i]) \n",
"\n",
"for i in range(len(relevant_s)):\n",
" if(relevant_s[i] == relevant_f[i]):\n",
" same = same + 1\n",
"\n",
"print(\"Relevant Accuracy: \"+str(same/len(relevant_s)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Emotion Accuracy: 0.9577464788732394\n"
]
}
],
"source": [
"s = 0\n",
"e_s = df_s[\"Emotion?\"].tolist()\n",
"e_f = df_f[\"Emotion?\"].tolist()\n",
"\n",
"for i in range(len(e_s)):\n",
" if(e_s[i] == e_f[i] and relevant_s[i] == relevant_f[i]):\n",
" s = s + 1\n",
"print(\"Emotion Accuracy: \"+str(s/same))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Specific Emotion Accuracy: 0.05330882352941176\n"
]
}
],
"source": [
"sa = 0\n",
"we_s = df_s[\"Which emotion?\"].tolist()\n",
"we_f = df_f[\"Which emotion?\"].tolist()\n",
"\n",
"for i in range(len(we_s)):\n",
" if(e_s[i] == e_f[i] and relevant_s[i] == relevant_f[i] and we_s[i] == we_f[i]):\n",
" sa = sa + 1\n",
"print(\"Specific Emotion Accuracy: \"+str(sa/s))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added src/faiza_emotion.xlsx
Binary file not shown.
Binary file added src/syd_emotion.xlsx
Binary file not shown.

0 comments on commit a11a818

Please sign in to comment.