Simple

Sid-Lais · Feb 17, 2024 · 5e2eb82 · 5e2eb82
1 parent c907377
commit 5e2eb82
Showing 1 changed file with 171 additions and 0 deletions.
diff --git a/simple sentiment.ipynb b/simple sentiment.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# https://www.youtube.com/watch?v=uPKnSq6TaAk\n",
+    "# https://www.youtube.com/watch?v=Lu1nskBkPJU&list=PL7Lkk4UtXtOw04G1nRapMNgd2myNJCZSJ"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
+    "from scipy.special import softmax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sentiment1 = \"@elonmusk your soo cool @ twitter 😀 https://twitter.com/elonmusk\"\n",
+    "sentiment2 = 'Worst content I have ever seen! Unsubscribed 😒😒'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# precprcess tweet\n",
+    "def preprocess_sentiment(tweet):\n",
+    "    tweet_words = []\n",
+    "    for word in tweet.split(' '):\n",
+    "        if word.startswith('@') and len(word) > 1:\n",
+    "            word = '@user'\n",
+    "        \n",
+    "        elif word.startswith('http'):\n",
+    "            word = \"http\"\n",
+    "        tweet_words.append(word)\n",
+    "        tweet_proc = \" \".join(tweet_words)\n",
+    "    return tweet_proc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tweet_proc1 = preprocess_sentiment(sentiment1)\n",
+    "tweet_proc2 = preprocess_sentiment(sentiment2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load model and tokenizer\n",
+    "roberta = \"cardiffnlp/twitter-roberta-base-sentiment\"\n",
+    "\n",
+    "model = AutoModelForSequenceClassification.from_pretrained(roberta)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(roberta)\n",
+    "\n",
+    "labels = ['Negative', 'Neutral', 'Positive']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sentiment analysis\n",
+    "encoded_tweet1 = tokenizer(tweet_proc1, return_tensors='pt')\n",
+    "output1 = model(**encoded_tweet1)\n",
+    "scores1 = output1[0][0].detach().numpy()\n",
+    "scores1 = softmax(scores1)\n",
+    "\n",
+    "encoded_tweet2 = tokenizer(tweet_proc2, return_tensors='pt')\n",
+    "output2 = model(**encoded_tweet2)\n",
+    "scores2 = output2[0][0].detach().numpy()\n",
+    "scores2 = softmax(scores2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sentiment: @elonmusk your soo cool @ twitter 😀 https://twitter.com/elonmusk\n",
+      "\n",
+      "Sentiment Analysis\n",
+      "Negative 0.0010310933\n",
+      "Neutral 0.010395749\n",
+      "Positive 0.9885732\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Sentiment:\",sentiment1)\n",
+    "print(\"\\nSentiment Analysis\")\n",
+    "for i in range(len(scores1)):\n",
+    "    l = labels[i]\n",
+    "    s = scores1[i]\n",
+    "    print(l,s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sentiment: Worst content I have ever seen! Unsubscribed 😒😒\n",
+      "\n",
+      "Sentiment Analysis\n",
+      "Negative 0.9717405\n",
+      "Neutral 0.024161281\n",
+      "Positive 0.0040982515\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Sentiment:\",sentiment2)\n",
+    "print(\"\\nSentiment Analysis\")\n",
+    "for i in range(len(scores1)):\n",
+    "    l = labels[i]\n",
+    "    s = scores2[i]\n",
+    "    print(l,s)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}