Skip to content

Commit

Permalink
Simple
Browse files Browse the repository at this point in the history
  • Loading branch information
Sid-Lais committed Feb 17, 2024
1 parent c907377 commit 5e2eb82
Showing 1 changed file with 171 additions and 0 deletions.
171 changes: 171 additions & 0 deletions simple sentiment.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# https://www.youtube.com/watch?v=uPKnSq6TaAk\n",
"# https://www.youtube.com/watch?v=Lu1nskBkPJU&list=PL7Lkk4UtXtOw04G1nRapMNgd2myNJCZSJ"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
"from scipy.special import softmax"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"sentiment1 = \"@elonmusk your soo cool @ twitter 😀 https://twitter.com/elonmusk\"\n",
"sentiment2 = 'Worst content I have ever seen! Unsubscribed 😒😒'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# precprcess tweet\n",
"def preprocess_sentiment(tweet):\n",
" tweet_words = []\n",
" for word in tweet.split(' '):\n",
" if word.startswith('@') and len(word) > 1:\n",
" word = '@user'\n",
" \n",
" elif word.startswith('http'):\n",
" word = \"http\"\n",
" tweet_words.append(word)\n",
" tweet_proc = \" \".join(tweet_words)\n",
" return tweet_proc"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"tweet_proc1 = preprocess_sentiment(sentiment1)\n",
"tweet_proc2 = preprocess_sentiment(sentiment2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# load model and tokenizer\n",
"roberta = \"cardiffnlp/twitter-roberta-base-sentiment\"\n",
"\n",
"model = AutoModelForSequenceClassification.from_pretrained(roberta)\n",
"tokenizer = AutoTokenizer.from_pretrained(roberta)\n",
"\n",
"labels = ['Negative', 'Neutral', 'Positive']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# sentiment analysis\n",
"encoded_tweet1 = tokenizer(tweet_proc1, return_tensors='pt')\n",
"output1 = model(**encoded_tweet1)\n",
"scores1 = output1[0][0].detach().numpy()\n",
"scores1 = softmax(scores1)\n",
"\n",
"encoded_tweet2 = tokenizer(tweet_proc2, return_tensors='pt')\n",
"output2 = model(**encoded_tweet2)\n",
"scores2 = output2[0][0].detach().numpy()\n",
"scores2 = softmax(scores2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sentiment: @elonmusk your soo cool @ twitter 😀 https://twitter.com/elonmusk\n",
"\n",
"Sentiment Analysis\n",
"Negative 0.0010310933\n",
"Neutral 0.010395749\n",
"Positive 0.9885732\n"
]
}
],
"source": [
"print(\"Sentiment:\",sentiment1)\n",
"print(\"\\nSentiment Analysis\")\n",
"for i in range(len(scores1)):\n",
" l = labels[i]\n",
" s = scores1[i]\n",
" print(l,s)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sentiment: Worst content I have ever seen! Unsubscribed 😒😒\n",
"\n",
"Sentiment Analysis\n",
"Negative 0.9717405\n",
"Neutral 0.024161281\n",
"Positive 0.0040982515\n"
]
}
],
"source": [
"print(\"Sentiment:\",sentiment2)\n",
"print(\"\\nSentiment Analysis\")\n",
"for i in range(len(scores1)):\n",
" l = labels[i]\n",
" s = scores2[i]\n",
" print(l,s)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 5e2eb82

Please # to comment.