-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSentiText.cpp
66 lines (59 loc) · 2.09 KB
/
SentiText.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// implements SentiText class
#include "SentiText.hpp"
#include <string>
namespace vader
{
// Builds a SentiText from the raw input text.
//
// The text is stored in m_text, then tokenised by _words_and_emoticons()
// (which also allocates and fills the per-token m_is_emoticon flags), and
// finally the token list is handed to allcap_differential() to compute
// m_is_cap_diff.
SentiText::SentiText(String text)
    : m_text(text)
{
    // Tokenisation does not separate words from adjacent punctuation up front;
    // each token is cleaned individually so emoticons and contractions survive.
    this->m_words_and_emoticons = _words_and_emoticons();

    // Must run after tokenisation: it inspects the finished token list.
    m_is_cap_diff = allcap_differential(m_words_and_emoticons);
}
// Releases the per-token emoticon flag array.
//
// m_is_emoticon is allocated with new[] in _words_and_emoticons(), which the
// constructor always calls, so it must be released with delete[] here;
// otherwise every SentiText leaks one array.
//
// NOTE(review): the class declaration is not visible from this file. If
// SentiText is ever copied with the implicitly generated copy operations, the
// copies would share this pointer -- confirm copying is deleted or unused.
SentiText::~SentiText()
{
    delete[] m_is_emoticon;
}
std::vector<String> * SentiText::get_words_and_emoticons()
{
return &m_words_and_emoticons;
}
const bool * SentiText::get_is_emoticon()
{
return m_is_emoticon;
}
bool SentiText::isCapDiff()
{
return m_is_cap_diff;
}
// TODO: modify so it returns a pair<String, bool> so that this method can be static and then modify _words_and_emoticons as necessary
//
// Cleans a single token in place and records whether it was kept verbatim.
//
// Every punctuation character (per std::ispunct) is removed from `token`,
// wherever it occurs, except the apostrophe, which is kept so contractions
// stay intact (this is different than the original).
// If two or fewer characters remain, the token was most likely an emoticon
// (e.g. ":)" would strip down to ""), so the original text is restored and
// m_is_emoticon[i] is set to true; otherwise the stripped text is kept and
// m_is_emoticon[i] is set to false.
//
// token: token to clean; modified in place.
// i:     index of this token's slot in m_is_emoticon. The caller must ensure
//        it is within the allocated array; no bounds check is done here.
// Returns a copy of the resulting token.
String SentiText::_strip_punc_if_word(String &token, int i)
{
    // Keep the untouched text so it can be restored for emoticon-like tokens.
    const String original = token;

    // Erase-remove. The predicate takes unsigned char because passing a
    // negative char value to std::ispunct is undefined behaviour.
    token.erase(std::remove_if(token.begin(),
                               token.end(),
                               [](unsigned char ch) { return std::ispunct(ch) && ch != '\''; }),
                token.end());

    const bool is_emoticon = token.length() <= 2;
    if (is_emoticon)
        token = original;
    this->m_is_emoticon[i] = is_emoticon;
    return token;
}
std::vector<String> SentiText::_words_and_emoticons()
{
// Removes leading and trailing puncutation
// Leaves contractions and most emoticons
// Does not preserve punc-plus-letter emoticons (e.g. :D)
std::vector<String> wes = split(m_text);
m_is_emoticon = new bool[wes.size()];
for (int i = 0; i < wes.size(); i++)
_strip_punc_if_word(wes[i], i);
return wes;
}
}