-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscratch.R
135 lines (103 loc) · 3.14 KB
/
scratch.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
a <- function(df) {
df_ <- df %>%
rowid_to_column() %>%
unnest_tokens(word, comment)
df_ <- df_ %>%
anti_join(get_stopwords(), by="word")
a <- df_ %>%
add_count(rowid, name = "total_words_wo_stopwords") %>%
inner_join(get_sentiments("bing"), by = "word") %>%
add_count(rowid, name = "sentiment_match_count") %>%
mutate(pct_matched = sentiment_match_count/total_words_wo_stopwords)
add_count(rowid, sentiment, name = "counts") %>%
mutate(sentiment = ifelse(sentiment == "positive", 1, -1),
count = sentiment * counts)
b <- a %>%
group_by(rowid) %>%
summarize(n = sum(count))
df_ <- df %>%
rowid_to_column() %>%
left_join(b, by = "rowid")
group_by(rowid) %>%
summarize(n = )
mutate(sentiment = positive - negative)
janeaustensentiment <- tidy_books %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(book, index = line %/% 80, sentiment) %>%
spread(sentiment, n, fill = 0) %>%
mutate(sentiment = positive - negative)
}
df <- df %>%
rowid_to_column()
df %>%
unnest_tokens(word, comment)
library(janeaustenr)
library(dplyr)
original_books <- austen_books() %>%
group_by(book) %>%
mutate(line = row_number()) %>%
ungroup()
library(tidytext)
tidy_books <- original_books %>%
unnest_tokens(word, text)
tidy_books <- tidy_books %>%
anti_join(get_stopwords())
tidy_books %>%
count(word, sort = TRUE)
get_sentiments("bing")
top_50_commenters <- df %>%
filter(!str_detect(comment, "https|www|com")) %>%
group_by(user) %>%
summarize(count = n()) %>%
arrange(-count) %>%
rowid_to_column() %>%
head(50) %>%
pull(user)
df %>%
filter(user %in% top_50_commenters)
a %>%
ggplot() +
aes(x = rowid, y = count) +
geom_line()
group_by(user) %>%
unnest_tokens(word, comment) %>%
count(word) %>%
ungroup() %>%
bind_log_odds(user, word, n)
avatar_lo <- avatar %>%
unnest_tokens(word, text) %>%
count(aang, word) %>%
bind_log_odds(aang, word, n) %>%
arrange(-log_odds_weighted)
avatar_lo %>%
group_by(aang) %>%
top_n(15) %>%
ungroup() %>%
mutate(word = reorder(word, log_odds_weighted)) %>%
ggplot(aes(log_odds_weighted, word, fill = aang)) +
geom_col(alpha = 0.8, show.legend = FALSE) +
facet_wrap(~aang, scales = "free") +
scale_fill_avatar(palette = "AirNomads") +
labs(y = NULL)
data("mtcars")
library(dplyr)
library(pbapply)
library(gtools)
p_hacked_lm <- function(df, formula){
y <- strsplit(formula, " [~]")[[1]][1]
N <- ncol(df) - 1
X_cols_ids <- which(colnames(df) != y)
Y_id <- which(colnames(df) == y)
X_combos <- lapply(1:N, function(i) combinations(N, i, X_cols_ids))
regs <- pblapply(X_combos,
function(Xs) {
regs <- lapply(1:nrow(Xs), function(row) lm(formula, data = df[ , c(Y_id, Xs[row, ])]))
max_R2 <- lapply(regs, function(reg) summary(reg)$adj.r.square) %>%
which.max()
regs[[max_R2]]
})
max_R2 <- lapply(regs, function(reg) summary(reg)$adj.r.square) %>%
which.max()
regs[[max_R2]]
}
reg <- p_hacked_lm(mtcars, formula = paste("mpg ~ ."))