-
Notifications
You must be signed in to change notification settings - Fork 1
/
Wordcloud.R
109 lines (79 loc) · 2.89 KB
/
Wordcloud.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
library(twitteR)
library(httpuv)
library(ggplot2)
library(scales)
library(readr)
library(tm)
library(stringr)
library(wordcloud)
library(knitr)
library(twitteR)
library(tidytext)
library(syuzhet)
library(lubridate)
library(ggplot2)
library(scales)
library(reshape2)
library(dplyr )
library(rtweet)
library(tidyverse)
# Load the collected tweets from disk and drop exact duplicate rows.
allthetweets <- distinct(read_csv("allthetweets.csv"))

# Keep only the columns used by the word analysis below.
tweetswords <- select(allthetweets, text, created, id)

# Preview the first few rows as a formatted table.
knitr::kable(
  head(tweetswords),
  caption = "Tweets with the word immigration"
)
# Token delimiter: any character that is not a letter, digit, '#', '@' or
# apostrophe, plus any apostrophe that is not followed by one of those
# characters. Splitting on it keeps hashtags, mentions and contractions whole.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"

# One row per word:
#   1. drop tweets whose text starts with a double quote,
#   2. strip t.co links and ampersands,
#   3. split the text into lower-cased tokens using `reg`,
#   4. drop stop words and tokens that contain no letter.
# The HTML entity "&amp;" is removed before the bare "&" so that it does not
# leave a stray "amp" token behind; the "." in "t.co" is escaped so it only
# matches a literal dot.
tweet_words <- tweetswords %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(
    text = str_replace_all(text, "https://t\\.co/[A-Za-z\\d]+|&amp;|&", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )
# Frequency of every word across all tweets, most frequent first.
# count(sort = TRUE) already orders by descending n, so no extra arrange().
tweet_words_count <- tweet_words %>%
  count(word, sort = TRUE)

# Horizontal bar chart of the 20 most frequent words.
# head() is used rather than [1:20, ] so that a table with fewer than 20 words
# is not padded with NA rows. Axis labels follow the aesthetics, not the screen
# position: with coord_flip() the x aesthetic (word) is drawn vertically and
# the y aesthetic (n) horizontally.
ggplot(head(tweet_words_count, 20), aes(x = reorder(word, -n), y = n)) +
  geom_bar(alpha = 0.8, stat = "identity", show.legend = FALSE) +
  coord_flip() +
  xlab("Word") +
  ylab("Number of Occurrences") +
  ggtitle("Words Used in Tweets about Immigration")
# plot word cloud
library(wordcloud)

# Drop rows 1, 2, 4, 5 and 8 of the frequency table before plotting.
# NOTE(review): the rows are removed by position, presumably to hide the
# search terms / noise words of this particular dataset -- confirm, because
# the positions shift whenever the underlying data changes.
wordcloud_words <- tweet_words_count[-c(1, 2, 4, 5, 8), ]

# Plain single-colour cloud of words that occur at least 1500 times.
# `colors` expects colour values; the earlier `colors = TRUE` only worked
# because TRUE was coerced to palette index 1 (black by default).
wordcloud(
  words = wordcloud_words$word,
  freq = wordcloud_words$n,
  min.freq = 1500,
  random.order = FALSE,
  colors = "black"
)

# different type of word cloud
# NOTE(review): RXKCD is attached here but no call below visibly uses it.
library(RXKCD)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Green sequential palette without its two lightest shades.
pal <- brewer.pal(9, "BuGn")
pal <- pal[-(1:2)]
wordcloud(
  wordcloud_words$word, wordcloud_words$n,
  scale = c(8, .3), min.freq = 2500, random.order = FALSE,
  rot.per = .15, colors = pal, vfont = c("sans serif", "plain")
)

# and another word cloud, using a qualitative palette
pal2 <- brewer.pal(8, "Set3")

# this is the one i like, i think
wordcloud(
  wordcloud_words$word, wordcloud_words$n,
  scale = c(8, .2), min.freq = 1500, max.words = Inf,
  random.order = FALSE, rot.per = .15, colors = pal2
)

# Same palette and threshold, drawn with the "sans serif" vector font.
wordcloud(
  wordcloud_words$word, wordcloud_words$n,
  scale = c(8, .3), min.freq = 1500, random.order = FALSE,
  rot.per = .15, colors = pal2, vfont = c("sans serif", "plain")
)
# Preparation for plotting words from a particular time period:
# split `created` nine characters from its end into a `date` column and a
# `time` column, so the per-word rows can be selected by date.
library(lubridate)
tweet_dates2 <- separate(
  tweet_words,
  created,
  into = c("date", "time"),
  sep = -9
)
# Plot the 20 most frequent words among tweets dated within a time window.
#
# Arguments:
#   time1, time2  Start and end of the window, both inclusive. Anything
#                 as.Date() can convert (e.g. "2018-06-01" or a Date).
#                 Bounds are compared at day resolution because the `date`
#                 column of `tweet_dates2` holds only the date part.
#
# Returns: a ggplot horizontal bar chart of the most frequent words in the
#   window (fewer than 20 bars when fewer words are available).
#
# Reads the global data frame `tweet_dates2` (one row per word, with `word`
# and `date` columns).
# NOTE(review): assumes the `date` strings are in a format as.Date() parses,
# e.g. "YYYY-MM-DD" -- confirm against the `created` column of the CSV.
#
# Fixes over the previous version: the arguments are no longer overwritten by
# an undefined Shiny `input`; the bounds are parsed from the arguments rather
# than from the literal strings "time1"/"time2"; rows are filtered with a
# logical condition on the data frame; and the plot is the return value
# (a stray `?str` used to be the last expression).
wordcloudAtXTime <- function(time1, time2) {
  start_date <- as.Date(time1)
  end_date <- as.Date(time2)
  stopifnot(
    length(start_date) == 1,
    length(end_date) == 1,
    !is.na(start_date),
    !is.na(end_date),
    start_date <= end_date
  )

  # Rows whose date cannot be parsed are treated as outside the window.
  tweet_day <- as.Date(tweet_dates2$date)
  in_window <- !is.na(tweet_day) &
    tweet_day >= start_date &
    tweet_day <= end_date
  words_in_window <- tweet_dates2[in_window, ]

  # count(sort = TRUE) already orders by descending frequency.
  tweet_words_count_function <- words_in_window %>%
    count(word, sort = TRUE)

  # Axis labels follow the aesthetics: with coord_flip() the x aesthetic
  # (word) is drawn vertically and the y aesthetic (n) horizontally.
  ggplot(
    head(tweet_words_count_function, 20),
    aes(x = reorder(word, -n), y = n)
  ) +
    geom_bar(alpha = 0.8, stat = "identity", show.legend = FALSE) +
    coord_flip() +
    xlab("Word") +
    ylab("Number of Occurrences") +
    ggtitle("Words Used in Tweets about Immigration")
}