code/interactive_wordcloud.R at main · stau.space/underrepresentation-theory

this repo has no description
underrepresentation-theory / code / interactive_wordcloud.R
at main 86 lines 1.8 kB view raw
 1load("code/big_question_tag_df.rda")
 2
 3head(big_question_tag_df)
 4summary(big_question_tag_df)
 5view(big_question_tag_df)
 6
 7#Remove rows where Tags = NA
 8only_tag <- big_question_tag_df |>
 9  filter(Tags != "NA")
10view(only_tag)
11
# Tagged questions, split by the value of `Source`.
tagged_ams <- filter(only_tag, Source == "AMS")
tagged_cbms <- filter(only_tag, Source == "CBMS")

view(tagged_ams)
view(tagged_cbms)
20
# Every question (tagged or not), one data frame per `Source` value.
ams_ques <- filter(big_question_tag_df, Source == "AMS")
cbms_ques <- filter(big_question_tag_df, Source == "CBMS")
ipeds_ques <- filter(big_question_tag_df, Source == "IPEDS")
30
# Tokenization and word counts per source ----
# AMS: one row per word of `Questions`, minus stop words and purely numeric
# tokens.
tidy_ams_ques <- ams_ques |>
  unnest_tokens(word, Questions) |>
  # Name the join key explicitly so the join cannot silently pick up other
  # shared columns, and so dplyr does not print its "Joining with" message.
  anti_join(stop_words, by = "word") |>
  filter(!str_detect(word, "^[0-9]+$"))
37
# Word frequencies for AMS, most frequent first. `n > 0` is a minimum-frequency
# threshold that, at its current value, should keep every counted word.
word_counts_ams <- count(tidy_ams_ques, word, sort = TRUE) |>
  filter(n > 0)

view(word_counts_ams)
43
# CBMS: one row per word of `Questions`, minus stop words and purely numeric
# tokens.
tidy_cbms_ques <- cbms_ques |>
  unnest_tokens(word, Questions) |>
  # Name the join key explicitly so the join cannot silently pick up other
  # shared columns, and so dplyr does not print its "Joining with" message.
  anti_join(stop_words, by = "word") |>
  filter(!str_detect(word, "^[0-9]+$"))
49
# Word frequencies for CBMS, most frequent first, keeping words seen more than
# 5 times.
word_counts_cbms <- tidy_cbms_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  # Drop tokens containing an underscore or one of the listed fragments
  # (presumably question labels such as "b1"/"e2" -- confirm against the data).
  # The dot in "e.g" is escaped: unescaped it matches any character, so the
  # pattern would also remove ordinary words such as "engineering" or "length".
  filter(!str_detect(word, "_|b2|b1|e1|f1|e2|e\\.g|ii"))

view(word_counts_cbms)
56
# IPEDS: one row per word of `Questions`, minus stop words and purely numeric
# tokens.
tidy_ipeds_ques <- ipeds_ques |>
  unnest_tokens(word, Questions) |>
  # Name the join key explicitly so the join cannot silently pick up other
  # shared columns, and so dplyr does not print its "Joining with" message.
  anti_join(stop_words, by = "word") |>
  filter(!str_detect(word, "^[0-9]+$"))
62
# Word frequencies for IPEDS, most frequent first, keeping words seen more than
# 5 times.
word_counts_ipeds <- tidy_ipeds_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  # Drop tokens containing a literal "e.g" (presumably the abbreviation).
  # The dot is escaped: unescaped it matches any character, so the pattern
  # would also remove ordinary words such as "engineering" or "length".
  filter(!str_detect(word, "e\\.g"))

view(word_counts_ipeds)
69
70#install.packages("wordcloud2")
71library("wordcloud2")
72#install.packages("httpgd")
73
# AMS word cloud ----
# Five-colour palette, recycled below so every row of `word_counts_ams` gets a
# colour.
my_palette <- c(
  "#355070",
  "#6d597a",
  "#b56576",
  "#e56b6f",
  "#eaac8b"
)

# Build the word cloud from the AMS word counts. The result is only stored
# here; print `ams_wc` to display it.
ams_wc <- wordcloud2(
  word_counts_ams,
  color = rep_len(my_palette, nrow(word_counts_ams))
)
85
86