this repo has no description
# Attach the packages this script calls into, so it runs in a fresh session:
#   dplyr    - filter(), count(), anti_join()
#   stringr  - str_detect()
#   tibble   - view()
#   tidytext - unnest_tokens(), stop_words
library(dplyr)
library(stringr)
library(tibble)
library(tidytext)

# Restore the saved object(s) from disk. The statements that follow use
# `big_question_tag_df`, so the .rda file is expected to define it.
load("code/big_question_tag_df.rda")

# Quick look at the loaded data: first rows, per-column summary, data viewer.
head(big_question_tag_df)
summary(big_question_tag_df)
view(big_question_tag_df)
6
# Keep only rows that carry a tag: drop rows whose Tags value is missing or is
# the literal string "NA".
only_tag <- filter(big_question_tag_df, !is.na(Tags), Tags != "NA")
view(only_tag)

# Tagged rows, one table per Source value.
tagged_ams <- filter(only_tag, Source == "AMS")
tagged_cbms <- filter(only_tag, Source == "CBMS")

view(tagged_ams)
view(tagged_cbms)
20
# All rows (tagged or not), split into one table per Source value.
ams_ques <- filter(big_question_tag_df, Source == "AMS")
cbms_ques <- filter(big_question_tag_df, Source == "CBMS")
ipeds_ques <- filter(big_question_tag_df, Source == "IPEDS")
30
# Tokenization and wordcloud per source
# AMS
# Split the Questions text into one token per row (column `word`), drop stop
# words, then drop tokens that consist only of digits.
# The join key is named explicitly so the stop-word removal always matches on
# `word` alone and does not print a "Joining with ..." message.
tidy_ams_ques <- ams_ques |>
  unnest_tokens(word, Questions) |>
  anti_join(stop_words, by = "word") |>
  filter(!str_detect(word, "^[0-9]+$"))

# Word frequencies, most frequent first. `n > 0` currently keeps every word
# (count() only returns words that occur at least once); raise the threshold
# to trim rare words.
word_counts_ams <- tidy_ams_ques |>
  count(word, sort = TRUE) |>
  filter(n > 0)

view(word_counts_ams)
43
# CBMS
# Split the Questions text into one token per row (column `word`), drop stop
# words (matched on `word` only), then drop tokens that consist only of digits.
tidy_cbms_ques <- cbms_ques |>
  unnest_tokens(word, Questions) |>
  anti_join(stop_words, by = "word") |>
  filter(!str_detect(word, "^[0-9]+$"))

# Word frequencies, most frequent first, keeping words seen more than 5 times
# and removing leftover noise tokens.
# The dot in "e.g" is escaped: an unescaped `.` matches any character, so the
# old pattern also removed every word containing "e?g" (e.g. "undergraduate",
# "engineering").
# NOTE(review): the remaining alternatives are unanchored, so "ii", "b1", ...
# also match inside longer words - confirm that is intended.
word_counts_cbms <- tidy_cbms_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  filter(!str_detect(word, "_|b2|b1|e1|f1|e2|e\\.g|ii"))

view(word_counts_cbms)
56
# IPEDS
# Split the Questions text into one token per row (column `word`), drop stop
# words (matched on `word` only), then drop tokens that consist only of digits.
tidy_ipeds_ques <- ipeds_ques |>
  unnest_tokens(word, Questions) |>
  anti_join(stop_words, by = "word") |>
  filter(!str_detect(word, "^[0-9]+$"))

# Word frequencies, most frequent first, keeping words seen more than 5 times
# and removing the "e.g" token.
# The dot is escaped: an unescaped `.` matches any character, so the old
# pattern also removed every word containing "e?g" (e.g. "undergraduate",
# "engineering").
word_counts_ipeds <- tidy_ipeds_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  filter(!str_detect(word, "e\\.g"))

view(word_counts_ipeds)
69
70#install.packages("wordcloud2")
71library("wordcloud2")
72#install.packages("httpgd")
73
74#AMS
# Five hex colours that are cycled across the words of the word cloud.
my_palette <- c("#355070", "#6d597a", "#b56576", "#e56b6f", "#eaac8b")
80
# Build the AMS word cloud from the word/count table, recycling the palette so
# that every row of the table gets a colour.
ams_wc <- wordcloud2(
  data = word_counts_ams,
  color = rep_len(my_palette, length.out = nrow(word_counts_ams))
)
85
86