at main 86 lines 1.8 kB view raw
# Word-frequency tables and a word cloud for the survey questions stored in
# `big_question_tag_df`, split by the values "AMS", "CBMS" and "IPEDS" found
# in its `Source` column.

# Packages ----
# install.packages("wordcloud2")
# install.packages("httpgd")
library(dplyr)      # filter(), count(), anti_join()
library(stringr)    # str_detect()
library(tibble)     # view()
library(tidytext)   # unnest_tokens(), stop_words
library(wordcloud2) # wordcloud2()

# Data ----
load("code/big_question_tag_df.rda")

head(big_question_tag_df)
summary(big_question_tag_df)
view(big_question_tag_df)

# Tagged questions ----
# Remove rows where Tags is missing, either as a real NA or as the literal
# string "NA". filter() already drops rows whose condition evaluates to NA;
# the is.na() test only makes that intent explicit.
only_tag <- big_question_tag_df |>
  filter(!is.na(Tags), Tags != "NA")
view(only_tag)

tagged_ams <- only_tag |>
  filter(Source == "AMS")

tagged_cbms <- only_tag |>
  filter(Source == "CBMS")

view(tagged_ams)
view(tagged_cbms)

# All questions by source ----
ams_ques <- big_question_tag_df |>
  filter(Source == "AMS")

cbms_ques <- big_question_tag_df |>
  filter(Source == "CBMS")

ipeds_ques <- big_question_tag_df |>
  filter(Source == "IPEDS")

# Tokenization and word counts per source ----

# Split the `Questions` column into one word token per row, then drop stop
# words and tokens made up only of digits.
#
# questions_df: data frame with a `Questions` text column.
# Returns: `questions_df` with `Questions` replaced by a `word` column.
tokenize_questions <- function(questions_df) {
  questions_df |>
    unnest_tokens(word, Questions) |>
    # Name the key explicitly so the join does not have to guess it (and
    # does not print a "Joining with ..." message on every call).
    anti_join(stop_words, by = "word") |>
    filter(!str_detect(word, "^[0-9]+$"))
}

# AMS
tidy_ams_ques <- tokenize_questions(ams_ques)

word_counts_ams <- tidy_ams_ques |>
  count(word, sort = TRUE) |>
  # n > 0 keeps every word (counts start at 1); it is kept as an explicit
  # threshold so it can be raised like the other sources.
  filter(n > 0)

view(word_counts_ams)

# CBMS
tidy_cbms_ques <- tokenize_questions(cbms_ques)

word_counts_cbms <- tidy_cbms_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  # Drop question-label tokens and the abbreviation "e.g". The dot is escaped:
  # an unescaped "e.g" matches any character between "e" and "g" and would
  # also remove words such as "engineering" or "english".
  # NOTE(review): the remaining alternatives still match anywhere inside a
  # token; anchor them with ^...$ if only whole-token labels are meant.
  filter(!str_detect(word, "_|b2|b1|e1|f1|e2|e\\.g|ii"))

view(word_counts_cbms)

# IPEDS
tidy_ipeds_ques <- tokenize_questions(ipeds_ques)

word_counts_ipeds <- tidy_ipeds_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  # Escaped dot: remove only the literal "e.g" token (see the CBMS note).
  filter(!str_detect(word, "e\\.g"))

view(word_counts_ipeds)

# Word cloud ----
# AMS
my_palette <- c(
  "#355070",
  "#6d597a",
  "#b56576",
  "#e56b6f",
  "#eaac8b"
)

# Recycle the five palette colours so every word gets one colour.
ams_wc <- wordcloud2(
  word_counts_ams,
  color = rep_len(my_palette, nrow(word_counts_ams))
)