load("code/big_question_tag_df.rda") head(big_question_tag_df) summary(big_question_tag_df) view(big_question_tag_df) #Remove rows where Tags = NA only_tag <- big_question_tag_df |> filter(Tags != "NA") view(only_tag) tagged_ams <- only_tag |> filter(Source == "AMS") tagged_cbms <- only_tag |> filter(Source == "CBMS") view(tagged_ams) view(tagged_cbms) #All questions by source ams_ques <- big_question_tag_df |> filter(Source == "AMS") cbms_ques <- big_question_tag_df |> filter(Source == "CBMS") ipeds_ques <- big_question_tag_df |> filter(Source == "IPEDS") #Tokenization and wordcloud per source #AMS tidy_ams_ques <- ams_ques |> unnest_tokens(word, Questions) |> anti_join(stop_words) |> filter(!str_detect(word, "^[0-9]+$")) word_counts_ams <- tidy_ams_ques |> count(word, sort = TRUE) |> filter(n > 0) view(word_counts_ams) #CBMS tidy_cbms_ques <- cbms_ques |> unnest_tokens(word, Questions) |> anti_join(stop_words) |> filter(!str_detect(word, "^[0-9]+$")) word_counts_cbms <- tidy_cbms_ques |> count(word, sort = TRUE) |> filter(n > 5) |> filter(!str_detect(word, "_|b2|b1|e1|f1|e2|e.g|ii")) view(word_counts_cbms) #IPEDS tidy_ipeds_ques <- ipeds_ques |> unnest_tokens(word, Questions) |> anti_join(stop_words) |> filter(!str_detect(word, "^[0-9]+$")) word_counts_ipeds <- tidy_ipeds_ques |> count(word, sort = TRUE) |> filter(n > 5) |> filter(!str_detect(word, "e.g")) view(word_counts_ipeds) #install.packages("wordcloud2") library("wordcloud2") #install.packages("httpgd") #AMS my_palette = c("#355070", "#6d597a", "#b56576", "#e56b6f", "#eaac8b") ams_wc = wordcloud2( word_counts_ams, color = rep_len(my_palette, nrow(word_counts_ams)))