Distances on Directed Graphs in R
at main 115 lines 3.6 kB view raw
1 2#include "deduplicate.h" 3 4void deduplicate::update_dupl_edge_map (deduplicate::EdgeMapType &edge_map, 5 const deduplicate::str_pair &this_pair, const double &val) 6{ 7 if (edge_map.find (this_pair) == edge_map.end ()) 8 { 9 edge_map.emplace (this_pair, val); 10 } else 11 { 12 double val_min = edge_map.find (this_pair)->second; 13 if (val < val_min) 14 { 15 edge_map.erase (this_pair); 16 edge_map.emplace (this_pair, val); 17 } 18 } 19} 20 21//' De-duplicate edges by replacing with minimal weighted distances and times 22//' 23//' @param graph Full graph in any form 24//' @param fr_col Name of column holding from edge labels 25//' @param to_col Name of column holding to edge labels 26//' @param d_col Name of column holding weighted distances 27//' @param t_col Name of column holding weighted times (or "" if no weighted 28//' times). 29//' @return A `data.frame` of 3 columns: 'from', 'to', and 'd', where 'd' is the 30//' minimal value taken from all duplicated edges. If 't_col' is specified, the 31//' equivalent minimal times are in the lower half of the result. 32//' @noRd 33// [[Rcpp::export]] 34Rcpp::DataFrame rcpp_deduplicate (const Rcpp::DataFrame &graph, const std::string fr_col, const std::string to_col, 35 const std::string d_col, const std::string t_col) 36{ 37 const bool has_time = t_col.length () > 0L; 38 39 std::unordered_set < deduplicate::str_pair, deduplicate::str_pair_hash > edge_pair_set; 40 std::unordered_set < deduplicate::str_pair, deduplicate::str_pair_hash > edge_pair_dupl; 41 42 const std::vector <std::string> fr = graph [fr_col]; 43 const std::vector <std::string> to = graph [to_col]; 44 const std::vector <double> d = graph [d_col]; 45 std::vector <double> t (graph.nrow ()); 46 if (has_time) { 47 std::vector <double> t = graph [t_col]; 48 } 49 50 const size_t n = static_cast <size_t> (graph.nrow ()); 51 52 // First loop to collect duplicated edges 53 for (size_t i = 0; i < n; i++) 54 { 55 deduplicate::str_pair this_pair {fr [i], to [i]}; 56 if (edge_pair_set.find (this_pair) != edge_pair_set.end ()) 57 { 58 edge_pair_dupl.emplace (this_pair); 59 } 60 edge_pair_set.emplace (this_pair); 61 } 62 63 // Then aggregate values for each duplicate 64 deduplicate::EdgeMapType edge_map_d, edge_map_t; 65 66 for (size_t i = 0; i < n; i++) 67 { 68 deduplicate::str_pair this_pair {fr [i], to [i]}; 69 70 if (edge_pair_dupl.find (this_pair) == edge_pair_dupl.end ()) 71 { 72 continue; 73 } 74 75 deduplicate::update_dupl_edge_map (edge_map_d, this_pair, d [i]); 76 77 if (has_time) 78 { 79 deduplicate::update_dupl_edge_map (edge_map_t, this_pair, t [i]); 80 } 81 82 } 83 84 const size_t ndupl = edge_map_d.size (); 85 std::vector <std::string> fr_dupl (ndupl), to_dupl (ndupl); 86 std::vector <double> d_dupl (ndupl); 87 88 size_t i = 0; 89 for (auto e: edge_map_d) 90 { 91 fr_dupl [i] = e.first.first; 92 to_dupl [i] = e.first.second; 93 d_dupl [i++] = e.second; 94 } 95 if (has_time) 96 { 97 fr_dupl.resize (ndupl * 2); 98 to_dupl.resize (ndupl * 2); 99 d_dupl.resize (ndupl * 2); 100 for (auto e: edge_map_t) 101 { 102 fr_dupl [i] = e.first.first; 103 to_dupl [i] = e.first.second; 104 d_dupl [i++] = e.second; 105 } 106 } 107 108 Rcpp::DataFrame res = Rcpp::DataFrame::create ( 109 Rcpp::Named ("from") = fr_dupl, 110 Rcpp::Named ("to") = to_dupl, 111 Rcpp::Named ("d") = d_dupl, 112 Rcpp::_["stringsAsFactors"] = false); 113 114 return res; 115}