Distances on Directed Graphs in R
at main 291 lines 8.8 kB view raw
1#include "graph.h" 2 3void graph::add_to_v2e_map (vert2edge_map_t &vert2edge_map, const vertex_id_t vid, 4 const edge_id_t eid) 5{ 6 std::unordered_set <edge_id_t> edge_ids; 7 if (vert2edge_map.find (vid) == vert2edge_map.end ()) 8 { 9 edge_ids.emplace (eid); 10 vert2edge_map.emplace (vid, edge_ids); 11 } else 12 { 13 edge_ids = vert2edge_map [vid]; 14 edge_ids.insert (eid); 15 vert2edge_map [vid] = edge_ids; 16 } 17} 18 19void graph::erase_from_v2e_map (vert2edge_map_t &vert2edge_map, 20 const vertex_id_t vid, const edge_id_t eid) 21{ 22 std::unordered_set <edge_id_t> edge_ids = vert2edge_map [vid]; 23 if (edge_ids.find (eid) != edge_ids.end ()) 24 { 25 edge_ids.erase (eid); 26 vert2edge_map [vid] = edge_ids; 27 } 28} 29 30//' graph_has_components 31//' 32//' Does a graph have a vector of connected component IDs? Only used in 33//' \code{sample_one_vertex} 34//' @noRd 35bool graph::graph_has_components (const Rcpp::DataFrame &graph) 36{ 37 Rcpp::CharacterVector graph_names = graph.attr ("names"); 38 for (auto n: graph_names) 39 if (n == "component") 40 return true; 41 42 return false; 43} 44 45 46//' @name graph_from_df 47//' 48//' Convert a standard graph data.frame into an object of class graph. Graphs 49//' are standardised with the function \code{dodgr_convert_graph()$graph}, and 50//' contain only the four columns [from, to, d, w] 51//' 52//' @noRd 53bool graph::graph_from_df (const Rcpp::DataFrame &gr, vertex_map_t &vm, 54 edge_map_t &edge_map, vert2edge_map_t &vert2edge_map) 55{ 56 Rcpp::StringVector edge_id = gr ["edge_id"]; 57 Rcpp::StringVector from = gr ["from"]; 58 Rcpp::StringVector to = gr ["to"]; 59 Rcpp::NumericVector dist = gr ["d"]; 60 Rcpp::NumericVector weight = gr ["d_weighted"]; 61 Rcpp::StringVector colnames = gr.attr ("names"); 62 bool has_times = false; 63 Rcpp::NumericVector time, timew; 64 if (gr.ncol () == 7) 65 { 66 has_times = true; 67 time = gr ["time"]; 68 timew = gr ["time_weighted"]; 69 } 70 71 std::vector <edge_id_t> replacement_edges; // all empty here 72 73 for (R_xlen_t i = 0; i < to.length (); i ++) 74 { 75 Rcpp::checkUserInterrupt (); 76 vertex_id_t from_id = std::string (from [i]); 77 vertex_id_t to_id = std::string (to [i]); 78 79 if (vm.find (from_id) == vm.end ()) 80 { 81 vertex_t fromV = vertex_t (); 82 vm.emplace (from_id, fromV); 83 } 84 vertex_t from_vtx = vm.at (from_id); 85 from_vtx.add_neighbour_out (to_id); 86 vm [from_id] = from_vtx; 87 88 if (vm.find (to_id) == vm.end ()) 89 { 90 vertex_t toV = vertex_t (); 91 vm.emplace (to_id, toV); 92 } 93 vertex_t to_vtx = vm.at (to_id); 94 to_vtx.add_neighbour_in (from_id); 95 vm [to_id] = to_vtx; 96 97 edge_id_t edge_id_str = Rcpp::as <edge_id_t> (edge_id [i]); 98 99 double wt = weight [i]; 100 if (weight [i] < 0.0) 101 wt = INFINITE_DOUBLE; // # nocov 102 103 std::vector <double> weights; 104 if (!has_times) 105 weights = {dist [i], wt}; 106 else 107 weights = {dist [i], wt, time [i], timew [i]}; 108 109 edge_t edge = edge_t (from_id, to_id, weights, 110 edge_id_str, replacement_edges); 111 112 edge_map.emplace (edge_id_str, edge); 113 graph::add_to_v2e_map (vert2edge_map, from_id, edge_id_str); 114 graph::add_to_v2e_map (vert2edge_map, to_id, edge_id_str); 115 } 116 117 return has_times; 118} 119 120//' identify_graph_components 121//' 122//' Identify initial graph components for each **vertex** 123//' Identification for edges is subsequently perrformed with 124//' \code{rcpp_get_component_vector}. 125//' 126//' @param v unordered_map <vertex_id_t, vertex_t> 127//' @param com component map from each vertex to component numbers 128//' @noRd 129size_t graph::identify_graph_components (vertex_map_t &v, 130 std::unordered_map <vertex_id_t, size_t> &com) 131{ 132 // initialize components map 133 std::unordered_set <vertex_id_t> all_verts; 134 for (auto it: v) 135 all_verts.emplace (it.first); 136 com.clear (); 137 138 std::unordered_set <vertex_id_t> nbs_todo, nbs_done; 139 nbs_todo.insert (*all_verts.begin ()); 140 size_t compnum = 0; 141 while (all_verts.size () > 0) 142 { 143 Rcpp::checkUserInterrupt (); 144 vertex_id_t vt = (*nbs_todo.begin ()); 145 all_verts.erase (vt); 146 147 vertex_t vtx = v.find (vt)->second; 148 std::unordered_set <vertex_id_t> nbs = vtx.get_all_neighbours (); 149 for (auto nvtx: nbs) 150 { 151 com.emplace (nvtx, compnum); 152 if (nbs_done.find (nvtx) == nbs_done.end ()) 153 nbs_todo.emplace (nvtx); 154 } 155 nbs_done.emplace (vt); 156 com.emplace (vt, compnum); 157 nbs_todo.erase (vt); 158 159 if (nbs_todo.size () == 0 && all_verts.size () > 0) 160 { 161 nbs_todo.insert (*all_verts.begin ()); 162 compnum++; 163 } 164 } 165 166 long int largest_id = 0; 167 if (compnum > 0) 168 { 169 std::vector <size_t> comp_sizes (compnum + 1, 0); 170 for (auto c: com) 171 comp_sizes [c.second]++; 172 auto maxi = std::max_element (comp_sizes.begin (), comp_sizes.end ()); 173 174 largest_id = std::distance (comp_sizes.begin (), maxi); 175 } 176 177 return static_cast <size_t> (largest_id); 178} 179 180 181//' rcpp_get_component_vector 182//' 183//' Get component numbers for each edge of graph 184//' 185//' @param graph graph to be processed; stripped down and standardised to five 186//' columns 187//' 188//' @return Two vectors: one of edge IDs and one of corresponding component 189//' numbers 190//' @noRd 191// [[Rcpp::export]] 192Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph) 193{ 194 vertex_map_t vertices; 195 edge_map_t edge_map; 196 vert2edge_map_t vert2edge_map; 197 198 bool has_times = graph::graph_from_df (graph, vertices, edge_map, vert2edge_map); 199 has_times = false; // rm unused variable warning 200 201 std::unordered_map <vertex_id_t, size_t> components; 202 size_t largest_component = 203 graph::identify_graph_components (vertices, components); 204 largest_component++; // suppress unused variable warning 205 206 // Then map component numbers of vertices onto edges 207 std::unordered_map <edge_id_t, size_t> comp_nums; 208 for (auto ve: vert2edge_map) 209 { 210 vertex_id_t vi = ve.first; 211 std::unordered_set <edge_id_t> edges = ve.second; 212 for (edge_id_t e: edges) 213 comp_nums.emplace (e, components.find (vi)->second); 214 } 215 216 Rcpp::StringVector edge_id (comp_nums.size ()); 217 Rcpp::IntegerVector comp_num (comp_nums.size ()); 218 size_t i = 0; 219 for (auto cn: comp_nums) 220 { 221 edge_id (i) = cn.first; 222 comp_num (i) = static_cast <int> (cn.second) + 1L; // 1-indexed for R 223 i++; 224 } 225 226 return Rcpp::List::create ( 227 Rcpp::Named ("edge_id") = edge_id, 228 Rcpp::Named ("edge_component") = comp_num); 229} 230 231//' rcpp_unique_rownames 232//' 233//' Construct vertex (from, to) ID values from unique pairs of coordinates 234//' rounded to <precision>. Used when vertices have no ID values. 235//' 236//' @noRd 237// [[Rcpp::export]] 238Rcpp::DataFrame rcpp_unique_rownames (Rcpp::DataFrame xyfrom, 239 Rcpp::DataFrame xyto, 240 const int precision = 10) 241{ 242 const size_t prec_size_t = static_cast <size_t> (precision); 243 244 std::vector <double> xf = xyfrom ["x"], 245 yf = xyfrom ["y"], 246 xt = xyto ["x"], 247 yt = xyto ["y"]; 248 const size_t n = static_cast <size_t> (xyfrom.nrow ()); 249 250 std::vector <std::string> s_f (n), s_t (n); 251 std::vector <std::string> i_f (n), i_t (n); // indices as rownames 252 std::unordered_map <std::string, std::string> xynames; 253 size_t count = 0; 254 for (size_t i = 0; i < n; i++) 255 { 256 std::string xfs = std::to_string (xf [i]), 257 yfs = std::to_string (yf [i]), 258 xts = std::to_string (xt [i]), 259 yts = std::to_string (yt [i]); 260 s_f [i] = xfs.substr(0, xfs.find(".") + prec_size_t + 1) + 261 yfs.substr (0, yfs.find(".") + prec_size_t + 1); 262 s_t [i] = xts.substr (0, xts.find(".") + prec_size_t + 1) + 263 yts.substr (0, yts.find(".") + prec_size_t + 1); 264 265 if (xynames.find (s_f [i]) != xynames.end ()) 266 { 267 i_f [i] = xynames.at (s_f [i]); 268 } else 269 { 270 i_f [i] = std::to_string (count); 271 xynames.emplace (s_f [i], i_f [i]); 272 count++; 273 } 274 275 if (xynames.find (s_t [i]) != xynames.end ()) 276 { 277 i_t [i] = xynames.at (s_t [i]); 278 } else 279 { 280 i_t [i] = std::to_string (count); 281 xynames.emplace (s_t [i], i_t [i]); 282 count++; 283 } 284 } 285 286 Rcpp::DataFrame res = Rcpp::DataFrame::create ( 287 Rcpp::Named ("from_id") = i_f, 288 Rcpp::Named ("to_id") = i_t, 289 Rcpp::_["stringsAsFactors"] = false); 290 return res; 291}