Distances on Directed Graphs in R
1#include "graph.h"
2
3void graph::add_to_v2e_map (vert2edge_map_t &vert2edge_map, const vertex_id_t vid,
4 const edge_id_t eid)
5{
6 std::unordered_set <edge_id_t> edge_ids;
7 if (vert2edge_map.find (vid) == vert2edge_map.end ())
8 {
9 edge_ids.emplace (eid);
10 vert2edge_map.emplace (vid, edge_ids);
11 } else
12 {
13 edge_ids = vert2edge_map [vid];
14 edge_ids.insert (eid);
15 vert2edge_map [vid] = edge_ids;
16 }
17}
18
19void graph::erase_from_v2e_map (vert2edge_map_t &vert2edge_map,
20 const vertex_id_t vid, const edge_id_t eid)
21{
22 std::unordered_set <edge_id_t> edge_ids = vert2edge_map [vid];
23 if (edge_ids.find (eid) != edge_ids.end ())
24 {
25 edge_ids.erase (eid);
26 vert2edge_map [vid] = edge_ids;
27 }
28}
29
30//' graph_has_components
31//'
32//' Does a graph have a vector of connected component IDs? Only used in
33//' \code{sample_one_vertex}
34//' @noRd
35bool graph::graph_has_components (const Rcpp::DataFrame &graph)
36{
37 Rcpp::CharacterVector graph_names = graph.attr ("names");
38 for (auto n: graph_names)
39 if (n == "component")
40 return true;
41
42 return false;
43}
44
45
46//' @name graph_from_df
47//'
48//' Convert a standard graph data.frame into an object of class graph. Graphs
49//' are standardised with the function \code{dodgr_convert_graph()$graph}, and
50//' contain only the four columns [from, to, d, w]
51//'
52//' @noRd
53bool graph::graph_from_df (const Rcpp::DataFrame &gr, vertex_map_t &vm,
54 edge_map_t &edge_map, vert2edge_map_t &vert2edge_map)
55{
56 Rcpp::StringVector edge_id = gr ["edge_id"];
57 Rcpp::StringVector from = gr ["from"];
58 Rcpp::StringVector to = gr ["to"];
59 Rcpp::NumericVector dist = gr ["d"];
60 Rcpp::NumericVector weight = gr ["d_weighted"];
61 Rcpp::StringVector colnames = gr.attr ("names");
62 bool has_times = false;
63 Rcpp::NumericVector time, timew;
64 if (gr.ncol () == 7)
65 {
66 has_times = true;
67 time = gr ["time"];
68 timew = gr ["time_weighted"];
69 }
70
71 std::vector <edge_id_t> replacement_edges; // all empty here
72
73 for (R_xlen_t i = 0; i < to.length (); i ++)
74 {
75 Rcpp::checkUserInterrupt ();
76 vertex_id_t from_id = std::string (from [i]);
77 vertex_id_t to_id = std::string (to [i]);
78
79 if (vm.find (from_id) == vm.end ())
80 {
81 vertex_t fromV = vertex_t ();
82 vm.emplace (from_id, fromV);
83 }
84 vertex_t from_vtx = vm.at (from_id);
85 from_vtx.add_neighbour_out (to_id);
86 vm [from_id] = from_vtx;
87
88 if (vm.find (to_id) == vm.end ())
89 {
90 vertex_t toV = vertex_t ();
91 vm.emplace (to_id, toV);
92 }
93 vertex_t to_vtx = vm.at (to_id);
94 to_vtx.add_neighbour_in (from_id);
95 vm [to_id] = to_vtx;
96
97 edge_id_t edge_id_str = Rcpp::as <edge_id_t> (edge_id [i]);
98
99 double wt = weight [i];
100 if (weight [i] < 0.0)
101 wt = INFINITE_DOUBLE; // # nocov
102
103 std::vector <double> weights;
104 if (!has_times)
105 weights = {dist [i], wt};
106 else
107 weights = {dist [i], wt, time [i], timew [i]};
108
109 edge_t edge = edge_t (from_id, to_id, weights,
110 edge_id_str, replacement_edges);
111
112 edge_map.emplace (edge_id_str, edge);
113 graph::add_to_v2e_map (vert2edge_map, from_id, edge_id_str);
114 graph::add_to_v2e_map (vert2edge_map, to_id, edge_id_str);
115 }
116
117 return has_times;
118}
119
120//' identify_graph_components
121//'
122//' Identify initial graph components for each **vertex**
123//' Identification for edges is subsequently perrformed with
124//' \code{rcpp_get_component_vector}.
125//'
126//' @param v unordered_map <vertex_id_t, vertex_t>
127//' @param com component map from each vertex to component numbers
128//' @noRd
129size_t graph::identify_graph_components (vertex_map_t &v,
130 std::unordered_map <vertex_id_t, size_t> &com)
131{
132 // initialize components map
133 std::unordered_set <vertex_id_t> all_verts;
134 for (auto it: v)
135 all_verts.emplace (it.first);
136 com.clear ();
137
138 std::unordered_set <vertex_id_t> nbs_todo, nbs_done;
139 nbs_todo.insert (*all_verts.begin ());
140 size_t compnum = 0;
141 while (all_verts.size () > 0)
142 {
143 Rcpp::checkUserInterrupt ();
144 vertex_id_t vt = (*nbs_todo.begin ());
145 all_verts.erase (vt);
146
147 vertex_t vtx = v.find (vt)->second;
148 std::unordered_set <vertex_id_t> nbs = vtx.get_all_neighbours ();
149 for (auto nvtx: nbs)
150 {
151 com.emplace (nvtx, compnum);
152 if (nbs_done.find (nvtx) == nbs_done.end ())
153 nbs_todo.emplace (nvtx);
154 }
155 nbs_done.emplace (vt);
156 com.emplace (vt, compnum);
157 nbs_todo.erase (vt);
158
159 if (nbs_todo.size () == 0 && all_verts.size () > 0)
160 {
161 nbs_todo.insert (*all_verts.begin ());
162 compnum++;
163 }
164 }
165
166 long int largest_id = 0;
167 if (compnum > 0)
168 {
169 std::vector <size_t> comp_sizes (compnum + 1, 0);
170 for (auto c: com)
171 comp_sizes [c.second]++;
172 auto maxi = std::max_element (comp_sizes.begin (), comp_sizes.end ());
173
174 largest_id = std::distance (comp_sizes.begin (), maxi);
175 }
176
177 return static_cast <size_t> (largest_id);
178}
179
180
181//' rcpp_get_component_vector
182//'
183//' Get component numbers for each edge of graph
184//'
185//' @param graph graph to be processed; stripped down and standardised to five
186//' columns
187//'
188//' @return Two vectors: one of edge IDs and one of corresponding component
189//' numbers
190//' @noRd
191// [[Rcpp::export]]
192Rcpp::List rcpp_get_component_vector (const Rcpp::DataFrame &graph)
193{
194 vertex_map_t vertices;
195 edge_map_t edge_map;
196 vert2edge_map_t vert2edge_map;
197
198 bool has_times = graph::graph_from_df (graph, vertices, edge_map, vert2edge_map);
199 has_times = false; // rm unused variable warning
200
201 std::unordered_map <vertex_id_t, size_t> components;
202 size_t largest_component =
203 graph::identify_graph_components (vertices, components);
204 largest_component++; // suppress unused variable warning
205
206 // Then map component numbers of vertices onto edges
207 std::unordered_map <edge_id_t, size_t> comp_nums;
208 for (auto ve: vert2edge_map)
209 {
210 vertex_id_t vi = ve.first;
211 std::unordered_set <edge_id_t> edges = ve.second;
212 for (edge_id_t e: edges)
213 comp_nums.emplace (e, components.find (vi)->second);
214 }
215
216 Rcpp::StringVector edge_id (comp_nums.size ());
217 Rcpp::IntegerVector comp_num (comp_nums.size ());
218 size_t i = 0;
219 for (auto cn: comp_nums)
220 {
221 edge_id (i) = cn.first;
222 comp_num (i) = static_cast <int> (cn.second) + 1L; // 1-indexed for R
223 i++;
224 }
225
226 return Rcpp::List::create (
227 Rcpp::Named ("edge_id") = edge_id,
228 Rcpp::Named ("edge_component") = comp_num);
229}
230
231//' rcpp_unique_rownames
232//'
233//' Construct vertex (from, to) ID values from unique pairs of coordinates
234//' rounded to <precision>. Used when vertices have no ID values.
235//'
236//' @noRd
237// [[Rcpp::export]]
238Rcpp::DataFrame rcpp_unique_rownames (Rcpp::DataFrame xyfrom,
239 Rcpp::DataFrame xyto,
240 const int precision = 10)
241{
242 const size_t prec_size_t = static_cast <size_t> (precision);
243
244 std::vector <double> xf = xyfrom ["x"],
245 yf = xyfrom ["y"],
246 xt = xyto ["x"],
247 yt = xyto ["y"];
248 const size_t n = static_cast <size_t> (xyfrom.nrow ());
249
250 std::vector <std::string> s_f (n), s_t (n);
251 std::vector <std::string> i_f (n), i_t (n); // indices as rownames
252 std::unordered_map <std::string, std::string> xynames;
253 size_t count = 0;
254 for (size_t i = 0; i < n; i++)
255 {
256 std::string xfs = std::to_string (xf [i]),
257 yfs = std::to_string (yf [i]),
258 xts = std::to_string (xt [i]),
259 yts = std::to_string (yt [i]);
260 s_f [i] = xfs.substr(0, xfs.find(".") + prec_size_t + 1) +
261 yfs.substr (0, yfs.find(".") + prec_size_t + 1);
262 s_t [i] = xts.substr (0, xts.find(".") + prec_size_t + 1) +
263 yts.substr (0, yts.find(".") + prec_size_t + 1);
264
265 if (xynames.find (s_f [i]) != xynames.end ())
266 {
267 i_f [i] = xynames.at (s_f [i]);
268 } else
269 {
270 i_f [i] = std::to_string (count);
271 xynames.emplace (s_f [i], i_f [i]);
272 count++;
273 }
274
275 if (xynames.find (s_t [i]) != xynames.end ())
276 {
277 i_t [i] = xynames.at (s_t [i]);
278 } else
279 {
280 i_t [i] = std::to_string (count);
281 xynames.emplace (s_t [i], i_t [i]);
282 count++;
283 }
284 }
285
286 Rcpp::DataFrame res = Rcpp::DataFrame::create (
287 Rcpp::Named ("from_id") = i_f,
288 Rcpp::Named ("to_id") = i_t,
289 Rcpp::_["stringsAsFactors"] = false);
290 return res;
291}