Distances on Directed Graphs in R
1
2#include "deduplicate.h"
3
4void deduplicate::update_dupl_edge_map (deduplicate::EdgeMapType &edge_map,
5 const deduplicate::str_pair &this_pair, const double &val)
6{
7 if (edge_map.find (this_pair) == edge_map.end ())
8 {
9 edge_map.emplace (this_pair, val);
10 } else
11 {
12 double val_min = edge_map.find (this_pair)->second;
13 if (val < val_min)
14 {
15 edge_map.erase (this_pair);
16 edge_map.emplace (this_pair, val);
17 }
18 }
19}
20
21//' De-duplicate edges by replacing with minimal weighted distances and times
22//'
23//' @param graph Full graph in any form
24//' @param fr_col Name of column holding from edge labels
25//' @param to_col Name of column holding to edge labels
26//' @param d_col Name of column holding weighted distances
27//' @param t_col Name of column holding weighted times (or "" if no weighted
28//' times).
29//' @return A `data.frame` of 3 columns: 'from', 'to', and 'd', where 'd' is the
30//' minimal value taken from all duplicated edges. If 't_col' is specified, the
31//' equivalent minimal times are in the lower half of the result.
32//' @noRd
33// [[Rcpp::export]]
34Rcpp::DataFrame rcpp_deduplicate (const Rcpp::DataFrame &graph, const std::string fr_col, const std::string to_col,
35 const std::string d_col, const std::string t_col)
36{
37 const bool has_time = t_col.length () > 0L;
38
39 std::unordered_set < deduplicate::str_pair, deduplicate::str_pair_hash > edge_pair_set;
40 std::unordered_set < deduplicate::str_pair, deduplicate::str_pair_hash > edge_pair_dupl;
41
42 const std::vector <std::string> fr = graph [fr_col];
43 const std::vector <std::string> to = graph [to_col];
44 const std::vector <double> d = graph [d_col];
45 std::vector <double> t (graph.nrow ());
46 if (has_time) {
47 std::vector <double> t = graph [t_col];
48 }
49
50 const size_t n = static_cast <size_t> (graph.nrow ());
51
52 // First loop to collect duplicated edges
53 for (size_t i = 0; i < n; i++)
54 {
55 deduplicate::str_pair this_pair {fr [i], to [i]};
56 if (edge_pair_set.find (this_pair) != edge_pair_set.end ())
57 {
58 edge_pair_dupl.emplace (this_pair);
59 }
60 edge_pair_set.emplace (this_pair);
61 }
62
63 // Then aggregate values for each duplicate
64 deduplicate::EdgeMapType edge_map_d, edge_map_t;
65
66 for (size_t i = 0; i < n; i++)
67 {
68 deduplicate::str_pair this_pair {fr [i], to [i]};
69
70 if (edge_pair_dupl.find (this_pair) == edge_pair_dupl.end ())
71 {
72 continue;
73 }
74
75 deduplicate::update_dupl_edge_map (edge_map_d, this_pair, d [i]);
76
77 if (has_time)
78 {
79 deduplicate::update_dupl_edge_map (edge_map_t, this_pair, t [i]);
80 }
81
82 }
83
84 const size_t ndupl = edge_map_d.size ();
85 std::vector <std::string> fr_dupl (ndupl), to_dupl (ndupl);
86 std::vector <double> d_dupl (ndupl);
87
88 size_t i = 0;
89 for (auto e: edge_map_d)
90 {
91 fr_dupl [i] = e.first.first;
92 to_dupl [i] = e.first.second;
93 d_dupl [i++] = e.second;
94 }
95 if (has_time)
96 {
97 fr_dupl.resize (ndupl * 2);
98 to_dupl.resize (ndupl * 2);
99 d_dupl.resize (ndupl * 2);
100 for (auto e: edge_map_t)
101 {
102 fr_dupl [i] = e.first.first;
103 to_dupl [i] = e.first.second;
104 d_dupl [i++] = e.second;
105 }
106 }
107
108 Rcpp::DataFrame res = Rcpp::DataFrame::create (
109 Rcpp::Named ("from") = fr_dupl,
110 Rcpp::Named ("to") = to_dupl,
111 Rcpp::Named ("d") = d_dupl,
112 Rcpp::_["stringsAsFactors"] = false);
113
114 return res;
115}