/*************************************************************************** * Project: osmdata * File: osmdata-sp.cpp * Language: C++ * * osmdata is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * * osmdata is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * osm-router. If not, see . * * Author: Mark Padgham * E-Mail: mark.padgham@email.com * * Description: Extract OSM data from an object of class XmlData and return * it in R 'sp' format. * * Limitations: * * Dependencies: none (rapidXML header included in osmdata) * * Compiler Options: -std=c++11 ***************************************************************************/ #include "osmdata.h" #include #include // for min_element/max_element // Note: This code uses explicit index counters within most loops which use Rcpp // objects, because these otherwise require a // static_cast (std::distance (...)). This operation copies each // instance and can slow the loops down by several orders of magnitude! //' get_osm_nodes //' //' Store OSM nodes as `sf::POINT` objects //' //' @param ptxy Pointer to Rcpp::List to hold the resultant geometries //' @param kv_mat Pointer to Rcpp::DataFrame to hold key-value pairs //' @param nodes Pointer to all nodes in data set //' @param unique_vals pointer to all unique values (OSM IDs and keys) in data set //' @param bbox Pointer to the bbox needed for `sf` construction //' @param crs Pointer to the crs needed for `sf` construction //' //' @noRd void osm_sp::get_osm_nodes (Rcpp::S4 &sp_points, const Nodes &nodes, const UniqueVals &unique_vals) { Rcpp::NumericMatrix ptxy; Rcpp::CharacterMatrix kv_mat; size_t nrow = nodes.size (), ncol = unique_vals.k_point.size (); kv_mat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrow, ncol)); std::fill (kv_mat.begin (), kv_mat.end (), NA_STRING); ptxy = Rcpp::NumericMatrix (Rcpp::Dimension (nrow, 2)); std::vector ptnames; ptnames.reserve (nodes.size ()); unsigned int count = 0; for (auto ni = nodes.begin (); ni != nodes.end (); ++ni) { Rcpp::checkUserInterrupt (); ptxy (count, 0) = ni->second.lon; ptxy (count, 1) = ni->second.lat; ptnames.push_back (std::to_string (ni->first)); for (auto kv_iter = ni->second.key_val.begin (); kv_iter != ni->second.key_val.end (); ++kv_iter) { const std::string &key = kv_iter->first; unsigned int ndi = static_cast ( std::distance (unique_vals.k_point.begin (), unique_vals.k_point.find (key))); kv_mat (count, ndi) = kv_iter->second; } count++; } std::vector colnames = {"lon", "lat"}; Rcpp::List dimnames (0); dimnames.push_back (ptnames); dimnames.push_back (colnames); ptxy.attr ("dimnames") = dimnames; dimnames.erase (0, static_cast (dimnames.size ())); Rcpp::DataFrame kv_df = R_NilValue; if (unique_vals.k_point.size () > 0) { kv_mat.attr ("dimnames") = Rcpp::List::create (ptnames, unique_vals.k_point); kv_mat.attr ("names") = unique_vals.k_point; kv_df = osm_convert::restructure_kv_mat (kv_mat, false); } Rcpp::Language points_call ("new", "SpatialPoints"); Rcpp::Language sp_points_call ("new", "SpatialPointsDataFrame"); sp_points = sp_points_call.eval (); sp_points.slot ("data") = kv_df; sp_points.slot ("coords") = ptxy; } //' get_osm_ways //' //' Store OSM ways as `sf::LINESTRING` or `sf::POLYGON` objects. //' //' @param wayList Pointer to Rcpp::List to hold the resultant geometries //' @param kv_df Pointer to Rcpp::DataFrame to hold key-value pairs //' @param way_ids Vector of IDs of ways to trace //' @param ways Pointer to all ways in data set //' @param nodes Pointer to all nodes in data set //' @param unique_vals pointer to all unique values (OSM IDs and keys) in data set //' @param geom_type Character string specifying "POLYGON" or "LINESTRING" //' @param bbox Pointer to the bbox needed for `sf` construction //' @param crs Pointer to the crs needed for `sf` construction //' //' @noRd void osm_sp::get_osm_ways (Rcpp::S4 &sp_ways, const std::set &way_ids, const Ways &ways, const Nodes &nodes, const UniqueVals &unique_vals, const std::string &geom_type) { const int one = static_cast (1); if (!(geom_type == "line" || geom_type == "polygon")) throw std::runtime_error ("geom_type must be line or polygon"); Rcpp::List wayList (way_ids.size ()); size_t nrow = way_ids.size (), ncol = unique_vals.k_way.size (); std::vector waynames; waynames.reserve (way_ids.size ()); Rcpp::Language line_call ("new", "Line"); Rcpp::Language lines_call ("new", "Lines"); Rcpp::Environment sp_env = Rcpp::Environment::namespace_env ("sp"); Rcpp::Function Polygon = sp_env ["Polygon"]; Rcpp::Language polygons_call ("new", "Polygons"); Rcpp::S4 polygons, line, lines; // index of ill-formed polygons later removed - see issue#85 std::vector indx_out; std::vector poly_okay (nrow); Rcpp::CharacterMatrix kv_mat (Rcpp::Dimension (nrow, ncol)); std::fill (kv_mat.begin (), kv_mat.end (), NA_STRING); unsigned int count = 0; for (auto wi = way_ids.begin (); wi != way_ids.end (); ++wi) { Rcpp::checkUserInterrupt (); waynames.push_back (std::to_string (*wi)); Rcpp::NumericMatrix nmat; osm_convert::trace_way_nmat (ways, nodes, (*wi), nmat); Rcpp::List dummy_list (0); poly_okay [count] = true; if (geom_type == "line") { // sp::Line and sp::Lines objects can be constructed directly from // the data with the following two lines, but this is *enormously* // slower: // Rcpp::S4 line = Rcpp::Language ("Line", nmat).eval (); // Rcpp::S4 lines = Rcpp::Language ("Lines", line, id).eval (); // This way of constructing "new" objects and feeding slots is much // faster: line = line_call.eval (); line.slot ("coords") = nmat; dummy_list.push_back (line); lines = lines_call.eval (); lines.slot ("Lines") = dummy_list; lines.slot ("ID") = (*wi); wayList [count] = lines; } else { const double dtol = 1.0e-6; if (nmat.nrow () == 3 && fabs (nmat (0, 0) - nmat (2, 0)) < dtol && (fabs (nmat (0, 1) - nmat (2, 1)) < dtol)) { // polygon has only 3 rows with start == end, so is ill-formed indx_out.push_back (count); poly_okay [count] = false; // temp copy with > 3 rows necessary to suppress sp warning Rcpp::NumericMatrix nmat2 = Rcpp::NumericMatrix (Rcpp::Dimension (4, 2)); nmat = nmat2; } Rcpp::S4 poly = Polygon (nmat); poly.slot ("hole") = false; poly.slot ("ringDir") = one; dummy_list.push_back (poly); polygons = polygons_call.eval (); polygons.slot ("Polygons") = dummy_list; polygons.slot ("ID") = (*wi); polygons.slot ("plotOrder") = one; polygons.slot ("labpt") = poly.slot ("labpt"); polygons.slot ("area") = poly.slot ("area"); wayList [count] = polygons; } dummy_list.erase (0); auto wj = ways.find (*wi); osm_convert::get_value_mat_way (wj, unique_vals, kv_mat, count++); } // end for it over poly_ways if (indx_out.size () > 0) { std::reverse (indx_out.begin (), indx_out.end ()); for (auto i: indx_out) { wayList.erase (one); waynames.erase (waynames.begin () + i); } } wayList.attr ("names") = waynames; // Reduce kv_mat to indx_in if (indx_out.size () > 0) { size_t n_okay = nrow - indx_out.size (); Rcpp::CharacterMatrix kv_mat2 (Rcpp::Dimension (n_okay, ncol)); std::fill (kv_mat2.begin (), kv_mat2.end (), NA_STRING); int pos = 0, i_int = 0; for (size_t i = 0; i 0) { kv_mat.attr ("names") = unique_vals.k_way; kv_mat.attr ("dimnames") = Rcpp::List::create (waynames, unique_vals.k_way); kv_mat.attr ("names") = unique_vals.k_way; if (kv_mat.nrow () > 0 && kv_mat.ncol () > 0) kv_df = osm_convert::restructure_kv_mat (kv_mat, false); // TODO: Can names be assigned to R_NilValue? } if (geom_type == "line") { Rcpp::Language sp_lines_call ("new", "SpatialLinesDataFrame"); sp_ways = sp_lines_call.eval (); sp_ways.slot ("lines") = wayList; sp_ways.slot ("data") = kv_df; } else { Rcpp::Language sp_polys_call ("new", "SpatialPolygonsDataFrame"); sp_ways = sp_polys_call.eval (); sp_ways.slot ("polygons") = wayList; // Fill plotOrder slot with int vector - this has to be int, not // unsigned int! std::vector plord; for (int i=0; i (nrow); i++) plord.push_back (i + 1); sp_ways.slot ("plotOrder") = plord; plord.clear (); sp_ways.slot ("data") = kv_df; } } //' get_osm_relations //' //' Return a dual Rcpp::List containing all OSM relations, the firmt element of //' which holds `multipolygon` relations, while the second holds all others, //' which are stored as `multilinestring` objects. //' //' @param rels Pointer to the vector of Relation objects //' @param nodes Pointer to the vector of node objects //' @param ways Pointer to the vector of way objects //' @param unique_vals Pointer to a UniqueVals object containing std::sets of all //' unique IDs and keys for each kind of OSM object (nodes, ways, rels). //' //' @return A dual Rcpp::List, the first of which contains the multipolygon //' relations; the second the multilinestring relations. //' //' @noRd void osm_sp::get_osm_relations (Rcpp::S4 &multilines, Rcpp::S4 &multipolygons, const Relations &rels, const std::map &nodes, const std::map &ways, const UniqueVals &unique_vals) { /* Trace all multipolygon relations. These are the only OSM types where * sizes are not known before, so lat-lons and node names are stored in * dynamic vectors. These are 3D monsters: #1 for relation, #2 for polygon * in relation, and #3 for data. There are also associated 2D vector * objects for IDs and multilinestring roles. */ std::set keyset; // must be ordered! std::vector colnames = {"lat", "lon"}, rownames; Rcpp::List dimnames (0); Rcpp::NumericMatrix nmat (Rcpp::Dimension (0, 0)); double_arr2 lat_vec, lon_vec; double_arr3 lat_arr_mp, lon_arr_mp, lon_arr_ls, lat_arr_ls; string_arr2 rowname_vec, id_vec_mp, roles_ls; string_arr3 rowname_arr_mp, rowname_arr_ls; std::vector ids_ls; std::vector ids_mp, rel_id_mp, rel_id_ls; osmt_arr2 id_vec_ls; std::vector roles; unsigned int nmp = 0, nls = 0; // number of multipolygon and multilinestringrelations for (auto itr = rels.begin (); itr != rels.end (); ++itr) { if (itr->ispoly) nmp++; else { // TODO: Store these as std::vector > to avoid // repetition below std::set roles_set; for (auto itw = itr->ways.begin (); itw != itr->ways.end (); ++itw) roles_set.insert (itw->second); nls += roles_set.size (); } } size_t ncol = unique_vals.k_rel.size (); rel_id_mp.reserve (nmp); rel_id_ls.reserve (nls); Rcpp::CharacterMatrix kv_mat_mp (Rcpp::Dimension (nmp, ncol)), kv_mat_ls (Rcpp::Dimension (nls, ncol)); unsigned int count_mp = 0, count_ls = 0; for (auto itr = rels.begin (); itr != rels.end (); ++itr) { Rcpp::checkUserInterrupt (); if (itr->ispoly) // itr->second can only be "outer" or "inner" { trace_multipolygon (itr, ways, nodes, lon_vec, lat_vec, rowname_vec, ids_mp); // Store all ways in that relation and their associated roles rel_id_mp.push_back (std::to_string (itr->id)); lon_arr_mp.push_back (lon_vec); lat_arr_mp.push_back (lat_vec); rowname_arr_mp.push_back (rowname_vec); id_vec_mp.push_back (ids_mp); lon_vec.clear (); lon_vec.shrink_to_fit (); lat_vec.clear (); lat_vec.shrink_to_fit (); rowname_vec.clear (); rowname_vec.shrink_to_fit (); ids_mp.clear (); ids_mp.shrink_to_fit (); if (nmp > 0) osm_convert::get_value_mat_rel (itr, unique_vals, kv_mat_mp, count_mp++); } else // store as multilinestring { // multistrings are grouped here by roles, unlike GDAL which just // dumps all of them. std::set roles_set; for (auto itw = itr->ways.begin (); itw != itr->ways.end (); ++itw) roles_set.insert (itw->second); roles.reserve (roles_set.size ()); for (auto it = roles_set.begin (); it != roles_set.end (); ++it) roles.push_back (*it); roles_set.clear (); for (std::string role: roles) { trace_multilinestring (itr, role, ways, nodes, lon_vec, lat_vec, rowname_vec, ids_ls); std::stringstream ss; ss.str (""); if (role == "") ss << std::to_string (itr->id) << "-(no role)"; else ss << std::to_string (itr->id) << "-" << role; rel_id_ls.push_back (ss.str ()); lon_arr_ls.push_back (lon_vec); lat_arr_ls.push_back (lat_vec); rowname_arr_ls.push_back (rowname_vec); id_vec_ls.push_back (ids_ls); lon_vec.clear (); lon_vec.shrink_to_fit (); lat_vec.clear (); lat_vec.shrink_to_fit (); rowname_vec.clear (); rowname_vec.shrink_to_fit (); ids_ls.clear (); ids_ls.shrink_to_fit (); if (nls > 0) osm_convert::get_value_mat_rel (itr, unique_vals, kv_mat_ls, count_ls++); } roles_ls.push_back (roles); roles.clear (); } } osm_convert::convert_multipoly_to_sp (multipolygons, rels, lon_arr_mp, lat_arr_mp, rowname_arr_mp, id_vec_mp, unique_vals); osm_convert::convert_multiline_to_sp (multilines, rels, lon_arr_ls, lat_arr_ls, rowname_arr_ls, id_vec_ls, unique_vals); // ****** clean up ***** lon_arr_mp.clear (); lon_arr_mp.shrink_to_fit (); lon_arr_ls.clear (); lon_arr_ls.shrink_to_fit (); lat_arr_mp.clear (); lat_arr_mp.shrink_to_fit (); lat_arr_ls.clear (); lat_arr_ls.shrink_to_fit (); rowname_arr_mp.clear (); rowname_arr_mp.shrink_to_fit (); rowname_arr_ls.clear (); rowname_arr_ls.shrink_to_fit (); rel_id_mp.clear (); rel_id_mp.shrink_to_fit (); rel_id_ls.clear (); rel_id_ls.shrink_to_fit (); roles_ls.clear (); roles_ls.shrink_to_fit (); } //' rcpp_osmdata_sp //' //' Extracts all polygons from an overpass API query //' //' @param st Text contents of an overpass API query //' @return A \code{SpatialLinesDataFrame} contains all polygons and associated data //' //' @noRd // [[Rcpp::export]] Rcpp::List rcpp_osmdata_sp (const std::string& st) { #ifdef DUMP_INPUT { std::ofstream dump ("./osmdata-sp.xml"); if (dump.is_open()) { dump.write (st.c_str(), st.size()); } } #endif XmlData xml (st); const std::map & nodes = xml.nodes (); const std::map & ways = xml.ways (); const std::vector & rels = xml.relations (); const UniqueVals unique_vals = xml.unique_vals (); /************************************************************************ ************************************************************************ ** ** ** PRE-PROCESSING ** ** ** ************************************************************************ ************************************************************************/ // Step#2 std::set poly_ways, non_poly_ways; for (auto itw = ways.begin (); itw != ways.end (); ++itw) { if ((*itw).second.nodes.front () == (*itw).second.nodes.back ()) { if (poly_ways.find ((*itw).first) == poly_ways.end ()) poly_ways.insert ((*itw).first); } else if (non_poly_ways.find ((*itw).first) == non_poly_ways.end ()) non_poly_ways.insert ((*itw).first); } /************************************************************************ ************************************************************************ ** ** ** GET OSM DATA ** ** ** ************************************************************************ ************************************************************************/ // The actual routines to extract the OSM data and store in sp objects Rcpp::S4 sp_points, sp_lines, sp_polygons, sp_multilines, sp_multipolygons; osm_sp::get_osm_ways (sp_polygons, poly_ways, ways, nodes, unique_vals, "polygon"); osm_sp::get_osm_ways (sp_lines, non_poly_ways, ways, nodes, unique_vals, "line"); osm_sp::get_osm_nodes (sp_points, nodes, unique_vals); osm_sp::get_osm_relations (sp_multilines, sp_multipolygons, rels, nodes, ways, unique_vals); // Add bbox and crs to each sp object Rcpp::NumericMatrix bbox = rcpp_get_bbox (xml.x_min (), xml.x_max (), xml.y_min (), xml.y_max ()); sp_points.slot ("bbox") = bbox; sp_lines.slot ("bbox") = bbox; sp_polygons.slot ("bbox") = bbox; sp_multilines.slot ("bbox") = bbox; sp_multipolygons.slot ("bbox") = bbox; Rcpp::Language crs_call ("new", "CRS"); Rcpp::S4 crs = crs_call.eval (); crs.slot ("projargs") = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"; sp_points.slot ("proj4string") = crs; sp_lines.slot ("proj4string") = crs; sp_polygons.slot ("proj4string") = crs; sp_multilines.slot ("proj4string") = crs; sp_multipolygons.slot ("proj4string") = crs; Rcpp::List ret (6); ret [0] = bbox; ret [1] = sp_points; ret [2] = sp_lines; ret [3] = sp_polygons; ret [4] = sp_multilines; ret [5] = sp_multipolygons; std::vector retnames {"bbox", "points", "lines", "polygons", "multilines", "multipolygons"}; ret.attr ("names") = retnames; return ret; }