package eu.simstadt.regionchooser;

import java.io.IOException;
import java.io.Writer;
import java.nio.file.Path;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKTReader;
import com.ximpleware.NavException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.simstadt.regionchooser.fast_xml_parser.BuildingXmlNode;
import eu.simstadt.regionchooser.fast_xml_parser.CityGmlIterator;


/**
 * Extracts the buildings of one or more CityGML files that lie inside a polygon and writes them, wrapped in a
 * CityGML header and footer, to a {@link Writer}.
 */
public class RegionExtractor
{
    private static final WKTReader WKT_READER = new WKTReader();

    private static final Logger LOGGER = Logger.getLogger(RegionExtractor.class.getName());

    private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();

    /**
     * Matches the bounding box found in a CityGML header. Group 1 is the text of the lower corner, group 2 the text
     * of the upper corner.
     *
     * NOTE(review): the gml: element names used here and in the generated envelope were restored during review,
     * because the markup was missing from the reviewed copy of this file (the pattern matched the empty string and
     * the generated envelope contained no elements). Confirm them against version control.
     */
    private static final Pattern BOUNDED_BY_PATTERN = Pattern.compile(
            "(?is)<gml:boundedBy>.*?<gml:lowerCorner>(.*?)</gml:lowerCorner>\\s*"
                    + "<gml:upperCorner>(.*?)</gml:upperCorner>.*?</gml:boundedBy>");

    /**
     * Main method behind RegionChooser. Given CityGMLs (as Path[]) and a geometry (as Well-known text POLYGON, in the
     * same coordinate system as the CityGML), it iterates over each Building and checks if the building is inside the
     * geometry. It only works with CityGML files smaller than 2GB. It uses VTD-XML parser instead of a whole
     * Simstadt/Citydoctor/Citygml model.
     *
     * The header is taken from the file in which the first building is encountered, with its envelope replaced by
     * the bounding box of the polygon. The footer is taken from the last file. If no building at all is encountered,
     * only the footer is written.
     *
     * @param wktPolygon selection polygon, as Well-known text
     * @param srsName name of the spatial reference system, written into the envelope of the extracted header
     * @param sb receives the extracted Citygml, including header, buildings and footer
     * @param citygmlPaths CityGML files to search; at least one is required
     * @return count of buildings found inside the polygon
     * @throws ParseException if wktPolygon cannot be parsed
     * @throws IllegalArgumentException if no CityGML path is given
     * @throws IOException
     * @throws NavException
     * @throws XPathParseException
     * @throws NumberFormatException if the header envelope contains a Z value which is not a number
     */
    static int selectRegionDirectlyFromCityGML(String wktPolygon, String srsName, Writer sb, Path... citygmlPaths)
            throws ParseException, XPathParseException, NavException, IOException {
        Geometry poly = WKT_READER.read(wktPolygon);
        // Checked after the polygon has been parsed, so that an invalid polygon is still reported first.
        if (citygmlPaths.length == 0) {
            throw new IllegalArgumentException("There should be at least one citygml");
        }

        int buildingsCount = 0;
        int foundBuildingsCount = 0;
        CityGmlIterator citygml = null;
        for (Path citygmlPath : citygmlPaths) {
            LOGGER.info("Parsing " + citygmlPath);
            //TODO: Allow citygmlPath for folders too, and iterate over gmls?
            citygml = new CityGmlIterator(citygmlPath);
            for (BuildingXmlNode buildingXmlNode : citygml) {
                if (buildingsCount == 0) {
                    // Very first building over all files: the header is written exactly once.
                    sb.append(replaceEnvelopeInHeader(citygml.getHeader(), poly.getEnvelopeInternal(), srsName));
                }
                buildingsCount++;
                if (buildingXmlNode.hasCoordinates()) {
                    Coordinate coord = new Coordinate(buildingXmlNode.x, buildingXmlNode.y);
                    Point point = GEOMETRY_FACTORY.createPoint(coord);
                    if (point.within(poly)) {
                        foundBuildingsCount++;
                        sb.append(buildingXmlNode.toString());
                    }
                }
                if (buildingsCount % 1000 == 0) {
                    LOGGER.info("1000 buildings parsed");
                }
            }
        }

        if (foundBuildingsCount == 0) {
            LOGGER.warning("No building found in the selected region.");
        }
        LOGGER.info("Buildings found in selected region : " + foundBuildingsCount);
        // NOTE(review): header and footer are copied verbatim around the selected buildings. The original note here
        // warned about headers and footers which carry a tag belonging to a building member; the tag names were
        // missing from the reviewed copy, so confirm which ones were meant.
        sb.append(citygml.getFooter());
        return foundBuildingsCount;
    }

    /**
     * Some Citygml files include an envelope (bounding box), defined at the very beginning of the file. If the
     * extracted region comes from a huge file (e.g. from NYC), it might inherit this header with a huge envelope.
     * Some methods might get confused by this wrong envelope, so this method replaces the original envelope with the
     * bounding box from the extracting polygon. The real envelope might be even smaller, but it could only be known
     * at the end of the parsing, after having analyzed every building. The envelope should be written in the header.
     * If present, min and max values for Z are kept, otherwise 0 is used.
     *
     * The first envelope found is removed from the header, and the new one is appended at the end of the header.
     *
     * @param header CityGML header, as raw text
     * @param envelope bounding box of the extracting polygon
     * @param srsName name of the spatial reference system of the new envelope
     * @return CityGML Header with an updated envelope
     * @throws NumberFormatException if a Z value of the original envelope is not a number
     */
    private static String replaceEnvelopeInHeader(String header, Envelope envelope, String srsName) {
        //NOTE: Sorry for using a regex to parse XML. The header in itself isn't a valid XML, so this looked like the easiest solution.
        double zMin = 0;
        double zMax = 0;
        String headerWithoutEnvelope = header;

        Matcher matcher = BOUNDED_BY_PATTERN.matcher(header);
        if (matcher.find()) {
            // Groups are read before the header is modified, and the match is cut out with its own offsets, so
            // nothing depends on the state of the matcher after a replace operation.
            zMin = parseZ(matcher.group(1));
            zMax = parseZ(matcher.group(2));
            headerWithoutEnvelope = header.substring(0, matcher.start()) + header.substring(matcher.end());
        }

        //NOTE: Would srsDimension="2" be better? Should the original Z get extracted?
        String newEnvelope = "<gml:boundedBy>\r\n"
                + "  <gml:Envelope srsName=\"" + srsName + "\" srsDimension=\"3\">\r\n"
                + "    <gml:lowerCorner>" + envelope.getMinX() + " " + envelope.getMinY() + " " + zMin
                + "</gml:lowerCorner>\r\n"
                + "    <gml:upperCorner>" + envelope.getMaxX() + " " + envelope.getMaxY() + " " + zMax
                + "</gml:upperCorner>\r\n"
                + "  </gml:Envelope>\r\n"
                + "</gml:boundedBy>\r\n";
        return headerWithoutEnvelope + newEnvelope;
    }

    /**
     * Returns the third whitespace-separated value of a corner text, or 0 if there are fewer than three values.
     * The text is trimmed first: without it, leading whitespace would produce an empty first token and the second
     * ordinate would be mistaken for Z.
     */
    private static double parseZ(String cornerText) {
        String[] ordinates = cornerText.trim().split("\\s+");
        return ordinates.length > 2 ? Double.parseDouble(ordinates[2]) : 0;
    }
}