RegionExtractor.java 4.85 KB
Newer Older
1
2
3
4
package eu.simstadt.regionchooser;

import java.io.IOException;
import java.nio.file.Path;
5
import java.util.logging.Logger;
6
7
import java.util.regex.Matcher;
import java.util.regex.Pattern;
8
import com.vividsolutions.jts.geom.Coordinate;
9
import com.vividsolutions.jts.geom.Envelope;
10
import com.vividsolutions.jts.geom.Geometry;
11
import com.vividsolutions.jts.geom.GeometryFactory;
12
13
14
import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.io.ParseException;
import com.vividsolutions.jts.io.WKTReader;
15
16
17
import com.ximpleware.NavException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
18
19
import eu.simstadt.geo.fast_xml_parser.BuildingXmlNode;
import eu.simstadt.geo.fast_xml_parser.CityGmlIterator;
20
21
22
23


public class RegionExtractor
{
24
	private static final WKTReader wktReader = new WKTReader();
25

duminil's avatar
duminil committed
26
	private static final Logger LOGGER = Logger.getLogger(RegionExtractor.class.getName());
27
	private static final GeometryFactory gf = new GeometryFactory();
28

29
30
31
32
33
34
35
36
37
	/**
	 * Main method behind RegionChooser. Given a CityGML (as Path) and a geometry (as Well-known text POLYGON, in the
	 * same coordinate system as the CityGML), it iterates over each Building and checks if the building is inside the
	 * geometry. It only works with CityGML files smaller than 2GB. It uses VTD-XML parser instead of a whole
	 * Simstadt/Citydoctor/Citygml model.
	 * 
	 * 
	 * @param citygmlPath
	 * @param wktPolygon
38
39
	 * @param string
	 * @return a StringBuffer, full with the extracted Citygml, including header, buildings and footer.
40
41
42
43
44
45
	 * @throws ParseException
	 * @throws IOException
	 * @throws XPathEvalException
	 * @throws NavException
	 * @throws XPathParseException
	 * @throws NumberFormatException
46
	 */
47
	static public StringBuffer selectRegionDirectlyFromCityGML(Path citygmlPath, String wktPolygon, String srsName)
48
49
			throws ParseException, NumberFormatException, XPathParseException, NavException, XPathEvalException,
			IOException {
50

51
		int buildingsCount = 0;
52
53
54
		int foundBuildingsCount = 0;
		StringBuffer sb = new StringBuffer();
		Geometry poly = wktReader.read(wktPolygon);
55

56
57
		CityGmlIterator citygml = new CityGmlIterator(citygmlPath);
		for (BuildingXmlNode buildingXmlNode : citygml) {
duminil's avatar
duminil committed
58
			if (buildingsCount == 0) {
59
				sb.append(replaceEnvelopeInHeader(citygml.getHeader(), poly.getEnvelopeInternal(), srsName));
60
			}
duminil's avatar
duminil committed
61
			buildingsCount += 1;
62
			Coordinate coord = new Coordinate(buildingXmlNode.x, buildingXmlNode.y);
63
			Point point = gf.createPoint(coord);
64
65
66
			if (point.within(poly)) {
				foundBuildingsCount++;
				sb.append(buildingXmlNode.toString());
67
			}
duminil's avatar
duminil committed
68
69
70
			if (buildingsCount % 1000 == 0) {
				LOGGER.info("1000 buildings parsed");
			}
71
		}
72

73
		LOGGER.info("Buildings found in selected region " + foundBuildingsCount);
duminil's avatar
duminil committed
74
		sb.append(citygml.getFooter());
75
76
		return sb;
	}
77
78
79
80
81
82

	/**
	 * Some Citygml files include an envelope (bounding box), defined at the very beginning of the file. If the extracted
	 * region comes from a huge file (e.g. from NYC), it might inherit this header with a huge envelope. Some methods
	 * might get confused by this wrong envelope, so this method replaces the original envelope with the bounding box
	 * from the extracting polygon. The real envelope might be even smaller, but it could only be known at the end of the
83
84
	 * parsing, after having analyzed every building. The envelope should be written in the header. If present, min and
	 * max values for Z are kept.
85
86
87
88
89
90
91
	 * 
	 * @param header
	 * @param envelope
	 * @param srsName
	 * @return CityGML Header with an updated envelope
	 */
	private static String replaceEnvelopeInHeader(String header, Envelope envelope, String srsName) {
duminil's avatar
duminil committed
92
		//NOTE: Sorry for using a regex to parse XML. The header in itself isn't a valid XML, so this looked like the easiest solution.
93
94
95
96
97
98
99
100
101
102
103
		double zMin = 0;
		double zMax = 0;
		Pattern boundedByPattern = Pattern.compile(
				"(?is)<gml:boundedBy>.*?<gml:lowerCorner>(.*?)</gml:lowerCorner>\\s*<gml:upperCorner>(.*?)</gml:upperCorner>.*?</gml:boundedBy>");
		Matcher matcher = boundedByPattern.matcher(header);
		String headerWithoutEnvelope = header;
		if (matcher.find()) {
			headerWithoutEnvelope = matcher.replaceFirst("");
			zMin = Double.valueOf(matcher.group(1).split("\\s+")[2]);
			zMax = Double.valueOf(matcher.group(2).split("\\s+")[2]);
		}
104
105
		String newEnvelope = "<gml:boundedBy>\r\n" +
				"  <gml:Envelope srsName=\"" + srsName + "\" srsDimension=\"3\">\r\n" + //NOTE: Would srsDimension="2" be better? Should the original Z get extracted?
106
107
108
109
				"    <gml:lowerCorner>" + envelope.getMinX() + " " + envelope.getMinY() + " " + zMin
				+ "</gml:lowerCorner>\r\n" +
				"    <gml:upperCorner>" + envelope.getMaxX() + " " + envelope.getMaxY() + " " + zMax
				+ "</gml:upperCorner>\r\n" +
110
111
112
113
				"  </gml:Envelope>\r\n" +
				"</gml:boundedBy>\r\n";
		return headerWithoutEnvelope + newEnvelope;
	}
114
}