RegionExtractor.java 4.85 KB
Newer Older
eric.duminil's avatar
eric.duminil committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package eu.simstadt.regionchooser;

import java.io.IOException;
import java.nio.file.Path;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Envelope;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.io.ParseException;
import com.vividsolutions.jts.io.WKTReader;
import com.ximpleware.NavException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.simstadt.geo.fast_xml_parser.BuildingXmlNode;
import eu.simstadt.geo.fast_xml_parser.CityGmlIterator;


public class RegionExtractor
{
	private static final WKTReader wktReader = new WKTReader();

	private static final Logger LOGGER = Logger.getLogger(RegionExtractor.class.getName());
	private static final GeometryFactory gf = new GeometryFactory();

	/**
	 * Main method behind RegionChooser. Given a CityGML (as Path) and a geometry (as Well-known text POLYGON, in the
	 * same coordinate system as the CityGML), it iterates over each Building and checks if the building is inside the
	 * geometry. It only works with CityGML files smaller than 2GB. It uses VTD-XML parser instead of a whole
	 * Simstadt/Citydoctor/Citygml model.
	 * 
	 * 
	 * @param citygmlPath
	 * @param wktPolygon
	 * @param string
	 * @return a StringBuffer, full with the extracted Citygml, including header, buildings and footer.
	 * @throws ParseException
	 * @throws IOException
	 * @throws XPathEvalException
	 * @throws NavException
	 * @throws XPathParseException
	 * @throws NumberFormatException
	 */
	static public StringBuffer selectRegionDirectlyFromCityGML(Path citygmlPath, String wktPolygon, String srsName)
			throws ParseException, NumberFormatException, XPathParseException, NavException, XPathEvalException,
			IOException {

		int buildingsCount = 0;
		int foundBuildingsCount = 0;
		StringBuffer sb = new StringBuffer();
		Geometry poly = wktReader.read(wktPolygon);

		CityGmlIterator citygml = new CityGmlIterator(citygmlPath);
		for (BuildingXmlNode buildingXmlNode : citygml) {
			if (buildingsCount == 0) {
				sb.append(replaceEnvelopeInHeader(citygml.getHeader(), poly.getEnvelopeInternal(), srsName));
			}
			buildingsCount += 1;
			Coordinate coord = new Coordinate(buildingXmlNode.x, buildingXmlNode.y);
			Point point = gf.createPoint(coord);
			if (point.within(poly)) {
				foundBuildingsCount++;
				sb.append(buildingXmlNode.toString());
			}
			if (buildingsCount % 1000 == 0) {
				LOGGER.info("1000 buildings parsed");
			}
		}

		LOGGER.info("Buildings found in selected region " + foundBuildingsCount);
		sb.append(citygml.getFooter());
		return sb;
	}

	/**
	 * Some Citygml files include an envelope (bounding box), defined at the very beginning of the file. If the extracted
	 * region comes from a huge file (e.g. from NYC), it might inherit this header with a huge envelope. Some methods
	 * might get confused by this wrong envelope, so this method replaces the original envelope with the bounding box
	 * from the extracting polygon. The real envelope might be even smaller, but it could only be known at the end of the
	 * parsing, after having analyzed every building. The envelope should be written in the header. If present, min and
	 * max values for Z are kept.
	 * 
	 * @param header
	 * @param envelope
	 * @param srsName
	 * @return CityGML Header with an updated envelope
	 */
	private static String replaceEnvelopeInHeader(String header, Envelope envelope, String srsName) {
		//NOTE: Sorry for using a regex to parse XML. The header in itself isn't a valid XML, so this looked like the easiest solution.
		double zMin = 0;
		double zMax = 0;
		Pattern boundedByPattern = Pattern.compile(
				"(?is)<gml:boundedBy>.*?<gml:lowerCorner>(.*?)</gml:lowerCorner>\\s*<gml:upperCorner>(.*?)</gml:upperCorner>.*?</gml:boundedBy>");
		Matcher matcher = boundedByPattern.matcher(header);
		String headerWithoutEnvelope = header;
		if (matcher.find()) {
			headerWithoutEnvelope = matcher.replaceFirst("");
101
102
103
104
105
106
107
108
			String[] mins = matcher.group(1).split("\\s+");
			String[] maxs = matcher.group(2).split("\\s+");
			if (mins.length > 2) {
				zMin = Double.valueOf(mins[2]);
			}
			if (maxs.length > 2) {
				zMax = Double.valueOf(maxs[2]);
			}
eric.duminil's avatar
eric.duminil committed
109
110
111
112
113
114
115
116
117
118
119
120
		}
		String newEnvelope = "<gml:boundedBy>\r\n" +
				"  <gml:Envelope srsName=\"" + srsName + "\" srsDimension=\"3\">\r\n" + //NOTE: Would srsDimension="2" be better? Should the original Z get extracted?
				"    <gml:lowerCorner>" + envelope.getMinX() + " " + envelope.getMinY() + " " + zMin
				+ "</gml:lowerCorner>\r\n" +
				"    <gml:upperCorner>" + envelope.getMaxX() + " " + envelope.getMaxY() + " " + zMax
				+ "</gml:upperCorner>\r\n" +
				"  </gml:Envelope>\r\n" +
				"</gml:boundedBy>\r\n";
		return headerWithoutEnvelope + newEnvelope;
	}
}