package eu.simstadt.regionchooser;

import java.io.IOException;
import java.io.Writer;
import java.nio.file.Path;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.io.WKTReader;
import com.ximpleware.NavException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.simstadt.regionchooser.fast_xml_parser.BuildingXmlNode;
import eu.simstadt.regionchooser.fast_xml_parser.CityGmlIterator;


public class RegionExtractor
{
Eric Duminil's avatar
Eric Duminil committed
25
	private static final WKTReader WKT_READER = new WKTReader();
eric.duminil's avatar
eric.duminil committed
26
27

	private static final Logger LOGGER = Logger.getLogger(RegionExtractor.class.getName());
Eric Duminil's avatar
Eric Duminil committed
28
	private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();
eric.duminil's avatar
eric.duminil committed
29
30

	/**
31
	 * Main method behind RegionChooser. Given CityGMLs (as Path[]) and a geometry (as Well-known text POLYGON, in the
eric.duminil's avatar
eric.duminil committed
32
33
34
	 * same coordinate system as the CityGML), it iterates over each Building and checks if the building is inside the
	 * geometry. It only works with CityGML files smaller than 2GB. It uses VTD-XML parser instead of a whole
	 * Simstadt/Citydoctor/Citygml model.
Eric Duminil's avatar
NOTE    
Eric Duminil committed
35
	 *
eric.duminil's avatar
eric.duminil committed
36
	 * @param wktPolygon
37
	 * @param srsName
Eric Duminil's avatar
Eric Duminil committed
38
	 * @param output
39
	 * @param citygmlPaths
Eric Duminil's avatar
NOTE    
Eric Duminil committed
40
41
	 *
	 *
Eric Duminil's avatar
Eric Duminil committed
42
43
	 * @writes the extracted Citygml, including header, buildings and footer to output
	 * @return counts of found building.
eric.duminil's avatar
eric.duminil committed
44
45
46
47
48
49
50
	 * @throws ParseException
	 * @throws IOException
	 * @throws XPathEvalException
	 * @throws NavException
	 * @throws XPathParseException
	 * @throws NumberFormatException
	 */
Eric Duminil's avatar
Eric Duminil committed
51
52
	static int selectRegionDirectlyFromCityGML(String wktPolygon, String srsName, Writer sb,
			Path... citygmlPaths) throws ParseException, XPathParseException, NavException, IOException {
Eric Duminil's avatar
Eric Duminil committed
53

eric.duminil's avatar
eric.duminil committed
54
55
		int buildingsCount = 0;
		int foundBuildingsCount = 0;
Eric Duminil's avatar
Eric Duminil committed
56
		Geometry poly = WKT_READER.read(wktPolygon);
eric.duminil's avatar
eric.duminil committed
57

58
59
60
61
		CityGmlIterator citygml = null;
		for (int i = 0; i < citygmlPaths.length; i++) {
			Path citygmlPath = citygmlPaths[i];
			LOGGER.info("Parsing " + citygmlPath);
Eric Duminil's avatar
Eric Duminil committed
62
			//TODO: Allow citygmlPath for folders too, and iterate over gmls?
63
64
65
66
67
68
69
70
			citygml = new CityGmlIterator(citygmlPath);
			for (BuildingXmlNode buildingXmlNode : citygml) {
				if (buildingsCount == 0) {
					sb.append(replaceEnvelopeInHeader(citygml.getHeader(), poly.getEnvelopeInternal(), srsName));
				}
				buildingsCount += 1;
				if (buildingXmlNode.hasCoordinates()) {
					Coordinate coord = new Coordinate(buildingXmlNode.x, buildingXmlNode.y);
Eric Duminil's avatar
Eric Duminil committed
71
					Point point = GEOMETRY_FACTORY.createPoint(coord);
72
73
74
75
76
77
78
					if (point.within(poly)) {
						foundBuildingsCount++;
						sb.append(buildingXmlNode.toString());
					}
				}
				if (buildingsCount % 1000 == 0) {
					LOGGER.info("1000 buildings parsed");
79
				}
eric.duminil's avatar
eric.duminil committed
80
			}
81
82
83
84
		}

		if (citygml == null) {
			throw new IllegalArgumentException("There should be at least one citygml");
eric.duminil's avatar
eric.duminil committed
85
86
		}

Eric Duminil's avatar
Eric Duminil committed
87
88
89
90
		if (foundBuildingsCount == 0) {
			LOGGER.warning("No building found in the selected region.");
		}

91
		LOGGER.info("Buildings found in selected region : " + foundBuildingsCount);
Eric Duminil's avatar
NOTE    
Eric Duminil committed
92
		//NOTE: This could be a problem if header starts with <core:CityModel> and footer ends with </CityModel>
eric.duminil's avatar
eric.duminil committed
93
		sb.append(citygml.getFooter());
Eric Duminil's avatar
Eric Duminil committed
94
		return foundBuildingsCount;
eric.duminil's avatar
eric.duminil committed
95
96
97
98
99
100
101
102
103
	}

	/**
	 * Some Citygml files include an envelope (bounding box), defined at the very beginning of the file. If the extracted
	 * region comes from a huge file (e.g. from NYC), it might inherit this header with a huge envelope. Some methods
	 * might get confused by this wrong envelope, so this method replaces the original envelope with the bounding box
	 * from the extracting polygon. The real envelope might be even smaller, but it could only be known at the end of the
	 * parsing, after having analyzed every building. The envelope should be written in the header. If present, min and
	 * max values for Z are kept.
Eric Duminil's avatar
NOTE    
Eric Duminil committed
104
	 *
eric.duminil's avatar
eric.duminil committed
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
	 * @param header
	 * @param envelope
	 * @param srsName
	 * @return CityGML Header with an updated envelope
	 */
	private static String replaceEnvelopeInHeader(String header, Envelope envelope, String srsName) {
		//NOTE: Sorry for using a regex to parse XML. The header in itself isn't a valid XML, so this looked like the easiest solution.
		double zMin = 0;
		double zMax = 0;
		Pattern boundedByPattern = Pattern.compile(
				"(?is)<gml:boundedBy>.*?<gml:lowerCorner>(.*?)</gml:lowerCorner>\\s*<gml:upperCorner>(.*?)</gml:upperCorner>.*?</gml:boundedBy>");
		Matcher matcher = boundedByPattern.matcher(header);
		String headerWithoutEnvelope = header;
		if (matcher.find()) {
			headerWithoutEnvelope = matcher.replaceFirst("");
120
121
122
123
124
125
126
127
			String[] mins = matcher.group(1).split("\\s+");
			String[] maxs = matcher.group(2).split("\\s+");
			if (mins.length > 2) {
				zMin = Double.valueOf(mins[2]);
			}
			if (maxs.length > 2) {
				zMax = Double.valueOf(maxs[2]);
			}
eric.duminil's avatar
eric.duminil committed
128
129
130
131
132
133
134
135
136
137
138
139
		}
		String newEnvelope = "<gml:boundedBy>\r\n" +
				"  <gml:Envelope srsName=\"" + srsName + "\" srsDimension=\"3\">\r\n" + //NOTE: Would srsDimension="2" be better? Should the original Z get extracted?
				"    <gml:lowerCorner>" + envelope.getMinX() + " " + envelope.getMinY() + " " + zMin
				+ "</gml:lowerCorner>\r\n" +
				"    <gml:upperCorner>" + envelope.getMaxX() + " " + envelope.getMaxY() + " " + zMax
				+ "</gml:upperCorner>\r\n" +
				"  </gml:Envelope>\r\n" +
				"</gml:boundedBy>\r\n";
		return headerWithoutEnvelope + newEnvelope;
	}
}