From 313d0081127b79d5e8e08534cb69ad6de5f942cd Mon Sep 17 00:00:00 2001 From: Riegel <alexander.riegel@hft-stuttgart.de> Date: Wed, 4 Dec 2024 13:17:16 +0100 Subject: [PATCH] Refactor: Rework parsing of CityGmlZipEntry --- .../citydoctor2/parser/CityGmlParser.java | 67 ++++++++++++++----- .../ziploader/CityGmlZipEntry.java | 4 +- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/CityDoctorParent/CityDoctorModel/src/main/java/de/hft/stuttgart/citydoctor2/parser/CityGmlParser.java b/CityDoctorParent/CityDoctorModel/src/main/java/de/hft/stuttgart/citydoctor2/parser/CityGmlParser.java index bdc2454..9cfbd4b 100644 --- a/CityDoctorParent/CityDoctorModel/src/main/java/de/hft/stuttgart/citydoctor2/parser/CityGmlParser.java +++ b/CityDoctorParent/CityDoctorModel/src/main/java/de/hft/stuttgart/citydoctor2/parser/CityGmlParser.java @@ -26,6 +26,8 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import javax.xml.XMLConstants; import javax.xml.namespace.QName; @@ -189,28 +191,61 @@ public class CityGmlParser { } } - public static CityDoctorModel parseCityGmlStream(InputStream is, ParserConfiguration config) - throws CityGmlParseException, InvalidGmlFileException { - return parseCityGmlStream(is, config, null, null); - } + public static CityDoctorModel parseCityGmlZipEntry(ZipEntry entry, ZipFile archive, ParserConfiguration config) + throws CityGmlParseException, InvalidGmlFileException, IOException { + CityGMLContext context = getContext(); - public static CityDoctorModel parseCityGmlStream(InputStream is, ParserConfiguration config, ProgressListener l) - throws CityGmlParseException, InvalidGmlFileException { - return parseCityGmlStream(is, config, l, null); + if (config.getValidate()) { + InputStream vis = archive.getInputStream(entry); + List<String> messages = validateStream(vis,context); + if (!messages.isEmpty()) { + throw new InvalidGmlFileException("Invalid GML File. First error: \n" + messages.get(0)); + } + } + InputStream is = archive.getInputStream(entry); + return parseCityGmlStream(is, config, context); } - public static CityDoctorModel parseCityGmlStream(InputStream is, ParserConfiguration config, ProgressListener l, - GMLValidationHandler handler) throws CityGmlParseException, InvalidGmlFileException { - CityGMLContext context = getContext(); + private static List<String> validateStream(InputStream vis, CityGMLContext context) throws CityGmlParseException { + GMLValidationHandler handler = new GMLValidationHandler(); - if (config.getValidate()) { - //TODO: Think of something to XML-validate Inputstream + try { + BufferedInputStream bis = new BufferedInputStream(vis); + SchemaHandler schemaHandler = new ValidationSchemaHandler(context.getDefaultSchemaHandler()); + readAdditionalSchemaDefinitions(context, bis, schemaHandler); + Source[] schemas = schemaHandler.getSchemas(); + SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + schemaFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + Schema schema = schemaFactory.newSchema(schemas); + Validator validator = schema.newValidator(); + validator.setErrorHandler(handler); + validator.validate(new StreamSource(bis)); + return handler.getMessages(); + } catch (SchemaHandlerException | SAXException | IOException e) { + throw new CityGmlParseException("Failed to validate CityGML file", e); + } + } - //XML validation requires looking at nesting of tags - //not a problem for small files, but big files will run into memory limit and crash - // Maybe do it dirty by writing stream to temp file and passing it to parseCityGmlFile()? - throw new InvalidGmlFileException("Invalid GML File."); + private static void readAdditionalSchemaDefinitions(CityGMLContext context, BufferedInputStream bis, SchemaHandler schemaHandler) + throws CityGmlParseException { + bis.mark(Integer.MAX_VALUE); + try (XMLReader reader = XMLReaderFactory.newInstance(context.getXMLObjects()) + .withSchemaHandler(schemaHandler) + .createReader(bis)) { + reader.nextTag(); + bis.reset(); + } catch (Exception e) { + throw new CityGmlParseException("Failed to read Schema from stream.", e); } + } + + public static CityDoctorModel parseCityGmlStream(InputStream is, ParserConfiguration config, CityGMLContext context) + throws CityGmlParseException { + return parseCityGmlStream(is, config, null, context); + } + + public static CityDoctorModel parseCityGmlStream(InputStream is, ParserConfiguration config, ProgressListener l, CityGMLContext context) + throws CityGmlParseException { try { BufferedInputStream bis = new BufferedInputStream(is); diff --git a/CityDoctorParent/Extensions/CityDoctorZipLoader/src/main/java/de/hft/stuttgart/citydoctor2/ziploader/CityGmlZipEntry.java b/CityDoctorParent/Extensions/CityDoctorZipLoader/src/main/java/de/hft/stuttgart/citydoctor2/ziploader/CityGmlZipEntry.java index 329be7b..dd80c8a 100644 --- a/CityDoctorParent/Extensions/CityDoctorZipLoader/src/main/java/de/hft/stuttgart/citydoctor2/ziploader/CityGmlZipEntry.java +++ b/CityDoctorParent/Extensions/CityDoctorZipLoader/src/main/java/de/hft/stuttgart/citydoctor2/ziploader/CityGmlZipEntry.java @@ -13,7 +13,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -import java.util.zip.ZipInputStream; public class CityGmlZipEntry { @@ -31,9 +30,8 @@ public class CityGmlZipEntry { return new ErroneousEntry(entry, ZipEntryErrorType.EXCESSIVE_FILESIZE); } else { try { - InputStream is = archive.getInputStream(entry); CityGmlParser.gagLogger(true); - CityDoctorModel model = CityGmlParser.parseCityGmlStream(is, config); + CityDoctorModel model = CityGmlParser.parseCityGmlZipEntry(entry, archive, config); return new CityGmlZipEntry(entry.getName(), model); } catch (CityGmlParseException | InvalidGmlFileException e) { logger.error(e); -- GitLab