Upload New File

f2379645 · Ehlers · ed61647d · f2379645
Commit f2379645 authored 9 months ago by Ehlers
Show whitespace changes
Inline Side-by-side

Showing

with 161 additions and 0 deletions
+161 -0
--- a/gml_extrusion/gml_parsing.py
+++ b/gml_extrusion/gml_parsing.py
+import lxml.etree as ET
+import pandas as pd
+import numpy as np
+
+#------------------------------------------------------------------------------
+### Class GML Parsing ###
+#------------------------------------------------------------------------------
+
class little_parser:
    """Extract per-building attributes and surface areas from a CityGML file.

    Usage: ``little_parser(path).little_parser()`` returns a pandas DataFrame
    with one row per ``Building`` element found in the file.
    """

    # Qualified names of the ``gml:id`` attribute, tried in order: the GML 3.1
    # namespace first (the only one the original code looked at), then the
    # GML 3.2 namespace so that newer files still yield a building id.
    _GML_ID_ATTRIBUTES = (
        "{http://www.opengis.net/gml}id",
        "{http://www.opengis.net/gml/3.2}id",
    )

    def __init__(self, file):
        """Store the CityGML source; nothing is read until ``little_parser()``.

        Args:
            file: Whatever ``ET.parse`` accepts (typically a file path).
        """
        self.file = file

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _plane_area(points):
        """Return the area of a planar polygon given as 3D vertices.

        Uses Newell's method: half the norm of the summed cross products of
        consecutive vertices. The ring may be open or closed (a repeated
        first vertex at the end contributes nothing).

        Args:
            points: Sequence of ``[x, y, z]`` vertices in ring order.

        Returns:
            The polygon area as a float; ``0.0`` for fewer than three
            vertices.

        Raises:
            ValueError: If the vertices are not three-dimensional.
        """
        vertices = np.asarray(points, dtype=float)
        if vertices.size == 0:
            return 0.0
        if vertices.ndim != 2 or vertices.shape[1] != 3:
            raise ValueError("points must be a sequence of [x, y, z] vertices")
        if vertices.shape[0] < 3:
            return 0.0
        # Work relative to the first vertex: real-world coordinates are large
        # and the cross products would otherwise lose precision.
        vertices = vertices - vertices[0]
        following = np.roll(vertices, -1, axis=0)
        normal = np.cross(vertices, following).sum(axis=0) / 2.0
        return float(np.linalg.norm(normal))

    @staticmethod
    def _string_to_points_list(string):
        """Convert a whitespace-separated coordinate string into 3D points.

        Args:
            string: Text such as ``"x0 y0 z0 x1 y1 z1 ..."``.

        Returns:
            A list of ``[x, y, z]`` float lists.

        Raises:
            ValueError: If a token is not a number or the number of
                coordinates is not a multiple of three.
        """
        values = [float(token) for token in string.split()]
        if len(values) % 3 != 0:
            raise ValueError(
                "coordinate list length %d is not a multiple of 3" % len(values)
            )
        return [values[i:i + 3] for i in range(0, len(values), 3)]

    @staticmethod
    def _qualified_names(root):
        """Map each local tag name to the first qualified tag seen in the tree.

        This lets the rest of the parser address elements by their bare name
        (``'Building'``) regardless of which namespace URIs the file uses.
        When the same local name occurs in several namespaces, the first one
        in document order wins.
        """
        names = {}
        for element in root.iter():
            tag = element.tag
            # lxml also yields comments and processing instructions here;
            # their ``tag`` is not a string and they carry no element name.
            if not isinstance(tag, str):
                continue
            # Un-namespaced tags have no '}' and are used as they are.
            local_name = tag.rsplit("}", 1)[-1]
            names.setdefault(local_name, tag)
        return names

    # ------------------------------------------------------------------
    # start parsing gml file
    # ------------------------------------------------------------------

    def little_parser(self):
        """Parse the CityGML file and return one row per building.

        Columns, in order: ``id``, ``name``, ``yearOfConstruction``,
        ``function``, ``roofType``, ``measuredHeight``, ``Roof``, ``Wall``,
        ``Ground``. Attribute values are the element text as found in the
        file. Missing values are replaced as follows, with a message printed
        for the first three:

        * ``yearOfConstruction``: a random integer from
          ``np.random.randint(1900, 2020)``
        * ``function`` / ``roofType``: ``'unknown'``
        * ``measuredHeight``: ``'unknown'`` (no message)
        * ``name``: ``None``

        ``Roof``, ``Wall`` and ``Ground`` are the summed areas of the
        building's ``RoofSurface``, ``WallSurface`` and ``GroundSurface``
        elements. For every surface only its first ``posList`` is used;
        surfaces without a ``posList`` contribute nothing.

        Returns:
            pandas.DataFrame: The collected building data (empty if the file
            contains no ``Building`` element).
        """
        root = ET.parse(self.file).getroot()
        qualified = self._qualified_names(root)

        def tags(tag):
            # A tag that never occurs in the file maps to a valid path that
            # simply matches nothing, instead of an empty (invalid) path.
            return ".//" + qualified.get(tag, tag)

        def surface_area(building_element, surface_tag):
            total = 0
            for surface in building_element.findall(tags(surface_tag)):
                pos_list = surface.find(tags('posList'))
                if pos_list is None or not (pos_list.text or '').strip():
                    continue
                points = self._string_to_points_list(pos_list.text)
                total += self._plane_area(points)
            return total

        buildings = []
        for b in root.findall(tags('Building')):
            building = {}
            building['id'] = next(
                (b.get(attribute) for attribute in self._GML_ID_ATTRIBUTES
                 if b.get(attribute) is not None),
                None,
            )

            name = b.find(tags('name'))
            building['name'] = name.text if name is not None else None

            year = b.find(tags('yearOfConstruction'))
            if year is not None:
                building['yearOfConstruction'] = year.text
            else:
                # Placeholder so downstream code always has a year to work with.
                building['yearOfConstruction'] = np.random.randint(1900, 2020)
                print(f"{building['id']}: yearOfConstruction is missing. "
                      "Random year is added.")

            for attribute in ('function', 'roofType'):
                element = b.find(tags(attribute))
                if element is not None:
                    building[attribute] = element.text
                else:
                    building[attribute] = 'unknown'
                    print(f"{building['id']}: {attribute} is missing. "
                          "Unknown is added.")

            height = b.find(tags('measuredHeight'))
            building['measuredHeight'] = (
                height.text if height is not None else 'unknown'
            )

            # Summed areas per surface type.
            building['Roof'] = surface_area(b, 'RoofSurface')
            building['Wall'] = surface_area(b, 'WallSurface')
            building['Ground'] = surface_area(b, 'GroundSurface')

            buildings.append(building)

        return pd.DataFrame(buildings)