"""Extract per-building attributes and surface areas from a CityGML file."""

try:
    import lxml.etree as ET
except ImportError:
    # Only the ElementTree-compatible subset of the API is used below
    # (parse / getroot / iter / find / findall / get), so the standard
    # library parser is a drop-in replacement when lxml is not installed.
    import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

# Qualified name of the gml:id attribute in GML 3.1.1 documents.
_GML_ID = "{http://www.opengis.net/gml}id"


def _local_name(qualified):
    """Return the part of a ``{namespace}name`` string after the namespace."""
    return qualified.rsplit("}", 1)[-1]


def _element_tags(node):
    """Yield the qualified tag of *node* and of each descendant element.

    lxml also yields comments and processing instructions from ``iter()``;
    their ``tag`` is not a string, so they are skipped here.
    """
    for element in node.iter():
        if isinstance(element.tag, str):
            yield element.tag


def _build_tag_paths(root):
    """Map each local tag name to a ``.//{namespace}tag`` search path.

    When the same local name occurs in several namespaces, the first
    occurrence in document order wins.
    """
    paths = {}
    for tag in _element_tags(root):
        paths.setdefault(_local_name(tag), ".//" + tag)
    return paths


def _string_to_points_list(string):
    """Convert a whitespace separated coordinate string to ``[x, y, z]`` lists.

    Raises:
        ValueError: if a token is not a number or the number of values is
            not a multiple of three.
    """
    values = [float(token) for token in string.split()]
    if len(values) % 3:
        raise ValueError(
            "expected a multiple of 3 coordinate values, got %d" % len(values)
        )
    return [values[i:i + 3] for i in range(0, len(values), 3)]


def _plane_area(points):
    """Return the area of the planar 3D polygon described by *points*.

    The ring may be open or closed (last vertex repeating the first); the
    repeated vertex contributes nothing. Fewer than three vertices give 0.0.
    """
    vertices = np.asarray(points, dtype=float).reshape(-1, 3)
    if len(vertices) < 3:
        return 0.0
    # Work relative to the first vertex: the area is translation invariant,
    # and small offsets avoid cancellation errors when the absolute
    # coordinates are large.
    relative = vertices - vertices[0]
    # Half the norm of the summed cross products of consecutive vertices is
    # the polygon area (vector form of the shoelace formula).
    normal = np.cross(relative, np.roll(relative, -1, axis=0)).sum(axis=0)
    return float(0.5 * np.linalg.norm(normal))


def _find_text(node, path):
    """Return the text of the first match of *path* below *node*, else None."""
    if path is None:
        return None
    match = node.find(path)
    return None if match is None else match.text


def _gml_id(node):
    """Return the gml:id of *node*, or None if it has no id attribute."""
    value = node.get(_GML_ID)
    if value is None:
        # Fall back to an ``id`` attribute in any other namespace.
        for key, candidate in node.attrib.items():
            if _local_name(key) == "id":
                return candidate
    return value


def _surface_area(building, surface_path, pos_list_path):
    """Sum the areas of all surfaces below *building* matching *surface_path*.

    Surfaces without a non-empty posList are skipped.
    """
    total = 0
    if surface_path is None or pos_list_path is None:
        return total
    for surface in building.findall(surface_path):
        # NOTE(review): only the first posList of each surface is used, as
        # before. Additional polygons or interior rings of the same surface
        # are not taken into account -- confirm whether that is intended.
        text = _find_text(surface, pos_list_path)
        if text and text.strip():
            total += _plane_area(_string_to_points_list(text))
    return total


class little_parser:
    """Read a CityGML file and tabulate its buildings."""

    def __init__(self, file):
        """Store the path (or file object) of the CityGML document."""
        self.file = file

    def little_parser(self):
        """Parse ``self.file`` and return one DataFrame row per Building.

        Columns: ``id``, ``name``, ``yearOfConstruction``, ``function``,
        ``roofType``, ``measuredHeight`` (element text as found in the file)
        and ``Roof``, ``Wall``, ``Ground`` (summed surface areas in the
        squared unit of the coordinates).

        Missing values are replaced as follows, with a message printed for
        the first three:
            * yearOfConstruction: random integer from 1900 to 2019
            * function, roofType: ``'unknown'``
            * measuredHeight: ``'unknown'`` (no message)

        Returns:
            pandas.DataFrame: empty if the file contains no Building element.
        """
        root = ET.parse(self.file).getroot()

        # Tags are looked up by local name so that files using different
        # namespace URIs (CityGML versions) can be read with the same code.
        tags = _build_tag_paths(root).get

        building_path = tags('Building')
        found_buildings = root.findall(building_path) if building_path else []

        buildings = []
        for b in found_buildings:
            tags_in_b = {_local_name(tag) for tag in _element_tags(b)}

            building = {}
            building['id'] = _gml_id(b)
            building['name'] = _find_text(b, tags('name'))

            if 'yearOfConstruction' in tags_in_b:
                building['yearOfConstruction'] = _find_text(
                    b, tags('yearOfConstruction'))
            else:
                building['yearOfConstruction'] = np.random.randint(1900, 2020)
                print(str(building['id'])
                      + ': yearOfConstruction is missing. Random year is added.')

            for field in ('function', 'roofType'):
                if field in tags_in_b:
                    building[field] = _find_text(b, tags(field))
                else:
                    building[field] = 'unknown'
                    print(str(building['id']) + ': ' + field
                          + ' is missing. Unknown is added.')

            if 'measuredHeight' in tags_in_b:
                building['measuredHeight'] = _find_text(
                    b, tags('measuredHeight'))
            else:
                building['measuredHeight'] = 'unknown'

            # Accumulate the area of each boundary surface type.
            pos_list_path = tags('posList')
            building['Roof'] = _surface_area(
                b, tags('RoofSurface'), pos_list_path)
            building['Wall'] = _surface_area(
                b, tags('WallSurface'), pos_list_path)
            building['Ground'] = _surface_area(
                b, tags('GroundSurface'), pos_list_path)

            buildings.append(building)

        return pd.DataFrame(buildings)