"""Extract per-building attributes and surface areas from a CityGML file."""

try:
    import lxml.etree as ET
except ImportError:
    # lxml is preferred; the standard library parser offers the subset of the
    # ElementTree API used here (parse / iter / find / findall / get).
    import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd

# Qualified name of gml:id in GML 3.1 (CityGML 1.0 / 2.0).
_GML_ID = "{http://www.opengis.net/gml}id"

# (output column, CityGML surface element) in the order the columns are emitted.
_SURFACES = (
    ("Roof", "RoofSurface"),
    ("Wall", "WallSurface"),
    ("Ground", "GroundSurface"),
)


def _local_name(qualified_tag):
    """Return the tag name without its ``{namespace}`` prefix."""
    return qualified_tag.rsplit("}", 1)[-1]


def _string_to_points_list(string):
    """Convert a whitespace-separated coordinate string to ``[[x, y, z], ...]``.

    Raises:
        ValueError: if a token is not a number or the number of values is not
            a multiple of three.
    """
    values = [float(token) for token in string.split()]
    if len(values) % 3:
        raise ValueError(
            "posList must contain a multiple of 3 values, got %d" % len(values)
        )
    return [values[i:i + 3] for i in range(0, len(values), 3)]


def _plane_area(points):
    """Return the area of a planar polygon given as 3D vertices.

    Half the norm of the summed cross products of consecutive vertices
    (Newell's method) is the polygon area for any orientation in space.
    A closed ring that repeats its first vertex at the end is handled
    transparently because the repeated edge contributes a zero vector.
    Fewer than three vertices yield ``0.0``.
    """
    vertices = np.asarray(points, dtype=float)
    if vertices.ndim != 2 or vertices.shape[0] < 3:
        return 0.0
    # Shift to the first vertex: the result is translation invariant, and
    # small numbers avoid cancellation with large projected coordinates.
    vertices = vertices - vertices[0]
    following = np.roll(vertices, -1, axis=0)
    normal = np.cross(vertices, following).sum(axis=0)
    return float(0.5 * np.linalg.norm(normal))


def _gml_id(element):
    """Return the gml:id of *element*, or ``None`` when it has none.

    The GML 3.1 namespace is tried first; otherwise any namespaced ``id``
    attribute (e.g. GML 3.2) is accepted.
    """
    identifier = element.get(_GML_ID)
    if identifier is not None:
        return identifier
    for key, value in element.attrib.items():
        if isinstance(key, str) and key.endswith("}id"):
            return value
    return None


class little_parser:
    """Parser that turns the buildings of one CityGML file into a DataFrame."""

    def __init__(self, file):
        """Store *file*, the path (or file object) handed to ``ET.parse``."""
        self.file = file

    def little_parser(self):
        """Parse the file and return one DataFrame row per ``Building``.

        Columns, in order: ``id``, ``name``, ``yearOfConstruction``,
        ``function``, ``roofType``, ``measuredHeight``, ``Roof``, ``Wall``,
        ``Ground``. The last three are the summed polygon areas of the
        building's roof, wall and ground surfaces.

        Missing attributes are replaced as follows (a message is printed for
        the first three): ``yearOfConstruction`` by a random integer in
        ``[1900, 2020)``, ``function`` and ``roofType`` by ``'unknown'``,
        ``measuredHeight`` by ``'unknown'``.

        Only the first ``posList`` found inside each surface element is used.
        """
        root = ET.parse(self.file).getroot()

        # Map every local tag name to its fully qualified form so elements can
        # be addressed by plain name whatever namespaces the file uses. The
        # first occurrence in document order wins. Comment and processing
        # instruction nodes (non-string tags in lxml) are ignored.
        qualified = {}
        for element in root.iter():
            if isinstance(element.tag, str):
                qualified.setdefault(_local_name(element.tag), element.tag)

        def find_all(parent, tag):
            """Return all descendants of *parent* named *tag* (maybe empty)."""
            name = qualified.get(tag)
            return parent.findall(".//" + name) if name else []

        def find_first(parent, tag):
            """Return the first descendant of *parent* named *tag*, or None."""
            name = qualified.get(tag)
            return parent.find(".//" + name) if name else None

        def text_of(parent, tag):
            """Return the text of the first descendant *tag*, or None."""
            node = find_first(parent, tag)
            return None if node is None else node.text

        buildings = []
        for b in find_all(root, 'Building'):
            tags_in_b = {
                _local_name(elem.tag)
                for elem in b.iter()
                if isinstance(elem.tag, str)
            }

            building = {}
            building['id'] = _gml_id(b)
            building['name'] = text_of(b, 'name')

            if 'yearOfConstruction' in tags_in_b:
                building['yearOfConstruction'] = text_of(b, 'yearOfConstruction')
            else:
                # Placeholder year so downstream code always has a value.
                building['yearOfConstruction'] = np.random.randint(1900, 2020)
                print(str(building['id']) + ': yearOfConstruction is missing. Random year is added.')

            if 'function' in tags_in_b:
                building['function'] = text_of(b, 'function')
            else:
                building['function'] = 'unknown'
                print(str(building['id']) + ': function is missing. Unknown is added.')

            if 'roofType' in tags_in_b:
                building['roofType'] = text_of(b, 'roofType')
            else:
                building['roofType'] = 'unknown'
                print(str(building['id']) + ': roofType is missing. Unknown is added.')

            if 'measuredHeight' in tags_in_b:
                building['measuredHeight'] = text_of(b, 'measuredHeight')
            else:
                building['measuredHeight'] = 'unknown'

            # Sum the polygon areas per surface type.
            for column, surface_tag in _SURFACES:
                total = 0
                for surface in find_all(b, surface_tag):
                    pos_list = find_first(surface, 'posList')
                    if pos_list is None or not (pos_list.text or '').strip():
                        print(str(building['id']) + ': ' + surface_tag
                              + ' without posList is skipped.')
                        continue
                    total += _plane_area(_string_to_points_list(pos_list.text))
                building[column] = total

            buildings.append(building)

        return pd.DataFrame(buildings)