Commit f2379645 authored by Ehlers
Browse files

Upload New File

parent ed61647d
import lxml.etree as ET
import pandas as pd
import numpy as np
#------------------------------------------------------------------------------
### Class GML Parsing ###
#------------------------------------------------------------------------------
class little_parser:
    """Extract per-building attributes and surface areas from a CityGML file.

    The file is read with the module-level ``ET`` XML API.  Elements are
    addressed by their local tag name; the namespace of each local name is
    taken from the first element in the document that carries that name, so
    the parser does not depend on fixed CityGML namespace URIs.
    """

    # gml:id attribute name as written by GML 3.1.1 based files.
    _GML_ID = "{http://www.opengis.net/gml}id"

    def __init__(self, file):
        """Remember the CityGML source handed to ``ET.parse`` later on."""
        self.file = file

    # -------------------------------------------------------------------------
    ### helpers ###
    # -------------------------------------------------------------------------
    @staticmethod
    def _tag_paths(root):
        """Map every local tag name below *root* to a ``.//{ns}tag`` path.

        Nodes whose ``tag`` is not a string (lxml reports comments and
        processing instructions that way) are skipped.  When a local name
        occurs in several namespaces, the first one seen wins.
        """
        paths = {}
        for element in root.iter():
            qualified = element.tag
            if not isinstance(qualified, str):
                continue
            local = qualified.rsplit('}', 1)[-1]
            paths.setdefault(local, './/' + qualified)
        return paths

    @classmethod
    def _gml_id(cls, element):
        """Return the gml:id of *element*, or None when it has no id.

        The GML 3.1.1 attribute name is tried first; otherwise any attribute
        whose local name is ``id`` is accepted, whatever its namespace.
        """
        value = element.get(cls._GML_ID)
        if value is not None:
            return value
        for key, candidate in element.attrib.items():
            if key.rsplit('}', 1)[-1] == 'id':
                return candidate
        return None

    @staticmethod
    def _parse_pos_list(text):
        """Turn a whitespace separated coordinate string into [x, y, z] lists.

        Raises IndexError when the number of values is not a multiple of three.
        """
        values = [float(token) for token in text.split()]
        return [[values[i], values[i + 1], values[i + 2]]
                for i in range(0, len(values), 3)]

    @staticmethod
    def _polygon_area(points):
        """Return the area of a planar polygon given as 3D vertices.

        The area is half the length of the summed cross products of
        consecutive vertices.  Vertices are taken relative to the first one,
        which keeps the products small for large coordinate values and makes
        the closing edge contribute nothing, so rings with or without a
        repeated end point give the same result.  Fewer than three vertices
        yield 0.0.
        """
        vertices = np.asarray(points, dtype=float)
        if vertices.ndim != 2 or vertices.shape[0] < 3:
            return 0.0
        vertices = vertices - vertices[0]
        normal = np.cross(vertices[:-1], vertices[1:]).sum(axis=0)
        return float(0.5 * np.linalg.norm(normal))

    # -------------------------------------------------------------------------
    ### start parsing gml file ###
    # -------------------------------------------------------------------------
    def little_parser(self):
        """Parse ``self.file`` and return one DataFrame row per Building.

        Columns: ``id``, ``name``, ``yearOfConstruction``, ``function``,
        ``roofType``, ``measuredHeight`` and the summed areas ``Roof``,
        ``Wall`` and ``Ground``.

        Missing values are filled as follows (a message is printed for the
        first three):
        * ``yearOfConstruction``: random integer from
          ``np.random.randint(1900, 2020)`` (upper bound exclusive),
        * ``function`` / ``roofType``: ``'unknown'``,
        * ``measuredHeight``: ``'unknown'``,
        * ``name``: ``None``.
        """
        root = ET.parse(self.file).getroot()
        paths = self._tag_paths(root)

        def find_first(node, tag):
            # A tag that never occurs in the file simply has no match.
            path = paths.get(tag)
            return None if path is None else node.find(path)

        def find_all(node, tag):
            path = paths.get(tag)
            return [] if path is None else node.findall(path)

        def surface_area(building_node, surface_tag):
            # NOTE(review): only the first posList below each surface is
            # evaluated; further polygons or interior rings of the same
            # surface are not taken into account.
            total = 0.0
            for surface in find_all(building_node, surface_tag):
                pos_list = find_first(surface, 'posList')
                if pos_list is None or not pos_list.text:
                    continue  # surface without usable coordinates
                ring = self._parse_pos_list(pos_list.text)
                total += self._polygon_area(ring)
            return total

        buildings = []
        for b in find_all(root, 'Building'):
            building = {}
            building['id'] = self._gml_id(b)

            name_node = find_first(b, 'name')
            building['name'] = None if name_node is None else name_node.text

            year_node = find_first(b, 'yearOfConstruction')
            if year_node is not None:
                building['yearOfConstruction'] = year_node.text
            else:
                # placeholder year so that later processing has a value
                building['yearOfConstruction'] = np.random.randint(1900, 2020)
                print(f"{building['id']}: yearOfConstruction is missing. "
                      "Random year is added.")

            for field in ('function', 'roofType'):
                field_node = find_first(b, field)
                if field_node is not None:
                    building[field] = field_node.text
                else:
                    building[field] = 'unknown'
                    print(f"{building['id']}: {field} is missing. "
                          "Unknown is added.")

            height_node = find_first(b, 'measuredHeight')
            building['measuredHeight'] = ('unknown' if height_node is None
                                          else height_node.text)

            # summed areas of the boundary surfaces of this building
            building['Roof'] = surface_area(b, 'RoofSurface')
            building['Wall'] = surface_area(b, 'WallSurface')
            building['Ground'] = surface_area(b, 'GroundSurface')

            buildings.append(building)

        # create a Pandas dataframe from the list of building data
        return pd.DataFrame(buildings)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment