Initial commit

e73686e2 · Pithon Kabiro · 355ef552 · e73686e2 · e73686e2
Commit e73686e2 authored 4 years ago by Pithon Kabiro
Expand all Hide whitespace changes
Inline Side-by-side

Showing

with 316 additions and 0 deletions
+316 -0
--- a/MiddleRefurbishment_RainauLOD22_LOD2S2_3581560_5417300_geb_ForFZK_AllWohngebäude-Rathaus_Variant_YOC_DIN18599_HEATING.csv
+++ b/MiddleRefurbishment_RainauLOD22_LOD2S2_3581560_5417300_geb_ForFZK_AllWohngebäude-Rathaus_Variant_YOC_DIN18599_HEATING.csv
--- a/road_segmentation.py
+++ b/road_segmentation.py
+"""
+@author: Bin Xu-Sigurdsson, Fraunhofer ISE
+"""
+import os 
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import Point
+import osmnx as ox
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+import shapely
+import numpy as np
+#from pyproj import Proj, transform
+from collections import Counter
+# may need to change this
+filepath = r'C:\street_segmentation'
def read_csv(file):
    """
    Read the heating-demand CSV file and return it as a DataFrame.

    file: path of the semicolon-separated input file.

    The first 19 lines of the file are skipped, the next line supplies the
    column names, and the rows labelled 2..1790 are kept as data.
    Columns at positions 39..51 are converted to numbers (unparsable cells
    become NaN) and the four coordinate columns are converted from
    decimal-comma text to float.

    Returns the DataFrame, or None (after printing an error message) when
    the file does not exist or cannot be opened.
    """
    if not os.path.exists(file):
        # The original guard skipped silently here, so the message below was
        # never shown for the case it was written for.
        print('Error: Input file is missing %s' %(file))
        return None
    try:
        data = pd.read_csv(file, skiprows=list(range(19)), header=None,
                           sep=";", decimal=",")
    except OSError:
        print('Error: Input file is missing %s' %(file))
        return None

    data.columns = data.iloc[0]
    # NOTE(review): the upper row label 1790 is specific to the data set this
    # script was written for - confirm before using another input file.
    # .copy() so the assignments below do not write into a view of the raw frame.
    data = data.loc[2:1790].copy()
    data.iloc[:,39:52]= data.iloc[:,39:52].apply(pd.to_numeric, errors='coerce', axis=1)
    # header=None makes every column text, so decimal="," is not applied by
    # pandas and the decimal commas have to be replaced by hand.
    for column in ('Longitude', 'Latitude', 'X-coordinate', 'Y-coordinate'):
        data[column] = [x.replace(',', '.') for x in data[column]]
        data[column] = data[column].astype(float)
    return data
+# def proj2lonlat(x1, y1): 
+#     """
+#     """
+#     inProj = Proj(init= goal_crs)
+#     outProj = Proj(init='epsg:4326')
+#     x2,y2 = transform(inProj,outProj,x1,y1)
+#     return(x2,y2)   
def get_osm(outputfile):
    """
    Download the OSM street network and save it as ESRI shapefiles.

    outputfile: passed as ``filename`` to ``ox.save_graph_shapefile``.

    The bounding box is taken from the module-level names ``latmax``,
    ``latmin``, ``lonmax`` and ``lonmin`` (they must be defined before this
    function is called) and is enlarged by 0.001 on every side.
    """
    margin = 0.001
    north = latmax + margin
    south = latmin - margin
    east = lonmax + margin
    west = lonmin - margin
    # Only these highway classes are requested from OSM.
    custom_filter = '["highway"~"primary|secondary|tertiary|service|residential|living_street|unclassified"]'
    street_graph = ox.graph_from_bbox(
        north, south, east, west,
        network_type=None,
        infrastructure='way["highway"]',
        custom_filter=custom_filter,
    )
    ox.save_graph_shapefile(street_graph, filename=outputfile)
def createFolder(foldername):
    """
    Create the folder ``foldername`` below the module-level ``filepath``.

    Nothing happens when the folder already exists. An OSError raised while
    creating it is reported on stdout and not re-raised.
    """
    target = os.path.join(filepath, foldername)
    try:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(target, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  target)
def save_as_shp(dd, output_n, target_crs="EPSG:31467"):
    """
    Reproject a frame and save it as an ESRI shapefile.

    dd: frame with a 'geometry' column and a ``crs`` attribute.
    output_n: file name of the shapefile to write.
    target_crs: coordinate reference system to reproject to before saving
                (defaults to the previously hard-coded "EPSG:31467").
    """
    ddf = gpd.GeoDataFrame(dd, geometry='geometry', crs=dd.crs)
    ddf_prj = ddf.to_crs(target_crs)
    ddf_prj.to_file(driver='ESRI Shapefile', filename=output_n)
def processing_save(dd, intsect_pts, seg_lines):
    """
    Derive road segments and their end points from the OSM edges and save both.

    dd: file name of the OSM edges (read with ``gpd.read_file``)
    intsect_pts: file name for the end points of the edges (nodes)
    seg_lines: file name for the filtered edges

    Edges whose 'highway' value is a *_link type are dropped, the remaining
    edges and the unique end points are numbered 1..n in an 'ID' column, and
    both layers are written with ``save_as_shp``.
    """
    link_types = ['secondary_link', 'primary_link', 'tertiary_link']
    edges = gpd.read_file(dd)
    # .copy() so that adding the ID column does not write into a filtered view.
    edges = edges.loc[~edges['highway'].isin(link_types), ['highway', 'geometry']].copy()
    edges['ID'] = range(1, len(edges) + 1)

    # Collect the end points of every line. The boundary is a multi-part
    # geometry; it is walked through .geoms because multi-part geometries
    # are not iterable in Shapely 2. dict.fromkeys removes duplicate
    # coordinates while keeping the first-seen order.
    end_coords = dict.fromkeys(
        (pt.x, pt.y)
        for line in edges['geometry']
        for pt in line.boundary.geoms
    )
    # Build a real GeoDataFrame so that the crs is carried properly instead of
    # being attached as an ad-hoc attribute to a plain DataFrame.
    nodes = gpd.GeoDataFrame(
        {'ID': range(1, len(end_coords) + 1)},
        geometry=[Point(x, y) for x, y in end_coords],
        crs=edges.crs,
    )

    save_as_shp(nodes, intsect_pts)
    save_as_shp(edges, seg_lines)
def find_neigh(file_p, file_l):
    """
    Calculate the length of every edge and find its adjacent edges.

    file_p: file of intersection points (nodes)
    file_l: file of edges; must contain an 'ID' column

    Two edges are adjacent when they start or end at the same node of
    ``file_p``. End points and nodes are matched on the coordinate text of
    their string (WKT) representation.

    Returns the edges with the columns 'ID', 'geometry', 'length' (rounded to
    3 decimals) and 'neighbor' (sorted list of the IDs of adjacent edges).
    """
    def _coord_text(geom):
        # Text between the first "(" and the first ")" of the geometry string,
        # e.g. "x y" for a point or "x1 y1, x2 y2, ..." for a line.
        text = str(geom)
        return text[text.find("(") + 1:text.find(")")]

    point_f = gpd.read_file(file_p)
    lines_f = gpd.read_file(file_l)
    # tolist() yields plain Python ints, which keeps str(neighbor) readable.
    line_ids = lines_f['ID'].tolist()

    # Index the edges by the coordinate text of their first and last vertex,
    # so each node is resolved with one lookup instead of a scan of all edges.
    lines_at_node = {}
    for line_id, geom in zip(line_ids, lines_f.geometry):
        vertices = _coord_text(geom).split(',')
        for node in {vertices[0], vertices[-1].strip()}:
            lines_at_node.setdefault(node, []).append(line_id)

    # Every edge touching a node is a neighbour of all other edges touching it.
    neighbours = {line_id: set() for line_id in line_ids}
    for node in {_coord_text(geom) for geom in point_f.geometry}:
        touching = lines_at_node.get(node, ())
        for line_id in touching:
            neighbours[line_id].update(touching)

    # Sorted so that the output does not depend on set iteration order.
    lines_f['neighbor'] = [sorted(neighbours[line_id] - {line_id})
                           for line_id in line_ids]
    lines_f['length'] = lines_f.geometry.length.round(3)
    # Change order of the columns
    return lines_f[['ID', 'geometry', 'length', 'neighbor']]
def get_index_minvalue(mylist):
    """
    Find the smallest value in a list and all indices where it occurs.

    Returns a tuple (min_val, indices). Raises ValueError for an empty list
    (from the built-in ``min``).
    """
    min_val = min(mylist)
    # Compare against the value computed once instead of calling min() again
    # for every element.
    myindex = [i for i, x in enumerate(mylist) if x == min_val]
    return (min_val, myindex)
def reclassiy_building_age(row):
    """
    Return the construction-period class for a building.

    row: mapping / row with a numeric "Year of construction" entry.

    Only upper bounds are tested, so the classes leave no gaps: a
    non-integer year such as 1948.5 is put into '1949-1971' instead of
    falling through to the last class. A value that fails every comparison
    (e.g. NaN) is returned as 'After 2011'.
    """
    year = row["Year of construction"]
    if year <= 1948:
        return 'Before 1948'
    if year <= 1971:
        return '1949-1971'
    if year <= 1990:
        return '1972-1990'
    if year <= 2010:
        return '1991-2010'
    return 'After 2011'
def listToString(org_list, seperator=' '):
    """ Convert list to string, by joining all item in list with given separator.
        Items that are not strings are converted with str() first.
        Returns the concatenated string, or '' when org_list is None """
    if org_list is None:
        return ''
    return seperator.join(str(item) for item in org_list)
def _nearest_segment(points, lines):
    """
    For every point return the distance to, and the ID of, the closest line.

    Returns two lists (distances rounded to 1 decimal, line IDs) in the order
    of ``points``. When several lines are equally close the first one wins.
    """
    line_ids = lines["ID"].tolist()
    line_geoms = lines.geometry
    distances, ids = [], []
    for pt in points:
        dist = line_geoms.distance(pt).values
        nearest = int(dist.argmin())  # argmin returns the first minimum
        distances.append(round(float(dist[nearest]), 1))
        ids.append(int(line_ids[nearest]))
    return distances, ids


def min_distance(hf, outputf, output_n, pts_file=None, lines_file=None):
    """
    Assign every building to its nearest road segment and save statistics
    per segment.

    hf: heating demand file (building) with the columns 'X-coordinate',
        'Y-coordinate', 'Year of construction', 'GMLId' and
        'PrimaryUsageZoneType'; the columns at positions 39..51 are treated
        as the demand columns. The frame passed in is not modified.
    outputf: file name of the CSV result (sep ';', decimal ',')
    output_n: file name of the shapefile result
    pts_file: file of intersection points; defaults to the module-level
              ``save_pts``
    lines_file: file of edges; defaults to the module-level ``save_lines``

    Per segment the result holds the list of building ids, counters of usage
    type and construction class, the building count, the summed demand
    columns, the summed building distances ('Total Distance') and the
    segment length, geometry and neighbours.
    """
    if pts_file is None:
        pts_file = save_pts
    if lines_file is None:
        lines_file = save_lines
    line = find_neigh(pts_file, lines_file)

    # Work on a copy so the caller's frame does not get extra columns.
    hf = hf.copy()
    building_pts = gpd.points_from_xy(hf['X-coordinate'], hf['Y-coordinate'])
    # IDs are stored as integers; initialising the column with NaN made them
    # floats, which ended up as "1,0" in the CSV output.
    hf["nearest_distance"], hf["ID"] = _nearest_segment(building_pts, line)
    hf = gpd.GeoDataFrame(hf, geometry=building_pts)

    merged = pd.merge(left=hf, right=line, how='left', left_on='ID', right_on='ID')
    merged["Year of construction"] = pd.to_numeric(merged["Year of construction"])
    merged["Class year of construction"] = merged.apply(reclassiy_building_age, axis=1)
    # Assign by column name so the columns really get a numeric dtype and are
    # picked up by the numeric-only sum below.
    demand_cols = merged.columns[39:52]
    merged[demand_cols] = merged[demand_cols].apply(pd.to_numeric, errors='coerce')

    grouped = merged.groupby("ID")
    # Restrict the sum to numeric columns; geometry and list columns cannot
    # be summed.
    merge_sum = grouped.sum(numeric_only=True).reset_index()

    # Values per segment are first collected as lists and then counted,
    # because a dict-like result of groupby.apply would be expanded by pandas.
    stats = pd.DataFrame({"GMLId": grouped["GMLId"].apply(list)})
    for source in ("PrimaryUsageZoneType", "Class year of construction"):
        stats["Stat_" + source] = grouped[source].apply(list).apply(Counter)
    stats["Building_Count"] = stats["GMLId"].apply(len)
    stats = stats.reset_index()

    df_res = pd.merge(stats, merge_sum, on="ID", how='outer')
    # 'length' exists on both sides here: 'length_x' is the sum over the
    # buildings of a segment, 'length_y' is the length of the segment itself.
    df_res_f = pd.merge(df_res, line, on="ID", how='outer').round(3)
    df_res_f = df_res_f[['ID', 'GMLId', 'Stat_PrimaryUsageZoneType',
       'Stat_Class year of construction', 'Building_Count',
       'Total Yearly Heat+DHW demand', 'January Heating Demand',
       'February Heating Demand', 'March Heating Demand',
       'April Heating Demand', 'May Heating Demand', 'June Heating Demand',
       'July Heating Demand', 'August Heating Demand',
       'September Heating Demand', 'October Heating Demand',
       'November Heating Demand', 'December Heating Demand',
       'nearest_distance', 'length_y', 'geometry', 'neighbor']]
    df_res_f = df_res_f.rename(columns={
        'nearest_distance': 'Total Distance',
        'Stat_PrimaryUsageZoneType': 'Stat PrimaryUsageZoneType',
        'Stat_Class year of construction': 'Stat Class year of construction',
        'Building_Count': 'Building Count',
        'length_y': 'Length'})
    df_res_f.to_csv(outputf, sep=";", decimal=',', index=False)

    df_res_shp = gpd.GeoDataFrame(df_res_f, geometry='geometry')
    df_res_shp.crs = "EPSG:31467"
    # List-valued and id columns are written as text to the shapefile.
    mycols = ["ID", "GMLId", "neighbor"]
    df_res_shp[mycols] = df_res_shp[mycols].astype(str)
    df_res_shp.to_file(driver='ESRI Shapefile', filename=output_n)
if __name__ == '__main__':
    # Create the output folders first: the building points below are written
    # into 'Results', which did not exist yet on a first run.
    folder_name = ['OSM', 'Results']
    for name in folder_name:
        createFolder(name)

    fname = os.path.join(filepath,"medium_stoeckach_lod1_lod2_merge_yoc_DIN18599_HEATING.csv")
    heating_f = read_csv(fname)
    if heating_f is None:
        # read_csv returns None when the input could not be read.
        raise SystemExit('Error: Input file is missing %s' % fname)
    heating_f.iloc[:,39:52]= heating_f.iloc[:,39:52].apply(pd.to_numeric, errors='coerce', axis=1)

    place = "Stoeckach, Stuttgart, Germany"
    goal_crs = "EPSG:31467"
    area = place.split(',')[0]

    # saving building as pts
    my_building_pts = gpd.GeoDataFrame(
        heating_f,
        geometry=gpd.points_from_xy(heating_f['X-coordinate'], heating_f['Y-coordinate']))
    my_building_pts.crs = goal_crs
    my_building_pts.to_file(driver='ESRI Shapefile',
                            filename=os.path.join(filepath, 'Results', 'HFT_building_as_point.shp'))

    # Bounding box of the buildings; get_osm() reads these module-level names.
    lonmin, latmin = heating_f['Longitude'].min(), heating_f['Latitude'].min()
    lonmax, latmax = heating_f['Longitude'].max(), heating_f['Latitude'].max()

    get_osm(os.path.join(filepath, 'OSM'))
    ed = os.path.join(filepath, 'OSM', 'edges', 'edges.shp')
    # min_distance() reads save_pts and save_lines as module-level names.
    save_pts = os.path.join(filepath, 'Results', 'Intersection_pts_v2102.shp')
    save_lines = os.path.join(filepath, 'Results', 'Segmentaion_lines_v2102.shp')
    if os.path.exists(ed):
        processing_save(ed, save_pts, save_lines)
        min_distance(heating_f,
                     os.path.join(filepath, 'Results', 'stat_lines_with_industry_v2102.csv'),
                     os.path.join(filepath, 'Results', 'stat_lines_with_industry_v2102.shp'))
    else:
        print('Error: OSM edges file is missing %s' % ed)