Commit 4ed3f015 authored by Eric Duminil
Browse files

Klimabaeume open data

parent 6fed29db
input/*.parquet
\ No newline at end of file
%% Cell type:code id:761e55bc tags:
``` python
import pandas as pd
```
%% Cell type:code id:ccefa7cc tags:
``` python
from IPython.display import display
```
%% Cell type:code id:c8c647de tags:
``` python
# Load the tree dataset from the local parquet file, using the fastparquet engine.
df = pd.read_parquet('input/wuerzburg_trees.parquet', engine='fastparquet')
```
%% Cell type:code id:8ce189f0 tags:
``` python
# Bare expression: the notebook renders a preview of the whole DataFrame.
df
```
%% Output
tree_number species_latin species_german \
0 23631 Tilia Cordata Greenspire Stadt-Linde
1 5463.1 Ulmus New Horizon Ulme New Horizon
2 58413 Ulmus New Horizon Ulme New Horizon
3 58753 Acer Platanoides Spitz-Ahorn
4 5540 Tilia Cordata Greenspire Stadt-Linde
... ... ... ...
158720 05_Ersatz_3_7023.2 Unbekannt Unbekannt
158721 5463.1 Ulmus New Horizon Ulme New Horizon
158722 05_Ersatz_3_7023.2 Unbekannt Unbekannt
158723 Gartenamt_Kontroll_2 Unbekannt Unbekannt
158724 65851 Fraxinus Angustifolia Raywood Esche Raywood
latitude longitude soil_composition vol_water_content_30 \
0 49.793440 9.940730 Boden Ringpark 37.0
1 49.796000 9.932720 Eigensubstrat Gartenamt 45.0
2 49.796580 9.941690 Boden-Sand-Lehm 4 22.0
3 49.782000 9.956770 VulkaTree 44.0
4 49.793180 9.933190 Boden-Sand-Lehm 4 35.0
... ... ... ... ...
158720 49.791642 9.935594 VulkaTree 59.0
158721 49.796000 9.932720 Eigensubstrat Gartenamt 36.0
158722 49.791642 9.935594 VulkaTree 59.0
158723 49.797012 10.001281 Eigensubstrat Gartenamt 0.0
158724 49.791200 9.927240 VulkaTree 49.0
vol_water_content_100 permittivity_30 permittivity_100 \
0 27.0 22.34 15.17
1 33.0 30.84 18.86
2 34.0 11.81 20.14
3 31.0 30.32 17.33
4 36.0 20.65 21.61
... ... ... ...
158720 NaN 53.11 NaN
158721 33.0 22.05 19.29
158722 NaN 53.06 NaN
158723 NaN 0.00 NaN
158724 37.0 37.43 22.84
conductivity_30 conductivity_100 usable_field_capacity_30 \
0 19.3 21.8 117.647059
1 20.9 0.0 176.190476
2 0.0 19.1 55.555556
3 0.0 0.0 139.130435
4 19.2 0.0 127.777778
... ... ... ...
158720 38.1 NaN 137.837838
158721 19.5 0.0 90.322581
158722 38.2 NaN 137.837838
158723 0.0 NaN -25.806452
158724 0.0 19.8 110.810811
usable_field_capacity_100 temperature_30 temperature_100 \
0 58.823529 25.06 23.19
1 119.047619 25.26 25.12
2 122.222222 26.51 24.40
3 82.608696 24.94 19.20
4 133.333333 23.13 19.85
... ... ... ...
158720 NaN 24.10 NaN
158721 80.645161 23.42 21.82
158722 NaN 24.10 NaN
158723 NaN 0.00 NaN
158724 78.378378 25.76 21.08
battery_percentage timestamp \
0 59.0 2023-07-12 10:18:26
1 56.0 2023-07-12 10:04:33
2 58.0 2023-07-12 10:57:41
3 59.0 2023-07-12 10:47:32
4 60.0 2023-07-12 10:06:59
... ... ...
158720 57.0 2024-07-22 10:23:46
158721 29.0 2024-07-22 11:25:45
158722 57.0 2024-07-22 11:24:16
158723 77.0 2024-07-22 11:35:41
158724 31.0 2024-07-22 11:35:44
koordinaten
0 b'\x01\x01\x00\x00\x00o\xbb\xd0\\\xa7\xe1#@K\x...
1 b'\x01\x01\x00\x00\x00v\xa6\xd0y\x8d\xdd#@\xd9...
2 b'\x01\x01\x00\x00\x00\xc3\xf0\x111%\xe2#@\xe7...
3 b'\x01\x01\x00\x00\x00}\x96\xe7\xc1\xdd\xe9#@j...
4 b'\x01\x01\x00\x00\x00\x92\xe8e\x14\xcb\xdd#@\...
... ...
158720 b'\x01\x01\x00\x00\x00\x96j*<\x06\xdf#@.\xd3\x...
158721 b'\x01\x01\x00\x00\x00v\xa6\xd0y\x8d\xdd#@\xd9...
158722 b'\x01\x01\x00\x00\x00\x96j*<\x06\xdf#@.\xd3\x...
158723 b'\x01\x01\x00\x00\x00\x01\x00\x00\xf8\xa7\x00...
158724 b'\x01\x01\x00\x00\x00\t\x16\x873\xbf\xda#@0L\...
[158725 rows x 19 columns]
%% Cell type:code id:57133341 tags:
``` python
# Columns kept when de-duplicating rows: tree id, latin species name and position.
interesting_columns = ['tree_number', 'species_latin', 'latitude', 'longitude']
```
%% Cell type:code id:4850e91c tags:
``` python
species = 'species_latin'
coordinates = ['latitude', 'longitude']
# Show the distinct trees of each species, one table per species.
# Iterate over the groups explicitly instead of `groupby(...).apply(display)`:
# `apply` is meant for functions that return a value, and using it only for
# its side effect leaves a meaningless "Empty DataFrame" as the cell result.
for species_name, trees_of_species in df[interesting_columns].drop_duplicates().groupby(species):
    display(trees_of_species)
```
%% Output
Empty DataFrame
Columns: []
Index: []
%% Cell type:code id:beeb0f88 tags:
``` python
# Bounding box of all measurements: the aggregated frame has one row per
# statistic ('min', 'max') and one column per coordinate (latitude, longitude).
coordinate_bounds = df[coordinates].agg(['min', 'max'])
lat_min, lon_min = coordinate_bounds.loc['min']
lat_max, lon_max = coordinate_bounds.loc['max']
```
%% Cell type:code id:4bdd0adb tags:
``` python
# Show the distinct trees found at each (latitude, longitude) position.
# Iterate over the groups explicitly instead of `groupby(...).apply(display)`:
# `apply` is meant for functions that return a value, and using it only for
# its side effect leaves a meaningless "Empty DataFrame" as the cell result.
for position, trees_at_position in df[interesting_columns].drop_duplicates().groupby(coordinates):
    display(trees_at_position)
```
%% Output
Empty DataFrame
Columns: []
Index: []
%% Cell type:code id:3c5a8c58 tags:
``` python
import folium

# Interactive map with one circle per distinct tree, zoomed to the bounding
# box of all coordinates.
m = folium.Map()
m.fit_bounds([(lat_min, lon_min), (lat_max, lon_max)])

# Grouping by species keeps the circles of one species together in draw order.
for species_name, grouped_df in df[interesting_columns].drop_duplicates().groupby(species):
    for row in grouped_df.itertuples():
        folium.Circle(
            location=[row.latitude, row.longitude],
            tooltip=row.species_latin,
            radius=50,  # [m]
            color="black",
            weight=1,
            opacity=1,
            # Passing fill_color already turns filling on in folium, so state
            # it explicitly instead of a contradictory fill=False.
            fill=True,
            fill_color="green",
            fill_opacity=0.9,
            popup=f"Tree #{row.tree_number} ({row.species_latin})",
        ).add_to(m)

# Last expression of the cell: render the map.
m
```
%% Output
<folium.folium.Map at 0x220c87e8d90>
%% Cell type:code id:0de99f00 tags:
``` python
import folium

# Minimal folium example: an empty map carrying a single green marker.
m = folium.Map()
green_icon = folium.Icon(color="green")
demo_marker = folium.Marker(
    location=[45.3311, -121.7113],
    tooltip="Click me!",
    popup="Timberline Lodge",
    icon=green_icon,
)
demo_marker.add_to(m)

# Last expression of the cell: render the map.
m
```
%% Output
<folium.folium.Map at 0x220c6f2de50>
%% Cell type:code id:0a37cc3c tags:
``` python
```
%% Cell type:code id:808b84d9 tags:
``` python
```
"""
Würzburger Klimabaeume - Bodenfeuchte
https://opendata.wuerzburg.de/explore/dataset/sls-klimabaeume/export/?disjunctive.species_german
"""
from pathlib import Path
import requests
import shutil
import pandas as pd
from rich import print
# Directory containing this script; data paths are resolved relative to it.
SCRIPT_DIR = Path(__file__).resolve().parent
# Folder holding the downloaded dataset.
INPUT_DIR = SCRIPT_DIR / 'input'
# Local copy of the dataset in parquet format.
INPUT_FILE = INPUT_DIR / 'wuerzburg_trees.parquet'
# Parquet export URL of the "sls-klimabaeume" dataset on the Würzburg open data portal.
INPUT_URL = "https://opendata.wuerzburg.de/api/explore/v2.1/catalog/datasets/sls-klimabaeume/exports/parquet?lang=en&timezone=Europe%2FBerlin"
# HTTP headers sent with the download request (browser-like User-Agent).
USER_AGENT = {'User-agent': 'Mozilla/5.0'}
def download_file(download_url, download_path):
    """Stream the content of ``download_url`` into the file ``download_path``.

    The body is first written to a temporary ``*.part`` file next to the
    target and only moved into place once the transfer has completed, so an
    interrupted download never leaves a truncated file at ``download_path``.

    Args:
        download_url: URL to fetch with an HTTP GET request.
        download_path: Destination file (``str`` or ``Path``). Its parent
            directory must already exist.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    target_path = Path(download_path)
    partial_path = target_path.with_name(target_path.name + '.part')
    try:
        with requests.get(download_url, stream=True, headers=USER_AGENT, timeout=60) as r:
            # Fail loudly instead of saving an HTTP error page as the data file.
            r.raise_for_status()
            # The raw stream is not decoded by default; without this a gzip- or
            # deflate-encoded response would be written to disk still compressed.
            r.raw.decode_content = True
            with open(partial_path, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
    except Exception:
        # Do not leave an incomplete temporary file behind.
        if partial_path.exists():
            partial_path.unlink()
        raise
    partial_path.replace(target_path)
# Download the dataset only when no non-empty local copy exists yet.
if INPUT_FILE.exists() and INPUT_FILE.stat().st_size > 0:
    print(f"{INPUT_FILE} already here.")
else:
    # The input folder may not exist yet (e.g. on a first run); opening the
    # destination file for writing would then fail with FileNotFoundError.
    INPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Downloading {INPUT_URL}")
    download_file(INPUT_URL, INPUT_FILE)
    print(" Done")
# Load the dataset and print an overview of it.
df = pd.read_parquet(INPUT_FILE, engine='fastparquet')
print(df)

interesting_columns = ['tree_number', 'species_latin', 'latitude', 'longitude']
species = 'species_latin'
coordinates = ['latitude', 'longitude']

# The same tree appears in many rows; keep one row per distinct combination
# of the interesting columns.
unique_trees = df[interesting_columns].drop_duplicates()

# Print the distinct trees per species, then per position.
# Iterate over the groups explicitly instead of `groupby(...).apply(print)`:
# `apply` is meant for functions that return a value and builds a useless
# result object when it is only used for its side effect.
for _, trees_of_species in unique_trees.groupby(species):
    print(trees_of_species)
for _, trees_at_position in unique_trees.groupby(coordinates):
    print(trees_at_position)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment