Commit 4ed3f015 authored by Eric Duminil's avatar Eric Duminil
Browse files

Klimabaeume open data

parent 6fed29db
input/*.parquet
\ No newline at end of file
This diff is collapsed.
"""
Würzburg climate trees (Würzburger Klimabäume) - soil moisture (Bodenfeuchte)
https://opendata.wuerzburg.de/explore/dataset/sls-klimabaeume/export/?disjunctive.species_german
"""
from pathlib import Path
import requests
import shutil
import pandas as pd
from rich import print
SCRIPT_DIR = Path(__file__).resolve().parent
INPUT_DIR = SCRIPT_DIR / 'input'
INPUT_FILE = INPUT_DIR / 'wuerzburg_trees.parquet'
INPUT_URL = "https://opendata.wuerzburg.de/api/explore/v2.1/catalog/datasets/sls-klimabaeume/exports/parquet?lang=en&timezone=Europe%2FBerlin"
USER_AGENT = {'User-agent': 'Mozilla/5.0'}
def download_file(download_url, download_path):
    """Stream the resource at *download_url* into the file *download_path*.

    The payload is first written to a sibling ``<name>.part`` file and only
    moved to *download_path* once the transfer has finished, so an
    interrupted download never leaves a truncated file under the final name.

    Args:
        download_url: URL to fetch (sent with the ``USER_AGENT`` headers).
        download_path: Destination file, as ``str`` or ``Path``. Missing
            parent directories are created.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    target = Path(download_path)
    # A fresh checkout may not contain the destination folder yet.
    target.parent.mkdir(parents=True, exist_ok=True)
    partial = target.with_name(target.name + '.part')

    with requests.get(download_url, stream=True, headers=USER_AGENT,
                      timeout=60) as r:
        # Without this, an HTML error page would be saved as if it were data.
        r.raise_for_status()
        # r.raw bypasses requests' decoding; ask urllib3 to undo any
        # gzip/deflate transfer encoding while copying.
        r.raw.decode_content = True
        with open(partial, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    # Publish the complete file under its final name.
    partial.replace(target)
# Fetch the dataset only when no non-empty local copy is available.
if not INPUT_FILE.exists() or INPUT_FILE.stat().st_size <= 0:
    print(f"Downloading {INPUT_URL}")
    download_file(INPUT_URL, INPUT_FILE)
    print(" Done")
else:
    print(f"{INPUT_FILE} already here.")
# Load the downloaded dataset and show it as-is.
df = pd.read_parquet(INPUT_FILE, engine='fastparquet')
print(df)

interesting_columns = ['tree_number', 'species_latin', 'latitude', 'longitude']
species = 'species_latin'
coordinates = ['latitude', 'longitude']

# Distinct combinations of the selected columns; computed once and reused
# for both groupings below.
unique_trees = df[interesting_columns].drop_duplicates()

# Print every group, first per species, then per coordinate pair.
# Iterating the groups explicitly (instead of ``groupby(...).apply(print)``)
# avoids using ``apply`` purely for its side effect and always prints the
# full rows, grouping columns included, regardless of the pandas version.
for grouping in (species, coordinates):
    for _, group in unique_trees.groupby(grouping):
        print(group)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment