From a6a1b4d03f138a9ffceb3c52cc1d520828ba801a Mon Sep 17 00:00:00 2001
From: Eric Duminil <eric.duminil@gmail.com>
Date: Thu, 4 Jul 2024 09:32:04 +0200
Subject: [PATCH] Add shape cache and configurable simplification precision

---
 get_coordinates_by_zipcode.py | 28 +++++++++++++++++-----------
 requirements.txt              |  1 +
 test_zipcode.py               |  1 +
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/get_coordinates_by_zipcode.py b/get_coordinates_by_zipcode.py
index 8b37ce7..12cff76 100644
--- a/get_coordinates_by_zipcode.py
+++ b/get_coordinates_by_zipcode.py
@@ -22,7 +22,6 @@ Also accepts multiple Zipcodes, or Zipcode prefix.
 """
 
 # TODO: Write tests
-# TODO: Use caching for shapes
 # TODO: Rename functions
 
 import argparse
@@ -35,37 +34,49 @@ from shapely.ops import unary_union
 INPUT_FOLDER = Path('plz')
 PLZ_FILENAME = 'plz-5stellig.geojson'
 PLZ_SHAPE_FILE = INPUT_FOLDER / PLZ_FILENAME
+PRECISION = 10  # [m] default value of the `precision` argument
+ONE_DEGREE = 40e6 / 360  # [m] per degree, i.e. a 40 000 km circumference split into 360 degrees
+
+PLZ_SHAPES = None  # holds the parsed GeoJSON once _get_plz_shapes() has loaded it
 
 
 def _download_plz_shapes_if_needed():
     if not PLZ_SHAPE_FILE.exists():
         from tqdm import tqdm
         import requests
+        print("Downloading %s..." % PLZ_FILENAME)  # only reached when the local file is missing
         URL = "https://downloads.suche-postleitzahl.org/v2/public/" + PLZ_FILENAME
         response = requests.get(URL, stream=True)
         INPUT_FOLDER.mkdir(exist_ok=True)
         with open(PLZ_SHAPE_FILE, "wb") as handle:
             for data in tqdm(response.iter_content(chunk_size=1024), unit='kB'):
                 handle.write(data)
+        print('  Done')  # the output file handle is already closed at this point
 
 
 def _get_plz_shapes():
-    print("Parsing %s..." % PLZ_FILENAME)
+    global PLZ_SHAPES  # module-level cache of the parsed GeoJSON
+    if PLZ_SHAPES is not None:  # explicit None test: a loaded-but-falsy value (e.g. {}) is still cached
+        return PLZ_SHAPES
+
     _download_plz_shapes_if_needed()
     try:
+        print("Parsing %s..." % PLZ_FILENAME)
         with open(PLZ_SHAPE_FILE) as f:
             print('  Done')
-            return json.load(f)
+            PLZ_SHAPES = json.load(f)
+            return PLZ_SHAPES
     except json.decoder.JSONDecodeError:
         PLZ_SHAPE_FILE.unlink()
         raise AttributeError(f"{PLZ_FILENAME} seems to be damaged. Removing it. Please try again!")
 
 
-def _get_coordinates(data, plz_patterns):
+def get_coordinates_by_zipcode(plz_patterns, precision=PRECISION):
+    plz_shapes = _get_plz_shapes()
     geometries = []
     for plz_pattern in plz_patterns:
         found = False
-        for plz_geojson in data['features']:
+        for plz_geojson in plz_shapes['features']:
             if re.match(plz_pattern, plz_geojson['properties']['plz']):
                 found = True
 
@@ -85,7 +96,7 @@ def _get_coordinates(data, plz_patterns):
             raise AttributeError(f"Sorry, no information could be found for PLZ={plz_pattern}")
 
     merged = unary_union(geometries)
-    wkt_polygon = merged.simplify(0.0001).wkt
+    wkt_polygon = merged.simplify(precision / ONE_DEGREE).wkt
     print(wkt_polygon)
     try:
         import pyperclip
@@ -99,11 +110,6 @@ def _get_coordinates(data, plz_patterns):
     return wkt_polygon
 
 
-def get_coordinates_by_zipcode(plzs):
-    plz_shapes = _get_plz_shapes()
-    return _get_coordinates(plz_shapes, plzs)
-
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Get WKT geometry for desired PLZs')
     parser.add_argument('plzs', metavar='PLZ', type=str, nargs='+',
diff --git a/requirements.txt b/requirements.txt
index 49ddecb..0083c46 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 pyproj
 requests
 pyperclip
+tqdm
diff --git a/test_zipcode.py b/test_zipcode.py
index e3342a7..f70d40b 100644
--- a/test_zipcode.py
+++ b/test_zipcode.py
@@ -7,6 +7,7 @@ class TestClass(unittest.TestCase):
     def test_method(self):
         self.assertEqual('foo'.upper(), 'FOO')
         get_coordinates_by_zipcode.get_coordinates_by_zipcode(['70567'])
+        get_coordinates_by_zipcode.get_coordinates_by_zipcode(['70567'], 1_000)  # same PLZ, precision=1000
 
 
 if __name__ == '__main__':
-- 
GitLab