From 515d9b79f4a2db5ceb5b511c33d2c3c5dacdce20 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler.strauss@gmail.com>
Date: Wed, 3 Jun 2020 10:27:07 +0200
Subject: [PATCH] Restructuring into subpackages; improved processing steps

---
 preprocessor/preprocessor/output.py           | 15 --------
 preprocessor/preprocessor/steps/__init__.py   |  0
 .../preprocessor/{ => steps}/georeference.py  |  0
 preprocessor/preprocessor/steps/output.py     | 38 +++++++++++++++++++
 .../preprocessor/{ => steps}/stack.py         | 13 ++-----
 .../preprocessor/{ => steps}/subdataset.py    |  5 +--
 .../preprocessor/transfer/__init__.py         |  1 +
 .../preprocessor/{ => transfer}/abc.py        |  0
 .../preprocessor/{ => transfer}/local.py      |  0
 .../preprocessor/{ => transfer}/swift.py      |  0
 .../{transfer.py => transfer/util.py}         |  0
 preprocessor/preprocessor/util.py             |  6 +++
 12 files changed, 50 insertions(+), 28 deletions(-)
 delete mode 100644 preprocessor/preprocessor/output.py
 create mode 100644 preprocessor/preprocessor/steps/__init__.py
 rename preprocessor/preprocessor/{ => steps}/georeference.py (100%)
 create mode 100644 preprocessor/preprocessor/steps/output.py
 rename preprocessor/preprocessor/{ => steps}/stack.py (71%)
 rename preprocessor/preprocessor/{ => steps}/subdataset.py (87%)
 create mode 100644 preprocessor/preprocessor/transfer/__init__.py
 rename preprocessor/preprocessor/{ => transfer}/abc.py (100%)
 rename preprocessor/preprocessor/{ => transfer}/local.py (100%)
 rename preprocessor/preprocessor/{ => transfer}/swift.py (100%)
 rename preprocessor/preprocessor/{transfer.py => transfer/util.py} (100%)
 create mode 100644 preprocessor/preprocessor/util.py

diff --git a/preprocessor/preprocessor/output.py b/preprocessor/preprocessor/output.py
deleted file mode 100644
index fcde4224..00000000
--- a/preprocessor/preprocessor/output.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from os.path import join, basename
-from glob import glob
-
-from osgeo import gdal
-
-
-def output_step(source_dir, target_dir, crs, driver, format_options):
-    for filename in glob(join(source_dir, '*')):
-        target_filename = join(target_dir, basename(filename))
-        gdal.Warp(target_filename, filename, options=gdal.WarpOptions(
-            dstSRS=crs,
-            format=driver,
-            creationOptions=format_options,
-        ))
-
diff --git a/preprocessor/preprocessor/steps/__init__.py b/preprocessor/preprocessor/steps/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/preprocessor/preprocessor/georeference.py b/preprocessor/preprocessor/steps/georeference.py
similarity index 100%
rename from preprocessor/preprocessor/georeference.py
rename to preprocessor/preprocessor/steps/georeference.py
diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py
new file mode 100644
index 00000000..c6a0743a
--- /dev/null
+++ b/preprocessor/preprocessor/steps/output.py
@@ -0,0 +1,61 @@
+import os
+from os.path import join, basename
+from glob import glob
+from uuid import uuid4
+
+from osgeo import gdal
+
+from ..util import replace_ext
+
+
+def output_step(source_dir: os.PathLike, target_dir: os.PathLike, options: dict=None):
+    """ Warp all files of a directory and merge the results into one file
+
+    Every entry matched by ``<source_dir>/*`` is warped into ``target_dir``
+    under its own basename, with the extension replaced by the first one
+    the output driver declares. If more than one file was written, they are
+    warped again into a single file, the individual files are deleted and
+    the merged file is renamed to the first individual filename.
+
+    :param source_dir: directory holding the input files
+    :param target_dir: directory receiving the output file
+    :param options: keyword arguments passed to ``gdal.WarpOptions``; its
+                    ``format`` entry (``GTiff`` if absent) names the driver
+                    used to look up the file extension
+    :raises ValueError: if GDAL does not know the driver or the driver
+                        declares no file extension
+    """
+    # find out the driver to get the extension
+    options = options if options is not None else {}
+    frmt = options.get('format', 'GTiff')
+    driver = gdal.GetDriverByName(frmt)
+    if not driver:
+        raise ValueError('Unsupported driver %s' % frmt)
+
+    # not every GDAL driver declares DMD_EXTENSIONS; report that as an
+    # unsupported format instead of failing with a bare KeyError
+    extensions = (driver.GetMetadata().get('DMD_EXTENSIONS') or '').split()
+    if not extensions:
+        raise ValueError('Driver %s does not declare a file extension' % frmt)
+    extension = extensions[0]
+
+    # warp each individual file; sorted, because glob() yields them in
+    # arbitrary order and the first file names the final result
+    warped_files = []
+    for filename in sorted(glob(join(source_dir, '*'))):
+        target_filename = join(target_dir, replace_ext(basename(filename), extension))
+        gdal.Warp(target_filename, filename, options=gdal.WarpOptions(
+            **options
+        ))
+        warped_files.append(target_filename)
+
+    if len(warped_files) > 1:
+        tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension))
+        gdal.Warp(tmp_filename, warped_files, options=gdal.WarpOptions(
+            **options
+        ))
+
+        # delete old files and rename the combined file to the first filename
+        for filename in warped_files:
+            os.unlink(filename)
+        os.rename(tmp_filename, warped_files[0])
diff --git a/preprocessor/preprocessor/stack.py b/preprocessor/preprocessor/steps/stack.py
similarity index 71%
rename from preprocessor/preprocessor/stack.py
rename to preprocessor/preprocessor/steps/stack.py
index a2abdeb3..3f999294 100644
--- a/preprocessor/preprocessor/stack.py
+++ b/preprocessor/preprocessor/steps/stack.py
@@ -6,12 +6,10 @@ from glob import glob
 from typing import List
 from osgeo import gdal
 
+from ..util import replace_ext
 
-def replace_ext(filename: os.PathLike, new_ext: str) -> os.PathLike:
-    return splitext(filename)[0] + ('' if new_ext.startswith('.') else '.') + new_ext
 
-
-def stack_bands_step(source_dir, target_dir, group_by: str=None, sort_by: str=None, order: List[str]=None):
+def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, group_by: str=None, sort_by: str=None, order: List[str]=None):
     """ Stack bands of the individual images
     """
     filenames = glob(join(source_dir, '*/*.tif'), recursive=True)
@@ -46,10 +44,5 @@ def stack_bands_step(source_dir, target_dir, group_by: str=None, sort_by: str=No
                 )
 
         # build a VRT to stack bands for each group
-        vrt_filename = replace_ext(join(source_dir, groupname), '.vrt')
+        vrt_filename = replace_ext(join(target_dir, groupname), '.vrt')
         gdal.BuildVRT(vrt_filename, group, separate=True)
-
-        # translate to TIFF
-        # TODO: necessary? maybe just keep a VRT instead
-        out_filename = replace_ext(join(target_dir, groupname), '.tif')
-        gdal.Translate(out_filename, vrt_filename, format='GTiff')
diff --git a/preprocessor/preprocessor/subdataset.py b/preprocessor/preprocessor/steps/subdataset.py
similarity index 87%
rename from preprocessor/preprocessor/subdataset.py
rename to preprocessor/preprocessor/steps/subdataset.py
index fe86ba21..84be41a2 100644
--- a/preprocessor/preprocessor/subdataset.py
+++ b/preprocessor/preprocessor/steps/subdataset.py
@@ -2,11 +2,10 @@ import os
 from os.path import join, splitext, basename
 from glob import glob
 from typing import Set
-from osgeo import gdal
 
+from osgeo import gdal
 
-def replace_ext(filename: os.PathLike, new_ext: str) -> os.PathLike:
-    return splitext(filename)[0] + '' if new_ext.startswith('.') else '.' + new_ext
+from ..util import replace_ext
 
 
 def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, data_file_glob: str, subdataset_types: Set[str]=None):
diff --git a/preprocessor/preprocessor/transfer/__init__.py b/preprocessor/preprocessor/transfer/__init__.py
new file mode 100644
index 00000000..82412791
--- /dev/null
+++ b/preprocessor/preprocessor/transfer/__init__.py
@@ -0,0 +1 @@
+from .util import get_downloader, get_uploader
\ No newline at end of file
diff --git a/preprocessor/preprocessor/abc.py b/preprocessor/preprocessor/transfer/abc.py
similarity index 100%
rename from preprocessor/preprocessor/abc.py
rename to preprocessor/preprocessor/transfer/abc.py
diff --git a/preprocessor/preprocessor/local.py b/preprocessor/preprocessor/transfer/local.py
similarity index 100%
rename from preprocessor/preprocessor/local.py
rename to preprocessor/preprocessor/transfer/local.py
diff --git a/preprocessor/preprocessor/swift.py b/preprocessor/preprocessor/transfer/swift.py
similarity index 100%
rename from preprocessor/preprocessor/swift.py
rename to preprocessor/preprocessor/transfer/swift.py
diff --git a/preprocessor/preprocessor/transfer.py b/preprocessor/preprocessor/transfer/util.py
similarity index 100%
rename from preprocessor/preprocessor/transfer.py
rename to preprocessor/preprocessor/transfer/util.py
diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py
new file mode 100644
index 00000000..ebc6a6ed
--- /dev/null
+++ b/preprocessor/preprocessor/util.py
@@ -0,0 +1,13 @@
+import os
+from os.path import splitext
+
+
+def replace_ext(filename: os.PathLike, new_ext: str) -> os.PathLike:
+    """ Swap the extension of ``filename`` for ``new_ext``
+
+    ``new_ext`` may be given with or without the leading dot.
+    """
+    root, _old_ext = splitext(filename)
+    if not new_ext.startswith('.'):
+        new_ext = '.' + new_ext
+    return root + new_ext
-- 
GitLab