From 3303a85f8945d3ae95e261cdd92714196aa3e039 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler.strauss@gmail.com>
Date: Tue, 25 Aug 2020 10:59:28 +0200
Subject: [PATCH] Adding calc step

---
 preprocessor/preprocessor/preprocess.py     |  5 +-
 preprocessor/preprocessor/steps/__init__.py |  2 +
 preprocessor/preprocessor/steps/calc.py     | 51 +++++++++++++++++++++
 3 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 preprocessor/preprocessor/steps/calc.py

diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py
index 57d9f155..29ecdb56 100644
--- a/preprocessor/preprocessor/preprocess.py
+++ b/preprocessor/preprocessor/preprocess.py
@@ -13,7 +13,7 @@ from .transfer import get_downloader, get_uploader
 from .archive import unpack_files
 from .metadata import extract_product_type_and_level
 from .steps import (
-    georeference_step, extract_subdataset_step, stack_bands_step, output_step
+    georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step
 )
 
 logging.basicConfig()
@@ -66,6 +66,7 @@ STEP_FUNCTIONS = {
     'custom_preprocessor': custom_preprocessor,
     'subdatasets': extract_subdataset_step,
     'georeference': georeference_step,
+    'calc': calc_step,
     'stack_bands': stack_bands_step,
     'output': output_step,
     'custom_postprocessor': custom_postprocessor,
@@ -159,7 +160,7 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N
 
         previous_step = 'unpack'
         # make processing steps
-        for step in ['custom_preprocessor', 'subdatasets', 'georeference', 'stack_bands', 'output', 'custom_postprocessor']:
+        for step in ['custom_preprocessor', 'subdatasets', 'georeference', 'calc', 'stack_bands', 'output', 'custom_postprocessor']:
             step_config = preprocess_config.get(step)
             if not step_config:
                 logger.debug('Skipping step %s as it is not configured.' % step)
diff --git a/preprocessor/preprocessor/steps/__init__.py b/preprocessor/preprocessor/steps/__init__.py
index 3f566c57..82fc0449 100644
--- a/preprocessor/preprocessor/steps/__init__.py
+++ b/preprocessor/preprocessor/steps/__init__.py
@@ -2,6 +2,7 @@ from .georeference import georeference_step
 from .output import output_step
 from .stack import stack_bands_step
 from .subdataset import extract_subdataset_step
+from .calc import calc_step
 
 
 __all__ = [
@@ -9,4 +10,5 @@ __all__ = [
     'output_step',
     'stack_bands_step',
     'extract_subdataset_step',
+    'calc_step',
 ]
diff --git a/preprocessor/preprocessor/steps/calc.py b/preprocessor/preprocessor/steps/calc.py
new file mode 100644
index 00000000..ad27cdb6
--- /dev/null
+++ b/preprocessor/preprocessor/steps/calc.py
@@ -0,0 +1,51 @@
+import os
+from os.path import basename, dirname, join
+from math import ceil, floor
+import logging
+import uuid
+import os
+import subprocess
+from typing import List
+from glob import glob
+import shutil
+from osgeo import gdal, osr
+
+from ..util import replace_ext
+
+
+def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, formulas: List[dict]):
+    """Evaluate every entry of `formulas` into a new file in `target_dir`, then copy all files of `source_dir` there.
+
+    Each entry needs 'formula' and 'inputs' (variable name -> locator dict with a 'glob' key); 'output_postfix'
+    (default '_proc<index>') and 'data_type' (default 'Float32') are optional. IndexError if the first glob has no match.
+    """
+    for i, item in enumerate(formulas):
+        # output is named after the first match of the first input; glob() returns a list, so index it instead of next()
+        filename = glob(join(source_dir, next(iter(item['inputs'].values()))['glob']))[0]
+        target_filename = join(target_dir, replace_ext(basename(filename), item.get('output_postfix', '_proc%d' % i) + '.tif', False))
+        calc_formula(source_dir, item['inputs'], target_filename, item['formula'], item.get('data_type', 'Float32'))
+    # carry over all files of the previous step unchanged
+    for filename in glob('%s/*' % source_dir):
+        shutil.copy(filename, join(target_dir, basename(filename)))
+
+
+def calc_formula(source_dir: os.PathLike, inputs: List[dict], target_filename: os.PathLike, formula: str, data_type: str="Float32", nodata_value: float=None):
+    """Run ``gdal_calc.py`` to evaluate `formula` and write the result to `target_filename`.
+
+    Despite its annotation, `inputs` is used as a mapping of the variable letter referenced in `formula` to a
+    locator dict: 'glob' is matched relative to `source_dir` (first match wins), optional 'band' defaults to 1.
+    `data_type` is passed as ``--type``; `nodata_value`, when not None, as ``--NoDataValue``.
+    Raises IndexError when a glob matches no file and CalledProcessError when gdal_calc.py exits non-zero.
+    """
+    cmd = ["gdal_calc.py", "--calc=%s" % formula, "--outfile=%s" % target_filename, "--type", data_type]
+
+    for name, locator in inputs.items():
+        # glob() returns a list (not an iterator), so take the first match by index
+        filename = glob(join(source_dir, locator['glob']))[0]
+        cmd.extend(["-%s" % name, filename, "--%s_band=%d" % (name, locator.get('band', 1))])
+
+    if nodata_value is not None:
+        # append, not extend: extend() would split the option string into single characters
+        cmd.append("--NoDataValue=%f" % nodata_value)
+
+    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
-- 
GitLab