From ed3ad2be3d379c2a6a97ceabbace8a2115e41a58 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 26 Aug 2021 09:40:41 +0200 Subject: [PATCH 01/20] [preprocessor] extend metadata product type extraction from one to many will extract values of all configured product_types where xpath is not None and then apply the first one which is actually configured --- preprocessor/preprocessor/metadata.py | 38 +++++++++++-------------- preprocessor/preprocessor/preprocess.py | 18 ++++++++---- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/preprocessor/preprocessor/metadata.py b/preprocessor/preprocessor/metadata.py index 083d585d..adc0df12 100644 --- a/preprocessor/preprocessor/metadata.py +++ b/preprocessor/preprocessor/metadata.py @@ -1,4 +1,3 @@ - from lxml import etree @@ -14,34 +13,29 @@ def evaluate_xpath(root, xpath): return None -def extract_product_type_and_level(metadata_files, config): +def extract_product_types_and_levels(metadata_files, config): """ """ - product_type = None - product_level = None + product_types = [] + product_levels = [] for metadata_file in metadata_files: with open(metadata_file) as f: tree = etree.parse(f) root = tree.getroot() - if not product_type: - xpaths = config['type_extractor']['xpath'] + xpaths = config['type_extractor']['xpath'] + xpaths = [xpaths] if isinstance(xpaths, str) else xpaths + for xpath in xpaths: + product_type = evaluate_xpath(root, xpath) + if product_type is not None: + product_types.append(product_type) + + xpaths = config['level_extractor']['xpath'] + if xpaths: xpaths = [xpaths] if isinstance(xpaths, str) else xpaths for xpath in xpaths: - product_type = evaluate_xpath(root, xpath) - if product_type: - break - - if not product_level: - xpaths = config['level_extractor']['xpath'] - if xpaths: - xpaths = [xpaths] if isinstance(xpaths, str) else xpaths - for xpath in xpaths: - product_level = evaluate_xpath(root, xpath) - if product_level: - break - - if product_type and product_level: - break + product_level = evaluate_xpath(root, xpath) + if product_level is not None: + product_level.append(product_level) - return product_type, product_level \ No newline at end of file + return product_types, product_levels diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 15066ff4..d893ae63 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -1,16 +1,14 @@ import os import os.path -import itertools import importlib import logging import shutil -from typing import List from pprint import pformat from urllib.parse import urlparse from .transfer import get_downloader, get_uploader from .archive import unpack_files -from .metadata import extract_product_type_and_level +from .metadata import extract_product_types_and_levels from .steps import ( georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step ) @@ -161,12 +159,20 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N metadata_files = unpack_files(source_archive_path, 'extra', glob=config['metadata_glob'], case=config.get('glob_case', False)) # open the XML to retrieve the product type and level - product_type, product_level = extract_product_type_and_level(metadata_files, config) - logger.info('Detected product_type/level_type %s/%s' % (product_type, product_level)) + product_types, product_levels = extract_product_types_and_levels(metadata_files, config) + logger.info('Detected product_types: %s and level_types: %s' % (product_types, product_levels)) # get a concrete configuration for the type, filled with the defaults default_config = dict(config['preprocessing'].get('defaults', {})) - type_based_config = dict(config['preprocessing']['types'].get(product_type, {})) + + type_based_config = {} + for product_type in product_types: + # search metadata product type in configuration until it finds it + configured_preprocessor_config = dict(config['preprocessing']['types'].get(product_type, {})) + if configured_preprocessor_config != {}: + type_based_config = configured_preprocessor_config + break + default_config.update(type_based_config) preprocess_config = default_config logger.debug('Using preprocessing config %s' % pformat(preprocess_config)) -- GitLab From f36e7fd7fe834062220ee7ebd38025c31edd7a7a Mon Sep 17 00:00:00 2001 From: jankovicn Date: Thu, 26 Aug 2021 13:16:52 +0200 Subject: [PATCH 02/20] added first steps to stac preprocessor --- config/emg/emg_preprocessor-config.yml | 8 +++++--- docker-compose.emg.dev.yml | 7 +++++++ preprocessor/Dockerfile | 3 ++- preprocessor/entrypoint.sh | 6 ++++++ preprocessor/preprocessor/__main__.py | 3 +++ preprocessor/preprocessor/preprocess.py | 4 ++++ preprocessor/preprocessor/steps/output.py | 11 +++++++++-- 7 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 preprocessor/preprocessor/__main__.py diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index 0aa694ab..66fe0572 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -11,7 +11,7 @@ source: user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}' target: type: swift - replace: false + replace: true kwargs: username: !env '${OS_USERNAME}' password: !env '${OS_PASSWORD}' @@ -23,7 +23,7 @@ target: user_domain_name: !env '${OS_USER_DOMAIN_NAME}' container: !env '${UPLOAD_CONTAINER}' workdir: /tmp -keep_temp: false +keep_temp: true metadata_glob: "*GSC*.xml" type_extractor: xpath: @@ -42,6 +42,7 @@ preprocessing: - '*.h5' output: options: + panchromatic: true format: COG dstSRS: 'EPSG:4326' dstNodata: 0 @@ -135,11 +136,12 @@ preprocessing: - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - "*IMG_*_PMS_*.JP2" - "*IMG_*_PMS_*.tif" - "*IMG_*_MS_*.JP2" - "*IMG_*_MS_*.tif" + - "*IMG_*_P_*.JP2" + - "*IMG_*_P_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" diff --git a/docker-compose.emg.dev.yml b/docker-compose.emg.dev.yml index 962cdc08..e086e2d7 100644 --- a/docker-compose.emg.dev.yml +++ b/docker-compose.emg.dev.yml @@ -68,6 +68,8 @@ services: target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp @@ -80,6 +82,11 @@ services: - type: bind source: ./testing/ target: /testing/ + - type: bind + source: ./preprocessor/preprocessor + target: /usr/local/lib/python3.8/dist-packages/preprocessor-1.4.8-py3.8.egg/preprocessor/ + ports: + - 5678:5678 networks: extnet: name: emg-extnet diff --git a/preprocessor/Dockerfile b/preprocessor/Dockerfile index e0342805..0f99d386 100644 --- a/preprocessor/Dockerfile +++ b/preprocessor/Dockerfile @@ -41,7 +41,8 @@ USER root RUN apt update && \ apt install -y \ - python3-redis python3-keystoneclient python3-swiftclient python3-click python3-setuptools python3-jsonschema wait-for-it && \ + python3-redis python3-keystoneclient python3-swiftclient python3-pip python3-click python3-setuptools python3-jsonschema wait-for-it && \ + pip3 install pystac && \ apt autoremove -y && \ apt clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/preprocessor/entrypoint.sh b/preprocessor/entrypoint.sh index af0b5a97..2ccd50fc 100644 --- a/preprocessor/entrypoint.sh +++ b/preprocessor/entrypoint.sh @@ -2,6 +2,12 @@ SERVICES=${WAIT_SERVICES:=''} TIMEOUT=${WAIT_TIMEOUT:='15'} +DEBUG=${DEBUG:='false'} + +if [[ "$DEBUG" = 'true' ]] ; then + echo 'Installing debug dependencies' + pip3 install debugpy; +fi if [[ ! -z $SERVICES ]] ; then for service in $SERVICES ; do diff --git a/preprocessor/preprocessor/__main__.py b/preprocessor/preprocessor/__main__.py new file mode 100644 index 00000000..4cafccba --- /dev/null +++ b/preprocessor/preprocessor/__main__.py @@ -0,0 +1,3 @@ +from .cli import cli + +cli() diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 15066ff4..6ffbe4ec 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -243,6 +243,10 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N % (file_path, preprocess_timer.elapsed) ) + # build a stac asset + # the href of stac item should be the path to the uploaded file + # construct the item with as much metadata (datetime, bbox, geometry, properties) as possible + # item.add_asset(key, asset) return upload_filenames, file_path diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py index f7e035f6..8972594d 100644 --- a/preprocessor/preprocessor/steps/output.py +++ b/preprocessor/preprocessor/steps/output.py @@ -12,6 +12,7 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c # find out the driver to get the extension options = options if options is not None else {} frmt = options.get('format', 'GTiff') + panchromatic = options.pop('panchromatic', None) driver = gdal.GetDriverByName(frmt) if not driver: raise ValueError('Unsupported driver %s' % frmt) @@ -25,7 +26,7 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c gdal.Warp(target_filename, filename, **options) warped_files.append(target_filename) - if len(warped_files) > 1: + if len(warped_files) > 1 and not panchromatic: tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension)) logger.debug('Warping files %s' % warped_files) gdal.Warp(tmp_filename, warped_files, **options) @@ -33,4 +34,10 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c # delete old files and rename the combined file to the first filename for filename in warped_files: os.unlink(filename) - os.rename(tmp_filename, warped_files[0]) + os.rename(tmp_filename, warped_files[0]) + elif len(warped_files) > 2 and panchromatic: + # group the panchromatic data to one group + # group ms to one group + # don't merge + # output two file paths + pass -- GitLab From c536511fa9344e2468c3680ecdf9f001a09dbc3c Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 26 Aug 2021 17:38:18 +0200 Subject: [PATCH 03/20] typo --- preprocessor/preprocessor/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessor/preprocessor/metadata.py b/preprocessor/preprocessor/metadata.py index adc0df12..a8956d89 100644 --- a/preprocessor/preprocessor/metadata.py +++ b/preprocessor/preprocessor/metadata.py @@ -36,6 +36,6 @@ def extract_product_types_and_levels(metadata_files, config): for xpath in xpaths: product_level = evaluate_xpath(root, xpath) if product_level is not None: - product_level.append(product_level) + product_levels.append(product_level) return product_types, product_levels -- GitLab From 185345fb06b4971ad63dff3f6e883f5df4b95533 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Fri, 27 Aug 2021 11:14:21 +0200 Subject: [PATCH 04/20] allow to specify gdal_config_options in preprocessor per product type --- preprocessor/preprocessor/preprocess.py | 9 ++++++--- preprocessor/preprocessor/steps/subdataset.py | 3 +++ preprocessor/preprocessor/util.py | 20 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index d893ae63..9585721f 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -13,7 +13,7 @@ from .steps import ( georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step ) from .steps.browse_report import browse_georeference -from .util import workdir, Timer, get_size_in_bytes +from .util import workdir, Timer, get_size_in_bytes, apply_gdal_config_options, set_gdal_options from .exceptions import ExistsAtUploadError logging.basicConfig() @@ -75,6 +75,8 @@ def flatten(l): def preprocess_internal(preprocess_config, previous_step='unpack'): force_refresh = False + # apply specific gdal config options + original_config = apply_gdal_config_options(preprocess_config) # make processing steps for step in ['custom_preprocessor', 'subdatasets', 'georeference', 'calc', 'stack_bands', 'output', 'custom_postprocessor']: step_config = preprocess_config.get(step) @@ -106,6 +108,9 @@ def preprocess_internal(preprocess_config, previous_step='unpack'): previous_step = step + # put back original configuration for further steps + set_gdal_options(original_config) + if not os.path.isdir('upload') or force_refresh: try: os.mkdir('upload') @@ -116,8 +121,6 @@ def preprocess_internal(preprocess_config, previous_step='unpack'): copy_files(previous_step, 'upload', move=preprocess_config.get('move_files', False)) - - def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=None): """ Runs the preprocessing of a single file. """ diff --git a/preprocessor/preprocessor/steps/subdataset.py b/preprocessor/preprocessor/steps/subdataset.py index 552bd29f..8d73a131 100644 --- a/preprocessor/preprocessor/steps/subdataset.py +++ b/preprocessor/preprocessor/steps/subdataset.py @@ -1,9 +1,12 @@ import os from os.path import join, basename from typing import Dict +import logging from ..util import replace_ext, gdal, get_all_data_files +logger = logging.getLogger(__name__) + def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, subdataset_types: Dict[str, str]=None): filenames = get_all_data_files(source_dir, preprocessor_config) diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index f1b58e6c..1f5695c0 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -102,3 +102,23 @@ def get_all_data_files(source_dir, preprocessor_config): # get only unique files to compensate for possibly bad glob yielding doubles, keeping order file_paths_filt = list(dict.fromkeys(file_paths_filt)) return file_paths_filt + + +def apply_gdal_config_options(preprocessor_config): + """ Applies config specific gdal configuration options for a given preprocessing step + Returning original values to allow switching them back after preprocessing done. + """ + original_gdal_config_options = {} + for config_option in preprocessor_config.get('gdal_config_options', []): + key, _, val = config_option.partition('=') + orig_val = gdal.GetConfigOption(key) + gdal.SetConfigOption(key, val) + original_gdal_config_options[key] = orig_val + return original_gdal_config_options + + +def set_gdal_options(config_options): + """ Sets a key, value dictionary of config options to gdal + """ + for key, value in config_options.items(): + gdal.SetConfigOption(key, value) -- GitLab From 452b2997da1567a51b08195bb3d17463f0ef3f3d Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 1 Sep 2021 09:23:13 +0200 Subject: [PATCH 05/20] update preprocessor image to gdal:ubuntu-full-3.3.1 --- preprocessor/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/preprocessor/Dockerfile b/preprocessor/Dockerfile index e0342805..113749b4 100644 --- a/preprocessor/Dockerfile +++ b/preprocessor/Dockerfile @@ -25,7 +25,7 @@ # IN THE SOFTWARE. #----------------------------------------------------------------------------- -FROM osgeo/gdal:ubuntu-full-3.2.1 +FROM osgeo/gdal:ubuntu-full-3.3.1 MAINTAINER EOX LABEL name="prism view server preprocessor" \ @@ -73,6 +73,7 @@ ENV INSTANCE_ID="prism-data-access-server_preprocessor" \ REDIS_PREPROCESS_PROGRESS_KEY="preprocessing_set" \ REDIS_PREPROCESS_SUCCESS_KEY="preprocess-success_set" \ GDAL_PAM_ENABLED="NO" \ + GDAL_ENABLE_DEPRECATED_DRIVER_JPEG2000="YES" \ PREPROCESSOR_DEBUG= ADD run-preprocessor.sh \ -- GitLab From b54c3750989f7051693073156f3e1ed3e46165c2 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 1 Sep 2021 15:17:30 +0200 Subject: [PATCH 06/20] [preprocessor] multiple sar related updates - update config-schema - allow data_file_globs to be configured for each step, with defaults still being applied for all other steps - allow to set creationOptions for calc step outputs - replace calc formula with template ${input_band_statistics_statname} (${A_1_statistics_max}) with actual band statistics - allow to use intermediate steps in gdal_calc for further steps (consider target_folder for glob in case finding via source_folder fails) --- preprocessor/preprocessor/config-schema.yaml | 24 ++++- preprocessor/preprocessor/steps/calc.py | 101 ++++++++++++++---- .../preprocessor/steps/georeference.py | 4 +- preprocessor/preprocessor/steps/output.py | 5 +- preprocessor/preprocessor/steps/stack.py | 4 +- preprocessor/preprocessor/steps/subdataset.py | 6 +- preprocessor/preprocessor/util.py | 9 +- 7 files changed, 122 insertions(+), 31 deletions(-) diff --git a/preprocessor/preprocessor/config-schema.yaml b/preprocessor/preprocessor/config-schema.yaml index d5c933bb..166e3794 100644 --- a/preprocessor/preprocessor/config-schema.yaml +++ b/preprocessor/preprocessor/config-schema.yaml @@ -38,7 +38,7 @@ properties: description: The local directory, where intermediary files are to be stored. type: string keep_temp: - description: Whether to keep temporary files for each step. DEPRECATED. + description: Whether to keep temporary files for each step. type: boolean metadata_glob: description: A file glob to select metadata files from the downloaded archive. @@ -95,6 +95,8 @@ required: - preprocessing definitions: steps: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. custom_preprocessor: description: Definition of a custom preprocessor step type: object @@ -112,6 +114,9 @@ definitions: description: The definition of the subdataset extraction step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array subdataset_types: description: Mapping of subdataset identifier to output filename postfix for subdatasets to be extracted for each data file. type: object @@ -121,6 +126,9 @@ definitions: georeference: type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array geotransforms: description: A list of geotransform methods to use type: array @@ -164,6 +172,9 @@ definitions: description: Definition of a calculation step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array formulas: description: A list of formulas to calculate type: array @@ -187,7 +198,7 @@ definitions: description: The output data type for the calculated file. (GDAL notation) type: string formula: - description: The formula to calculate. See gdal_calc.py for details. + description: "The formula to calculate. See gdal_calc.py for details. Can contain custom templates for getting band stats, like '${A_1_statistics_min}'." type: string output_postfix: description: The filename postfix to append to the output filename. By default an enumeration is used. @@ -195,10 +206,16 @@ definitions: nodata_value: description: Use this nodata value in the calculation. type: float + creationOptions: + description: List of creation options for gdal_calc.py output. + type: array stack_bands: description: Definition of a stack bands step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array group_by: description: A regex to group the input datasets, if consisting of multiple file. The first regex group is used for the grouping. type: string @@ -214,6 +231,9 @@ definitions: description: Definition of an output step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array options: description: "Options to be passed to `gdal.Warp`. See https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details" type: object diff --git a/preprocessor/preprocessor/steps/calc.py b/preprocessor/preprocessor/steps/calc.py index 67e98dce..7c9f982e 100644 --- a/preprocessor/preprocessor/steps/calc.py +++ b/preprocessor/preprocessor/steps/calc.py @@ -1,21 +1,30 @@ import os -from os.path import basename, dirname, join, isfile +from os.path import basename, join, isfile import subprocess from typing import List from glob import glob import shutil import logging +import re -from ..util import replace_ext +from ..util import replace_ext, get_all_data_files, gdal logger = logging.getLogger(__name__) -def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, formulas: List[dict]): +def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, formulas: List[dict], data_file_globs: List[str]=[]): for i, item in enumerate(formulas): - # get first filename as a base - filename = glob(join(source_dir, list(item['inputs'].values())[0]['glob']))[0] + # get first filename as a base, first looking into source_dir, then target_dir as fallback + filenames_source = glob(join(source_dir, list(item['inputs'].values())[0]['glob'])) + if len(filenames_source) > 0: + filename = filenames_source[0] + else: + filenames_target = glob(join(target_dir, list(item['inputs'].values())[0]['glob'])) + if len(filenames_target) > 0: + filename = filenames_target[0] + else: + raise Exception('No input file in source or target directory for calc: %s' % item) target_filename = join( target_dir, replace_ext(basename(filename), item.get('output_postfix', '_proc%d' % i) + '.tif', False) @@ -24,33 +33,89 @@ def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_con if isfile(target_filename): logger.warn('Calc output filename %s already exists' % target_filename) - calc_formula(source_dir, item['inputs'], target_filename, item['formula'], item.get('data_type', 'Float32'), item.get('nodata_value', None)) + calc_formula(source_dir, target_dir, item['inputs'], target_filename, item['formula'], item.get('data_type', 'Float32'), item.get('nodata_value', None), item.get('creationOptions', {})) - # take all original files with from the last step - for filename in glob('%s/*' % source_dir): + # take all original files with from the last step matching the data_file_glob + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) + for filename in filenames: target_filename = join(target_dir, basename(filename)) if isfile(target_filename): logger.warn('Calc output filename %s already exists' % target_filename) shutil.copy(filename, target_filename) -def calc_formula(source_dir: os.PathLike, inputs: List[dict], target_filename: os.PathLike, formula: str, data_type: str="Float32", nodata_value: float=None): +def calc_formula(source_dir: os.PathLike, target_dir: os.PathLike, inputs: List[dict], target_filename: os.PathLike, formula: str, data_type: str="Float32", nodata_value: float=None, creationOptions: List[str]=[]): + used_formula = formula cmd = [ "gdal_calc.py", - "--calc=%s" % formula, "--outfile=%s" % target_filename, "--type", data_type, ] - - for name in inputs: - # select first - filename = glob(join(source_dir, inputs[name]['glob']))[0] + for input_name in inputs: + # get first filename as a base, first looking into source_dir, then target_dir as fallback + filenames_source = glob(join(source_dir, inputs[input_name]['glob'])) + if len(filenames_source) > 0: + filename = filenames_source[0] + else: + filenames_target = glob(join(target_dir, inputs[input_name]['glob'])) + if len(filenames_target) > 0: + filename = filenames_target[0] + else: + raise Exception('No input file in source or target directory for formula: %s' % formula) + band_number = inputs[input_name].get('band', 1) cmd.extend([ - "-%s" % name, filename, - "--%s_band=%d" % (name, inputs[name].get('band', 1)), + "-%s" % input_name, filename, + "--%s_band=%d" % (input_name, band_number), ]) + # evaluate formula as a template + used_formula = evaluate_formula(formula, filename, input_name, band_number) + + cmd.extend([ + "--calc=%s" % used_formula, + ]) if nodata_value is not None: - cmd.append("--NoDataValue=%f" % nodata_value) + cmd.extend([ + "--NoDataValue=%s" % nodata_value, + ]) + + for option in creationOptions: + cmd.extend([ + "--co", + option, + ]) + + process = subprocess.run(cmd, capture_output=True, text=True) + logger.debug("gdal calc stderr: %s" % process.stderr) + - subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +def evaluate_formula(formula, filename, input_name, band): + """ Tries to replace a few common placeholders in the calc formula + """ + # formula: "10*log10((A.astype(float)-${A_1_statistics_max}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + evaluated_formula = formula + if "statistics" in formula: + # find all occurence of templates in form of ${A_1_statistics_max} + found = re.findall(r'(\${[0-9a-zA-Z_]*})', formula) + # get unique, keeping order + found = list(dict.fromkeys(found)) + for item in found: + parts = item.replace("${", "").replace("}", "").split("_") + # if current input is the one in template, replace it with statistics + if parts[0] == input_name and int(parts[1]) == band: + src_ds = gdal.Open(filename) + statistics = src_ds.GetRasterBand(band).GetStatistics(True, True) # force recount + if parts[3] == "min": + replace = "%s" % statistics[0] + elif parts[3] == "max": + replace = "%s" % statistics[1] + elif parts[3] == "mean": + replace = "%s" % statistics[2] + elif parts[3] == "std": + replace = "%s" % statistics[3] + else: + logger.warn("Unknown statistics found in expression %s" % item) + continue + # replace the template with actual value + evaluated_formula = evaluated_formula.replace(item, replace) + return evaluated_formula diff --git a/preprocessor/preprocessor/steps/georeference.py b/preprocessor/preprocessor/steps/georeference.py index ebe8b1ae..f9503e83 100644 --- a/preprocessor/preprocessor/steps/georeference.py +++ b/preprocessor/preprocessor/steps/georeference.py @@ -11,7 +11,7 @@ from ..util import gdal, osr, replace_ext, get_all_data_files logger = logging.getLogger(__name__) -def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, geotransforms: List[dict]): +def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, geotransforms: List[dict], data_file_globs: List[str]=[]): success = False for options in geotransforms: type_name = options['type'].lower() @@ -31,7 +31,7 @@ def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, preproce else: raise Exception('Invalid georeference type %s' % type_name) try: - filenames = get_all_data_files(source_dir, preprocessor_config) + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) for filename in filenames: target_filename = join(target_dir, basename(filename)) georef_func(filename, target_filename, **opts_dict) diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py index f7e035f6..409a844d 100644 --- a/preprocessor/preprocessor/steps/output.py +++ b/preprocessor/preprocessor/steps/output.py @@ -1,6 +1,7 @@ import os from os.path import join, basename from uuid import uuid4 +from typing import List from ..util import replace_ext, gdal, get_all_data_files import logging @@ -8,7 +9,7 @@ import logging logger = logging.getLogger(__name__) -def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None): +def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None, data_file_globs: List[str]=[]): # find out the driver to get the extension options = options if options is not None else {} frmt = options.get('format', 'GTiff') @@ -18,7 +19,7 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c extension = driver.GetMetadata().get('DMD_EXTENSIONS', 'tif').split(' ')[0] # warp each individual file warped_files = [] - filenames = get_all_data_files(source_dir, preprocessor_config) + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) for filename in filenames: target_filename = join(target_dir, replace_ext(basename(filename), extension)) logger.debug('Warping file %s' % filename) diff --git a/preprocessor/preprocessor/steps/stack.py b/preprocessor/preprocessor/steps/stack.py index 55d3eff5..456392c7 100644 --- a/preprocessor/preprocessor/steps/stack.py +++ b/preprocessor/preprocessor/steps/stack.py @@ -7,10 +7,10 @@ from typing import List from ..util import replace_ext, gdal, get_all_data_files -def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str=None, sort_by: str=None, order: List[str]=None): +def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str=None, sort_by: str=None, order: List[str]=None, data_file_globs: List[str]=[]): """ Stack bands of the individual images """ - filenames = get_all_data_files(source_dir, preprocessor_config) + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) # check if we have a group_by regex. If yes, use the first # re-group to group by. # Fallback is basename of file as groupname diff --git a/preprocessor/preprocessor/steps/subdataset.py b/preprocessor/preprocessor/steps/subdataset.py index 8d73a131..2d351833 100644 --- a/preprocessor/preprocessor/steps/subdataset.py +++ b/preprocessor/preprocessor/steps/subdataset.py @@ -1,6 +1,6 @@ import os from os.path import join, basename -from typing import Dict +from typing import Dict, List import logging from ..util import replace_ext, gdal, get_all_data_files @@ -8,8 +8,8 @@ from ..util import replace_ext, gdal, get_all_data_files logger = logging.getLogger(__name__) -def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, subdataset_types: Dict[str, str]=None): - filenames = get_all_data_files(source_dir, preprocessor_config) +def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, subdataset_types: Dict[str, str]=None, data_file_globs: List[str]=[]): + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) if len(filenames) == 0: raise Exception('No datafiles were matched by the provided glob') diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index 1f5695c0..f4b4d7df 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -91,13 +91,18 @@ def get_size_in_bytes(file_path, unit): return convert_unit(size, unit) -def get_all_data_files(source_dir, preprocessor_config): +def get_all_data_files(source_dir, preprocessor_config, data_file_globs=[]): """ Based on 'data_file_globs' configuration, gets all unique data file paths from folder matching any of the globs""" # get all file paths recursively file_paths = [p for p in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(p)] # filter them by data_globs file_paths_filt = [] - for dataglob in preprocessor_config.get('data_file_globs', ['*']): + used_globs = preprocessor_config.get('data_file_globs', ['*']) + # override global data_file_globs by the provided one if possible + if len(data_file_globs) > 0: + used_globs = data_file_globs + + for dataglob in used_globs: file_paths_filt += filter_filenames(file_paths, dataglob, preprocessor_config.get('glob_case', False)) # get only unique files to compensate for possibly bad glob yielding doubles, keeping order file_paths_filt = list(dict.fromkeys(file_paths_filt)) -- GitLab From 18290edf46ef1e5ee4a67586bf51db78c41c779d Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 1 Sep 2021 16:55:31 +0200 Subject: [PATCH 07/20] [preprocessor] correct rotated geotransform via warp to vrt in stack_bands --- preprocessor/preprocessor/steps/calc.py | 1 + preprocessor/preprocessor/steps/stack.py | 33 ++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/preprocessor/preprocessor/steps/calc.py b/preprocessor/preprocessor/steps/calc.py index 7c9f982e..02f3cc67 100644 --- a/preprocessor/preprocessor/steps/calc.py +++ b/preprocessor/preprocessor/steps/calc.py @@ -105,6 +105,7 @@ def evaluate_formula(formula, filename, input_name, band): if parts[0] == input_name and int(parts[1]) == band: src_ds = gdal.Open(filename) statistics = src_ds.GetRasterBand(band).GetStatistics(True, True) # force recount + del src_ds if parts[3] == "min": replace = "%s" % statistics[0] elif parts[3] == "max": diff --git a/preprocessor/preprocessor/steps/stack.py b/preprocessor/preprocessor/steps/stack.py index 456392c7..cc5757e6 100644 --- a/preprocessor/preprocessor/steps/stack.py +++ b/preprocessor/preprocessor/steps/stack.py @@ -10,6 +10,7 @@ from ..util import replace_ext, gdal, get_all_data_files def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str=None, sort_by: str=None, order: List[str]=None, data_file_globs: List[str]=[]): """ Stack bands of the individual images """ + import pdb;pdb.set_trace() filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) # check if we have a group_by regex. If yes, use the first # re-group to group by. @@ -46,6 +47,34 @@ def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preproces key=lambda v: re_sort_by.match(v).group(1) ) - # build a VRT to stack bands for each group vrt_filename = replace_ext(join(target_dir, groupname), '.vrt') - gdal.BuildVRT(vrt_filename, group, separate=True) + # correct rotated geotransforms as those prevent vrt creation + group_new = remove_rotated_geotransform(group, target_dir) + # build a VRT to stack bands for each group + gdal.BuildVRT(vrt_filename, group_new, separate=True) + + +def remove_rotated_geotransform(filenames, target_dir): + """ Unrotates geotransform to a common grid + """ + output_filenames = [] + for filename in filenames: + src_ds = gdal.Open(filename) + # validate if rotated geotransform + if src_ds.GetGeoTransform() and (src_ds.GetGeoTransform()[2] != 0.0 or src_ds.GetGeoTransform()[4] != 0.0): + # rotated geotransform, needs warping to unrotated grid + target_filename = join( + target_dir, + replace_ext(basename(filename), '_rotate' + '.vrt', False) + ) + intermediate_warp(src_ds, target_filename) + output_filenames.append(target_filename) + else: + output_filenames.append(filename) + del src_ds + return output_filenames + + +def intermediate_warp(src_ds, output_path=None, dst_SRS="EPSG:4326"): + nodata = src_ds.GetRasterBand(1).GetNoDataValue() or 0 + gdal.Warp(output_path, src_ds, dstSRS=dst_SRS, format="VRT", multithread=True, resampleAlg=gdal.GRA_Bilinear, srcNodata=nodata, dstNodata=nodata) -- GitLab From 11b13be6caefb6d222da7b9a5db8c07533c18a8a Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 1 Sep 2021 16:58:09 +0200 Subject: [PATCH 08/20] add preprocessor configuration for SAR_HIM_1B product_type --- .../csea-emg/csea-emg_preprocessor-config.yml | 41 +++++++++++ config/emg/emg_preprocessor-config.yml | 69 ++++++++++++++++--- .../frtx-emg/frtx-emg_preprocessor-config.yml | 41 +++++++++++ .../sace-emg/sace-emg_preprocessor-config.yml | 41 +++++++++++ 4 files changed, 181 insertions(+), 11 deletions(-) diff --git a/config/csea-emg/csea-emg_preprocessor-config.yml b/config/csea-emg/csea-emg_preprocessor-config.yml index 0aa694ab..40a563b0 100644 --- a/config/csea-emg/csea-emg_preprocessor-config.yml +++ b/config/csea-emg/csea-emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -227,6 +229,45 @@ preprocessing: DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_stretch_db.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI_stretch_db.tif" + - "*_SBI.tif" + sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index 0aa694ab..fe038f96 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -10,23 +10,17 @@ source: auth_version: !env '${ST_AUTH_VERSION_DOWNLOAD}' user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}' target: - type: swift - replace: false + type: local + replace: true kwargs: - username: !env '${OS_USERNAME}' - password: !env '${OS_PASSWORD}' - tenant_name: !env '${OS_TENANT_NAME}' - tenant_id: !env '${OS_TENANT_ID}' - region_name: !env '${OS_REGION_NAME}' - auth_version: !env '${ST_AUTH_VERSION}' - auth_url: !env '${OS_AUTH_URL}' - user_domain_name: !env '${OS_USER_DOMAIN_NAME}' - container: !env '${UPLOAD_CONTAINER}' + storage_path: /target workdir: /tmp keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -133,6 +127,20 @@ preprocessing: data_file_globs: - "*pansharpened_clip.tif" - "*_pansharpened.tif" + output: + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: True + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES SP06: data_file_globs: # throw away Panchromatic *_P_* @@ -227,6 +235,45 @@ preprocessing: DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_stretch_db.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI_stretch_db.tif" + - "*_SBI.tif" + sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/frtx-emg/frtx-emg_preprocessor-config.yml b/config/frtx-emg/frtx-emg_preprocessor-config.yml index 0aa694ab..40a563b0 100644 --- a/config/frtx-emg/frtx-emg_preprocessor-config.yml +++ b/config/frtx-emg/frtx-emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -227,6 +229,45 @@ preprocessing: DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_stretch_db.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI_stretch_db.tif" + - "*_SBI.tif" + sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/sace-emg/sace-emg_preprocessor-config.yml b/config/sace-emg/sace-emg_preprocessor-config.yml index 0aa694ab..40a563b0 100644 --- a/config/sace-emg/sace-emg_preprocessor-config.yml +++ b/config/sace-emg/sace-emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -227,6 +229,45 @@ preprocessing: DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_stretch_db.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI_stretch_db.tif" + - "*_SBI.tif" + sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 -- GitLab From 79635484a31bfb061b3d7f1a598c4384e88cff72 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 1 Sep 2021 17:45:26 +0200 Subject: [PATCH 09/20] fix typos, minor updates --- .../csea-emg/csea-emg_preprocessor-config.yml | 5 ++- config/emg/emg_preprocessor-config.yml | 33 ++++++++----------- .../frtx-emg/frtx-emg_preprocessor-config.yml | 5 ++- .../sace-emg/sace-emg_preprocessor-config.yml | 5 ++- preprocessor/preprocessor/steps/stack.py | 1 - 5 files changed, 19 insertions(+), 30 deletions(-) diff --git a/config/csea-emg/csea-emg_preprocessor-config.yml b/config/csea-emg/csea-emg_preprocessor-config.yml index 40a563b0..77152b77 100644 --- a/config/csea-emg/csea-emg_preprocessor-config.yml +++ b/config/csea-emg/csea-emg_preprocessor-config.yml @@ -235,7 +235,7 @@ preprocessing: data_file_globs: - "*.h5" - "*.tif" - - "*_stretch_db.vrt" + - "*_SBI.vrt" subdatasets: subdataset_types: '//S01/SBI': 'S01_SBI' @@ -265,9 +265,8 @@ preprocessing: nodata_value: 0 stack_bands: data_file_globs: - - "*_SBI_stretch_db.tif" - "*_SBI.tif" - sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" + - "*_SBI_stretch_db.tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index fe038f96..77152b77 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -10,10 +10,18 @@ source: auth_version: !env '${ST_AUTH_VERSION_DOWNLOAD}' user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}' target: - type: local - replace: true + type: swift + replace: false kwargs: - storage_path: /target + username: !env '${OS_USERNAME}' + password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + user_domain_name: !env '${OS_USER_DOMAIN_NAME}' + container: !env '${UPLOAD_CONTAINER}' workdir: /tmp keep_temp: false metadata_glob: "*GSC*.xml" @@ -127,20 +135,6 @@ preprocessing: data_file_globs: - "*pansharpened_clip.tif" - "*_pansharpened.tif" - output: - options: - format: COG - dstSRS: 'EPSG:4326' - dstNodata: 0 - multithread: True - warpMemoryLimit: 3000 - creationOptions: - - BLOCKSIZE=512 - - COMPRESS=DEFLATE - - NUM_THREADS=8 - - BIGTIFF=YES - - OVERVIEWS=AUTO - - PREDICTOR=YES SP06: data_file_globs: # throw away Panchromatic *_P_* @@ -241,7 +235,7 @@ preprocessing: data_file_globs: - "*.h5" - "*.tif" - - "*_stretch_db.vrt" + - "*_SBI.vrt" subdatasets: subdataset_types: '//S01/SBI': 'S01_SBI' @@ -271,9 +265,8 @@ preprocessing: nodata_value: 0 stack_bands: data_file_globs: - - "*_SBI_stretch_db.tif" - "*_SBI.tif" - sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" + - "*_SBI_stretch_db.tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/frtx-emg/frtx-emg_preprocessor-config.yml b/config/frtx-emg/frtx-emg_preprocessor-config.yml index 40a563b0..77152b77 100644 --- a/config/frtx-emg/frtx-emg_preprocessor-config.yml +++ b/config/frtx-emg/frtx-emg_preprocessor-config.yml @@ -235,7 +235,7 @@ preprocessing: data_file_globs: - "*.h5" - "*.tif" - - "*_stretch_db.vrt" + - "*_SBI.vrt" subdatasets: subdataset_types: '//S01/SBI': 'S01_SBI' @@ -265,9 +265,8 @@ preprocessing: nodata_value: 0 stack_bands: data_file_globs: - - "*_SBI_stretch_db.tif" - "*_SBI.tif" - sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" + - "*_SBI_stretch_db.tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/sace-emg/sace-emg_preprocessor-config.yml b/config/sace-emg/sace-emg_preprocessor-config.yml index 40a563b0..77152b77 100644 --- a/config/sace-emg/sace-emg_preprocessor-config.yml +++ b/config/sace-emg/sace-emg_preprocessor-config.yml @@ -235,7 +235,7 @@ preprocessing: data_file_globs: - "*.h5" - "*.tif" - - "*_stretch_db.vrt" + - "*_SBI.vrt" subdatasets: subdataset_types: '//S01/SBI': 'S01_SBI' @@ -265,9 +265,8 @@ preprocessing: nodata_value: 0 stack_bands: data_file_globs: - - "*_SBI_stretch_db.tif" - "*_SBI.tif" - sort_by: "(.*_SBI|.*_SBI_stretch_db).tif" + - "*_SBI_stretch_db.tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/preprocessor/preprocessor/steps/stack.py b/preprocessor/preprocessor/steps/stack.py index cc5757e6..92594fea 100644 --- a/preprocessor/preprocessor/steps/stack.py +++ b/preprocessor/preprocessor/steps/stack.py @@ -10,7 +10,6 @@ from ..util import replace_ext, gdal, get_all_data_files def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str=None, sort_by: str=None, order: List[str]=None, data_file_globs: List[str]=[]): """ Stack bands of the individual images """ - import pdb;pdb.set_trace() filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) # check if we have a group_by regex. If yes, use the first # re-group to group by. -- GitLab From f1d299865d651cf2311fc11605420b42e2a2c839 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 2 Sep 2021 10:16:09 +0200 Subject: [PATCH 10/20] several cleanups, stop dev stack exposure of container ports behind fw - remove extra volume mounts in dev compose files like /preprocessor, /data, /core - propagate DEV="TRUE" changes to other compose files - close dev compose stacks from outside when behind fw --- .gitignore | 1 - config/emg/emg_preprocessor-config.yml | 6 ++-- data/.gitkeep | 0 docker-compose.core12.dev.yml | 41 +++++---------------- docker-compose.core12.ops.yml | 2 +- docker-compose.csea-emg.dev.yml | 41 +++++---------------- docker-compose.dem.dev.yml | 43 ++++++----------------- docker-compose.demF.dev.yml | 41 +++++---------------- docker-compose.emg.dev.yml | 42 ++++------------------ docker-compose.frtx-emg.dev.yml | 41 +++++---------------- docker-compose.sace-emg.dev.yml | 41 +++++---------------- docker-compose.vhr18.dev.yml | 41 +++++---------------- preprocessor/preprocessor/steps/output.py | 4 +-- 13 files changed, 76 insertions(+), 268 deletions(-) delete mode 100644 data/.gitkeep diff --git a/.gitignore b/.gitignore index 5d6bab6d..28162099 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/data __pycache__ *.pyc .venv diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index 7b5485d5..8a985f80 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -11,7 +11,7 @@ source: user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}' target: type: swift - replace: true + replace: false kwargs: username: !env '${OS_USERNAME}' password: !env '${OS_PASSWORD}' @@ -23,7 +23,7 @@ target: user_domain_name: !env '${OS_USER_DOMAIN_NAME}' container: !env '${UPLOAD_CONTAINER}' workdir: /tmp -keep_temp: true +keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: @@ -44,7 +44,6 @@ preprocessing: - '*.h5' output: options: - panchromatic: true format: COG dstSRS: 'EPSG:4326' dstNodata: 0 @@ -137,6 +136,7 @@ preprocessing: - "*pansharpened_clip.tif" - "*_pansharpened.tif" SP06: + panchromatic: true data_file_globs: - "*IMG_*_PMS_*.JP2" - "*IMG_*_PMS_*.tif" diff --git a/data/.gitkeep b/data/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/docker-compose.core12.dev.yml b/docker-compose.core12.dev.yml index 61956f67..a4f612f4 100644 --- a/docker-compose.core12.dev.yml +++ b/docker-compose.core12.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,52 +30,33 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: core12-extnet diff --git a/docker-compose.core12.ops.yml b/docker-compose.core12.ops.yml index a1b9a7fb..d8e02380 100644 --- a/docker-compose.core12.ops.yml +++ b/docker-compose.core12.ops.yml @@ -1,5 +1,5 @@ version: "3.6" -x-vs-version: :release-1.4.7 # bumpversion +x-vs-version: :release-1.4.8 # bumpversion services: database: volumes: diff --git a/docker-compose.csea-emg.dev.yml b/docker-compose.csea-emg.dev.yml index e1a3bcd8..28a2ba0f 100644 --- a/docker-compose.csea-emg.dev.yml +++ b/docker-compose.csea-emg.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,52 +30,33 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: csea-emg-extnet diff --git a/docker-compose.dem.dev.yml b/docker-compose.dem.dev.yml index 877f6e40..d285111d 100644 --- a/docker-compose.dem.dev.yml +++ b/docker-compose.dem.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,53 +30,34 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: dem-extnet - external: true \ No newline at end of file + external: true diff --git a/docker-compose.demF.dev.yml b/docker-compose.demF.dev.yml index fc169ea2..8e498882 100644 --- a/docker-compose.demF.dev.yml +++ b/docker-compose.demF.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,52 +30,33 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: demF-extnet diff --git a/docker-compose.emg.dev.yml b/docker-compose.emg.dev.yml index e086e2d7..9fbdac29 100644 --- a/docker-compose.emg.dev.yml +++ b/docker-compose.emg.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,35 +30,18 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml @@ -73,20 +52,11 @@ services: volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ - - type: bind - source: ./preprocessor/preprocessor - target: /usr/local/lib/python3.8/dist-packages/preprocessor-1.4.8-py3.8.egg/preprocessor/ ports: - - 5678:5678 + - "127.0.0.1:5678:5678" networks: extnet: name: emg-extnet diff --git a/docker-compose.frtx-emg.dev.yml b/docker-compose.frtx-emg.dev.yml index e258aa10..6e63a69e 100644 --- a/docker-compose.frtx-emg.dev.yml +++ b/docker-compose.frtx-emg.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,52 +30,33 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: frtx-emg-extnet diff --git a/docker-compose.sace-emg.dev.yml b/docker-compose.sace-emg.dev.yml index 9a021941..d7928411 100644 --- a/docker-compose.sace-emg.dev.yml +++ b/docker-compose.sace-emg.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,52 +30,33 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: sace-emg-extnet diff --git a/docker-compose.vhr18.dev.yml b/docker-compose.vhr18.dev.yml index 62c9304a..8e196544 100644 --- a/docker-compose.vhr18.dev.yml +++ b/docker-compose.vhr18.dev.yml @@ -4,11 +4,7 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:5432:5432" sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +17,7 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - "127.0.0.1:80:80" volumes: - type: bind source: ./client/src/ @@ -34,52 +30,33 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:81:80" + - "127.0.0.1:82:8080" environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - "127.0.0.1:83:80" configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - "127.0.0.1:5678:5678" networks: extnet: name: vhr18-extnet diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py index b17fd36c..13bc163c 100644 --- a/preprocessor/preprocessor/steps/output.py +++ b/preprocessor/preprocessor/steps/output.py @@ -13,7 +13,7 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c # find out the driver to get the extension options = options if options is not None else {} frmt = options.get('format', 'GTiff') - panchromatic = options.pop('panchromatic', None) + panchromatic = preprocessor_config.get('panchromatic', None) driver = gdal.GetDriverByName(frmt) if not driver: raise ValueError('Unsupported driver %s' % frmt) @@ -35,7 +35,7 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c # delete old files and rename the combined file to the first filename for filename in warped_files: os.unlink(filename) - os.rename(tmp_filename, warped_files[0]) + os.rename(tmp_filename, warped_files[0]) elif len(warped_files) > 2 and panchromatic: # group the panchromatic data to one group # group ms to one group -- GitLab From b4dad028276db5dff9acddd37fb8cc984280ba4b Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 2 Sep 2021 15:27:43 +0200 Subject: [PATCH 11/20] [preprocessor] enable grouping for output step - re-use stack_bands logic for panchromatic use_case --- preprocessor/preprocessor/steps/output.py | 31 +++++---- preprocessor/preprocessor/steps/stack.py | 76 +++++++++++++---------- 2 files changed, 57 insertions(+), 50 deletions(-) diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py index 13bc163c..15eda4d5 100644 --- a/preprocessor/preprocessor/steps/output.py +++ b/preprocessor/preprocessor/steps/output.py @@ -4,16 +4,16 @@ from uuid import uuid4 from typing import List from ..util import replace_ext, gdal, get_all_data_files +from .stack import handle_group_sort, create_groups import logging logger = logging.getLogger(__name__) -def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None, data_file_globs: List[str]=[]): +def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None, data_file_globs: List[str]=[], group_by: str=None, sort_by: str=None, order: List[str]=None): # find out the driver to get the extension options = options if options is not None else {} frmt = options.get('format', 'GTiff') - panchromatic = preprocessor_config.get('panchromatic', None) driver = gdal.GetDriverByName(frmt) if not driver: raise ValueError('Unsupported driver %s' % frmt) @@ -27,18 +27,15 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c gdal.Warp(target_filename, filename, **options) warped_files.append(target_filename) - if len(warped_files) > 1 and not panchromatic: - tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension)) - logger.debug('Warping files %s' % warped_files) - gdal.Warp(tmp_filename, warped_files, **options) - - # delete old files and rename the combined file to the first filename - for filename in warped_files: - os.unlink(filename) - os.rename(tmp_filename, warped_files[0]) - elif len(warped_files) > 2 and panchromatic: - # group the panchromatic data to one group - # group ms to one group - # don't merge - # output two file paths - pass + # if more than 1 file present or more than 2 and group_by is configured (pan+ms) + if (len(warped_files) > 1 and not group_by) or len(warped_files) > 2: + groups = create_groups(group_by, warped_files) + for groupname, group in groups.items(): + handle_group_sort(sort_by, order, group, groupname) + tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension)) + logger.debug('Warping files %s' % group) + gdal.Warp(tmp_filename, group, **options) + # delete old files and rename the combined file to the first filename + for filename in group: + os.unlink(filename) + os.rename(tmp_filename, group[0]) diff --git a/preprocessor/preprocessor/steps/stack.py b/preprocessor/preprocessor/steps/stack.py index 63ea4ac0..c21329ea 100644 --- a/preprocessor/preprocessor/steps/stack.py +++ b/preprocessor/preprocessor/steps/stack.py @@ -14,51 +14,61 @@ def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preproces """ Stack bands of the individual images """ filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) - # check if we have a group_by regex. If yes, use the first - # re-group to group by. - # Fallback is basename of file as groupname - if group_by: - re_group_by = re.compile(group_by) - groups = { - k: list(v) - for k, v in groupby(filenames, key=lambda v: re_group_by.match(v).group(1)) - } - else: - groups = {basename(filenames[0]): filenames} - + groups = create_groups(group_by, filenames) for groupname, group in groups.items(): # check if a sort_by is specified. if yes, use the sort_by regex group # and optionally a ordered list to order the filenames - logger.debug('Handling group before sort %s' % groupname) - if sort_by: - re_sort_by = re.compile(sort_by) - - if order: - group = [ - v for v in group - if re_sort_by.match(v) - and re_sort_by.match(v).group(1) in order - ] - - group = sorted( - group, - key=lambda v: order.index(re_sort_by.match(v).group(1)) - ) - else: - group = sorted( - group, - key=lambda v: re_sort_by.match(v).group(1) - ) + handle_group_sort(sort_by, order, group, groupname) vrt_filename = replace_ext(join(target_dir, groupname), '.vrt') # correct rotated geotransforms as those prevent vrt creation group_new = remove_rotated_geotransform(group, target_dir) logger.debug("Group contents %s" % group_new) - logger.debug("vrt_filename %s" % vrt_filename) # build a VRT to stack bands for each group gdal.BuildVRT(vrt_filename, group_new, separate=True) +def handle_group_sort(sort_by, order, group, groupname): + if sort_by: + logger.debug('Handling group before sort %s' % groupname) + re_sort_by = re.compile(sort_by) + + if order: + group = [ + v for v in group + if re_sort_by.match(v) + and re_sort_by.match(v).group(1) in order + ] + + group = sorted( + group, + key=lambda v: order.index(re_sort_by.match(v).group(1)) + ) + else: + group = sorted( + group, + key=lambda v: re_sort_by.match(v).group(1) + ) + + +def create_groups(group_by, filenames): + """ + Creates groups of files based on group_by configuration + check if we have a group_by regex. If yes, use the first + re-group to group by. + Fallback is basename of file as the only groupname + """ + if group_by: + re_group_by = re.compile(group_by) + groups = { + k: list(v) + for k, v in groupby(filenames, key=lambda v: re_group_by.match(v).group(1)) + } + else: + groups = {basename(filenames[0]): filenames} + return groups + + def remove_rotated_geotransform(filenames, target_dir): """ Unrotates geotransform to a common grid """ -- GitLab From 6c12b1acfc23df219904b6272e78320081542ef5 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 2 Sep 2021 16:04:00 +0200 Subject: [PATCH 12/20] update preprocessor configs in ew02, ew03, ph1a, ph1b, sp06, sp07 for emg* collections for pan/ms grouping --- .../csea-emg/csea-emg_preprocessor-config.yml | 159 ++++++++++++----- config/emg/emg_preprocessor-config.yml | 161 +++++++++++++----- .../frtx-emg/frtx-emg_preprocessor-config.yml | 159 ++++++++++++----- .../sace-emg/sace-emg_preprocessor-config.yml | 159 ++++++++++++----- 4 files changed, 480 insertions(+), 158 deletions(-) diff --git a/config/csea-emg/csea-emg_preprocessor-config.yml b/config/csea-emg/csea-emg_preprocessor-config.yml index 77152b77..e6191898 100644 --- a/config/csea-emg/csea-emg_preprocessor-config.yml +++ b/config/csea-emg/csea-emg_preprocessor-config.yml @@ -41,7 +41,6 @@ preprocessing: data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -137,27 +136,52 @@ preprocessing: - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on @@ -165,16 +189,27 @@ preprocessing: - type: no_op PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -182,16 +217,27 @@ preprocessing: - type: no_op PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -205,24 +251,59 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW01: data_file_globs: - "*.tif" diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index 8a985f80..e6191898 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -41,7 +41,6 @@ preprocessing: data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -136,30 +135,53 @@ preprocessing: - "*pansharpened_clip.tif" - "*_pansharpened.tif" SP06: - panchromatic: true data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" - - "*IMG_*_P_*.JP2" - - "*IMG_*_P_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on @@ -167,16 +189,27 @@ preprocessing: - type: no_op PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -184,16 +217,27 @@ preprocessing: - type: no_op PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -207,24 +251,59 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW01: data_file_globs: - "*.tif" diff --git a/config/frtx-emg/frtx-emg_preprocessor-config.yml b/config/frtx-emg/frtx-emg_preprocessor-config.yml index 77152b77..e6191898 100644 --- a/config/frtx-emg/frtx-emg_preprocessor-config.yml +++ b/config/frtx-emg/frtx-emg_preprocessor-config.yml @@ -41,7 +41,6 @@ preprocessing: data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -137,27 +136,52 @@ preprocessing: - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on @@ -165,16 +189,27 @@ preprocessing: - type: no_op PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -182,16 +217,27 @@ preprocessing: - type: no_op PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -205,24 +251,59 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW01: data_file_globs: - "*.tif" diff --git a/config/sace-emg/sace-emg_preprocessor-config.yml b/config/sace-emg/sace-emg_preprocessor-config.yml index 77152b77..e6191898 100644 --- a/config/sace-emg/sace-emg_preprocessor-config.yml +++ b/config/sace-emg/sace-emg_preprocessor-config.yml @@ -41,7 +41,6 @@ preprocessing: data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -137,27 +136,52 @@ preprocessing: - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on @@ -165,16 +189,27 @@ preprocessing: - type: no_op PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -182,16 +217,27 @@ preprocessing: - type: no_op PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc @@ -205,24 +251,59 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES EW01: data_file_globs: - "*.tif" -- GitLab From bcf766abfd6ba14dc9317f893aeb720f03592ea5 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Fri, 3 Sep 2021 09:53:47 +0200 Subject: [PATCH 13/20] fix docker compose dev port expose configs as per https://github.com/moby/moby/issues/32299 --- docker-compose.core12.dev.yml | 24 ++++++++++++++++++------ docker-compose.csea-emg.dev.yml | 24 ++++++++++++++++++------ docker-compose.dem.dev.yml | 24 ++++++++++++++++++------ docker-compose.demF.dev.yml | 24 ++++++++++++++++++------ docker-compose.emg.dev.yml | 24 ++++++++++++++++++------ docker-compose.frtx-emg.dev.yml | 24 ++++++++++++++++++------ docker-compose.sace-emg.dev.yml | 24 ++++++++++++++++++------ docker-compose.vhr18.dev.yml | 24 ++++++++++++++++++------ 8 files changed, 144 insertions(+), 48 deletions(-) diff --git a/docker-compose.core12.dev.yml b/docker-compose.core12.dev.yml index a4f612f4..987e49e4 100644 --- a/docker-compose.core12.dev.yml +++ b/docker-compose.core12.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: core12-extnet diff --git a/docker-compose.csea-emg.dev.yml b/docker-compose.csea-emg.dev.yml index 28a2ba0f..8f4afde7 100644 --- a/docker-compose.csea-emg.dev.yml +++ b/docker-compose.csea-emg.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: csea-emg-extnet diff --git a/docker-compose.dem.dev.yml b/docker-compose.dem.dev.yml index d285111d..f781d113 100644 --- a/docker-compose.dem.dev.yml +++ b/docker-compose.dem.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: dem-extnet diff --git a/docker-compose.demF.dev.yml b/docker-compose.demF.dev.yml index 8e498882..84e1c26a 100644 --- a/docker-compose.demF.dev.yml +++ b/docker-compose.demF.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: demF-extnet diff --git a/docker-compose.emg.dev.yml b/docker-compose.emg.dev.yml index 9fbdac29..856df42a 100644 --- a/docker-compose.emg.dev.yml +++ b/docker-compose.emg.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: emg-extnet diff --git a/docker-compose.frtx-emg.dev.yml b/docker-compose.frtx-emg.dev.yml index 6e63a69e..1f7c3538 100644 --- a/docker-compose.frtx-emg.dev.yml +++ b/docker-compose.frtx-emg.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: frtx-emg-extnet diff --git a/docker-compose.sace-emg.dev.yml b/docker-compose.sace-emg.dev.yml index d7928411..ff96c216 100644 --- a/docker-compose.sace-emg.dev.yml +++ b/docker-compose.sace-emg.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: sace-emg-extnet diff --git a/docker-compose.vhr18.dev.yml b/docker-compose.vhr18.dev.yml index 8e196544..0866e9bc 100644 --- a/docker-compose.vhr18.dev.yml +++ b/docker-compose.vhr18.dev.yml @@ -4,7 +4,9 @@ services: networks: - extnet ports: - - "127.0.0.1:5432:5432" + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -17,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "127.0.0.1:80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -30,8 +34,12 @@ services: environment: DEV: "true" ports: - - "127.0.0.1:81:80" - - "127.0.0.1:82:8080" + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: @@ -41,7 +49,9 @@ services: cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "127.0.0.1:83:80" + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml @@ -56,7 +66,9 @@ services: source: ./testing/ target: /testing/ ports: - - "127.0.0.1:5678:5678" + - mode: host + target: 5678 + published: 5678 networks: extnet: name: vhr18-extnet -- GitLab From 8e801570c9869172e82cfdb6ad736a1b2ce7fb5a Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Fri, 3 Sep 2021 17:17:33 +0200 Subject: [PATCH 14/20] [preprocessor] Create a minimal STAC Item with list of assets WIP, saving progress --- preprocessor/preprocessor/daemon.py | 1 - preprocessor/preprocessor/metadata.py | 28 ++++++++++++++++- preprocessor/preprocessor/preprocess.py | 40 +++++++++++------------ preprocessor/preprocessor/util.py | 42 +++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 22 deletions(-) diff --git a/preprocessor/preprocessor/daemon.py b/preprocessor/preprocessor/daemon.py index 009e1a54..3b6cbcf0 100644 --- a/preprocessor/preprocessor/daemon.py +++ b/preprocessor/preprocessor/daemon.py @@ -63,6 +63,5 @@ def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, p client.sadd(failure_set, browse['filename']) continue client.srem(progress_set, value) # for browse_report mode - # TODO: convert to string, list, .... for item in file_paths: client.lpush(write_queue, item) diff --git a/preprocessor/preprocessor/metadata.py b/preprocessor/preprocessor/metadata.py index a8956d89..463d9a94 100644 --- a/preprocessor/preprocessor/metadata.py +++ b/preprocessor/preprocessor/metadata.py @@ -1,11 +1,11 @@ from lxml import etree +from datetime import datetime, timezone def evaluate_xpath(root, xpath): """ """ result = root.xpath(xpath, namespaces=root.nsmap) - print(xpath, result) if result: if isinstance(result, list): return result[0] @@ -39,3 +39,29 @@ def extract_product_types_and_levels(metadata_files, config): product_levels.append(product_level) return product_types, product_levels + + +def extract_metadata_for_stac(metadata_files): + """ + Temporary function extracting necessary metadata to create a minimal STAC item. + For now the xpaths are hardcoded here + """ + GSC_SCHEMA = { + 'identifier': '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:identifier/text()', + } + + # just considering the first metadata file + metadata_file = next(iter(metadata_files.keys())) + with open(metadata_file) as f: + tree = etree.parse(f) + root = tree.getroot() + + output_metadata = { + "id": evaluate_xpath(root, GSC_SCHEMA['identifier']), + "geometry": {}, # optional + "bbox": None, # optional + "datetime": datetime.now(timezone.utc), + "properties": {}, + "extra_fields": {}, + } + return output_metadata diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 3f5b8340..932d6087 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -13,7 +13,7 @@ from .steps import ( georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step ) from .steps.browse_report import browse_georeference -from .util import workdir, Timer, get_size_in_bytes, apply_gdal_config_options, set_gdal_options +from .util import workdir, Timer, get_size_in_bytes, apply_gdal_config_options, set_gdal_options, flatten, create_simple_stac_item from .exceptions import ExistsAtUploadError logging.basicConfig() @@ -65,11 +65,6 @@ STEP_FUNCTIONS = { 'custom_postprocessor': custom_postprocessor, } - -def flatten(l): - return [item for sublist in l for item in sublist] - - # ----------------------------------------------------------------------------- @@ -224,15 +219,18 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N if len(os.listdir('upload')) == 0: # end here, so not only metadata file is uploaded raise Exception('No data files to upload, aborting.') - - paths_for_upload = ['upload', 'extra'] - upload_filenames = [] - for path_to_upload in paths_for_upload: - upload_filenames.extend([ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(path_to_upload) - for filename in filenames - ]) + + upload_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('upload') + for filename in filenames + ] + + extra_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('extra') + for filename in filenames + ] # send all files in the upload directory to the target storage logger.info( @@ -240,7 +238,9 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N % (len(upload_filenames), file_path) ) with Timer() as upload_timer: - uploader.upload(upload_filenames, file_path) + # returns dict local_path:upload_path + upload_images = uploader.upload(upload_filenames, file_path) + upload_extra = uploader.upload(extra_filenames, file_path) logger.info( 'Finished uploading after %.3f seconds.' @@ -252,10 +252,10 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N % (file_path, preprocess_timer.elapsed) ) - # build a stac asset - # the href of stac item should be the path to the uploaded file - # construct the item with as much metadata (datetime, bbox, geometry, properties) as possible - # item.add_asset(key, asset) + if preprocess_config.get('stac_output', False): + # build a stac asset + stac_item = create_simple_stac_item(upload_images, upload_extra) + return stac_item return upload_filenames, file_path diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index f4b4d7df..a6e2ba8f 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -4,8 +4,12 @@ from contextlib import contextmanager from tempfile import TemporaryDirectory, mkdtemp from time import time from glob import glob +import json +from typing import Dict +from pystac import Item, Asset from .archive import filter_filenames +from .metadata import extract_metadata_for_stac try: from osgeo import gdal @@ -26,6 +30,10 @@ def replace_ext(filename: os.PathLike, new_ext: str, force_dot: bool=True) -> os return splitext(filename)[0] + ('' if new_ext.startswith('.') or not force_dot else '.') + new_ext +def flatten(llist): + return [item for sublist in llist for item in sublist] + + @contextmanager def workdir(config: dict, use_dir: os.PathLike=None): prefix = config.get('prefix', 'preprocess_') @@ -127,3 +135,37 @@ def set_gdal_options(config_options): """ for key, value in config_options.items(): gdal.SetConfigOption(key, value) + + +def create_simple_stac_item(upload_files:Dict[str, str], extra_files:Dict[str, str]): + """ Temporary method creating a minimal STAC item from information about products uploaded and metadata files uploaded. + Accepts: 'upload_files' dictionary of upload_files (images), where key is local path and value is remote path. + 'extra_files' dictionary of extra_files (sidecar or metadata), where key is local path and value is remote path. + Assuming metadata file to read and create a STAC info from is first to pick by iterator. + """ + # get relevant metadata from GSC + metadata = extract_metadata_for_stac(extra_files) + stac_item = Item( + id=metadata["id"], + geometry=metadata["geometry"], + bbox=metadata["bbox"], + datetime=metadata["datetime"], + properties=metadata["properties"], + extra_fields=metadata["extra_fields"] + ) + for metadata_file in extra_files.values(): + metadata_asset = Asset( + href=metadata_file, + title="GSC Metadata file", + description="Accompanying GSC Metadata file copied over from source archive", + media_type="application/xml", + roles=["metadata"], + extra_fields={}, + ) + stac_item.add_asset('gsc_metadata', metadata_asset) + # add assets + # the href of stac item should be the path to the uploaded file + # construct the item with as much metadata (datetime, bbox, geometry, properties) as possible + + stac_item_json = json.dumps(stac_item.to_dict(False)) + return stac_item_json -- GitLab From 5882d5e5603c1ecbc64945452883bab7db387d67 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Mon, 6 Sep 2021 17:24:35 +0200 Subject: [PATCH 15/20] [preprocessor] finish minimal STAC item with list of assets --- preprocessor/preprocessor/daemon.py | 2 + preprocessor/preprocessor/preprocess.py | 33 +++++++++----- preprocessor/preprocessor/transfer/local.py | 5 ++- preprocessor/preprocessor/transfer/swift.py | 13 +++--- preprocessor/preprocessor/util.py | 49 ++++++++++++++++++--- 5 files changed, 75 insertions(+), 27 deletions(-) diff --git a/preprocessor/preprocessor/daemon.py b/preprocessor/preprocessor/daemon.py index 3b6cbcf0..99ea0668 100644 --- a/preprocessor/preprocessor/daemon.py +++ b/preprocessor/preprocessor/daemon.py @@ -36,6 +36,7 @@ def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, p client.srem(progress_set, value) if config['target'].get('pass_further_when_exists', True): # pass item to next queue even if file already exists + # TODO fix this for stac_output=true client.lpush(write_queue, value) continue except Exception as e: @@ -56,6 +57,7 @@ def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, p logger.exception(e) if config['target'].get('pass_further_when_exists', True): # pass item to next queue even if file already exists + # TODO fix this for stac_output=true file_paths.append(browse['browse_identifier']) continue except Exception as e: diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 932d6087..d7ed92f1 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -235,7 +235,7 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N # send all files in the upload directory to the target storage logger.info( 'Starting uploading of %d files to %s' - % (len(upload_filenames), file_path) + % (len(upload_filenames + extra_filenames), file_path) ) with Timer() as upload_timer: # returns dict local_path:upload_path @@ -255,7 +255,7 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N if preprocess_config.get('stac_output', False): # build a stac asset stac_item = create_simple_stac_item(upload_images, upload_extra) - return stac_item + return upload_filenames + extra_filenames, stac_item return upload_filenames, file_path @@ -328,24 +328,29 @@ def preprocess_browse(config: dict, browse_type: str, browse_report: dict, brows uploader = get_uploader( target_config['type'], target_config.get('args'), target_config.get('kwargs') ) - paths_for_upload = ['upload', 'extra'] - upload_filenames = [] - for path_to_upload in paths_for_upload: - upload_filenames.extend([ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(path_to_upload) - for filename in filenames - ]) + upload_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('upload') + for filename in filenames + ] + + extra_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('extra') + for filename in filenames + ] file_path = browse['browse_identifier'] or upload_filenames[0] # send all files in the upload directory to the target storage logger.info( 'Starting uploading of %d files to %s' - % (len(upload_filenames), file_path) + % (len(upload_filenames + extra_filenames), file_path) ) with Timer() as upload_timer: - uploader.upload(upload_filenames, file_path) + # returns dict local_path:upload_path + upload_images = uploader.upload(upload_filenames, file_path) + upload_extra = uploader.upload(extra_filenames, file_path) logger.info( 'Finished uploading after %.3f seconds.' @@ -356,5 +361,9 @@ def preprocess_browse(config: dict, browse_type: str, browse_report: dict, brows 'Finished preprocessing of browse "%s" after %.3f seconds.' % (filename, preprocess_timer.elapsed) ) + if preprocess_config.get('stac_output', False): + # build a stac asset + stac_item = create_simple_stac_item(upload_images, upload_extra) + return upload_filenames + extra_filenames, stac_item return upload_filenames, file_path diff --git a/preprocessor/preprocessor/transfer/local.py b/preprocessor/preprocessor/transfer/local.py index f1450f7d..6fa18b1f 100644 --- a/preprocessor/preprocessor/transfer/local.py +++ b/preprocessor/preprocessor/transfer/local.py @@ -25,6 +25,7 @@ class Uploader(Base): """ def upload(self, local_path: Union[os.PathLike, List[os.PathLike]], remote_dir: os.PathLike) -> List[os.PathLike]: paths = local_path if isinstance(local_path, List) else [local_path] + local_remote_mapping = {} remote_paths = [ os.path.join( self.storage_path, @@ -37,8 +38,8 @@ class Uploader(Base): for local_path, remote_path in zip(paths, remote_paths): os.makedirs(os.path.dirname(remote_path), exist_ok=True) shutil.copy2(local_path, remote_path) - - return remote_paths + local_remote_mapping[local_path] = remote_path + return local_remote_mapping def product_exists(self, remote_dir: os.PathLike) -> bool: remote_path = os.path.join(self.storage_path, remote_dir) diff --git a/preprocessor/preprocessor/transfer/swift.py b/preprocessor/preprocessor/transfer/swift.py index a58f7fb1..45e54b50 100644 --- a/preprocessor/preprocessor/transfer/swift.py +++ b/preprocessor/preprocessor/transfer/swift.py @@ -75,6 +75,7 @@ class Uploader(Base): """ def upload(self, local_path: Union[os.PathLike, List[os.PathLike]], remote_dir: os.PathLike) -> List[os.PathLike]: paths = local_path if isinstance(local_path, List) else [local_path] + local_remote_mapping = {} container, remote_dir = self.validate_container(remote_dir) remote_paths = [ os.path.join( @@ -83,13 +84,13 @@ class Uploader(Base): ) for path in paths ] - objects = [ - SwiftUploadObject( + objects = [] + for path, remote_path in zip(paths, remote_paths): + objects.append(SwiftUploadObject( path, object_name=remote_path - ) - for path, remote_path in zip(paths, remote_paths) - ] + )) + local_remote_mapping[path] = "%s/%s" % (container, remote_path) max_size = max([os.stat(path).st_size for path in paths]) options = {} @@ -120,7 +121,7 @@ class Uploader(Base): ) raise Exception('Failed to upload %s' % result["error"]) - return remote_paths + return local_remote_mapping def product_exists(self, remote_dir: os.PathLike) -> bool: with self.get_service() as swift: diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index a6e2ba8f..000ed9b6 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -153,19 +153,54 @@ def create_simple_stac_item(upload_files:Dict[str, str], extra_files:Dict[str, s properties=metadata["properties"], extra_fields=metadata["extra_fields"] ) - for metadata_file in extra_files.values(): + for i, metadata_file in enumerate(extra_files.values()): metadata_asset = Asset( href=metadata_file, - title="GSC Metadata file", + title="GSC Metadata file %s" % i, description="Accompanying GSC Metadata file copied over from source archive", media_type="application/xml", roles=["metadata"], extra_fields={}, ) - stac_item.add_asset('gsc_metadata', metadata_asset) - # add assets - # the href of stac item should be the path to the uploaded file - # construct the item with as much metadata (datetime, bbox, geometry, properties) as possible - + stac_item.add_asset('gsc_metadata_%s' % i, metadata_asset) + + counter = 1 + for image_path, uploaded_path in upload_files.items(): + title = description = "Preprocessed image" + # find out number of bands + ds = gdal.Open(image_path) + band_count = ds.RasterCount + del ds + if len(upload_files) > 1: + # TODO move this to configuration + # assuming PAN+MS product + if band_count == 1: + title += " - PAN" + description += " - Panchromatic" + else: + title += " - MS" + description += " - Multispectral" + else: + # TODO move content to configuration + pass + bands = [ + { + "name": "band%s" % j + } + for j in range(1, band_count + 1) + ] + image_asset = Asset( + href=uploaded_path, + title=title, + description=description, + media_type="image/tiff; application=geotiff; profile=cloud-optimized", + roles=["visual"], + extra_fields={ + "eo:bands": bands + }, + ) + stac_item.add_asset('preprocessed_image_%s' % counter, image_asset) + counter += 1 + stac_item_json = json.dumps(stac_item.to_dict(False)) return stac_item_json -- GitLab From 3c9f3aa866a4c9182de12efb252dfb27a84beb08 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Mon, 6 Sep 2021 17:27:56 +0200 Subject: [PATCH 16/20] add `stac_output:true` to preprocessor configs --- config/csea-emg/csea-emg_preprocessor-config.yml | 1 + config/dem/dem_preprocessor-config.yml | 1 + config/demF/demF_preprocessor-config.yml | 1 + config/emg/emg_preprocessor-config.yml | 1 + config/frtx-emg/frtx-emg_preprocessor-config.yml | 1 + config/sace-emg/sace-emg_preprocessor-config.yml | 1 + config/vhr18/vhr18_preprocessor-config.yml | 1 + 7 files changed, 7 insertions(+) diff --git a/config/csea-emg/csea-emg_preprocessor-config.yml b/config/csea-emg/csea-emg_preprocessor-config.yml index e6191898..67033738 100644 --- a/config/csea-emg/csea-emg_preprocessor-config.yml +++ b/config/csea-emg/csea-emg_preprocessor-config.yml @@ -36,6 +36,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true nested: true data_file_globs: diff --git a/config/dem/dem_preprocessor-config.yml b/config/dem/dem_preprocessor-config.yml index 8d35a8c7..314bce2d 100644 --- a/config/dem/dem_preprocessor-config.yml +++ b/config/dem/dem_preprocessor-config.yml @@ -33,6 +33,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in - '**/DEM/*.dt2' diff --git a/config/demF/demF_preprocessor-config.yml b/config/demF/demF_preprocessor-config.yml index 8d35a8c7..314bce2d 100644 --- a/config/demF/demF_preprocessor-config.yml +++ b/config/demF/demF_preprocessor-config.yml @@ -33,6 +33,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in - '**/DEM/*.dt2' diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index e6191898..67033738 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -36,6 +36,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true nested: true data_file_globs: diff --git a/config/frtx-emg/frtx-emg_preprocessor-config.yml b/config/frtx-emg/frtx-emg_preprocessor-config.yml index e6191898..67033738 100644 --- a/config/frtx-emg/frtx-emg_preprocessor-config.yml +++ b/config/frtx-emg/frtx-emg_preprocessor-config.yml @@ -36,6 +36,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true nested: true data_file_globs: diff --git a/config/sace-emg/sace-emg_preprocessor-config.yml b/config/sace-emg/sace-emg_preprocessor-config.yml index e6191898..67033738 100644 --- a/config/sace-emg/sace-emg_preprocessor-config.yml +++ b/config/sace-emg/sace-emg_preprocessor-config.yml @@ -36,6 +36,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true nested: true data_file_globs: diff --git a/config/vhr18/vhr18_preprocessor-config.yml b/config/vhr18/vhr18_preprocessor-config.yml index 0eb39189..f070ebf0 100644 --- a/config/vhr18/vhr18_preprocessor-config.yml +++ b/config/vhr18/vhr18_preprocessor-config.yml @@ -33,6 +33,7 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true move_files: true data_file_globs: - '*.tif' -- GitLab From 6e6b359541099a547aae958a5d752fe2f4da7213 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 8 Sep 2021 10:19:41 +0200 Subject: [PATCH 17/20] [preprocessor] STAC image assets use mapping from config as common_name --- preprocessor/preprocessor/config-schema.yaml | 27 ++++++++++++++++++-- preprocessor/preprocessor/preprocess.py | 4 +-- preprocessor/preprocessor/util.py | 22 +++++++++++----- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/preprocessor/preprocessor/config-schema.yaml b/preprocessor/preprocessor/config-schema.yaml index 166e3794..f0d966f4 100644 --- a/preprocessor/preprocessor/config-schema.yaml +++ b/preprocessor/preprocessor/config-schema.yaml @@ -95,8 +95,6 @@ required: - preprocessing definitions: steps: - data_file_globs: - description: Custom globs for filtering which files will be used for this step. custom_preprocessor: description: Definition of a custom preprocessor step type: object @@ -250,3 +248,28 @@ definitions: kwargs: description: The map of keyword arguments to pass to that function. type: object + move_files: + type: boolean + default: false + description: "Files will be moved between steps to save space instead of copied." + nested: + type: boolean + default: false + description: "Unpack step will descend into the sub-archives to extract files." + stac_output: + type: boolean + default: false + description: "Switch to generate STAC Item output JSON to output queue." + data_file_globs: + description: "Custom globs for filtering which files will be used as input to this step." + type: array + items: + type: string + stac_band_mapping: + type: object + description: "Mapping of expected band counts to lists of expected common band names." + items: + type: array + description: "List of individual sorted band names." + items: + type: string diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index d7ed92f1..ca8e2eec 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -254,7 +254,7 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N if preprocess_config.get('stac_output', False): # build a stac asset - stac_item = create_simple_stac_item(upload_images, upload_extra) + stac_item = create_simple_stac_item(preprocess_config, upload_images, upload_extra) return upload_filenames + extra_filenames, stac_item return upload_filenames, file_path @@ -363,7 +363,7 @@ def preprocess_browse(config: dict, browse_type: str, browse_report: dict, brows ) if preprocess_config.get('stac_output', False): # build a stac asset - stac_item = create_simple_stac_item(upload_images, upload_extra) + stac_item = create_simple_stac_item(preprocess_config, upload_images, upload_extra) return upload_filenames + extra_filenames, stac_item return upload_filenames, file_path diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index 000ed9b6..752382ff 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -137,7 +137,7 @@ def set_gdal_options(config_options): gdal.SetConfigOption(key, value) -def create_simple_stac_item(upload_files:Dict[str, str], extra_files:Dict[str, str]): +def create_simple_stac_item(preprocessor_config: dict, upload_files:Dict[str, str], extra_files:Dict[str, str]): """ Temporary method creating a minimal STAC item from information about products uploaded and metadata files uploaded. Accepts: 'upload_files' dictionary of upload_files (images), where key is local path and value is remote path. 'extra_files' dictionary of extra_files (sidecar or metadata), where key is local path and value is remote path. @@ -183,12 +183,20 @@ def create_simple_stac_item(upload_files:Dict[str, str], extra_files:Dict[str, s else: # TODO move content to configuration pass - bands = [ - { - "name": "band%s" % j - } - for j in range(1, band_count + 1) - ] + bands = [] + for j in range(band_count): + bands_d = {"name": "band%s" % (j + 1)} + # if mapping configured, extract common_name based on index of band in mapping + if preprocessor_config.get('stac_band_mapping', False): + band_common_names = preprocessor_config.get('stac_band_mapping').get(band_count, False) + if band_common_names: + band_common_names = [band_common_names] if isinstance(band_common_names, str) else band_common_names + bands_d["common_name"] = band_common_names[j] + else: + bands_d["common_name"] = "band%s" % (j + 1) + else: + bands_d["common_name"] = "band%s" % (j + 1) + bands.append(bands_d) image_asset = Asset( href=uploaded_path, title=title, -- GitLab From 48cdc3ffa5d0fca05e8a38430a1a8d160df0b952 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 8 Sep 2021 10:48:18 +0200 Subject: [PATCH 18/20] Add band mapping configuration to all preprocessor configs --- .../csea-emg/csea-emg_preprocessor-config.yml | 120 ++++++++++++++++++ config/dem/dem_preprocessor-config.yml | 2 + config/demF/demF_preprocessor-config.yml | 2 + config/emg/emg_preprocessor-config.yml | 120 ++++++++++++++++++ .../frtx-emg/frtx-emg_preprocessor-config.yml | 120 ++++++++++++++++++ .../sace-emg/sace-emg_preprocessor-config.yml | 120 ++++++++++++++++++ config/vhr18/vhr18_preprocessor-config.yml | 6 + 7 files changed, 490 insertions(+) diff --git a/config/csea-emg/csea-emg_preprocessor-config.yml b/config/csea-emg/csea-emg_preprocessor-config.yml index 67033738..c11c947d 100644 --- a/config/csea-emg/csea-emg_preprocessor-config.yml +++ b/config/csea-emg/csea-emg_preprocessor-config.yml @@ -37,6 +37,12 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: @@ -131,6 +137,20 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" @@ -161,6 +181,14 @@ preprocessing: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - "*IMG_*.JP2" @@ -188,6 +216,14 @@ preprocessing: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - "*IMG_*.JP2" @@ -216,6 +252,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - "*IMG_*.JP2" @@ -244,6 +288,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -269,6 +321,18 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - "*.tif" @@ -287,6 +351,27 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - "*.tif" @@ -305,9 +390,33 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" @@ -349,6 +458,17 @@ preprocessing: data_file_globs: - "*_SBI.tif" - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/dem/dem_preprocessor-config.yml b/config/dem/dem_preprocessor-config.yml index 314bce2d..2fd7f61a 100644 --- a/config/dem/dem_preprocessor-config.yml +++ b/config/dem/dem_preprocessor-config.yml @@ -34,6 +34,8 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 1: "height" move_files: true data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in - '**/DEM/*.dt2' diff --git a/config/demF/demF_preprocessor-config.yml b/config/demF/demF_preprocessor-config.yml index 314bce2d..2fd7f61a 100644 --- a/config/demF/demF_preprocessor-config.yml +++ b/config/demF/demF_preprocessor-config.yml @@ -34,6 +34,8 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 1: "height" move_files: true data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in - '**/DEM/*.dt2' diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index 67033738..c11c947d 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -37,6 +37,12 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: @@ -131,6 +137,20 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" @@ -161,6 +181,14 @@ preprocessing: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - "*IMG_*.JP2" @@ -188,6 +216,14 @@ preprocessing: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - "*IMG_*.JP2" @@ -216,6 +252,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - "*IMG_*.JP2" @@ -244,6 +288,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -269,6 +321,18 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - "*.tif" @@ -287,6 +351,27 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - "*.tif" @@ -305,9 +390,33 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" @@ -349,6 +458,17 @@ preprocessing: data_file_globs: - "*_SBI.tif" - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/frtx-emg/frtx-emg_preprocessor-config.yml b/config/frtx-emg/frtx-emg_preprocessor-config.yml index 67033738..c11c947d 100644 --- a/config/frtx-emg/frtx-emg_preprocessor-config.yml +++ b/config/frtx-emg/frtx-emg_preprocessor-config.yml @@ -37,6 +37,12 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: @@ -131,6 +137,20 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" @@ -161,6 +181,14 @@ preprocessing: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - "*IMG_*.JP2" @@ -188,6 +216,14 @@ preprocessing: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - "*IMG_*.JP2" @@ -216,6 +252,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - "*IMG_*.JP2" @@ -244,6 +288,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -269,6 +321,18 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - "*.tif" @@ -287,6 +351,27 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - "*.tif" @@ -305,9 +390,33 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" @@ -349,6 +458,17 @@ preprocessing: data_file_globs: - "*_SBI.tif" - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/sace-emg/sace-emg_preprocessor-config.yml b/config/sace-emg/sace-emg_preprocessor-config.yml index 67033738..c11c947d 100644 --- a/config/sace-emg/sace-emg_preprocessor-config.yml +++ b/config/sace-emg/sace-emg_preprocessor-config.yml @@ -37,6 +37,12 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: @@ -131,6 +137,20 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" @@ -161,6 +181,14 @@ preprocessing: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - "*IMG_*.JP2" @@ -188,6 +216,14 @@ preprocessing: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - "*IMG_*.JP2" @@ -216,6 +252,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - "*IMG_*.JP2" @@ -244,6 +288,14 @@ preprocessing: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -269,6 +321,18 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - "*.tif" @@ -287,6 +351,27 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - "*.tif" @@ -305,9 +390,33 @@ preprocessing: - BIGTIFF=YES - OVERVIEWS=AUTO - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" @@ -349,6 +458,17 @@ preprocessing: data_file_globs: - "*_SBI.tif" - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/vhr18/vhr18_preprocessor-config.yml b/config/vhr18/vhr18_preprocessor-config.yml index f070ebf0..a56f2a14 100644 --- a/config/vhr18/vhr18_preprocessor-config.yml +++ b/config/vhr18/vhr18_preprocessor-config.yml @@ -34,6 +34,12 @@ level_extractor: preprocessing: defaults: stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true data_file_globs: - '*.tif' -- GitLab From ac88afa9758a2676a8e55d68369acec06a6d34ec Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Tue, 14 Sep 2021 15:48:56 +0200 Subject: [PATCH 19/20] create null geometry in STAC item --- preprocessor/preprocessor/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessor/preprocessor/metadata.py b/preprocessor/preprocessor/metadata.py index 463d9a94..fcced508 100644 --- a/preprocessor/preprocessor/metadata.py +++ b/preprocessor/preprocessor/metadata.py @@ -58,7 +58,7 @@ def extract_metadata_for_stac(metadata_files): output_metadata = { "id": evaluate_xpath(root, GSC_SCHEMA['identifier']), - "geometry": {}, # optional + "geometry": None, # optional "bbox": None, # optional "datetime": datetime.now(timezone.utc), "properties": {}, -- GitLab From 6df7df063713bf918ade42e46e037813a23cd73f Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 16 Sep 2021 16:27:50 +0200 Subject: [PATCH 20/20] sort stac input assets by image name to know which of pan/ms coverages is 1/2 --- preprocessor/preprocessor/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index 752382ff..6ce37cc6 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -165,7 +165,7 @@ def create_simple_stac_item(preprocessor_config: dict, upload_files:Dict[str, st stac_item.add_asset('gsc_metadata_%s' % i, metadata_asset) counter = 1 - for image_path, uploaded_path in upload_files.items(): + for image_path, uploaded_path in sorted(upload_files.items()): title = description = "Preprocessed image" # find out number of bands ds = gdal.Open(image_path) -- GitLab