From dd01c9da20ee30a716ba87f41469a7a2c0c0a206 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek <lubomir.bucek@eox.at> Date: Mon, 28 Sep 2020 20:48:30 +0200 Subject: [PATCH] add product_exists method to check if 2 or more files in target and target.replace configuration default to false --- config/dem_preprocessor-config.yml | 1 + config/emg_preprocessor-config.yml | 1 + config/vhr18_preprocessor-config.yml | 1 + documentation/operator-guide/configuration.rst | 3 +-- preprocessor/preprocessor/config-schema.yaml | 4 ++++ preprocessor/preprocessor/preprocess.py | 11 ++++++++++- preprocessor/preprocessor/transfer/abc.py | 4 ++++ preprocessor/preprocessor/transfer/local.py | 7 +++++++ preprocessor/preprocessor/transfer/swift.py | 17 +++++++++++++++-- 9 files changed, 44 insertions(+), 5 deletions(-) diff --git a/config/dem_preprocessor-config.yml b/config/dem_preprocessor-config.yml index 8f34983a..3ae30fe3 100644 --- a/config/dem_preprocessor-config.yml +++ b/config/dem_preprocessor-config.yml @@ -11,6 +11,7 @@ source: user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}' target: type: swift + replace: false kwargs: username: !env '${OS_USERNAME}' password: !env '${OS_PASSWORD}' diff --git a/config/emg_preprocessor-config.yml b/config/emg_preprocessor-config.yml index a21950f7..dc6fc7b0 100644 --- a/config/emg_preprocessor-config.yml +++ b/config/emg_preprocessor-config.yml @@ -22,6 +22,7 @@ source: # user_domain_name: !env{{OS_USER_DOMAIN_NAME}} target: type: local + replace: true kwargs: storage_path: /mnt/data/target diff --git a/config/vhr18_preprocessor-config.yml b/config/vhr18_preprocessor-config.yml index a63ffb1d..359c52da 100644 --- a/config/vhr18_preprocessor-config.yml +++ b/config/vhr18_preprocessor-config.yml @@ -11,6 +11,7 @@ source: user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}' target: type: swift + replace: false kwargs: username: !env '${OS_USERNAME}' password: !env '${OS_PASSWORD}' diff --git a/documentation/operator-guide/configuration.rst b/documentation/operator-guide/configuration.rst index fedf0a95..9ecfbd6d 100644 --- a/documentation/operator-guide/configuration.rst +++ b/documentation/operator-guide/configuration.rst @@ -117,8 +117,6 @@ The following ``.env`` files are typically used: django admin user to be used with the admin GUI. * ``<stack-name>_obs.env``: This contains access parameters for the object storage(s). -* ``<stack-name>_preprocessor.env``: Preprocessor related environment - variables * ``<stack-name>_redis.env``: Redis access credentials and queue names @@ -173,6 +171,7 @@ retrieve the original product files: * ``OS_REGION_NAME_DOWNLOAD`` * ``OS_AUTH_URL_DOWNLOAD`` * ``ST_AUTH_VERSION_DOWNLOAD`` +* ``OS_USER_DOMAIN_NAME_DOWNLOAD`` VS Environment Variables ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/preprocessor/preprocessor/config-schema.yaml b/preprocessor/preprocessor/config-schema.yaml index dd67fed6..93f3f9b4 100644 --- a/preprocessor/preprocessor/config-schema.yaml +++ b/preprocessor/preprocessor/config-schema.yaml @@ -26,6 +26,10 @@ properties: description: Extra arguments. Use depends on actual implementation. type: object # required: [type] + replace: + description: If set to true, output replaces already existing files on target. If no existing are present, preprocessing does not start. + type: boolean + default: false workdir: description: The local directory, where intermediary files are to be stored. type: string diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 355505e7..626099a5 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -124,6 +124,16 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N """ with workdir(config, use_dir) as dirname, Timer() as preprocess_timer: logger.info('Preprocessing %s in %s' % (file_path, dirname)) + target_config = config['target'] + # check if target.replace is configured and if not, check storage if files there + if not target_config['replace']: + uploader = get_uploader( + target_config['type'], target_config.get('args'), target_config.get('kwargs') + ) + if uploader.product_exists(file_path): + raise Exception('Target.replace configuration is not set to true and objects already exist in target %s.' % file_path) + else: + logger.debug('Product does not yet exist on target') # check if we can reuse a previous download if not os.path.isdir('download'): os.mkdir('download') @@ -198,7 +208,6 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N preprocess_internal(preprocess_config, 'unpack') # get an uploader for the finalized images - target_config = config['target'] uploader = get_uploader( target_config['type'], target_config.get('args'), target_config.get('kwargs') ) diff --git a/preprocessor/preprocessor/transfer/abc.py b/preprocessor/preprocessor/transfer/abc.py index 409a2ea3..9cc9818d 100644 --- a/preprocessor/preprocessor/transfer/abc.py +++ b/preprocessor/preprocessor/transfer/abc.py @@ -30,3 +30,7 @@ class Uploader(ABC): @abstractmethod def upload(self, local_path: Union[PathLike, List[PathLike]], remote_dir: PathLike) -> List[PathLike]: pass + + @abstractmethod + def product_exists(self, remote_dir: PathLike) -> bool: + pass diff --git a/preprocessor/preprocessor/transfer/local.py b/preprocessor/preprocessor/transfer/local.py index 2ee3da83..f1450f7d 100644 --- a/preprocessor/preprocessor/transfer/local.py +++ b/preprocessor/preprocessor/transfer/local.py @@ -39,3 +39,10 @@ class Uploader(Base): shutil.copy2(local_path, remote_path) return remote_paths + + def product_exists(self, remote_dir: os.PathLike) -> bool: + remote_path = os.path.join(self.storage_path, remote_dir) + for r, d, f in os.walk(remote_path): + if len(f) >= 2: + return True + return False diff --git a/preprocessor/preprocessor/transfer/swift.py b/preprocessor/preprocessor/transfer/swift.py index 7ecd853c..ad57dabc 100644 --- a/preprocessor/preprocessor/transfer/swift.py +++ b/preprocessor/preprocessor/transfer/swift.py @@ -98,8 +98,8 @@ class Uploader(Base): options['use_slo'] = True with self.get_service() as swift: - # use container first part of path of container as upload container - container = self.container or paths[0].partition('/')[0] + # use container or first part of path + container = self.container or remote_dir.partition('/')[0] results = swift.upload(container=container, objects=objects, options=options) for result in results: @@ -120,3 +120,16 @@ class Uploader(Base): raise Exception('Failed to upload %s' % result["error"]) return remote_paths + + def product_exists(self, remote_dir: os.PathLike) -> bool: + with self.get_service() as swift: + # use container or first part of path + container = self.container or remote_dir.partition('/')[0] + list_parts_gen = swift.list( + container=container, options={"prefix": remote_dir}, + ) + for page in list_parts_gen: + if page["success"] and len(page["listing"]) >= 2: + # at least two files present -> pass validation + return True + return False -- GitLab