From d803c8ea8a08473c5ab851a196a283fd74db9c07 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 20 Oct 2020 13:35:47 +0200 Subject: [PATCH 01/54] Initial implementation of modular registrar --- core/registrar/__init__.py | 0 core/registrar/backend.py | 138 ++++++++++++++++++++++ core/registrar/context.py | 13 +++ core/registrar/exceptions.py | 4 + core/registrar/registrar.py | 52 +++++++++ core/registrar/scheme.py | 99 ++++++++++++++++ core/registrar/source.py | 214 +++++++++++++++++++++++++++++++++++ core/registrar/utils.py | 0 core/registrar/xml.py | 37 ++++++ 9 files changed, 557 insertions(+) create mode 100644 core/registrar/__init__.py create mode 100644 core/registrar/backend.py create mode 100644 core/registrar/context.py create mode 100644 core/registrar/exceptions.py create mode 100644 core/registrar/registrar.py create mode 100644 core/registrar/scheme.py create mode 100644 core/registrar/source.py create mode 100644 core/registrar/utils.py create mode 100644 core/registrar/xml.py diff --git a/core/registrar/__init__.py b/core/registrar/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/core/registrar/backend.py b/core/registrar/backend.py new file mode 100644 index 00000000..b20c0396 --- /dev/null +++ b/core/registrar/backend.py @@ -0,0 +1,138 @@ +import os +import re +import sys +import logging + +import django +from django.db import transaction +from django.contrib.gis.geos import GEOSGeometry +from eoxserver.resources.coverages import models +from eoxserver.resources.coverages.registration.product import ProductRegistrator +from eoxserver.resources.coverages.registration.browse import BrowseRegistrator +from eoxserver.resources.coverages.registration.mask import MaskRegistrator +from eoxserver.resources.coverages.registration.registrators.gdal import GDALRegistrator + +from .exceptions import RegistrationError +from .context import Context +from .source import Source + + +logger = logging.getLogger(__name__) + +class RegistrationResult: + pass + + +class Backend: + def register_item(self, item: Context) -> RegistrationResult: + raise NotImplementedError + + +class EOxServerBackend(Backend): + def __init__(self, instance_base_path: str, instance_name: str, mapping: dict, simplify_footprint_tolerance: int=None): + self.mapping = mapping + self.simplify_footprint_tolerance = simplify_footprint_tolerance + path = os.path.join(instance_base_path, instance_name) + if path not in sys.path: + sys.path.append(path) + + os.environ.setdefault("DJANGO_SETTINGS_MODULE", f"{instance_name}.settings") # TODO: from config + django.setup() + + def exists(self, source: Source, item: Context): + return models.Product.objects.filter(identifier=item.itentifier).exists() + + def _get_storage_from_source(self, source: Source) -> list: + return [] + + @transaction.atomic + def register(self, source: Source, item: Context, replace: bool) -> RegistrationResult: + # get the mapping for this particular item + mapping = self.mapping.get(item.product_type, {}).get(item.level_name) + metadata_file = item.metadata_files[0] + + storage = self._get_storage_from_source(source) + + try: + models.ProductType.objects.get(name=item['product_type_name']) + except models.ProductType.DoesNotExist: + pass + + product, _ = ProductRegistrator().register( + metadata_locations=[storage + [metadata_file]], + type_name=item['product_type_name'], + replace=replace, + extended_metadata=True, + mask_locations=None, + package_path=None, + 
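+            # the simplification tolerance below comes from the backend
+            # configuration; values in item.metadata override metadata
+            # extracted from the metadata file itself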
simplify_footprint_tolerance=self.simplify_footprint_tolerance, + overrides=item.metadata, + ) + if product.footprint.empty: + raise RegistrationError("No footprint was extracted. full product: %s" % product) + + # insert the product in the to be associated collections + for collection_id in mapping.get('collections', []): + collection = models.Collection.objects.get( + identifier=collection_id, + ) + models.collection_insert_eo_object(collection, product) + + # register coverages and link them to the product + for raster_identifier, coverage_type_name in mapping.get('coverages', {}).items(): + raster_item = item.raster_files.get(raster_identifier) + + report = GDALRegistrator().register( + data_locations=[storage + [raster_item]], + metadata_locations=[storage + [metadata_file]], + coverage_type_name=coverage_type_name, + overrides={ + "identifier": f'{product.identifier}__{raster_identifier}__coverage', + "footprint": None, + }, + replace=replace, + ) + logger.debug("Adding coverage to product") + models.product_add_coverage(product, report.coverage) + + # register browses + for raster_identifier, browse_type_name in mapping.get('browses', {}): + raster_item = item.raster_files.get(raster_identifier) + BrowseRegistrator().register( + product.identifier, + storage + [raster_item], + browse_type_name, + ) + + # register masks + for mask_identifier, mask_type_name in mapping.get('masks', {}): + mask_item = item.mask_files.get(mask_identifier) + MaskRegistrator().register( + product.identifier, + storage + [mask_item], + mask_type_name, + ) + + +BACKENDS = { + 'eoxserver': EOxServerBackend +} + +def get_backend(config: dict, path: str) -> Backend: + cfg_backends = config['backends'] + + for cfg_backend in cfg_backends: + if cfg_backend['filter']: + if re.match(cfg_backend['filter'], path): + break + else: + break + else: + # no source found + raise RegistrationError(f'Could not find a suitable backend for the path {path}') + + return BACKENDS[cfg_backend['type']]( + *cfg_backend.get('args', []), + **cfg_backend.get('kwargs', {}), + ) + diff --git a/core/registrar/context.py b/core/registrar/context.py new file mode 100644 index 00000000..81ded127 --- /dev/null +++ b/core/registrar/context.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass, field + + +@dataclass +class Context: + identifier: str + product_type: str = None + product_level: str = None + metadata: dict = field(default_factory=dict) + raster_files: dict = field(default_factory=dict) + metadata_files: dict = field(default_factory=dict) + masks: dict = field(default_factory=dict) + mask_files: dict = field(default_factory=dict) diff --git a/core/registrar/exceptions.py b/core/registrar/exceptions.py new file mode 100644 index 00000000..81a2e41e --- /dev/null +++ b/core/registrar/exceptions.py @@ -0,0 +1,4 @@ + + +class RegistrationError(Exception): + pass diff --git a/core/registrar/registrar.py b/core/registrar/registrar.py new file mode 100644 index 00000000..53e2379e --- /dev/null +++ b/core/registrar/registrar.py @@ -0,0 +1,52 @@ +import re + +from .source import get_source +from .exceptions import RegistrationError + + + +def register(config, path): + # TODO: select registration scheme (config, path) + source = get_source(config, path) + scheme = select_registation_scheme(config, path) + context = scheme.get_context(source, path) + + for pre_handler in get_pre_handlers(config): + pre_handler(config, path, context) + + for backend in get_backends(config): + if backend.exists(source, context): + if config.replace: + 
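+                # overwrite the product already registered under this identifier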
backend.register(source, context, replace=True) + else: + raise RegistrationError(f'Object {context} is already registered') + else: + backend.register(source, context, replace=False) + + for post_handler in get_post_handlers(config): + post_handler(config, path, context) + + +def select_registation_scheme(config, path): + cfg_schemes = config['schemes'] + for cfg_scheme in cfg_schemes: + if cfg_scheme['filter']: + if re.match(cfg_scheme['filter'], path): + break + else: + break + else: + # no source found + raise RegistrationError(f'Could not find a suitable scheme for the path {path}') + + +def get_pre_handlers(config): + pass + + +def get_post_handlers(config): + pass + + +def get_backends(config): + pass diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py new file mode 100644 index 00000000..d68a911a --- /dev/null +++ b/core/registrar/scheme.py @@ -0,0 +1,99 @@ +import re + +from os.path import join + +from .xml import read_xml, parse_metadata_schema, Parameter +from .context import Context +from .exceptions import RegistrationError + + +class RegistrationScheme: + def __init__(self, source, path): + self.source = source + self.path = path + + def get_context(self): + raise NotImplementedError + + + + +class Sentinel2RegistrationScheme(RegistrationScheme): + MTD_TL_SCHEMA = { + 'begin_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_START_TIME/text()', False, parse_datetime), + 'end_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_STOP_TIME/text()', False, parse_datetime), + 'identifier': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_URI/text()'), + 'level': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PROCESSING_LEVEL/text()'), + 'type': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_TYPE/text()'), + 'generation_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/GENERATION_TIME/text()', False, parse_datetime), + 'cloud_cover': Parameter('/n1:Level-2A_User_Product/n1:Quality_Indicators_Info/Cloud_Coverage_Assessment'), + 'image_file_paths': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/Product_Organisation/Granule_List/Granule/IMAGE_FILE/text()', True), + 'mask_file_paths': Parameter('/n1:Level-2A_Tile_ID/n1:Quality_Indicators_Info/Pixel_Level_QI/MASK_FILENAME', True), + } + + S2_NAMESPACES = { + 'n1': "https://psd-14.sentinel2.eo.esa.int/PSD/User_Product_Level-2A.xsd" + } + + def get_context(self): + metadata_file = join(self.path, 'MTD_TL.xml') + mtd_tree = read_xml(self.source, metadata_file) + + # get MTD metadata + + metadata = parse_metadata_schema(mtd_tree, self.MTD_TL_SCHEMA, self.S2_NAMESPACES) + + band_re = re.compile(r'.*([A-Z0-9]{3})_([0-9]{2}m)$') + raster_files = { + band_re.match(image_file_path).groups()[0]: f'{join(self.path, image_file_path)}.jp2' + for image_file_path in metadata['image_file_paths'] + } + + mask_type_re = re.compile(r'.*/MSK_([A-Z]*)_([A-Z0-9]{3}).[a-z0-9]+$') + mask_files = { + mask_type_re.match(mask_file_path).groups[0]: mask_file_path + for mask_file_path in metadata['mask_file_paths'] + } + + return Context( + identifier=metadata['identifier'], + raster_files=raster_files, + mask_files=mask_files, + metadata_files=[metadata_file], + metadata={ + 'begin_time': metadata['begin_time'], + 'end_time': metadata['end_time'], + 'generation_time': metadata['generation_time'], + 'cloud_cover': metadata['cloud_cover'], + } + ) + + + +class 
GSCRegistrationScheme(RegistrationScheme): + pass + + +REGISTRATION_SCHEMES = { + 'gsc': GSCRegistrationScheme, + 'sentinel-2': Sentinel2RegistrationScheme, +} + +def get_scheme(config: dict, path: str) -> RegistrationScheme: + cfg_schemes = config['schemes'] + + for cfg_scheme in cfg_schemes: + if cfg_scheme['filter']: + if re.match(cfg_scheme['filter'], path): + break + else: + break + else: + # no source found + raise RegistrationError(f'Could not find a suitable scheme for the path {path}') + + return REGISTRATION_SCHEMES[cfg_scheme['type']]( + *cfg_scheme.get('args', []), + **cfg_scheme.get('kwargs', {}), + ) + diff --git a/core/registrar/source.py b/core/registrar/source.py new file mode 100644 index 00000000..2230ee2a --- /dev/null +++ b/core/registrar/source.py @@ -0,0 +1,214 @@ +import re +from os.path import normpath, join, isabs +import shutil +from glob import glob +from fnmatch import fnmatch + +import boto3 +from swiftclient.multithreading import OutputManager +from swiftclient.service import SwiftError, SwiftService + + +class RegistrationError(Exception): + pass + + +class Source: + def list_files(self, path, glob_pattern=None): + raise NotImplementedError + + def get_file(self, path, target_path): + raise NotImplementedError + + def get_vsi_env_and_path(self, path): + raise NotImplementedError + + +class SwiftSource(Source): + def __init__(self, username=None, password=None, tenant_name=None, + tenant_id=None, region_name=None, user_domain_id=None, + user_domain_name=None, auth_url=None, auth_version=None, + container=None): + self.username = username + self.password = password + self.tenant_name = tenant_name + self.tenant_id = tenant_id + self.region_name = region_name + self.user_domain_id = user_domain_id + self.user_domain_name = user_domain_name + self.auth_url = auth_url + self.auth_version = auth_version # TODO: assume 3 + self.container = container + + def get_service(self): + return SwiftService(options={ + "os_username": self.username, + "os_password": self.password, + "os_tenant_name": self.tenant_name, + "os_tenant_id": self.tenant_id, + "os_region_name": self.region_name, + "os_auth_url": self.auth_url, + "auth_version": self.auth_version, + "os_user_domain_id": self.user_domain_id, + "os_user_domain_name": self.user_domain_name, + }) + + def get_container_and_path(self, path: str): + container = self.container + if container is None: + parts = (path[1:] if path.startswith('/') else path).split('/') + container, path = parts[0], parts[1:].join('/') + + return container, path + + + def list_files(self, path, glob_pattern=None): + container, path = self.get_container_and_path(path) + + with self.get_service() as swift: + pages = swift.list( + container=container, + options={"prefix": path}, + ) + + filenames = [] + for page in pages: + if page["success"]: + # at least two files present -> pass validation + for item in page["listing"]: + if glob_pattern is None or fnmatch(item['name'], glob_pattern): + filenames.append(item['name']) + else: + raise page['error'] + + return filenames + + def get_file(self, path, target_path): + container, path = self.get_container_and_path(path) + + with self.get_service() as swift: + results = swift.download( + container, + [path], + options={ + 'out_file': target_path + } + ) + + for result in results: + if not result["success"]: + raise Exception('Failed to download %s' % path) + + def get_vsi_env_and_path(self, path): + container, path = self.get_container_and_path(path) + return { + 'OS_IDENTITY_API_VERSION': 
self.auth_version, + 'OS_AUTH_URL': self.auth_url, + 'OS_USERNAME': self.username, + 'OS_PASSWORD': self.password, + 'OS_USER_DOMAIN_NAME': self.user_domain_name, + # 'OS_PROJECT_NAME': self.tena, + # 'OS_PROJECT_DOMAIN_NAME': , + 'OS_REGION_NAME': self.region_name, + }, f'/vsiswift/{container}/{path}' + + +class S3Source(Source): + def __init__(self, bucket_name=None, secret_access_key=None, access_key_id=None, endpoint_url=None, **client_kwargs): + # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client + # for client_kwargs + self.bucket_name = bucket_name + self.secret_access_key=secret_access_key + self.access_key_id=access_key_id + self.endpoint_url = endpoint_url + + self.client = boto3.client( + 's3', + aws_secret_access_key=secret_access_key, + aws_access_key_id=access_key_id, + endpoint_url=endpoint_url, + **client_kwargs, + ) + + def get_bucket_and_key(self, path: str): + container = self.bucket_name + if container is None: + parts = (path[1:] if path.startswith('/') else path).split('/') + container, path = parts[0], parts[1:].join('/') + + return container, path + + def list_files(self, path, glob_pattern=None): + bucket, key = self.get_bucket_and_key(path) + response = self.client.list_objects_v2( + Bucket=bucket, + Prefix=key, + ) + + return [ + item['Key'] + for item in response['Contents'] + if glob_pattern is None or fnmatch(item['Key'], glob_pattern) + ] + + def get_file(self, path, target_path): + bucket, key = self.get_bucket_and_key(path) + self.client.download_file(bucket, key, target_path) + + def get_vsi_env_and_path(self, path: str, streaming: bool=False): + bucket, key = self.get_bucket_and_key(path) + return { + 'AWS_SECRET_ACCESS_KEY': self.secret_access_key, + 'AWS_ACCESS_KEY_ID': self.access_key_id, + 'AWS_S3_ENDPOINT': self.endpoint_url, + }, f'/{"vsis3" if not streaming else "vsis3_streaming"}/{bucket}/{key}' + + +class LocalSource(Source): + def __init__(self, root_directory): + self.root_directory = root_directory + + def _join_path(self, path): + path = normpath(path) + if isabs(path): + path = path[1:] + + return join(self.root_directory, path) + + def list_files(self, path, glob_pattern=None): + if glob_pattern is not None: + return glob(join(self._join_path(path), glob_pattern)) + else: + return glob(join(self._join_path(path), '*')) + + def get_file(self, path, target_path): + shutil.copy(self._join_path(path), target_path) + + def get_vsi_env_and_path(self, path): + return {}, self._join_path(path) + + +SOURCE_TYPES = { + 'swift': SwiftSource, + 's3': S3Source, + 'local': LocalSource, +} + + +def get_source(config: dict, path: str) -> Source: + cfg_sources = config['sources'] + + for cfg_source in cfg_sources: + if cfg_source['filter']: + if re.match(cfg_source['filter'], path): + break + else: + break + else: + # no source found + raise RegistrationError(f'Could not find a suitable source for the path {path}') + + return SOURCE_TYPES[cfg_source['type']]( + *cfg_source.get('args', []), + **cfg_source.get('kwargs', {}) + ) diff --git a/core/registrar/utils.py b/core/registrar/utils.py new file mode 100644 index 00000000..e69de29b diff --git a/core/registrar/xml.py b/core/registrar/xml.py new file mode 100644 index 00000000..4a088b26 --- /dev/null +++ b/core/registrar/xml.py @@ -0,0 +1,37 @@ +from tempfile import NamedTemporaryFile +from dataclasses import dataclass, field +from typing import Union, Type, Optional, List, Callable, Any + +import lxml.etree + +from .source import Source + + 
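+# Declarative metadata extraction: a schema maps output keys to Parameter
+# entries (an XPath expression, a multiplicity flag and an optional parser
+# callable), which parse_metadata_schema evaluates against a parsed tree,
+# e.g. parse_metadata_schema(tree, {'id': Parameter('//gsc:identifier/text()')}, nsmap)
+# (the XPath and nsmap in this example are illustrative only).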
+def read_xml(source: Source, path: str) -> lxml.etree._ElementTree: + with NamedTemporaryFile() as f: + source.get_file(path, f.name) + return lxml.etree.parse(f) + +@dataclass +class Parameter: + xpath: str + multi: bool = False + parser: Optional[Callable[[str], Any]] = None + namespaces: dict = field(default_factory=dict) + + +def parse_metadata_schema(tree: lxml.etree._ElementTree, schema: dict, namespaces: dict=None) -> dict: + out = {} + for key, param in schema.items(): + values = tree.xpath(param.xpath, namespaces=param.namespaces or namespaces) + if param.multi: + value = [ + param.parser(v) if param.parser else v + for v in values + ] + else: + value = param.parser(values[0]) if param.parser else values[0] + + out[key] = value + + return out -- GitLab From 882c1aef96f1e806ea7abae753e38ea2045a36f7 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 20 Oct 2020 16:07:04 +0200 Subject: [PATCH 02/54] Adding CLI and daemon files Adding config schema --- core/registrar/cli.py | 80 +++++++++++++++++++++++++++++++ core/registrar/config-schema.yaml | 67 ++++++++++++++++++++++++++ core/registrar/config.py | 39 +++++++++++++++ core/registrar/daemon.py | 26 ++++++++++ core/registrar/registrar.py | 20 ++------ 5 files changed, 215 insertions(+), 17 deletions(-) create mode 100644 core/registrar/cli.py create mode 100644 core/registrar/config-schema.yaml create mode 100644 core/registrar/config.py create mode 100644 core/registrar/daemon.py diff --git a/core/registrar/cli.py b/core/registrar/cli.py new file mode 100644 index 00000000..2703bdf8 --- /dev/null +++ b/core/registrar/cli.py @@ -0,0 +1,80 @@ +from os.path import join, dirname +import logging.config +import json + +import click +import yaml +import jsonschema + +from .registrar import register_file +from .daemon import run_daemon +from .config import load_config + + +def setup_logging(debug=False): + logging.config.dictConfig({ + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'brief': { + 'format': '%(levelname)s %(name)s: %(message)s' + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'DEBUG' if debug else 'INFO', + 'formatter': 'brief', + } + }, + 'root': { + 'handlers': ['console'], + 'level': 'DEBUG' if debug else 'INFO', + } + }) + + +def validate_config(config): + with open(join(dirname(__file__), 'config-schema.yaml')) as f: + schema = yaml.load(f) + + jsonschema.validate(config, schema) + + +@click.group() +def cli(): + pass + + +@cli.command(help='Run the registrar daemon, attaching to a Redis queue') +@click.option('--config-file', type=click.File('r')) +@click.option('--validate/--no-validate', default=False) +@click.option('--host', type=str) +@click.option('--port', type=int) +@click.option('--listen-queue', type=str) +@click.option('--write-queue', type=str) +@click.option('--debug/--no-debug', default=False) +def daemon(config_file=None, validate=False, host=None, port=None, listen_queue=None, write_queue=None, debug=False): + setup_logging(debug) + config = load_config(config_file) + if validate: + validate_config(config) + run_daemon(config, host, port, listen_queue, write_queue) + + +@cli.command(help='Run a single, one-off registration') +@click.argument('file_path', type=str) +@click.option('--config-file', type=click.File('r')) +@click.option('--validate/--no-validate', default=False) +@click.option('--replace/--no-replace', default=False) +@click.option('--debug/--no-debug', default=False) +def register(file_path, config_file=None, 
validate=False, debug=False):
+    setup_logging(debug)
+    config = load_config(config_file)
+    if validate:
+        validate_config(config)
+
+    register_file(config, file_path)
+
+if __name__ == '__main__':
+    cli()
diff --git a/core/registrar/config-schema.yaml b/core/registrar/config-schema.yaml
new file mode 100644
index 00000000..ed85899e
--- /dev/null
+++ b/core/registrar/config-schema.yaml
@@ -0,0 +1,67 @@
+$id: https://example.com/registrar-config.schema.json
+$schema: http://json-schema.org/draft-07/schema#
+type: object
+properties:
+  sources:
+    description: Input sources definitions
+    type: array
+    items:
+      description: A single source definition
+      type: object
+      properties:
+        type:
+          description: The source type.
+          type: string
+          enum: ['local', 's3', 'swift']
+        filter:
+          description: Optional filter to only be used for these paths
+          type: string
+        args:
+          description: Constructor arguments
+          type: array
+        kwargs:
+          description: Constructor keyword arguments
+          type: object
+  schemes:
+    description: Registration schemes definitions
+    type: array
+    items:
+      description: A single registration scheme definition
+      type: object
+      properties:
+        type:
+          description: The registration scheme type.
+          type: string
+          enum: ['gsc', 'sentinel-2']
+        filter:
+          description: Optional filter to only be used for these paths
+          type: string
+        args:
+          description: Constructor arguments
+          type: array
+        kwargs:
+          description: Constructor keyword arguments
+          type: object
+  backends:
+    description: Registration backends definitions
+    type: array
+    items:
+      description: A single registration backend definition
+      type: object
+      properties:
+        type:
+          description: The registration backend type.
+          type: string
+          enum: ['eoxserver']
+        filter:
+          description: Optional filter to only be used for these paths
+          type: string
+        args:
+          description: Constructor arguments
+          type: array
+        kwargs:
+          description: Constructor keyword arguments
+          type: object
+
+  # TODO: describe type specific args/kwargs
+
diff --git a/core/registrar/config.py b/core/registrar/config.py
new file mode 100644
index 00000000..77534e94
--- /dev/null
+++ b/core/registrar/config.py
@@ -0,0 +1,39 @@
+import os
+from typing import TextIO
+import re
+
+import yaml
+
+
+ENV_PATTERN = re.compile(r'.*?\${(\w+)}.*?')
+
+def constructor_env_variables(loader, node):
+    """
+    Extracts the environment variable from the node's value
+    :param yaml.Loader loader: the yaml loader
+    :param node: the current node in the yaml
+    :return: the parsed string that contains the value of the environment
+    variable
+    """
+    value = loader.construct_scalar(node)
+    match = ENV_PATTERN.findall(value) # to find all env variables in line
+    if match:
+        full_value = value
+        for g in match:
+            full_value = full_value.replace(
+                f'${{{g}}}', os.environ.get(g, g)
+            )
+        return full_value
+    return value
+
+
+def load_config(input_file: TextIO):
+    tag = '!env'
+    loader = yaml.SafeLoader
+
+    # the tag will be used to mark where to start searching for the pattern
+    # e.g.
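+    # (each ${VAR} reference is replaced by the value of the environment
+    # variable VAR; unset variables are left as the bare name)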
somekey: !env somestring${MYENVVAR}blah blah blah + loader.add_implicit_resolver(tag, ENV_PATTERN, None) + loader.add_constructor(tag, constructor_env_variables) + + return yaml.load(input_file, Loader=loader) diff --git a/core/registrar/daemon.py b/core/registrar/daemon.py new file mode 100644 index 00000000..7c943627 --- /dev/null +++ b/core/registrar/daemon.py @@ -0,0 +1,26 @@ +import logging +import json + +import redis + +from .registrar import register_file + + +logger = logging.getLogger(__name__) + + +def run_daemon(config, host, port, listen_queue, write_queue): + """ Run the registrar daemon, listening on a redis queue + for files to be registered. After preprocessing the filename + of the registered files will be pushed to the output queue. + """ + # initialize the queue client + client = redis.Redis( + host=host, port=port, charset="utf-8", decode_responses=True + ) + logger.debug("waiting for redis queue '%s'..." % listen_queue) + while True: + # fetch an item from the queue to be registered + _, value = client.brpop(listen_queue) + # start the registration on that file + register_file(config, value) diff --git a/core/registrar/registrar.py b/core/registrar/registrar.py index 53e2379e..f7f984e8 100644 --- a/core/registrar/registrar.py +++ b/core/registrar/registrar.py @@ -1,14 +1,13 @@ import re from .source import get_source +from .scheme import get_scheme from .exceptions import RegistrationError - -def register(config, path): - # TODO: select registration scheme (config, path) +def register_file(config: dict, path: str): source = get_source(config, path) - scheme = select_registation_scheme(config, path) + scheme = get_scheme(config, path) context = scheme.get_context(source, path) for pre_handler in get_pre_handlers(config): @@ -27,19 +26,6 @@ def register(config, path): post_handler(config, path, context) -def select_registation_scheme(config, path): - cfg_schemes = config['schemes'] - for cfg_scheme in cfg_schemes: - if cfg_scheme['filter']: - if re.match(cfg_scheme['filter'], path): - break - else: - break - else: - # no source found - raise RegistrationError(f'Could not find a suitable scheme for the path {path}') - - def get_pre_handlers(config): pass -- GitLab From 352cc2368c345a60f8049753a3590608ed22468b Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 20 Oct 2020 16:07:29 +0200 Subject: [PATCH 03/54] Adding additional dependencies for registrar --- core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Dockerfile b/core/Dockerfile index 33baf310..e583326e 100644 --- a/core/Dockerfile +++ b/core/Dockerfile @@ -43,7 +43,7 @@ RUN apt update && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* RUN pip3 install . 
&& \ - pip3 install python-keystoneclient python-swiftclient redis + pip3 install python-keystoneclient python-swiftclient redis click setuptools jsonschema ENV INSTANCE_ID="prism-view-server_core" \ INSTANCE_NAME="pvs_instance"\ -- GitLab From 6d6ba6b07acac4303a162a64fd59458e54b987fc Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:06:06 +0200 Subject: [PATCH 04/54] Fixing backend registration issues --- core/registrar/backend.py | 90 +++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 36 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index b20c0396..4d453e80 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -2,23 +2,20 @@ import os import re import sys import logging +from typing import List import django from django.db import transaction -from django.contrib.gis.geos import GEOSGeometry -from eoxserver.resources.coverages import models -from eoxserver.resources.coverages.registration.product import ProductRegistrator -from eoxserver.resources.coverages.registration.browse import BrowseRegistrator -from eoxserver.resources.coverages.registration.mask import MaskRegistrator -from eoxserver.resources.coverages.registration.registrators.gdal import GDALRegistrator +from django.contrib.gis.geos import GEOSGeometry, Polygon from .exceptions import RegistrationError from .context import Context -from .source import Source +from .source import Source, S3Source, SwiftSource, LocalSource logger = logging.getLogger(__name__) + class RegistrationResult: pass @@ -40,35 +37,49 @@ class EOxServerBackend(Backend): django.setup() def exists(self, source: Source, item: Context): - return models.Product.objects.filter(identifier=item.itentifier).exists() + from eoxserver.resources.coverages import models + return models.Product.objects.filter(identifier=item.identifier).exists() - def _get_storage_from_source(self, source: Source) -> list: - return [] + def _get_storage_from_source(self, source: Source, path: str) -> list: + return [source.name] if source.name else [] @transaction.atomic def register(self, source: Source, item: Context, replace: bool) -> RegistrationResult: + # ugly, ugly hack + from eoxserver.resources.coverages import models + from eoxserver.resources.coverages.registration.product import ProductRegistrator + from eoxserver.resources.coverages.registration.browse import BrowseRegistrator + from eoxserver.resources.coverages.registration.mask import MaskRegistrator + from eoxserver.resources.coverages.registration.registrators.gdal import GDALRegistrator + # get the mapping for this particular item - mapping = self.mapping.get(item.product_type, {}).get(item.level_name) + mapping = self.mapping[item.product_type][item.product_level] metadata_file = item.metadata_files[0] - storage = self._get_storage_from_source(source) + storage = self._get_storage_from_source(source, item.path) try: - models.ProductType.objects.get(name=item['product_type_name']) + models.ProductType.objects.get(name=mapping['product_type_name']) except models.ProductType.DoesNotExist: pass + footprint = GEOSGeometry(item.metadata.pop('footprint')) + product, _ = ProductRegistrator().register( metadata_locations=[storage + [metadata_file]], - type_name=item['product_type_name'], + type_name=mapping['product_type_name'], replace=replace, extended_metadata=True, mask_locations=None, package_path=None, simplify_footprint_tolerance=self.simplify_footprint_tolerance, - overrides=item.metadata, + overrides=dict( + 
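+                # the explicit identifier and parsed footprint take
+                # precedence over values extracted from the metadata file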
identifier=item.identifier, + footprint=footprint, + **item.metadata + ), ) - if product.footprint.empty: + if not product.footprint or product.footprint.empty: raise RegistrationError("No footprint was extracted. full product: %s" % product) # insert the product in the to be associated collections @@ -96,8 +107,14 @@ class EOxServerBackend(Backend): models.product_add_coverage(product, report.coverage) # register browses - for raster_identifier, browse_type_name in mapping.get('browses', {}): + for raster_identifier, browse_type_name in mapping.get('browses', {}).items(): raster_item = item.raster_files.get(raster_identifier) + + raster_item = '/'.join(raster_item.split('/')[1:]) + + logger.info(f"Adding browse {browse_type_name or 'default'} {raster_item} to product") + logger.info(f'{storage + [raster_item]}') + BrowseRegistrator().register( product.identifier, storage + [raster_item], @@ -105,34 +122,35 @@ class EOxServerBackend(Backend): ) # register masks - for mask_identifier, mask_type_name in mapping.get('masks', {}): + for mask_identifier, mask_type_name in mapping.get('masks', {}).items(): mask_item = item.mask_files.get(mask_identifier) - MaskRegistrator().register( - product.identifier, - storage + [mask_item], - mask_type_name, - ) + if mask_item: + logger.info(f"Adding mask {mask_type_name} to product") + MaskRegistrator().register( + product.identifier, + storage + [mask_item], + mask_type_name, + ) BACKENDS = { 'eoxserver': EOxServerBackend } -def get_backend(config: dict, path: str) -> Backend: +def get_backends(config: dict, path: str) -> List[Backend]: cfg_backends = config['backends'] - for cfg_backend in cfg_backends: - if cfg_backend['filter']: - if re.match(cfg_backend['filter'], path): - break - else: - break - else: - # no source found + backends = [ + BACKENDS[cfg_backend['type']]( + *cfg_backend.get('args', []), + **cfg_backend.get('kwargs', {}), + ) + for cfg_backend in cfg_backends + if not cfg_backend.get('filter') or re.match(cfg_backend['filter'], path) + ] + + if not backends: raise RegistrationError(f'Could not find a suitable backend for the path {path}') - return BACKENDS[cfg_backend['type']]( - *cfg_backend.get('args', []), - **cfg_backend.get('kwargs', {}), - ) + return backends -- GitLab From 03775abc49cb7823533d414bb92b55a20e8999fa Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:07:18 +0200 Subject: [PATCH 05/54] Fixing parsing XML from downloaded files --- core/registrar/xml.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/core/registrar/xml.py b/core/registrar/xml.py index 4a088b26..519d8a54 100644 --- a/core/registrar/xml.py +++ b/core/registrar/xml.py @@ -1,16 +1,25 @@ -from tempfile import NamedTemporaryFile +from os import remove +from os.path import join, basename +from tempfile import gettempdir, gettempprefix from dataclasses import dataclass, field from typing import Union, Type, Optional, List, Callable, Any +import logging import lxml.etree from .source import Source +logger = logging.getLogger(__name__) + def read_xml(source: Source, path: str) -> lxml.etree._ElementTree: - with NamedTemporaryFile() as f: - source.get_file(path, f.name) - return lxml.etree.parse(f) + out_filename = join(gettempdir(), basename(path)) + try: + source.get_file(path, out_filename) + tree = lxml.etree.parse(out_filename) + finally: + remove(out_filename) + return tree @dataclass class Parameter: -- GitLab From d3160156c040c79a2fc7375d1ad3276a2e4912f4 Mon Sep 17 00:00:00 2001 From: Fabian 
Schindler Date: Thu, 22 Oct 2020 10:09:50 +0200 Subject: [PATCH 06/54] Fixing passing of replace parameter --- core/registrar/cli.py | 7 ++++--- core/registrar/daemon.py | 9 +++++++-- core/registrar/registrar.py | 27 +++++++++++++++++---------- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/core/registrar/cli.py b/core/registrar/cli.py index 2703bdf8..bfae3d6a 100644 --- a/core/registrar/cli.py +++ b/core/registrar/cli.py @@ -49,17 +49,18 @@ def cli(): @cli.command(help='Run the registrar daemon, attaching to a Redis queue') @click.option('--config-file', type=click.File('r')) @click.option('--validate/--no-validate', default=False) +@click.option('--replace/--no-replace', default=False) @click.option('--host', type=str) @click.option('--port', type=int) @click.option('--listen-queue', type=str) -@click.option('--write-queue', type=str) +@click.option('--registered-set-key', type=str) @click.option('--debug/--no-debug', default=False) -def daemon(config_file=None, validate=False, host=None, port=None, listen_queue=None, write_queue=None, debug=False): +def daemon(config_file=None, validate=False, replace=False, host=None, port=None, listen_queue=None, registered_set_key=None, debug=False): setup_logging(debug) config = load_config(config_file) if validate: validate_config(config) - run_daemon(config, host, port, listen_queue, write_queue) + run_daemon(config, replace, host, port, listen_queue, registered_set_key) @cli.command(help='Run a single, one-off registration') diff --git a/core/registrar/daemon.py b/core/registrar/daemon.py index 7c943627..efdf1ff5 100644 --- a/core/registrar/daemon.py +++ b/core/registrar/daemon.py @@ -9,7 +9,7 @@ from .registrar import register_file logger = logging.getLogger(__name__) -def run_daemon(config, host, port, listen_queue, write_queue): +def run_daemon(config, replace, host, port, listen_queue, registered_set_key): """ Run the registrar daemon, listening on a redis queue for files to be registered. After preprocessing the filename of the registered files will be pushed to the output queue. @@ -23,4 +23,9 @@ def run_daemon(config, host, port, listen_queue, write_queue): # fetch an item from the queue to be registered _, value = client.brpop(listen_queue) # start the registration on that file - register_file(config, value) + try: + item = register_file(config, value, replace) + client.sadd(registered_set_key, item.identifier) + + except Exception as e: + logger.exception(e) diff --git a/core/registrar/registrar.py b/core/registrar/registrar.py index f7f984e8..53c9b102 100644 --- a/core/registrar/registrar.py +++ b/core/registrar/registrar.py @@ -1,11 +1,19 @@ import re +import logging from .source import get_source from .scheme import get_scheme +from .backend import get_backends from .exceptions import RegistrationError -def register_file(config: dict, path: str): +logger = logging.getLogger(__name__) + + +def register_file(config: dict, path: str, replace: bool=False): + """ Handle the registration of a single path. 
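+    A source and a registration scheme are selected for the given path;
+    the scheme builds a Context which each matching backend registers,
+    optionally replacing an existing registration.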
+ """ + logger.info(f"Handling '{path}'.") source = get_source(config, path) scheme = get_scheme(config, path) context = scheme.get_context(source, path) @@ -13,26 +21,25 @@ def register_file(config: dict, path: str): for pre_handler in get_pre_handlers(config): pre_handler(config, path, context) - for backend in get_backends(config): + for backend in get_backends(config, path): if backend.exists(source, context): - if config.replace: + if replace: + logger.info(f"Replacing '{path}'.") backend.register(source, context, replace=True) else: raise RegistrationError(f'Object {context} is already registered') else: + logger.info(f"Registering '{path}'.") backend.register(source, context, replace=False) for post_handler in get_post_handlers(config): post_handler(config, path, context) + return context -def get_pre_handlers(config): - pass +def get_pre_handlers(config): + return [] def get_post_handlers(config): - pass - - -def get_backends(config): - pass + return [] -- GitLab From 26a30c2587eb37fecfc46e8184d589aae2d024f1 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:10:27 +0200 Subject: [PATCH 07/54] Adding mandatory source name --- core/registrar/source.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/core/registrar/source.py b/core/registrar/source.py index 2230ee2a..e513ae9f 100644 --- a/core/registrar/source.py +++ b/core/registrar/source.py @@ -3,17 +3,23 @@ from os.path import normpath, join, isabs import shutil from glob import glob from fnmatch import fnmatch +import logging import boto3 from swiftclient.multithreading import OutputManager from swiftclient.service import SwiftError, SwiftService +logger = logging.getLogger(__name__) + class RegistrationError(Exception): pass class Source: + def __init__(self, name: str=None): + self.name = name + def list_files(self, path, glob_pattern=None): raise NotImplementedError @@ -25,10 +31,12 @@ class Source: class SwiftSource(Source): - def __init__(self, username=None, password=None, tenant_name=None, + def __init__(self, name=None, username=None, password=None, tenant_name=None, tenant_id=None, region_name=None, user_domain_id=None, user_domain_name=None, auth_url=None, auth_version=None, container=None): + super().__init__(name) + self.username = username self.password = password self.tenant_name = tenant_name @@ -57,7 +65,7 @@ class SwiftSource(Source): container = self.container if container is None: parts = (path[1:] if path.startswith('/') else path).split('/') - container, path = parts[0], parts[1:].join('/') + container, path = parts[0], '/'.join(parts[1:]) return container, path @@ -114,13 +122,16 @@ class SwiftSource(Source): class S3Source(Source): - def __init__(self, bucket_name=None, secret_access_key=None, access_key_id=None, endpoint_url=None, **client_kwargs): + def __init__(self, name=None, bucket_name=None, secret_access_key=None, access_key_id=None, endpoint_url=None, strip_bucket=True, **client_kwargs): + super().__init__(name) + # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client # for client_kwargs self.bucket_name = bucket_name self.secret_access_key=secret_access_key self.access_key_id=access_key_id self.endpoint_url = endpoint_url + self.strip_bucket = strip_bucket self.client = boto3.client( 's3', @@ -131,15 +142,21 @@ class S3Source(Source): ) def get_bucket_and_key(self, path: str): - container = self.bucket_name - if container is None: + bucket = 
self.bucket_name + if bucket is None: + parts = (path[1:] if path.startswith('/') else path).split('/') + bucket, path = parts[0], '/'.join(parts[1:]) + elif self.strip_bucket: parts = (path[1:] if path.startswith('/') else path).split('/') - container, path = parts[0], parts[1:].join('/') + if parts[0] == bucket: + parts.pop(0) + path = '/'.join(parts) - return container, path + return bucket, path def list_files(self, path, glob_pattern=None): bucket, key = self.get_bucket_and_key(path) + logger.info(f'Listing S3 files for bucket {bucket} and prefix {key}') response = self.client.list_objects_v2( Bucket=bucket, Prefix=key, @@ -153,6 +170,7 @@ class S3Source(Source): def get_file(self, path, target_path): bucket, key = self.get_bucket_and_key(path) + logger.info(f'Retrieving file from S3 {bucket}/{key} to be stored at {target_path}') self.client.download_file(bucket, key, target_path) def get_vsi_env_and_path(self, path: str, streaming: bool=False): @@ -165,7 +183,9 @@ class S3Source(Source): class LocalSource(Source): - def __init__(self, root_directory): + def __init__(self, name, root_directory): + super().__init__(name) + self.root_directory = root_directory def _join_path(self, path): @@ -209,6 +229,7 @@ def get_source(config: dict, path: str) -> Source: raise RegistrationError(f'Could not find a suitable source for the path {path}') return SOURCE_TYPES[cfg_source['type']]( + cfg_source['name'], *cfg_source.get('args', []), **cfg_source.get('kwargs', {}) ) -- GitLab From 217c6fee3b759906a565384a8d18f000dbe3133d Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:11:11 +0200 Subject: [PATCH 08/54] Improving on S2 registration schema --- core/registrar/context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/registrar/context.py b/core/registrar/context.py index 81ded127..63844611 100644 --- a/core/registrar/context.py +++ b/core/registrar/context.py @@ -4,6 +4,7 @@ from dataclasses import dataclass, field @dataclass class Context: identifier: str + path: str product_type: str = None product_level: str = None metadata: dict = field(default_factory=dict) -- GitLab From 02973c9755b5ea5ef53f99dc5d62964cf836d15d Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:11:20 +0200 Subject: [PATCH 09/54] Cleanup --- core/registrar/backend.py | 2 -- core/registrar/scheme.py | 44 +++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index 4d453e80..a9681039 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -111,9 +111,7 @@ class EOxServerBackend(Backend): raster_item = item.raster_files.get(raster_identifier) raster_item = '/'.join(raster_item.split('/')[1:]) - logger.info(f"Adding browse {browse_type_name or 'default'} {raster_item} to product") - logger.info(f'{storage + [raster_item]}') BrowseRegistrator().register( product.identifier, diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index d68a911a..b0a7652d 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -1,21 +1,36 @@ import re - from os.path import join +import logging from .xml import read_xml, parse_metadata_schema, Parameter from .context import Context +from .source import Source from .exceptions import RegistrationError -class RegistrationScheme: - def __init__(self, source, path): - self.source = source - self.path = path +logger = logging.getLogger(__name__) +class RegistrationScheme: def get_context(self): raise 
NotImplementedError +def parse_datetime(value): + return value + + +def pairwise(iterable): + "s -> (s0,s1), (s2,s3), (s4, s5), ..." + a = iter(iterable) + return zip(a, a) + + +def parse_footprint(value): + coord_list = ','.join( + f'{x} {y}' + for y, x in pairwise(value.split()) + ) + return f'POLYGON(({coord_list}))' class Sentinel2RegistrationScheme(RegistrationScheme): @@ -23,10 +38,11 @@ class Sentinel2RegistrationScheme(RegistrationScheme): 'begin_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_START_TIME/text()', False, parse_datetime), 'end_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_STOP_TIME/text()', False, parse_datetime), 'identifier': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_URI/text()'), + 'footprint': Parameter('/n1:Level-2A_User_Product/n1:Geometric_Info/Product_Footprint/Product_Footprint/Global_Footprint/EXT_POS_LIST/text()', False, parse_footprint), 'level': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PROCESSING_LEVEL/text()'), 'type': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_TYPE/text()'), 'generation_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/GENERATION_TIME/text()', False, parse_datetime), - 'cloud_cover': Parameter('/n1:Level-2A_User_Product/n1:Quality_Indicators_Info/Cloud_Coverage_Assessment'), + 'cloud_cover': Parameter('/n1:Level-2A_User_Product/n1:Quality_Indicators_Info/Cloud_Coverage_Assessment/text()'), 'image_file_paths': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/Product_Organisation/Granule_List/Granule/IMAGE_FILE/text()', True), 'mask_file_paths': Parameter('/n1:Level-2A_Tile_ID/n1:Quality_Indicators_Info/Pixel_Level_QI/MASK_FILENAME', True), } @@ -35,9 +51,9 @@ class Sentinel2RegistrationScheme(RegistrationScheme): 'n1': "https://psd-14.sentinel2.eo.esa.int/PSD/User_Product_Level-2A.xsd" } - def get_context(self): - metadata_file = join(self.path, 'MTD_TL.xml') - mtd_tree = read_xml(self.source, metadata_file) + def get_context(self, source: Source, path: str): + metadata_file = join(path, 'MTD_MSIL2A.xml') + mtd_tree = read_xml(source, metadata_file) # get MTD metadata @@ -45,18 +61,23 @@ class Sentinel2RegistrationScheme(RegistrationScheme): band_re = re.compile(r'.*([A-Z0-9]{3})_([0-9]{2}m)$') raster_files = { - band_re.match(image_file_path).groups()[0]: f'{join(self.path, image_file_path)}.jp2' + band_re.match(image_file_path).groups()[0]: f'{join(path, image_file_path)}.jp2' for image_file_path in metadata['image_file_paths'] } mask_type_re = re.compile(r'.*/MSK_([A-Z]*)_([A-Z0-9]{3}).[a-z0-9]+$') mask_files = { - mask_type_re.match(mask_file_path).groups[0]: mask_file_path + mask_type_re.match(mask_file_path).groups[0]: join(path, mask_file_path) for mask_file_path in metadata['mask_file_paths'] } + logger.info(f'{mask_files} {metadata["mask_file_paths"]}') + return Context( identifier=metadata['identifier'], + path=path, + product_type=metadata['type'], + product_level=metadata['level'], raster_files=raster_files, mask_files=mask_files, metadata_files=[metadata_file], @@ -65,6 +86,7 @@ class Sentinel2RegistrationScheme(RegistrationScheme): 'end_time': metadata['end_time'], 'generation_time': metadata['generation_time'], 'cloud_cover': metadata['cloud_cover'], + 'footprint': metadata['footprint'], } ) -- GitLab From 9777947153810f736e36abf6c4bcb02c0878ee0e Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:11:52 
+0200 Subject: [PATCH 10/54] Fixing run-registrar.sh --- core/run-registrar.sh | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/core/run-registrar.sh b/core/run-registrar.sh index 348b4f75..a1bae617 100644 --- a/core/run-registrar.sh +++ b/core/run-registrar.sh @@ -6,13 +6,10 @@ if test "$REGISTRAR_REPLACE" = true; then replace="--replace" fi -python3 /registrar.py \ - --mode redis \ - --redis-host ${REDIS_HOST} \ - --redis-port ${REDIS_PORT} \ - --redis-register-queue-key ${REDIS_REGISTER_QUEUE_KEY} \ - --redis-registered-set-key ${REDIS_REGISTERED_SET_KEY} \ - --redis-registered-set-key ${REDIS_REGISTERED_SET_KEY} \ - --reporting-dir ${REPORTING_DIR} \ - --service-url ${SERVICE_URL} \ +registrar daemon \ + --config-file /config.yaml \ + --host ${REDIS_HOST} \ + --port ${REDIS_PORT} \ + --listen-queue ${REDIS_REGISTER_QUEUE_KEY} \ + --registered-set-key ${REDIS_REGISTERED_SET_KEY} \ ${replace} >&2 -- GitLab From fa6220bceb9d978eecde355ca760e901766087ab Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:12:39 +0200 Subject: [PATCH 11/54] Adding missing requirements Fixing installation of registrar source --- core/Dockerfile | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/core/Dockerfile b/core/Dockerfile index e583326e..35900bb4 100644 --- a/core/Dockerfile +++ b/core/Dockerfile @@ -43,7 +43,7 @@ RUN apt update && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* RUN pip3 install . && \ - pip3 install python-keystoneclient python-swiftclient redis click setuptools jsonschema + pip3 install python-keystoneclient python-swiftclient redis click setuptools jsonschema boto3 ENV INSTANCE_ID="prism-view-server_core" \ INSTANCE_NAME="pvs_instance"\ @@ -77,12 +77,22 @@ ADD rgbnir_definition.json \ configure.sh \ run-httpd.sh \ run-registrar.sh \ - registrar.py \ entrypoint.sh \ wait-initialized.sh \ initialized.sh \ / +RUN mkdir /registrar +ADD registrar/ \ + /registrar/registrar + +ADD setup.py \ + /registrar + +RUN cd /registrar && \ + ls && \ + python3 setup.py install + RUN chmod -v +x \ /configure.sh \ /run-registrar.sh \ -- GitLab From aac4ccb99d48acb429acee8a95f921185d5166fc Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 10:20:00 +0200 Subject: [PATCH 12/54] Adding missing setup.py --- core/setup.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 core/setup.py diff --git a/core/setup.py b/core/setup.py new file mode 100644 index 00000000..f64ba39c --- /dev/null +++ b/core/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +# with open("README.md", "r") as fh: +# long_description = fh.read() +long_description = "" + +setup( + name="registrar", # Replace with your own username + version="0.0.1", + author="", + author_email="", + description="preprocessor for PVS", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://gitlab.eox.at/esa/prism/vs/-/tree/master/core", + packages=find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.6', + entry_points={ + "console_scripts": [ + "registrar = registrar.cli:cli", + ], + } +) -- GitLab From b8866e1d75c29308e87ec3925406311dbd1812cc Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 22:10:17 +0200 Subject: [PATCH 13/54] Small fix for S3 path listing --- core/registrar/source.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/registrar/source.py b/core/registrar/source.py index e513ae9f..d752712e 100644 --- a/core/registrar/source.py +++ b/core/registrar/source.py @@ -163,7 +163,7 @@ class S3Source(Source): ) return [ - item['Key'] + f"{bucket}/{item['Key']}" for item in response['Contents'] if glob_pattern is None or fnmatch(item['Key'], glob_pattern) ] -- GitLab From 1d9a5ff917d6ee4f50ef0e5322e4087d6e908f8e Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 22:11:39 +0200 Subject: [PATCH 14/54] Fixing S2 metadata retrieval Getting masks from granule metadata --- core/registrar/scheme.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index b0a7652d..f3057fbe 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -34,7 +34,7 @@ def parse_footprint(value): class Sentinel2RegistrationScheme(RegistrationScheme): - MTD_TL_SCHEMA = { + MTD_MSIL2A_SCHEMA = { 'begin_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_START_TIME/text()', False, parse_datetime), 'end_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_STOP_TIME/text()', False, parse_datetime), 'identifier': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_URI/text()'), @@ -44,35 +44,46 @@ class Sentinel2RegistrationScheme(RegistrationScheme): 'generation_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/GENERATION_TIME/text()', False, parse_datetime), 'cloud_cover': Parameter('/n1:Level-2A_User_Product/n1:Quality_Indicators_Info/Cloud_Coverage_Assessment/text()'), 'image_file_paths': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/Product_Organisation/Granule_List/Granule/IMAGE_FILE/text()', True), - 'mask_file_paths': Parameter('/n1:Level-2A_Tile_ID/n1:Quality_Indicators_Info/Pixel_Level_QI/MASK_FILENAME', True), } - S2_NAMESPACES = { + MTD_TL_SCHEMA = { + 'mask_file_paths': Parameter('/n1:Level-2A_Tile_ID/n1:Quality_Indicators_Info/Pixel_Level_QI/MASK_FILENAME/text()', True), + } + + MTD_MSIL2A_NAMESPACES = { 'n1': "https://psd-14.sentinel2.eo.esa.int/PSD/User_Product_Level-2A.xsd" } + MTD_TL_NAMESPACES = { + 'n1': 'https://psd-14.sentinel2.eo.esa.int/PSD/S2_PDI_Level-2A_Tile_Metadata.xsd' + } + def get_context(self, source: Source, path: str): metadata_file = join(path, 'MTD_MSIL2A.xml') - mtd_tree = read_xml(source, metadata_file) - - # get MTD metadata + tree = read_xml(source, metadata_file) - metadata = parse_metadata_schema(mtd_tree, self.MTD_TL_SCHEMA, self.S2_NAMESPACES) + # get product metadata + metadata = parse_metadata_schema(tree, self.MTD_MSIL2A_SCHEMA, self.MTD_MSIL2A_NAMESPACES) - band_re = re.compile(r'.*([A-Z0-9]{3})_([0-9]{2}m)$') + band_re = re.compile(r'.*([A-Z0-9]{3}_[0-9]{2}m)$') raster_files = { band_re.match(image_file_path).groups()[0]: f'{join(path, image_file_path)}.jp2' for image_file_path in metadata['image_file_paths'] } + # get granule metadata + mtd_files = source.list_files(join(path, 'GRANULE'), '*/MTD_TL.xml') + logger.info(f'{mtd_files}') + tl_tree = read_xml(source, mtd_files[0]) + tile_metadata = parse_metadata_schema(tl_tree, self.MTD_TL_SCHEMA, self.MTD_TL_NAMESPACES) + mask_type_re = re.compile(r'.*/MSK_([A-Z]*)_([A-Z0-9]{3}).[a-z0-9]+$') mask_files = { - mask_type_re.match(mask_file_path).groups[0]: join(path, mask_file_path) - for mask_file_path in metadata['mask_file_paths'] + 
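+            # keep only masks whose filename matches MSK_<TYPE>_<BAND>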
mask_type_re.match(mask_file_path).groups()[0]: join(path, mask_file_path) + for mask_file_path in tile_metadata['mask_file_paths'] + if mask_type_re.match(mask_file_path) is not None } - logger.info(f'{mask_files} {metadata["mask_file_paths"]}') - return Context( identifier=metadata['identifier'], path=path, -- GitLab From cc625aff30bb3a8ee07bc9f364d1b0b9b9919640 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 22:11:55 +0200 Subject: [PATCH 15/54] Fixing paths in EOxServer backend --- core/registrar/backend.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index a9681039..980cd40c 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -54,7 +54,7 @@ class EOxServerBackend(Backend): # get the mapping for this particular item mapping = self.mapping[item.product_type][item.product_level] - metadata_file = item.metadata_files[0] + metadata_file = '/'.join(item.metadata_files[0].split('/')[1:]) storage = self._get_storage_from_source(source, item.path) @@ -92,6 +92,9 @@ class EOxServerBackend(Backend): # register coverages and link them to the product for raster_identifier, coverage_type_name in mapping.get('coverages', {}).items(): raster_item = item.raster_files.get(raster_identifier) + raster_item = '/'.join(raster_item.split('/')[1:]) + + logger.info(f"Registering coverage {raster_item} as {coverage_type_name}") report = GDALRegistrator().register( data_locations=[storage + [raster_item]], -- GitLab From 4518044a24dbee7ebc190652a84399b2efad5656 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 22 Oct 2020 22:12:19 +0200 Subject: [PATCH 16/54] Going for latest tag --- core/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Dockerfile b/core/Dockerfile index 5e32a8c8..2266a9ba 100644 --- a/core/Dockerfile +++ b/core/Dockerfile @@ -25,7 +25,7 @@ # IN THE SOFTWARE. 
#----------------------------------------------------------------------------- -FROM eoxa/eoxserver:release-1.0.0-rc12 +FROM eoxa/eoxserver:latest LABEL name="prism view server core" \ vendor="EOX IT Services GmbH " \ -- GitLab From 07ca2aaf5c63c8ca21dbc2c6d2322910c686c7a4 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 29 Oct 2020 18:14:28 +0100 Subject: [PATCH 17/54] Adding initial configs for registrators (emg/vhr) Adding GSC scheme Fixing EOxServer backend: multiple files per coverage type possible --- config/dem_registrar-config.yml | 0 config/emg_registrar-config.yml | 270 ++++++++++++++++++++++++++++++ config/vhr18_registrar-config.yml | 174 +++++++++++++++++++ core/registrar/scheme.py | 38 ++++- 4 files changed, 481 insertions(+), 1 deletion(-) create mode 100644 config/dem_registrar-config.yml create mode 100644 config/emg_registrar-config.yml create mode 100644 config/vhr18_registrar-config.yml diff --git a/config/dem_registrar-config.yml b/config/dem_registrar-config.yml new file mode 100644 index 00000000..e69de29b diff --git a/config/emg_registrar-config.yml b/config/emg_registrar-config.yml new file mode 100644 index 00000000..5a1f37dc --- /dev/null +++ b/config/emg_registrar-config.yml @@ -0,0 +1,270 @@ +sources: + - type: swift + name: !env '${UPLOAD_CONTAINER}' + kwargs: + username: !env '${OS_USERNAME}' + password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + user_domain_name: !env '${OS_USER_DOMAIN_NAME}' + container: !env '${UPLOAD_CONTAINER}' + +schemes: + - type: gsc + +backends: + type: eoxserver + filter: + kwargs: + instance_base_path: /var/www/pvs/dev + instance_name: pvs_instance + mapping: + CS00: + ~: + product_type_name: !env '${COLLECTION}_Product_CS00' + collections: + - !env '${COLLECTION}' + coverages: + CS00: sar_hh_gray + CS01: + ~: + product_type_name: !env '${COLLECTION}_Product_CS01' + collections: + - !env '${COLLECTION}' + coverages: + CS01: sar_hh_gray + CS02: + ~: + product_type_name: !env '${COLLECTION}_Product_CS02' + collections: + - !env '${COLLECTION}' + coverages: + CS02: sar_hh_gray + CS03: + ~: + product_type_name: !env '${COLLECTION}_Product_CS03' + collections: + - !env '${COLLECTION}' + coverages: + CS03: sar_hh_gray + CS04: + ~: + product_type_name: !env '${COLLECTION}_Product_CS04' + collections: + - !env '${COLLECTION}' + coverages: + CS04: sar_hh_gray + DM01: + ~: + product_type_name: !env '${COLLECTION}_Product_DM01' + collections: + - !env '${COLLECTION}' + coverages: + DM01: RGNirByte + DM02: + ~: + product_type_name: !env '${COLLECTION}_Product_DM02' + collections: + - !env '${COLLECTION}' + coverages: + DM02: RGBNir + EQ02_3: + ~: + product_type_name: !env '${COLLECTION}_Product_EQ02_3' + collections: + - !env '${COLLECTION}' + coverages: + EQ02_3: RGB + EQ02_4: + ~: + product_type_name: !env '${COLLECTION}_Product_EQ02_4' + collections: + - !env '${COLLECTION}' + coverages: + EQ02_4: RGBNir + EW01: + ~: + product_type_name: !env '${COLLECTION}_Product_EW01' + collections: + - !env '${COLLECTION}' + coverages: + EW01: grayscale + EW02_3: + ~: + product_type_name: !env '${COLLECTION}_Product_EW02_3' + collections: + - !env '${COLLECTION}' + coverages: + EW02_3: RGB + EW02_4: + ~: + product_type_name: !env '${COLLECTION}_Product_EW02_4' + collections: + - !env '${COLLECTION}' + coverages: + EW02_4: RGBNir + EW02_8: + ~: + product_type_name: !env 
'${COLLECTION}_Product_EW02_8' + collections: + - !env '${COLLECTION}' + coverages: + EW02_8: CBGYRReNirNir2 + EW03_3: + ~: + product_type_name: !env '${COLLECTION}_Product_EW03_3' + collections: + - !env '${COLLECTION}' + coverages: + EW03_3: RGB + EW03_4: + ~: + product_type_name: !env '${COLLECTION}_Product_EW03_4' + collections: + - !env '${COLLECTION}' + coverages: + EW03_4: RGBNir + EW03_8: + ~: + product_type_name: !env '${COLLECTION}_Product_EW03_8' + collections: + - !env '${COLLECTION}' + coverages: + EW03_8: CBGYRReNirNir2 + GE01_4: + ~: + product_type_name: !env '${COLLECTION}_Product_GE01_4' + collections: + - !env '${COLLECTION}' + coverages: + GE01_4: RGBNir + GE01_3: + ~: + product_type_name: !env '${COLLECTION}_Product_GE01_3' + collections: + - !env '${COLLECTION}' + coverages: + GE01_3: RGB + GE01_1: + ~: + product_type_name: !env '${COLLECTION}_Product_GE01_1' + collections: + - !env '${COLLECTION}' + coverages: + GE01_1: grayscale + GY01: + ~: + product_type_name: !env '${COLLECTION}_Product_GY01' + collections: + - !env '${COLLECTION}' + coverages: + GY01: RGBNir + IK02: + ~: + product_type_name: !env '${COLLECTION}_Product_IK02' + collections: + - !env '${COLLECTION}' + coverages: + IK02: RGBNir + KS03: + ~: + product_type_name: !env '${COLLECTION}_Product_KS03' + collections: + - !env '${COLLECTION}' + coverages: + KS03: RGBNir + PH1A: + ~: + product_type_name: !env '${COLLECTION}_Product_PH1A' + collections: + - !env '${COLLECTION}' + coverages: + PH1A: RGBNir + PH1B: + ~: + product_type_name: !env '${COLLECTION}_Product_PH1B' + collections: + - !env '${COLLECTION}' + coverages: + PH1B: RGBNir + RE00: + ~: + product_type_name: !env '${COLLECTION}_Product_RE00' + collections: + - !env '${COLLECTION}' + coverages: + RE00: BGRReNir + RS02_2: + ~: + product_type_name: !env '${COLLECTION}_Product_RS02_2' + collections: + - !env '${COLLECTION}' + coverages: + RS02_2: sar_hh_gray + RS02_3: + ~: + product_type_name: !env '${COLLECTION}_Product_RS02_3' + collections: + - !env '${COLLECTION}' + coverages: + RS02_3: sar_hh_vv_gray + RS02_7: + ~: + product_type_name: !env '${COLLECTION}_Product_RS02_7' + collections: + - !env '${COLLECTION}' + coverages: + RS02_7: sar_hh_hv_vh_vv_rgb + SP04: + ~: + product_type_name: !env '${COLLECTION}_Product_SP04' + collections: + - !env '${COLLECTION}' + coverages: + SP04: RGBNirByte + SP05: + ~: + product_type_name: !env '${COLLECTION}_Product_SP05' + collections: + - !env '${COLLECTION}' + coverages: + SP05: RGNirByte + SP06: + ~: + product_type_name: !env '${COLLECTION}_Product_SP06' + collections: + - !env '${COLLECTION}' + coverages: + SP06: RGBNir + SP07: + ~: + product_type_name: !env '${COLLECTION}_Product_SP07' + collections: + - !env '${COLLECTION}' + coverages: + SP07: RGBNir + TX01_2: + ~: + product_type_name: !env '${COLLECTION}_Product_TX01_2' + collections: + - !env '${COLLECTION}' + coverages: + TX01_2: sar_hh_gray + TX01_3: + ~: + product_type_name: !env '${COLLECTION}_Product_TX01_3' + collections: + - !env '${COLLECTION}' + coverages: + TX01_3: sar_hh_vv_gray + TX01_7: + ~: + product_type_name: !env '${COLLECTION}_Product_TX01_7' + collections: + - !env '${COLLECTION}' + coverages: + TX01_7: sar_hh_hv_vh_vv_rgb + diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml new file mode 100644 index 00000000..fc26c38a --- /dev/null +++ b/config/vhr18_registrar-config.yml @@ -0,0 +1,174 @@ +sources: + - type: swift + name: !env '${UPLOAD_CONTAINER}' + kwargs: + username: !env '${OS_USERNAME}' + 
password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + user_domain_name: !env '${OS_USER_DOMAIN_NAME}' + container: !env '${UPLOAD_CONTAINER}' + +schemes: + - type: gsc + +backends: + type: eoxserver + filter: + kwargs: + instance_base_path: /var/www/pvs/dev + instance_name: pvs_instance + mapping: + PL00: + Level_1: + product_type_name: !env '${COLLECTION}_Product_PL00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + PL00: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_PL00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + PL00: RGBNir + DM02: + Level_1: + product_type_name: !env '${COLLECTION}_Product_DM02' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + DM02: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_DM02' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + DM02: RGBNir + KS03: + Level_1: + product_type_name: !env '${COLLECTION}_Product_KS03' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + KS03: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_KS03' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + KS03: RGBNir + KS04: + Level_1: + product_type_name: !env '${COLLECTION}_Product_KS04' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + KS04: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_KS04' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + KS04: RGBNir + PH1A: + Level_1: + product_type_name: !env '${COLLECTION}_Product_PH1A' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + PH1A: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_PH1A' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + PH1A: RGBNir + PH1B: + Level_1: + product_type_name: !env '${COLLECTION}_Product_PH1B' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + PH1B: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_PH1B' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + PH1B: RGBNir + SP06: + Level_1: + product_type_name: !env '${COLLECTION}_Product_SP06' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + SP06: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_SP06' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + SP06: RGBNir + SP07: + Level_1: + product_type_name: !env '${COLLECTION}_Product_SP07' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + SP07: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_SP07' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + SP07: RGBNir + SW00: + Level_1: + product_type_name: !env '${COLLECTION}_Product_SW00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + SW00: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_SW00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + SW00: RGBNir + 
TR00: + Level_1: + product_type_name: !env '${COLLECTION}_Product_TR00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + TR00: RGBNir + Level_3: + product_type_name: !env '${COLLECTION}_Product_TR00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + TR00: RGBNir diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index f3057fbe..e5225a4d 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -104,7 +104,43 @@ class Sentinel2RegistrationScheme(RegistrationScheme): class GSCRegistrationScheme(RegistrationScheme): - pass + GSC_SCHEMA = { + 'identifier': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:identifier/text()'), + 'type': Parameter('//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'), + 'level': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'), + 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()'), + } + + def get_context(self, source: Source, path: str) -> Context: + gcs_filenames = source.list_files(path, 'GSC*.xml') + metadata_file = gsc_filenames[0] + + tree = read_xml(source, metadata_file) + metadata = parse_metadata_schema(tl_tree, self.GSC_SCHEMA, tree.nsmap) + + tiff_files = { + metadata['type']: source.list_files(path, '*.tif') + source.list_files(path, '*.TIF') + } + + match = re.match(r'.*(Level_[0-9]+)$', metadata['level']) + if match: + level = match.groups()[0] + else: + level = None + + return Context( + identifier=metadata['identifier'], + path=path, + product_type=metadata['type'], + product_level=level, + raster_files=tiff_files, + masks={ + 'validity': metadata['mask'] + }, + metadata_files=[metadata_file], + metadata={ + } + ) REGISTRATION_SCHEMES = { -- GitLab From b09d1de2838a6344b38329ef3c23b4332a741f2c Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 29 Oct 2020 18:14:42 +0100 Subject: [PATCH 18/54] Fixing EOxServer backend --- core/registrar/backend.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index 980cd40c..05507470 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -91,13 +91,16 @@ class EOxServerBackend(Backend): # register coverages and link them to the product for raster_identifier, coverage_type_name in mapping.get('coverages', {}).items(): - raster_item = item.raster_files.get(raster_identifier) - raster_item = '/'.join(raster_item.split('/')[1:]) + raster_items = item.raster_files.get(raster_identifier) + raster_items = [ + storage + '/'.join(raster_item.split('/')[1:]) + for raster_item in (raster_items if isinstance(raster_items, list) else [raster_items]) + ] logger.info(f"Registering coverage {raster_item} as {coverage_type_name}") report = GDALRegistrator().register( - data_locations=[storage + [raster_item]], + data_locations=raster_items, metadata_locations=[storage + [metadata_file]], coverage_type_name=coverage_type_name, overrides={ -- GitLab From 8d31a4a04227cc75f146cb70ac89237d4eeb0074 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 29 Oct 2020 18:21:10 +0100 Subject: [PATCH 19/54] Mounting registrar configs for each stack --- docker-compose.dem.yml | 8 +++++--- docker-compose.emg.yml | 7 +++++-- docker-compose.vhr18.yml | 7 +++++-- 3 files changed, 15 
insertions(+), 7 deletions(-) diff --git a/docker-compose.dem.yml b/docker-compose.dem.yml index 8381edda..71f3fe9a 100644 --- a/docker-compose.dem.yml +++ b/docker-compose.dem.yml @@ -125,7 +125,7 @@ services: OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" configs: - source: preprocessor-config - target: /config.yaml + target: /config.yaml deploy: replicas: 1 networks: @@ -166,6 +166,8 @@ services: configs: - source: init-db target: /init-db.sh + - source: registrar-config + target: /config.yaml deploy: replicas: 1 networks: @@ -188,7 +190,6 @@ services: configs: - source: sftp-users-dem target: /etc/sftp/users.conf - ports: - "2222:22" deploy: @@ -218,6 +219,8 @@ configs: file: ./config/dem_index-ops.html preprocessor-config: file: ./config/dem_preprocessor-config.yml + registrar-config: + file: ./config/dem_registrar-config.yml volumes: db-data: redis-data: @@ -233,4 +236,3 @@ secrets: external: true DJANGO_PASSWORD: external: true - \ No newline at end of file diff --git a/docker-compose.emg.yml b/docker-compose.emg.yml index d725f610..660c5229 100644 --- a/docker-compose.emg.yml +++ b/docker-compose.emg.yml @@ -135,7 +135,7 @@ services: OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" configs: - source: preprocessor-config - target: /config.yaml + target: /config.yaml deploy: replicas: 1 networks: @@ -172,11 +172,12 @@ services: WAIT_SERVICES: "redis:6379 database:5432" OS_PASSWORD_FILE: "/run/secrets/OS_PASSWORD" OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" - REPORTING_DIR: '/mnt/reports/' configs: - source: init-db target: /init-db.sh + - source: registrar-config + target: /config.yaml deploy: replicas: 1 networks: @@ -219,6 +220,8 @@ configs: file: ./config/emg_index-ops.html preprocessor-config: file: ./config/emg_preprocessor-config.yml + registrar-config: + file: ./config/emg_registrar-config.yml volumes: db-data: redis-data: diff --git a/docker-compose.vhr18.yml b/docker-compose.vhr18.yml index 128aea0e..f70165b9 100644 --- a/docker-compose.vhr18.yml +++ b/docker-compose.vhr18.yml @@ -138,7 +138,7 @@ services: OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" configs: - source: preprocessor-config - target: /config.yaml + target: /config.yaml deploy: replicas: 1 networks: @@ -179,6 +179,8 @@ services: configs: - source: init-db target: /init-db.sh + - source: registrar-config + target: /config.yaml deploy: replicas: 1 networks: @@ -203,7 +205,6 @@ services: target: /etc/sftp/users.conf deploy: replicas: 1 - ports: - "2222:22" ingestor: @@ -225,6 +226,8 @@ configs: file: ./config/vhr18_index-ops.html preprocessor-config: file: ./config/vhr18_preprocessor-config.yml + registrar-config: + file: ./config/vhr18_registrar-config.yml volumes: db-data: redis-data: -- GitLab From 380c569cd2fcfc8656aad78b90ac32b2a21e3051 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Fri, 30 Oct 2020 17:06:36 +0100 Subject: [PATCH 20/54] Implementing GSC registration scheme Improving on EOxServer backend Storages/StorageAuth are now created when necessary --- core/registrar/backend.py | 90 ++++++++++++++++++++++++++++++++++++--- core/registrar/scheme.py | 9 ++-- core/registrar/source.py | 38 ++++++++++++----- 3 files changed, 115 insertions(+), 22 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index 05507470..46c07a12 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -3,6 +3,7 @@ import re import sys import logging from typing import List +import json import django from django.db 
import transaction
@@ -10,7 +11,7 @@ from django.contrib.gis.geos import GEOSGeometry, Polygon
 
 from .exceptions import RegistrationError
 from .context import Context
-from .source import Source, S3Source, SwiftSource, LocalSource
+from .source import Source, LocalSource, S3Source, SwiftSource
 
 
 logger = logging.getLogger(__name__)
@@ -41,7 +42,75 @@ class EOxServerBackend(Backend):
         return models.Product.objects.filter(identifier=item.identifier).exists()
 
     def _get_storage_from_source(self, source: Source, path: str) -> list:
-        return [source.name] if source.name else []
+        from eoxserver.backends import models as backends
+
+        created_storage_auth = False
+        created_storage = False
+        storage_name = None
+        if isinstance(source, LocalSource):
+            storage, created_storage = backends.Storage.objects.get_or_create(
+                name=source.name,
+                url=source.root_directory,
+                storage_type='local',
+            )
+            storage_name = storage.name
+        elif isinstance(source, S3Source):
+            params = json.dumps({
+                'ACCESS_KEY_ID': source.access_key_id,
+                'SECRET_ACCESS_KEY': source.secret_access_key,
+            })
+
+            storage_auth, created_storage_auth = backends.StorageAuth.objects.get_or_create(
+                name=source.endpoint_url,
+                url=source.endpoint_url,
+                storage_auth_type='S3',
+                auth_parameters=params,
+            )
+
+            bucket, _ = source.get_bucket_and_key(path)
+
+            storage, created_storage = backends.Storage.objects.get_or_create(
+                name=source.name,
+                url=bucket,
+                storage_type='S3',
+                storage_auth=storage_auth,
+            )
+            storage_name = storage.name
+
+        elif isinstance(source, SwiftSource):
+            params = json.dumps({
+                'auth-version': str(source.auth_version),
+                'identity-api-version': str(source.auth_version),
+                'username': source.username,
+                'password': source.password,
+                'tenant-name': source.tenant_name,
+                'tenant-id': source.tenant_id,
+                'region-name': source.region_name,
+            })
+
+            storage_auth, created_storage_auth = backends.StorageAuth.objects.get_or_create(
+                name=source.auth_url,
+                url=source.auth_url_short or source.auth_url,
+                storage_auth_type='keystone',
+                auth_parameters=params,
+            )
+
+            container, _ = source.get_container_and_path(path)
+
+            storage, created_storage = backends.Storage.objects.get_or_create(
+                name=source.name,
+                url=container,
+                storage_type='swift',
+                storage_auth=storage_auth,
+            )
+            storage_name = storage.name
+
+        if created_storage_auth:
+            logger.info(f'Created storage auth for {source.name}')
+        if created_storage:
+            logger.info(f'Created storage for {source.name}')
+
+        return [storage_name] if storage_name else []
 
     @transaction.atomic
     def register(self, source: Source, item: Context, replace: bool) -> RegistrationResult:
@@ -53,9 +122,13 @@ class EOxServerBackend(Backend):
         from eoxserver.resources.coverages.registration.registrators.gdal import GDALRegistrator
 
         # get the mapping for this particular item
-        mapping = self.mapping[item.product_type][item.product_level]
-        metadata_file = '/'.join(item.metadata_files[0].split('/')[1:])
+        type_mapping = self.mapping[item.product_type]
+        mapping = type_mapping.get(item.product_level) or type_mapping.get(None)
+
+        if not mapping:
+            raise RegistrationError(f'Could not get mapping for {item.product_type} {item.product_level}')
+        metadata_file = '/'.join(item.metadata_files[0].split('/')[1:])
         storage = self._get_storage_from_source(source, item.path)
 
         try:
@@ -63,7 +136,10 @@ class EOxServerBackend(Backend):
         except models.ProductType.DoesNotExist:
             pass
 
-        footprint =
GEOSGeometry(item.metadata.pop('footprint')) + else: + footprint = None product, _ = ProductRegistrator().register( metadata_locations=[storage + [metadata_file]], @@ -93,11 +169,11 @@ class EOxServerBackend(Backend): for raster_identifier, coverage_type_name in mapping.get('coverages', {}).items(): raster_items = item.raster_files.get(raster_identifier) raster_items = [ - storage + '/'.join(raster_item.split('/')[1:]) + storage + ['/'.join(raster_item.split('/')[1:])] for raster_item in (raster_items if isinstance(raster_items, list) else [raster_items]) ] - logger.info(f"Registering coverage {raster_item} as {coverage_type_name}") + logger.info(f"Registering coverage{'s' if len(raster_items) > 1 else ''} {raster_items} as {coverage_type_name}") report = GDALRegistrator().register( data_locations=raster_items, diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index e5225a4d..ba857f34 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -109,17 +109,18 @@ class GSCRegistrationScheme(RegistrationScheme): 'type': Parameter('//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'), 'level': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'), 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()'), + 'footprint': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()'), } def get_context(self, source: Source, path: str) -> Context: - gcs_filenames = source.list_files(path, 'GSC*.xml') + gsc_filenames = source.list_files(path, ['GSC*.xml', 'GSC*.XML']) metadata_file = gsc_filenames[0] tree = read_xml(source, metadata_file) - metadata = parse_metadata_schema(tl_tree, self.GSC_SCHEMA, tree.nsmap) + metadata = parse_metadata_schema(tree, self.GSC_SCHEMA, tree.getroot().nsmap) tiff_files = { - metadata['type']: source.list_files(path, '*.tif') + source.list_files(path, '*.TIF') + metadata['type']: source.list_files(path, ['*.tif', '*.TIF']) } match = re.match(r'.*(Level_[0-9]+)$', metadata['level']) @@ -152,7 +153,7 @@ def get_scheme(config: dict, path: str) -> RegistrationScheme: cfg_schemes = config['schemes'] for cfg_scheme in cfg_schemes: - if cfg_scheme['filter']: + if cfg_scheme.get('filter'): if re.match(cfg_scheme['filter'], path): break else: diff --git a/core/registrar/source.py b/core/registrar/source.py index d752712e..fc0e0494 100644 --- a/core/registrar/source.py +++ b/core/registrar/source.py @@ -33,8 +33,8 @@ class Source: class SwiftSource(Source): def __init__(self, name=None, username=None, password=None, tenant_name=None, tenant_id=None, region_name=None, user_domain_id=None, - user_domain_name=None, auth_url=None, auth_version=None, - container=None): + user_domain_name=None, auth_url=None, auth_url_short=None, + auth_version=None, container=None): super().__init__(name) self.username = username @@ -45,6 +45,7 @@ class SwiftSource(Source): self.user_domain_id = user_domain_id self.user_domain_name = user_domain_name self.auth_url = auth_url + self.auth_url_short = auth_url_short self.auth_version = auth_version # TODO: assume 3 self.container = container @@ -70,9 +71,12 @@ class SwiftSource(Source): return container, path - def list_files(self, path, glob_pattern=None): + def list_files(self, path, 
glob_patterns=None): container, path = self.get_container_and_path(path) + if glob_patterns and not isinstance(glob_patterns, list): + glob_patterns = [glob_patterns] + with self.get_service() as swift: pages = swift.list( container=container, @@ -84,8 +88,12 @@ class SwiftSource(Source): if page["success"]: # at least two files present -> pass validation for item in page["listing"]: - if glob_pattern is None or fnmatch(item['name'], glob_pattern): - filenames.append(item['name']) + if glob_patterns is None or any( + fnmatch(item['name'], join(path, glob_pattern)) for glob_pattern in glob_patterns): + + filenames.append( + item['name'] if self.container else join(container, item['name']) + ) else: raise page['error'] @@ -154,7 +162,10 @@ class S3Source(Source): return bucket, path - def list_files(self, path, glob_pattern=None): + def list_files(self, path, glob_patterns=None): + if glob_patterns and not isinstance(glob_patterns, list): + glob_patterns = [glob_patterns] + bucket, key = self.get_bucket_and_key(path) logger.info(f'Listing S3 files for bucket {bucket} and prefix {key}') response = self.client.list_objects_v2( @@ -165,7 +176,9 @@ class S3Source(Source): return [ f"{bucket}/{item['Key']}" for item in response['Contents'] - if glob_pattern is None or fnmatch(item['Key'], glob_pattern) + if glob_patterns is None or any( + fnmatch(item['name'], glob_pattern) for glob_pattern in glob_patterns + ) ] def get_file(self, path, target_path): @@ -195,9 +208,12 @@ class LocalSource(Source): return join(self.root_directory, path) - def list_files(self, path, glob_pattern=None): - if glob_pattern is not None: - return glob(join(self._join_path(path), glob_pattern)) + def list_files(self, path, glob_patterns=None): + if glob_patterns and not isinstance(glob_patterns, list): + glob_patterns = [glob_patterns] + + if glob_patterns is not None: + return glob(join(self._join_path(path), glob_patterns[0])) # TODO else: return glob(join(self._join_path(path), '*')) @@ -219,7 +235,7 @@ def get_source(config: dict, path: str) -> Source: cfg_sources = config['sources'] for cfg_source in cfg_sources: - if cfg_source['filter']: + if cfg_source.get('filter'): if re.match(cfg_source['filter'], path): break else: -- GitLab From 1fb6af2c4c827601bd9023a58da54fa1ed88ca28 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Fri, 30 Oct 2020 17:07:01 +0100 Subject: [PATCH 21/54] Adjusting EMG registration config --- config/emg_init-db.sh | 17 ----------------- config/emg_registrar-config.yml | 5 ++--- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/config/emg_init-db.sh b/config/emg_init-db.sh index 5944a22d..7ce4d7e7 100644 --- a/config/emg_init-db.sh +++ b/config/emg_init-db.sh @@ -975,23 +975,6 @@ if python3 manage.py id check "${COLLECTION}"; then echo "Provided collection '${COLLECTION}' not valid." 
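Stepping back to the source changes in patch 20 above: every source now normalises a single glob pattern to a list and accepts a file if any pattern matches, which is what allows case variants such as `['GSC*.xml', 'GSC*.XML']`. A condensed sketch of that idiom, assuming plain `fnmatch` semantics (`matches` is an illustrative helper; the Swift variant additionally joins the listing prefix onto each pattern):

```python
from fnmatch import fnmatch

def matches(name, glob_patterns=None):
    """Accept `name` if any pattern matches; None accepts everything."""
    if glob_patterns and not isinstance(glob_patterns, list):
        glob_patterns = [glob_patterns]  # normalise a single pattern to a list
    return glob_patterns is None or any(
        fnmatch(name, pattern) for pattern in glob_patterns
    )

assert matches('GSC_123_RADAR.XML', ['GSC*.xml', 'GSC*.XML'])
assert not matches('preview.png', ['*.tif', '*.TIF'])
```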
fi - python3 manage.py storageauth create auth-cloud-ovh "${OS_AUTH_URL_SHORT}" \ - --type keystone \ - -p auth-version "${ST_AUTH_VERSION}" \ - -p identity-api-version="${ST_AUTH_VERSION}" \ - -p username "${OS_USERNAME}" \ - -p password "${OS_PASSWORD}" \ - -p tenant-name "${OS_TENANT_NAME}" \ - -p tenant-id "${OS_TENANT_ID}" \ - -p region-name "${OS_REGION_NAME}" - - python3 manage.py storage create \ - ${UPLOAD_CONTAINER} ${UPLOAD_CONTAINER} \ - --type swift \ - --storage-auth auth-cloud-ovh - - - else echo "Using existing database" fi \ No newline at end of file diff --git a/config/emg_registrar-config.yml b/config/emg_registrar-config.yml index 5a1f37dc..6f85f7d3 100644 --- a/config/emg_registrar-config.yml +++ b/config/emg_registrar-config.yml @@ -9,14 +9,13 @@ sources: region_name: !env '${OS_REGION_NAME}' auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' - user_domain_name: !env '${OS_USER_DOMAIN_NAME}' - container: !env '${UPLOAD_CONTAINER}' + auth_url_short: !env '${OS_AUTH_URL_SHORT}' schemes: - type: gsc backends: - type: eoxserver + - type: eoxserver filter: kwargs: instance_base_path: /var/www/pvs/dev -- GitLab From 1cd24b7f5fcc77b03b3869468da15e7a61a1b573 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 2 Nov 2020 09:31:34 +0100 Subject: [PATCH 22/54] Adjusting VHR18 registration config --- config/vhr18_init-db.sh | 16 ---------------- config/vhr18_registrar-config.yml | 5 ++--- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/config/vhr18_init-db.sh b/config/vhr18_init-db.sh index 46c09742..3f1451df 100644 --- a/config/vhr18_init-db.sh +++ b/config/vhr18_init-db.sh @@ -401,22 +401,6 @@ if python3 manage.py id check "${COLLECTION}"; then echo "Provided collection '${COLLECTION}' not valid." 
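The `manage.py storageauth`/`storage` bootstrap calls removed from the EMG init script above (and from the VHR18 and DEM scripts in the following patches) are superseded by the `get_or_create` logic that patch 20 added to `EOxServerBackend._get_storage_from_source`: storages and storage auths are now created lazily on first registration. A rough sketch of the equivalence for the Swift case, requiring a configured Django/EOxServer instance; the values are illustrative and in practice come from the source configuration:

```python
from eoxserver.backends import models as backends

auth_url = 'https://auth.example.com/v3'           # illustrative only
params = '{"username": "...", "password": "..."}'  # JSON-encoded credentials
container = 'upload-container'

# Roughly what `manage.py storageauth create auth-cloud-ovh ...` did:
storage_auth, _ = backends.StorageAuth.objects.get_or_create(
    name=auth_url,
    url=auth_url,
    storage_auth_type='keystone',
    auth_parameters=params,
)
# Roughly what `manage.py storage create ${UPLOAD_CONTAINER} ...` did:
storage, _ = backends.Storage.objects.get_or_create(
    name=container,
    url=container,
    storage_type='swift',
    storage_auth=storage_auth,
)
```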
fi - python3 manage.py storageauth create auth-cloud-ovh "${OS_AUTH_URL_SHORT}" \ - --type keystone \ - -p auth-version "${ST_AUTH_VERSION}" \ - -p identity-api-version="${ST_AUTH_VERSION}" \ - -p username "${OS_USERNAME}" \ - -p password "${OS_PASSWORD}" \ - -p tenant-name "${OS_TENANT_NAME}" \ - -p tenant-id "${OS_TENANT_ID}" \ - -p region-name "${OS_REGION_NAME}" - - python3 manage.py storage create \ - ${UPLOAD_CONTAINER} ${UPLOAD_CONTAINER} \ - --type swift \ - --storage-auth auth-cloud-ovh - - else echo "Using existing database" fi diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml index fc26c38a..eb5ed8c7 100644 --- a/config/vhr18_registrar-config.yml +++ b/config/vhr18_registrar-config.yml @@ -9,14 +9,13 @@ sources: region_name: !env '${OS_REGION_NAME}' auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' - user_domain_name: !env '${OS_USER_DOMAIN_NAME}' - container: !env '${UPLOAD_CONTAINER}' + auth_url_short: !env '${OS_AUTH_URL_SHORT}' schemes: - type: gsc backends: - type: eoxserver + - type: eoxserver filter: kwargs: instance_base_path: /var/www/pvs/dev -- GitLab From 05b7711516350d16b3e85d17c57b726b82d434a4 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 2 Nov 2020 09:32:21 +0100 Subject: [PATCH 23/54] Fixing cases for multi-bucket/container setup --- core/registrar/backend.py | 5 +++-- core/registrar/registrar.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index 46c07a12..3bdb14d7 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -54,6 +54,7 @@ class EOxServerBackend(Backend): storage_type='local', ) storage_name = storage.name + elif isinstance(source, S3Source): params = json.dumps({ 'ACCESS_KEY_ID': source.access_key_id, @@ -70,7 +71,7 @@ class EOxServerBackend(Backend): bucket, _ = source.get_bucket_and_key(path) storage, created_storage = backends.Storage.objects.get_or_create( - name=source.name, + name=source.name if source.bucket else f'{source.name}-{bucket}', url=bucket, storage_type='S3', storage_auth=storage_auth, @@ -98,7 +99,7 @@ class EOxServerBackend(Backend): container, _ = source.get_container_and_path(path) storage, created_storage = backends.Storage.objects.get_or_create( - name=source.name, + name=source.name if source.container else f'{source.name}-{container}', url=container, storage_type='swift', storage_auth=storage_auth, diff --git a/core/registrar/registrar.py b/core/registrar/registrar.py index 53c9b102..7630613d 100644 --- a/core/registrar/registrar.py +++ b/core/registrar/registrar.py @@ -35,6 +35,7 @@ def register_file(config: dict, path: str, replace: bool=False): for post_handler in get_post_handlers(config): post_handler(config, path, context) + logger.info(f"Successfully {'replaced' if replace else 'registered'} '{path}'") return context -- GitLab From ab41b313232c43bc6a440ea074c66bdd14f34b99 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 2 Nov 2020 12:25:14 +0100 Subject: [PATCH 24/54] Implemented direct geometry masks Adjusted VHR 18 registration configuration to include masks --- config/vhr18_registrar-config.yml | 40 +++++++++++++++++++++++++++++++ core/registrar/backend.py | 14 ++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml index eb5ed8c7..0295be2a 100644 --- a/config/vhr18_registrar-config.yml +++ b/config/vhr18_registrar-config.yml @@ -29,6 +29,8 @@ backends: - !env 
'${COLLECTION}_Level_1' coverages: PL00: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_PL00' collections: @@ -36,6 +38,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: PL00: RGBNir + masks: + validity: validity DM02: Level_1: product_type_name: !env '${COLLECTION}_Product_DM02' @@ -44,6 +48,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: DM02: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_DM02' collections: @@ -51,6 +57,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: DM02: RGBNir + masks: + validity: validity KS03: Level_1: product_type_name: !env '${COLLECTION}_Product_KS03' @@ -59,6 +67,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: KS03: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_KS03' collections: @@ -66,6 +76,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: KS03: RGBNir + masks: + validity: validity KS04: Level_1: product_type_name: !env '${COLLECTION}_Product_KS04' @@ -74,6 +86,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: KS04: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_KS04' collections: @@ -81,6 +95,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: KS04: RGBNir + masks: + validity: validity PH1A: Level_1: product_type_name: !env '${COLLECTION}_Product_PH1A' @@ -89,6 +105,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: PH1A: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_PH1A' collections: @@ -96,6 +114,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: PH1A: RGBNir + masks: + validity: validity PH1B: Level_1: product_type_name: !env '${COLLECTION}_Product_PH1B' @@ -104,6 +124,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: PH1B: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_PH1B' collections: @@ -111,6 +133,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: PH1B: RGBNir + masks: + validity: validity SP06: Level_1: product_type_name: !env '${COLLECTION}_Product_SP06' @@ -119,6 +143,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: SP06: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_SP06' collections: @@ -126,6 +152,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: SP06: RGBNir + masks: + validity: validity SP07: Level_1: product_type_name: !env '${COLLECTION}_Product_SP07' @@ -134,6 +162,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: SP07: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_SP07' collections: @@ -141,6 +171,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: SP07: RGBNir + masks: + validity: validity SW00: Level_1: product_type_name: !env '${COLLECTION}_Product_SW00' @@ -149,6 +181,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: SW00: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_SW00' collections: @@ -156,6 +190,8 @@ backends: - !env '${COLLECTION}_Level_3' coverages: SW00: RGBNir + masks: + validity: validity TR00: Level_1: product_type_name: !env '${COLLECTION}_Product_TR00' @@ -164,6 +200,8 @@ backends: - !env '${COLLECTION}_Level_1' coverages: TR00: RGBNir + masks: + validity: validity Level_3: product_type_name: !env '${COLLECTION}_Product_TR00' collections: @@ -171,3 +209,5 @@ backends: - !env '${COLLECTION}_Level_3' 
coverages:
           TR00: RGBNir
+        masks:
+          validity: validity
diff --git a/core/registrar/backend.py b/core/registrar/backend.py
index 3bdb14d7..2a83de1b 100644
--- a/core/registrar/backend.py
+++ b/core/registrar/backend.py
@@ -206,13 +206,25 @@ class EOxServerBackend(Backend):
         for mask_identifier, mask_type_name in mapping.get('masks', {}).items():
             mask_item = item.mask_files.get(mask_identifier)
             if mask_item:
-                logger.info(f"Adding mask {mask_type_name} to product")
+                logger.info(f"Adding mask (file) {mask_type_name} to product")
                 MaskRegistrator().register(
                     product.identifier,
                     storage + [mask_item],
                     mask_type_name,
                 )
 
+            mask_item = item.masks.get(mask_identifier)
+            if mask_item:
+                logger.info(f"Adding mask (geometry) {mask_type_name} to product")
+                models.Mask.objects.create(
+                    product=product,
+                    mask_type=models.MaskType.objects.get(
+                        product_type=product.product_type,
+                        name=mask_type_name,
+                    ),
+                    geometry=mask_item,
+                )
+
 
 BACKENDS = {
     'eoxserver': EOxServerBackend
-- 
GitLab


From f267ca1ad50eafcff219e4107d54c1b5e8d1d532 Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Tue, 3 Nov 2020 09:47:23 +0100
Subject: [PATCH 25/54] Improved error handling in XML parsing

---
 core/registrar/xml.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/core/registrar/xml.py b/core/registrar/xml.py
index 519d8a54..cc7b37ee 100644
--- a/core/registrar/xml.py
+++ b/core/registrar/xml.py
@@ -8,6 +8,7 @@ import logging
 import lxml.etree
 
 from .source import Source
+from .exceptions import RegistrationError
 
 
 logger = logging.getLogger(__name__)
@@ -29,6 +30,9 @@ class Parameter:
     namespaces: dict = field(default_factory=dict)
 
 
+class ParserError(RegistrationError):
+    pass
+
 
 def parse_metadata_schema(tree: lxml.etree._ElementTree, schema: dict, namespaces: dict=None) -> dict:
     out = {}
     for key, param in schema.items():
@@ -39,7 +43,10 @@ def parse_metadata_schema(tree: lxml.etree._ElementTree, schema: dict, namespace
                 for v in values
             ]
         else:
-            value = param.parser(values[0]) if param.parser else values[0]
+            try:
+                value = param.parser(values[0]) if param.parser else values[0]
+            except IndexError:
+                raise ParserError(f'Failed to fetch single value for parameter {key}')
 
         out[key] = value
-- 
GitLab


From 3e97dfceb791efa2c7f7a3ebc93c0b28eb2dd4ea Mon Sep 17 00:00:00 2001
From: Fabian Schindler
Date: Tue, 3 Nov 2020 09:48:18 +0100
Subject: [PATCH 26/54] Fixed GSC footprint parsing

Added option to extract level via RE
---
 core/registrar/scheme.py | 43 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py
index ba857f34..2e17a487 100644
--- a/core/registrar/scheme.py
+++ b/core/registrar/scheme.py
@@ -102,16 +102,51 @@ class Sentinel2RegistrationScheme(RegistrationScheme):
         )
 
 
+def parse_ring(string):
+    raw_coords = string.split()
+    return [(lon, lat) for lat, lon in pairwise(raw_coords)]
+
+
+def parse_polygons_gsc(elem):
+    def serialize_coord_list(coords):
+        return ','.join(
+            f'{x} {y}' for x, y in coords
+        )
+
+    exterior = serialize_coord_list(
+        parse_ring(
+            elem.xpath(
+                "gml:exterior/gml:LinearRing/gml:posList", namespaces=elem.nsmap
+            )[0].text.strip()
+        )
+    )
+
+    interiors = [
+        f'''({
+            serialize_coord_list(
+                parse_ring(poslist_elem.text.strip())
+            )
+        })'''
+        for poslist_elem in elem.xpath(
+            "gml:interior/gml:LinearRing/gml:posList", namespaces=elem.nsmap
+        )
+    ]
+
+    return f"POLYGON(({exterior}){',' if interiors else ''}{','.join(interiors)})"
+
+
 class GSCRegistrationScheme(RegistrationScheme):
GSC_SCHEMA = { 'identifier': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:identifier/text()'), 'type': Parameter('//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'), 'level': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'), - 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()'), - 'footprint': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()'), + 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', True), + 'footprint': Parameter('(gsc:sar_metadata|gsc:opt_metadata)/gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', True, parse_polygons_gsc), } + def __init__(self, level_re: str=r'.*(Level_[0-9]+)$'): + self.level_re = level_re + def get_context(self, source: Source, path: str) -> Context: gsc_filenames = source.list_files(path, ['GSC*.xml', 'GSC*.XML']) metadata_file = gsc_filenames[0] @@ -123,7 +158,7 @@ class GSCRegistrationScheme(RegistrationScheme): metadata['type']: source.list_files(path, ['*.tif', '*.TIF']) } - match = re.match(r'.*(Level_[0-9]+)$', metadata['level']) + match = re.match(self.level_re, metadata['level']) if match: level = match.groups()[0] else: @@ -136,7 +171,7 @@ class GSCRegistrationScheme(RegistrationScheme): product_level=level, raster_files=tiff_files, masks={ - 'validity': metadata['mask'] + 'validity': metadata['mask'][0] if metadata['mask'] else None }, metadata_files=[metadata_file], metadata={ -- GitLab From b6e54acbd3caf7e5fe2e33602e2ebc79b5c689b0 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 3 Nov 2020 09:49:03 +0100 Subject: [PATCH 27/54] Implemented DEM registrar config --- config/dem_init-db.sh | 18 +-------- config/dem_registrar-config.yml | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/config/dem_init-db.sh b/config/dem_init-db.sh index 648f4b05..cfc2c4ec 100644 --- a/config/dem_init-db.sh +++ b/config/dem_init-db.sh @@ -42,7 +42,7 @@ if python3 manage.py id check "${COLLECTION}"; then python3 manage.py browsetype create "${COLLECTION}"_Product_COP-DEM_GLO-90-DGED --traceback \ --red "gray" \ --red-range -100 4000 \ - --red-nodata 0 + --red-nodata 0 python3 manage.py collectiontype create "${COLLECTION}"_Collection --traceback \ --coverage-type "float32_grayscale" \ @@ -66,22 +66,6 @@ if python3 manage.py id check "${COLLECTION}"; then echo "Provided collection '${COLLECTION}' not valid." 
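Two details of patch 26 above are worth unpacking before the DEM configuration that follows. First, a GML `posList` is a flat `lat lon lat lon ...` string; `parse_ring` flips each pair to `(lon, lat)` before WKT serialisation (assuming its `pairwise` helper chunks the flat list into consecutive pairs). Second, the new `level_re` option lets a configuration extract the mapping key from `eop:parentIdentifier`, which the DEM config below overrides. A sketch of both, with made-up values:

```python
import re

# 1) posList -> closed (lon, lat) ring -> WKT polygon
pos_list = '46.1 12.0 46.1 12.5 45.8 12.5 46.1 12.0'
raw = pos_list.split()
ring = [(lon, lat) for lat, lon in zip(raw[::2], raw[1::2])]
wkt = 'POLYGON((%s))' % ','.join(f'{x} {y}' for x, y in ring)
assert wkt == 'POLYGON((12.0 46.1,12.5 46.1,12.5 45.8,12.0 46.1))'

# 2) the default level_re expects '...Level_<n>' suffixes; the DEM config
#    instead takes the first path segment of the parent identifier
level_re = r'([A-Z0-9-_]+)/.*'  # from dem_registrar-config.yml
match = re.match(level_re, 'COP-DEM_GLO-90-DGED/some/sub/path')
level = match.groups()[0] if match else None
assert level == 'COP-DEM_GLO-90-DGED'
```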
fi - python3 manage.py storageauth create auth-cloud-ovh "${OS_AUTH_URL_SHORT}" \ - --type keystone \ - -p auth-version "${ST_AUTH_VERSION}" \ - -p identity-api-version="${ST_AUTH_VERSION}" \ - -p username "${OS_USERNAME}" \ - -p password "${OS_PASSWORD}" \ - -p tenant-name "${OS_TENANT_NAME}" \ - -p tenant-id "${OS_TENANT_ID}" \ - -p region-name "${OS_REGION_NAME}" - - python3 manage.py storage create \ - ${UPLOAD_CONTAINER} ${UPLOAD_CONTAINER} \ - --type swift \ - --storage-auth auth-cloud-ovh - - else echo "Using existing database" fi diff --git a/config/dem_registrar-config.yml b/config/dem_registrar-config.yml index e69de29b..905b3ce7 100644 --- a/config/dem_registrar-config.yml +++ b/config/dem_registrar-config.yml @@ -0,0 +1,68 @@ +sources: + - type: swift + name: !env '${UPLOAD_CONTAINER}' + kwargs: + username: !env '${OS_USERNAME}' + password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + auth_url_short: !env '${OS_AUTH_URL_SHORT}' + +schemes: + - type: gsc + kwargs: + level_re: '([A-Z0-9-_]+)/.*' + +backends: + - type: eoxserver + filter: + kwargs: + instance_base_path: /var/www/pvs/dev + instance_name: pvs_instance + mapping: + DEM1: + COP-DEM_EEA-10-DGED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_EEA-10-DGED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_EEA-10-DGED' + coverages: + DEM1: float32_grayscale + COP-DEM_EEA-10-INSP: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_EEA-10-INSP' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_EEA-10-INSP' + coverages: + DEM1: float32_grayscale + COP-DEM_GLO-30-DGED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-30-DGED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-30-DGED' + coverages: + DEM1: float32_grayscale + COP-DEM_GLO-90-DGED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-90-DGED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-90-DGED' + coverages: + DEM1: float32_grayscale + COP-DEM_GLO-30-DTED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-30-DTED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-30-DTED' + coverages: + DEM1: int16_grayscale + COP-DEM_GLO-90-DTED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-90-DTED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-90-DTED' + coverages: + DEM1: int16_grayscale -- GitLab From d760cb32551cbda5ef30886887315fe19f290e69 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 3 Nov 2020 09:49:19 +0100 Subject: [PATCH 28/54] Fixed wrong logging backend config --- docker-compose.dem.dev.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.dem.dev.yml b/docker-compose.dem.dev.yml index 8ac49a6c..1b36628d 100644 --- a/docker-compose.dem.dev.yml +++ b/docker-compose.dem.dev.yml @@ -35,8 +35,6 @@ services: - type: bind source: ./core/ target: /core/ - logging: - driver: "fluentd" cache: ports: - "83:80" -- GitLab From db9006a7e04d035964a13b625a8bd2d5a89086c2 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 9 Nov 2020 12:30:51 +0100 Subject: [PATCH 29/54] Fixing registrar invocation in testing --- testing/registrar_test.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh 
index 1d3e0aae..020235fb 100755 --- a/testing/registrar_test.sh +++ b/testing/registrar_test.sh @@ -8,10 +8,8 @@ IFS="," while read product; do docker exec -e OS_PASSWORD=$OS_PASSWORD -i $(docker ps -qf "name=emg-pvs_registrar") \ - python3 /registrar.py \ - --objects-prefix $product \ - --service-url $SERVICE_URL \ - --reporting-dir "/mnt/reports" \ + registrar register \ + --config-file /config.yaml \ <<<$product done < "$product_list_file" -- GitLab From ecc3e02c74f1783ad42629bb0e1b872f8993c44d Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 9 Nov 2020 13:04:25 +0100 Subject: [PATCH 30/54] Fixing invocation of registrar --- testing/registrar_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh index 020235fb..6f8e7fb4 100755 --- a/testing/registrar_test.sh +++ b/testing/registrar_test.sh @@ -10,7 +10,7 @@ while read product; do docker exec -e OS_PASSWORD=$OS_PASSWORD -i $(docker ps -qf "name=emg-pvs_registrar") \ registrar register \ --config-file /config.yaml \ - <<<$product + "$product" done < "$product_list_file" -- GitLab From 63384b458ab4531d8e8dc7bcd167dd695fc171c1 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 9 Nov 2020 13:40:31 +0100 Subject: [PATCH 31/54] Fixing `replace` argument for one-off registrations --- core/registrar/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/registrar/cli.py b/core/registrar/cli.py index bfae3d6a..0a2943ff 100644 --- a/core/registrar/cli.py +++ b/core/registrar/cli.py @@ -69,13 +69,13 @@ def daemon(config_file=None, validate=False, replace=False, host=None, port=None @click.option('--validate/--no-validate', default=False) @click.option('--replace/--no-replace', default=False) @click.option('--debug/--no-debug', default=False) -def register(file_path, config_file=None, validate=False, debug=False): +def register(file_path, config_file=None, validate=False, replace=False, debug=False): setup_logging(debug) config = load_config(config_file) if validate: validate_config(config) - register_file(config, file_path) + register_file(config, file_path, replace) if __name__ == '__main__': cli() -- GitLab From d628f09dd62a699440de452a3d93162d91a9d0fa Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 9 Nov 2020 22:50:37 +0100 Subject: [PATCH 32/54] Fixing wrong item lookup in S3 source --- core/registrar/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/registrar/source.py b/core/registrar/source.py index fc0e0494..20e079b9 100644 --- a/core/registrar/source.py +++ b/core/registrar/source.py @@ -177,7 +177,7 @@ class S3Source(Source): f"{bucket}/{item['Key']}" for item in response['Contents'] if glob_patterns is None or any( - fnmatch(item['name'], glob_pattern) for glob_pattern in glob_patterns + fnmatch(item['Key'], glob_pattern) for glob_pattern in glob_patterns ) ] -- GitLab From a19754e60232e86a48bfdcfe2fa77790b3624e2b Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 10 Nov 2020 09:35:06 +0100 Subject: [PATCH 33/54] Fixing typo in backend --- core/registrar/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index 2a83de1b..d48e64d2 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -71,7 +71,7 @@ class EOxServerBackend(Backend): bucket, _ = source.get_bucket_and_key(path) storage, created_storage = backends.Storage.objects.get_or_create( - name=source.name if source.bucket 
else f'{source.name}-{bucket}', + name=source.name if source.bucket_name else f'{source.name}-{bucket}', url=bucket, storage_type='S3', storage_auth=storage_auth, -- GitLab From 02fc7e642060359af2c0fafc0198a8ccdaea2d2d Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 10 Nov 2020 11:01:12 +0100 Subject: [PATCH 34/54] Fixing S3 endpoint URL in GDAL friendly format --- core/registrar/backend.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index d48e64d2..10fd8782 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -61,9 +61,15 @@ class EOxServerBackend(Backend): 'SECRET_ACCESS_KEY': source.secret_access_key, }) + endpoint_url = source.endpoint_url + if endpoint_url.startswith('https://'): + endpoint_url = endpoint_url[len('https://'):] + elif endpoint_url.startswith('http://'): + endpoint_url = endpoint_url[len('http://'):] + storage_auth, created_storage_auth = backends.StorageAuth.objects.get_or_create( - name=source.endpoint_url, - url=source.endpoint_url, + name=endpoint_url, + url=endpoint_url, storage_auth_type='S3', auth_parameters=params, ) -- GitLab From c4a363d59a41b50838fa33221b59ddf9fcd12441 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 11:25:38 +0100 Subject: [PATCH 35/54] adding container to registrar config yml --- config/dem_registrar-config.yml | 1 + config/emg_registrar-config.yml | 1 + config/vhr18_registrar-config.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/config/dem_registrar-config.yml b/config/dem_registrar-config.yml index 905b3ce7..657ba3bf 100644 --- a/config/dem_registrar-config.yml +++ b/config/dem_registrar-config.yml @@ -10,6 +10,7 @@ sources: auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' auth_url_short: !env '${OS_AUTH_URL_SHORT}' + container: !env '${UPLOAD_CONTAINER}' schemes: - type: gsc diff --git a/config/emg_registrar-config.yml b/config/emg_registrar-config.yml index 6f85f7d3..8898524d 100644 --- a/config/emg_registrar-config.yml +++ b/config/emg_registrar-config.yml @@ -10,6 +10,7 @@ sources: auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' auth_url_short: !env '${OS_AUTH_URL_SHORT}' + container: !env '${UPLOAD_CONTAINER}' schemes: - type: gsc diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml index 0295be2a..17e9f670 100644 --- a/config/vhr18_registrar-config.yml +++ b/config/vhr18_registrar-config.yml @@ -10,6 +10,7 @@ sources: auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' auth_url_short: !env '${OS_AUTH_URL_SHORT}' + container: !env '${UPLOAD_CONTAINER}' schemes: - type: gsc -- GitLab From f0771a9de3a4c5f767329dae1bd6b1f95dd7f700 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 11 Nov 2020 12:54:45 +0100 Subject: [PATCH 36/54] Adding pre/post handler support --- core/registrar/config-schema.yaml | 35 +++++++++++++++++++++-- core/registrar/post_handlers.py | 47 +++++++++++++++++++++++++++++++ core/registrar/registrar.py | 21 ++++++++++++-- core/registrar/utils.py | 12 ++++++++ core/registrar/xml.py | 1 + 5 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 core/registrar/post_handlers.py diff --git a/core/registrar/config-schema.yaml b/core/registrar/config-schema.yaml index ed85899e..c6d6659c 100644 --- a/core/registrar/config-schema.yaml +++ b/core/registrar/config-schema.yaml @@ -62,6 +62,35 @@ properties: kwargs: description: Constructor keyword arguments type: 
object
-
-  # TODO: describe type specific args/kwargs
-
+  pre_handlers:
+    description: List of handlers to be run prior to the registration of an item.
+    type: array
+    items:
+      description: A single pre-registration handler
+      type: object
+      properties:
+        path:
+          description: Python module path to the registration handler
+          type: string
+        args:
+          description: arguments for the initialization of the handler
+          type: array
+        kwargs:
+          description: keyword arguments for the initialization of the handler
+          type: object
+  post_handlers:
+    description: List of handlers to be run after the registration of an item.
+    type: array
+    items:
+      description: A single post-registration handler
+      type: object
+      properties:
+        path:
+          description: Python module path to the registration handler
+          type: string
+        args:
+          description: arguments for the initialization of the handler
+          type: array
+        kwargs:
+          description: keyword arguments for the initialization of the handler
+          type: object
diff --git a/core/registrar/post_handlers.py b/core/registrar/post_handlers.py
new file mode 100644
index 00000000..44d6ed20
--- /dev/null
+++ b/core/registrar/post_handlers.py
@@ -0,0 +1,47 @@
+import os.path
+import textwrap
+from datetime import datetime
+
+from .context import Context
+from .utils import isoformat
+from .xml import escape
+
+
+class ReportingPostHandler:
+    def __init__(self, service_url: str, reporting_dir: str):
+        self.service_url = service_url
+        self.reporting_dir = reporting_dir
+
+    def __call__(self, config: dict, path: str, context: Context):
+        inserted = datetime.now()
+        timestamp = inserted.strftime("%Y%m%dT%H%M%S")
+        with open(os.path.join(self.reporting_dir, 'item_%s_%s.xml' % (timestamp, context.identifier)), 'w') as f:
+            f.write(textwrap.dedent("""
+
+
+                {identifier}
+                {availability_time}
+
+                WCS
+                {wcs_capabilities_url}
+
+
+                WMS
+                {wms_capabilities_url}
+
+
+                """.format(
+                    identifier=escape(context.identifier),
+                    availability_time=escape(isoformat(inserted)),
+                    wcs_capabilities_url=escape(
+                        '%s/ows?service=wcs&request=GetCapabilities&cql=identifier="%s"'
+                        % (self.service_url, context.identifier)
+                    ),
+                    wms_capabilities_url=escape(
+                        '%s/ows?service=wms&request=GetCapabilities&cql=identifier="%s"'
+                        % (self.service_url, context.identifier)
+                    ),
+                )))
diff --git a/core/registrar/registrar.py b/core/registrar/registrar.py
index 7630613d..14ed7303 100644
--- a/core/registrar/registrar.py
+++ b/core/registrar/registrar.py
@@ -1,5 +1,6 @@
 import re
 import logging
+import importlib
 
 from .source import get_source
 from .scheme import get_scheme
@@ -39,8 +40,24 @@ def register_file(config: dict, path: str, replace: bool=False):
     return context
 
 
+def _get_handlers(config, name):
+    handlers = []
+    for handler_def in config.get(name, []):
+        module_path, _, handler_name = handler_def['path'].rpartition('.')
+        handler_cls = getattr(importlib.import_module(module_path), handler_name)
+        handlers.append(
+            handler_cls(
+                *handler_def.get('args', []),
+                **handler_def.get('kwargs', {}),
+            )
+        )
+
+    return handlers
+
+
 def get_pre_handlers(config):
-    return []
+    return _get_handlers(config, 'pre_handlers')
+
 
 def get_post_handlers(config):
-    return []
+    return _get_handlers(config, 'post_handlers')
diff --git a/core/registrar/utils.py b/core/registrar/utils.py
index e69de29b..8a522c91 100644
--- a/core/registrar/utils.py
+++ b/core/registrar/utils.py
@@ -0,0 +1,12 @@
+def isoformat(dt):
+    """ Formats a datetime object to an ISO string. Timezone naive datetimes
+    are treated as UTC Zulu. UTC Zulu is expressed with the proper "Z"
+    ending and not with the "+00:00" offset declaration.
+
+    :param dt: the :class:`datetime.datetime` to encode
+    :returns: an encoded string
+    """
+    if not dt.utcoffset():
+        dt = dt.replace(tzinfo=None)
+        return dt.isoformat("T") + "Z"
+    return dt.isoformat("T")
diff --git a/core/registrar/xml.py b/core/registrar/xml.py
index cc7b37ee..8a25ff07 100644
--- a/core/registrar/xml.py
+++ b/core/registrar/xml.py
@@ -4,6 +4,7 @@ from tempfile import gettempdir, gettempprefix
 from dataclasses import dataclass, field
 from typing import Union, Type, Optional, List, Callable, Any
 import logging
+from xml.sax.saxutils import escape
 
 import lxml.etree
-- 
GitLab


From f609bd92a875b749b81e329a6c81b710bb41a401 Mon Sep 17 00:00:00 2001
From: Lubomir Bucek
Date: Wed, 11 Nov 2020 13:25:01 +0100
Subject: [PATCH 37/54] minor updates to op guide

---
 documentation/operator-guide/access.rst     | 17 ++++++++++-------
 documentation/operator-guide/management.rst |  1 +
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/documentation/operator-guide/access.rst b/documentation/operator-guide/access.rst
index b96c281e..9dafab51 100644
--- a/documentation/operator-guide/access.rst
+++ b/documentation/operator-guide/access.rst
@@ -68,7 +68,7 @@ Currently setting individual authorization rules on a ``Collection`` and ``Servi
 Configuration
 ~~~~~~~~~~~~~
 
-For correct configuration of Shibboleth SP3 on a new stack, several steps need to be done. Most of these configurations are usually done in the :ref:`_initialization` step using ``pvs_starter`` tool. Still, it is advised to check following steps, understand them and change if necessary.
+For correct configuration of Shibboleth SP3 on a new stack, several steps need to be done. Most of these configurations are usually done in the :ref:`initialization` step using the ``pvs_starter`` tool. Still, it is advised to check the following steps, understand them, and change them if necessary.
 
 Briefly summarized, SP and IdP need to exchange metadata and certificates to trust each other, SP needs to know which attributes the IdP will be sending about the logged-in user and respective access-control rules are configured based on those attributes. Most of the configurations are done via docker configs defined in the docker compose
 
 - Create a pair of key, certificate using attached Shibboleth ``config/shibboleth/keygen.sh`` in the cloned vs repository and save them as respective docker secrets.
@@ -81,6 +81,7 @@ Briefly summarized, SP and IdP need to exchange metadata and certificates to tru
       docker secret create _SHIB_KEY sp-signing-key.pem
 
 - Get IDP metadata and save it as a docker config. Also read the entityID of the IdP for further use in referencing it in your ``shibboleth2.xml`` configuration.
+
   .. code-block:: bash
 
       docker config create idp-metadata idp-metadata-received.xml
@@ -88,13 +89,14 @@ Briefly summarized, SP and IdP need to exchange metadata and certificates to tru
 
 - Configure Apache ServerName used inside the ``shibauth`` service by modifying ``APACHE_SERVERNAME`` environment variable of corresponding ``shibauth`` service in ``docker-compose..ops.yml``. This URL should resolve to the actual service URL.
 
 - Modify shibboleth2.xml content by setting your "entityID" in the ``ApplicationDefaults`` element. Additionally edit the "entityID" value inside the ``SSO`` element to match the IdP "entityID". Note that "entityID" does not need to resolve to an actual service URL.
+
 ..
code-block:: xml - - - SAML2 - + + + + SAML2 + + - Deploy your shibauth service and exchange your SP metadata with the IdP provider and have them recognize your SP. Necessary metadata needs to be downloaded from url ``/Shibboleth.sso/Metadata``. @@ -108,6 +110,7 @@ Briefly summarized, SP and IdP need to exchange metadata and certificates to tru - Create custom access rules based on these attributes and map these access controls to different internal apache routes to which Traefik ForwardAuth middleware will point. Access rules are created in ``config/shibboleth/-ac.xml``. + Example of external Access control rules configuration: .. code-block:: xml diff --git a/documentation/operator-guide/management.rst b/documentation/operator-guide/management.rst index bbb33703..40fa410b 100644 --- a/documentation/operator-guide/management.rst +++ b/documentation/operator-guide/management.rst @@ -116,6 +116,7 @@ In order to select any other option from the Kibana toolkit, click the horizonta .. _fig_kibana_menu: .. figure:: images/kibana_1.* + :width: 250 :alt: Kibana menu *Kibana menu* -- GitLab From 53ea6c9d0c823c1e4d3969f79aa246a325ec581a Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 13:57:20 +0100 Subject: [PATCH 38/54] minor --- README.md | 2 +- documentation/operator-guide/configuration.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 734ddd8a..c2cd9c2a 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,7 @@ printf "" | docker secret create DJANGO_PASSWORD - printf "" | docker secret create OS_PASSWORD - # configs creation -printf ":::" | docker config create sftp-users - +printf ":::" | docker config create sftp_users_ - # for production base stack deployment, additional basic authentication credentials list need to be created # format of such a list used by traefik are username:hashedpassword (MD5, SHA1, BCrypt) sudo apt-get install apache2-utils diff --git a/documentation/operator-guide/configuration.rst b/documentation/operator-guide/configuration.rst index 44b18d7e..dce3a837 100644 --- a/documentation/operator-guide/configuration.rst +++ b/documentation/operator-guide/configuration.rst @@ -516,7 +516,7 @@ An example of creating configurations for sftp image using the following command .. code-block:: bash - printf ":::" | docker config create sftp-users - + printf ":::" | docker config create sftp-users- - An example of creating ``OS_PASSWORD`` as secret using the following command : -- GitLab From c26fffd401ba63b8c9a222165c91602b0a6b3e13 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 14:38:46 +0100 Subject: [PATCH 39/54] update sftp ports --- README.md | 4 ++-- docker-compose.dem.yml | 2 +- docker-compose.emg.yml | 2 +- documentation/operator-guide/management.rst | 2 +- testing/registrar_test.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c2cd9c2a..dd15e52f 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ The following services are defined via docker compose files. 
* uses external atmoz/sftp image * provides sftp access to two volumes for report exchange on registration result XMLs and ingest requirement XMLs -* accessible on swarm master on port 2222 +* accessible on swarm master on ports 2222-2224 (one per collection) * credentials supplied via config # Usage @@ -304,7 +304,7 @@ On production machine, `fluentd` is set as a logging driver for docker daemon by The `SFTP` image allows remote access into 2 logging folders; you can define (edit/add) users, passwords and (UID/GID) using `docker config create` mentioned above. -In the below example the username is `eox`, once the stack is deployed you can sftp into the logging folders through port 2222 on -if you are running the dev stack- localhost : +In the below example the username is `eox`. Once the stack is deployed, you can sftp into the logging folders through port 2222 for `vhr18` (`emg` and `dem` use ports 2223 and 2224 respectively), on localhost if you are running the dev stack: ```bash sftp -P 2222 eox@127.0.0.1 ``` diff --git a/docker-compose.dem.yml b/docker-compose.dem.yml index 455cbe09..fc4b355f 100644 --- a/docker-compose.dem.yml +++ b/docker-compose.dem.yml @@ -185,7 +185,7 @@ services: - source: sftp_users_dem target: /etc/sftp/users.conf ports: - - "2222:22" + - "2224:22" deploy: replicas: 1 ingestor: diff --git a/docker-compose.emg.yml b/docker-compose.emg.yml index bbcf9602..06da6329 100644 --- a/docker-compose.emg.yml +++ b/docker-compose.emg.yml @@ -196,7 +196,7 @@ services: target: /etc/sftp/users.conf ports: - - "2222:22" + - "2223:22" deploy: replicas: 1 configs:
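For scripted access, the same SFTP endpoints can be driven with paramiko (the test suite further below does exactly this). A minimal sketch; host, username and password are placeholders that must match the sftp users config created earlier:

```python
# Sketch only: credentials and the listed directory are assumptions,
# not values taken from this repository.
import paramiko


def list_report_files(host='127.0.0.1', port=2223, username='eox', password='changeme'):
    # port 2222 reaches the vhr18 stack, 2223 emg and 2224 dem,
    # matching the compose port mappings above
    transport = paramiko.Transport((host, port))
    transport.connect(username=username, password=password)
    try:
        with paramiko.SFTPClient.from_transport(transport) as sftp:
            return sftp.listdir('.')  # root of the mounted report-exchange volumes
    finally:
        transport.close()
```

diff --git a/documentation/operator-guide/management.rst b/documentation/operator-guide/management.rst index 40fa410b..181866b9 100644 --- a/documentation/operator-guide/management.rst +++ b/documentation/operator-guide/management.rst @@ -63,7 +63,7 @@ Inspecting reports ------------------ Once a product is registered, an XML report containing the WCS and WMS GetCapabilities URLs of the registered product is generated and can be accessed by connecting to the `SFTP` service via the sftp protocol. -In order to log into the logging folders through port 2222 on the hosting ip (e.g. localhost if you are running the dev stack) The following command can be used: +In order to log into the logging folders through port 2222 for ``vhr18`` (``emg`` and ``dem`` use ports 2223 and 2224 respectively) on the hosting IP (e.g. localhost if you are running the dev stack), the following command can be used: .. 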
code-block:: bash diff --git a/testing/registrar_test.py b/testing/registrar_test.py index b53da07c..5a8397f3 100644 --- a/testing/registrar_test.py +++ b/testing/registrar_test.py @@ -34,7 +34,7 @@ def identifiers(): def sftp_connection(): username = os.environ['sftp_users_emg'].split(':')[0] password = os.environ['sftp_users_emg'].split(':')[1] - transport = paramiko.Transport(('docker', 2222)) + transport = paramiko.Transport(('docker', 2223)) transport.connect(username=username, password=password) with paramiko.SFTPClient.from_transport(transport) as sftp: yield sftp -- GitLab From 99fc52c514b3310cfdbdc8a3ac4062e99728938e Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 11 Nov 2020 14:42:23 +0100 Subject: [PATCH 40/54] Fixing reporting post-handler --- core/registrar/post_handlers.py | 8 +++++++- core/registrar/registrar.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/registrar/post_handlers.py b/core/registrar/post_handlers.py index 44d6ed20..2e0f7345 100644 --- a/core/registrar/post_handlers.py +++ b/core/registrar/post_handlers.py @@ -1,12 +1,16 @@ import os.path import textwrap from datetime import datetime +import logging from .context import Context from .utils import isoformat from .xml import escape +logger = logging.getLogger(__name__) + + class ReportingPostHandler: def __init__(self, service_url: str, reporting_dir: str): self.service_url = service_url @@ -15,7 +19,9 @@ class ReportingPostHandler: def __call__(self, config: dict, path: str, context: Context): inserted = datetime.now() timestamp = inserted.strftime("%Y%m%dT%H%M%S") - with open(os.path.join(self.reporting_dir, 'item_%s_%s.xml' % (timestamp, context.identifier)), 'w') as f: + filename = os.path.join(self.reporting_dir, 'item_%s_%s.xml' % (timestamp, context.identifier)) + logger.info(f"Generating report for path {path} at {filename}") + with open(filename, 'w') as f: f.write(textwrap.dedent(""" Date: Wed, 11 Nov 2020 14:42:44 +0100 Subject: [PATCH 41/54] Adjusting registrar configs for DEM, EMG, VHR18 --- config/dem_registrar-config.yml | 6 ++++++ config/emg_registrar-config.yml | 5 +++++ config/vhr18_registrar-config.yml | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/config/dem_registrar-config.yml b/config/dem_registrar-config.yml index 905b3ce7..f0679d22 100644 --- a/config/dem_registrar-config.yml +++ b/config/dem_registrar-config.yml @@ -66,3 +66,9 @@ backends: - !env '${COLLECTION}_COP-DEM_GLO-90-DTED' coverages: DEM1: int16_grayscale + +post_handlers: + - path: registrar.post_handlers.ReportingPostHandler + kwargs: + service_url: dem.pass.copernicus.eu + reporting_dir: /mnt/reports/ diff --git a/config/emg_registrar-config.yml b/config/emg_registrar-config.yml index 6f85f7d3..2d6e1ee4 100644 --- a/config/emg_registrar-config.yml +++ b/config/emg_registrar-config.yml @@ -267,3 +267,8 @@ backends: coverages: TX01_7: sar_hh_hv_vh_vv_rgb +post_handlers: + - path: registrar.post_handlers.ReportingPostHandler + kwargs: + service_url: emg.pass.copernicus.eu + reporting_dir: /mnt/reports/ diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml index 0295be2a..a2bed82a 100644 --- a/config/vhr18_registrar-config.yml +++ b/config/vhr18_registrar-config.yml @@ -211,3 +211,9 @@ backends: TR00: RGBNir masks: validity: validity + +post_handlers: + - path: registrar.post_handlers.ReportingPostHandler + kwargs: + service_url: vhr18.pass.copernicus.eu + reporting_dir: /mnt/reports/ -- GitLab From 723355ae7bf8332618078bba322c56c6662f2b17 Mon Sep 
17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 14:57:50 +0100 Subject: [PATCH 42/54] fixing my bad merge --- testing/registrar_test.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh index 6b190894..25a8ba08 100755 --- a/testing/registrar_test.sh +++ b/testing/registrar_test.sh @@ -10,8 +10,6 @@ docker exec -i $(docker ps -qf "name=emg-pvs_renderer") /wait-initialized.sh while read product; do docker exec -e OS_PASSWORD=$OS_PASSWORD -i $(docker ps -qf "name=emg-pvs_registrar") \ registrar register \ - --service-url $SERVICE_URL \ - --reporting-dir "/mnt/reports" \ --config-file /config.yaml \ "$product" -- GitLab From 309a5e0ac857d8f1483a8fb57e9408169c8fa2c4 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 15:05:36 +0100 Subject: [PATCH 43/54] fix shibauth traefik labels --- docker-compose.dem.ops.yml | 20 ++++++++++---------- docker-compose.emg.ops.yml | 20 ++++++++++---------- docker-compose.vhr18.ops.yml | 20 ++++++++++---------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/docker-compose.dem.ops.yml b/docker-compose.dem.ops.yml index 0fa2a23e..5184b39d 100644 --- a/docker-compose.dem.ops.yml +++ b/docker-compose.dem.ops.yml @@ -187,18 +187,18 @@ services: constraints: [node.role == manager] labels: # router for basic auth based access (https) - - "traefik.http.routers.shibauth.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth.middlewares=compress@file,cors@file" - - "traefik.http.routers.shibauth.tls=true" - - "traefik.http.routers.shibauth.tls.certresolver=default" - - "traefik.http.routers.shibauth.entrypoints=https" + - "traefik.http.routers.dem-shibauth.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.dem-shibauth.middlewares=compress@file,cors@file" + - "traefik.http.routers.dem-shibauth.tls=true" + - "traefik.http.routers.dem-shibauth.tls.certresolver=default" + - "traefik.http.routers.dem-shibauth.entrypoints=https" # router for basic auth based access (http) - - "traefik.http.routers.shibauth-redirect.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth-redirect.middlewares=redirect@file" - - "traefik.http.routers.shibauth-redirect.entrypoints=http" + - "traefik.http.routers.dem-shibauth-redirect.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - 
"traefik.http.routers.dem-shibauth-redirect.middlewares=redirect@file" + - "traefik.http.routers.dem-shibauth-redirect.entrypoints=http" # general - - "traefik.http.services.shibauth.loadbalancer.sticky=false" - - "traefik.http.services.shibauth.loadbalancer.server.port=80" + - "traefik.http.services.dem-shibauth.loadbalancer.sticky=false" + - "traefik.http.services.dem-shibauth.loadbalancer.server.port=80" - "traefik.docker.network=dem-extnet" - "traefik.docker.lbswarm=true" - "traefik.enable=true" diff --git a/docker-compose.emg.ops.yml b/docker-compose.emg.ops.yml index 98a5ba55..4f4d5cd0 100644 --- a/docker-compose.emg.ops.yml +++ b/docker-compose.emg.ops.yml @@ -187,18 +187,18 @@ services: constraints: [node.role == manager] labels: # router for basic auth based access (https) - - "traefik.http.routers.shibauth.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth.middlewares=compress@file,cors@file" - - "traefik.http.routers.shibauth.tls=true" - - "traefik.http.routers.shibauth.tls.certresolver=default" - - "traefik.http.routers.shibauth.entrypoints=https" + - "traefik.http.routers.emg-shibauth.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.emg-shibauth.middlewares=compress@file,cors@file" + - "traefik.http.routers.emg-shibauth.tls=true" + - "traefik.http.routers.emg-shibauth.tls.certresolver=default" + - "traefik.http.routers.emg-shibauth.entrypoints=https" # router for basic auth based access (http) - - "traefik.http.routers.shibauth-redirect.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth-redirect.middlewares=redirect@file" - - "traefik.http.routers.shibauth-redirect.entrypoints=http" + - "traefik.http.routers.emg-shibauth-redirect.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.emg-shibauth-redirect.middlewares=redirect@file" + - "traefik.http.routers.emg-shibauth-redirect.entrypoints=http" # general - - "traefik.http.services.shibauth.loadbalancer.sticky=false" - - "traefik.http.services.shibauth.loadbalancer.server.port=80" + - "traefik.http.services.emg-shibauth.loadbalancer.sticky=false" + - "traefik.http.services.emg-shibauth.loadbalancer.server.port=80" - "traefik.docker.network=emg-extnet" - "traefik.docker.lbswarm=true" - "traefik.enable=true" diff --git a/docker-compose.vhr18.ops.yml b/docker-compose.vhr18.ops.yml index 1c8a5f4c..c0baef92 100644 --- a/docker-compose.vhr18.ops.yml 
+++ b/docker-compose.vhr18.ops.yml @@ -187,18 +187,18 @@ services: constraints: [node.role == manager] labels: # router for basic auth based access (https) - - "traefik.http.routers.shibauth.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth.middlewares=compress@file,cors@file" - - "traefik.http.routers.shibauth.tls=true" - - "traefik.http.routers.shibauth.tls.certresolver=default" - - "traefik.http.routers.shibauth.entrypoints=https" + - "traefik.http.routers.vhr18-shibauth.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.vhr18-shibauth.middlewares=compress@file,cors@file" + - "traefik.http.routers.vhr18-shibauth.tls=true" + - "traefik.http.routers.vhr18-shibauth.tls.certresolver=default" + - "traefik.http.routers.vhr18-shibauth.entrypoints=https" # router for basic auth based access (http) - - "traefik.http.routers.shibauth-redirect.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth-redirect.middlewares=redirect@file" - - "traefik.http.routers.shibauth-redirect.entrypoints=http" + - "traefik.http.routers.vhr18-shibauth-redirect.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.vhr18-shibauth-redirect.middlewares=redirect@file" + - "traefik.http.routers.vhr18-shibauth-redirect.entrypoints=http" # general - - "traefik.http.services.shibauth.loadbalancer.sticky=false" - - "traefik.http.services.shibauth.loadbalancer.server.port=80" + - "traefik.http.services.vhr18-shibauth.loadbalancer.sticky=false" + - "traefik.http.services.vhr18-shibauth.loadbalancer.server.port=80" - "traefik.docker.network=vhr18-extnet" - "traefik.docker.lbswarm=true" - "traefik.enable=true" -- GitLab From 6934263925c19992170f5937b582ace5caf67255 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 11 Nov 2020 16:02:29 +0100 Subject: [PATCH 44/54] Resolving merge issues --- testing/registrar_test.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh index 6b190894..25a8ba08 100755 --- a/testing/registrar_test.sh +++ b/testing/registrar_test.sh @@ -10,8 +10,6 @@ docker exec -i $(docker ps -qf "name=emg-pvs_renderer") /wait-initialized.sh while read product; do docker exec -e OS_PASSWORD=$OS_PASSWORD -i $(docker ps -qf "name=emg-pvs_registrar") \ registrar register \ - --service-url $SERVICE_URL \ - 
--reporting-dir "/mnt/reports" \ --config-file /config.yaml \ "$product" -- GitLab From e170c0ecad2dd9f6e449dcbd77bc6adf74744cb6 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 11 Nov 2020 16:19:02 +0100 Subject: [PATCH 45/54] Adding an additional XPath as footprint location. --- core/registrar/scheme.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index 2e17a487..979524f5 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -141,7 +141,10 @@ class GSCRegistrationScheme(RegistrationScheme): 'type': Parameter('//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'), 'level': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'), 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', True), - 'footprint': Parameter('(gsc:sar_metadata|gsc:opt_metadata)/gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', True, parse_polygons_gsc), + 'footprint': Parameter( + '(gsc:sar_metadata|gsc:opt_metadata|gsc:report)/gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', + True, parse_polygons_gsc + ), } def __init__(self, level_re: str=r'.*(Level_[0-9]+)$'): -- GitLab From 22a496d0e466f2da87b1cd1782b2ab11cc284eb6 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 11 Nov 2020 16:30:47 +0100 Subject: [PATCH 46/54] Generalizing the footprint XPath location. --- core/registrar/scheme.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index 979524f5..8a65249c 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -142,7 +142,7 @@ class GSCRegistrationScheme(RegistrationScheme): 'level': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'), 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', True), 'footprint': Parameter( - '(gsc:sar_metadata|gsc:opt_metadata|gsc:report)/gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', + '//gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', True, parse_polygons_gsc ), } -- GitLab
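The second patch widens the lookup from the explicit `(gsc:sar_metadata|gsc:opt_metadata|gsc:report)` prefixes to a single `//gml:target` match. A self-contained sketch of what that XPath does; the namespace URIs here are assumptions for illustration, the registrar resolves the real ones from the parsed document:

```python
# Hypothetical illustration only -- not the parse_polygons_gsc
# implementation from the patch.
from lxml import etree

NAMESPACES = {  # assumed URIs; real code takes them from root.nsmap
    'gml': 'http://www.opengis.net/gml',
    'eop': 'http://earth.esa.int/eop',
}


def find_footprint_polygons(metadata: bytes):
    root = etree.fromstring(metadata)
    # '//gml:target' matches wherever the footprint lives, be it under
    # gsc:sar_metadata, gsc:opt_metadata or gsc:report
    return root.xpath(
        '//gml:target/eop:Footprint/gml:multiExtentOf'
        '/gml:MultiSurface/gml:surfaceMembers/gml:Polygon',
        namespaces=NAMESPACES,
    )
```

From 01c45828a0771863d3ebd2de62141c9c6562177b Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 16:32:38 +0100 Subject: [PATCH 47/54] cleanup --- README.md | 2 +- core/registrar.py | 498 ---------------------------------------------- core/setup.py | 2 +- 3 files changed, 2 insertions(+), 500 deletions(-) delete mode 100644 core/registrar.py diff --git a/README.md b/README.md index dd15e52f..abd02d86 100644 --- a/README.md +++ b/README.md @@ -253,7 +253,7 @@ docker stack deploy -c docker-compose.emg.yml -c docker-compose.emg.dev.yml -c d Deploy base & logging stack in production environment: ``` docker stack deploy -c docker-compose.base.ops.yml base-pvs -docker stack deploy -c docker-compose.logging.yml docker-compose.logging.ops.yml logging +docker stack deploy -c docker-compose.logging.yml -c docker-compose.logging.ops.yml logging ``` Deploy the stack in production environment: Please note that in order to reuse existing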
database volumes, needs to be the same. Here we use `vhr18-pvs` but in operational service `vhr18-pdas` is used. diff --git a/core/registrar.py b/core/registrar.py deleted file mode 100644 index 60b805e3..00000000 --- a/core/registrar.py +++ /dev/null @@ -1,498 +0,0 @@ -#!/usr/bin/env python -# ----------------------------------------------------------------------------- -# -# Project: registrar.py -# Authors: Stephan Meissl -# -# ----------------------------------------------------------------------------- -# Copyright (c) 2019 EOX IT Services GmbH -# -# Python script to register products. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies of this Software or works derived from this Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# ----------------------------------------------------------------------------- - - -import sys -import os -import argparse -import textwrap -import logging -import traceback -from xml.sax.saxutils import escape -import subprocess - -import redis -import lxml.etree -from swiftclient.service import SwiftService - -import django -from django.db import transaction -from django.contrib.gis.geos import GEOSGeometry -from osgeo import gdal - -path = os.path.join(os.getenv('INSTALL_DIR', "/var/www/pvs"), "pvs_instance") -if path not in sys.path: - sys.path.append(path) - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pvs_instance.settings") -django.setup() - -from eoxserver.backends import access -from eoxserver.contrib import vsi -from eoxserver.backends import models as backends -from eoxserver.core.util.timetools import isoformat -from eoxserver.resources.coverages import models -from eoxserver.resources.coverages.registration.product import ( - ProductRegistrator -) -from eoxserver.resources.coverages.registration.registrators.gdal import ( - GDALRegistrator -) - -logger = logging.getLogger(__name__) - -def setup_logging(verbosity): - # start logging setup - # get command line level - verbosity = verbosity - if verbosity == 0: - level = logging.CRITICAL - elif verbosity == 1: - level = logging.ERROR - elif verbosity == 2: - level = logging.WARNING - elif verbosity == 3: - level = logging.INFO - else: - level = logging.DEBUG - logger.setLevel(level) - sh = logging.StreamHandler() - sh.setLevel(level) - formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s") - sh.setFormatter(formatter) - logger.addHandler(sh) - # finished logging setup - - -def set_gdal_swift_auth(): - # parsing command line output of swift auth - auth_keys = subprocess.check_output(["swift", "auth"]).decode(sys.stdout.encoding).split("\n") - storage_url 
= auth_keys[0].split("OS_STORAGE_URL=")[1] - auth_token = auth_keys[1].split("OS_AUTH_TOKEN=")[1] - # setting gdal config - gdal.SetConfigOption("SWIFT_STORAGE_URL", storage_url) - gdal.SetConfigOption("SWIFT_AUTH_TOKEN", auth_token) - - -def add_mask(product): - metadata_item = product.metadata_items.all()[0] - with access.vsi_open(metadata_item) as f: - tree = lxml.etree.parse(f) - root = tree.getroot() - wkt = tree.xpath( - '//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', - namespaces=root.nsmap - )[0] - geometry = GEOSGeometry(wkt) - mask_type = models.MaskType.objects.get(product_type=product.product_type) - logger.debug("Adding mask") - models.Mask.objects.create( - product=product, - mask_type=mask_type, - geometry=geometry, - ) - - -def get_product_type_and_level(metadata_item): - level = None - with access.vsi_open(metadata_item) as f: - tree = lxml.etree.parse(f) - root = tree.getroot() - - try: - xp = '//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()' - product_type_name = tree.xpath(xp, namespaces=root.nsmap)[0] - except Exception as e: - logger.debug( - 'Failed to determine product type of %s, error was %s' - % (metadata_item.location, e) - ) - - try: - xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()' - parent_identifier = tree.xpath(xp, namespaces=root.nsmap)[0] - - if parent_identifier.endswith('Level_1'): - level = 'Level_1' - if parent_identifier.endswith('Level_3'): - level = 'Level_3' - else: - raise Exception('Invalid parent identifier type name %s' % parent_identifier) - except Exception as e: - logger.debug( - 'Failed to determine product level of %s, error was %s' - % (metadata_item.location, e) - ) - - return product_type_name, level - - -def get_product_collection(metadata_file): - # in case collection needs to be determined from metadata - try: - if metadata_file.startswith("/vsiswift"): - set_gdal_swift_auth() - with vsi.open(metadata_file, "r") as f: - tree = lxml.etree.parse(f) - root = tree.getroot() - xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()' - product_type_name = tree.xpath(xp, namespaces=root.nsmap) - extracted = product_type_name[0].split('/')[0] - return extracted - except Exception as e: - logger.debug( - 'Failed to determine product collection for metadata file %s, error was %s' - % (metadata_file, e) - ) - - -def get_product_type_from_band_count(product_type_name, file_path): - # get raster band count via gdal - logger.debug("Opening file using GDAL: %s" % file_path) - if file_path.startswith("/vsiswift"): - set_gdal_swift_auth() - src_ds = gdal.Open(file_path) - if src_ds is None: - raise RegistrationError("Band check: failed to open dataset: %s " % file_path) - # try to fetch product model with _bandcount - product_type_name_upd = "%s_%s" % (product_type_name, src_ds.RasterCount) - try: - product_type_model = models.ProductType.objects.get(name=product_type_name_upd) - return product_type_model - except models.ProductType.DoesNotExist: - raise RegistrationError("Product Type: '%s' was not found" % product_type_name_upd) - - -class RegistrationError(Exception): - pass - - -@transaction.atomic -def registrar( - collection_stack, - objects_prefix, upload_container=None, replace=False, client=None, registered_set_key=None, - reporting_dir=None, service_url=None - -): - logger.info("Starting registration of 
product '%s'." % objects_prefix) - - metadata_package, data_package = None, None - if not upload_container: - # assuming objects_prefix = bucket/itemname - upload_container = objects_prefix.partition("/")[0] - objects_prefix = objects_prefix.partition("/")[2] - with SwiftService() as swift: - list_parts_gen = swift.list( - container=upload_container, options={"prefix": objects_prefix}, - ) - for page in list_parts_gen: - if page["success"]: - for item in page["listing"]: - if item["name"].endswith(".xml"): - metadata_package = item["name"] - elif item["name"].endswith(".TIF") or \ - item["name"].endswith(".tif"): - data_package = item["name"] - elif not item["name"].endswith(".tar"): - raise RegistrationError( - "Product with objects prefix '%s' has " - "wrong content '%s'." - % (objects_prefix, item["name"]) - ) - else: - logger.error(page["error"]) - raise RegistrationError( - "No product found with objects prefix '%s'." - % objects_prefix - ) - - if metadata_package is None or data_package is None: - raise RegistrationError( - "Product with objects prefix '%s' has missing content." - % objects_prefix - ) - logger.debug("Found objects '%s' and '%s'." % (data_package, metadata_package)) - - storage = backends.Storage.objects.get(name=upload_container) - metadata_item = models.MetaDataItem(storage=storage, location=metadata_package) - - product_type, level = get_product_type_and_level(metadata_item) - if collection_stack == 'DEM': - # special for DEM files, collection name === product_type - gdal_metadata_file_path = "/vsiswift/%s/%s" % (upload_container, metadata_package) - product_type = get_product_collection(gdal_metadata_file_path) - logger.debug("Registering product") - product_type_name = "%s_Product_%s" % (collection_stack, product_type) - - try: - # first find product type by name from path - product_type_model = models.ProductType.objects.get(name=product_type_name) - except models.ProductType.DoesNotExist: - # if not found, maybe there are more product types with _bandcount suffix - gdal_file_path = "/vsiswift/%s/%s" % (upload_container, data_package) - product_type_model = get_product_type_from_band_count(product_type_name, gdal_file_path) - product_type_name = product_type_model.name - coverage_type_names = product_type_model.allowed_coverage_types.all() - if len(coverage_type_names) > 1: - logger.warning("More available 'CoverageType' found, selecting the first one.") - coverage_type_name = coverage_type_names[0].name - - product, replaced = ProductRegistrator().register( - metadata_locations=[[upload_container, - metadata_package, ], ], - type_name=product_type_name, - replace=replace, - extended_metadata=True, - mask_locations=None, - package_path=None, - simplify_footprint_tolerance=0.0001, # ~10meters - overrides={}, - ) - if product.footprint.empty: - product.delete() - raise RegistrationError("No footprint was extracted. 
full product: %s" % product) - - collection = models.Collection.objects.get( - identifier=collection_stack - ) - logger.debug("Inserting product into collection %s" % collection_stack) - models.collection_insert_eo_object(collection, product) - - if collection_stack == "DEM": - # also insert it to its own collection - collection_own = models.Collection.objects.get( - identifier="%s_%s" % (collection, product_type) - ) - logger.debug("Inserting product to collection %s_%s" % (collection, product_type)) - models.collection_insert_eo_object(collection_own, product) - - if level == 'Level_1': - collection_level_1 = models.Collection.objects.get( - identifier="%s_Level_1" % collection - ) - logger.debug("Inserting product to collection %s_Level_1" % collection) - models.collection_insert_eo_object(collection_level_1, product) - elif level == 'Level_3': - collection_level_3 = models.Collection.objects.get( - identifier="%s_Level_3" % collection - ) - logger.debug("Inserting product to collection %s_Level_3" % collection) - models.collection_insert_eo_object(collection_level_3, product) - - logger.debug("Registering coverage") - report = GDALRegistrator().register( - data_locations=[[upload_container, data_package, ], ], - metadata_locations=[[upload_container, - metadata_package, ], ], - coverage_type_name=coverage_type_name, - overrides={ - "identifier": "%s__coverage" % product.identifier, - "footprint": None, - }, - replace=replace, - ) - logger.debug("Adding coverage to product") - models.product_add_coverage(product, report.coverage) - - try: - add_mask(product) - except Exception as e: - logger.debug("Couldn't add mask.") - logger.debug(traceback.format_exc()) - logger.debug("%s: %s\n" % (type(e).__name__, str(e))) - - if client is not None: - logger.debug( - "Storing times in redis queue '%s" % registered_set_key - ) - client.sadd( - registered_set_key, "%s/%s" - % ( - product.begin_time.strftime("%Y%m%dT%H%M%S"), - product.end_time.strftime("%Y%m%dT%H%M%S") - ) - ) - - timestamp = product.inserted.strftime("%Y%m%dT%H%M%S") - - if reporting_dir is not None: - with open(os.path.join(reporting_dir, 'item_%s_%s.xml' % (timestamp, product.identifier)),'w') as f: - f.write(textwrap.dedent(""" - - - {identifier} - {availability_time} - - WCS - {wms_capabilities_url} - - - WMS - {wcs_capabilities_url} - - - """.format( - identifier=escape(product.identifier), - availability_time=escape(isoformat(product.inserted)), - wcs_capabilities_url=escape( - '%s/ows?service=wcs&request=GetCapabilities&cql=identifier="%s"' - % (service_url, product.identifier) - ), - wms_capabilities_url=escape( - '%s/ows?service=wms&request=GetCapabilities&cql=identifier="%s"' - % (service_url, product.identifier) - ), - ))) - - logger.info( - "Successfully finished registration of product '%s'." % objects_prefix - ) - - -def registrar_redis_wrapper( - collection, - upload_container, - replace=False, host="localhost", port=6379, - register_queue_key="register_queue", - registered_set_key="registered_set", - reporting_dir=None, - service_url=None, -): - client = redis.Redis( - host=host, port=port, charset="utf-8", decode_responses=True - ) - while True: - logger.debug("waiting for redis queue '%s'..." 
% register_queue_key) - value = client.brpop(register_queue_key) - try: - registrar( - collection, - value[1], - upload_container, - replace=replace, - client=client, - registered_set_key=registered_set_key, - reporting_dir=reporting_dir, - service_url=service_url, - ) - except Exception as e: - logger.debug(traceback.format_exc()) - logger.error("%s: %s\n" % (type(e).__name__, str(e))) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.description = textwrap.dedent("""\ - Register products. - """) - - parser.add_argument( - "--mode", default="standard", choices=["standard", "redis"], - help=( - "The mode to run the registrar. Either one-off (standard) or " - "reading from a redis queue." - ) - ) - parser.add_argument( - "--objects-prefix", default=None, - help=( - "Prefix to objects holding the metadata and data of product." - ) - ) - parser.add_argument( - "--replace", action="store_true", - help=( - "Replace existing products instead of skipping the registration." - ) - ) - parser.add_argument( - "--redis-register-queue-key", default="register_queue" - ) - parser.add_argument( - "--redis-registered-set-key", default="registered_set" - ) - parser.add_argument( - "--redis-host", default="localhost" - ) - parser.add_argument( - "--redis-port", type=int, default=6379 - ) - parser.add_argument( - "--reporting-dir", - ) - parser.add_argument( - "--service-url", - ) - - parser.add_argument( - "-v", "--verbosity", type=int, default=3, choices=[0, 1, 2, 3, 4], - help=( - "Set verbosity of log output " - "(4=DEBUG, 3=INFO, 2=WARNING, 1=ERROR, 0=CRITICAL). (default: 3)" - ) - ) - - arg_values = parser.parse_args() - - setup_logging(arg_values.verbosity) - - collection = os.environ.get('COLLECTION') - if collection is None: - logger.critical("Collection environment variable not set.") - sys.exit(1) - - upload_container = os.environ.get('UPLOAD_CONTAINER') - if upload_container is None: - logger.warn("UPLOAD_CONTAINER environment variable not set. 
Assuming part of path bucket/item") - - if arg_values.mode == "standard": - registrar( - collection, - arg_values.objects_prefix, - upload_container, - replace=arg_values.replace, - reporting_dir=arg_values.reporting_dir, - service_url=arg_values.service_url, - ) - else: - registrar_redis_wrapper( - collection, - upload_container, - replace=arg_values.replace, - host=arg_values.redis_host, - port=arg_values.redis_port, - register_queue_key=arg_values.redis_register_queue_key, - registered_set_key=arg_values.redis_registered_set_key, - reporting_dir=arg_values.reporting_dir, - service_url=arg_values.service_url, - ) diff --git a/core/setup.py b/core/setup.py index f64ba39c..b44d89b2 100644 --- a/core/setup.py +++ b/core/setup.py @@ -9,7 +9,7 @@ setup( version="0.0.1", author="", author_email="", - description="preprocessor for PVS", + description="registrar for PVS", long_description=long_description, long_description_content_type="text/markdown", url="https://gitlab.eox.at/esa/prism/vs/-/tree/master/core", -- GitLab From 8676a1cf231a57d4e2c4c48922d9501268de93c8 Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Wed, 11 Nov 2020 16:46:53 +0100 Subject: [PATCH 48/54] update user guide to 1.1.1 --- README.md | 2 +- ...df => View-Server_-_User-Guide_v1.1.1.pdf} | Bin 3751586 -> 3751593 bytes client/html/prism.js | 2 +- documentation/user-guide/Makefile | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename client/html/{View-Server_-_User-Guide_v1.1.0.pdf => View-Server_-_User-Guide_v1.1.1.pdf} (99%) diff --git a/README.md b/README.md index abd02d86..d7870362 100644 --- a/README.md +++ b/README.md @@ -361,7 +361,7 @@ A hotfix to production is developed in a branch initiated from master, then merg Create a TAR from source code: ```bash -git archive --prefix release-1.0.0.rc.1/ -o release-1.0.0.rc.1.tar.gz -9 master +git archive --prefix release-1.0.0/ -o release-1.0.0.tar.gz -9 master ``` Save Docker images: diff --git a/client/html/View-Server_-_User-Guide_v1.1.0.pdf b/client/html/View-Server_-_User-Guide_v1.1.1.pdf similarity index 99% rename from client/html/View-Server_-_User-Guide_v1.1.0.pdf rename to client/html/View-Server_-_User-Guide_v1.1.1.pdf index 191ee76989bbb51cb75093ceb93b129a81f962c4..5da1ce74147ee0b8705a6610fa1e963c3452094e 100644 GIT binary patch [base85-encoded binary deltas for the PDF omitted] diff --git a/client/html/prism.js b/client/html/prism.js --- a/client/html/prism.js +++ b/client/html/prism.js - module.exports = {"layer_failed":"Failed to access layer '{{value}}'","search_error":"An error occurred during the search","search_norecords":"No products matched the search","search_n_layers_selected":"One layer selected to show","search_n_layers_selected_plural":"{{count}} layers selected to show","load_more":"Load {{count}} more","download_norecords":"No products selected for download","terms_and_conditions":"I have read and agree to the terms & conditions","start_download":"Start download of one product","start_download_plural":"Start download of {{count}} products","confirm_download":"'Download' starts the download of one product","confirm_download_plural":"'Download' starts the download of {{count}} products","confirm_note":"Note, the browser might open multiple confirmation dialogs or issue a warning. Besides, typically browsers have a limit of 6 concurrent connections. If popups are blocked by your browser, please enable them for this site.","download_size_warning":"The estimated size of the download without compression is {{estimated_size}}MB. 
This might exceed the capabilities of the service.","max_bbox_warning":"Maximum size of bounding box: {{max_bbox_size}} was exceeded by: {{max_bbox_exceed}} on axis: {{max_bbox_axis}}.","max_resolution_warning":"Maximum resolution: {{max_resolution}} was exceeded. Attempt to download might fail with an error.","download_bands_warning":"The selected format supports at most {{maxBands}} bands, but {{requestedBands}} are requested.","timefilter_warning":"Search results may differ from products shown on map because of using a separate time filter","advancedfilter_warning":"Search results may differ from products shown on map because of using additional filters","toomanyresults_warning":"Search results may differ from products shown on map because search results are too big","dynamic-histogram-title":"If enabled, time distributions are shown for current spatial filter.\nIf disabled, they are shown globally.","tutorial1":"

Welcome to the Catalog Client

Let's briefly walk through the main functionalities. Hit Next to start.

Feel free to Skip any time and to resume later using the button at the bottom right.

You can also find detailed information about the Client and referenced services here: User guide document HTML or PDF

.","tutorial2":"

The Map

Noticed the map in the back? It displays available satellite data.

Navigate to your area of interest by panning and zooming, either using left click + drag and mouse wheel scroll or one finger drag and two finger pinch.

Satisfied with the displayed data? Not yet? Let's change the time of interest Next.

","tutorial3":"

The Timeslider

The timeslider displays the distribution of data over time. The gray area shows the currently selected time interval.

Navigate to your time of interest again by panning (only in the bottom area) and zooming. Change the time interval selection by using left click + drag in the top area.

Did you notice how the data displayed on the map and the search results changed?

","tutorial4":"

Search Results

The satellite data within the current area and time of interest is listed here.

Inspect details by hovering over an item and hitting or select items for download.

Your result list is too long? Let's apply more filters Next.

","tutorial5":"

Filters

These filters allow to narrow down the search results. Note that the time and spatial filters are already applied via the map and timeslider.

Apply Additional Filters by selecting or typing the values of interest.

Did the search result list get smaller?

","tutorial6":"

Map layers

Available map and data layers are listed here.

Select and deselect Layers for viewing and searching. Overlays and Base Layers are only for the viewing on the map.

Satisfied with your search results?

Iterate all steps until you find the right satellite data to enjoy.

"} + module.exports = {"layer_failed":"Failed to access layer '{{value}}'","search_error":"An error occurred during the search","search_norecords":"No products matched the search","search_n_layers_selected":"One layer selected to show","search_n_layers_selected_plural":"{{count}} layers selected to show","load_more":"Load {{count}} more","download_norecords":"No products selected for download","terms_and_conditions":"I have read and agree to the terms & conditions","start_download":"Start download of one product","start_download_plural":"Start download of {{count}} products","confirm_download":"'Download' starts the download of one product","confirm_download_plural":"'Download' starts the download of {{count}} products","confirm_note":"Note, the browser might open multiple confirmation dialogs or issue a warning. Besides, typically browsers have a limit of 6 concurrent connections. If popups are blocked by your browser, please enable them for this site.","download_size_warning":"The estimated size of the download without compression is {{estimated_size}}MB. This might exceed the capabilities of the service.","max_bbox_warning":"Maximum size of bounding box: {{max_bbox_size}} was exceeded by: {{max_bbox_exceed}} on axis: {{max_bbox_axis}}.","max_resolution_warning":"Maximum resolution: {{max_resolution}} was exceeded. Attempt to download might fail with an error.","download_bands_warning":"The selected format supports at most {{maxBands}} bands, but {{requestedBands}} are requested.","timefilter_warning":"Search results may differ from products shown on map because of using a separate time filter","advancedfilter_warning":"Search results may differ from products shown on map because of using additional filters","toomanyresults_warning":"Search results may differ from products shown on map because search results are too big","dynamic-histogram-title":"If enabled, time distributions are shown for current spatial filter.\nIf disabled, they are shown globally.","tutorial1":"

[tutorial1 through tutorial6 strings identical to the removed line above; the "+" line differs only in its User Guide document links, which now point to the v1.1.1 HTML and PDF]
"} /***/ }), /* 1069 */ diff --git a/documentation/user-guide/Makefile b/documentation/user-guide/Makefile index e994f10c..0f20b85a 100644 --- a/documentation/user-guide/Makefile +++ b/documentation/user-guide/Makefile @@ -8,7 +8,7 @@ SPHINXBUILD ?= sphinx-build SPHINXAUTOBUILD ?= sphinx-autobuild SOURCEDIR = . BUILDDIR = _build -USERGUIDE_VERSION = v1.1.0 +USERGUIDE_VERSION = v1.1.1 # Put it first so that "make" without argument is like "make help". help: -- GitLab From 0ecca40291763ce9561729affd15e6b8da05e0f8 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 12 Nov 2020 09:56:40 +0100 Subject: [PATCH 49/54] Using gitlab CI artifacts for logfiles --- .gitlab-ci.yml | 9 +++++++++ testing/gitlab_test.sh | 11 ++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d0841394..71ecd68e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -40,6 +40,9 @@ build-tag: - docker push "$IMAGE_6":$CI_COMMIT_TAG only: - tags + artifacts: + paths: + - /tmp/*.log build-master-staging: image: registry.gitlab.eox.at/esa/prism/vs/docker-base-testing:latest stage: build @@ -80,6 +83,9 @@ build-master-staging: - master except: - tags + artifacts: + paths: + - /tmp/*.log build: image: registry.gitlab.eox.at/esa/prism/vs/docker-base-testing:latest stage: build @@ -112,3 +118,6 @@ build: - tags - staging - master + artifacts: + paths: + - /tmp/*.log diff --git a/testing/gitlab_test.sh b/testing/gitlab_test.sh index 08920c0a..0e6e2765 100755 --- a/testing/gitlab_test.sh +++ b/testing/gitlab_test.sh @@ -9,7 +9,6 @@ cat $emg_db > ../env/emg_db.env cat $emg_django > ../env/emg_django.env cat $emg_obs > ../env/emg_obs.env - # use `pvs_testing` bucket instead sed -i -e 's/emg-data/pvs_testing/g' ../env/emg.env @@ -23,7 +22,6 @@ set -o allexport source ../env/emg.env set +o allexport - mkdir data docker swarm init docker network create -d overlay emg-extnet @@ -33,7 +31,7 @@ printf $OS_PASSWORD_DOWNLOAD | docker secret create OS_PASSWORD_DOWNLOAD - printf $DJANGO_PASSWORD | docker secret create DJANGO_PASSWORD - printf $OS_PASSWORD | docker secret create OS_PASSWORD - -# create docker configs +# create docker configs printf $sftp_users_emg | docker config create sftp_users_emg - docker stack deploy -c ../docker-compose.emg.yml -c ../docker-compose.emg.dev.yml emg-pvs @@ -48,10 +46,9 @@ if [ $? -ne 0 ] then echo "Failure in tests, logging from services:" for service in $(docker service ls --format "{{.Name}}"); do - echo "________________________________________" - docker service ps $service --no-trunc - docker service logs $service - done + docker service ps $service --no-trunc >> "/tmp/$service.log" + docker service logs $service >> "/tmp/$service.log" + done exit 1 fi -- GitLab From 70e7e3d737743bb9a6608ce050a4b94fd137a1e4 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 12 Nov 2020 14:20:41 +0100 Subject: [PATCH 50/54] Fixing source handling: splitting container/path. 
Fixing footprint extraction from GSCs --- core/registrar/backend.py | 11 ++++++----- core/registrar/scheme.py | 3 ++- core/registrar/source.py | 15 ++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/core/registrar/backend.py b/core/registrar/backend.py index 10fd8782..17144f47 100644 --- a/core/registrar/backend.py +++ b/core/registrar/backend.py @@ -74,7 +74,7 @@ class EOxServerBackend(Backend): auth_parameters=params, ) - bucket, _ = source.get_bucket_and_key(path) + bucket, _ = source.get_container_and_path(path) storage, created_storage = backends.Storage.objects.get_or_create( name=source.name if source.bucket_name else f'{source.name}-{bucket}', @@ -135,7 +135,8 @@ class EOxServerBackend(Backend): if not mapping: raise RegistrationError(f'Could not get mapping for {item.product_type} {item.product_level}') - metadata_file = '/'.join(item.metadata_files[0].split('/')[1:]) + _, metadata_file = source.get_container_and_path(item.metadata_files[0]) + # metadata_file = '/'.join(item.metadata_files[0].split('/')[1:]) storage = self._get_storage_from_source(source, item.path) try: @@ -176,7 +177,7 @@ class EOxServerBackend(Backend): for raster_identifier, coverage_type_name in mapping.get('coverages', {}).items(): raster_items = item.raster_files.get(raster_identifier) raster_items = [ - storage + ['/'.join(raster_item.split('/')[1:])] + storage + [source.get_container_and_path(raster_item)[1]] for raster_item in (raster_items if isinstance(raster_items, list) else [raster_items]) ] @@ -199,7 +200,7 @@ class EOxServerBackend(Backend): for raster_identifier, browse_type_name in mapping.get('browses', {}).items(): raster_item = item.raster_files.get(raster_identifier) - raster_item = '/'.join(raster_item.split('/')[1:]) + _, raster_item = source.get_container_and_path(raster_item) logger.info(f"Adding browse {browse_type_name or 'default'} {raster_item} to product") BrowseRegistrator().register( @@ -210,7 +211,7 @@ class EOxServerBackend(Backend): # register masks for mask_identifier, mask_type_name in mapping.get('masks', {}).items(): - mask_item = item.mask_files.get(mask_identifier) + _, mask_item = source.get_container_and_path(item.mask_files.get(mask_identifier)) if mask_item: logger.info(f"Adding mask (file) {mask_type_name} to product") MaskRegistrator().register( diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py index 8a65249c..71d508d3 100644 --- a/core/registrar/scheme.py +++ b/core/registrar/scheme.py @@ -143,7 +143,7 @@ class GSCRegistrationScheme(RegistrationScheme): 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', True), 'footprint': Parameter( '//gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', - True, parse_polygons_gsc + False, parse_polygons_gsc ), } @@ -178,6 +178,7 @@ class GSCRegistrationScheme(RegistrationScheme): }, metadata_files=[metadata_file], metadata={ + 'footprint': metadata['footprint'], } ) diff --git a/core/registrar/source.py b/core/registrar/source.py index 20e079b9..ce043662 100644 --- a/core/registrar/source.py +++ b/core/registrar/source.py @@ -20,6 +20,9 @@ class Source: def __init__(self, name: str=None): self.name = name + def get_container_and_path(self, path): + raise NotImplementedError + def list_files(self, path, glob_pattern=None): raise NotImplementedError @@ -70,7 +73,6 @@ class SwiftSource(Source): return 
container, path - def list_files(self, path, glob_patterns=None): container, path = self.get_container_and_path(path) @@ -149,7 +151,7 @@ class S3Source(Source): **client_kwargs, ) - def get_bucket_and_key(self, path: str): + def get_container_and_path(self, path: str): bucket = self.bucket_name if bucket is None: parts = (path[1:] if path.startswith('/') else path).split('/') @@ -166,7 +168,7 @@ class S3Source(Source): if glob_patterns and not isinstance(glob_patterns, list): glob_patterns = [glob_patterns] - bucket, key = self.get_bucket_and_key(path) + bucket, key = self.get_container_and_path(path) logger.info(f'Listing S3 files for bucket {bucket} and prefix {key}') response = self.client.list_objects_v2( Bucket=bucket, @@ -182,12 +184,12 @@ class S3Source(Source): ] def get_file(self, path, target_path): - bucket, key = self.get_bucket_and_key(path) + bucket, key = self.get_container_and_path(path) logger.info(f'Retrieving file from S3 {bucket}/{key} to be stored at {target_path}') self.client.download_file(bucket, key, target_path) def get_vsi_env_and_path(self, path: str, streaming: bool=False): - bucket, key = self.get_bucket_and_key(path) + bucket, key = self.get_container_and_path(path) return { 'AWS_SECRET_ACCESS_KEY': self.secret_access_key, 'AWS_ACCESS_KEY_ID': self.access_key_id, @@ -201,6 +203,9 @@ class LocalSource(Source): self.root_directory = root_directory + def get_container_and_path(self, path): + return (self.root_directory, path) + def _join_path(self, path): path = normpath(path) if isabs(path): -- GitLab From 22376149f0b5b4f86eef7fe003c430ce5f66f26d Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 12 Nov 2020 15:19:28 +0100 Subject: [PATCH 51/54] maybe fix build --- testing/registrar_test.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh index 25a8ba08..ef398367 100755 --- a/testing/registrar_test.sh +++ b/testing/registrar_test.sh @@ -2,8 +2,6 @@ product_list_file=$1 echo "Starting registrar test" -OS_PASSWORD=$(docker exec -i $(docker ps -qf "name=emg-pvs_registrar") cat /run/secrets/OS_PASSWORD) - IFS="," docker exec -i $(docker ps -qf "name=emg-pvs_registrar") /wait-initialized.sh docker exec -i $(docker ps -qf "name=emg-pvs_renderer") /wait-initialized.sh -- GitLab From d40afc4e9bb68db9f6b963a4ddd4ad4ec966032e Mon Sep 17 00:00:00 2001 From: Lubomir Bucek Date: Thu, 12 Nov 2020 15:43:01 +0100 Subject: [PATCH 52/54] wait for db specifically --- testing/gitlab_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/gitlab_test.sh b/testing/gitlab_test.sh index 0e6e2765..4503809d 100755 --- a/testing/gitlab_test.sh +++ b/testing/gitlab_test.sh @@ -35,7 +35,7 @@ printf $OS_PASSWORD | docker secret create OS_PASSWORD - printf $sftp_users_emg | docker config create sftp_users_emg - docker stack deploy -c ../docker-compose.emg.yml -c ../docker-compose.emg.dev.yml emg-pvs -./docker-stack-wait.sh -n renderer -n registrar -n preprocessor -n ingestor -n sftp emg-pvs +./docker-stack-wait.sh -n renderer -n registrar -n preprocessor -n database -n sftp emg-pvs docker service ls # perform the testing -- GitLab From 5019a40dfa9d777c92daaf766c712158e1976de0 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 12 Nov 2020 16:30:22 +0100 Subject: [PATCH 53/54] Explicit list of artifacts --- .gitlab-ci.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 71ecd68e..74dd9a1c 100644 --- a/.gitlab-ci.yml +++ 
b/.gitlab-ci.yml @@ -120,4 +120,14 @@ build: - master artifacts: paths: - - /tmp/*.log + # - /tmp/emg-pvs_cache + # - /tmp/emg-pvs_client + - /tmp/emg-pvs_database + # - /tmp/emg-pvs_fluentd + # - /tmp/emg-pvs_ingestor + - /tmp/emg-pvs_preprocessor + # - /tmp/emg-pvs_redis + - /tmp/emg-pvs_registrar + - /tmp/emg-pvs_renderer + # - /tmp/emg-pvs_seeder + # - /tmp/emg-pvs_sftp -- GitLab From f13cee361df017f43c26335d3886d785e7540f81 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 12 Nov 2020 16:40:20 +0100 Subject: [PATCH 54/54] Fixing typo --- core/registrar/post_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/registrar/post_handlers.py b/core/registrar/post_handlers.py index 2e0f7345..322d77c4 100644 --- a/core/registrar/post_handlers.py +++ b/core/registrar/post_handlers.py @@ -24,7 +24,7 @@ class ReportingPostHandler: with open(filename, 'w') as f: f.write(textwrap.dedent(""" - -- GitLab
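
Note on PATCH 50/54: the recurring change above is the get_container_and_path() contract that all Source subclasses now share. Every source-relative path is split into a "container" part (Swift container, S3 bucket, or local root directory) and a path part relative to it, so the EOxServerBackend no longer slices paths with ad-hoc '/'.join(path.split('/')[1:]) calls but asks the source via source.get_container_and_path(...). The following is a minimal, stand-alone sketch of that contract; the class names S3LikeSource and LocalLikeSource and the demo paths are illustrative stand-ins and do not appear in core/registrar/source.py.

    # Sketch of the container/path splitting contract from PATCH 50/54.
    # Only the splitting logic is reproduced; credentials, file listing and
    # retrieval of the real Source classes are deliberately left out.

    class Source:
        def __init__(self, name: str = None):
            self.name = name

        def get_container_and_path(self, path: str):
            # each subclass decides what counts as the "container"
            raise NotImplementedError


    class S3LikeSource(Source):
        # illustrative stand-in for S3Source: if no fixed bucket is
        # configured, the first path segment is treated as the bucket name
        def __init__(self, name: str = None, bucket_name: str = None):
            super().__init__(name)
            self.bucket_name = bucket_name

        def get_container_and_path(self, path: str):
            bucket = self.bucket_name
            if bucket is None:
                parts = (path[1:] if path.startswith('/') else path).split('/')
                bucket, path = parts[0], '/'.join(parts[1:])
            return bucket, path


    class LocalLikeSource(Source):
        # illustrative stand-in for LocalSource: the "container" is simply
        # the configured root directory
        def __init__(self, root_directory: str, name: str = None):
            super().__init__(name)
            self.root_directory = root_directory

        def get_container_and_path(self, path: str):
            return (self.root_directory, path)


    if __name__ == '__main__':
        s3 = S3LikeSource(name='upload')
        assert s3.get_container_and_path('/my-bucket/products/P1/meta.xml') == (
            'my-bucket', 'products/P1/meta.xml')

        local = LocalLikeSource('/data', name='local')
        assert local.get_container_and_path('products/P1/meta.xml') == (
            '/data', 'products/P1/meta.xml')

Under this contract, backend code that only needs the in-container path takes element [1] of the returned tuple, which is exactly how the backend.py hunks in PATCH 50/54 use it.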