#!/usr/bin/env python
# -----------------------------------------------------------------------------
#
# Project: registrar.py
# Authors: Stephan Meissl <stephan.meissl@eox.at>
#
# -----------------------------------------------------------------------------
# Copyright (c) 2019 EOX IT Services GmbH
#
# Python script to register products.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies of this Software or works derived from this Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
# -----------------------------------------------------------------------------


import argparse
import logging
import os
import subprocess
import sys
import textwrap
import traceback

import django
import lxml.etree
import redis
from django.contrib.gis.geos import GEOSGeometry
from django.db import transaction
from swiftclient.service import SwiftService
# NOTE(review): `gdal` is used further down (gdal.Open, gdal.SetConfigOption)
# but is not imported anywhere in this file; presumably
# `from osgeo import gdal` is intended here -- confirm and add.

# Make the PVS Django instance importable. Its parent directory can be
# overridden through the INSTALL_DIR environment variable.
path = os.path.join(os.getenv('INSTALL_DIR', "/var/www/pvs"), "pvs_instance")
if path not in sys.path:
    sys.path.append(path)

# Configure Django first; the EOxServer modules imported below provide Django
# models and are therefore imported only after django.setup().
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pvs_instance.settings")
django.setup()

from eoxserver.backends import access  # noqa: E402
from eoxserver.contrib import vsi  # noqa: E402
from eoxserver.backends import models as backends  # noqa: E402
from eoxserver.resources.coverages import models  # noqa: E402
from eoxserver.resources.coverages.registration.product import (  # noqa: E402
    ProductRegistrator
)
from eoxserver.resources.coverages.registration.registrators.gdal import (  # noqa: E402
    GDALRegistrator
)

# Module-level logger; handlers and level are attached by setup_logging().
logger = logging.getLogger(__name__)


def setup_logging(verbosity):
    """Attach a stream handler to the module logger at the requested level.

    ``verbosity`` values 0, 1, 2 and 3 select CRITICAL, ERROR, WARNING and
    INFO respectively; any other value selects DEBUG. The same level is set
    on both the logger and the newly added handler.
    """
    level_by_verbosity = {
        0: logging.CRITICAL,
        1: logging.ERROR,
        2: logging.WARNING,
        3: logging.INFO,
    }
    chosen_level = level_by_verbosity.get(verbosity, logging.DEBUG)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(chosen_level)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
    )

    logger.setLevel(chosen_level)
    logger.addHandler(stream_handler)


def set_gdal_swift_auth():
    """Hand the current Swift credentials over to GDAL.

    Runs ``swift auth`` and copies the ``OS_STORAGE_URL`` and
    ``OS_AUTH_TOKEN`` values found in its output into the GDAL config options
    ``SWIFT_STORAGE_URL`` and ``SWIFT_AUTH_TOKEN``.

    Raises:
        subprocess.CalledProcessError: if ``swift auth`` exits non-zero.
        RuntimeError: if one of the two values is missing from the output.
    """
    # universal_newlines=True yields text instead of bytes, so the string
    # handling below works on Python 3 as well as Python 2.
    output = subprocess.check_output(
        ["swift", "auth"], universal_newlines=True
    )

    # Look the values up by name rather than by line position so additional
    # or reordered output lines do not break the parsing.
    wanted = ("OS_STORAGE_URL", "OS_AUTH_TOKEN")
    found = {}
    for line in output.splitlines():
        for key in wanted:
            marker = key + "="
            if marker in line:
                found[key] = line.split(marker, 1)[1].strip()

    missing = [key for key in wanted if key not in found]
    if missing:
        raise RuntimeError(
            "Could not find %s in the output of 'swift auth'."
            % ", ".join(missing)
        )

    # setting gdal config
    # NOTE(review): `gdal` is expected to be available at module level
    # (presumably osgeo.gdal) -- confirm the import exists.
    gdal.SetConfigOption("SWIFT_STORAGE_URL", found["OS_STORAGE_URL"])
    gdal.SetConfigOption("SWIFT_AUTH_TOKEN", found["OS_AUTH_TOKEN"])


def add_mask(product):
    """Create a mask for ``product`` from the CF_POLY entry of its metadata.

    Parses the first metadata item of the product, reads the value of the
    vendor-specific ``CF_POLY`` attribute, converts it into a geometry and
    stores it as a ``Mask`` using the mask type registered for the product's
    product type.

    Raises:
        IndexError: if the product has no metadata item or the metadata has
            no ``CF_POLY`` entry.
        Other exceptions from file access, XML parsing, geometry parsing or
        the database lookups propagate unchanged.
    """
    metadata_item = product.metadata_items.all()[0]
    with access.vsi_open(metadata_item) as f:
        tree = lxml.etree.parse(f)
    root = tree.getroot()
    wkt = tree.xpath(
        '//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData'
        '/eop:vendorSpecific/eop:SpecificInformation'
        '[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()',
        namespaces=root.nsmap
    )[0]
    geometry = GEOSGeometry(wkt)
    mask_type = models.MaskType.objects.get(product_type=product.product_type)
    logger.debug("Adding mask")
    models.Mask.objects.create(
        product=product,
        mask_type=mask_type,
        geometry=geometry,
    )


def get_product_type_and_level(metadata_item):
    """Read the product type name and processing level from a metadata item.

    The product type name is the platform short name found in the metadata.
    The level is derived from the parent identifier and is ``'Level_1'`` or
    ``'Level_3'`` when the identifier ends with that suffix.

    Both lookups are best effort: a failure is logged at debug level and the
    corresponding value is returned as ``None``.

    Returns:
        tuple: ``(product_type_name, level)``; either element may be ``None``.
    """
    # Defaults so a failed lookup yields None instead of an unbound name.
    product_type_name = None
    level = None

    with access.vsi_open(metadata_item) as f:
        tree = lxml.etree.parse(f)
    root = tree.getroot()

    try:
        xp = '//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'
        product_type_name = tree.xpath(xp, namespaces=root.nsmap)[0]
    except Exception as e:
        logger.debug(
            'Failed to determine product type of %s, error was %s'
            % (metadata_item.location, e)
        )

    try:
        xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'
        parent_identifier = tree.xpath(xp, namespaces=root.nsmap)[0]
        logger.debug("Parent identifier: %s" % parent_identifier)

        # elif is required here: with two independent ifs a Level_1
        # identifier would fall into the else branch of the Level_3 check.
        if parent_identifier.endswith('Level_1'):
            level = 'Level_1'
        elif parent_identifier.endswith('Level_3'):
            level = 'Level_3'
        else:
            raise Exception('Invalid parent identifier type name %s' % parent_identifier)
    except Exception as e:
        logger.debug(
            'Failed to determine product level of %s, error was %s'
            % (metadata_item.location, e)
        )

    return product_type_name, level

def get_product_collection(metadata_file):
    """Derive the collection name from the parent identifier in a metadata file.

    Used when the collection has to be determined from the metadata itself.
    Returns the part of the first parent identifier that precedes the first
    ``/``. Any failure is logged at debug level and ``None`` is returned.
    """
    parent_identifier_xpath = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'
    try:
        # Files below /vsiswift need the Swift credentials set in GDAL first.
        if metadata_file.startswith("/vsiswift"):
            set_gdal_swift_auth()
        with vsi.open(metadata_file, "r") as handle:
            document = lxml.etree.parse(handle)
        namespaces = document.getroot().nsmap
        identifiers = document.xpath(
            parent_identifier_xpath, namespaces=namespaces
        )
        return identifiers[0].split('/')[0]
    except Exception as error:
        logger.debug(
            'Failed to determine product collection for metadata file %s, error was %s'
            % (metadata_file, error)
        )
    return None


def get_product_type_from_band_count(product_type_name, file_path):
    """Look up the product type named ``<product_type_name>_<band count>``.

    Opens ``file_path`` with GDAL to obtain its raster band count and returns
    the ``ProductType`` whose name carries that count as suffix.

    Raises:
        RegistrationError: if the dataset cannot be opened or no product type
            with the suffixed name exists.
    """
    logger.debug("Opening file using GDAL: %s" % file_path)

    # Files below /vsiswift need the Swift credentials set in GDAL first.
    if file_path.startswith("/vsiswift"):
        set_gdal_swift_auth()

    dataset = gdal.Open(file_path)
    if dataset is None:
        raise RegistrationError("Band check: failed to open dataset: %s " % file_path)

    # Product types may be distinguished by a trailing "_<bandcount>".
    suffixed_name = "%s_%s" % (product_type_name, dataset.RasterCount)
    try:
        return models.ProductType.objects.get(name=suffixed_name)
    except models.ProductType.DoesNotExist:
        raise RegistrationError("Product Type: '%s' was not found" % suffixed_name)


class RegistrationError(Exception):
    """Raised when a product cannot be registered."""


def _find_product_objects(upload_container, objects_prefix):
    """Return ``(metadata_package, data_package)`` found below the prefix.

    Lists the Swift container with the given prefix. Objects ending in
    ``.xml`` are taken as metadata, objects ending in ``.TIF``/``.tif`` as
    data, ``.tar`` objects are ignored and anything else is an error.

    Raises:
        RegistrationError: if a listing page is unsuccessful, an unexpected
            object is found, or metadata or data is missing.
    """
    metadata_package, data_package = None, None
    with SwiftService() as swift:
        pages = swift.list(
            container=upload_container, options={"prefix": objects_prefix},
        )
        for page in pages:
            logger.debug("Swift listing page: %s" % page)
            if not page["success"]:
                raise RegistrationError(
                    "No product found with objects prefix '%s'."
                    % objects_prefix
                )
            for item in page["listing"]:
                name = item["name"]
                if name.endswith(".xml"):
                    metadata_package = name
                elif name.endswith((".TIF", ".tif")):
                    data_package = name
                elif not name.endswith(".tar"):
                    raise RegistrationError(
                        "Product with objects prefix '%s' has "
                        "wrong content '%s'."
                        % (objects_prefix, name)
                    )

    if metadata_package is None or data_package is None:
        raise RegistrationError(
            "Product with objects prefix '%s' has missing content."
            % objects_prefix
        )
    return metadata_package, data_package


@transaction.atomic
def registrar(
    collection_stack,
    objects_prefix, upload_container=None, replace=False, client=None,
    registered_set_key=None
):
    """Register one product and its coverage and add them to collections.

    Args:
        collection_stack: identifier of the collection the product is
            inserted into; also used as prefix of the product type name and
            of the additional per-type / per-level collection identifiers.
        objects_prefix: prefix of the Swift objects holding the product. When
            ``upload_container`` is not given it must have the form
            ``<container>/<prefix>``.
        upload_container: name of the Swift container; also the name of the
            ``Storage`` used for the metadata item.
        replace: passed to the product and coverage registrators.
        client: optional redis client; when given, the product's time span is
            added to the set ``registered_set_key`` after registration.
        registered_set_key: name of that redis set.

    Raises:
        RegistrationError: if the objects cannot be found or are incomplete,
            no matching product type / coverage type exists, or no footprint
            could be extracted.

    The whole function runs inside one database transaction.
    """
    logger.info("Starting registration of product '%s'." % objects_prefix)

    if not upload_container:
        # assuming objects_prefix = bucket/itemname; partition keeps any
        # further "/" as part of the item prefix.
        upload_container, separator, objects_prefix = \
            objects_prefix.partition("/")
        if not separator:
            raise RegistrationError(
                "No upload container given and objects prefix '%s' is not "
                "of the form 'container/prefix'." % upload_container
            )

    metadata_package, data_package = _find_product_objects(
        upload_container, objects_prefix
    )
    logger.debug("Found objects '%s' and '%s'." % (data_package, metadata_package))

    storage = backends.Storage.objects.get(name=upload_container)
    metadata_item = models.MetaDataItem(storage=storage, location=metadata_package)

    product_type, level = get_product_type_and_level(metadata_item)
    if collection_stack == 'DEM':
        # special for DEM files, collection name === product_type
        gdal_metadata_file_path = "/vsiswift/%s/%s" % (upload_container, metadata_package)
        product_type = get_product_collection(gdal_metadata_file_path)
    logger.debug("Registering product")
    product_type_name = "%s_Product_%s" % (collection_stack, product_type)

    try:
        # first find product type by name from path
        product_type_model = models.ProductType.objects.get(name=product_type_name)
    except models.ProductType.DoesNotExist:
        # if not found, maybe there are more product types with _bandcount suffix
        gdal_file_path = "/vsiswift/%s/%s" % (upload_container, data_package)
        product_type_model = get_product_type_from_band_count(product_type_name, gdal_file_path)
        product_type_name = product_type_model.name

    coverage_types = list(product_type_model.allowed_coverage_types.all())
    if not coverage_types:
        raise RegistrationError(
            "Product Type: '%s' has no allowed coverage type."
            % product_type_name
        )
    if len(coverage_types) > 1:
        logger.warning("More available 'CoverageType' found, selecting the first one.")
    coverage_type_name = coverage_types[0].name

    product, replaced = ProductRegistrator().register(
        metadata_locations=[[upload_container,
                             metadata_package, ], ],
        # NOTE(review): the resolved product type name was otherwise unused;
        # passing it as type_name is presumed to be the intent -- confirm.
        type_name=product_type_name,
        replace=replace,
        extended_metadata=True,
        mask_locations=None,
        package_path=None,
        simplify_footprint_tolerance=0.0001,  # ~10meters
        overrides={},
    )
    if product.footprint.empty:
        product.delete()
        raise RegistrationError("No footprint was extracted. full product: %s" % product)

    collection = models.Collection.objects.get(
        identifier=collection_stack
    )
    logger.debug("Inserting product into collection %s" % collection_stack)
    models.collection_insert_eo_object(collection, product)

    if collection_stack == "DEM":
        # also insert it to its own collection
        own_identifier = "%s_%s" % (collection_stack, product_type)
        collection_own = models.Collection.objects.get(
            identifier=own_identifier
        )
        logger.debug("Inserting product to collection %s" % own_identifier)
        models.collection_insert_eo_object(collection_own, product)

    if level in ('Level_1', 'Level_3'):
        # products of a known level also go into "<collection>_<level>"
        level_identifier = "%s_%s" % (collection_stack, level)
        collection_level = models.Collection.objects.get(
            identifier=level_identifier
        )
        logger.debug("Inserting product to collection %s" % level_identifier)
        models.collection_insert_eo_object(collection_level, product)

    logger.debug("Registering coverage")
    report = GDALRegistrator().register(
        data_locations=[[upload_container, data_package, ], ],
        metadata_locations=[[upload_container,
                             metadata_package, ], ],
        coverage_type_name=coverage_type_name,
        overrides={
            "identifier": "%s__coverage" % product.identifier,
            "footprint": None,
        },
        replace=replace,
    )
    logger.debug("Adding coverage to product")
    models.product_add_coverage(product, report.coverage)

    # The mask is optional: a failure is only logged, not raised.
    try:
        add_mask(product)
    except Exception as e:
        logger.debug("Couldn't add mask.")
        logger.debug(traceback.format_exc())
        logger.debug("%s: %s\n" % (type(e).__name__, str(e)))

    if client is not None:
        logger.debug(
            "Storing times in redis queue '%s'" % registered_set_key
        )
        client.sadd(
            registered_set_key, "%s/%s"
            % (
                product.begin_time.strftime("%Y%m%dT%H%M%S"),
                product.end_time.strftime("%Y%m%dT%H%M%S")
            )
        )

    logger.info(
        "Successfully finished registration of product '%s'." % objects_prefix
    )


def registrar_redis_wrapper(
    collection,
    replace=False, host="localhost", port=6379,
    register_queue_key="register_queue",
    registered_set_key="registered_set",
    upload_container=None,
):
    """Register products whose object prefixes arrive on a redis list.

    Blocks on ``register_queue_key`` and calls ``registrar`` for every value
    popped from it, forever. A failing registration is logged and does not
    stop the loop.

    Args:
        collection: collection identifier passed to ``registrar``.
        replace: passed to ``registrar``.
        host, port: address of the redis server.
        register_queue_key: redis list to pop object prefixes from.
        registered_set_key: redis set ``registrar`` records time spans in.
        upload_container: Swift container passed to ``registrar``; when
            ``None`` the container is taken from the popped prefix.
    """
    client = redis.Redis(
        host=host, port=port, encoding="utf-8", decode_responses=True
    )
    while True:
        logger.debug("waiting for redis queue '%s'..." % register_queue_key)
        # brpop returns a (list name, value) pair
        _, objects_prefix = client.brpop(register_queue_key)
        try:
            registrar(
                collection,
                objects_prefix,
                upload_container=upload_container,
                replace=replace,
                client=client,
                registered_set_key=registered_set_key
            )
        except Exception as e:
            logger.debug(traceback.format_exc())
            logger.error("%s: %s\n" % (type(e).__name__, str(e)))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.description = textwrap.dedent("""\
    Register products.
    """)

    parser.add_argument(
        "--mode", default="standard", choices=["standard", "redis"],
        help=(
            "The mode to run the registrar. Either one-off (standard) or "
            "reading from a redis queue."
        )
    )
    parser.add_argument(
        "--objects-prefix", default=None,
        help=(
            "Prefix to objects holding the metadata and data of product."
        )
    )
    parser.add_argument(
        "--replace", action="store_true",
        help=(
            "Replace existing products instead of skipping the registration."
        )
    )
    parser.add_argument(
        "--redis-register-queue-key", default="register_queue"
    )
    parser.add_argument(
        "--redis-registered-set-key", default="registered_set"
    )
    parser.add_argument(
        "--redis-host", default="localhost"
    )
    parser.add_argument(
        "--redis-port", type=int, default=6379
    )

    parser.add_argument(
        "-v", "--verbosity", type=int, default=3, choices=[0, 1, 2, 3, 4],
        help=(
            "Set verbosity of log output "
            "(4=DEBUG, 3=INFO, 2=WARNING, 1=ERROR, 0=CRITICAL). (default: 3)"
        )
    )

    arg_values = parser.parse_args()

    setup_logging(arg_values.verbosity)

    collection = os.environ.get('COLLECTION')
    if collection is None:
        logger.critical("Collection environment variable not set.")
        sys.exit(1)

    upload_container = os.environ.get('UPLOAD_CONTAINER')
    if upload_container is None:
        logger.warning("UPLOAD_CONTAINER environment variable not set. Assuming part of path bucket/item")

    if arg_values.mode == "standard":
        # A one-off run has nothing to register without a prefix.
        if arg_values.objects_prefix is None:
            parser.error("--objects-prefix is required in standard mode.")
        registrar(
            collection,
            arg_values.objects_prefix,
            upload_container=upload_container,
            replace=arg_values.replace,
        )
    else:
        registrar_redis_wrapper(
            collection,
            replace=arg_values.replace,
            host=arg_values.redis_host,
            port=arg_values.redis_port,
            register_queue_key=arg_values.redis_register_queue_key,
            registered_set_key=arg_values.redis_registered_set_key,
            upload_container=upload_container,
        )