diff --git a/ingestor/.gitignore b/ingestor/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..47a030ee6e598acbf69f31b10980132fb9f77583 --- /dev/null +++ b/ingestor/.gitignore @@ -0,0 +1,134 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
FROM ubuntu:18.04

# Image metadata. The maintainer is recorded as a label because the dedicated
# MAINTAINER instruction is deprecated.
# NOTE(review): `name` says "cache" while `type` says "ingestor" -- this looks
# like a copy/paste leftover; confirm the intended image name.
LABEL maintainer="EOX" \
      name="prism view server cache" \
      vendor="EOX IT Services GmbH <https://eox.at>" \
      license="MIT Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>" \
      type="prism view server ingestor" \
      version="0.0.1-dev"

USER root

# COPY rather than ADD: these are plain local files, so neither archive
# extraction nor URL fetching is wanted.
# NOTE(review): this change set adds app.py but no ingestor.py -- confirm the
# file exists in the build context, otherwise this step fails.
COPY install.sh ingestor.py \
     /

# Installs the system packages needed at runtime.
RUN ./install.sh

# Runtime configuration; empty values are expected to be supplied when the
# container is started.
ENV COLLECTION_ID= \
    INSTANCE_ID="prism-view-server_ingestor" \
    RENDERER_HOST= \
    COLLECTION= \
    REDIS_HOST= \
    REDIS_PORT="6379" \
    REDIS_QUEUE_KEY="preprocess-md_queue" \
    ST_AUTH_VERSION=3 \
    OS_AUTH_URL= \
    OS_USERNAME= \
    OS_PASSWORD= \
    OS_TENANT_NAME= \
    OS_TENANT_ID= \
    OS_REGION_NAME=

CMD ["python3", "/ingestor.py"]
app = Flask(__name__, static_url_path='')

logger = logging.getLogger(__name__)

logging.config.dictConfig({
    'version': 1,
    # Without this, dictConfig() disables every logger that already exists
    # when it is called -- including the module-level ``logger`` created
    # just above -- because only the root logger is named below.
    'disable_existing_loggers': False,
    'formatters': {
        'simple': {
            'format': '%(levelname)s: %(message)s',
        },
        'verbose': {
            'format': '[%(asctime)s][%(module)s] %(levelname)s: %(message)s',
        }
    },
    'handlers': {
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
            'formatter': 'verbose',
        }
    },
    'loggers': {
        # The empty name addresses the root logger.
        '': {
            'handlers': ['console'],
            'level': 'DEBUG',
            'propagate': False,
        }
    }
})


@app.route('/')
def ingest():
    """
    Handle an ingestion request.

    Currently a stub: the request body is not inspected yet.

    :returns: an empty response with status 202 on success, or status 400
              when handling the request raised an exception
    """
    try:
        return Response(status=202)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt still propagate; the traceback is logged instead
        # of being silently discarded.
        logger.exception('Failed to handle ingestion request.')
        return Response(status=400)
file mode 100644 index 0000000000000000000000000000000000000000..11eeb32ed43a910e7e1c95ae2b9af711f49fd942 --- /dev/null +++ b/ingestor/ingestor/browse_report.py @@ -0,0 +1,135 @@ +#------------------------------------------------------------------------------ +# +# Project: prism view server +# Authors: Fabian Schindler <fabian.schindler@eox.at> +# +#------------------------------------------------------------------------------ +# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies of this Software or works derived from this Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
class BrowseReportParserError(ValueError):
    """Raised when a document cannot be interpreted as a browse report."""


# Namespaces the ``rep:`` elements may live in. Both are accepted because
# browse reports are produced with either of them.
NS_REP = 'http://ngeo.eo.esa.int/schema/browseReport'
NS_REP_NGEO = 'http://ngeo.eo.esa.int/ngEO/browseReport/1.0'
NS_BSI = 'http://ngeo.eo.esa.int/schema/browse/ingestion'

REP_NAMESPACES = (NS_REP, NS_REP_NGEO)


nsmap = {
    'rep': NS_REP,
    'bsi': NS_BSI
}


def rep(tag, namespace=NS_REP):
    """
    Qualify ``tag`` with a browse report namespace (Clark notation).

    :param tag: the local element name
    :param namespace: the namespace URI to use, defaults to ``NS_REP``
    :returns: the qualified name in the form ``{namespace}tag``
    """
    return f'{{{namespace}}}{tag}'


def bsi(tag):
    """
    Qualify ``tag`` with the browse ingestion namespace (Clark notation).

    :param tag: the local element name
    :returns: the qualified name in the form ``{namespace}tag``
    """
    return f'{{{NS_BSI}}}{tag}'


def _detect_rep_namespace(elem):
    """Return the browse report namespace used by ``elem`` or its children.

    Falls back to ``NS_REP`` when none of them uses a known namespace.
    """
    for node in (elem, *elem):
        tag = node.tag
        # Comments and processing instructions do not have a string tag.
        if not isinstance(tag, str) or not tag.startswith('{'):
            continue
        namespace = tag[1:].partition('}')[0]
        if namespace in REP_NAMESPACES:
            return namespace
    return NS_REP


def _parse_datetime(elem, tag):
    """Parse the text of the mandatory child ``tag`` of ``elem`` as a date."""
    text = elem.findtext(tag)
    if text is None:
        raise BrowseReportParserError(f'Missing mandatory element {tag}.')
    try:
        return dateutil.parser.parse(text)
    except (ValueError, OverflowError) as e:
        raise BrowseReportParserError(
            f'Invalid date/time value {text!r} in element {tag}.'
        ) from e


def _parse_pairs(elem, tag, convert):
    """Parse the whitespace separated child ``tag`` of ``elem`` into pairs.

    Every value is passed through ``convert`` (e.g. ``int`` or ``float``).
    """
    text = elem.findtext(tag)
    if text is None:
        raise BrowseReportParserError(f'Missing mandatory element {tag}.')
    try:
        return [
            (convert(x), convert(y))
            for x, y in pairwise(text.split())
        ]
    except ValueError as e:
        raise BrowseReportParserError(
            f'Invalid value list in element {tag}.'
        ) from e


def parse_browse_report(input_file):
    """
    Parse a browse report or an ingest browse instruction.

    :param input_file: a filename or file-like object containing the XML
    :returns: a dict with the keys ``responsible_or_name``, ``date_time``,
              ``browse_type`` and ``browses`` (a list of browse dicts as
              returned by :func:`parse_browse`)
    :raises BrowseReportParserError: when the input is not well-formed XML,
                                     is not a browse report or lacks
                                     mandatory values
    """
    try:
        tree = etree.parse(input_file)
    except etree.XMLSyntaxError as e:
        raise BrowseReportParserError('Failed to parse XML.') from e
    root = tree.getroot()

    accepted_roots = {rep('browseReport', ns) for ns in REP_NAMESPACES}
    accepted_roots.add(bsi('ingestBrowse'))
    if root.tag not in accepted_roots:
        raise BrowseReportParserError(
            'Document is not a browse report or an ingest browse instruction.'
        )

    # The root may be a ``bsi:ingestBrowse`` element, so the namespace of the
    # ``rep:`` elements is looked up on the children as well.
    ns = _detect_rep_namespace(root)

    return {
        # NOTE(review): the key is presumably meant to read
        # 'responsible_org_name'; kept unchanged for existing callers.
        'responsible_or_name': root.findtext(rep('responsibleOrgName', ns)),
        'date_time': _parse_datetime(root, rep('dateTime', ns)),
        'browse_type': root.findtext(rep('browseType', ns)),
        'browses': [
            parse_browse(elem)
            for elem in root.iterfind(rep('browse', ns))
        ],
    }


def parse_browse(elem):
    """
    Parse a single ``browse`` element.

    :param elem: the ``browse`` element
    :returns: a dict describing the browse; ``type`` names the kind of
              georeference (empty when none was found) and a key specific
              to that kind carries its details
    :raises BrowseReportParserError: when mandatory values are missing or
                                     invalid
    :raises NotImplementedError: for regular grid browses
    """
    ns = _detect_rep_namespace(elem)

    browse = {
        'type': '',
        'filename': elem.findtext(rep('fileName', ns)),
        'image_type': elem.findtext(rep('imageType', ns)),
        'reference_system_identifier': elem.findtext(
            rep('referenceSystemIdentifier', ns)
        ),
        'start_time': _parse_datetime(elem, rep('startTime', ns)),
        'end_time': _parse_datetime(elem, rep('endTime', ns)),
    }

    rectified_elem = elem.find(rep('rectifiedBrowse', ns))
    footprint_elem = elem.find(rep('footprint', ns))
    geotiff_elem = elem.find(rep('modelInGeotiff', ns))
    regular_grid_elem = elem.find(rep('regularGrid', ns))

    # Compare against None: an element without children is falsy, which
    # would hide e.g. ``<modelInGeotiff>true</modelInGeotiff>``.
    if rectified_elem is not None:
        browse['type'] = 'rectified_browse'
        browse['rectified'] = {
            'coord_list': _parse_pairs(
                rectified_elem, rep('coordList', ns), float
            ),
        }

    elif footprint_elem is not None:
        browse['type'] = 'footprint_browse'
        browse['footprint'] = {
            'col_row_list': _parse_pairs(
                footprint_elem, rep('colRowList', ns), int
            ),
            'coord_list': _parse_pairs(
                footprint_elem, rep('coordList', ns), float
            ),
        }

    elif geotiff_elem is not None:
        browse['type'] = 'model_in_geotiff_browse'
        browse['model_in_geotiff'] = True

    elif regular_grid_elem is not None:
        # TODO: other browse types
        browse['type'] = 'regular_grid_browse'
        raise NotImplementedError('Regular grid browses are not supported')

    return browse
by Eoli 2 ngEO Converter V1.2.0</rep:responsibleOrgName> + <rep:dateTime>2013-09-25T14:54:38Z</rep:dateTime> + <rep:browseType>SAR</rep:browseType> + <rep:browse> + <rep:browseIdentifier>ERS-2-11040113373745-1507.SAR_IM0_0P.BP</rep:browseIdentifier> + <rep:fileName>ERS-2-11040113373745-1507.SAR_IM0_0P.BP.jpg</rep:fileName> + <rep:imageType>Jpeg</rep:imageType> + <rep:referenceSystemIdentifier>EPSG:4326</rep:referenceSystemIdentifier> + <rep:footprint nodeNumber="5"> + <rep:colRowList>0 0 500 0 500 250 0 250 0 0</rep:colRowList> + <rep:coordList>83.66 42.31 84.53 42.42 84.48 51.28 83.61 50.32 83.66 42.31</rep:coordList> + </rep:footprint> + <rep:startTime>2011-04-01T13:37:37Z</rep:startTime> + <rep:endTime>2011-04-01T13:37:52Z</rep:endTime> + </rep:browse> +</rep:browseReport> diff --git a/ingestor/tests/data/model_in_geotiff_browse.xml b/ingestor/tests/data/model_in_geotiff_browse.xml new file mode 100644 index 0000000000000000000000000000000000000000..624a01e9e142fb48200fd855440eace4a8b6858a --- /dev/null +++ b/ingestor/tests/data/model_in_geotiff_browse.xml @@ -0,0 +1,19 @@ +<?xml version="1.0" encoding="UTF-8"?> +<bsi:ingestBrowse xmlns:bsi="http://ngeo.eo.esa.int/schema/browse/ingestion" + xmlns:rep="http://ngeo.eo.esa.int/ngEO/browseReport/1.0" version="1.3"> + <rep:responsibleOrgName>DMI</rep:responsibleOrgName> + <rep:dateTime>2012-07-13T11:54:26Z</rep:dateTime> + <rep:browseType>SAR</rep:browseType> + <rep:browse xmlns:gsc="http://earth.esa.int/gsc" + xmlns:gml="http://www.opengis.net/gml" + xmlns:eop="http://earth.esa.int/eop" + xmlns:opt="http://earth.esa.int/opt"> + <rep:browseIdentifier>ID_DODWH_MG2_CORE_09DM010001_1</rep:browseIdentifier> + <rep:fileName>ID_DEIMOS01-v2_DE0028bfp_L3R.tif</rep:fileName> + <rep:imageType>TIFF</rep:imageType> + <rep:referenceSystemIdentifier>EPSG:4326</rep:referenceSystemIdentifier> + <rep:modelInGeotiff>true</rep:modelInGeotiff> + <rep:startTime>2011-02-01T11:48:01Z</rep:startTime> + 
TEST_DATA_DIR = join(dirname(__file__), 'data')


def _parse_test_file(name):
    """Parse the browse report fixture ``name`` from the test data folder."""
    # Binary mode hands the raw bytes to the XML parser, so the encoding
    # declared in the document is applied by the parser itself.
    with open(join(TEST_DATA_DIR, name), 'rb') as f:
        return parse_browse_report(f)


def test_parse_footprint_browse():
    browse_report = _parse_test_file('footprint_browse.xml')

    assert browse_report['browse_type'] == 'SAR'
    assert len(browse_report['browses']) == 1

    browse = browse_report['browses'][0]
    assert browse['type'] == 'footprint_browse'
    assert browse['filename'] == 'ERS-2-11040113373745-1507.SAR_IM0_0P.BP.jpg'
    assert browse['footprint']['col_row_list'] == [
        (0, 0), (500, 0), (500, 250), (0, 250), (0, 0)
    ]
    assert browse['footprint']['coord_list'] == [
        (83.66, 42.31), (84.53, 42.42), (84.48, 51.28), (83.61, 50.32),
        (83.66, 42.31)
    ]


def test_parse_model_in_geotiff_browse():
    browse_report = _parse_test_file('model_in_geotiff_browse.xml')

    assert browse_report['browse_type'] == 'SAR'
    assert len(browse_report['browses']) == 1

    browse = browse_report['browses'][0]
    assert browse['type'] == 'model_in_geotiff_browse'
    assert browse['filename'] == 'ID_DEIMOS01-v2_DE0028bfp_L3R.tif'
    assert browse['model_in_geotiff'] is True


def test_parse_rectified_browse():
    browse_report = _parse_test_file('rectified_browse.xml')

    assert browse_report['browse_type'] == 'NGEO-LIGHT'
    assert len(browse_report['browses']) == 1

    browse = browse_report['browses'][0]
    assert browse['type'] == 'rectified_browse'
    assert browse['image_type'] == 'PNG'
    assert browse['rectified']['coord_list'] == [
        (40.8395, 40.1005), (42.6645, 42.7907)
    ]