diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d084139476fe2ea9874549e85dbecf0df06e6326..74dd9a1c64a66e9d8422aa440f720b0c63196a39 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -40,6 +40,9 @@ build-tag: - docker push "$IMAGE_6":$CI_COMMIT_TAG only: - tags + artifacts: + paths: + - /tmp/*.log build-master-staging: image: registry.gitlab.eox.at/esa/prism/vs/docker-base-testing:latest stage: build @@ -80,6 +83,9 @@ build-master-staging: - master except: - tags + artifacts: + paths: + - /tmp/*.log build: image: registry.gitlab.eox.at/esa/prism/vs/docker-base-testing:latest stage: build @@ -112,3 +118,16 @@ build: - tags - staging - master + artifacts: + paths: + # - /tmp/emg-pvs_cache + # - /tmp/emg-pvs_client + - /tmp/emg-pvs_database + # - /tmp/emg-pvs_fluentd + # - /tmp/emg-pvs_ingestor + - /tmp/emg-pvs_preprocessor + # - /tmp/emg-pvs_redis + - /tmp/emg-pvs_registrar + - /tmp/emg-pvs_renderer + # - /tmp/emg-pvs_seeder + # - /tmp/emg-pvs_sftp diff --git a/README.md b/README.md index 734ddd8ac41799920d6ecdd4a93fa5e39590f8a9..d7870362abaf0c7fe73322e26fcbdd9cc49deb62 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ The following services are defined via docker compose files. * uses external atmoz/sftp image * provides sftp access to two volumes for report exchange on registration result xmls and ingest requirement xmls -* accessible on swarm master on port 2222 +* accessible on swarm master on port 2222-22xx * credentials supplied via config # Usage @@ -224,7 +224,7 @@ printf "" | docker secret create DJANGO_PASSWORD - printf "" | docker secret create OS_PASSWORD - # configs creation -printf ":::" | docker config create sftp-users - +printf ":::" | docker config create sftp_users_ - # for production base stack deployment, additional basic authentication credentials list need to be created # format of such a list used by traefik are username:hashedpassword (MD5, SHA1, BCrypt) sudo apt-get install apache2-utils @@ -253,7 +253,7 @@ docker stack deploy -c docker-compose.emg.yml -c docker-compose.emg.dev.yml -c d Deploy base & logging stack in production environment: ``` docker stack deploy -c docker-compose.base.ops.yml base-pvs -docker stack deploy -c docker-compose.logging.yml docker-compose.logging.ops.yml logging +docker stack deploy -c docker-compose.logging.yml -c docker-compose.logging.ops.yml logging ``` Deploy the stack in production environment: Please note that in order to reuse existing database volumes, needs to be the same. Here we use `vhr18-pvs` but in operational service `vhr18-pdas` is used. @@ -304,7 +304,7 @@ On production machine, `fluentd` is set as a logging driver for docker daemon by The `SFTP` image allow remote access into 2 logging folders, you can define (edit/add) users, passwords and (UID/GID) using `docker config create` mentioned above. 
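A minimal sketch of connecting to the per-stack SFTP logging folders described above, using the `eox` example user and the dev-stack port mapping from the updated text (`vhr18` on 2222, `emg` on 2223, `dem` on 2224); host and credentials here are purely illustrative:

```bash
# connect to the report/logging folders of each stack on the swarm master (dev stack on localhost)
sftp -P 2222 eox@127.0.0.1   # vhr18 stack
sftp -P 2223 eox@127.0.0.1   # emg stack
sftp -P 2224 eox@127.0.0.1   # dem stack
```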
-In the below example the username is `eox`, once the stack is deployed you can sftp into the logging folders through port 2222 on -if you are running the dev stack- localhost : +In the below example the username is `eox`, once the stack is deployed you can sftp into the logging folders through port 2222 (for ``vhr18``, ``emg`` and ``dem`` have 2223 and 2224 respectively) if you are running the dev stack localhost : ```bash sftp -P 2222 eox@127.0.0.1 @@ -361,7 +361,7 @@ A hotfix to production is developed in a branch initiated from master, then merg Create a TAR from source code: ```bash -git archive --prefix release-1.0.0.rc.1/ -o release-1.0.0.rc.1.tar.gz -9 master +git archive --prefix release-1.0.0/ -o release-1.0.0.tar.gz -9 master ``` Save Docker images: diff --git a/client/html/View-Server_-_User-Guide_v1.1.0.pdf b/client/html/View-Server_-_User-Guide_v1.1.1.pdf similarity index 99% rename from client/html/View-Server_-_User-Guide_v1.1.0.pdf rename to client/html/View-Server_-_User-Guide_v1.1.1.pdf index 191ee76989bbb51cb75093ceb93b129a81f962c4..5da1ce74147ee0b8705a6610fa1e963c3452094e 100644 Binary files a/client/html/View-Server_-_User-Guide_v1.1.0.pdf and b/client/html/View-Server_-_User-Guide_v1.1.1.pdf differ diff --git a/client/html/prism.js b/client/html/prism.js index 734b60a673df2e5a033776e85065fda688893a3c..be1602a24bed84e2a015f33e8bba06f6b9b2ace2 100644 --- a/client/html/prism.js +++ b/client/html/prism.js @@ -168570,7 +168570,7 @@ return /******/ (function(modules) { // webpackBootstrap /* 1068 */ /***/ (function(module, exports) { - module.exports = {"layer_failed":"Failed to access layer '{{value}}'","search_error":"An error occurred during the search","search_norecords":"No products matched the search","search_n_layers_selected":"One layer selected to show","search_n_layers_selected_plural":"{{count}} layers selected to show","load_more":"Load {{count}} more","download_norecords":"No products selected for download","terms_and_conditions":"I have read and agree to the terms & conditions","start_download":"Start download of one product","start_download_plural":"Start download of {{count}} products","confirm_download":"'Download' starts the download of one product","confirm_download_plural":"'Download' starts the download of {{count}} products","confirm_note":"Note, the browser might open multiple confirmation dialogs or issue a warning. Besides, typically browsers have a limit of 6 concurrent connections. If popups are blocked by your browser, please enable them for this site.","download_size_warning":"The estimated size of the download without compression is {{estimated_size}}MB. This might exceed the capabilities of the service.","max_bbox_warning":"Maximum size of bounding box: {{max_bbox_size}} was exceeded by: {{max_bbox_exceed}} on axis: {{max_bbox_axis}}.","max_resolution_warning":"Maximum resolution: {{max_resolution}} was exceeded. 
Attempt to download might fail with an error.","download_bands_warning":"The selected format supports at most {{maxBands}} bands, but {{requestedBands}} are requested.","timefilter_warning":"Search results may differ from products shown on map because of using a separate time filter","advancedfilter_warning":"Search results may differ from products shown on map because of using additional filters","toomanyresults_warning":"Search results may differ from products shown on map because search results are too big","dynamic-histogram-title":"If enabled, time distributions are shown for current spatial filter.\nIf disabled, they are shown globally.","tutorial1":"

Welcome to the Catalog Client

Let's briefly walk through the main functionalities. Hit Next to start.

Feel free to Skip any time and to resume later using the button at the bottom right.

You can also find detailed information about the Client and referenced services here: User guide document HTML or PDF

.","tutorial2":"

The Map

Noticed the map in the back? It displays available satellite data.

Navigate to your area of interest by panning and zooming, either using left click + drag and mouse wheel scroll or one finger drag and two finger pinch.

Satisfied with the displayed data? Not yet? Let's change the time of interest Next.

","tutorial3":"

The Timeslider

The timeslider displays the distribution of data over time. The gray area shows the currently selected time interval.

Navigate to your time of interest again by panning (only in the bottom area) and zooming. Change the time interval selection by using left click + drag in the top area.

Did you notice how the data displayed on the map and the search results changed?

","tutorial4":"

Search Results

The satellite data within the current area and time of interest is listed here.

Inspect details by hovering over an item and hitting or select items for download.

Your result list is too long? Let's apply more filters Next.

","tutorial5":"

Filters

These filters allow to narrow down the search results. Note that the time and spatial filters are already applied via the map and timeslider.

Apply Additional Filters by selecting or typing the values of interest.

Did the search result list get smaller?

","tutorial6":"

Map layers

Available map and data layers are listed here.

Select and deselect Layers for viewing and searching. Overlays and Base Layers are only for the viewing on the map.

Satisfied with your search results?

Iterate all steps until you find the right satellite data to enjoy.

"} + module.exports = {"layer_failed":"Failed to access layer '{{value}}'","search_error":"An error occurred during the search","search_norecords":"No products matched the search","search_n_layers_selected":"One layer selected to show","search_n_layers_selected_plural":"{{count}} layers selected to show","load_more":"Load {{count}} more","download_norecords":"No products selected for download","terms_and_conditions":"I have read and agree to the terms & conditions","start_download":"Start download of one product","start_download_plural":"Start download of {{count}} products","confirm_download":"'Download' starts the download of one product","confirm_download_plural":"'Download' starts the download of {{count}} products","confirm_note":"Note, the browser might open multiple confirmation dialogs or issue a warning. Besides, typically browsers have a limit of 6 concurrent connections. If popups are blocked by your browser, please enable them for this site.","download_size_warning":"The estimated size of the download without compression is {{estimated_size}}MB. This might exceed the capabilities of the service.","max_bbox_warning":"Maximum size of bounding box: {{max_bbox_size}} was exceeded by: {{max_bbox_exceed}} on axis: {{max_bbox_axis}}.","max_resolution_warning":"Maximum resolution: {{max_resolution}} was exceeded. Attempt to download might fail with an error.","download_bands_warning":"The selected format supports at most {{maxBands}} bands, but {{requestedBands}} are requested.","timefilter_warning":"Search results may differ from products shown on map because of using a separate time filter","advancedfilter_warning":"Search results may differ from products shown on map because of using additional filters","toomanyresults_warning":"Search results may differ from products shown on map because search results are too big","dynamic-histogram-title":"If enabled, time distributions are shown for current spatial filter.\nIf disabled, they are shown globally.","tutorial1":"

Welcome to the Catalog Client

Let's briefly walk through the main functionalities. Hit Next to start.

Feel free to Skip any time and to resume later using the button at the bottom right.

You can also find detailed information about the Client and referenced services here: User guide document HTML or PDF

.","tutorial2":"

The Map

Noticed the map in the back? It displays available satellite data.

Navigate to your area of interest by panning and zooming, either using left click + drag and mouse wheel scroll or one finger drag and two finger pinch.

Satisfied with the displayed data? Not yet? Let's change the time of interest Next.

","tutorial3":"

The Timeslider

The timeslider displays the distribution of data over time. The gray area shows the currently selected time interval.

Navigate to your time of interest again by panning (only in the bottom area) and zooming. Change the time interval selection by using left click + drag in the top area.

Did you notice how the data displayed on the map and the search results changed?

","tutorial4":"

Search Results

The satellite data within the current area and time of interest is listed here.

Inspect details by hovering over an item and hitting or select items for download.

Your result list is too long? Let's apply more filters Next.

","tutorial5":"

Filters

These filters allow to narrow down the search results. Note that the time and spatial filters are already applied via the map and timeslider.

Apply Additional Filters by selecting or typing the values of interest.

Did the search result list get smaller?

","tutorial6":"

Map layers

Available map and data layers are listed here.

Select and deselect Layers for viewing and searching. Overlays and Base Layers are only for the viewing on the map.

Satisfied with your search results?

Iterate all steps until you find the right satellite data to enjoy.

"} /***/ }), /* 1069 */ diff --git a/config/dem_init-db.sh b/config/dem_init-db.sh index 648f4b051fbac1b0e3ff40c36aec93cb259256d1..cfc2c4ec310c545a05bd12d87fff5a562f8ca72f 100644 --- a/config/dem_init-db.sh +++ b/config/dem_init-db.sh @@ -42,7 +42,7 @@ if python3 manage.py id check "${COLLECTION}"; then python3 manage.py browsetype create "${COLLECTION}"_Product_COP-DEM_GLO-90-DGED --traceback \ --red "gray" \ --red-range -100 4000 \ - --red-nodata 0 + --red-nodata 0 python3 manage.py collectiontype create "${COLLECTION}"_Collection --traceback \ --coverage-type "float32_grayscale" \ @@ -66,22 +66,6 @@ if python3 manage.py id check "${COLLECTION}"; then echo "Provided collection '${COLLECTION}' not valid." fi - python3 manage.py storageauth create auth-cloud-ovh "${OS_AUTH_URL_SHORT}" \ - --type keystone \ - -p auth-version "${ST_AUTH_VERSION}" \ - -p identity-api-version="${ST_AUTH_VERSION}" \ - -p username "${OS_USERNAME}" \ - -p password "${OS_PASSWORD}" \ - -p tenant-name "${OS_TENANT_NAME}" \ - -p tenant-id "${OS_TENANT_ID}" \ - -p region-name "${OS_REGION_NAME}" - - python3 manage.py storage create \ - ${UPLOAD_CONTAINER} ${UPLOAD_CONTAINER} \ - --type swift \ - --storage-auth auth-cloud-ovh - - else echo "Using existing database" fi diff --git a/config/dem_registrar-config.yml b/config/dem_registrar-config.yml new file mode 100644 index 0000000000000000000000000000000000000000..5c255c039420827c238520095ca71f3f37460acb --- /dev/null +++ b/config/dem_registrar-config.yml @@ -0,0 +1,75 @@ +sources: + - type: swift + name: !env '${UPLOAD_CONTAINER}' + kwargs: + username: !env '${OS_USERNAME}' + password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + auth_url_short: !env '${OS_AUTH_URL_SHORT}' + container: !env '${UPLOAD_CONTAINER}' + +schemes: + - type: gsc + kwargs: + level_re: '([A-Z0-9-_]+)/.*' + +backends: + - type: eoxserver + filter: + kwargs: + instance_base_path: /var/www/pvs/dev + instance_name: pvs_instance + mapping: + DEM1: + COP-DEM_EEA-10-DGED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_EEA-10-DGED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_EEA-10-DGED' + coverages: + DEM1: float32_grayscale + COP-DEM_EEA-10-INSP: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_EEA-10-INSP' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_EEA-10-INSP' + coverages: + DEM1: float32_grayscale + COP-DEM_GLO-30-DGED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-30-DGED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-30-DGED' + coverages: + DEM1: float32_grayscale + COP-DEM_GLO-90-DGED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-90-DGED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-90-DGED' + coverages: + DEM1: float32_grayscale + COP-DEM_GLO-30-DTED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-30-DTED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-30-DTED' + coverages: + DEM1: int16_grayscale + COP-DEM_GLO-90-DTED: + product_type_name: !env '${COLLECTION}_Product_COP-DEM_GLO-90-DTED' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_COP-DEM_GLO-90-DTED' + coverages: + DEM1: int16_grayscale + +post_handlers: + - path: registrar.post_handlers.ReportingPostHandler + kwargs: + service_url: 
dem.pass.copernicus.eu + reporting_dir: /mnt/reports/ diff --git a/config/emg_init-db.sh b/config/emg_init-db.sh index 5944a22d495d77b97a2b2e7dfa9cee068742c984..7ce4d7e77ce45259a46dd32d869c66fc93c79bc7 100644 --- a/config/emg_init-db.sh +++ b/config/emg_init-db.sh @@ -975,23 +975,6 @@ if python3 manage.py id check "${COLLECTION}"; then echo "Provided collection '${COLLECTION}' not valid." fi - python3 manage.py storageauth create auth-cloud-ovh "${OS_AUTH_URL_SHORT}" \ - --type keystone \ - -p auth-version "${ST_AUTH_VERSION}" \ - -p identity-api-version="${ST_AUTH_VERSION}" \ - -p username "${OS_USERNAME}" \ - -p password "${OS_PASSWORD}" \ - -p tenant-name "${OS_TENANT_NAME}" \ - -p tenant-id "${OS_TENANT_ID}" \ - -p region-name "${OS_REGION_NAME}" - - python3 manage.py storage create \ - ${UPLOAD_CONTAINER} ${UPLOAD_CONTAINER} \ - --type swift \ - --storage-auth auth-cloud-ovh - - - else echo "Using existing database" fi \ No newline at end of file diff --git a/config/emg_registrar-config.yml b/config/emg_registrar-config.yml new file mode 100644 index 0000000000000000000000000000000000000000..e9103d8db2aa011cfbbdcff1e8e111246880e778 --- /dev/null +++ b/config/emg_registrar-config.yml @@ -0,0 +1,275 @@ +sources: + - type: swift + name: !env '${UPLOAD_CONTAINER}' + kwargs: + username: !env '${OS_USERNAME}' + password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + auth_url_short: !env '${OS_AUTH_URL_SHORT}' + container: !env '${UPLOAD_CONTAINER}' + +schemes: + - type: gsc + +backends: + - type: eoxserver + filter: + kwargs: + instance_base_path: /var/www/pvs/dev + instance_name: pvs_instance + mapping: + CS00: + ~: + product_type_name: !env '${COLLECTION}_Product_CS00' + collections: + - !env '${COLLECTION}' + coverages: + CS00: sar_hh_gray + CS01: + ~: + product_type_name: !env '${COLLECTION}_Product_CS01' + collections: + - !env '${COLLECTION}' + coverages: + CS01: sar_hh_gray + CS02: + ~: + product_type_name: !env '${COLLECTION}_Product_CS02' + collections: + - !env '${COLLECTION}' + coverages: + CS02: sar_hh_gray + CS03: + ~: + product_type_name: !env '${COLLECTION}_Product_CS03' + collections: + - !env '${COLLECTION}' + coverages: + CS03: sar_hh_gray + CS04: + ~: + product_type_name: !env '${COLLECTION}_Product_CS04' + collections: + - !env '${COLLECTION}' + coverages: + CS04: sar_hh_gray + DM01: + ~: + product_type_name: !env '${COLLECTION}_Product_DM01' + collections: + - !env '${COLLECTION}' + coverages: + DM01: RGNirByte + DM02: + ~: + product_type_name: !env '${COLLECTION}_Product_DM02' + collections: + - !env '${COLLECTION}' + coverages: + DM02: RGBNir + EQ02_3: + ~: + product_type_name: !env '${COLLECTION}_Product_EQ02_3' + collections: + - !env '${COLLECTION}' + coverages: + EQ02_3: RGB + EQ02_4: + ~: + product_type_name: !env '${COLLECTION}_Product_EQ02_4' + collections: + - !env '${COLLECTION}' + coverages: + EQ02_4: RGBNir + EW01: + ~: + product_type_name: !env '${COLLECTION}_Product_EW01' + collections: + - !env '${COLLECTION}' + coverages: + EW01: grayscale + EW02_3: + ~: + product_type_name: !env '${COLLECTION}_Product_EW02_3' + collections: + - !env '${COLLECTION}' + coverages: + EW02_3: RGB + EW02_4: + ~: + product_type_name: !env '${COLLECTION}_Product_EW02_4' + collections: + - !env '${COLLECTION}' + coverages: + EW02_4: RGBNir + EW02_8: + ~: + product_type_name: !env '${COLLECTION}_Product_EW02_8' 
+ collections: + - !env '${COLLECTION}' + coverages: + EW02_8: CBGYRReNirNir2 + EW03_3: + ~: + product_type_name: !env '${COLLECTION}_Product_EW03_3' + collections: + - !env '${COLLECTION}' + coverages: + EW03_3: RGB + EW03_4: + ~: + product_type_name: !env '${COLLECTION}_Product_EW03_4' + collections: + - !env '${COLLECTION}' + coverages: + EW03_4: RGBNir + EW03_8: + ~: + product_type_name: !env '${COLLECTION}_Product_EW03_8' + collections: + - !env '${COLLECTION}' + coverages: + EW03_8: CBGYRReNirNir2 + GE01_4: + ~: + product_type_name: !env '${COLLECTION}_Product_GE01_4' + collections: + - !env '${COLLECTION}' + coverages: + GE01_4: RGBNir + GE01_3: + ~: + product_type_name: !env '${COLLECTION}_Product_GE01_3' + collections: + - !env '${COLLECTION}' + coverages: + GE01_3: RGB + GE01_1: + ~: + product_type_name: !env '${COLLECTION}_Product_GE01_1' + collections: + - !env '${COLLECTION}' + coverages: + GE01_1: grayscale + GY01: + ~: + product_type_name: !env '${COLLECTION}_Product_GY01' + collections: + - !env '${COLLECTION}' + coverages: + GY01: RGBNir + IK02: + ~: + product_type_name: !env '${COLLECTION}_Product_IK02' + collections: + - !env '${COLLECTION}' + coverages: + IK02: RGBNir + KS03: + ~: + product_type_name: !env '${COLLECTION}_Product_KS03' + collections: + - !env '${COLLECTION}' + coverages: + KS03: RGBNir + PH1A: + ~: + product_type_name: !env '${COLLECTION}_Product_PH1A' + collections: + - !env '${COLLECTION}' + coverages: + PH1A: RGBNir + PH1B: + ~: + product_type_name: !env '${COLLECTION}_Product_PH1B' + collections: + - !env '${COLLECTION}' + coverages: + PH1B: RGBNir + RE00: + ~: + product_type_name: !env '${COLLECTION}_Product_RE00' + collections: + - !env '${COLLECTION}' + coverages: + RE00: BGRReNir + RS02_2: + ~: + product_type_name: !env '${COLLECTION}_Product_RS02_2' + collections: + - !env '${COLLECTION}' + coverages: + RS02_2: sar_hh_gray + RS02_3: + ~: + product_type_name: !env '${COLLECTION}_Product_RS02_3' + collections: + - !env '${COLLECTION}' + coverages: + RS02_3: sar_hh_vv_gray + RS02_7: + ~: + product_type_name: !env '${COLLECTION}_Product_RS02_7' + collections: + - !env '${COLLECTION}' + coverages: + RS02_7: sar_hh_hv_vh_vv_rgb + SP04: + ~: + product_type_name: !env '${COLLECTION}_Product_SP04' + collections: + - !env '${COLLECTION}' + coverages: + SP04: RGBNirByte + SP05: + ~: + product_type_name: !env '${COLLECTION}_Product_SP05' + collections: + - !env '${COLLECTION}' + coverages: + SP05: RGNirByte + SP06: + ~: + product_type_name: !env '${COLLECTION}_Product_SP06' + collections: + - !env '${COLLECTION}' + coverages: + SP06: RGBNir + SP07: + ~: + product_type_name: !env '${COLLECTION}_Product_SP07' + collections: + - !env '${COLLECTION}' + coverages: + SP07: RGBNir + TX01_2: + ~: + product_type_name: !env '${COLLECTION}_Product_TX01_2' + collections: + - !env '${COLLECTION}' + coverages: + TX01_2: sar_hh_gray + TX01_3: + ~: + product_type_name: !env '${COLLECTION}_Product_TX01_3' + collections: + - !env '${COLLECTION}' + coverages: + TX01_3: sar_hh_vv_gray + TX01_7: + ~: + product_type_name: !env '${COLLECTION}_Product_TX01_7' + collections: + - !env '${COLLECTION}' + coverages: + TX01_7: sar_hh_hv_vh_vv_rgb + +post_handlers: + - path: registrar.post_handlers.ReportingPostHandler + kwargs: + service_url: emg.pass.copernicus.eu + reporting_dir: /mnt/reports/ diff --git a/config/vhr18_init-db.sh b/config/vhr18_init-db.sh index 46c0974298ad602e0784d02dc6a0ca707f5430c0..3f1451dfd7a0e3afe7286672fc546bdbdfe75fab 100644 --- a/config/vhr18_init-db.sh +++ 
b/config/vhr18_init-db.sh @@ -401,22 +401,6 @@ if python3 manage.py id check "${COLLECTION}"; then echo "Provided collection '${COLLECTION}' not valid." fi - python3 manage.py storageauth create auth-cloud-ovh "${OS_AUTH_URL_SHORT}" \ - --type keystone \ - -p auth-version "${ST_AUTH_VERSION}" \ - -p identity-api-version="${ST_AUTH_VERSION}" \ - -p username "${OS_USERNAME}" \ - -p password "${OS_PASSWORD}" \ - -p tenant-name "${OS_TENANT_NAME}" \ - -p tenant-id "${OS_TENANT_ID}" \ - -p region-name "${OS_REGION_NAME}" - - python3 manage.py storage create \ - ${UPLOAD_CONTAINER} ${UPLOAD_CONTAINER} \ - --type swift \ - --storage-auth auth-cloud-ovh - - else echo "Using existing database" fi diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml new file mode 100644 index 0000000000000000000000000000000000000000..cee5f0f3edf66fea4f6d783084d268cbe83dd0ca --- /dev/null +++ b/config/vhr18_registrar-config.yml @@ -0,0 +1,220 @@ +sources: + - type: swift + name: !env '${UPLOAD_CONTAINER}' + kwargs: + username: !env '${OS_USERNAME}' + password: !env '${OS_PASSWORD}' + tenant_name: !env '${OS_TENANT_NAME}' + tenant_id: !env '${OS_TENANT_ID}' + region_name: !env '${OS_REGION_NAME}' + auth_version: !env '${ST_AUTH_VERSION}' + auth_url: !env '${OS_AUTH_URL}' + auth_url_short: !env '${OS_AUTH_URL_SHORT}' + container: !env '${UPLOAD_CONTAINER}' + +schemes: + - type: gsc + +backends: + - type: eoxserver + filter: + kwargs: + instance_base_path: /var/www/pvs/dev + instance_name: pvs_instance + mapping: + PL00: + Level_1: + product_type_name: !env '${COLLECTION}_Product_PL00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + PL00: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_PL00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + PL00: RGBNir + masks: + validity: validity + DM02: + Level_1: + product_type_name: !env '${COLLECTION}_Product_DM02' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + DM02: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_DM02' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + DM02: RGBNir + masks: + validity: validity + KS03: + Level_1: + product_type_name: !env '${COLLECTION}_Product_KS03' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + KS03: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_KS03' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + KS03: RGBNir + masks: + validity: validity + KS04: + Level_1: + product_type_name: !env '${COLLECTION}_Product_KS04' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + KS04: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_KS04' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + KS04: RGBNir + masks: + validity: validity + PH1A: + Level_1: + product_type_name: !env '${COLLECTION}_Product_PH1A' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + PH1A: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_PH1A' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + PH1A: RGBNir + masks: + validity: validity + 
PH1B: + Level_1: + product_type_name: !env '${COLLECTION}_Product_PH1B' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + PH1B: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_PH1B' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + PH1B: RGBNir + masks: + validity: validity + SP06: + Level_1: + product_type_name: !env '${COLLECTION}_Product_SP06' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + SP06: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_SP06' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + SP06: RGBNir + masks: + validity: validity + SP07: + Level_1: + product_type_name: !env '${COLLECTION}_Product_SP07' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + SP07: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_SP07' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + SP07: RGBNir + masks: + validity: validity + SW00: + Level_1: + product_type_name: !env '${COLLECTION}_Product_SW00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + SW00: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_SW00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + SW00: RGBNir + masks: + validity: validity + TR00: + Level_1: + product_type_name: !env '${COLLECTION}_Product_TR00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_1' + coverages: + TR00: RGBNir + masks: + validity: validity + Level_3: + product_type_name: !env '${COLLECTION}_Product_TR00' + collections: + - !env '${COLLECTION}' + - !env '${COLLECTION}_Level_3' + coverages: + TR00: RGBNir + masks: + validity: validity + +post_handlers: + - path: registrar.post_handlers.ReportingPostHandler + kwargs: + service_url: vhr18.pass.copernicus.eu + reporting_dir: /mnt/reports/ diff --git a/core/Dockerfile b/core/Dockerfile index b8c8148d756b1b001327d16a3df038e456ee4698..70fb15f1662252baaec7cc3b2e7e82520bbc77de 100644 --- a/core/Dockerfile +++ b/core/Dockerfile @@ -43,7 +43,7 @@ RUN apt update && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* RUN pip3 install . 
&& \ - pip3 install python-keystoneclient python-swiftclient redis + pip3 install python-keystoneclient python-swiftclient redis click setuptools jsonschema boto3 ENV INSTANCE_ID="prism-view-server_core" \ INSTANCE_NAME="pvs_instance"\ @@ -77,12 +77,22 @@ ADD rgbnir_definition.json \ configure.sh \ run-httpd.sh \ run-registrar.sh \ - registrar.py \ entrypoint.sh \ wait-initialized.sh \ initialized.sh \ / +RUN mkdir /registrar +ADD registrar/ \ + /registrar/registrar + +ADD setup.py \ + /registrar + +RUN cd /registrar && \ + ls && \ + python3 setup.py install + RUN chmod -v +x \ /configure.sh \ /run-registrar.sh \ diff --git a/core/registrar.py b/core/registrar.py deleted file mode 100644 index 60b805e36dd57358854682339fdf4e1fcb257d6f..0000000000000000000000000000000000000000 --- a/core/registrar.py +++ /dev/null @@ -1,498 +0,0 @@ -#!/usr/bin/env python -# ----------------------------------------------------------------------------- -# -# Project: registrar.py -# Authors: Stephan Meissl -# -# ----------------------------------------------------------------------------- -# Copyright (c) 2019 EOX IT Services GmbH -# -# Python script to register products. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies of this Software or works derived from this Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
-# ----------------------------------------------------------------------------- - - -import sys -import os -import argparse -import textwrap -import logging -import traceback -from xml.sax.saxutils import escape -import subprocess - -import redis -import lxml.etree -from swiftclient.service import SwiftService - -import django -from django.db import transaction -from django.contrib.gis.geos import GEOSGeometry -from osgeo import gdal - -path = os.path.join(os.getenv('INSTALL_DIR', "/var/www/pvs"), "pvs_instance") -if path not in sys.path: - sys.path.append(path) - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pvs_instance.settings") -django.setup() - -from eoxserver.backends import access -from eoxserver.contrib import vsi -from eoxserver.backends import models as backends -from eoxserver.core.util.timetools import isoformat -from eoxserver.resources.coverages import models -from eoxserver.resources.coverages.registration.product import ( - ProductRegistrator -) -from eoxserver.resources.coverages.registration.registrators.gdal import ( - GDALRegistrator -) - -logger = logging.getLogger(__name__) - -def setup_logging(verbosity): - # start logging setup - # get command line level - verbosity = verbosity - if verbosity == 0: - level = logging.CRITICAL - elif verbosity == 1: - level = logging.ERROR - elif verbosity == 2: - level = logging.WARNING - elif verbosity == 3: - level = logging.INFO - else: - level = logging.DEBUG - logger.setLevel(level) - sh = logging.StreamHandler() - sh.setLevel(level) - formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s") - sh.setFormatter(formatter) - logger.addHandler(sh) - # finished logging setup - - -def set_gdal_swift_auth(): - # parsing command line output of swift auth - auth_keys = subprocess.check_output(["swift", "auth"]).decode(sys.stdout.encoding).split("\n") - storage_url = auth_keys[0].split("OS_STORAGE_URL=")[1] - auth_token = auth_keys[1].split("OS_AUTH_TOKEN=")[1] - # setting gdal config - gdal.SetConfigOption("SWIFT_STORAGE_URL", storage_url) - gdal.SetConfigOption("SWIFT_AUTH_TOKEN", auth_token) - - -def add_mask(product): - metadata_item = product.metadata_items.all()[0] - with access.vsi_open(metadata_item) as f: - tree = lxml.etree.parse(f) - root = tree.getroot() - wkt = tree.xpath( - '//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', - namespaces=root.nsmap - )[0] - geometry = GEOSGeometry(wkt) - mask_type = models.MaskType.objects.get(product_type=product.product_type) - logger.debug("Adding mask") - models.Mask.objects.create( - product=product, - mask_type=mask_type, - geometry=geometry, - ) - - -def get_product_type_and_level(metadata_item): - level = None - with access.vsi_open(metadata_item) as f: - tree = lxml.etree.parse(f) - root = tree.getroot() - - try: - xp = '//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()' - product_type_name = tree.xpath(xp, namespaces=root.nsmap)[0] - except Exception as e: - logger.debug( - 'Failed to determine product type of %s, error was %s' - % (metadata_item.location, e) - ) - - try: - xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()' - parent_identifier = tree.xpath(xp, namespaces=root.nsmap)[0] - - if parent_identifier.endswith('Level_1'): - level = 'Level_1' - if parent_identifier.endswith('Level_3'): - level = 'Level_3' - else: - raise Exception('Invalid parent identifier 
type name %s' % parent_identifier) - except Exception as e: - logger.debug( - 'Failed to determine product level of %s, error was %s' - % (metadata_item.location, e) - ) - - return product_type_name, level - - -def get_product_collection(metadata_file): - # in case collection needs to be determined from metadata - try: - if metadata_file.startswith("/vsiswift"): - set_gdal_swift_auth() - with vsi.open(metadata_file, "r") as f: - tree = lxml.etree.parse(f) - root = tree.getroot() - xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()' - product_type_name = tree.xpath(xp, namespaces=root.nsmap) - extracted = product_type_name[0].split('/')[0] - return extracted - except Exception as e: - logger.debug( - 'Failed to determine product collection for metadata file %s, error was %s' - % (metadata_file, e) - ) - - -def get_product_type_from_band_count(product_type_name, file_path): - # get raster band count via gdal - logger.debug("Opening file using GDAL: %s" % file_path) - if file_path.startswith("/vsiswift"): - set_gdal_swift_auth() - src_ds = gdal.Open(file_path) - if src_ds is None: - raise RegistrationError("Band check: failed to open dataset: %s " % file_path) - # try to fetch product model with _bandcount - product_type_name_upd = "%s_%s" % (product_type_name, src_ds.RasterCount) - try: - product_type_model = models.ProductType.objects.get(name=product_type_name_upd) - return product_type_model - except models.ProductType.DoesNotExist: - raise RegistrationError("Product Type: '%s' was not found" % product_type_name_upd) - - -class RegistrationError(Exception): - pass - - -@transaction.atomic -def registrar( - collection_stack, - objects_prefix, upload_container=None, replace=False, client=None, registered_set_key=None, - reporting_dir=None, service_url=None - -): - logger.info("Starting registration of product '%s'." % objects_prefix) - - metadata_package, data_package = None, None - if not upload_container: - # assuming objects_prefix = bucket/itemname - upload_container = objects_prefix.partition("/")[0] - objects_prefix = objects_prefix.partition("/")[2] - with SwiftService() as swift: - list_parts_gen = swift.list( - container=upload_container, options={"prefix": objects_prefix}, - ) - for page in list_parts_gen: - if page["success"]: - for item in page["listing"]: - if item["name"].endswith(".xml"): - metadata_package = item["name"] - elif item["name"].endswith(".TIF") or \ - item["name"].endswith(".tif"): - data_package = item["name"] - elif not item["name"].endswith(".tar"): - raise RegistrationError( - "Product with objects prefix '%s' has " - "wrong content '%s'." - % (objects_prefix, item["name"]) - ) - else: - logger.error(page["error"]) - raise RegistrationError( - "No product found with objects prefix '%s'." - % objects_prefix - ) - - if metadata_package is None or data_package is None: - raise RegistrationError( - "Product with objects prefix '%s' has missing content." - % objects_prefix - ) - logger.debug("Found objects '%s' and '%s'." 
% (data_package, metadata_package)) - - storage = backends.Storage.objects.get(name=upload_container) - metadata_item = models.MetaDataItem(storage=storage, location=metadata_package) - - product_type, level = get_product_type_and_level(metadata_item) - if collection_stack == 'DEM': - # special for DEM files, collection name === product_type - gdal_metadata_file_path = "/vsiswift/%s/%s" % (upload_container, metadata_package) - product_type = get_product_collection(gdal_metadata_file_path) - logger.debug("Registering product") - product_type_name = "%s_Product_%s" % (collection_stack, product_type) - - try: - # first find product type by name from path - product_type_model = models.ProductType.objects.get(name=product_type_name) - except models.ProductType.DoesNotExist: - # if not found, maybe there are more product types with _bandcount suffix - gdal_file_path = "/vsiswift/%s/%s" % (upload_container, data_package) - product_type_model = get_product_type_from_band_count(product_type_name, gdal_file_path) - product_type_name = product_type_model.name - coverage_type_names = product_type_model.allowed_coverage_types.all() - if len(coverage_type_names) > 1: - logger.warning("More available 'CoverageType' found, selecting the first one.") - coverage_type_name = coverage_type_names[0].name - - product, replaced = ProductRegistrator().register( - metadata_locations=[[upload_container, - metadata_package, ], ], - type_name=product_type_name, - replace=replace, - extended_metadata=True, - mask_locations=None, - package_path=None, - simplify_footprint_tolerance=0.0001, # ~10meters - overrides={}, - ) - if product.footprint.empty: - product.delete() - raise RegistrationError("No footprint was extracted. full product: %s" % product) - - collection = models.Collection.objects.get( - identifier=collection_stack - ) - logger.debug("Inserting product into collection %s" % collection_stack) - models.collection_insert_eo_object(collection, product) - - if collection_stack == "DEM": - # also insert it to its own collection - collection_own = models.Collection.objects.get( - identifier="%s_%s" % (collection, product_type) - ) - logger.debug("Inserting product to collection %s_%s" % (collection, product_type)) - models.collection_insert_eo_object(collection_own, product) - - if level == 'Level_1': - collection_level_1 = models.Collection.objects.get( - identifier="%s_Level_1" % collection - ) - logger.debug("Inserting product to collection %s_Level_1" % collection) - models.collection_insert_eo_object(collection_level_1, product) - elif level == 'Level_3': - collection_level_3 = models.Collection.objects.get( - identifier="%s_Level_3" % collection - ) - logger.debug("Inserting product to collection %s_Level_3" % collection) - models.collection_insert_eo_object(collection_level_3, product) - - logger.debug("Registering coverage") - report = GDALRegistrator().register( - data_locations=[[upload_container, data_package, ], ], - metadata_locations=[[upload_container, - metadata_package, ], ], - coverage_type_name=coverage_type_name, - overrides={ - "identifier": "%s__coverage" % product.identifier, - "footprint": None, - }, - replace=replace, - ) - logger.debug("Adding coverage to product") - models.product_add_coverage(product, report.coverage) - - try: - add_mask(product) - except Exception as e: - logger.debug("Couldn't add mask.") - logger.debug(traceback.format_exc()) - logger.debug("%s: %s\n" % (type(e).__name__, str(e))) - - if client is not None: - logger.debug( - "Storing times in redis queue '%s" % 
registered_set_key - ) - client.sadd( - registered_set_key, "%s/%s" - % ( - product.begin_time.strftime("%Y%m%dT%H%M%S"), - product.end_time.strftime("%Y%m%dT%H%M%S") - ) - ) - - timestamp = product.inserted.strftime("%Y%m%dT%H%M%S") - - if reporting_dir is not None: - with open(os.path.join(reporting_dir, 'item_%s_%s.xml' % (timestamp, product.identifier)),'w') as f: - f.write(textwrap.dedent(""" - - - {identifier} - {availability_time} - - WCS - {wms_capabilities_url} - - - WMS - {wcs_capabilities_url} - - - """.format( - identifier=escape(product.identifier), - availability_time=escape(isoformat(product.inserted)), - wcs_capabilities_url=escape( - '%s/ows?service=wcs&request=GetCapabilities&cql=identifier="%s"' - % (service_url, product.identifier) - ), - wms_capabilities_url=escape( - '%s/ows?service=wms&request=GetCapabilities&cql=identifier="%s"' - % (service_url, product.identifier) - ), - ))) - - logger.info( - "Successfully finished registration of product '%s'." % objects_prefix - ) - - -def registrar_redis_wrapper( - collection, - upload_container, - replace=False, host="localhost", port=6379, - register_queue_key="register_queue", - registered_set_key="registered_set", - reporting_dir=None, - service_url=None, -): - client = redis.Redis( - host=host, port=port, charset="utf-8", decode_responses=True - ) - while True: - logger.debug("waiting for redis queue '%s'..." % register_queue_key) - value = client.brpop(register_queue_key) - try: - registrar( - collection, - value[1], - upload_container, - replace=replace, - client=client, - registered_set_key=registered_set_key, - reporting_dir=reporting_dir, - service_url=service_url, - ) - except Exception as e: - logger.debug(traceback.format_exc()) - logger.error("%s: %s\n" % (type(e).__name__, str(e))) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.description = textwrap.dedent("""\ - Register products. - """) - - parser.add_argument( - "--mode", default="standard", choices=["standard", "redis"], - help=( - "The mode to run the registrar. Either one-off (standard) or " - "reading from a redis queue." - ) - ) - parser.add_argument( - "--objects-prefix", default=None, - help=( - "Prefix to objects holding the metadata and data of product." - ) - ) - parser.add_argument( - "--replace", action="store_true", - help=( - "Replace existing products instead of skipping the registration." - ) - ) - parser.add_argument( - "--redis-register-queue-key", default="register_queue" - ) - parser.add_argument( - "--redis-registered-set-key", default="registered_set" - ) - parser.add_argument( - "--redis-host", default="localhost" - ) - parser.add_argument( - "--redis-port", type=int, default=6379 - ) - parser.add_argument( - "--reporting-dir", - ) - parser.add_argument( - "--service-url", - ) - - parser.add_argument( - "-v", "--verbosity", type=int, default=3, choices=[0, 1, 2, 3, 4], - help=( - "Set verbosity of log output " - "(4=DEBUG, 3=INFO, 2=WARNING, 1=ERROR, 0=CRITICAL). (default: 3)" - ) - ) - - arg_values = parser.parse_args() - - setup_logging(arg_values.verbosity) - - collection = os.environ.get('COLLECTION') - if collection is None: - logger.critical("Collection environment variable not set.") - sys.exit(1) - - upload_container = os.environ.get('UPLOAD_CONTAINER') - if upload_container is None: - logger.warn("UPLOAD_CONTAINER environment variable not set. 
Assuming part of path bucket/item") - - if arg_values.mode == "standard": - registrar( - collection, - arg_values.objects_prefix, - upload_container, - replace=arg_values.replace, - reporting_dir=arg_values.reporting_dir, - service_url=arg_values.service_url, - ) - else: - registrar_redis_wrapper( - collection, - upload_container, - replace=arg_values.replace, - host=arg_values.redis_host, - port=arg_values.redis_port, - register_queue_key=arg_values.redis_register_queue_key, - registered_set_key=arg_values.redis_registered_set_key, - reporting_dir=arg_values.reporting_dir, - service_url=arg_values.service_url, - ) diff --git a/core/registrar/__init__.py b/core/registrar/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/core/registrar/backend.py b/core/registrar/backend.py new file mode 100644 index 0000000000000000000000000000000000000000..17144f4753c8ee240388d6ecdf94d217110e47ed --- /dev/null +++ b/core/registrar/backend.py @@ -0,0 +1,256 @@ +import os +import re +import sys +import logging +from typing import List +import json + +import django +from django.db import transaction +from django.contrib.gis.geos import GEOSGeometry, Polygon + +from .exceptions import RegistrationError +from .context import Context +from .source import Source, LocalSource, S3Source, SwiftSource + + +logger = logging.getLogger(__name__) + + +class RegistrationResult: + pass + + +class Backend: + def register_item(self, item: Context) -> RegistrationResult: + raise NotImplementedError + + +class EOxServerBackend(Backend): + def __init__(self, instance_base_path: str, instance_name: str, mapping: dict, simplify_footprint_tolerance: int=None): + self.mapping = mapping + self.simplify_footprint_tolerance = simplify_footprint_tolerance + path = os.path.join(instance_base_path, instance_name) + if path not in sys.path: + sys.path.append(path) + + os.environ.setdefault("DJANGO_SETTINGS_MODULE", f"{instance_name}.settings") # TODO: from config + django.setup() + + def exists(self, source: Source, item: Context): + from eoxserver.resources.coverages import models + return models.Product.objects.filter(identifier=item.identifier).exists() + + def _get_storage_from_source(self, source: Source, path: str) -> list: + from eoxserver.backends import models as backends + + created_storage_auth = False + created_storage = False + storage_name = None + if isinstance(source, LocalSource): + storage, created_storage = backends.Storage.get_or_create( + name=source.name, + url=source.root_directory, + storage_type='local', + ) + storage_name = storage.name + + elif isinstance(source, S3Source): + params = json.dumps({ + 'ACCESS_KEY_ID': source.access_key_id, + 'SECRET_ACCESS_KEY': source.secret_access_key, + }) + + endpoint_url = source.endpoint_url + if endpoint_url.startswith('https://'): + endpoint_url = endpoint_url[len('https://'):] + elif endpoint_url.startswith('http://'): + endpoint_url = endpoint_url[len('http://'):] + + storage_auth, created_storage_auth = backends.StorageAuth.objects.get_or_create( + name=endpoint_url, + url=endpoint_url, + storage_auth_type='S3', + auth_parameters=params, + ) + + bucket, _ = source.get_container_and_path(path) + + storage, created_storage = backends.Storage.objects.get_or_create( + name=source.name if source.bucket_name else f'{source.name}-{bucket}', + url=bucket, + storage_type='S3', + storage_auth=storage_auth, + ) + storage_name = storage.name + + elif isinstance(source, SwiftSource): + params = 
json.dumps({ + 'auth-version': str(source.auth_version), + 'identity-api-version': str(source.auth_version), + 'username': source.username, + 'password': source.password, + 'tenant-name': source.tenant_name, + 'tenant-id': source.tenant_id, + 'region-name': source.region_name, + }) + + storage_auth, created_storage_auth = backends.StorageAuth.objects.get_or_create( + name=source.auth_url, + url=source.auth_url_short or source.auth_url, + storage_auth_type='keystone', + auth_parameters=params, + ) + + container, _ = source.get_container_and_path(path) + + storage, created_storage = backends.Storage.objects.get_or_create( + name=source.name if source.container else f'{source.name}-{container}', + url=container, + storage_type='swift', + storage_auth=storage_auth, + ) + storage_name = storage.name + + if created_storage_auth: + logger.info(f'Created storage auth for {source.name}') + if created_storage: + logger.info(f'Created storage for {source.name}') + + return [storage_name] if storage_name else [] + + @transaction.atomic + def register(self, source: Source, item: Context, replace: bool) -> RegistrationResult: + # ugly, ugly hack + from eoxserver.resources.coverages import models + from eoxserver.resources.coverages.registration.product import ProductRegistrator + from eoxserver.resources.coverages.registration.browse import BrowseRegistrator + from eoxserver.resources.coverages.registration.mask import MaskRegistrator + from eoxserver.resources.coverages.registration.registrators.gdal import GDALRegistrator + + # get the mapping for this particular item + type_mapping = self.mapping[item.product_type] + mapping = type_mapping.get(item.product_level) or type_mapping.get(None) + + if not mapping: + raise RegistrationError(f'Could not get mapping for {item.product_type} {item.product_level}') + + _, metadata_file = source.get_container_and_path(item.metadata_files[0]) + # metadata_file = '/'.join(item.metadata_files[0].split('/')[1:]) + storage = self._get_storage_from_source(source, item.path) + + try: + models.ProductType.objects.get(name=mapping['product_type_name']) + except models.ProductType.DoesNotExist: + pass + + if 'footprint' in item.metadata: + footprint = GEOSGeometry(item.metadata.pop('footprint')) + else: + footprint = None + + product, _ = ProductRegistrator().register( + metadata_locations=[storage + [metadata_file]], + type_name=mapping['product_type_name'], + replace=replace, + extended_metadata=True, + mask_locations=None, + package_path=None, + simplify_footprint_tolerance=self.simplify_footprint_tolerance, + overrides=dict( + identifier=item.identifier, + footprint=footprint, + **item.metadata + ), + ) + if not product.footprint or product.footprint.empty: + raise RegistrationError("No footprint was extracted. 
full product: %s" % product) + + # insert the product in the to be associated collections + for collection_id in mapping.get('collections', []): + collection = models.Collection.objects.get( + identifier=collection_id, + ) + models.collection_insert_eo_object(collection, product) + + # register coverages and link them to the product + for raster_identifier, coverage_type_name in mapping.get('coverages', {}).items(): + raster_items = item.raster_files.get(raster_identifier) + raster_items = [ + storage + [source.get_container_and_path(raster_item)[1]] + for raster_item in (raster_items if isinstance(raster_items, list) else [raster_items]) + ] + + logger.info(f"Registering coverage{'s' if len(raster_items) > 1 else ''} {raster_items} as {coverage_type_name}") + + report = GDALRegistrator().register( + data_locations=raster_items, + metadata_locations=[storage + [metadata_file]], + coverage_type_name=coverage_type_name, + overrides={ + "identifier": f'{product.identifier}__{raster_identifier}__coverage', + "footprint": None, + }, + replace=replace, + ) + logger.debug("Adding coverage to product") + models.product_add_coverage(product, report.coverage) + + # register browses + for raster_identifier, browse_type_name in mapping.get('browses', {}).items(): + raster_item = item.raster_files.get(raster_identifier) + + _, raster_item = source.get_container_and_path(raster_item) + logger.info(f"Adding browse {browse_type_name or 'default'} {raster_item} to product") + + BrowseRegistrator().register( + product.identifier, + storage + [raster_item], + browse_type_name, + ) + + # register masks + for mask_identifier, mask_type_name in mapping.get('masks', {}).items(): + _, mask_item = source.get_container_and_path(item.mask_files.get(mask_identifier)) + if mask_item: + logger.info(f"Adding mask (file) {mask_type_name} to product") + MaskRegistrator().register( + product.identifier, + storage + [mask_item], + mask_type_name, + ) + + mask_item = item.masks.get(mask_identifier) + if mask_item: + logger.info(f"Adding mask (geometry) {mask_type_name} to product") + models.Mask.objects.create( + product=product, + mask_type=models.MaskType.objects.get( + product_type=product.product_type, + name=mask_type_name, + ), + geometry=mask_item, + ) + + +BACKENDS = { + 'eoxserver': EOxServerBackend +} + +def get_backends(config: dict, path: str) -> List[Backend]: + cfg_backends = config['backends'] + + backends = [ + BACKENDS[cfg_backend['type']]( + *cfg_backend.get('args', []), + **cfg_backend.get('kwargs', {}), + ) + for cfg_backend in cfg_backends + if not cfg_backend.get('filter') or re.match(cfg_backend['filter'], path) + ] + + if not backends: + raise RegistrationError(f'Could not find a suitable backend for the path {path}') + + return backends + diff --git a/core/registrar/cli.py b/core/registrar/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..0a2943ffa33a7c93227e2ce43e5faf4ac687d35f --- /dev/null +++ b/core/registrar/cli.py @@ -0,0 +1,81 @@ +from os.path import join, dirname +import logging.config +import json + +import click +import yaml +import jsonschema + +from .registrar import register_file +from .daemon import run_daemon +from .config import load_config + + +def setup_logging(debug=False): + logging.config.dictConfig({ + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'brief': { + 'format': '%(levelname)s %(name)s: %(message)s' + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'DEBUG' if debug else 'INFO', + 
'formatter': 'brief', + } + }, + 'root': { + 'handlers': ['console'], + 'level': 'DEBUG' if debug else 'INFO', + } + }) + + +def validate_config(config): + with open(join(dirname(__file__), 'config-schema.yaml')) as f: + schema = yaml.load(f) + + jsonschema.validate(config, schema) + + +@click.group() +def cli(): + pass + + +@cli.command(help='Run the registrar daemon, attaching to a Redis queue') +@click.option('--config-file', type=click.File('r')) +@click.option('--validate/--no-validate', default=False) +@click.option('--replace/--no-replace', default=False) +@click.option('--host', type=str) +@click.option('--port', type=int) +@click.option('--listen-queue', type=str) +@click.option('--registered-set-key', type=str) +@click.option('--debug/--no-debug', default=False) +def daemon(config_file=None, validate=False, replace=False, host=None, port=None, listen_queue=None, registered_set_key=None, debug=False): + setup_logging(debug) + config = load_config(config_file) + if validate: + validate_config(config) + run_daemon(config, replace, host, port, listen_queue, registered_set_key) + + +@cli.command(help='Run a single, one-off registration') +@click.argument('file_path', type=str) +@click.option('--config-file', type=click.File('r')) +@click.option('--validate/--no-validate', default=False) +@click.option('--replace/--no-replace', default=False) +@click.option('--debug/--no-debug', default=False) +def register(file_path, config_file=None, validate=False, replace=False, debug=False): + setup_logging(debug) + config = load_config(config_file) + if validate: + validate_config(config) + + register_file(config, file_path, replace) + +if __name__ == '__main__': + cli() diff --git a/core/registrar/config-schema.yaml b/core/registrar/config-schema.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6d6659c1bade600b576ccf0f0593a25f704aaec --- /dev/null +++ b/core/registrar/config-schema.yaml @@ -0,0 +1,96 @@ +$id: https://example.com/address.schema.json +$schema: http://json-schema.org/draft-07/schema# +type: object +properties: + source: + description: Input sources definitions + type: array + items: + description: A single source definition + type: object + properties: + type: + description: The source type. + type: string + enum: ['local', 's3', 'swift'] + filter: + description: Optional filter to only be used for these paths + type: string + args: + description: Constructor arguments + type: array + kwargs: + description: Constructor keyword arguments + type: object + schemes: + description: Registration schemes definitions + type: array + items: + description: A single registration scheme definition + type: object + properties: + type: + description: The registration scheme type. + type: string + enum: ['gsc', 'sentinel-2'] + filter: + description: Optional filter to only be used for these paths + type: string + args: + description: Constructor arguments + type: array + kwargs: + description: Constructor keyword arguments + type: object + backends: + description: Registration backends definitions + type: array + items: + description: A single registration scheme definition + type: object + properties: + type: + description: The registration scheme type. 
+ type: string + enum: ['eoxserver'] + filter: + description: Optional filter to only be used for these paths + type: string + args: + description: Constructor arguments + type: array + kwargs: + description: Constructor keyword arguments + type: object + pre_handlers: + description: List of handlers to be run prior the registration of an item. + type: array + items: + description: A single pre-registration handler + type: object + properties: + path: + description: Python module path to the registration handler + type: string + args: + description: arguments for the initialization of the handler + type: array + kwargs: + description: keyword arguments for the initialization of the handler + type: object + post_handlers: + description: List of handlers to be run prior the registration of an item. + type: array + items: + description: A single pre-registration handler + type: object + properties: + path: + description: Python module path to the registration handler + type: string + args: + description: arguments for the initialization of the handler + type: array + kwargs: + description: keyword arguments for the initialization of the handler + type: object diff --git a/core/registrar/config.py b/core/registrar/config.py new file mode 100644 index 0000000000000000000000000000000000000000..77534e9411ce9e598d059a53460d40565408870d --- /dev/null +++ b/core/registrar/config.py @@ -0,0 +1,39 @@ +import os +from typing import TextIO +import re + +import yaml + + +ENV_PATTERN = re.compile(r'.*?\${(\w+)}.*?') + +def constructor_env_variables(loader, node): + """ + Extracts the environment variable from the node's value + :param yaml.Loader loader: the yaml loader + :param node: the current node in the yaml + :return: the parsed string that contains the value of the environment + variable + """ + value = loader.construct_scalar(node) + match = ENV_PATTERN.findall(value) # to find all env variables in line + if match: + full_value = value + for g in match: + full_value = full_value.replace( + f'${{{g}}}', os.environ.get(g, g) + ) + return full_value + return value + + +def load_config(input_file: TextIO): + tag = '!env' + loader = yaml.SafeLoader + + # the tag will be used to mark where to start searching for the pattern + # e.g. 
somekey: !env somestring${MYENVVAR}blah blah blah + loader.add_implicit_resolver(tag, ENV_PATTERN, None) + loader.add_constructor(tag, constructor_env_variables) + + return yaml.load(input_file, Loader=loader) diff --git a/core/registrar/context.py b/core/registrar/context.py new file mode 100644 index 0000000000000000000000000000000000000000..6384461116fa35526986998599c81eb1333a1666 --- /dev/null +++ b/core/registrar/context.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass, field + + +@dataclass +class Context: + identifier: str + path: str + product_type: str = None + product_level: str = None + metadata: dict = field(default_factory=dict) + raster_files: dict = field(default_factory=dict) + metadata_files: dict = field(default_factory=dict) + masks: dict = field(default_factory=dict) + mask_files: dict = field(default_factory=dict) diff --git a/core/registrar/daemon.py b/core/registrar/daemon.py new file mode 100644 index 0000000000000000000000000000000000000000..efdf1ff565e5bda7c1b6bb80cd6465e027829cfe --- /dev/null +++ b/core/registrar/daemon.py @@ -0,0 +1,31 @@ +import logging +import json + +import redis + +from .registrar import register_file + + +logger = logging.getLogger(__name__) + + +def run_daemon(config, replace, host, port, listen_queue, registered_set_key): + """ Run the registrar daemon, listening on a redis queue + for files to be registered. After preprocessing the filename + of the registered files will be pushed to the output queue. + """ + # initialize the queue client + client = redis.Redis( + host=host, port=port, charset="utf-8", decode_responses=True + ) + logger.debug("waiting for redis queue '%s'..." % listen_queue) + while True: + # fetch an item from the queue to be registered + _, value = client.brpop(listen_queue) + # start the registration on that file + try: + item = register_file(config, value, replace) + client.sadd(registered_set_key, item.identifier) + + except Exception as e: + logger.exception(e) diff --git a/core/registrar/exceptions.py b/core/registrar/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..81a2e41ebfd0375fda00dbfc0a3ca6d4306ce5b0 --- /dev/null +++ b/core/registrar/exceptions.py @@ -0,0 +1,4 @@ + + +class RegistrationError(Exception): + pass diff --git a/core/registrar/post_handlers.py b/core/registrar/post_handlers.py new file mode 100644 index 0000000000000000000000000000000000000000..322d77c4bc624ee48a3a28a4a2aabaea2cac03cb --- /dev/null +++ b/core/registrar/post_handlers.py @@ -0,0 +1,53 @@ +import os.path +import textwrap +from datetime import datetime +import logging + +from .context import Context +from .utils import isoformat +from .xml import escape + + +logger = logging.getLogger(__name__) + + +class ReportingPostHandler: + def __init__(self, service_url: str, reporting_dir: str): + self.service_url = service_url + self.reporting_dir = reporting_dir + + def __call__(self, config: dict, path: str, context: Context): + inserted = datetime.now() + timestamp = inserted.strftime("%Y%m%dT%H%M%S") + filename = os.path.join(self.reporting_dir, 'item_%s_%s.xml' % (timestamp, context.identifier)) + logger.info(f"Generating report for path {path} at {filename}") + with open(filename, 'w') as f: + f.write(textwrap.dedent(""" + + + {identifier} + {availability_time} + + WCS + {wms_capabilities_url} + + + WMS + {wcs_capabilities_url} + + + """.format( + identifier=escape(context.identifier), + availability_time=escape(isoformat(inserted)), + wcs_capabilities_url=escape( + 
'%s/ows?service=wcs&request=GetCapabilities&cql=identifier="%s"' + % (self.service_url, context.identifier) + ), + wms_capabilities_url=escape( + '%s/ows?service=wms&request=GetCapabilities&cql=identifier="%s"' + % (self.service_url, context.identifier) + ), + ))) diff --git a/core/registrar/registrar.py b/core/registrar/registrar.py new file mode 100644 index 0000000000000000000000000000000000000000..968ff05422939d68ec30bd83c3ad62f963a37091 --- /dev/null +++ b/core/registrar/registrar.py @@ -0,0 +1,63 @@ +import re +import logging +import importlib + +from .source import get_source +from .scheme import get_scheme +from .backend import get_backends +from .exceptions import RegistrationError + + +logger = logging.getLogger(__name__) + + +def register_file(config: dict, path: str, replace: bool=False): + """ Handle the registration of a single path. + """ + logger.info(f"Handling '{path}'.") + source = get_source(config, path) + scheme = get_scheme(config, path) + context = scheme.get_context(source, path) + + for pre_handler in get_pre_handlers(config): + pre_handler(config, path, context) + + for backend in get_backends(config, path): + if backend.exists(source, context): + if replace: + logger.info(f"Replacing '{path}'.") + backend.register(source, context, replace=True) + else: + raise RegistrationError(f'Object {context} is already registered') + else: + logger.info(f"Registering '{path}'.") + backend.register(source, context, replace=False) + + for post_handler in get_post_handlers(config): + post_handler(config, path, context) + + logger.info(f"Successfully {'replaced' if replace else 'registered'} '{path}'") + return context + + +def _get_handlers(config, name): + handlers = [] + for handler_def in config.get(name, []): + module_path, _, handler_name = handler_def['path'].rpartition('.') + handler_cls = getattr(importlib.import_module(module_path), handler_name) + handlers.append( + handler_cls( + *handler_def.get('args', []), + **handler_def.get('kwargs', []), + ) + ) + + return handlers + + +def get_pre_handlers(config): + return _get_handlers(config, 'pre_handlers') + + +def get_post_handlers(config): + return _get_handlers(config, 'post_handlers') diff --git a/core/registrar/scheme.py b/core/registrar/scheme.py new file mode 100644 index 0000000000000000000000000000000000000000..71d508d350c8c4a724afb1870ae5cfc0d17ee439 --- /dev/null +++ b/core/registrar/scheme.py @@ -0,0 +1,208 @@ +import re +from os.path import join +import logging + +from .xml import read_xml, parse_metadata_schema, Parameter +from .context import Context +from .source import Source +from .exceptions import RegistrationError + + +logger = logging.getLogger(__name__) + +class RegistrationScheme: + def get_context(self): + raise NotImplementedError + + +def parse_datetime(value): + return value + + +def pairwise(iterable): + "s -> (s0,s1), (s2,s3), (s4, s5), ..." 
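+    # zip one shared iterator with itself to yield consecutive, non-overlapping pairs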
+ a = iter(iterable) + return zip(a, a) + + +def parse_footprint(value): + coord_list = ','.join( + f'{x} {y}' + for y, x in pairwise(value.split()) + ) + return f'POLYGON(({coord_list}))' + + +class Sentinel2RegistrationScheme(RegistrationScheme): + MTD_MSIL2A_SCHEMA = { + 'begin_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_START_TIME/text()', False, parse_datetime), + 'end_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_STOP_TIME/text()', False, parse_datetime), + 'identifier': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_URI/text()'), + 'footprint': Parameter('/n1:Level-2A_User_Product/n1:Geometric_Info/Product_Footprint/Product_Footprint/Global_Footprint/EXT_POS_LIST/text()', False, parse_footprint), + 'level': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PROCESSING_LEVEL/text()'), + 'type': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_TYPE/text()'), + 'generation_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/GENERATION_TIME/text()', False, parse_datetime), + 'cloud_cover': Parameter('/n1:Level-2A_User_Product/n1:Quality_Indicators_Info/Cloud_Coverage_Assessment/text()'), + 'image_file_paths': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/Product_Organisation/Granule_List/Granule/IMAGE_FILE/text()', True), + } + + MTD_TL_SCHEMA = { + 'mask_file_paths': Parameter('/n1:Level-2A_Tile_ID/n1:Quality_Indicators_Info/Pixel_Level_QI/MASK_FILENAME/text()', True), + } + + MTD_MSIL2A_NAMESPACES = { + 'n1': "https://psd-14.sentinel2.eo.esa.int/PSD/User_Product_Level-2A.xsd" + } + + MTD_TL_NAMESPACES = { + 'n1': 'https://psd-14.sentinel2.eo.esa.int/PSD/S2_PDI_Level-2A_Tile_Metadata.xsd' + } + + def get_context(self, source: Source, path: str): + metadata_file = join(path, 'MTD_MSIL2A.xml') + tree = read_xml(source, metadata_file) + + # get product metadata + metadata = parse_metadata_schema(tree, self.MTD_MSIL2A_SCHEMA, self.MTD_MSIL2A_NAMESPACES) + + band_re = re.compile(r'.*([A-Z0-9]{3}_[0-9]{2}m)$') + raster_files = { + band_re.match(image_file_path).groups()[0]: f'{join(path, image_file_path)}.jp2' + for image_file_path in metadata['image_file_paths'] + } + + # get granule metadata + mtd_files = source.list_files(join(path, 'GRANULE'), '*/MTD_TL.xml') + logger.info(f'{mtd_files}') + tl_tree = read_xml(source, mtd_files[0]) + tile_metadata = parse_metadata_schema(tl_tree, self.MTD_TL_SCHEMA, self.MTD_TL_NAMESPACES) + + mask_type_re = re.compile(r'.*/MSK_([A-Z]*)_([A-Z0-9]{3}).[a-z0-9]+$') + mask_files = { + mask_type_re.match(mask_file_path).groups()[0]: join(path, mask_file_path) + for mask_file_path in tile_metadata['mask_file_paths'] + if mask_type_re.match(mask_file_path) is not None + } + + return Context( + identifier=metadata['identifier'], + path=path, + product_type=metadata['type'], + product_level=metadata['level'], + raster_files=raster_files, + mask_files=mask_files, + metadata_files=[metadata_file], + metadata={ + 'begin_time': metadata['begin_time'], + 'end_time': metadata['end_time'], + 'generation_time': metadata['generation_time'], + 'cloud_cover': metadata['cloud_cover'], + 'footprint': metadata['footprint'], + } + ) + + +def parse_ring(string): + raw_coords = string.split() + return [(lon, lat) for lat, lon in pairwise(raw_coords)] + + +def parse_polygons_gsc(elem): + def serialize_coord_list(coords): + return ','.join( + f'{x} {y}' for x, y in coords + ) + + interior = 
serialize_coord_list( + parse_ring( + elem.xpath( + "gml:exterior/gml:LinearRing/gml:posList", namespaces=elem.nsmap + )[0].text.strip() + ) + ) + + exteriors = [ + f'''({ + serialize_coord_list( + parse_ring(poslist_elem.text.strip()) + ) + })''' + for poslist_elem in elem.xpath( + "gml:interior/gml:LinearRing/gml:posList", namespaces=elem.nsmap + ) + ] + + return f"POLYGON(({interior}){',' if exteriors else ''}{','.join(exteriors)})" + + +class GSCRegistrationScheme(RegistrationScheme): + GSC_SCHEMA = { + 'identifier': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:identifier/text()'), + 'type': Parameter('//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'), + 'level': Parameter('//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'), + 'mask': Parameter('//gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:vendorSpecific/eop:SpecificInformation[eop:localAttribute/text() = "CF_POLY"]/eop:localValue/text()', True), + 'footprint': Parameter( + '//gml:target/eop:Footprint/gml:multiExtentOf/gml:MultiSurface/gml:surfaceMembers/gml:Polygon', + False, parse_polygons_gsc + ), + } + + def __init__(self, level_re: str=r'.*(Level_[0-9]+)$'): + self.level_re = level_re + + def get_context(self, source: Source, path: str) -> Context: + gsc_filenames = source.list_files(path, ['GSC*.xml', 'GSC*.XML']) + metadata_file = gsc_filenames[0] + + tree = read_xml(source, metadata_file) + metadata = parse_metadata_schema(tree, self.GSC_SCHEMA, tree.getroot().nsmap) + + tiff_files = { + metadata['type']: source.list_files(path, ['*.tif', '*.TIF']) + } + + match = re.match(self.level_re, metadata['level']) + if match: + level = match.groups()[0] + else: + level = None + + return Context( + identifier=metadata['identifier'], + path=path, + product_type=metadata['type'], + product_level=level, + raster_files=tiff_files, + masks={ + 'validity': metadata['mask'][0] if metadata['mask'] else None + }, + metadata_files=[metadata_file], + metadata={ + 'footprint': metadata['footprint'], + } + ) + + +REGISTRATION_SCHEMES = { + 'gsc': GSCRegistrationScheme, + 'sentinel-2': Sentinel2RegistrationScheme, +} + +def get_scheme(config: dict, path: str) -> RegistrationScheme: + cfg_schemes = config['schemes'] + + for cfg_scheme in cfg_schemes: + if cfg_scheme.get('filter'): + if re.match(cfg_scheme['filter'], path): + break + else: + break + else: + # no source found + raise RegistrationError(f'Could not find a suitable scheme for the path {path}') + + return REGISTRATION_SCHEMES[cfg_scheme['type']]( + *cfg_scheme.get('args', []), + **cfg_scheme.get('kwargs', {}), + ) + diff --git a/core/registrar/source.py b/core/registrar/source.py new file mode 100644 index 0000000000000000000000000000000000000000..ce04366251c1c3711d79e0c010ceba9dcf118403 --- /dev/null +++ b/core/registrar/source.py @@ -0,0 +1,256 @@ +import re +from os.path import normpath, join, isabs +import shutil +from glob import glob +from fnmatch import fnmatch +import logging + +import boto3 +from swiftclient.multithreading import OutputManager +from swiftclient.service import SwiftError, SwiftService + + +logger = logging.getLogger(__name__) + +class RegistrationError(Exception): + pass + + +class Source: + def __init__(self, name: str=None): + self.name = name + + def get_container_and_path(self, path): + raise NotImplementedError + + def list_files(self, path, glob_pattern=None): + raise NotImplementedError + + def get_file(self, path, target_path): + raise 
NotImplementedError + + def get_vsi_env_and_path(self, path): + raise NotImplementedError + + +class SwiftSource(Source): + def __init__(self, name=None, username=None, password=None, tenant_name=None, + tenant_id=None, region_name=None, user_domain_id=None, + user_domain_name=None, auth_url=None, auth_url_short=None, + auth_version=None, container=None): + super().__init__(name) + + self.username = username + self.password = password + self.tenant_name = tenant_name + self.tenant_id = tenant_id + self.region_name = region_name + self.user_domain_id = user_domain_id + self.user_domain_name = user_domain_name + self.auth_url = auth_url + self.auth_url_short = auth_url_short + self.auth_version = auth_version # TODO: assume 3 + self.container = container + + def get_service(self): + return SwiftService(options={ + "os_username": self.username, + "os_password": self.password, + "os_tenant_name": self.tenant_name, + "os_tenant_id": self.tenant_id, + "os_region_name": self.region_name, + "os_auth_url": self.auth_url, + "auth_version": self.auth_version, + "os_user_domain_id": self.user_domain_id, + "os_user_domain_name": self.user_domain_name, + }) + + def get_container_and_path(self, path: str): + container = self.container + if container is None: + parts = (path[1:] if path.startswith('/') else path).split('/') + container, path = parts[0], '/'.join(parts[1:]) + + return container, path + + def list_files(self, path, glob_patterns=None): + container, path = self.get_container_and_path(path) + + if glob_patterns and not isinstance(glob_patterns, list): + glob_patterns = [glob_patterns] + + with self.get_service() as swift: + pages = swift.list( + container=container, + options={"prefix": path}, + ) + + filenames = [] + for page in pages: + if page["success"]: + # at least two files present -> pass validation + for item in page["listing"]: + if glob_patterns is None or any( + fnmatch(item['name'], join(path, glob_pattern)) for glob_pattern in glob_patterns): + + filenames.append( + item['name'] if self.container else join(container, item['name']) + ) + else: + raise page['error'] + + return filenames + + def get_file(self, path, target_path): + container, path = self.get_container_and_path(path) + + with self.get_service() as swift: + results = swift.download( + container, + [path], + options={ + 'out_file': target_path + } + ) + + for result in results: + if not result["success"]: + raise Exception('Failed to download %s' % path) + + def get_vsi_env_and_path(self, path): + container, path = self.get_container_and_path(path) + return { + 'OS_IDENTITY_API_VERSION': self.auth_version, + 'OS_AUTH_URL': self.auth_url, + 'OS_USERNAME': self.username, + 'OS_PASSWORD': self.password, + 'OS_USER_DOMAIN_NAME': self.user_domain_name, + # 'OS_PROJECT_NAME': self.tena, + # 'OS_PROJECT_DOMAIN_NAME': , + 'OS_REGION_NAME': self.region_name, + }, f'/vsiswift/{container}/{path}' + + +class S3Source(Source): + def __init__(self, name=None, bucket_name=None, secret_access_key=None, access_key_id=None, endpoint_url=None, strip_bucket=True, **client_kwargs): + super().__init__(name) + + # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client + # for client_kwargs + self.bucket_name = bucket_name + self.secret_access_key=secret_access_key + self.access_key_id=access_key_id + self.endpoint_url = endpoint_url + self.strip_bucket = strip_bucket + + self.client = boto3.client( + 's3', + aws_secret_access_key=secret_access_key, + 
aws_access_key_id=access_key_id, + endpoint_url=endpoint_url, + **client_kwargs, + ) + + def get_container_and_path(self, path: str): + bucket = self.bucket_name + if bucket is None: + parts = (path[1:] if path.startswith('/') else path).split('/') + bucket, path = parts[0], '/'.join(parts[1:]) + elif self.strip_bucket: + parts = (path[1:] if path.startswith('/') else path).split('/') + if parts[0] == bucket: + parts.pop(0) + path = '/'.join(parts) + + return bucket, path + + def list_files(self, path, glob_patterns=None): + if glob_patterns and not isinstance(glob_patterns, list): + glob_patterns = [glob_patterns] + + bucket, key = self.get_container_and_path(path) + logger.info(f'Listing S3 files for bucket {bucket} and prefix {key}') + response = self.client.list_objects_v2( + Bucket=bucket, + Prefix=key, + ) + + return [ + f"{bucket}/{item['Key']}" + for item in response['Contents'] + if glob_patterns is None or any( + fnmatch(item['Key'], glob_pattern) for glob_pattern in glob_patterns + ) + ] + + def get_file(self, path, target_path): + bucket, key = self.get_container_and_path(path) + logger.info(f'Retrieving file from S3 {bucket}/{key} to be stored at {target_path}') + self.client.download_file(bucket, key, target_path) + + def get_vsi_env_and_path(self, path: str, streaming: bool=False): + bucket, key = self.get_container_and_path(path) + return { + 'AWS_SECRET_ACCESS_KEY': self.secret_access_key, + 'AWS_ACCESS_KEY_ID': self.access_key_id, + 'AWS_S3_ENDPOINT': self.endpoint_url, + }, f'/{"vsis3" if not streaming else "vsis3_streaming"}/{bucket}/{key}' + + +class LocalSource(Source): + def __init__(self, name, root_directory): + super().__init__(name) + + self.root_directory = root_directory + + def get_container_and_path(self, path): + return (self.root_directory, path) + + def _join_path(self, path): + path = normpath(path) + if isabs(path): + path = path[1:] + + return join(self.root_directory, path) + + def list_files(self, path, glob_patterns=None): + if glob_patterns and not isinstance(glob_patterns, list): + glob_patterns = [glob_patterns] + + if glob_patterns is not None: + return glob(join(self._join_path(path), glob_patterns[0])) # TODO + else: + return glob(join(self._join_path(path), '*')) + + def get_file(self, path, target_path): + shutil.copy(self._join_path(path), target_path) + + def get_vsi_env_and_path(self, path): + return {}, self._join_path(path) + + +SOURCE_TYPES = { + 'swift': SwiftSource, + 's3': S3Source, + 'local': LocalSource, +} + + +def get_source(config: dict, path: str) -> Source: + cfg_sources = config['sources'] + + for cfg_source in cfg_sources: + if cfg_source.get('filter'): + if re.match(cfg_source['filter'], path): + break + else: + break + else: + # no source found + raise RegistrationError(f'Could not find a suitable source for the path {path}') + + return SOURCE_TYPES[cfg_source['type']]( + cfg_source['name'], + *cfg_source.get('args', []), + **cfg_source.get('kwargs', {}) + ) diff --git a/core/registrar/utils.py b/core/registrar/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8a522c91ae78540930fb6edce786ba131e40b3fe --- /dev/null +++ b/core/registrar/utils.py @@ -0,0 +1,12 @@ +def isoformat(dt): + """ Formats a datetime object to an ISO string. Timezone naive datetimes are + are treated as UTC Zulu. UTC Zulu is expressed with the proper "Z" + ending and not with the "+00:00" offset declaration. 
+ + :param dt: the :class:`datetime.datetime` to encode + :returns: an encoded string + """ + if not dt.utcoffset(): + dt = dt.replace(tzinfo=None) + return dt.isoformat("T") + "Z" + return dt.isoformat("T") diff --git a/core/registrar/xml.py b/core/registrar/xml.py new file mode 100644 index 0000000000000000000000000000000000000000..8a25ff07c411ace15f75a5021261569055328841 --- /dev/null +++ b/core/registrar/xml.py @@ -0,0 +1,54 @@ +from os import remove +from os.path import join, basename +from tempfile import gettempdir, gettempprefix +from dataclasses import dataclass, field +from typing import Union, Type, Optional, List, Callable, Any +import logging +from xml.sax.saxutils import escape + +import lxml.etree + +from .source import Source +from .exceptions import RegistrationError + + +logger = logging.getLogger(__name__) + +def read_xml(source: Source, path: str) -> lxml.etree._ElementTree: + out_filename = join(gettempdir(), basename(path)) + try: + source.get_file(path, out_filename) + tree = lxml.etree.parse(out_filename) + finally: + remove(out_filename) + return tree + +@dataclass +class Parameter: + xpath: str + multi: bool = False + parser: Optional[Callable[[str], Any]] = None + namespaces: dict = field(default_factory=dict) + + +class ParserError(RegistrationError): + pass + +def parse_metadata_schema(tree: lxml.etree._ElementTree, schema: dict, namespaces: dict=None) -> dict: + out = {} + for key, param in schema.items(): + values = tree.xpath(param.xpath, namespaces=param.namespaces or namespaces) + if param.multi: + value = [ + param.parser(v) if param.parser else v + for v in values + ] + else: + try: + value = param.parser(values[0]) if param.parser else values[0] + except IndexError: + raise ParserError(f'Failed to fetch single value for parameter {key}') + + out[key] = value + + return out diff --git a/core/run-registrar.sh b/core/run-registrar.sh index 348b4f75081870185eb370e84b568f3f46254cd6..a1bae61792c574e203a50054ed4d1943f80dbb51 100644 --- a/core/run-registrar.sh +++ b/core/run-registrar.sh @@ -6,13 +6,10 @@ if test "$REGISTRAR_REPLACE" = true; then replace="--replace" fi -python3 /registrar.py \ - --mode redis \ - --redis-host ${REDIS_HOST} \ - --redis-port ${REDIS_PORT} \ - --redis-register-queue-key ${REDIS_REGISTER_QUEUE_KEY} \ - --redis-registered-set-key ${REDIS_REGISTERED_SET_KEY} \ - --redis-registered-set-key ${REDIS_REGISTERED_SET_KEY} \ - --reporting-dir ${REPORTING_DIR} \ - --service-url ${SERVICE_URL} \ +registrar daemon \ + --config-file /config.yaml \ + --host ${REDIS_HOST} \ + --port ${REDIS_PORT} \ + --listen-queue ${REDIS_REGISTER_QUEUE_KEY} \ + --registered-set-key ${REDIS_REGISTERED_SET_KEY} \ ${replace} >&2 diff --git a/core/setup.py b/core/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..b44d89b2d4b7ddd125b94a7f5ce15aaafaf3fc97 --- /dev/null +++ b/core/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +# with open("README.md", "r") as fh: +# long_description = fh.read() +long_description = "" + +setup( + name="registrar", # Replace with your own username + version="0.0.1", + author="", + author_email="", + description="registrar for PVS", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://gitlab.eox.at/esa/prism/vs/-/tree/master/core", + packages=find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.6', + 
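+    # exposes the `registrar` console command used by run-registrar.sh and registrar_test.sh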
entry_points={ + "console_scripts": [ + "registrar = registrar.cli:cli", + ], + } +) diff --git a/docker-compose.dem.dev.yml b/docker-compose.dem.dev.yml index 56ee0072c3a2cf7c2f49eebd82eaac84504007b9..d39ce234c8f19ac600c14283c116d4c3ecf092e6 100644 --- a/docker-compose.dem.dev.yml +++ b/docker-compose.dem.dev.yml @@ -42,8 +42,6 @@ services: - type: bind source: ./core/ target: /core/ - logging: - driver: "fluentd" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: diff --git a/docker-compose.dem.ops.yml b/docker-compose.dem.ops.yml index 0fa2a23ec7772392ed549d677a2b950bd507b92c..5184b39dc967baa24794696f481d4d416919096f 100644 --- a/docker-compose.dem.ops.yml +++ b/docker-compose.dem.ops.yml @@ -187,18 +187,18 @@ services: constraints: [node.role == manager] labels: # router for basic auth based access (https) - - "traefik.http.routers.shibauth.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth.middlewares=compress@file,cors@file" - - "traefik.http.routers.shibauth.tls=true" - - "traefik.http.routers.shibauth.tls.certresolver=default" - - "traefik.http.routers.shibauth.entrypoints=https" + - "traefik.http.routers.dem-shibauth.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.dem-shibauth.middlewares=compress@file,cors@file" + - "traefik.http.routers.dem-shibauth.tls=true" + - "traefik.http.routers.dem-shibauth.tls.certresolver=default" + - "traefik.http.routers.dem-shibauth.entrypoints=https" # router for basic auth based access (http) - - "traefik.http.routers.shibauth-redirect.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth-redirect.middlewares=redirect@file" - - "traefik.http.routers.shibauth-redirect.entrypoints=http" + - "traefik.http.routers.dem-shibauth-redirect.rule=Host(`dem.pass.copernicus.eu`, `a.dem.pass.copernicus.eu`, `b.dem.pass.copernicus.eu`, `c.dem.pass.copernicus.eu`, `d.dem.pass.copernicus.eu`, `e.dem.pass.copernicus.eu`, `f.dem.pass.copernicus.eu`, `g.dem.pass.copernicus.eu`, `h.dem.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.dem-shibauth-redirect.middlewares=redirect@file" + - "traefik.http.routers.dem-shibauth-redirect.entrypoints=http" # general - - "traefik.http.services.shibauth.loadbalancer.sticky=false" - - "traefik.http.services.shibauth.loadbalancer.server.port=80" + - "traefik.http.services.dem-shibauth.loadbalancer.sticky=false" + - "traefik.http.services.dem-shibauth.loadbalancer.server.port=80" - "traefik.docker.network=dem-extnet" - "traefik.docker.lbswarm=true" - "traefik.enable=true" diff --git a/docker-compose.dem.yml b/docker-compose.dem.yml index 
61059026ef9c5f0089ed89a1ad3d2ba85118b059..fc4b355fc5b11624504962ee82d429d8559ce467 100644 --- a/docker-compose.dem.yml +++ b/docker-compose.dem.yml @@ -122,7 +122,7 @@ services: OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" configs: - source: preprocessor-config - target: /config.yaml + target: /config.yaml deploy: replicas: 1 networks: @@ -160,6 +160,8 @@ services: configs: - source: init-db target: /init-db.sh + - source: registrar-config + target: /config.yaml deploy: replicas: 1 networks: @@ -182,9 +184,8 @@ services: configs: - source: sftp_users_dem target: /etc/sftp/users.conf - ports: - - "2222:22" + - "2224:22" deploy: replicas: 1 ingestor: @@ -212,6 +213,8 @@ configs: file: ./config/dem_index-ops.html preprocessor-config: file: ./config/dem_preprocessor-config.yml + registrar-config: + file: ./config/dem_registrar-config.yml volumes: db-data: redis-data: diff --git a/docker-compose.emg.ops.yml b/docker-compose.emg.ops.yml index 98a5ba55a6493981f1f5a70c74f9c924168178c7..4f4d5cd0645dc5766f412f705d9ebc36b72cada6 100644 --- a/docker-compose.emg.ops.yml +++ b/docker-compose.emg.ops.yml @@ -187,18 +187,18 @@ services: constraints: [node.role == manager] labels: # router for basic auth based access (https) - - "traefik.http.routers.shibauth.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth.middlewares=compress@file,cors@file" - - "traefik.http.routers.shibauth.tls=true" - - "traefik.http.routers.shibauth.tls.certresolver=default" - - "traefik.http.routers.shibauth.entrypoints=https" + - "traefik.http.routers.emg-shibauth.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.emg-shibauth.middlewares=compress@file,cors@file" + - "traefik.http.routers.emg-shibauth.tls=true" + - "traefik.http.routers.emg-shibauth.tls.certresolver=default" + - "traefik.http.routers.emg-shibauth.entrypoints=https" # router for basic auth based access (http) - - "traefik.http.routers.shibauth-redirect.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth-redirect.middlewares=redirect@file" - - "traefik.http.routers.shibauth-redirect.entrypoints=http" + - "traefik.http.routers.emg-shibauth-redirect.rule=Host(`emg.pass.copernicus.eu`, `a.emg.pass.copernicus.eu`, `b.emg.pass.copernicus.eu`, `c.emg.pass.copernicus.eu`, `d.emg.pass.copernicus.eu`, `e.emg.pass.copernicus.eu`, `f.emg.pass.copernicus.eu`, `g.emg.pass.copernicus.eu`, `h.emg.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.emg-shibauth-redirect.middlewares=redirect@file" + - "traefik.http.routers.emg-shibauth-redirect.entrypoints=http" # general - - 
"traefik.http.services.shibauth.loadbalancer.sticky=false" - - "traefik.http.services.shibauth.loadbalancer.server.port=80" + - "traefik.http.services.emg-shibauth.loadbalancer.sticky=false" + - "traefik.http.services.emg-shibauth.loadbalancer.server.port=80" - "traefik.docker.network=emg-extnet" - "traefik.docker.lbswarm=true" - "traefik.enable=true" diff --git a/docker-compose.emg.yml b/docker-compose.emg.yml index 107b33a93be54cf8dafe04c36af94b6f82f3a2f8..06da63290791105ef1a8444bf999f7a3225fa6c8 100644 --- a/docker-compose.emg.yml +++ b/docker-compose.emg.yml @@ -132,7 +132,7 @@ services: OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" configs: - source: preprocessor-config - target: /config.yaml + target: /config.yaml deploy: replicas: 1 networks: @@ -170,6 +170,8 @@ services: configs: - source: init-db target: /init-db.sh + - source: registrar-config + target: /config.yaml deploy: replicas: 1 networks: @@ -194,7 +196,7 @@ services: target: /etc/sftp/users.conf ports: - - "2222:22" + - "2223:22" deploy: replicas: 1 configs: @@ -212,6 +214,8 @@ configs: file: ./config/emg_index-ops.html preprocessor-config: file: ./config/emg_preprocessor-config.yml + registrar-config: + file: ./config/emg_registrar-config.yml volumes: db-data: redis-data: diff --git a/docker-compose.vhr18.ops.yml b/docker-compose.vhr18.ops.yml index 1c8a5f4ce07ed0811ec5f9de7aa66bf812c960f4..c0baef92e4d72f0a9d52a8ae7bb2a7544eb9e581 100644 --- a/docker-compose.vhr18.ops.yml +++ b/docker-compose.vhr18.ops.yml @@ -187,18 +187,18 @@ services: constraints: [node.role == manager] labels: # router for basic auth based access (https) - - "traefik.http.routers.shibauth.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth.middlewares=compress@file,cors@file" - - "traefik.http.routers.shibauth.tls=true" - - "traefik.http.routers.shibauth.tls.certresolver=default" - - "traefik.http.routers.shibauth.entrypoints=https" + - "traefik.http.routers.vhr18-shibauth.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.vhr18-shibauth.middlewares=compress@file,cors@file" + - "traefik.http.routers.vhr18-shibauth.tls=true" + - "traefik.http.routers.vhr18-shibauth.tls.certresolver=default" + - "traefik.http.routers.vhr18-shibauth.entrypoints=https" # router for basic auth based access (http) - - "traefik.http.routers.shibauth-redirect.rule=Host(`vhr18.pass.copernicus.eu`, `a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" - - "traefik.http.routers.shibauth-redirect.middlewares=redirect@file" - - "traefik.http.routers.shibauth-redirect.entrypoints=http" + - "traefik.http.routers.vhr18-shibauth-redirect.rule=Host(`vhr18.pass.copernicus.eu`, 
`a.vhr18.pass.copernicus.eu`, `b.vhr18.pass.copernicus.eu`, `c.vhr18.pass.copernicus.eu`, `d.vhr18.pass.copernicus.eu`, `e.vhr18.pass.copernicus.eu`, `f.vhr18.pass.copernicus.eu`, `g.vhr18.pass.copernicus.eu`, `h.vhr18.pass.copernicus.eu`) && PathPrefix(`/secure`, `/secure-cache`, `/Shibboleth.sso`)" + - "traefik.http.routers.vhr18-shibauth-redirect.middlewares=redirect@file" + - "traefik.http.routers.vhr18-shibauth-redirect.entrypoints=http" # general - - "traefik.http.services.shibauth.loadbalancer.sticky=false" - - "traefik.http.services.shibauth.loadbalancer.server.port=80" + - "traefik.http.services.vhr18-shibauth.loadbalancer.sticky=false" + - "traefik.http.services.vhr18-shibauth.loadbalancer.server.port=80" - "traefik.docker.network=vhr18-extnet" - "traefik.docker.lbswarm=true" - "traefik.enable=true" diff --git a/docker-compose.vhr18.yml b/docker-compose.vhr18.yml index 72c3c5ddcef71d61cc21022ef0e9ffe6542c36bc..fe74cc7e622be0ba2e5534e63d2c5b5fa2429caa 100644 --- a/docker-compose.vhr18.yml +++ b/docker-compose.vhr18.yml @@ -135,7 +135,7 @@ services: OS_PASSWORD_DOWNLOAD_FILE: "/run/secrets/OS_PASSWORD_DOWNLOAD" configs: - source: preprocessor-config - target: /config.yaml + target: /config.yaml deploy: replicas: 1 networks: @@ -173,6 +173,8 @@ services: configs: - source: init-db target: /init-db.sh + - source: registrar-config + target: /config.yaml deploy: replicas: 1 networks: @@ -197,7 +199,6 @@ services: target: /etc/sftp/users.conf deploy: replicas: 1 - ports: - "2222:22" ingestor: @@ -219,6 +220,8 @@ configs: file: ./config/vhr18_index-ops.html preprocessor-config: file: ./config/vhr18_preprocessor-config.yml + registrar-config: + file: ./config/vhr18_registrar-config.yml volumes: db-data: redis-data: diff --git a/documentation/operator-guide/access.rst b/documentation/operator-guide/access.rst index b96c281e63c29a686b2574f52f2d00a5ef5ccb54..9dafab5175a4373ef0963b65056062e15e2c619a 100644 --- a/documentation/operator-guide/access.rst +++ b/documentation/operator-guide/access.rst @@ -68,7 +68,7 @@ Currently setting individual authorization rules on a ``Collection`` and ``Servi Configuration ~~~~~~~~~~~~~ -For correct configuration of Shibboleth SP3 on a new stack, several steps need to be done. Most of these configurations are usually done in the :ref:`_initialization` step using ``pvs_starter`` tool. Still, it is advised to check following steps, understand them and change if necessary. +For correct configuration of Shibboleth SP3 on a new stack, several steps need to be done. Most of these configurations are usually done in the :ref:`initialization` step using ``pvs_starter`` tool. Still, it is advised to check following steps, understand them and change if necessary. Briefly summarized, SP and IdP need to exchange metadata and certificates to trust each other, SP needs to know which attributes the IdP will be sending about the logged-in user and respective access-control rules are configured based on those attributes. Most of the configurations are done via docker configs defined in the docker compose - Create a pair of key, certificate using attached Shibboleth ``config/shibboleth/keygen.sh`` in the cloned vs repository and save them as respective docker secrets. @@ -81,6 +81,7 @@ Briefly summarized, SP and IdP need to exchange metadata and certificates to tru docker secret create _SHIB_KEY sp-signing-key.pem - Get IDP metadata and save it as a docker config. Also read the entityID of the IdP for further use in referencing it in your ``shibboleth2.xml`` configuration. 
+ .. code-block:: bash docker config create idp-metadata idp-metadata-received.xml @@ -88,13 +89,14 @@ Briefly summarized, SP and IdP need to exchange metadata and certificates to tru - Configure Apache ServerName used inside the ``shibauth`` service by modifying ``APACHE_SERVERNAME`` environment variable of corresponding ``shibauth`` service in ``docker-compose..ops.yml``. This URL should resolve to the actual service URL. - Modify shibboleth2.xml content by setting your "entityID" in Additionally edit the "entityID" value inside ``SSO`` element to match the IdP "entityID". Note that "entityID" does not need to resolve to an actual service URL. + .. code-block:: xml - - - SAML2 - + + + + SAML2 + + - Deploy your shibauth service and exchange your SP metadata with the IdP provider and have them recognize your SP. Necessary metadata needs to be downloaded from url ``/Shibboleth.sso/Metadata``. @@ -108,6 +110,7 @@ Briefly summarized, SP and IdP need to exchange metadata and certificates to tru - Create custom access rules based on these attributes and map these access controls to different internal apache routes to which Traefik ForwardAuth middleware will point. Access rules are created in ``config/shibboleth/-ac.xml``. + Example of external Access control rules configuration: .. code-block:: xml diff --git a/documentation/operator-guide/configuration.rst b/documentation/operator-guide/configuration.rst index 44b18d7e7dc761afb2e7c4e6264e03dc2dacb518..dce3a837f6dcba86c2a197f794bccaf0c2180365 100644 --- a/documentation/operator-guide/configuration.rst +++ b/documentation/operator-guide/configuration.rst @@ -516,7 +516,7 @@ An example of creating configurations for sftp image using the following command .. code-block:: bash - printf ":::" | docker config create sftp-users - + printf ":::" | docker config create sftp-users- - An example of creating ``OS_PASSWORD`` as secret using the following command : diff --git a/documentation/operator-guide/management.rst b/documentation/operator-guide/management.rst index bbb33703d4a4d9b33bca29bf7797be2161192fc9..181866b9b7ed1e4996c631ceb101d50c48f475a8 100644 --- a/documentation/operator-guide/management.rst +++ b/documentation/operator-guide/management.rst @@ -63,7 +63,7 @@ Inspecting reports ------------------ Once a product is registered, a xml report containing wcs and wms getcapabilities of the registered product is generated and can be accessed by connecting to the `SFTP` service via the sftp protocol. -In order to log into the logging folders through port 2222 on the hosting ip (e.g. localhost if you are running the dev stack) The following command can be used: +In order to log into the logging folders through port 2222 (for ``vhr18``, ``emg`` and ``dem`` have 2223 and 2224 respectively) on the hosting ip (e.g. localhost if you are running the dev stack) The following command can be used: .. code-block:: bash @@ -116,6 +116,7 @@ In order to select any other option from the Kibana toolkit, click the horizonta .. _fig_kibana_menu: .. figure:: images/kibana_1.* + :width: 250 :alt: Kibana menu *Kibana menu* diff --git a/documentation/user-guide/Makefile b/documentation/user-guide/Makefile index e994f10c0d30135b07fa08066c43f7e4e454b949..0f20b85a8014130c9902c0afe88de285c8b83cab 100644 --- a/documentation/user-guide/Makefile +++ b/documentation/user-guide/Makefile @@ -8,7 +8,7 @@ SPHINXBUILD ?= sphinx-build SPHINXAUTOBUILD ?= sphinx-autobuild SOURCEDIR = . 
BUILDDIR = _build -USERGUIDE_VERSION = v1.1.0 +USERGUIDE_VERSION = v1.1.1 # Put it first so that "make" without argument is like "make help". help: diff --git a/testing/gitlab_test.sh b/testing/gitlab_test.sh index 08920c0a7649bbe63f18944c3bc997c3118f1fc6..4503809d4a1a9c87a4b7dafadabdeb2555d5c530 100755 --- a/testing/gitlab_test.sh +++ b/testing/gitlab_test.sh @@ -9,7 +9,6 @@ cat $emg_db > ../env/emg_db.env cat $emg_django > ../env/emg_django.env cat $emg_obs > ../env/emg_obs.env - # use `pvs_testing` bucket instead sed -i -e 's/emg-data/pvs_testing/g' ../env/emg.env @@ -23,7 +22,6 @@ set -o allexport source ../env/emg.env set +o allexport - mkdir data docker swarm init docker network create -d overlay emg-extnet @@ -33,11 +31,11 @@ printf $OS_PASSWORD_DOWNLOAD | docker secret create OS_PASSWORD_DOWNLOAD - printf $DJANGO_PASSWORD | docker secret create DJANGO_PASSWORD - printf $OS_PASSWORD | docker secret create OS_PASSWORD - -# create docker configs +# create docker configs printf $sftp_users_emg | docker config create sftp_users_emg - docker stack deploy -c ../docker-compose.emg.yml -c ../docker-compose.emg.dev.yml emg-pvs -./docker-stack-wait.sh -n renderer -n registrar -n preprocessor -n ingestor -n sftp emg-pvs +./docker-stack-wait.sh -n renderer -n registrar -n preprocessor -n database -n sftp emg-pvs docker service ls # perform the testing @@ -48,10 +46,9 @@ if [ $? -ne 0 ] then echo "Failure in tests, logging from services:" for service in $(docker service ls --format "{{.Name}}"); do - echo "________________________________________" - docker service ps $service --no-trunc - docker service logs $service - done + docker service ps $service --no-trunc >> "/tmp/$service.log" + docker service logs $service >> "/tmp/$service.log" + done exit 1 fi diff --git a/testing/registrar_test.py b/testing/registrar_test.py index b53da07c15de0f0aeb866f4ccf9e40d55acb2bc3..5a8397f32f8d16e728b83472ad7536dfacaaad6f 100644 --- a/testing/registrar_test.py +++ b/testing/registrar_test.py @@ -34,7 +34,7 @@ def identifiers(): def sftp_connection(): username = os.environ['sftp_users_emg'].split(':')[0] password = os.environ['sftp_users_emg'].split(':')[1] - transport = paramiko.Transport(('docker', 2222)) + transport = paramiko.Transport(('docker', 2223)) transport.connect(username=username, password=password) with paramiko.SFTPClient.from_transport(transport) as sftp: yield sftp diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh index 7659f177f2156c726dc222a4485d4ec5deca6eec..ef398367d57462a9a70d89422452c17b82ed6d17 100755 --- a/testing/registrar_test.sh +++ b/testing/registrar_test.sh @@ -2,18 +2,14 @@ product_list_file=$1 echo "Starting registrar test" -OS_PASSWORD=$(docker exec -i $(docker ps -qf "name=emg-pvs_registrar") cat /run/secrets/OS_PASSWORD) - IFS="," docker exec -i $(docker ps -qf "name=emg-pvs_registrar") /wait-initialized.sh docker exec -i $(docker ps -qf "name=emg-pvs_renderer") /wait-initialized.sh while read product; do docker exec -e OS_PASSWORD=$OS_PASSWORD -i $(docker ps -qf "name=emg-pvs_registrar") \ - python3 /registrar.py \ - --objects-prefix $product \ - --service-url $SERVICE_URL \ - --reporting-dir "/mnt/reports" \ - <<<$product + registrar register \ + --config-file /config.yaml \ + "$product" done < "$product_list_file"
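
None of the `*_registrar-config.yml` files referenced above are included in this changeset, so the following is a minimal sketch of how the new registrar pieces fit together when driven directly from Python; it is the equivalent of `registrar register --config-file /config.yaml <path>`. Only configuration keys actually read by the code (`sources`, `schemes`, `backends`, `post_handlers`; note that `source.py` reads the plural key `sources` even though the schema names the property `source`) and constructor arguments visible in this diff are used. Every concrete value (container, credentials, service URL, reporting directory, queue names) is a hypothetical placeholder, and the EOxServer backend's own constructor arguments are left out because they are defined outside this excerpt.

```python
# Sketch of a one-off registration driven from Python instead of the CLI.
# All concrete values below are hypothetical placeholders; only the structure
# follows the code added in this diff.
from registrar.registrar import register_file

config = {
    # consulted by get_source(); the first entry whose optional 'filter'
    # regex matches the path is selected
    'sources': [{
        'type': 'swift',                        # 'local', 's3' or 'swift'
        'name': 'products',                     # hypothetical source name
        'kwargs': {                             # passed to SwiftSource(...)
            'container': 'products-container',  # hypothetical container
            'username': 'osuser',
            'password': 'ospassword',
            'auth_url': 'https://auth.example.com/v3',
            'auth_version': 3,
        },
    }],
    # consulted by get_scheme(); 'gsc' or 'sentinel-2'
    'schemes': [{
        'type': 'gsc',
    }],
    # consulted by get_backends(); 'eoxserver' is the only type defined here.
    # Its constructor arguments would go under 'args'/'kwargs' but are not
    # shown in this excerpt, so they are omitted.
    'backends': [{
        'type': 'eoxserver',
    }],
    # optional hooks run after the backend registration
    'post_handlers': [{
        'path': 'registrar.post_handlers.ReportingPostHandler',
        'kwargs': {
            'service_url': 'https://emg.pass.copernicus.eu',  # hypothetical
            'reporting_dir': '/mnt/reports',                  # hypothetical
        },
    }],
}

# register a single product path; returns the Context built by the scheme
context = register_file(config, '/some/bucket/path/to/product/', replace=False)
print(context.identifier)

# The daemon variant consumes paths from a Redis list instead (queue names are
# hypothetical and normally come from REDIS_REGISTER_QUEUE_KEY and friends):
# from registrar.daemon import run_daemon
# run_daemon(config, False, 'redis', 6379, 'register_queue', 'registered_set')
```

When the same structure is expressed in the mounted `/config.yaml`, `load_config` additionally resolves `${VAR}` references against the environment through the `!env` implicit resolver, so secrets such as passwords can be supplied via environment variables rather than written into the file.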