diff --git a/README.md b/README.md index d7870362abaf0c7fe73322e26fcbdd9cc49deb62..e78dcecebfff019dcd52ab35bfad0471a16c135e 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ The following services are defined via docker compose files. * holds ids for successfully registered items * `sadd` by registrar * register-failure_set - * holds its for failed registered items + * holds ids for failed registered items * `sadd` by registrar * seeding * seed_queue diff --git a/cache/Dockerfile b/cache/Dockerfile index 2a4a364820c9caedf5c74c0f9772216154fe3a1d..6bbd951a6e7ab45f611764c3ae66707502fb5f17 100644 --- a/cache/Dockerfile +++ b/cache/Dockerfile @@ -47,9 +47,9 @@ ENV COLLECTION_ID= \ APACHE_ServerName="pvs_cache" \ APACHE_ServerAdmin="office@eox.at" \ APACHE_NGEO_CACHE_ALIAS="/ows" \ - REDIS_HOST= \ + REDIS_HOST="redis" \ REDIS_PORT="6379" \ - REDIS_QUEUE_KEY="seed_queue" \ + REDIS_SEED_QUEUE_KEY="seed_queue" \ ST_AUTH_VERSION=3 \ OS_AUTH_URL= \ OS_USERNAME= \ diff --git a/cache/run-seeder.sh b/cache/run-seeder.sh index 53f789d7902beb5e31c6dcb30ab639130f8bf494..9549c163da3f4b02ab5eb09f02ae592ce9e19850 100644 --- a/cache/run-seeder.sh +++ b/cache/run-seeder.sh @@ -4,4 +4,4 @@ echo "Running seeder" -python3 /seeder.py --mode redis --redis-host ${REDIS_HOST} --redis-port ${REDIS_PORT} --redis-queue-key ${REDIS_QUEUE_KEY} +python3 /seeder.py --mode redis --redis-host ${REDIS_HOST} --redis-port ${REDIS_PORT} --redis-queue-key ${REDIS_SEED_QUEUE_KEY} diff --git a/chart/values.yaml b/chart/values.yaml index a501173a5926be9b839723d86cbf7ea690e115af..fdb20631dee696ff34f466eb6448cbb0f1e93031 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -49,12 +49,15 @@ config: redis: REDIS_PORT: "6379" REDIS_PREPROCESS_QUEUE_KEY: preprocess_queue - REDIS_QUEUE_KEY: seed_queue + REDIS_PREPROCESS_MD_QUEUE_KEY: preprocess-md_queue + REDIS_PREPROCESS_FAILURE_KEY: preprocess-failure_set + REDIS_PREPROCESS_PROGRESS_KEY: preprocessing_set + REDIS_PREPROCESS_SUCCESS_KEY: preprocess-success_set REDIS_REGISTER_QUEUE_KEY: register_queue - REDIS_REGISTERED_SET_KEY: registered_set + REDIS_REGISTER_FAILURE_KEY: register-failure_set + REDIS_REGISTER_PROGRESS_KEY: registering_set + REDIS_REGISTER_SUCCESS_KEY: register-success_set REDIS_SEED_QUEUE_KEY: seed_queue - REDIS_SET_KEY: registered_set - client: layers: {} # VHR_IMAGE_2018_Level_1: diff --git a/config/dem_preprocessor-config.yml b/config/dem_preprocessor-config.yml index 3ae30fe3a7b0cf59ba735aabe6e05db529ff4fb1..441ff704adec0353fe891df8220de062b14054a0 100644 --- a/config/dem_preprocessor-config.yml +++ b/config/dem_preprocessor-config.yml @@ -48,7 +48,7 @@ preprocessing: - BLOCKSIZE=512 - COMPRESS=DEFLATE - NUM_THREADS=8 - - BIGTIFF=IF_SAFER + - BIGTIFF=IF_NEEDED - OVERVIEWS=AUTO types: SAR_DGE_30: # just to pass validation diff --git a/config/dem_registrar-config.yml b/config/dem_registrar-config.yml index 5c255c039420827c238520095ca71f3f37460acb..2e27ad279bfd9d12b3535a7f8dd8a08b1bb59053 100644 --- a/config/dem_registrar-config.yml +++ b/config/dem_registrar-config.yml @@ -10,6 +10,7 @@ sources: auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' auth_url_short: !env '${OS_AUTH_URL_SHORT}' + user_domain_name: !env '${OS_USER_DOMAIN_NAME}' container: !env '${UPLOAD_CONTAINER}' schemes: @@ -21,8 +22,8 @@ backends: - type: eoxserver filter: kwargs: - instance_base_path: /var/www/pvs/dev - instance_name: pvs_instance + instance_base_path: !env '${INSTALL_DIR}' + instance_name: !env '${INSTANCE_NAME}' mapping: DEM1: COP-DEM_EEA-10-DGED: diff 
--git a/config/emg_index-dev.html b/config/emg_index-dev.html index 09e3e675909e41269837eb5108c1b5b7c5f9fec5..d3834d25667d3efa70b88bd1b9a8e81a2963067a 100644 --- a/config/emg_index-dev.html +++ b/config/emg_index-dev.html @@ -39,15 +39,15 @@ ], "constrainTimeDomain": true, "displayTimeDomain": [ - "2019-01-01T00:00:00Z", - "2019-12-31T23:59:59Z", + "2019-06-01T00:00:00Z", + "2020-12-31T23:59:59Z", ], "displayInterval": "P1096D", "selectedTimeDomain": [ - "2019-01-01T00:00:00Z", - "2019-12-31T23:59:59Z", + "2019-06-01T00:00:00Z", + "2020-12-31T23:59:59Z", ], - "selectableInterval": "P1096D", + "selectableInterval": "P10960D", "timeSliderControls": true, "maxTooltips": 1, "center": [12, 49], diff --git a/config/emg_index-ops.html b/config/emg_index-ops.html index fa51d00a1c0996f0c0ce8062d58bbb1750a0d999..99b3048b05161d219cfa5e5a5cc0f32e6b481937 100644 --- a/config/emg_index-ops.html +++ b/config/emg_index-ops.html @@ -59,15 +59,15 @@ ], "constrainTimeDomain": true, "displayTimeDomain": [ - "2019-01-01T00:00:00Z", - "2019-12-31T23:59:59Z", + "2019-06-01T00:00:00Z", + "2020-12-31T23:59:59Z", ], "displayInterval": "P1096D", "selectedTimeDomain": [ - "2019-01-01T00:00:00Z", - "2019-12-31T23:59:59Z", + "2019-06-01T00:00:00Z", + "2020-12-31T23:59:59Z", ], - "selectableInterval": "P1096D", + "selectableInterval": "P10960D", "timeSliderControls": true, "maxTooltips": 1, "center": [12, 49], diff --git a/config/emg_init-db.sh b/config/emg_init-db.sh index 7ce4d7e77ce45259a46dd32d869c66fc93c79bc7..739119a119713ef3b190df2a67ca3cb1c54ecfe8 100644 --- a/config/emg_init-db.sh +++ b/config/emg_init-db.sh @@ -921,7 +921,109 @@ if python3 manage.py id check "${COLLECTION}"; then --blue-nodata 0 python3 manage.py browsetype create "${COLLECTION}"_Product_PH1B "NDVI" --traceback \ --grey "(nir-red)/(nir+red)" --grey-range -1 1 - + python3 manage.py producttype create "${COLLECTION}"_Product_SK00 --traceback \ + --coverage-type "RGBNir" + python3 manage.py browsetype create "${COLLECTION}"_Product_SK00 --traceback \ + --red "red" \ + --green "green" \ + --blue "blue" \ + --red-range 1 600 \ + --green-range 1 600 \ + --blue-range 1 600 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_SK00 "TRUE_COLOR" --traceback \ + --red "red" \ + --green "green" \ + --blue "blue" \ + --red-range 1 600 \ + --green-range 1 600 \ + --blue-range 1 600 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_SK00 "FALSE_COLOR" --traceback \ + --red "nir" \ + --green "red" \ + --blue "green" \ + --red-range 1 600 \ + --green-range 1 600 \ + --blue-range 1 600 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_SK00 "NDVI" --traceback \ + --grey "(nir-red)/(nir+red)" --grey-range -1 1 + python3 manage.py producttype create "${COLLECTION}"_Product_SW03 --traceback \ + --coverage-type "RGBNir" + python3 manage.py browsetype create "${COLLECTION}"_Product_SW03 --traceback \ + --red "red" \ + --green "green" \ + --blue "blue" \ + --red-range 1 1000 \ + --green-range 1 1000 \ + --blue-range 1 1000 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_SW03 "TRUE_COLOR" --traceback \ + --red "red" \ + --green "green" \ + --blue "blue" \ + --red-range 1 1000 \ + --green-range 1 1000 \ + --blue-range 1 1000 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + 
python3 manage.py browsetype create "${COLLECTION}"_Product_SW03 "FALSE_COLOR" --traceback \ + --red "nir" \ + --green "red" \ + --blue "green" \ + --red-range 1 1000 \ + --green-range 1 1000 \ + --blue-range 1 1000 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_SW03 "NDVI" --traceback \ + --grey "(nir-red)/(nir+red)" --grey-range -1 1 + # bands go in order b,g,r,nir and I did not change them in preprocessing + python3 manage.py producttype create "${COLLECTION}"_Product_PL00 --traceback \ + --coverage-type "RGBNir" + python3 manage.py browsetype create "${COLLECTION}"_Product_PL00 --traceback \ + --red "blue" \ + --green "green" \ + --blue "red" \ + --red-range 1500 15000 \ + --green-range 1500 15000 \ + --blue-range 1500 15000 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_PL00 "TRUE_COLOR" --traceback \ + --red "blue" \ + --green "green" \ + --blue "red" \ + --red-range 1500 15000 \ + --green-range 1500 15000 \ + --blue-range 1500 15000 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_PL00 "FALSE_COLOR" --traceback \ + --red "nir" \ + --green "blue" \ + --blue "green" \ + --red-range 1500 20000 \ + --green-range 1500 15000 \ + --blue-range 1500 15000 \ + --red-nodata 0 \ + --green-nodata 0 \ + --blue-nodata 0 + python3 manage.py browsetype create "${COLLECTION}"_Product_PL00 "NDVI" --traceback \ + --grey "(nir-blue)/(nir+blue)" --grey-range -1 1 python3 manage.py collectiontype create "${COLLECTION}"_Collection --traceback \ --coverage-type "RGBNir" \ --coverage-type "RGNirByte" \ @@ -967,6 +1069,9 @@ if python3 manage.py id check "${COLLECTION}"; then --product-type "${COLLECTION}"_Product_SP05 \ --product-type "${COLLECTION}"_Product_SP06 \ --product-type "${COLLECTION}"_Product_SP07 \ + --product-type "${COLLECTION}"_Product_SK00 \ + --product-type "${COLLECTION}"_Product_SW03 \ + --product-type "${COLLECTION}"_Product_PL00 \ --product-type "${COLLECTION}"_Product_KS03 python3 manage.py collection create "${COLLECTION}" --type "${COLLECTION}"_Collection --traceback diff --git a/config/emg_preprocessor-config.yml b/config/emg_preprocessor-config.yml index 13c5d5e51c5ea64d07b21b4fe6766ff312f55ac5..3ed463273e8f77b7f9ae1a3dde9b15b6a6e8ac49 100644 --- a/config/emg_preprocessor-config.yml +++ b/config/emg_preprocessor-config.yml @@ -35,23 +35,202 @@ level_extractor: preprocessing: defaults: move_files: true + nested: true data_file_globs: - '*.tif' - '*.jp2' + - '*.h5' output: options: format: COG dstSRS: 'EPSG:4326' dstNodata: 0 + multithread: True + warpMemoryLimit: 3000 creationOptions: - BLOCKSIZE=512 - COMPRESS=DEFLATE - NUM_THREADS=8 - - BIGTIFF=IF_SAFER + - BIGTIFF=YES - OVERVIEWS=AUTO types: - PH1B: # just to pass validation - nested: true + KS03: + data_file_globs: + - "*.tif" + additional_file_globs: + - "*.rpc*" + stack_bands: + group_by: ".*/(.*)_P..tif" + sort_by: ".*_P(R|G|B|N).tif" + order: + - R + - G + - B + - N + RS02: + data_file_globs: + - "*imagery_*.tif" + georeference: + geotransforms: + - type: gcp + - type: no_op + calc: + formulas: + # complex b1/b2 to b1 uint16 for all 4 polarizations + - inputs: + A: + glob: '*HH.tif' + band: 1 + B: + glob: '*HH.tif' + band: 2 + formula: sqrt(A.astype(float)*A.astype(float)+B.astype(float)*B.astype(float)) + output_postfix: _proc + nodata_value: 0 + data_type: UInt16 + - inputs: + A: + glob: '*HV.tif' + band: 
1 + B: + glob: '*HV.tif' + band: 2 + formula: sqrt(A.astype(float)*A.astype(float)+B.astype(float)*B.astype(float)) + output_postfix: _proc + nodata_value: 0 + data_type: UInt16 + - inputs: + A: + glob: '*VH.tif' + band: 1 + B: + glob: '*VH.tif' + band: 2 + formula: sqrt(A.astype(float)*A.astype(float)+B.astype(float)*B.astype(float)) + output_postfix: _proc + nodata_value: 0 + data_type: UInt16 + - inputs: + A: + glob: '*VV.tif' + band: 1 + B: + glob: '*VV.tif' + band: 2 + formula: sqrt(A.astype(float)*A.astype(float)+B.astype(float)*B.astype(float)) + output_postfix: _proc + nodata_value: 0 + data_type: UInt16 + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + SK00: + data_file_globs: + - "*pansharpened_clip.tif" + - "*_pansharpened.tif" + SP06: + data_file_globs: + # throw away Panchromatic *_P_* + - "*IMG_*_PMS_*.JP2" + - "*IMG_*_PMS_*.tif" + - "*IMG_*_MS_*.JP2" + - "*IMG_*_MS_*.tif" + additional_file_globs: + - "*RPC_*" + - "*DIM_*" + georeference: + geotransforms: + - type: rpc + - type: no_op + SP07: + data_file_globs: + - "*IMG_*_PMS_*.JP2" + - "*IMG_*_PMS_*.tif" + - "*IMG_*_MS_*.JP2" + - "*IMG_*_MS_*.tif" + additional_file_globs: + - "*RPC_*" + - "*DIM_*" + georeference: + geotransforms: + # first try RPC, if not present, go on + - type: rpc + - type: no_op + PH1A: + data_file_globs: + - "*IMG_*_PMS_*.JP2" + - "*IMG_*_PMS_*.tif" + - "*IMG_*_MS_*.JP2" + - "*IMG_*_MS_*.tif" + additional_file_globs: + - "*RPC_*" + - "*DIM_*" + georeference: + geotransforms: + - type: rpc + - type: no_op + PH1B: + data_file_globs: + - "*IMG_*_PMS_*.JP2" + - "*IMG_*_PMS_*.tif" + - "*IMG_*_MS_*.JP2" + - "*IMG_*_MS_*.tif" + additional_file_globs: + - "*RPC_*" + - "*DIM_*" + georeference: + geotransforms: + - type: rpc + - type: no_op + PL00: + data_file_globs: + - "*MS.tif" + - "*MS_clip.tif" + SW03: + data_file_globs: + - "*.tif" + GY01: + # throw away Panchromatic *-P3D* + data_file_globs: + - "*-M3D*.TIF" + - "*-M3D*.tif" + - "*-S3D*.TIF" + - "*-S3D*.tif" + - "*-M2A*.TIF" + - "*-M2A*.tif" + - "*-S2A*.TIF" + - "*-S2A*.tif" + EW03: + data_file_globs: + - "*-M3D*.TIF" + - "*-M3D*.tif" + - "*-S3D*.TIF" + - "*-S3D*.tif" + - "*-M2A*.TIF" + - "*-M2A*.tif" + - "*-S2A*.TIF" + - "*-S2A*.tif" + EW02: + data_file_globs: + - "*-M3D*.TIF" + - "*-M3D*.tif" + - "*-S3D*.TIF" + - "*-S3D*.tif" + - "*-M2A*.TIF" + - "*-M2A*.tif" + - "*-S2A*.TIF" + - "*-S2A*.tif" + EW01: + data_file_globs: + - "*.TIF" + - "*.tif" + DM02: + data_file_globs: + - "*.tif" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/emg_registrar-config.yml b/config/emg_registrar-config.yml index e9103d8db2aa011cfbbdcff1e8e111246880e778..dd7acf4b05fc8f50f7a07b9c689a3b4a4fbfb52c 100644 --- a/config/emg_registrar-config.yml +++ b/config/emg_registrar-config.yml @@ -10,6 +10,7 @@ sources: auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' auth_url_short: !env '${OS_AUTH_URL_SHORT}' + user_domain_name: !env '${OS_USER_DOMAIN_NAME}' container: !env '${UPLOAD_CONTAINER}' schemes: @@ -19,8 +20,8 @@ backends: - type: eoxserver filter: kwargs: - instance_base_path: /var/www/pvs/dev - instance_name: pvs_instance + instance_base_path: !env '${INSTALL_DIR}' + instance_name: !env '${INSTANCE_NAME}' mapping: CS00: ~: @@ -246,6 +247,27 @@ backends: - !env '${COLLECTION}' coverages: SP07: RGBNir + SK00: + ~: + product_type_name: !env '${COLLECTION}_Product_SK00' + 
collections: + - !env '${COLLECTION}' + coverages: + SK00: RGBNir + SW03: + ~: + product_type_name: !env '${COLLECTION}_Product_SW03' + collections: + - !env '${COLLECTION}' + coverages: + SW03: RGBNir + PL00: + ~: + product_type_name: !env '${COLLECTION}_Product_PL00' + collections: + - !env '${COLLECTION}' + coverages: + PL00: RGBNir TX01_2: ~: product_type_name: !env '${COLLECTION}_Product_TX01_2' diff --git a/config/vhr18_preprocessor-config.yml b/config/vhr18_preprocessor-config.yml index 359c52da54a21f67d2b38ab45d88671dbe404d23..703792f1089326e3890a0abcae771c68ab22c3f6 100644 --- a/config/vhr18_preprocessor-config.yml +++ b/config/vhr18_preprocessor-config.yml @@ -46,7 +46,7 @@ preprocessing: - BLOCKSIZE=512 - COMPRESS=DEFLATE - NUM_THREADS=8 - - BIGTIFF=IF_SAFER + - BIGTIFF=IF_NEEDED - OVERVIEWS=AUTO types: PH1B: # just to pass validation diff --git a/config/vhr18_registrar-config.yml b/config/vhr18_registrar-config.yml index cee5f0f3edf66fea4f6d783084d268cbe83dd0ca..38bba5609def232ec2399f00e88c3294a9f42166 100644 --- a/config/vhr18_registrar-config.yml +++ b/config/vhr18_registrar-config.yml @@ -10,6 +10,7 @@ sources: auth_version: !env '${ST_AUTH_VERSION}' auth_url: !env '${OS_AUTH_URL}' auth_url_short: !env '${OS_AUTH_URL_SHORT}' + user_domain_name: !env '${OS_USER_DOMAIN_NAME}' container: !env '${UPLOAD_CONTAINER}' schemes: @@ -19,8 +20,8 @@ backends: - type: eoxserver filter: kwargs: - instance_base_path: /var/www/pvs/dev - instance_name: pvs_instance + instance_base_path: !env '${INSTALL_DIR}' + instance_name: !env '${INSTANCE_NAME}' mapping: PL00: Level_1: diff --git a/core/Dockerfile b/core/Dockerfile index eba367f4a90596414df4ad16fdce3fbb1ab7a457..0fd5168b1da30165c04dadf7e852f3a0fb32d81b 100644 --- a/core/Dockerfile +++ b/core/Dockerfile @@ -65,10 +65,12 @@ ENV INSTANCE_ID="prism-view-server_core" \ APACHE_ServerName="pvs_instance" \ APACHE_ServerAdmin="office@eox.at" \ APACHE_ALIAS="pvs" \ - REDIS_HOST= \ - REDIS_PORT= \ - REDIS_REGISTER_QUEUE_KEY= \ - REDIS_REGISTERED_SET_KEY= \ + REDIS_HOST="redis" \ + REDIS_PORT="6379" \ + REDIS_REGISTER_QUEUE_KEY="register_queue" \ + REDIS_REGISTER_FAILURE_KEY="register-failure_set" \ + REDIS_REGISTER_PROGRESS_KEY="registering_set" \ + REDIS_REGISTER_SUCCESS_KEY="register-success_set" \ INIT_SCRIPTS="/configure.sh" \ COLLECT_STATIC="false" \ REGISTRAR_REPLACE= diff --git a/core/configure.sh b/core/configure.sh index 3966e886e6f9e09c4c1a41c3fe2b1542466db9e5..2565f8086d9844be0acca03e349f556594e4a13a 100644 --- a/core/configure.sh +++ b/core/configure.sh @@ -28,6 +28,7 @@ sed -e "s,http_service_url=http://localhost:8000/ows,http_service_url=${APACHE_ALIAS}/ows," -i pvs_instance/conf/eoxserver.conf sed -e "s/resampling_method=average/resampling_method=near/" -i pvs_instance/conf/eoxserver.conf + sed -e "s/image\/jp2,GDAL\/JPEG2000/#image\/jp2,GDAL\/JPEG2000/" -i pvs_instance/conf/formats.conf # TODO maxsize... 
echo "EOXS_VALIDATE_IDS_NCNAME = False" >> pvs_instance/settings.py diff --git a/core/registrar/cli.py b/core/registrar/cli.py index 0a2943ffa33a7c93227e2ce43e5faf4ac687d35f..a9bf677c72a3e25e8676f56dc0d6f5cb43569434 100644 --- a/core/registrar/cli.py +++ b/core/registrar/cli.py @@ -53,14 +53,16 @@ def cli(): @click.option('--host', type=str) @click.option('--port', type=int) @click.option('--listen-queue', type=str) -@click.option('--registered-set-key', type=str) +@click.option('--progress-set', type=str) +@click.option('--failure-set', type=str) +@click.option('--success-set', type=str) @click.option('--debug/--no-debug', default=False) -def daemon(config_file=None, validate=False, replace=False, host=None, port=None, listen_queue=None, registered_set_key=None, debug=False): +def daemon(config_file=None, validate=False, replace=False, host=None, port=None, listen_queue=None, progress_set=None, failure_set=None, success_set=None, debug=False): setup_logging(debug) config = load_config(config_file) if validate: validate_config(config) - run_daemon(config, replace, host, port, listen_queue, registered_set_key) + run_daemon(config, replace, host, port, listen_queue, progress_set, failure_set, success_set) @cli.command(help='Run a single, one-off registration') diff --git a/core/registrar/daemon.py b/core/registrar/daemon.py index efdf1ff565e5bda7c1b6bb80cd6465e027829cfe..887364035a99dd60bda536dfec4c8965afa4bff6 100644 --- a/core/registrar/daemon.py +++ b/core/registrar/daemon.py @@ -9,10 +9,9 @@ from .registrar import register_file logger = logging.getLogger(__name__) -def run_daemon(config, replace, host, port, listen_queue, registered_set_key): +def run_daemon(config, replace, host, port, listen_queue, progress_set, failure_set, success_set): """ Run the registrar daemon, listening on a redis queue - for files to be registered. After preprocessing the filename - of the registered files will be pushed to the output queue. + for files to be registered. 
""" # initialize the queue client client = redis.Redis( @@ -22,10 +21,15 @@ def run_daemon(config, replace, host, port, listen_queue, registered_set_key): while True: # fetch an item from the queue to be registered _, value = client.brpop(listen_queue) + client.sadd(progress_set, value) # start the registration on that file try: item = register_file(config, value, replace) - client.sadd(registered_set_key, item.identifier) - + client.sadd(success_set, item.identifier) + client.srem(progress_set, value) except Exception as e: + if 'is already registered' not in "%s" % e: + # do not add to failure if skipped due to already registered + client.sadd(failure_set, value) + client.srem(progress_set, value) logger.exception(e) diff --git a/core/run-registrar.sh b/core/run-registrar.sh index a1bae61792c574e203a50054ed4d1943f80dbb51..5c40132c7f9a91f61ee9df41f48422ac95ce366a 100644 --- a/core/run-registrar.sh +++ b/core/run-registrar.sh @@ -11,5 +11,7 @@ registrar daemon \ --host ${REDIS_HOST} \ --port ${REDIS_PORT} \ --listen-queue ${REDIS_REGISTER_QUEUE_KEY} \ - --registered-set-key ${REDIS_REGISTERED_SET_KEY} \ + --progress-set ${REDIS_REGISTER_PROGRESS_KEY} \ + --failure-set ${REDIS_REGISTER_FAILURE_KEY} \ + --success-set ${REDIS_REGISTER_SUCCESS_KEY} \ ${replace} >&2 diff --git a/docker-compose.dem.yml b/docker-compose.dem.yml index 6178f53c03ae8f19c2684722193be18c55e03af7..8dd1b825a9e8f023b1fd983e83537a08e0b7e41f 100644 --- a/docker-compose.dem.yml +++ b/docker-compose.dem.yml @@ -92,7 +92,6 @@ services: env_file: - env/dem.env - env/dem_obs.env - - env/dem_redis.env secrets: - OS_PASSWORD environment: @@ -111,7 +110,6 @@ services: env_file: - env/dem.env - env/dem_obs.env - - env/dem_redis.env secrets: - OS_PASSWORD - OS_PASSWORD_DOWNLOAD @@ -144,7 +142,6 @@ services: - env/dem.env - env/dem_db.env - env/dem_obs.env - - env/dem_redis.env secrets: - DJANGO_PASSWORD - OS_PASSWORD @@ -190,8 +187,6 @@ services: replicas: 1 ingestor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_ingestor:latest - env_file: - - env/dem_redis.env command: ["python3", "/filedaemon.py"] volumes: diff --git a/docker-compose.emg.yml b/docker-compose.emg.yml index a74d3460c750debac5d88ba48c9f1c43cb0bcaea..e99e747447547771e2fd70e52430aa318a37c489 100644 --- a/docker-compose.emg.yml +++ b/docker-compose.emg.yml @@ -2,7 +2,8 @@ version: "3.6" services: database: image: mdillon/postgis:10 - # PUT BACK VOLUME AFTER TEST + volumes: + - db-data:/var/lib/postgresql/data env_file: - env/emg.env - env/emg_db.env @@ -91,7 +92,6 @@ services: env_file: - env/emg.env - env/emg_obs.env - - env/emg_redis.env secrets: - OS_PASSWORD environment: @@ -113,8 +113,6 @@ services: - type: volume source: from-fepd target: /mnt/data - env_file: - - env/emg_redis.env environment: INSTANCE_ID: "prism-view-server_ingestor" deploy: @@ -126,7 +124,6 @@ services: env_file: - env/emg.env - env/emg_obs.env - - env/emg_redis.env secrets: - OS_PASSWORD - OS_PASSWORD_DOWNLOAD @@ -159,7 +156,6 @@ services: - env/emg.env - env/emg_db.env - env/emg_obs.env - - env/emg_redis.env secrets: - DJANGO_PASSWORD - OS_PASSWORD diff --git a/docker-compose.vhr18.yml b/docker-compose.vhr18.yml index 40206b9e414e809ba7807aff1b8462517ec17940..0e810e6f0151cc007fe8b1d87cd5190ba6519b4d 100644 --- a/docker-compose.vhr18.yml +++ b/docker-compose.vhr18.yml @@ -95,7 +95,6 @@ services: env_file: - env/vhr18.env - env/vhr18_obs.env - - env/vhr18_redis.env secrets: - OS_PASSWORD environment: @@ -117,8 +116,6 @@ services: - type: volume source: from-fepd target: /mnt/data - env_file: 
- - env/vhr18_redis.env environment: INSTANCE_ID: "prism-view-server_ingestor" deploy: @@ -130,7 +127,6 @@ services: env_file: - env/vhr18.env - env/vhr18_obs.env - - env/vhr18_redis.env secrets: - OS_PASSWORD - OS_PASSWORD_DOWNLOAD @@ -163,7 +159,6 @@ services: - env/vhr18.env - env/vhr18_db.env - env/vhr18_obs.env - - env/vhr18_redis.env secrets: - DJANGO_PASSWORD - OS_PASSWORD diff --git a/documentation/operator-guide/access.rst b/documentation/operator-guide/access.rst index 9dafab5175a4373ef0963b65056062e15e2c619a..3fd785fac8cf28cf56509a65f0a0c836ac54f0c9 100644 --- a/documentation/operator-guide/access.rst +++ b/documentation/operator-guide/access.rst @@ -62,7 +62,7 @@ Here, access on such endpoint requires basic authentication credentials (usernam Middleware delegates the authentication to Shibboleth. If Shibboleth response code is 2XX, access is granted and the original request is performed. Otherwise, the response from the Shibboleth is returned. -In order to authenticate with Shibboleth, a user must log in with valid credentials on the side of Identity Provider (IdP), if doing so, the IdP informs the SP about successful login, accompanied by relevant user attributes and a session is created for the user. SP then saves the information about a created session into a cookie and based on user attributes can authorize access to the services. If the user was already logged in, he is automatically +In order to authenticate with Shibboleth, a user must log in with valid credentials on the side of Identity Provider (IdP), if doing so, the IdP informs the SP about successful login, accompanied by relevant user attributes and a session is created for the user. SP then saves the information about a created session into a cookie and based on user attributes can authorize access to the services. If the user was already logged in, he is automatically offered the requested resource. Currently setting individual authorization rules on a ``Collection`` and ``Service`` level is possible with current approach. It is yet not clearly possible to separate viewing and download, as both of these parts are handled by ``renderer`` service. diff --git a/documentation/operator-guide/configuration.rst b/documentation/operator-guide/configuration.rst index dce3a837f6dcba86c2a197f794bccaf0c2180365..de3e0084a312e782e18bdd162ca0243b978d3e79 100644 --- a/documentation/operator-guide/configuration.rst +++ b/documentation/operator-guide/configuration.rst @@ -96,7 +96,6 @@ Environment variables and ``.env`` files are passed to the services via the - env/stack.env - env/stack_db.env - env/stack_obs.env - - env/stack_redis.env environment: INSTANCE_ID: "prism-view-server_registrar" INSTALL_DIR: "/var/www/pvs/dev/" @@ -118,8 +117,6 @@ The following ``.env`` files are typically used: django admin user to be used with the admin GUI. * ``<stack-name>_obs.env``: This contains access parameters for the object storage(s). -* ``<stack-name>_redis.env``: Redis access credentials and queue names - Groups of Environment Variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -359,51 +356,55 @@ preprocessing How the extracted files shall be georeferenced. - type + geotransforms - The type of georeferencing to apply. One of ``gcp``, ``rpc``, - ``corner``, ``world``. + A list of georeference methods with options to try. + + type - options + The type of georeferencing to apply. One of ``gcp``, ``rpc``, + ``corner``, ``world``. + + options - Additional options for the georeferencing. Depends on the type of - georeferencing. 
+ Additional options for the georeferencing. Depends on the type of + georeferencing. - order + order - The polynomial order to use for GCP related georeferencing. + The polynomial order to use for GCP related georeferencing. - projection + projection - The projection to use for ungeoreferenced images. + The projection to use for ungeoreferenced images. - rpc_file_template + rpc_file_template - The file glob template to use to find the RPC file. Template - parameters are {filename}, {fileroot}, and {extension}. + The file glob template to use to find the RPC file. Template + parameters are {filename}, {fileroot}, and {extension}. - warp_options + warp_options - Warp options. See - https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for - details + Warp options. See + https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for + details - corner_names + corner_names - The metadata field name including the corner names. Tuple of four: - bottom-left, bottom-right, top-left and top-right + The metadata field name including the corner names. Tuple of four: + bottom-left, bottom-right, top-left and top-right - orbit_direction_name + orbit_direction_name - The metadata field name containing the orbit direction + The metadata field name containing the orbit direction - force_north_up + force_north_up - Circumvents the naming of corner names and assumes a north-up orientation of the image. + Circumvents the naming of corner names and assumes a north-up orientation of the image. - tps + tps - Whether to use TPS transformation instead of GCP polynomials. + Whether to use TPS transformation instead of GCP polynomials. calc diff --git a/documentation/operator-guide/ingestion.rst b/documentation/operator-guide/ingestion.rst index 456d9ca7fbbcdb51fb9168a5e052da1a9e0bf920..3516fe75a5fddd763b740b5fe950141cb537ccce 100644 --- a/documentation/operator-guide/ingestion.rst +++ b/documentation/operator-guide/ingestion.rst @@ -137,8 +137,8 @@ Triggering preprocessing and registration via pushing to the redis queues is ver ``Ingestor`` can work in two modes: -- Default: Exposing a simple ``/`` endpoint, and listening for ``POST`` requests containing ``data`` with either a Browse Report JSON or a string with path to the object storage with product to be ingested. It then parses this informatio and internally puts it into configured redis queue (preprocess or register). -- Alternative: Listening for newly added Browse Report files on a configured path on a file system via ``inotify``. +- Default: Exposing a simple ``/`` endpoint, and listening for ``POST`` requests containing ``data`` with either a Browse Report XML or a string with path to the object storage with product to be ingested. It then parses this information and internally puts it into configured redis queue (preprocess or register). +- Alternative: Listening for newly added Browse Report or Availability Report files on a configured path on a file system via ``inotify``. These Browse Report files need to be in an agreed XML schema to be correctly handled. ``Sftp`` service enables a secure access to a configured folder via sftp, while this folder can be mounted to other vs services. This way, ``Ingestor`` can listen for newly created files by the sftp access. 
If the filedaemon alternative mode should be used, ``INOTIFY_WATCH_DIR`` environment variable needs to be set and a ``command`` used in the docker-compose.<stack>.ops.yml for ``ingestor`` service needs to be set to ``python3 filedaemon.py``: diff --git a/env/dem_redis.env b/env/dem_redis.env deleted file mode 100644 index 3eff4afd5a700d498d26fadb791632909d8b5f30..0000000000000000000000000000000000000000 --- a/env/dem_redis.env +++ /dev/null @@ -1,11 +0,0 @@ -REDIS_HOST=redis -REDIS_PORT=6379 - -REDIS_QUEUE_KEY=seed_queue - -REDIS_PREPROCESS_QUEUE_KEY=preprocess_queue -REDIS_PREPROCESS_MD_QUEUE_KEY=preprocess-md_queue -REDIS_REGISTER_QUEUE_KEY=register_queue -REDIS_REGISTERED_SET_KEY=registered_set -REDIS_SET_KEY=registered_set -REDIS_SEED_QUEUE_KEY=seed_queue diff --git a/env/emg_redis.env b/env/emg_redis.env deleted file mode 100644 index 3eff4afd5a700d498d26fadb791632909d8b5f30..0000000000000000000000000000000000000000 --- a/env/emg_redis.env +++ /dev/null @@ -1,11 +0,0 @@ -REDIS_HOST=redis -REDIS_PORT=6379 - -REDIS_QUEUE_KEY=seed_queue - -REDIS_PREPROCESS_QUEUE_KEY=preprocess_queue -REDIS_PREPROCESS_MD_QUEUE_KEY=preprocess-md_queue -REDIS_REGISTER_QUEUE_KEY=register_queue -REDIS_REGISTERED_SET_KEY=registered_set -REDIS_SET_KEY=registered_set -REDIS_SEED_QUEUE_KEY=seed_queue diff --git a/env/vhr18_redis.env b/env/vhr18_redis.env deleted file mode 100644 index 3eff4afd5a700d498d26fadb791632909d8b5f30..0000000000000000000000000000000000000000 --- a/env/vhr18_redis.env +++ /dev/null @@ -1,11 +0,0 @@ -REDIS_HOST=redis -REDIS_PORT=6379 - -REDIS_QUEUE_KEY=seed_queue - -REDIS_PREPROCESS_QUEUE_KEY=preprocess_queue -REDIS_PREPROCESS_MD_QUEUE_KEY=preprocess-md_queue -REDIS_REGISTER_QUEUE_KEY=register_queue -REDIS_REGISTERED_SET_KEY=registered_set -REDIS_SET_KEY=registered_set -REDIS_SEED_QUEUE_KEY=seed_queue diff --git a/ingestor/Dockerfile b/ingestor/Dockerfile index 53f44dcc207b5a66efe7de89a0e4bd5a88008234..311f3d7a6241ece034540a27ac59ea101359e4d7 100644 --- a/ingestor/Dockerfile +++ b/ingestor/Dockerfile @@ -43,7 +43,7 @@ COPY app.py config.py filedaemon.py / COPY ingestor/ /ingestor ENV INSTANCE_ID="prism-view-server_ingestor" \ - REDIS_HOST= \ + REDIS_HOST="redis" \ REDIS_PORT="6379" \ REDIS_PREPROCESS_MD_QUEUE_KEY="preprocess-md_queue" \ INOTIFY_WATCH_DIR="/mnt/data" diff --git a/preprocessor/Dockerfile b/preprocessor/Dockerfile index 756dbbb42670cab2840d9357994bfbf79698bb7a..5d9a2c31227c5fd8b97f98b12ece83a3bb1a9772 100644 --- a/preprocessor/Dockerfile +++ b/preprocessor/Dockerfile @@ -25,7 +25,7 @@ # IN THE SOFTWARE. 
#----------------------------------------------------------------------------- -FROM osgeo/gdal:ubuntu-full-3.1.2 +FROM osgeo/gdal:ubuntu-full-latest MAINTAINER EOX LABEL name="prism view server preprocessor" \ @@ -64,11 +64,15 @@ ENV INSTANCE_ID="prism-data-access-server_preprocessor" \ OS_TENANT_NAME_DOWNLOAD= \ OS_REGION_NAME_DOWNLOAD= \ OS_USER_DOMAIN_NAME_DOWNLOAD= \ - REDIS_HOST= \ - REDIS_PORT= \ - REDIS_PREPROCESS_QUEUE_KEY= \ - REDIS_PREPROCESS_MD_QUEUE_KEY= \ - REDIS_REGISTER_QUEUE_KEY= \ + REDIS_HOST="redis" \ + REDIS_PORT="6379" \ + REDIS_REGISTER_QUEUE_KEY="register_queue" \ + REDIS_PREPROCESS_QUEUE_KEY="preprocess_queue" \ + REDIS_PREPROCESS_MD_QUEUE_KEY="preprocess-md_queue" \ + REDIS_PREPROCESS_FAILURE_KEY="preprocess-failure_set" \ + REDIS_PREPROCESS_PROGRESS_KEY="preprocessing_set" \ + REDIS_PREPROCESS_SUCCESS_KEY="preprocess-success_set" \ + GDAL_PAM_ENABLED="NO" \ PREPROCESSOR_DEBUG= ADD run-preprocessor.sh \ diff --git a/preprocessor/preprocessor/cli.py b/preprocessor/preprocessor/cli.py index 9836ffdc5f02928766a66ba7d043c2f5c0b07763..0652047031d243f3c9f91cc3c759f631c161283a 100644 --- a/preprocessor/preprocessor/cli.py +++ b/preprocessor/preprocessor/cli.py @@ -55,13 +55,16 @@ def cli(): @click.option('--listen-queue', type=str) @click.option('--listen-md-queue', type=str) @click.option('--write-queue', type=str) +@click.option('--progress-set', type=str) +@click.option('--failure-set', type=str) +@click.option('--success-set', type=str) @click.option('--debug/--no-debug', default=False) -def daemon(config_file=None, use_dir=None, validate=False, host=None, port=None, listen_queue=None, listen_md_queue=None, write_queue=None, debug=False): +def daemon(config_file=None, use_dir=None, validate=False, host=None, port=None, listen_queue=None, listen_md_queue=None, write_queue=None, progress_set=None, failure_set=None, success_set=None, debug=False): setup_logging(debug) config = load_config(config_file) if validate: validate_config(config) - run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue) + run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, progress_set, failure_set, success_set) @cli.command(help='Run a single, one-off preprocessing') diff --git a/preprocessor/preprocessor/config-schema.yaml b/preprocessor/preprocessor/config-schema.yaml index 93f3f9b4f972556cf362bdc5eb24a163772d6fe9..10bb58f5f617c72ae85dce5d7c60b7f35dd69d29 100644 --- a/preprocessor/preprocessor/config-schema.yaml +++ b/preprocessor/preprocessor/config-schema.yaml @@ -117,44 +117,47 @@ definitions: ".*": type: string georeference: - description: The definition of a georeferencing step. - type: object + type: object properties: - type: - description: The type of georeferencing to apply. - type: string - enum: [gcp, rpc, corner, world] # TODO: more - options: - description: Additional options for the georeferencing. Depends on the type of georeferencing. - type: object - properties: - order: - description: The polynomial order to use for GCP reprojection. - type: number - projection: - description: The projection to use for ungeoreferenced images. - type: string - rpc_file_template: - description: The file glob template to use to find the RPC file. Template parameters are {filename}, {fileroot}, and {extension}. - type: string - warp_options: - description: "Warp options. See https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details" - corner_names: - description: "The metadata field name including the corner names. 
Tuple of four: bottom-left, bottom-right, top-left and top-right" - type: array - items: + geotransforms: + description: A list of geotransform methods to use + type: array + items: + type: object + properties: + type: + description: The type of georeferencing to apply. type: string - orbit_direction_name: - description: The metadata field name containing the orbit direction - type: string - force_north_up: - description: - type: boolean - tps: - description: Whether to use TPS transformation instead of GCP polynomials. - type: boolean - - required: [type] + enum: [gcp, rpc, corner, world, no_op] # TODO: more + options: + description: Additional options for the georeferencing. Depends on the type of georeferencing. + type: object + properties: + order: + description: The polynomial order to use for GCP reprojection. + type: number + projection: + description: The projection to use for ungeoreferenced images. + type: string + rpc_file_template: + description: The file glob template to use to find the RPC file. Template parameters are {filename}, {fileroot}, and {extension}. + type: string + warp_options: + description: "Warp options. See https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details" + corner_names: + description: "The metadata field name including the corner names. Tuple of four: bottom-left, bottom-right, top-left and top-right" + type: array + items: + type: string + orbit_direction_name: + description: The metadata field name containing the orbit direction + type: string + force_north_up: + description: + type: boolean + tps: + description: Whether to use TPS transformation instead of GCP polynomials. + type: boolean calc: description: Definition of a calculation step. type: object diff --git a/preprocessor/preprocessor/daemon.py b/preprocessor/preprocessor/daemon.py index e44a3c9079f341b59fcf3384be2bd97d7f811ca1..e4c945d806bca7b3e5650e4758e335b3da040b40 100644 --- a/preprocessor/preprocessor/daemon.py +++ b/preprocessor/preprocessor/daemon.py @@ -3,12 +3,12 @@ import logging import json from .preprocess import preprocess_file, preprocess_browse - +from .exceptions import ExistsAtUploadError logger = logging.getLogger(__name__) -def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue): +def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, progress_set, failure_set, success_set): """ Run the preprocessing daemon, listening on a redis queue for files to be preprocessed. After preprocessing the filename of the preprocessed files will be pushed to the output queue. 
@@ -21,17 +21,42 @@ def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue): while True: # fetch an item from the queue to be preprocessed queue, value = client.brpop([listen_queue, listen_md_queue]) + client.sadd(progress_set, value) file_paths = [] # start the preprocessing on that file if queue == listen_queue: - filename, file_path = preprocess_file(config, value) - file_paths.append(file_path) + try: + filename, file_path = preprocess_file(config, value) + file_paths.append(file_path) + client.sadd(success_set, value) + client.srem(progress_set, value) + except ExistsAtUploadError as e: + # do not add file already present to failure set + client.srem(progress_set, value) + logger.exception(e) + continue + except Exception as e: + client.sadd(failure_set, value) + client.srem(progress_set, value) + logger.exception(e) + continue elif queue == listen_md_queue: browse_report_data = json.loads(value) browse_type = browse_report_data['browse_type'] for browse in browse_report_data['browses']: - filename, file_path = preprocess_browse(config, browse_type, browse_report_data, browse) - file_paths.append(file_path) + try: + filename, file_path = preprocess_browse(config, browse_type, browse_report_data, browse) + file_paths.append(file_path) + client.sadd(success_set, browse['filename']) + except ExistsAtUploadError as e: + # do not add file already present to failure set + logger.exception(e) + continue + except Exception as e: + client.sadd(failure_set, browse['filename']) + logger.exception(e) + continue + client.srem(progress_set, value) # for browse_report mode # TODO: convert to string, list, .... for item in file_paths: client.lpush(write_queue, item) diff --git a/preprocessor/preprocessor/exceptions.py b/preprocessor/preprocessor/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..08b7726f014766b7428c0ae11366d284656d6bb5 --- /dev/null +++ b/preprocessor/preprocessor/exceptions.py @@ -0,0 +1,2 @@ +class ExistsAtUploadError(Exception): + pass diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 626099a5ab25ec001f907644fb1e856276bde68a..68e118a906400740da116b7c07b15bfd90485cb8 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -15,7 +15,8 @@ from .steps import ( georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step ) from .steps.browse_report import browse_georeference -from .util import workdir, Timer +from .util import workdir, Timer, get_size_in_bytes +from .exceptions import ExistsAtUploadError logging.basicConfig() @@ -94,7 +95,7 @@ def preprocess_internal(preprocess_config, previous_step='unpack'): preprocessor = STEP_FUNCTIONS[step] with Timer() as step_timer: - preprocessor(previous_step, step, **step_config) + preprocessor(previous_step, step, preprocess_config, **step_config) logger.info( 'Finished preprocessing step %s after %.3f seconds.' 
@@ -126,18 +127,18 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N logger.info('Preprocessing %s in %s' % (file_path, dirname)) target_config = config['target'] # check if target.replace is configured and if not, check storage if files there - if not target_config['replace']: + if not target_config.get('replace'): uploader = get_uploader( target_config['type'], target_config.get('args'), target_config.get('kwargs') ) if uploader.product_exists(file_path): - raise Exception('Target.replace configuration is not set to true and objects already exist in target %s.' % file_path) + raise ExistsAtUploadError('Target.replace configuration is not set to true and objects already exist in target %s.' % file_path) else: logger.debug('Product does not yet exist on target') # check if we can reuse a previous download if not os.path.isdir('download'): os.mkdir('download') - logger.info('Downloading %s from %s...' % (file_path, dirname)) + logger.info('Downloading %s to %s...' % (file_path, dirname)) # get the Downloader for the configured source archive to download the given source file source_config = config['source'] downloader = get_downloader( @@ -148,8 +149,8 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N source_archive_path = downloader.download(file_path, 'download') logger.info( - 'Downloaded file %s in %.3f seconds' - % (file_path, download_timer.elapsed) + 'Downloaded file %s with size "%.02f" MB in "%.2f" seconds' + % (file_path, get_size_in_bytes(source_archive_path, 'MB'), download_timer.elapsed) ) else: diff --git a/preprocessor/preprocessor/steps/calc.py b/preprocessor/preprocessor/steps/calc.py index 493c6c5c9dcc8333549ad12e6dd3938e5deba371..67e98dce5981b8b8aab1fbed9b92fb0a7cf539c3 100644 --- a/preprocessor/preprocessor/steps/calc.py +++ b/preprocessor/preprocessor/steps/calc.py @@ -12,7 +12,7 @@ from ..util import replace_ext logger = logging.getLogger(__name__) -def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, formulas: List[dict]): +def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, formulas: List[dict]): for i, item in enumerate(formulas): # get first filename as a base filename = glob(join(source_dir, list(item['inputs'].values())[0]['glob']))[0] diff --git a/preprocessor/preprocessor/steps/georeference.py b/preprocessor/preprocessor/steps/georeference.py index 129074815f84b8f10720d367d09606a155677251..80a8b43205130ddd2b5ca1dcb2af7700ca7040eb 100644 --- a/preprocessor/preprocessor/steps/georeference.py +++ b/preprocessor/preprocessor/steps/georeference.py @@ -11,25 +11,46 @@ from ..util import gdal, osr, replace_ext logger = logging.getLogger(__name__) -def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, type: str, **options: dict): - type_name = type.lower() - if type_name == 'gcp': - georef_func = gcp_georef - elif type_name == 'rpc': - georef_func = rpc_georef - elif type_name == 'world': - georef_func = world_georef - elif type_name == 'corners': - georef_func = corner_georef - else: - raise Exception('Invalid georeference type %s' % type_name) - for filename in [path for path in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(path)]: - target_filename = join(target_dir, basename(filename)) - georef_func(filename, target_filename, **options) +def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, geotransforms: List[dict]): + success = False + for options in geotransforms: 
+ type_name = options['type'].lower() + opts_dict = {i: options[i] for i in options if i != 'type'} + if type_name == 'gcp': + georef_func = gcp_georef + elif type_name == 'rpc': + georef_func = rpc_georef + elif type_name == 'world': + georef_func = world_georef + elif type_name == 'corners': + georef_func = corner_georef + elif type_name == 'no_op': + georef_func = no_op + else: + raise Exception('Invalid georeference type %s' % type_name) + try: + filenames = [] + for dataglob in preprocessor_config.get('data_file_globs', '*'): + for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not os.path.isdir(path)]: + filenames.append(p) + for filename in filenames: + target_filename = join(target_dir, basename(filename)) + georef_func(filename, target_filename, **opts_dict) + except Exception as e: + # if any file fails, try the next configured method + logger.warning(e) + continue + else: + # all files georeferenced without exception, no need to try other methods + success = True + logger.debug('Geotransform method succeeded: %s' % type_name) + break + if not success and len(geotransforms) > 0: + raise Exception('No configured georeference method successful: %s' % ', '.join([item['type'] for item in geotransforms])) def gcp_georef(input_filename: os.PathLike, target_filename: os.PathLike, order: int=1, projection: str='EPSG:4326', - tps: bool=False): + tps: bool=False, warp_options: dict=None): succeded = False # simple case: get the geotransform from some GCPs @@ -50,6 +71,7 @@ def gcp_georef(input_filename: os.PathLike, target_filename: os.PathLike, order: del ds shutil.move(input_filename, target_filename) succeded = True + logger.debug("GCPsToGeoTransform successful") # otherwise warp if not succeded: @@ -68,9 +90,11 @@ def gcp_georef(input_filename: os.PathLike, target_filename: os.PathLike, order: target_filename, input_filename, dstSRS=projection, + **(warp_options or {}), **options, ) + def rpc_georef(input_filename: os.PathLike, target_filename: os.PathLike, rpc_file_template: str='{fileroot}.RPC', warp_options: dict=None): fileroot, extension = splitext(input_filename) rpc_file_glob = rpc_file_template.format( @@ -85,7 +109,6 @@ def rpc_georef(input_filename: os.PathLike, target_filename: os.PathLike, rpc_fi # rename RPC filename to be compatible with GDAL if rpc_filename: shutil.move(rpc_filename, replace_ext(input_filename, '.rpc')) - gdal.Warp( target_filename, input_filename, @@ -141,6 +164,10 @@ def world_georef(): pass +def no_op(input_filename: os.PathLike, target_filename: os.PathLike): + # does not perform any operation + # configure as the last entry if earlier geotransform methods are allowed to fail and the file should then be passed through unchanged + shutil.move(input_filename, target_filename) def gcps_from_borders(size: Tuple[float, float], coords: List[Tuple[float, float]], orbit_direction: str, force_north_up: bool=False): diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py index d90c53435419f999964899080a7fa7fb6f277ef3..efc34ba92c123ce446d508d7a3a4a4d9ff38c41d 100644 --- a/preprocessor/preprocessor/steps/output.py +++ b/preprocessor/preprocessor/steps/output.py @@ -9,7 +9,7 @@ import logging logger = logging.getLogger(__name__) -def output_step(source_dir: os.PathLike, target_dir: os.PathLike, options: dict=None): +def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None): # find out the driver to get the extension options = options if options is not None else {}
frmt = options.get('format', 'GTiff') @@ -19,20 +19,20 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, options: dict= extension = driver.GetMetadata().get('DMD_EXTENSIONS', 'tif').split(' ')[0] # warp each individual file warped_files = [] - for filename in [path for path in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(path)]: + filenames = [] + for dataglob in preprocessor_config.get('data_file_globs', '*'): + for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not os.path.isdir(path)]: + filenames.append(p) + for filename in filenames: target_filename = join(target_dir, replace_ext(basename(filename), extension)) logger.debug('Warping file %s' % filename) - gdal.Warp(target_filename, filename, options=gdal.WarpOptions( - **options - )) + gdal.Warp(target_filename, filename, **options) warped_files.append(target_filename) if len(warped_files) > 1: tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension)) logger.debug('Warping files %s' % warped_files) - gdal.Warp(tmp_filename, warped_files, options=gdal.WarpOptions( - **options - )) + gdal.Warp(tmp_filename, warped_files, **options) # delete old files and rename the combined file to the first filename for filename in warped_files: diff --git a/preprocessor/preprocessor/steps/stack.py b/preprocessor/preprocessor/steps/stack.py index 809f3ce7e574e31e3788cb3e15cab5fd1c489186..e70865405db9b8651ab95cd5d745dd91ba95cdb4 100644 --- a/preprocessor/preprocessor/steps/stack.py +++ b/preprocessor/preprocessor/steps/stack.py @@ -8,10 +8,13 @@ from typing import List from ..util import replace_ext, gdal -def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, group_by: str=None, sort_by: str=None, order: List[str]=None): +def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str=None, sort_by: str=None, order: List[str]=None): """ Stack bands of the individual images """ - filenames = [path for path in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(path)] + filenames = [] + for dataglob in preprocessor_config.get('data_file_globs', '*'): + for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not os.path.isdir(path)]: + filenames.append(p) # check if we have a group_by regex. If yes, use the first # re-group to group by. 
# Fallback is basename of file as groupname diff --git a/preprocessor/preprocessor/steps/subdataset.py b/preprocessor/preprocessor/steps/subdataset.py index a284de94bae96d40a0156b766f512d2c3946834d..6438d27790b36e7924783d9950bb3d8518c08a98 100644 --- a/preprocessor/preprocessor/steps/subdataset.py +++ b/preprocessor/preprocessor/steps/subdataset.py @@ -6,7 +6,7 @@ from typing import Dict from ..util import replace_ext, gdal -def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, data_file_glob: str, subdataset_types: Dict[str, str]=None): +def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, data_file_glob: str, subdataset_types: Dict[str, str]=None): datafiles = glob(join(source_dir, data_file_glob)) if not datafiles: raise Exception('No datafiles were matched by the provided glob') diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index 249176c5643c3f58d5fedbfed94a478992679dee..d668037a2d32044808ca3092985e759a9f9a9b6c 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -66,3 +66,23 @@ class Timer: @property def elapsed(self): return (self.end if self.end is not None else time()) - self.start + + +def convert_unit(size_in_bytes, unit='B'): + """ Convert the size from bytes to other units like KB, MB, GB, TB""" + if unit == 'KB': + return size_in_bytes / 1024 + elif unit == 'MB': + return size_in_bytes / (1024 * 1024) + elif unit == 'GB': + return size_in_bytes / (1024 * 1024 * 1024) + elif unit == 'TB': + return size_in_bytes / (1024 * 1024 * 1024 * 1024) + else: + return size_in_bytes + + +def get_size_in_bytes(file_path, unit): + """ Get the size of the file at the given path, converted to the given unit""" + size = os.path.getsize(file_path) + return convert_unit(size, unit) diff --git a/preprocessor/run-preprocessor.sh b/preprocessor/run-preprocessor.sh index 70a8aee6572806b30e158b706fc65fbd95ad427c..2727f4e6d1e1f5e2fda5d660dc388c9df5174aa5 100644 --- a/preprocessor/run-preprocessor.sh +++ b/preprocessor/run-preprocessor.sh @@ -13,4 +13,7 @@ preprocessor daemon \ --listen-queue ${REDIS_PREPROCESS_QUEUE_KEY} \ --listen-md-queue ${REDIS_PREPROCESS_MD_QUEUE_KEY} \ --write-queue ${REDIS_REGISTER_QUEUE_KEY} \ - ${debug} \ No newline at end of file + --progress-set ${REDIS_PREPROCESS_PROGRESS_KEY} \ + --failure-set ${REDIS_PREPROCESS_FAILURE_KEY} \ + --success-set ${REDIS_PREPROCESS_SUCCESS_KEY} \ + ${debug} diff --git a/testing/testing_preprocessor_config.yml b/testing/testing_preprocessor_config.yml index ac160a150bfdd5ecda8f2161ab3e66947f7b4142..5edd267e9bf25a8765d16faa76f2f896519f97c8 100644 --- a/testing/testing_preprocessor_config.yml +++ b/testing/testing_preprocessor_config.yml @@ -55,6 +55,7 @@ preprocessing: nested: true data_file_globs: - "*.tif" + - "*.vrt" additional_file_globs: - "*.rpc*" stack_bands: @@ -67,14 +68,17 @@ - N MER_RR__2P: georeference: - type: gcp - tps: true + geotransforms: + - type: gcp + tps: true RS02: nested: true data_file_globs: - "*imagery_*.tif" + - "*imagery_*.vrt" georeference: - type: gcp + geotransforms: + - type: gcp calc: formulas: # complex b1/b2 to b1 uint16 for all 4 polarizations
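Illustrative only, not part of this changeset: the progress/success/failure sets introduced above (README, the Dockerfile defaults, and chart/values.yaml) can be inspected directly in Redis when verifying a deployment. The sketch below assumes the default key names from this patch and a Redis service reachable at `redis:6379`; host, port, and key names may differ per stack.

```python
# Minimal monitoring sketch: report how many items are in progress, succeeded,
# or failed for both the preprocessor and the registrar.
import redis

client = redis.Redis(host='redis', port=6379, decode_responses=True)

SET_KEYS = [
    'preprocessing_set',        # REDIS_PREPROCESS_PROGRESS_KEY
    'preprocess-success_set',   # REDIS_PREPROCESS_SUCCESS_KEY
    'preprocess-failure_set',   # REDIS_PREPROCESS_FAILURE_KEY
    'registering_set',          # REDIS_REGISTER_PROGRESS_KEY
    'register-success_set',     # REDIS_REGISTER_SUCCESS_KEY
    'register-failure_set',     # REDIS_REGISTER_FAILURE_KEY
]

for key in SET_KEYS:
    # scard returns the number of members; smembers lists them (fine for small sets)
    print('%s: %d item(s)' % (key, client.scard(key)))
    for member in sorted(client.smembers(key)):
        print('  %s' % member)
```

The same checks can be done interactively with `redis-cli smembers <key>` from inside the redis container.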