diff --git a/.gitignore b/.gitignore index 5d6bab6d7cdfa5a9ec740e0dbe8dca77be5d761b..28162099edef7656b0f50e4be7553e9df03a5a78 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/data __pycache__ *.pyc .venv diff --git a/config/csea-emg/csea-emg_preprocessor-config.yml b/config/csea-emg/csea-emg_preprocessor-config.yml index 0aa694ab8eb7d410a53aa9c132964f95d578815b..c11c947d9dbbf73837717b5eb560025868063e01 100644 --- a/config/csea-emg/csea-emg_preprocessor-config.yml +++ b/config/csea-emg/csea-emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -34,12 +36,18 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -129,72 +137,165 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - 
"*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - 
"*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -203,30 +304,171 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: 
"(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_SBI.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI.tif" + - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/dem/dem_preprocessor-config.yml 
b/config/dem/dem_preprocessor-config.yml index 8d35a8c7efdd712b0ff0275e1edff26b2a7a2ff7..2fd7f61a50495e4150e96852000cfb273972850d 100644 --- a/config/dem/dem_preprocessor-config.yml +++ b/config/dem/dem_preprocessor-config.yml @@ -33,6 +33,9 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 1: "height" move_files: true data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in - '**/DEM/*.dt2' diff --git a/config/demF/demF_preprocessor-config.yml b/config/demF/demF_preprocessor-config.yml index 8d35a8c7efdd712b0ff0275e1edff26b2a7a2ff7..2fd7f61a50495e4150e96852000cfb273972850d 100644 --- a/config/demF/demF_preprocessor-config.yml +++ b/config/demF/demF_preprocessor-config.yml @@ -33,6 +33,9 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 1: "height" move_files: true data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in - '**/DEM/*.dt2' diff --git a/config/emg/emg_preprocessor-config.yml b/config/emg/emg_preprocessor-config.yml index 0aa694ab8eb7d410a53aa9c132964f95d578815b..c11c947d9dbbf73837717b5eb560025868063e01 100644 --- a/config/emg/emg_preprocessor-config.yml +++ b/config/emg/emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() 
- /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -34,12 +36,18 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -129,72 +137,165 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, 
if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -203,30 +304,171 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + 
multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_SBI.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", 
"S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI.tif" + - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/frtx-emg/frtx-emg_preprocessor-config.yml b/config/frtx-emg/frtx-emg_preprocessor-config.yml index 0aa694ab8eb7d410a53aa9c132964f95d578815b..c11c947d9dbbf73837717b5eb560025868063e01 100644 --- a/config/frtx-emg/frtx-emg_preprocessor-config.yml +++ b/config/frtx-emg/frtx-emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -34,12 +36,18 @@ level_extractor: xpath: 
substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -129,72 +137,165 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - - 
"*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -203,30 +304,171 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + 
stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_SBI.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is 
default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 + formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI.tif" + - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/sace-emg/sace-emg_preprocessor-config.yml b/config/sace-emg/sace-emg_preprocessor-config.yml index 0aa694ab8eb7d410a53aa9c132964f95d578815b..c11c947d9dbbf73837717b5eb560025868063e01 100644 --- a/config/sace-emg/sace-emg_preprocessor-config.yml +++ b/config/sace-emg/sace-emg_preprocessor-config.yml @@ -27,6 +27,8 @@ keep_temp: false metadata_glob: "*GSC*.xml" type_extractor: xpath: + - /gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() + - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text() - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text() level_extractor: @@ -34,12 +36,18 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true nested: true 
data_file_globs: - '*.tif' - '*.jp2' - - '*.h5' output: options: format: COG @@ -129,72 +137,165 @@ preprocessing: - HV - VH - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" + 3: + - "HH" + - "HV" + - "HH decibel" + 5: + - "HH" + - "HV" + - "VH" + - "VV" + - "HH decibel" SK00: data_file_globs: - "*pansharpened_clip.tif" - "*_pansharpened.tif" SP06: data_file_globs: - # throw away Panchromatic *_P_* - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" SP07: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" additional_file_globs: - "*RPC_*" - "*DIM_*" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: # first try RPC, if not present, go on - type: rpc - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1A: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: 
"(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PH1B: data_file_globs: - - "*IMG_*_PMS_*.JP2" - - "*IMG_*_PMS_*.tif" - - "*IMG_*_MS_*.JP2" - - "*IMG_*_MS_*.tif" + - "*IMG_*.JP2" + - "*IMG_*.tif" - "*.vrt" additional_file_globs: - - "*RPC_*_MS_*.XML" - - "*DIM_*_MS_*.XML" - - "*RPC_*_PMS_*.XML" - - "*DIM_*_PMS_*.XML" + - "*RPC_*.XML" + - "*DIM_*.XML" + output: + group_by: "(.*_[PM]?[SM]?S?_).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES georeference: geotransforms: - type: rpc - type: fix_geotrans - type: no_op + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" + 1: + - "panchromatic" PL00: data_file_globs: - "*MS.tif" @@ -203,30 +304,171 @@ preprocessing: data_file_globs: - "*.tif" GY01: - # throw away Panchromatic *-P3D* data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 3: + - "red" + - "green" + - "blue" + 1: + - "panchromatic" EW03: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: 
COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW02: data_file_globs: - - "*-M3D*.tif" - - "*-S3D*.tif" - - "*-M2A*.tif" - - "*-S2A*.tif" + - "*.tif" + output: + group_by: "(.*[PMS][0-9][AD]S).*" + options: + format: COG + dstSRS: 'EPSG:4326' + dstNodata: 0 + multithread: true + warpMemoryLimit: 3000 + creationOptions: + - BLOCKSIZE=512 + - COMPRESS=DEFLATE + - NUM_THREADS=8 + - BIGTIFF=YES + - OVERVIEWS=AUTO + - PREDICTOR=YES + stac_band_mapping: + 4: + - "blue" + - "green" + - "red" + - "nir" + 1: + - "panchromatic" + 3: + - "red" + - "green" + - "blue" + 8: + - "coastal" + - "blue" + - "green" + - "yellow" + - "red" + - "red edge" + - "nir" + - "nir2" EW01: data_file_globs: - "*.tif" + stac_band_mapping: + 1: + - "grayscale" DM02: data_file_globs: - "*.tif" + SAR_HIM_1B: + gdal_config_options: + - "GDAL_PAM_ENABLED=YES" + data_file_globs: + - "*.h5" + - "*.tif" + - "*_SBI.vrt" + subdatasets: + subdataset_types: + '//S01/SBI': 'S01_SBI' + georeference: + geotransforms: + - type: corners + corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"] + orbit_direction_name: Orbit_Direction + calc: + formulas: + - inputs: + A: + glob: '*.tif' # band 1 is default + data_type: Float32 + formula: "10*log10((A.astype(float)-${A_1_statistics_min}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))" + output_postfix: _stretch + creationOptions: + - TILED=YES + - NUM_THREADS=8 + nodata_value: 0 + - inputs: + A: + glob: '*_stretch.tif' + data_type: UInt16 
+ formula: (65535*(maximum(0.001,A+25)/25)).astype(int) + output_postfix: _db + nodata_value: 0 + stack_bands: + data_file_globs: + - "*_SBI.tif" + - "*_SBI_stretch_db.tif" + stack_bands: + sort_by: ".*_(HH|HV|VH|VV)_proc.tif" + order: + - HH + - HV + - VH + - VV + stac_band_mapping: + 2: + - "HH" + - "HH decibel" # this configuration is still a stub - not all product types are done # https://gitlab.eox.at/esa/prism/vs/-/issues/56 # https://gitlab.eox.at/esa/prism/vs/-/issues/23 diff --git a/config/vhr18/vhr18_preprocessor-config.yml b/config/vhr18/vhr18_preprocessor-config.yml index 0eb3918986b2be210eb8f62802e515a11119dc05..a56f2a14bf35c11ae6ec90b8a340a851a29771e7 100644 --- a/config/vhr18/vhr18_preprocessor-config.yml +++ b/config/vhr18/vhr18_preprocessor-config.yml @@ -33,6 +33,13 @@ level_extractor: xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/') preprocessing: defaults: + stac_output: true + stac_band_mapping: + 4: + - "red" + - "green" + - "blue" + - "nir" move_files: true data_file_globs: - '*.tif' diff --git a/data/.gitkeep b/data/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/docker-compose.core12.dev.yml b/docker-compose.core12.dev.yml index 61956f67d432c45ba3f3510166ed7e2a22b8c782..987e49e40577091df0d0fbe8eb2df28793f7cb2b 100644 --- a/docker-compose.core12.dev.yml +++ b/docker-compose.core12.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,52 +34,41 
@@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 networks: extnet: name: core12-extnet diff --git a/docker-compose.core12.ops.yml b/docker-compose.core12.ops.yml index 59f0c154ccfd9223b7c75c29045f692e41a39d6a..68b199248929e9dc5333b706a47640d3daace01d 100644 --- a/docker-compose.core12.ops.yml +++ b/docker-compose.core12.ops.yml @@ -1,5 +1,5 @@ version: "3.6" -x-vs-version: :release-1.4.7 # bumpversion +x-vs-version: :release-1.4.8 # bumpversion services: database: volumes: diff --git a/docker-compose.csea-emg.dev.yml b/docker-compose.csea-emg.dev.yml index e1a3bcd82d58b2bfb95c9fe2f60bcfe0b2acf8f2..8f4afde771054ab63130844041cc1fafca28baf6 100644 --- a/docker-compose.csea-emg.dev.yml +++ b/docker-compose.csea-emg.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: 
registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,52 +34,41 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 networks: extnet: name: csea-emg-extnet diff --git a/docker-compose.dem.dev.yml b/docker-compose.dem.dev.yml index 220e7eb1692ee8728d0c1f0d1f0ee1e6ee3b02c2..49014549aa275e61759f83484815fc8405fd5cc2 100644 --- a/docker-compose.dem.dev.yml +++ b/docker-compose.dem.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - 
- "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,56 +34,45 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 terrain: deploy: replicas: 0 networks: extnet: name: dem-extnet - external: true \ No newline at end of file + external: true diff --git a/docker-compose.demF.dev.yml b/docker-compose.demF.dev.yml index 2edb7ce4aaec4e1ab72d9158b7a297033370bc68..1c9da21d36f2754ee4960689d07cbcc810821382 100644 --- a/docker-compose.demF.dev.yml +++ b/docker-compose.demF.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: 
bind source: ./client/src/ @@ -34,52 +34,41 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 terrain: deploy: replicas: 0 diff --git a/docker-compose.emg.dev.yml b/docker-compose.emg.dev.yml index 962cdc08a12f66b38ac4aecd91051a379a6c4923..856df42a56e9ffda8bdf4361a47e749467fd10ed 100644 --- a/docker-compose.emg.dev.yml +++ b/docker-compose.emg.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,52 +34,41 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: 
/data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 networks: extnet: name: emg-extnet diff --git a/docker-compose.frtx-emg.dev.yml b/docker-compose.frtx-emg.dev.yml index e258aa102f7d9d691304e39b654c3e84f9d43d73..1f7c35389200481a09a1f75467348a41f7760813 100644 --- a/docker-compose.frtx-emg.dev.yml +++ b/docker-compose.frtx-emg.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,52 +34,41 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: 
registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 networks: extnet: name: frtx-emg-extnet diff --git a/docker-compose.sace-emg.dev.yml b/docker-compose.sace-emg.dev.yml index 9a0219414b8d782a96dd2a37a9f91e9c82b86752..ff96c216bb82b53aa7faab2c054f590d362e78f2 100644 --- a/docker-compose.sace-emg.dev.yml +++ b/docker-compose.sace-emg.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,52 +34,41 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - 
type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - "83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 networks: extnet: name: sace-emg-extnet diff --git a/docker-compose.vhr18.dev.yml b/docker-compose.vhr18.dev.yml index 62c9304af7c2dfa12f7a04f930f5d3cc27bcc2a7..0866e9bc4527354057c78204857be0d4b79768cb 100644 --- a/docker-compose.vhr18.dev.yml +++ b/docker-compose.vhr18.dev.yml @@ -4,11 +4,9 @@ services: networks: - extnet ports: - - "5432:5432" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 5432 + published: 5432 sftp: image: registry.gitlab.eox.at/esa/prism/vs/pvs_sftp:dev ingestor: @@ -21,7 +19,9 @@ services: client: image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:dev ports: - - "80:80" + - mode: host + target: 80 + published: 80 volumes: - type: bind source: ./client/src/ @@ -34,52 +34,41 @@ services: environment: DEV: "true" ports: - - "81:80" - - "82:8080" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 81 + - mode: host + target: 8080 + published: 82 environment: DEV: "true" registrar: image: registry.gitlab.eox.at/esa/prism/vs/pvs_core:dev environment: DEV: "true" - volumes: - - type: bind - source: ./data/ - target: /data/ - - type: bind - source: ./core/ - target: /core/ - environment: - DEV: "true" cache: image: registry.gitlab.eox.at/esa/prism/vs/pvs_cache:dev ports: - - 
"83:80" - volumes: - - type: bind - source: ./data/ - target: /data/ + - mode: host + target: 80 + published: 83 configs: - source: mapcache-dev target: /mapcache-template.xml preprocessor: image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:dev + environment: + DEBUG: "true" volumes: - type: tmpfs target: /tmp - - type: bind - source: ./preprocessor/ - target: /preprocessor/ - - type: bind - source: ./data/ - target: /data/ - type: bind source: ./testing/ target: /testing/ + ports: + - mode: host + target: 5678 + published: 5678 networks: extnet: name: vhr18-extnet diff --git a/preprocessor/Dockerfile b/preprocessor/Dockerfile index c0cc8a2e4b9e975fc4241e6b0800852f0fdbb9a2..3964b005042869518b2c72ab76df743fbca46ea7 100644 --- a/preprocessor/Dockerfile +++ b/preprocessor/Dockerfile @@ -25,7 +25,7 @@ # IN THE SOFTWARE. #----------------------------------------------------------------------------- -FROM osgeo/gdal:ubuntu-full-3.2.1 +FROM osgeo/gdal:ubuntu-full-3.3.1 LABEL name="prism view server preprocessor" \ vendor="EOX IT Services GmbH " \ @@ -39,7 +39,8 @@ USER root RUN apt update && \ apt install -y \ - python3-redis python3-keystoneclient python3-swiftclient python3-click python3-setuptools python3-jsonschema wait-for-it && \ + python3-redis python3-keystoneclient python3-swiftclient python3-pip python3-click python3-setuptools python3-jsonschema wait-for-it && \ + pip3 install pystac && \ apt autoremove -y && \ apt clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* @@ -71,6 +72,7 @@ ENV INSTANCE_ID="prism-data-access-server_preprocessor" \ REDIS_PREPROCESS_PROGRESS_KEY="preprocessing_set" \ REDIS_PREPROCESS_SUCCESS_KEY="preprocess-success_set" \ GDAL_PAM_ENABLED="NO" \ + GDAL_ENABLE_DEPRECATED_DRIVER_JPEG2000="YES" \ PREPROCESSOR_DEBUG= ADD run-preprocessor.sh \ diff --git a/preprocessor/entrypoint.sh b/preprocessor/entrypoint.sh index af0b5a975ef5744601a0724ce192a114869bf547..2ccd50fc6e17bb6e3bc2cb1aecd60bd4b35f7f89 100644 --- 
a/preprocessor/entrypoint.sh +++ b/preprocessor/entrypoint.sh @@ -2,6 +2,12 @@ SERVICES=${WAIT_SERVICES:=''} TIMEOUT=${WAIT_TIMEOUT:='15'} +DEBUG=${DEBUG:='false'} + +if [[ "$DEBUG" = 'true' ]] ; then + echo 'Installing debug dependencies' + pip3 install debugpy; +fi if [[ ! -z $SERVICES ]] ; then for service in $SERVICES ; do diff --git a/preprocessor/preprocessor/__main__.py b/preprocessor/preprocessor/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..4cafccbafc79cf668801fe36c883a1a624f98913 --- /dev/null +++ b/preprocessor/preprocessor/__main__.py @@ -0,0 +1,3 @@ +from .cli import cli + +cli() diff --git a/preprocessor/preprocessor/config-schema.yaml b/preprocessor/preprocessor/config-schema.yaml index d5c933bb8b2a9bc5e7c40bdcc40766d57703aa9f..f0d966f444fbd140ae89a3c8c3a533a2dd1b058d 100644 --- a/preprocessor/preprocessor/config-schema.yaml +++ b/preprocessor/preprocessor/config-schema.yaml @@ -38,7 +38,7 @@ properties: description: The local directory, where intermediary files are to be stored. type: string keep_temp: - description: Whether to keep temporary files for each step. DEPRECATED. + description: Whether to keep temporary files for each step. type: boolean metadata_glob: description: A file glob to select metadata files from the downloaded archive. @@ -112,6 +112,9 @@ definitions: description: The definition of the subdataset extraction step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array subdataset_types: description: Mapping of subdataset identifier to output filename postfix for subdatasets to be extracted for each data file. type: object @@ -121,6 +124,9 @@ definitions: georeference: type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. 
+ type: array geotransforms: description: A list of geotransform methods to use type: array @@ -164,6 +170,9 @@ definitions: description: Definition of a calculation step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array formulas: description: A list of formulas to calculate type: array @@ -187,7 +196,7 @@ definitions: description: The output data type for the calculated file. (GDAL notation) type: string formula: - description: The formula to calculate. See gdal_calc.py for details. + description: "The formula to calculate. See gdal_calc.py for details. Can contain custom templates for getting band stats, like '${A_1_statistics_min}'." type: string output_postfix: description: The filename postfix to append to the output filename. By default an enumeration is used. @@ -195,10 +204,16 @@ definitions: nodata_value: description: Use this nodata value in the calculation. type: float + creationOptions: + description: List of creation options for gdal_calc.py output. + type: array stack_bands: description: Definition of a stack bands step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array group_by: description: A regex to group the input datasets, if consisting of multiple file. The first regex group is used for the grouping. type: string @@ -214,6 +229,9 @@ definitions: description: Definition of an output step. type: object properties: + data_file_globs: + description: Custom globs for filtering which files will be used for this step. Overrides data_file_globs configured for whole preprocessor. + type: array options: description: "Options to be passed to `gdal.Warp`. 
See https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details" type: object @@ -230,3 +248,28 @@ definitions: kwargs: description: The map of keyword arguments to pass to that function. type: object + move_files: + type: boolean + default: false + description: "Files will be moved between steps to save space instead of copied." + nested: + type: boolean + default: false + description: "Unpack step will descend into the sub-archives to extract files." + stac_output: + type: boolean + default: false + description: "Switch to generate STAC Item output JSON to output queue." + data_file_globs: + description: "Custom globs for filtering which files will be used as input to this step." + type: array + items: + type: string + stac_band_mapping: + type: object + description: "Mapping of expected band counts to lists of expected common band names." + items: + type: array + description: "List of individual sorted band names." + items: + type: string diff --git a/preprocessor/preprocessor/daemon.py b/preprocessor/preprocessor/daemon.py index 009e1a541365a9a273253bcc1e929e07daecd6cc..99ea06680f524404e9dc210cd2ba6d2d1b7c9b63 100644 --- a/preprocessor/preprocessor/daemon.py +++ b/preprocessor/preprocessor/daemon.py @@ -36,6 +36,7 @@ def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, p client.srem(progress_set, value) if config['target'].get('pass_further_when_exists', True): # pass item to next queue even if file already exists + # TODO fix this for stac_output=true client.lpush(write_queue, value) continue except Exception as e: @@ -56,6 +57,7 @@ def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue, p logger.exception(e) if config['target'].get('pass_further_when_exists', True): # pass item to next queue even if file already exists + # TODO fix this for stac_output=true file_paths.append(browse['browse_identifier']) continue except Exception as e: @@ -63,6 +65,5 @@ def run_daemon(config, host, port, listen_queue, 
listen_md_queue, write_queue, p client.sadd(failure_set, browse['filename']) continue client.srem(progress_set, value) # for browse_report mode - # TODO: convert to string, list, .... for item in file_paths: client.lpush(write_queue, item) diff --git a/preprocessor/preprocessor/metadata.py b/preprocessor/preprocessor/metadata.py index 083d585da796e6c75e2087548b94fe25fadc4205..fcced508ad20045780fa80d911445dcb3db2ecc6 100644 --- a/preprocessor/preprocessor/metadata.py +++ b/preprocessor/preprocessor/metadata.py @@ -1,12 +1,11 @@ - from lxml import etree +from datetime import datetime, timezone def evaluate_xpath(root, xpath): """ """ result = root.xpath(xpath, namespaces=root.nsmap) - print(xpath, result) if result: if isinstance(result, list): return result[0] @@ -14,34 +13,55 @@ def evaluate_xpath(root, xpath): return None -def extract_product_type_and_level(metadata_files, config): +def extract_product_types_and_levels(metadata_files, config): """ """ - product_type = None - product_level = None + product_types = [] + product_levels = [] for metadata_file in metadata_files: with open(metadata_file) as f: tree = etree.parse(f) root = tree.getroot() - if not product_type: - xpaths = config['type_extractor']['xpath'] + xpaths = config['type_extractor']['xpath'] + xpaths = [xpaths] if isinstance(xpaths, str) else xpaths + for xpath in xpaths: + product_type = evaluate_xpath(root, xpath) + if product_type is not None: + product_types.append(product_type) + + xpaths = config['level_extractor']['xpath'] + if xpaths: xpaths = [xpaths] if isinstance(xpaths, str) else xpaths for xpath in xpaths: - product_type = evaluate_xpath(root, xpath) - if product_type: - break - - if not product_level: - xpaths = config['level_extractor']['xpath'] - if xpaths: - xpaths = [xpaths] if isinstance(xpaths, str) else xpaths - for xpath in xpaths: - product_level = evaluate_xpath(root, xpath) - if product_level: - break - - if product_type and product_level: - break - - return product_type, 
product_level \ No newline at end of file + product_level = evaluate_xpath(root, xpath) + if product_level is not None: + product_levels.append(product_level) + + return product_types, product_levels + + +def extract_metadata_for_stac(metadata_files): + """ + Temporary function extracting necessary metadata to create a minimal STAC item. + For now the xpaths are hardcoded here + """ + GSC_SCHEMA = { + 'identifier': '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:identifier/text()', + } + + # just considering the first metadata file + metadata_file = next(iter(metadata_files.keys())) + with open(metadata_file) as f: + tree = etree.parse(f) + root = tree.getroot() + + output_metadata = { + "id": evaluate_xpath(root, GSC_SCHEMA['identifier']), + "geometry": None, # optional + "bbox": None, # optional + "datetime": datetime.now(timezone.utc), + "properties": {}, + "extra_fields": {}, + } + return output_metadata diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py index 15066ff4acda798d466eaf239b9543bd38fa22e8..ca8e2eecb2ffb9363afc6e974680b05935246253 100644 --- a/preprocessor/preprocessor/preprocess.py +++ b/preprocessor/preprocessor/preprocess.py @@ -1,21 +1,19 @@ import os import os.path -import itertools import importlib import logging import shutil -from typing import List from pprint import pformat from urllib.parse import urlparse from .transfer import get_downloader, get_uploader from .archive import unpack_files -from .metadata import extract_product_type_and_level +from .metadata import extract_product_types_and_levels from .steps import ( georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step ) from .steps.browse_report import browse_georeference -from .util import workdir, Timer, get_size_in_bytes +from .util import workdir, Timer, get_size_in_bytes, apply_gdal_config_options, set_gdal_options, flatten, create_simple_stac_item from .exceptions import ExistsAtUploadError 
logging.basicConfig() @@ -67,16 +65,13 @@ STEP_FUNCTIONS = { 'custom_postprocessor': custom_postprocessor, } - -def flatten(l): - return [item for sublist in l for item in sublist] - - # ----------------------------------------------------------------------------- def preprocess_internal(preprocess_config, previous_step='unpack'): force_refresh = False + # apply specific gdal config options + original_config = apply_gdal_config_options(preprocess_config) # make processing steps for step in ['custom_preprocessor', 'subdatasets', 'georeference', 'calc', 'stack_bands', 'output', 'custom_postprocessor']: step_config = preprocess_config.get(step) @@ -108,6 +103,9 @@ def preprocess_internal(preprocess_config, previous_step='unpack'): previous_step = step + # put back original configuration for further steps + set_gdal_options(original_config) + if not os.path.isdir('upload') or force_refresh: try: os.mkdir('upload') @@ -118,8 +116,6 @@ def preprocess_internal(preprocess_config, previous_step='unpack'): copy_files(previous_step, 'upload', move=preprocess_config.get('move_files', False)) - - def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=None): """ Runs the preprocessing of a single file. 
""" @@ -161,12 +157,20 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N metadata_files = unpack_files(source_archive_path, 'extra', glob=config['metadata_glob'], case=config.get('glob_case', False)) # open the XML to retrieve the product type and level - product_type, product_level = extract_product_type_and_level(metadata_files, config) - logger.info('Detected product_type/level_type %s/%s' % (product_type, product_level)) + product_types, product_levels = extract_product_types_and_levels(metadata_files, config) + logger.info('Detected product_types: %s and level_types: %s' % (product_types, product_levels)) # get a concrete configuration for the type, filled with the defaults default_config = dict(config['preprocessing'].get('defaults', {})) - type_based_config = dict(config['preprocessing']['types'].get(product_type, {})) + + type_based_config = {} + for product_type in product_types: + # search metadata product type in configuration until it finds it + configured_preprocessor_config = dict(config['preprocessing']['types'].get(product_type, {})) + if configured_preprocessor_config != {}: + type_based_config = configured_preprocessor_config + break + default_config.update(type_based_config) preprocess_config = default_config logger.debug('Using preprocessing config %s' % pformat(preprocess_config)) @@ -215,23 +219,28 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N if len(os.listdir('upload')) == 0: # end here, so not only metadata file is uploaded raise Exception('No data files to upload, aborting.') - - paths_for_upload = ['upload', 'extra'] - upload_filenames = [] - for path_to_upload in paths_for_upload: - upload_filenames.extend([ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(path_to_upload) - for filename in filenames - ]) + + upload_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('upload') + for filename in filenames + ] + 
+ extra_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('extra') + for filename in filenames + ] # send all files in the upload directory to the target storage logger.info( 'Starting uploading of %d files to %s' - % (len(upload_filenames), file_path) + % (len(upload_filenames + extra_filenames), file_path) ) with Timer() as upload_timer: - uploader.upload(upload_filenames, file_path) + # returns dict local_path:upload_path + upload_images = uploader.upload(upload_filenames, file_path) + upload_extra = uploader.upload(extra_filenames, file_path) logger.info( 'Finished uploading after %.3f seconds.' @@ -243,6 +252,10 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N % (file_path, preprocess_timer.elapsed) ) + if preprocess_config.get('stac_output', False): + # build a stac asset + stac_item = create_simple_stac_item(preprocess_config, upload_images, upload_extra) + return upload_filenames + extra_filenames, stac_item return upload_filenames, file_path @@ -315,24 +328,29 @@ def preprocess_browse(config: dict, browse_type: str, browse_report: dict, brows uploader = get_uploader( target_config['type'], target_config.get('args'), target_config.get('kwargs') ) - paths_for_upload = ['upload', 'extra'] - upload_filenames = [] - for path_to_upload in paths_for_upload: - upload_filenames.extend([ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(path_to_upload) - for filename in filenames - ]) + upload_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('upload') + for filename in filenames + ] + + extra_filenames = [ + os.path.join(dirpath, filename) + for dirpath, _, filenames in os.walk('extra') + for filename in filenames + ] file_path = browse['browse_identifier'] or upload_filenames[0] # send all files in the upload directory to the target storage logger.info( 'Starting uploading of %d files to %s' - % (len(upload_filenames), file_path) + % 
# Mapping from the statistics name used in ${...} templates to its index in
# the list returned by gdal Band.GetStatistics() (min, max, mean, std).
_STAT_INDEX = {"min": 0, "max": 1, "mean": 2, "std": 3}


def _find_input_file(source_dir, target_dir, pattern, context):
    """Return the first file matching *pattern*, looking into source_dir
    first and falling back to target_dir (outputs of previous formulas).

    Raises an Exception mentioning *context* when nothing matches in either
    directory.
    """
    for directory in (source_dir, target_dir):
        matches = glob(join(directory, pattern))
        if matches:
            return matches[0]
    raise Exception('No input file in source or target directory for %s' % context)


def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, formulas: List[dict], data_file_globs: List[str] = None):
    """Run one gdal_calc.py band computation per configured formula.

    Each formula's output filename is derived from its first input file plus
    an 'output_postfix'. Afterwards the original data files matching the
    globs are copied over to target_dir so later steps still see them.
    """
    data_file_globs = data_file_globs or []
    for i, item in enumerate(formulas):
        # use the first input's file as the base for the output filename
        filename = _find_input_file(
            source_dir, target_dir,
            list(item['inputs'].values())[0]['glob'],
            'calc: %s' % item,
        )
        target_filename = join(
            target_dir,
            replace_ext(basename(filename), item.get('output_postfix', '_proc%d' % i) + '.tif', False)
        )
        if isfile(target_filename):
            logger.warning('Calc output filename %s already exists' % target_filename)

        calc_formula(
            source_dir, target_dir, item['inputs'], target_filename,
            item['formula'], item.get('data_type', 'Float32'),
            item.get('nodata_value', None), item.get('creationOptions', []),
        )

    # carry over all original files from the last step matching the data file globs
    for filename in get_all_data_files(source_dir, preprocessor_config, data_file_globs):
        target_filename = join(target_dir, basename(filename))
        if isfile(target_filename):
            logger.warning('Calc output filename %s already exists' % target_filename)
        shutil.copy(filename, target_filename)


def calc_formula(source_dir: os.PathLike, target_dir: os.PathLike, inputs: List[dict], target_filename: os.PathLike, formula: str, data_type: str = "Float32", nodata_value: float = None, creationOptions: List[str] = None):
    """Assemble and run a single gdal_calc.py command.

    *inputs* maps gdal_calc variable names (A, B, ...) to {'glob': ...,
    'band': ...} lookups; statistics placeholders in *formula* are resolved
    via evaluate_formula() before the command is run.
    """
    used_formula = formula
    cmd = [
        "gdal_calc.py",
        "--outfile=%s" % target_filename,
        "--type", data_type,
    ]
    for input_name in inputs:
        # first file matching the glob, source_dir first, target_dir as fallback
        filename = _find_input_file(
            source_dir, target_dir, inputs[input_name]['glob'],
            'formula: %s' % formula,
        )
        band_number = inputs[input_name].get('band', 1)
        cmd.extend([
            "-%s" % input_name, filename,
            "--%s_band=%d" % (input_name, band_number),
        ])
        # BUG FIX: evaluate the already partially substituted formula so the
        # substitutions for *all* inputs survive. Previously the raw formula
        # was re-evaluated on each iteration, so only the last input's
        # placeholder replacements ended up in the command.
        used_formula = evaluate_formula(used_formula, filename, input_name, band_number)

    cmd.append("--calc=%s" % used_formula)

    if nodata_value is not None:
        cmd.append("--NoDataValue=%s" % nodata_value)

    for option in creationOptions or []:
        cmd.extend(["--co", option])

    process = subprocess.run(cmd, capture_output=True, text=True)
    logger.debug("gdal calc stderr: %s" % process.stderr)


def evaluate_formula(formula, filename, input_name, band):
    """Replace ``${<input>_<band>_statistics_<stat>}`` placeholders with the
    corresponding band statistic of *filename*.

    Example formula:
    "10*log10((A.astype(float)-${A_1_statistics_max}+0.0001)/(${A_1_statistics_max}-${A_1_statistics_min}))"

    Only placeholders referring to the current *input_name* and *band* are
    replaced; others are left for later calls.
    """
    evaluated_formula = formula
    if "statistics" not in formula:
        return evaluated_formula
    # all unique templates of the form ${A_1_statistics_max}, keeping order
    templates = list(dict.fromkeys(re.findall(r'(\${[0-9a-zA-Z_]*})', formula)))
    for template in templates:
        # assumes input names and stat names contain no underscores — TODO confirm
        parts = template.replace("${", "").replace("}", "").split("_")
        if len(parts) < 4 or parts[0] != input_name or int(parts[1]) != band:
            # placeholder of another input/band (or malformed): leave as-is
            continue
        stat_index = _STAT_INDEX.get(parts[3])
        if stat_index is None:
            logger.warning("Unknown statistics found in expression %s" % template)
            continue
        src_ds = gdal.Open(filename)
        # force recomputation of statistics instead of trusting cached values
        statistics = src_ds.GetRasterBand(band).GetStatistics(True, True)
        del src_ds
        evaluated_formula = evaluated_formula.replace(template, "%s" % statistics[stat_index])
    return evaluated_formula
a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py index f7e035f6182422f63ec0c1388e8747cd32bfc3c0..15eda4d575146e798998a1e12c27e717e7f87234 100644 --- a/preprocessor/preprocessor/steps/output.py +++ b/preprocessor/preprocessor/steps/output.py @@ -1,14 +1,16 @@ import os from os.path import join, basename from uuid import uuid4 +from typing import List from ..util import replace_ext, gdal, get_all_data_files +from .stack import handle_group_sort, create_groups import logging logger = logging.getLogger(__name__) -def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None): +def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, options: dict=None, data_file_globs: List[str]=[], group_by: str=None, sort_by: str=None, order: List[str]=None): # find out the driver to get the extension options = options if options is not None else {} frmt = options.get('format', 'GTiff') @@ -18,19 +20,22 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c extension = driver.GetMetadata().get('DMD_EXTENSIONS', 'tif').split(' ')[0] # warp each individual file warped_files = [] - filenames = get_all_data_files(source_dir, preprocessor_config) + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) for filename in filenames: target_filename = join(target_dir, replace_ext(basename(filename), extension)) logger.debug('Warping file %s' % filename) gdal.Warp(target_filename, filename, **options) warped_files.append(target_filename) - if len(warped_files) > 1: - tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension)) - logger.debug('Warping files %s' % warped_files) - gdal.Warp(tmp_filename, warped_files, **options) - - # delete old files and rename the combined file to the first filename - for filename in warped_files: - os.unlink(filename) - os.rename(tmp_filename, warped_files[0]) + # if more than 
# module logger (re-bound here so this section is self-contained)
logger = logging.getLogger(__name__)


def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str = None, sort_by: str = None, order: List[str] = None, data_file_globs: List[str] = None):
    """Stack the bands of the individual images into one VRT per group.

    Files are grouped via the *group_by* regex (see create_groups), each
    group is optionally filtered/sorted in place via *sort_by*/*order*, and
    a band-stacked VRT named after the group is built in target_dir.
    """
    filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs or [])
    groups = create_groups(group_by, filenames)
    for groupname, group in groups.items():
        # sort (and optionally filter) the group in place when configured
        handle_group_sort(sort_by, order, group, groupname)

        vrt_filename = replace_ext(join(target_dir, groupname), '.vrt')
        # rotated geotransforms prevent VRT creation, warp them away first
        group_new = remove_rotated_geotransform(group, target_dir)
        logger.debug("Group contents %s" % group_new)
        # build a VRT to stack bands for each group
        gdal.BuildVRT(vrt_filename, group_new, separate=True)


def handle_group_sort(sort_by, order, group, groupname):
    """Sort *group* in place by the first regex group of *sort_by*.

    When *order* is given, entries whose key does not appear in *order* are
    dropped and the rest are sorted by their position in *order*; otherwise
    the keys are sorted lexicographically.

    BUG FIX: the previous version rebound the local name ``group`` only and
    returned nothing, so the caller's list was never actually sorted.
    """
    if not sort_by:
        return
    logger.debug('Handling group before sort %s' % groupname)
    re_sort_by = re.compile(sort_by)

    if order:
        kept = [
            v for v in group
            if re_sort_by.match(v)
            and re_sort_by.match(v).group(1) in order
        ]
        # mutate the caller's list so the sorted order is visible outside
        group[:] = sorted(
            kept,
            key=lambda v: order.index(re_sort_by.match(v).group(1))
        )
    else:
        group.sort(key=lambda v: re_sort_by.match(v).group(1))
def remove_rotated_geotransform(filenames, target_dir):
    """Return *filenames* with rotated datasets replaced by unrotated VRTs.

    Datasets whose geotransform has non-zero rotation terms (indices 2/4)
    cannot be stacked into a VRT, so they are warped to an axis-aligned
    grid first; all other files are passed through unchanged.
    """
    output_filenames = []
    for filename in filenames:
        src_ds = gdal.Open(filename)
        if src_ds is None:
            # fail early with a clear message instead of an AttributeError below
            raise Exception('Could not open %s to inspect its geotransform' % filename)
        geotransform = src_ds.GetGeoTransform()
        if geotransform and (geotransform[2] != 0.0 or geotransform[4] != 0.0):
            # rotated geotransform, needs warping to an unrotated grid
            target_filename = join(
                target_dir,
                replace_ext(basename(filename), '_rotate.vrt', False)
            )
            intermediate_warp(src_ds, target_filename)
            output_filenames.append(target_filename)
        else:
            output_filenames.append(filename)
        del src_ds
    return output_filenames


def intermediate_warp(src_ds, output_path=None, dst_SRS="EPSG:4326"):
    """Warp *src_ds* to an unrotated grid in *dst_SRS* as a VRT at *output_path*."""
    # NOTE(review): a valid nodata value of 0.0 and "no nodata set" are
    # indistinguishable here (both become 0) — confirm this is intended
    nodata = src_ds.GetRasterBand(1).GetNoDataValue() or 0
    gdal.Warp(
        output_path, src_ds, dstSRS=dst_SRS, format="VRT", multithread=True,
        resampleAlg=gdal.GRA_Bilinear, srcNodata=nodata, dstNodata=nodata,
    )
b/preprocessor/preprocessor/steps/subdataset.py @@ -1,12 +1,15 @@ import os from os.path import join, basename -from typing import Dict +from typing import Dict, List +import logging from ..util import replace_ext, gdal, get_all_data_files +logger = logging.getLogger(__name__) -def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, subdataset_types: Dict[str, str]=None): - filenames = get_all_data_files(source_dir, preprocessor_config) + +def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, subdataset_types: Dict[str, str]=None, data_file_globs: List[str]=[]): + filenames = get_all_data_files(source_dir, preprocessor_config, data_file_globs) if len(filenames) == 0: raise Exception('No datafiles were matched by the provided glob') diff --git a/preprocessor/preprocessor/transfer/local.py b/preprocessor/preprocessor/transfer/local.py index f1450f7d6b18d2e0a98232023d5bda5cb6db667a..6fa18b1fae4ba8b599f5ad021d82eae55a4fa82b 100644 --- a/preprocessor/preprocessor/transfer/local.py +++ b/preprocessor/preprocessor/transfer/local.py @@ -25,6 +25,7 @@ class Uploader(Base): """ def upload(self, local_path: Union[os.PathLike, List[os.PathLike]], remote_dir: os.PathLike) -> List[os.PathLike]: paths = local_path if isinstance(local_path, List) else [local_path] + local_remote_mapping = {} remote_paths = [ os.path.join( self.storage_path, @@ -37,8 +38,8 @@ class Uploader(Base): for local_path, remote_path in zip(paths, remote_paths): os.makedirs(os.path.dirname(remote_path), exist_ok=True) shutil.copy2(local_path, remote_path) - - return remote_paths + local_remote_mapping[local_path] = remote_path + return local_remote_mapping def product_exists(self, remote_dir: os.PathLike) -> bool: remote_path = os.path.join(self.storage_path, remote_dir) diff --git a/preprocessor/preprocessor/transfer/swift.py b/preprocessor/preprocessor/transfer/swift.py index 
a58f7fb146530860a1b7af61961b38ea6b6849dc..45e54b504fbcdbaf40eeb2adb471f318c290d3cc 100644 --- a/preprocessor/preprocessor/transfer/swift.py +++ b/preprocessor/preprocessor/transfer/swift.py @@ -75,6 +75,7 @@ class Uploader(Base): """ def upload(self, local_path: Union[os.PathLike, List[os.PathLike]], remote_dir: os.PathLike) -> List[os.PathLike]: paths = local_path if isinstance(local_path, List) else [local_path] + local_remote_mapping = {} container, remote_dir = self.validate_container(remote_dir) remote_paths = [ os.path.join( @@ -83,13 +84,13 @@ class Uploader(Base): ) for path in paths ] - objects = [ - SwiftUploadObject( + objects = [] + for path, remote_path in zip(paths, remote_paths): + objects.append(SwiftUploadObject( path, object_name=remote_path - ) - for path, remote_path in zip(paths, remote_paths) - ] + )) + local_remote_mapping[path] = "%s/%s" % (container, remote_path) max_size = max([os.stat(path).st_size for path in paths]) options = {} @@ -120,7 +121,7 @@ class Uploader(Base): ) raise Exception('Failed to upload %s' % result["error"]) - return remote_paths + return local_remote_mapping def product_exists(self, remote_dir: os.PathLike) -> bool: with self.get_service() as swift: diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index f1b58e6cbb21bcdf7b7bd3bd6952b2879b955b0f..6ce37cc6dc8a8648525e09d08cfd46ef157da68f 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -4,8 +4,12 @@ from contextlib import contextmanager from tempfile import TemporaryDirectory, mkdtemp from time import time from glob import glob +import json +from typing import Dict +from pystac import Item, Asset from .archive import filter_filenames +from .metadata import extract_metadata_for_stac try: from osgeo import gdal @@ -26,6 +30,10 @@ def replace_ext(filename: os.PathLike, new_ext: str, force_dot: bool=True) -> os return splitext(filename)[0] + ('' if new_ext.startswith('.') or not force_dot else '.') 
def flatten(llist):
    """Flatten one level of nesting: ``[[1, 2], [3]] -> [1, 2, 3]``."""
    return [item for sublist in llist for item in sublist]


def get_all_data_files(source_dir, preprocessor_config, data_file_globs=None):
    """Return all unique data file paths below *source_dir* matching any of
    the configured globs.

    *data_file_globs*, when non-empty, overrides the 'data_file_globs' of
    the preprocessor configuration (which defaults to every file).
    """
    # get all file paths recursively
    file_paths = [
        p for p in glob(join(source_dir, '**'), recursive=True)
        if not os.path.isdir(p)
    ]
    used_globs = data_file_globs if data_file_globs else preprocessor_config.get('data_file_globs', ['*'])
    file_paths_filt = []
    for dataglob in used_globs:
        file_paths_filt += filter_filenames(
            file_paths, dataglob, preprocessor_config.get('glob_case', False)
        )
    # deduplicate while keeping order (overlapping globs can match twice)
    return list(dict.fromkeys(file_paths_filt))


def apply_gdal_config_options(preprocessor_config):
    """Apply the step's 'gdal_config_options' ('KEY=VALUE' strings) to gdal.

    Returns a dict of the original values so they can be restored via
    set_gdal_options() once preprocessing is done.
    """
    original_gdal_config_options = {}
    for config_option in preprocessor_config.get('gdal_config_options', []):
        key, _, val = config_option.partition('=')
        original_gdal_config_options[key] = gdal.GetConfigOption(key)
        gdal.SetConfigOption(key, val)
    return original_gdal_config_options


def set_gdal_options(config_options):
    """Set a key/value dictionary of configuration options on gdal."""
    for key, value in config_options.items():
        gdal.SetConfigOption(key, value)


def _stac_bands(preprocessor_config, band_count):
    """Build the ``eo:bands`` descriptors for an asset with *band_count* bands.

    Uses the 'stac_band_mapping' configuration (band count -> common name or
    list of common names) when available; falls back to generic 'bandN'
    names, also for bands beyond the length of a configured mapping
    (previously such a mismatch raised an IndexError).
    """
    mapping = preprocessor_config.get('stac_band_mapping') or {}
    common_names = mapping.get(band_count) or []
    if isinstance(common_names, str):
        common_names = [common_names]
    bands = []
    for j in range(band_count):
        default_name = "band%s" % (j + 1)
        bands.append({
            "name": default_name,
            "common_name": common_names[j] if j < len(common_names) else default_name,
        })
    return bands


def create_simple_stac_item(preprocessor_config: dict, upload_files: Dict[str, str], extra_files: Dict[str, str]):
    """Temporary method creating a minimal STAC item from information about
    products uploaded and metadata files uploaded.

    Accepts: 'upload_files' dictionary of uploaded images, where key is the
    local path and value is the remote path; 'extra_files' dictionary of
    sidecar/metadata files in the same form. The metadata file to build the
    STAC information from is assumed to be the first one yielded by the
    iterator. Returns the STAC item serialized as a JSON string.
    """
    # get relevant metadata from GSC
    metadata = extract_metadata_for_stac(extra_files)
    stac_item = Item(
        id=metadata["id"],
        geometry=metadata["geometry"],
        bbox=metadata["bbox"],
        datetime=metadata["datetime"],
        properties=metadata["properties"],
        extra_fields=metadata["extra_fields"]
    )
    for i, metadata_file in enumerate(extra_files.values()):
        stac_item.add_asset('gsc_metadata_%s' % i, Asset(
            href=metadata_file,
            title="GSC Metadata file %s" % i,
            description="Accompanying GSC Metadata file copied over from source archive",
            media_type="application/xml",
            roles=["metadata"],
            extra_fields={},
        ))

    for counter, (image_path, uploaded_path) in enumerate(sorted(upload_files.items()), start=1):
        title = description = "Preprocessed image"
        # find out number of bands of the local image
        ds = gdal.Open(image_path)
        band_count = ds.RasterCount
        del ds
        if len(upload_files) > 1:
            # TODO move this to configuration
            # assumes a PAN+MS product when several images are present — confirm
            if band_count == 1:
                title += " - PAN"
                description += " - Panchromatic"
            else:
                title += " - MS"
                description += " - Multispectral"
        # single image: generic title (TODO move content to configuration)
        stac_item.add_asset('preprocessed_image_%s' % counter, Asset(
            href=uploaded_path,
            title=title,
            description=description,
            media_type="image/tiff; application=geotiff; profile=cloud-optimized",
            roles=["visual"],
            extra_fields={
                "eo:bands": _stac_bands(preprocessor_config, band_count)
            },
        ))

    return json.dumps(stac_item.to_dict(False))