diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 25357e679999f4de870690aa1c2428faa5492691..658937aad7216c60be89e3761c8e8b82192f273b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -33,8 +33,11 @@ build-master:
     - IMAGE_5="$CI_REGISTRY_IMAGE/fluentd"
     - docker pull "$IMAGE_5":latest || true
     - docker build --cache-from "$IMAGE_5":latest -t "$IMAGE_5":latest -t "$IMAGE_5":$VERSION_5 fluentd/
-    - chmod +x gitlab_test.sh
-    - ./gitlab_test.sh
+    - VERSION_6=`grep 'version="*"' ingestor/Dockerfile | cut -d '"' -f2`
+    - IMAGE_6="$CI_REGISTRY_IMAGE/pvs_ingestor"
+    - docker pull "$IMAGE_6":latest || true
+    - docker build --cache-from "$IMAGE_6":latest -t "$IMAGE_6":latest -t "$IMAGE_6":$VERSION_6 ingestor/
+    - cd ./testing && ./gitlab_test.sh && cd -
     - docker push "$IMAGE_1":$VERSION_1
     - docker push "$IMAGE_1":latest
     - docker push "$IMAGE_2":$VERSION_2
@@ -45,6 +48,8 @@ build-master:
     - docker push "$IMAGE_4":latest
     - docker push "$IMAGE_5":$VERSION_5
     - docker push "$IMAGE_5":latest
+    - docker push "$IMAGE_6":$VERSION_6
+    - docker push "$IMAGE_6":latest
   only:
     - master
 
@@ -76,8 +81,10 @@ build:
     - docker pull "$IMAGE":latest || true
     - docker build --cache-from "$IMAGE":latest -t "$IMAGE":$CI_COMMIT_REF_SLUG -t "$IMAGE":$CI_COMMIT_REF_NAME fluentd/
     - docker tag "$IMAGE:$CI_COMMIT_REF_NAME" "$IMAGE:latest"
-    - chmod +x gitlab_test.sh
-    - ./gitlab_test.sh
+    - IMAGE="$CI_REGISTRY_IMAGE/pvs_ingestor"
+    - docker pull "$IMAGE":$CI_COMMIT_REF_NAME || true
+    - docker build --cache-from "$IMAGE":latest -t "$IMAGE":$CI_COMMIT_REF_SLUG -t "$IMAGE":$CI_COMMIT_REF_NAME ingestor/
+    - cd ./testing && ./gitlab_test.sh && cd -
   except:
     - master
-  
+
diff --git a/README.md b/README.md
index 09e0cf3f2ba7e33316619592d04751723e523c3e..4e74a1aab9fd822b5250f9492434ecf25c4eed57 100644
--- a/README.md
+++ b/README.md
@@ -171,6 +171,8 @@ docker build cache/ --cache-from registry.gitlab.eox.at/esa/prism/vs/pvs_cache -
 docker build preprocessor/ --cache-from registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor -t registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor
 docker build client/ --cache-from registry.gitlab.eox.at/esa/prism/vs/pvs_client -t registry.gitlab.eox.at/esa/prism/vs/pvs_client
 docker build fluentd/ --cache-from registry.gitlab.eox.at/esa/prism/vs/fluentd -t registry.gitlab.eox.at/esa/prism/vs/fluentd
+docker build ingestor/ --cache-from registry.gitlab.eox.at/esa/prism/vs/pvs_ingestor -t registry.gitlab.eox.at/esa/prism/vs/pvs_ingestor
+
 ```
 Or pull them from the registry:
 ```
@@ -207,11 +209,9 @@ docker stack rm vhr18-pvs                      # stop stack
 docker volume rm vhr18-pvs_db-data                        # delete volumes
 docker volume rm vhr18-pvs_redis-data
 docker volume rm vhr18-pvs_traefik-data
-docker volume rm vhr18-pvs_cache-db
 docker volume rm vhr18-pvs_instance-data
 ```
 
-
 ### Setup logging
 
 To access the logs, navigate to http://localhost:5601 . Ignore all of the fancy enterprise capabilities and select Kibana > Discover in the hamburger menu.
@@ -221,6 +221,21 @@ Since we only have fluentd, you can just use `*` as index pattern.
 Select `@timestamp` as time field
 ([see also](https://www.elastic.co/guide/en/kibana/current/tutorial-define-index.html)).
 
+
+### Setup SFTP
+
+The `SFTP` image allows remote access to two logging folders. You can define (edit/add) users, passwords, and UID/GID in the respective configuration file (e.g. *config/vhr18_sftp_users.conf*).
+
+The default username is `eox`. Once the stack is deployed, you can SFTP into the logging folders through port 2222 on localhost (if you run the dev stack):
+
+```bash
+sftp -P 2222 eox@127.0.0.1
+```
+You will be logged in to the `/home/eox/data` directory, which contains the two logging directories: `to/panda` and `from/fepd`.
+
+**NOTE:** The mounted directory you are directed into is *`/home/user`*, where `user` is the username. Hence, when changing the username in the `.conf` file, the `sftp` mounted volume paths in `docker-compose.<collection>.yml` must be changed accordingly.
+
+
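+For example, with the default `eox` user, the compose files in this repository mount the two report volumes as shown in the excerpt below; if you renamed the user to, say, `alice` (a hypothetical name), the targets would have to become `/home/alice/data/...`:
+
+```yaml
+sftp:
+  volumes:
+    - type: volume
+      source: report-data            # same volume the registrar writes its reports to
+      target: /home/eox/data/to/panda
+    - type: volume
+      source: from-fepd
+      target: /home/eox/data/from/fepd
+```
+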
 # Documentation
 
 ## Installation
diff --git a/cache/configure.sh b/cache/configure.sh
index d2d3c3ceb60556161c26a26f033e4a59afe2045c..e362050f8d11d2ed6805113b927d90cae05667c6 100755
--- a/cache/configure.sh
+++ b/cache/configure.sh
@@ -25,8 +25,6 @@ cd -
 
 chown -R www-data:www-data "${INSTALL_DIR}"
 
-mkdir -p "/cache-db/${COLLECTION}"
-
 
 if [ ! -f "${APACHE_CONF}" ] ; then
     echo "Adding Apache configuration"
diff --git a/cache/install.sh b/cache/install.sh
index d7c73c713b74893b196c6f9acaeedbf504fd0de6..64859f909e9e87b78c30e282597597fe9178af7f 100755
--- a/cache/install.sh
+++ b/cache/install.sh
@@ -10,7 +10,7 @@ add-apt-repository -y ppa:schpidi2/eox
 apt update
 
 echo "Installing packages"
-VERSION=1.8.0-1~bionic1eox6
+VERSION=1.10.0-1~bionic0eox2
 DEBIAN_FRONTEND=noninteractive apt install -y \
     libmapcache1=${VERSION} libapache2-mod-mapcache=${VERSION} mapcache-tools=${VERSION} \
     sqlite3 curl apache2 python3-dateutil python3-redis wait-for-it postgresql-client
diff --git a/config/dem_mapcache-dev.xml b/config/dem_mapcache-dev.xml
index 42332ca2a01f47508c3cafd718e065f3039619b5..6a96f676635350eec2f45ba8c758d12f6c80dde2 100644
--- a/config/dem_mapcache-dev.xml
+++ b/config/dem_mapcache-dev.xml
@@ -1,4 +1,7 @@
 <mapcache>
+  <connection_pool>
+    <max_connections>15</max_connections>
+  </connection_pool>
   <default_format>mixed</default_format>
   <format name="mypng" type="PNG">
     <compression>fast</compression>
diff --git a/config/dem_mapcache-ops.xml b/config/dem_mapcache-ops.xml
index a9f2a2aa67b9a3dcf96cb48384f9c85a00314b6e..9fb0f007876fb62570d586609609935b74866043 100644
--- a/config/dem_mapcache-ops.xml
+++ b/config/dem_mapcache-ops.xml
@@ -1,4 +1,7 @@
 <mapcache>
+  <connection_pool>
+    <max_connections>15</max_connections>
+  </connection_pool>
   <default_format>mixed</default_format>
   <format name="mypng" type="PNG">
     <compression>fast</compression>
diff --git a/config/dem_preprocessor-config.yml b/config/dem_preprocessor-config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3ae30fe3a7b0cf59ba735aabe6e05db529ff4fb1
--- /dev/null
+++ b/config/dem_preprocessor-config.yml
@@ -0,0 +1,55 @@
+source:
+  type: swift
+  kwargs:
+    username: !env '${OS_USERNAME_DOWNLOAD}'
+    password: !env '${OS_PASSWORD_DOWNLOAD}'
+    tenant_name: !env '${OS_TENANT_NAME_DOWNLOAD}'
+    tenant_id: !env '${OS_TENANT_ID_DOWNLOAD}'
+    region_name: !env '${OS_REGION_NAME_DOWNLOAD}'
+    auth_url: !env '${OS_AUTH_URL_DOWNLOAD}'
+    auth_version: !env '${ST_AUTH_VERSION_DOWNLOAD}'
+    user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}'
+target:
+  type: swift
+  replace: false
+  kwargs:
+    username: !env '${OS_USERNAME}'
+    password: !env '${OS_PASSWORD}'
+    tenant_name: !env '${OS_TENANT_NAME}'
+    tenant_id: !env '${OS_TENANT_ID}'
+    region_name: !env '${OS_REGION_NAME}'
+    auth_version: !env '${ST_AUTH_VERSION}'
+    auth_url: !env '${OS_AUTH_URL}'
+    user_domain_name: !env '${OS_USER_DOMAIN_NAME}'
+    container: !env '${UPLOAD_CONTAINER}'
+workdir: /tmp
+keep_temp: false
+metadata_glob: '*GSC*.xml'
+type_extractor:
+  xpath:
+    - /gsc:report/gsc:sar_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:productType/text()
+level_extractor:
+  # xpath can also be a list of xpaths to be tried one after another
+  xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/')
+preprocessing:
+  defaults:
+    move_files: true
+    data_file_globs: # needs to be taken only from DEM sub-folder, otherwise previews get in
+      - '**/DEM/*.dt2'
+      - '**/DEM/*.dt1'
+      - '**/DEM/*.dt0'
+      - '**/DEM/*.tif'
+    output:
+      options:
+        format: COG
+        dstSRS: 'EPSG:4326'
+        dstNodata: 0
+        creationOptions:
+          - BLOCKSIZE=512
+          - COMPRESS=DEFLATE
+          - NUM_THREADS=8
+          - BIGTIFF=IF_SAFER
+          - OVERVIEWS=AUTO
+  types:
+    SAR_DGE_30: # just to pass validation
+      nested: true
diff --git a/config/dem_sftp_users.conf b/config/dem_sftp_users.conf
new file mode 100644
index 0000000000000000000000000000000000000000..a26dadbcb59ae265463143670502c500c72f06c1
--- /dev/null
+++ b/config/dem_sftp_users.conf
@@ -0,0 +1 @@
+***REMOVED***:1001:100
\ No newline at end of file
diff --git a/config/emg_init-db.sh b/config/emg_init-db.sh
index 73f9c1129b1767c66ccd09c6e920e143190b9328..5944a22d495d77b97a2b2e7dfa9cee068742c984 100644
--- a/config/emg_init-db.sh
+++ b/config/emg_init-db.sh
@@ -92,7 +92,7 @@ if python3 manage.py id check "${COLLECTION}"; then
             --blue-range 0 255 \
             --red-nodata 0 \
             --green-nodata 0 \
-            --blue-nodata 0        
+            --blue-nodata 0
         # EQ02_3
         python3 manage.py producttype create "${COLLECTION}"_Product_EQ02_3 --traceback \
             --coverage-type "RGB"
@@ -125,7 +125,7 @@ if python3 manage.py id check "${COLLECTION}"; then
             --blue-range 0 500 \
             --red-nodata 0 \
             --green-nodata 0 \
-            --blue-nodata 0  
+            --blue-nodata 0
         # EQ02_4
         python3 manage.py producttype create "${COLLECTION}"_Product_EQ02_4 --traceback \
             --coverage-type "RGBNir"
@@ -204,7 +204,7 @@ if python3 manage.py id check "${COLLECTION}"; then
             --blue-range 0 800 \
             --red-nodata 0 \
             --green-nodata 0 \
-            --blue-nodata 0  
+            --blue-nodata 0
         # EW02_4
         python3 manage.py producttype create "${COLLECTION}"_Product_EW02_4 --traceback \
             --coverage-type "RGBNir"
@@ -307,7 +307,7 @@ if python3 manage.py id check "${COLLECTION}"; then
             --blue-range 0 800 \
             --red-nodata 0 \
             --green-nodata 0 \
-            --blue-nodata 0  
+            --blue-nodata 0
         # EW03_4
         python3 manage.py producttype create "${COLLECTION}"_Product_EW03_4 --traceback \
             --coverage-type "RGBNir"
@@ -851,19 +851,6 @@ if python3 manage.py id check "${COLLECTION}"; then
         python3 manage.py browsetype create "${COLLECTION}"_Product_SP07 "NDVI" --traceback \
             --grey "(nir-red)/(nir+red)" --grey-range -1 1
         # PH1A
-        python3 manage.py producttype create "${COLLECTION}"_Product_PH1A --traceback \
-            --coverage-type "RGBNir"
-        python3 manage.py browsetype create "${COLLECTION}"_Product_PH1A  --traceback \
-            --red "red" \
-            --green "green" \
-            --blue "blue" \
-            --red-range 1 1000 \
-            --green-range 1 1000 \
-            --blue-range 1 1000 \
-            --red-nodata 0 \
-            --green-nodata 0 \
-            --blue-nodata 0
-        # PH1A
         python3 manage.py producttype create "${COLLECTION}"_Product_PH1A --traceback \
             --coverage-type "RGBNir"
         python3 manage.py browsetype create "${COLLECTION}"_Product_PH1A  --traceback \
diff --git a/config/emg_mapcache-dev.xml b/config/emg_mapcache-dev.xml
index c768c922323784283a61e9111860b00c28b8f4b8..279ecd3118ef90a7e0acaacb4526d8b10a0b50cb 100644
--- a/config/emg_mapcache-dev.xml
+++ b/config/emg_mapcache-dev.xml
@@ -1,4 +1,7 @@
 <mapcache>
+  <connection_pool>
+    <max_connections>15</max_connections>
+  </connection_pool>
   <default_format>mixed</default_format>
   <format name="mypng" type="PNG">
     <compression>fast</compression>
diff --git a/config/emg_mapcache-ops.xml b/config/emg_mapcache-ops.xml
index bb4c17ab7c975b2a75de27aec62e19f4bf274f0f..02cd349edadae751c895b0e0d0fd36e848261ed4 100644
--- a/config/emg_mapcache-ops.xml
+++ b/config/emg_mapcache-ops.xml
@@ -1,4 +1,7 @@
 <mapcache>
+  <connection_pool>
+    <max_connections>15</max_connections>
+  </connection_pool>
   <default_format>mixed</default_format>
   <format name="mypng" type="PNG">
     <compression>fast</compression>
diff --git a/config/emg_preprocessor-config.yml b/config/emg_preprocessor-config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..dc6fc7b08763e5069c0839ea9409f36c6336c271
--- /dev/null
+++ b/config/emg_preprocessor-config.yml
@@ -0,0 +1,173 @@
+source:
+  type: swift
+  kwargs:
+    username: !env '${OS_USERNAME_DOWNLOAD}'
+    password: !env '${OS_PASSWORD_DOWNLOAD}'
+    tenant_name: !env '${OS_TENANT_NAME_DOWNLOAD}'
+    tenant_id: !env '${OS_TENANT_ID_DOWNLOAD}'
+    region_name: !env '${OS_REGION_NAME_DOWNLOAD}'
+    auth_url: !env '${OS_AUTH_URL_DOWNLOAD}'
+    auth_version: !env '${ST_AUTH_VERSION_DOWNLOAD}'
+    user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}'
+# target:
+#   type: swift
+#   kwargs:
+#     auth_version: !env{{ST_AUTH_VERSION}}
+#     auth_url: "!env{{OS_AUTH_URL}}"
+#     username: "!env{{OS_USERNAME}}"
+#     password: "!env{{OS_PASSWORD}}"
+#     tenant_name: !env{{OS_TENANT_NAME}}
+#     tenant_id: !env{{OS_TENANT_ID}}
+#     region_name: !env{{OS_REGION_NAME}}
+#     user_domain_name: !env{{OS_USER_DOMAIN_NAME}}
+target:
+  type: local
+  replace: true
+  kwargs:
+    storage_path: /mnt/data/target
+
+workdir: /mnt/data/workdir
+keep_temp: true
+
+# metadata file to look for in downloaded tar/zip file
+metadata_glob: "*GSC*.xml"
+
+# extractors for Product type / level
+type_extractor:
+  # xpath can also be a list of xpaths to be tried one after another
+  xpath:
+    - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()
+    - /gsc:report/gsc:sar_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()
+  map: # optional mapping from extracted type name to used product type name
+    PHR_FUS__3: PH00
+
+level_extractor:
+  # xpath can also be a list of xpaths to be tried one after another
+  xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/')
+  map: # optional mapping
+
+
+preprocessing:
+  defaults:
+    output:
+      options:
+        # WarpOptions (see https://gdal.org/python/osgeo.gdal-module.html#WarpOptions)
+        format: COG
+        dstSRS: "EPSG:4326"
+        dstNodata: 0
+        creationOptions:
+          - BLOCKSIZE=512
+          - COMPRESS=DEFLATE
+          - NUM_THREADS=8
+          - BIGTIFF=IF_SAFER
+          - OVERVIEWS=AUTO
+  types:
+    KS03:
+      nested: true
+      data_file_globs:
+        - "*.tif"
+      additional_file_globs:
+        - "*.rpc"
+      georeference:
+      stack_bands:
+        # stack all bands for each scene in the product
+        group_by: ".*/(.*)_P..tif"
+        sort_by: ".*_P(R|G|B|N).tif"
+        order:
+          - R
+          - G
+          - B
+          - N
+
+
+
+
+    RS02: # as extracted/translated above
+      # whether the package can contain sub-packages of TARs/ZIPs
+      nested: true
+      # glob selectors to look for source images in the source package
+      data_file_globs:
+        - "*.TIF"
+      additional_file_globs:
+        - "*.rpc"
+
+      # a custom preprocessor function to be called on all selected files
+      # custom_preprocessor:
+      #   path: "path.to.some.module:attribute"
+      #   # TODO: specify args/kwargs and pass meaningful parameters
+
+      georeference:
+        # georeference each file individually
+        # - type: geotransform # one of geotransform, RPC, GCP, world file
+        # - type: GCP
+
+
+      stack_bands:
+        # stack all bands for each scene in the product
+        group_by: # TODO: figure out a way to get a grouping. e.g: part of the filename using regex?
+
+      output:
+
+      # define a custom postprocessor function to be called on the processed file
+      # custom_postprocessor:
+      #   path: "path.to.some.module:attribute"
+      #   # TODO: specify args/kwargs and pass meaningful parameters
+
+    CS00:
+      nested: true
+      data_file_globs:
+        - "*.h5"
+
+      subdatasets:
+        data_file_glob: '*/*/*.h5'
+        subdataset_types:
+          '//S01/SBI': 'S01_SBI'
+
+      georeference:
+        type: corners
+        corner_names: ["S01_SBI_Bottom_Left_Geodetic_Coordinates", "S01_SBI_Bottom_Right_Geodetic_Coordinates", "S01_SBI_Top_Left_Geodetic_Coordinates", "S01_SBI_Top_Right_Geodetic_Coordinates"]
+        orbit_direction_name: Orbit_Direction
+        force_north_up: false
+        # gcp_srid:
+
+      calc:
+        formulas:
+          - inputs:
+              A:
+                glob: '*.tif'
+                band: 1
+              B:
+                glob: '*.tif'
+                band: 2
+            data_type: Float32
+            formula: sqrt(A.astype(float)*A.astype(float)+B.astype(float)*B.astype(float))
+            output_postfix: _proc
+            nodata_value: 0
+      output:
+        options:
+          # WarpOptions (see https://gdal.org/python/osgeo.gdal-module.html#WarpOptions)
+          format: "COG"
+          dstSRS: "EPSG:3857"
+          dstNodata: 0
+          creationOptions:
+            - BLOCKSIZE=512
+            - COMPRESS=DEFLATE
+            - LEVEL=6
+            - OVERVIEWS=AUTO
+            - NUM_THREADS=8
+            - BIGTIFF=IF_SAFER
+            - RESAMPLING=CUBIC
+    CS01:
+      nested: true
+      data_file_globs:
+        - "*.h5"
+
+      subdatasets:
+        data_file_glob: '*/*.h5'
+        subdataset_types:
+          '//S01/SBI': 'S01_SBI'
+
+      georeference:
+        type: corners
+
+# this configuration is still a stub
diff --git a/config/emg_sftp_users.conf b/config/emg_sftp_users.conf
new file mode 100644
index 0000000000000000000000000000000000000000..a26dadbcb59ae265463143670502c500c72f06c1
--- /dev/null
+++ b/config/emg_sftp_users.conf
@@ -0,0 +1 @@
+***REMOVED***:1001:100
\ No newline at end of file
diff --git a/config/vhr18_mapcache-dev.xml b/config/vhr18_mapcache-dev.xml
index 7cf52772adc81ad07ec72616f6650f95404e175c..0935921a46bd44e8a948a088b8d9d2fb0a699984 100644
--- a/config/vhr18_mapcache-dev.xml
+++ b/config/vhr18_mapcache-dev.xml
@@ -1,4 +1,7 @@
 <mapcache>
+  <connection_pool>
+    <max_connections>15</max_connections>
+  </connection_pool>
   <default_format>mixed</default_format>
   <format name="mypng" type="PNG">
     <compression>fast</compression>
diff --git a/config/vhr18_mapcache-ops.xml b/config/vhr18_mapcache-ops.xml
index b5afd384f6b2e1bb6c0e5d5eff82170266ce0a21..91ab89d0a27c0ba4a499f5bd4f1a2407c5b55c04 100644
--- a/config/vhr18_mapcache-ops.xml
+++ b/config/vhr18_mapcache-ops.xml
@@ -1,4 +1,7 @@
 <mapcache>
+  <connection_pool>
+    <max_connections>15</max_connections>
+  </connection_pool>
   <default_format>mixed</default_format>
   <format name="mypng" type="PNG">
     <compression>fast</compression>
diff --git a/config/vhr18_preprocessor-config.yml b/config/vhr18_preprocessor-config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..359c52da54a21f67d2b38ab45d88671dbe404d23
--- /dev/null
+++ b/config/vhr18_preprocessor-config.yml
@@ -0,0 +1,53 @@
+source:
+  type: swift
+  kwargs:
+    username: !env '${OS_USERNAME_DOWNLOAD}'
+    password: !env '${OS_PASSWORD_DOWNLOAD}'
+    tenant_name: !env '${OS_TENANT_NAME_DOWNLOAD}'
+    tenant_id: !env '${OS_TENANT_ID_DOWNLOAD}'
+    region_name: !env '${OS_REGION_NAME_DOWNLOAD}'
+    auth_url: !env '${OS_AUTH_URL_DOWNLOAD}'
+    auth_version: !env '${ST_AUTH_VERSION_DOWNLOAD}'
+    user_domain_name: !env '${OS_USER_DOMAIN_NAME_DOWNLOAD}'
+target:
+  type: swift
+  replace: false
+  kwargs:
+    username: !env '${OS_USERNAME}'
+    password: !env '${OS_PASSWORD}'
+    tenant_name: !env '${OS_TENANT_NAME}'
+    tenant_id: !env '${OS_TENANT_ID}'
+    region_name: !env '${OS_REGION_NAME}'
+    auth_version: !env '${ST_AUTH_VERSION}'
+    auth_url: !env '${OS_AUTH_URL}'
+    user_domain_name: !env '${OS_USER_DOMAIN_NAME}'
+    container: !env '${UPLOAD_CONTAINER}'
+workdir: /tmp
+keep_temp: false
+metadata_glob: '*GSC*.xml'
+type_extractor:
+  xpath:
+    - /gsc:report/gsc:opt_metadata/gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()
+level_extractor:
+  # xpath can also be a list of xpaths to be tried one after another
+  xpath: substring-after(substring-after(/gsc:report/gsc:opt_metadata/gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text(), '/'), '/')
+preprocessing:
+  defaults:
+    move_files: true
+    data_file_globs:
+      - '*.tif'
+      - '*.jp2'
+    output:
+      options:
+        format: COG
+        dstSRS: 'EPSG:4326'
+        dstNodata: 0
+        creationOptions:
+          - BLOCKSIZE=512
+          - COMPRESS=DEFLATE
+          - NUM_THREADS=8
+          - BIGTIFF=IF_SAFER
+          - OVERVIEWS=AUTO
+  types:
+    PH1B: # just to pass validation
+      nested: true
diff --git a/config/vhr18_sftp_users.conf b/config/vhr18_sftp_users.conf
new file mode 100644
index 0000000000000000000000000000000000000000..a26dadbcb59ae265463143670502c500c72f06c1
--- /dev/null
+++ b/config/vhr18_sftp_users.conf
@@ -0,0 +1 @@
+***REMOVED***:1001:100
\ No newline at end of file
diff --git a/core/Dockerfile b/core/Dockerfile
index 85a0d289043ed7fcbaf7437332fc68598051f381..33baf310685828ebc93ededfc5b9816786491945 100644
--- a/core/Dockerfile
+++ b/core/Dockerfile
@@ -70,7 +70,8 @@ ENV INSTANCE_ID="prism-view-server_core" \
     REDIS_REGISTER_QUEUE_KEY= \
     REDIS_REGISTERED_SET_KEY= \
     INIT_SCRIPTS="/configure.sh" \
-    COLLECT_STATIC="false"
+    COLLECT_STATIC="false" \
+    REGISTRAR_REPLACE=
 
 ADD rgbnir_definition.json \
     configure.sh \
diff --git a/core/configure.sh b/core/configure.sh
index 943475d4bc273ce64e6f5814babfe8679f9d1d80..3966e886e6f9e09c4c1a41c3fe2b1542466db9e5 100644
--- a/core/configure.sh
+++ b/core/configure.sh
@@ -51,4 +51,4 @@
 
     chmod g+w -R .
     chgrp users -R .
-} 1> &2
\ No newline at end of file
+} 1>&2
diff --git a/core/entrypoint.sh b/core/entrypoint.sh
index 8d82bbf7d54a26f1e90d89ec61f4beec452e216b..a8e57f88358df480cba3f7e99640f455eb84f427 100644
--- a/core/entrypoint.sh
+++ b/core/entrypoint.sh
@@ -5,7 +5,7 @@ TIMEOUT=${WAIT_TIMEOUT:='15'}
 
 if [[ ! -z $SERVICES ]] ; then
     for service in $SERVICES ; do
-        wait-for-it -t $TIMEOUT $service > &2
+        wait-for-it -t $TIMEOUT $service >&2
     done
 fi
 
diff --git a/core/initialized.sh b/core/initialized.sh
index f0fdcf300b33b7eaf55ee431b0e9120af632b2e7..07fc196a6d0925e93e78227c33bfef0b53635832 100644
--- a/core/initialized.sh
+++ b/core/initialized.sh
@@ -1,4 +1,4 @@
 #!/bin/bash -e
 
 touch "${INSTANCE_DIR}/.initialized"
-echo "Instance ${INSTANCE_ID} is initialized" > &2
+echo "Instance ${INSTANCE_ID} is initialized" >&2
diff --git a/core/registrar.py b/core/registrar.py
index c501b6cd22e2de7f695dba6018f2ee41e288b179..cb052a74a3433ecd6a261397525c28ca89f61c19 100644
--- a/core/registrar.py
+++ b/core/registrar.py
@@ -35,14 +35,17 @@ import argparse
 import textwrap
 import logging
 import traceback
-import redis
+from xml.sax.saxutils import escape
+import subprocess
 
+import redis
 import lxml.etree
 from swiftclient.service import SwiftService
 
 import django
 from django.db import transaction
 from django.contrib.gis.geos import GEOSGeometry
+from osgeo import gdal
 
 path = os.path.join(os.getenv('INSTALL_DIR', "/var/www/pvs"), "pvs_instance")
 if path not in sys.path:
@@ -52,7 +55,9 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pvs_instance.settings")
 django.setup()
 
 from eoxserver.backends import access
+from eoxserver.contrib import vsi
 from eoxserver.backends import models as backends
+from eoxserver.core.util.timetools import isoformat
 from eoxserver.resources.coverages import models
 from eoxserver.resources.coverages.registration.product import (
     ProductRegistrator
@@ -63,7 +68,6 @@ from eoxserver.resources.coverages.registration.registrators.gdal import (
 
 logger = logging.getLogger(__name__)
 
-
 def setup_logging(verbosity):
     # start logging setup
     # get command line level
@@ -87,6 +91,16 @@ def setup_logging(verbosity):
     # finished logging setup
 
 
+def set_gdal_swift_auth():
+    # parsing command line output of swift auth
+    auth_keys = subprocess.check_output(["swift", "auth"]).decode(sys.stdout.encoding).split("\n")
+    storage_url = auth_keys[0].split("OS_STORAGE_URL=")[1]
+    auth_token = auth_keys[1].split("OS_AUTH_TOKEN=")[1]
+    # setting gdal config
+    gdal.SetConfigOption("SWIFT_STORAGE_URL", storage_url)
+    gdal.SetConfigOption("SWIFT_AUTH_TOKEN", auth_token)
+
+
 def add_mask(product):
     metadata_item = product.metadata_items.all()[0]
     with access.vsi_open(metadata_item) as f:
@@ -98,6 +112,7 @@ def add_mask(product):
     )[0]
     geometry = GEOSGeometry(wkt)
     mask_type = models.MaskType.objects.get(product_type=product.product_type)
+    logger.debug("Adding mask")
     models.Mask.objects.create(
         product=product,
         mask_type=mask_type,
@@ -115,7 +130,7 @@ def get_product_type_and_level(metadata_item):
         xp = '//gml:using/eop:EarthObservationEquipment/eop:platform/eop:Platform/eop:shortName/text()'
         product_type_name = tree.xpath(xp, namespaces=root.nsmap)[0]
     except Exception as e:
-        logger.warning(
+        logger.debug(
             'Failed to determine product type of %s, error was %s'
             % (metadata_item.location, e)
         )
@@ -124,7 +139,6 @@ def get_product_type_and_level(metadata_item):
         xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'
         parent_identifier = tree.xpath(xp, namespaces=root.nsmap)[0]
 
-        print("parent identifier --->", parent_identifier)
         if parent_identifier.endswith('Level_1'):
             level = 'Level_1'
         if parent_identifier.endswith('Level_3'):
@@ -132,7 +146,7 @@ def get_product_type_and_level(metadata_item):
         else:
             raise Exception('Invalid parent identifier type name %s' % parent_identifier)
     except Exception as e:
-        logger.warning(
+        logger.debug(
             'Failed to determine product level of %s, error was %s'
             % (metadata_item.location, e)
         )
@@ -140,36 +154,72 @@ def get_product_type_and_level(metadata_item):
     return product_type_name, level
 
 
+def get_product_collection(metadata_file):
+    # in case collection needs to be determined from metadata
+    try:
+        if metadata_file.startswith("/vsiswift"):
+            set_gdal_swift_auth()
+        with vsi.open(metadata_file, "r") as f:
+            tree = lxml.etree.parse(f)
+        root = tree.getroot()
+        xp = '//gml:metaDataProperty/gsc:EarthObservationMetaData/eop:parentIdentifier/text()'
+        product_type_name = tree.xpath(xp, namespaces=root.nsmap)
+        extracted = product_type_name[0].split('/')[0]
+        return extracted
+    except Exception as e:
+        logger.debug(
+            'Failed to determine product collection for metadata file %s, error was %s'
+            % (metadata_file, e)
+        )
+
+
+def get_product_type_from_band_count(product_type_name, file_path):
+    # get raster band count via gdal
+    logger.debug("Opening file using GDAL: %s" % file_path)
+    if file_path.startswith("/vsiswift"):
+        set_gdal_swift_auth()
+    src_ds = gdal.Open(file_path)
+    if src_ds is None:
+        raise RegistrationError("Band check: failed to open dataset: %s " % file_path)
+    # try to fetch product model with _bandcount
+    product_type_name_upd = "%s_%s" % (product_type_name, src_ds.RasterCount)
+    try:
+        product_type_model = models.ProductType.objects.get(name=product_type_name_upd)
+        return product_type_model
+    except models.ProductType.DoesNotExist:
+        raise RegistrationError("Product Type: '%s' was not found" % product_type_name_upd)
+
+
 class RegistrationError(Exception):
     pass
 
 
 @transaction.atomic
 def registrar(
-    collection,
-    objects_prefix, upload_container, replace=False, client=None, registered_set_key=None
+    collection_stack,
+    objects_prefix, upload_container=None, replace=False, client=None, registered_set_key=None,
+    reporting_dir=None, service_url=None
+
 ):
     logger.info("Starting registration of product '%s'." % objects_prefix)
 
-    metadata_package, data_package, has_vrt = None, None, None
-
+    metadata_package, data_package = None, None
+    if not upload_container:
+        # assuming objects_prefix = bucket/itemname
+        upload_container = objects_prefix.partition("/")[0]
+        objects_prefix = objects_prefix.partition("/")[2]
     with SwiftService() as swift:
         list_parts_gen = swift.list(
             container=upload_container, options={"prefix": objects_prefix},
         )
         for page in list_parts_gen:
-            print(page)
             if page["success"]:
                 for item in page["listing"]:
                     if item["name"].endswith(".xml"):
                         metadata_package = item["name"]
                     elif item["name"].endswith(".TIF") or \
                             item["name"].endswith(".tif"):
-                        if has_vrt is not True:
-                            data_package = item["name"]
-                    elif item["name"].endswith(".vrt"):
                         data_package = item["name"]
-                        has_vrt = True
                     elif not item["name"].endswith(".tar"):
                         raise RegistrationError(
                             "Product with objects prefix '%s' has "
@@ -187,58 +237,95 @@ def registrar(
             "Product with objects prefix '%s' has missing content."
             % objects_prefix
         )
+    logger.debug("Found objects '%s' and '%s'." % (data_package, metadata_package))
 
     storage = backends.Storage.objects.get(name=upload_container)
     metadata_item = models.MetaDataItem(storage=storage, location=metadata_package)
 
     product_type, level = get_product_type_and_level(metadata_item)
+    if collection_stack == 'DEM':
+        # special for DEM files, collection name === product_type
+        gdal_metadata_file_path = "/vsiswift/%s/%s" % (upload_container, metadata_package)
+        product_type = get_product_collection(gdal_metadata_file_path)
+    logger.debug("Registering product")
+    product_type_name = "%s_Product_%s" % (collection_stack, product_type)
+
+    try:
+        # first find product type by name from path
+        product_type_model = models.ProductType.objects.get(name=product_type_name)
+    except models.ProductType.DoesNotExist:
+        # if not found, maybe there are more product types with _bandcount suffix
+        gdal_file_path = "/vsiswift/%s/%s" % (upload_container, data_package)
+        product_type_model = get_product_type_from_band_count(product_type_name, gdal_file_path)
+        product_type_name = product_type_model.name
+    coverage_type_names = product_type_model.allowed_coverage_types.all()
+    if len(coverage_type_names) > 1:
+        logger.warning("More than one available 'CoverageType' found, selecting the first one.")
+    coverage_type_name = coverage_type_names[0].name
 
     product, replaced = ProductRegistrator().register(
         metadata_locations=[[upload_container,
                              metadata_package, ], ],
-        type_name="%s_Product_%s" % (collection, product_type),
+        type_name=product_type_name,
         replace=replace,
         extended_metadata=True,
         mask_locations=None,
         package_path=None,
+        simplify_footprint_tolerance=0.0001,  # ~10meters
         overrides={},
     )
+    if product.footprint.empty:
+        product.delete()
+        raise RegistrationError("No footprint was extracted. full product: %s" % product)
 
     collection = models.Collection.objects.get(
-        identifier=collection
+        identifier=collection_stack
     )
+    logger.debug("Inserting product into collection %s" % collection_stack)
     models.collection_insert_eo_object(collection, product)
 
+    if collection_stack == "DEM":
+        # also insert it to its own collection
+        collection_own = models.Collection.objects.get(
+            identifier="%s_%s" % (collection, product_type)
+        )
+        logger.debug("Inserting product to collection %s_%s" % (collection, product_type))
+        models.collection_insert_eo_object(collection_own, product)
+
     if level == 'Level_1':
         collection_level_1 = models.Collection.objects.get(
             identifier="%s_Level_1" % collection
         )
+        logger.debug("Inserting product to collection %s_Level_1" % collection)
         models.collection_insert_eo_object(collection_level_1, product)
     elif level == 'Level_3':
         collection_level_3 = models.Collection.objects.get(
             identifier="%s_Level_3" % collection
         )
+        logger.debug("Inserting product to collection %s_Level_3" % collection)
         models.collection_insert_eo_object(collection_level_3, product)
 
+    logger.debug("Registering coverage")
     report = GDALRegistrator().register(
         data_locations=[[upload_container, data_package, ], ],
         metadata_locations=[[upload_container,
                              metadata_package, ], ],
-        coverage_type_name="RGBNir",
+        coverage_type_name=coverage_type_name,
         overrides={
             "identifier": "%s__coverage" % product.identifier,
             "footprint": None,
         },
         replace=replace,
     )
+    logger.debug("Adding coverage to product")
     models.product_add_coverage(product, report.coverage)
 
     try:
         add_mask(product)
     except Exception as e:
-        logger.info("Couldn't add mask.")
+        logger.debug("Couldn't add mask.")
         logger.debug(traceback.format_exc())
-        logger.warning("%s: %s\n" % (type(e).__name__, str(e)))
+        logger.debug("%s: %s\n" % (type(e).__name__, str(e)))
 
     if client is not None:
         logger.debug(
@@ -252,6 +339,40 @@ def registrar(
             )
         )
 
+    timestamp = product.inserted.strftime("%Y%m%dT%H%M%S")
+
+    if reporting_dir is not None:
+        with open(os.path.join(reporting_dir, 'item_%s_%s.xml' % (timestamp, product.identifier)),'w') as f:
+            f.write(textwrap.dedent("""\
+                <?xml version="1.0" encoding="UTF-8"?>
+                <DataAccessItem
+                    xsi:schemaLocation="http://www.telespazio.com/CSCDA/CDD/PDAS PDAS_interfaces%2020190924_1916.xsd"
+                    xmlns="http://www.telespazio.com/CSCDA/CDD/PDAS"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+                  <identifier>{identifier}</identifier>
+                  <BROWSE_AVAILABILITY_DATETIME>{availability_time}</BROWSE_AVAILABILITY_DATETIME>
+                  <URL>
+                      <Service>WCS</Service>
+                      <URL>{wms_capabilities_url}</URL>
+                  </URL>
+                  <URL>
+                      <Service>WMS</Service>
+                      <URL>{wcs_capabilities_url}</URL>
+                  </URL>
+                </DataAccessItem>
+            """.format(
+                identifier=escape(product.identifier),
+                availability_time=escape(isoformat(product.inserted)),
+                wcs_capabilities_url=escape(
+                    '%s/ows?service=wcs&request=GetCapabilities&cql=identifier="%s"'
+                    % (service_url, product.identifier)
+                ),
+                wms_capabilities_url=escape(
+                    '%s/ows?service=wms&request=GetCapabilities&cql=identifier="%s"'
+                    % (service_url, product.identifier)
+                ),
+            )))
+
     logger.info(
         "Successfully finished registration of product '%s'." % objects_prefix
     )
@@ -263,6 +384,8 @@ def registrar_redis_wrapper(
     replace=False, host="localhost", port=6379,
     register_queue_key="register_queue",
     registered_set_key="registered_set",
+    reporting_dir=None,
+    service_url=None,
 ):
     client = redis.Redis(
         host=host, port=port, charset="utf-8", decode_responses=True
@@ -277,7 +400,9 @@ def registrar_redis_wrapper(
                 upload_container,
                 replace=replace,
                 client=client,
-                registered_set_key=registered_set_key
+                registered_set_key=registered_set_key,
+                reporting_dir=reporting_dir,
+                service_url=service_url,
             )
         except Exception as e:
             logger.debug(traceback.format_exc())
@@ -321,6 +446,12 @@ if __name__ == "__main__":
     parser.add_argument(
         "--redis-port", type=int, default=6379
     )
+    parser.add_argument(
+        "--reporting-dir",
+    )
+    parser.add_argument(
+        "--service-url",
+    )
 
     parser.add_argument(
         "-v", "--verbosity", type=int, default=3, choices=[0, 1, 2, 3, 4],
@@ -341,8 +472,7 @@ if __name__ == "__main__":
 
     upload_container = os.environ.get('UPLOAD_CONTAINER')
     if upload_container is None:
-        logger.critical("UPLOAD_CONTAINER environment variable not set.")
-        sys.exit(1)
+        logger.warning("UPLOAD_CONTAINER environment variable not set. Assuming it is part of the objects prefix as 'bucket/item'.")
 
     if arg_values.mode == "standard":
         registrar(
@@ -350,6 +480,8 @@ if __name__ == "__main__":
             arg_values.objects_prefix,
             upload_container,
             replace=arg_values.replace,
+            reporting_dir=arg_values.reporting_dir,
+            service_url=arg_values.service_url,
         )
     else:
         registrar_redis_wrapper(
@@ -360,4 +492,6 @@ if __name__ == "__main__":
             port=arg_values.redis_port,
             register_queue_key=arg_values.redis_register_queue_key,
             registered_set_key=arg_values.redis_registered_set_key,
+            reporting_dir=arg_values.reporting_dir,
+            service_url=arg_values.service_url,
         )
diff --git a/core/run-httpd.sh b/core/run-httpd.sh
index 30f876a5eb22d6b07381e9be28258a79b3b57b65..44498e3c1f6338dd0ecb4a49941038e544f44a49 100644
--- a/core/run-httpd.sh
+++ b/core/run-httpd.sh
@@ -2,4 +2,4 @@
 
 
 echo "Running gunicorn"
-exec gunicorn --chdir ${INSTALL_DIR}/pvs_instance/ --bind :80 pvs_instance.wsgi:application --workers 8 --max-requests 10 --max-requests-jitter 3 --worker-class sync --timeout 120 --access-logfile - --error-logfile - --log-level warning --disable-redirect-access-to-syslog 2> &1
+exec gunicorn --chdir ${INSTALL_DIR}/pvs_instance/ --bind :80 pvs_instance.wsgi:application --workers 8 --max-requests 10 --max-requests-jitter 3 --worker-class sync --timeout 120 --access-logfile - --error-logfile - --log-level warning --disable-redirect-access-to-syslog 2>&1
diff --git a/core/run-registrar.sh b/core/run-registrar.sh
index 088f4bfa4b6cefb06e868ae6159021361d1de1bc..348b4f75081870185eb370e84b568f3f46254cd6 100644
--- a/core/run-registrar.sh
+++ b/core/run-registrar.sh
@@ -1,5 +1,18 @@
 #!/bin/sh
 
-echo "Running registrar" > &2
+echo "Running registrar" >&2
+replace=""
+if test "$REGISTRAR_REPLACE" = true; then
+    replace="--replace"
+fi
 
-python3 /registrar.py --mode redis --redis-host ${REDIS_HOST} --redis-port ${REDIS_PORT} --redis-register-queue-key ${REDIS_REGISTER_QUEUE_KEY} --redis-registered-set-key ${REDIS_REGISTERED_SET_KEY} > &2
+python3 /registrar.py \
+    --mode redis \
+    --redis-host ${REDIS_HOST} \
+    --redis-port ${REDIS_PORT} \
+    --redis-register-queue-key ${REDIS_REGISTER_QUEUE_KEY} \
+    --redis-registered-set-key ${REDIS_REGISTERED_SET_KEY} \
+    --reporting-dir ${REPORTING_DIR} \
+    --service-url ${SERVICE_URL} \
+    ${replace} >&2
diff --git a/core/wait-initialized.sh b/core/wait-initialized.sh
index da9746eeb2f90aec198cb15c97580baa2117421a..95afa5bde5cc5642492125f53e914d0d8ddfa9a1 100644
--- a/core/wait-initialized.sh
+++ b/core/wait-initialized.sh
@@ -1,7 +1,7 @@
 #!/bin/bash -e
 
 until [ -f "${INSTANCE_DIR}/.initialized" ] ; do
-    echo "Waiting until instance ${INSTANCE_ID} is initialized" > &2
+    echo "Waiting until instance ${INSTANCE_ID} is initialized" >&2
     sleep 3
     # TODO: timeout?
 done
diff --git a/docker-compose.dem.dev.yml b/docker-compose.dem.dev.yml
index 3e213c68cebc843d445f5ec9c4110bd6cb44b28b..8ac49a6c078ec3cdc21e82686f8aee3543ea070f 100644
--- a/docker-compose.dem.dev.yml
+++ b/docker-compose.dem.dev.yml
@@ -9,8 +9,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-        driver: "fluentd"
   client:
     ports:
       - "80:80"
@@ -21,8 +19,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   renderer:
     ports:
       - "81:80"
@@ -31,8 +27,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   registrar:
     volumes:
       - type: bind
@@ -53,8 +47,6 @@ services:
     configs:
       - source: mapcache-dev
         target: /mapcache-template.xml
-    logging:
-      driver: "fluentd"
   preprocessor:
     volumes:
       - type: tmpfs
@@ -65,8 +57,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
 networks:
   extnet:
     name: dem-extnet
diff --git a/docker-compose.dem.yml b/docker-compose.dem.yml
index 7b496f1be491dd5c2f8d9989ddfab8c611b96cfd..16de9b1d7b9d8baf45e384ccc1aaf6d24c720558 100644
--- a/docker-compose.dem.yml
+++ b/docker-compose.dem.yml
@@ -14,6 +14,11 @@ services:
         constraints: [node.role == manager]
     networks:
       - intnet
+    command: ["postgres", "-c", "max_connections=300"]
+    sysctls:
+      net.ipv4.tcp_keepalive_time: 600
+      net.ipv4.tcp_keepalive_probes: 5
+      net.ipv4.tcp_keepalive_intvl: 10
   redis:
     image: redis
     volumes:
@@ -28,9 +33,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
       - type: volume
         source: instance-data
         target: /var/www/pvs
@@ -60,9 +62,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
     env_file:
       - env/dem.env
       - env/dem_db.env
@@ -83,9 +82,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
     env_file:
       - env/dem.env
       - env/dem_obs.env
@@ -106,10 +102,12 @@ services:
       - env/dem.env
       - env/dem_obs.env
       - env/dem_redis.env
-      - env/dem_preprocessor.env
     environment:
       INSTANCE_ID: "prism-view-server_preprocessor"
       WAIT_SERVICES: "redis:6379"
+    configs:
+      - source: preprocessor-config
+        target: /config.yaml
     deploy:
       replicas: 1
     networks:
@@ -124,6 +122,9 @@ services:
       - type: volume
         source: instance-data
         target: /var/www/pvs
+      - type: volume
+        source: report-data
+        target: /mnt/reports/
     env_file:
       - env/dem.env
       - env/dem_db.env
@@ -138,6 +139,7 @@ services:
       INIT_SCRIPTS: "/configure.sh /init-db.sh /initialized.sh"
       STARTUP_SCRIPTS: "/wait-initialized.sh"
       WAIT_SERVICES: "redis:6379 database:5432"
+      REPORTING_DIR: '/mnt/reports/'
     configs:
       - source: init-db
         target: /init-db.sh
@@ -151,7 +153,36 @@ services:
     image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:latest
     deploy:
       replicas: 1
+  sftp:
+    image: atmoz/sftp:latest
+    volumes:
+      - type: volume
+        source: report-data
+        target: /home/eox/data/to/panda
+      - type: volume
+        source: from-fepd
+        target: /home/eox/data/from/fepd
+    configs:
+      - source: sftp-users
+        target: /etc/sftp/users.conf
+
+    ports:
+        - "2222:22"
+    deploy:
+      replicas: 1
+  ingestor:
+    image: registry.gitlab.eox.at/esa/prism/vs/pvs_ingestor:latest
+    env_file:
+      - env/dem_redis.env
+    environment:
+      INSTANCE_ID: "prism-view-server_ingestor"
+    deploy:
+      replicas: 1
+    networks:
+      - intnet
 configs:
+  sftp-users:
+    file: ./config/dem_sftp_users.conf
   init-db:
     file: ./config/dem_init-db.sh
   mapcache-dev:
@@ -162,10 +193,13 @@ configs:
     file: ./config/dem_index-dev.html
   client-ops:
     file: ./config/dem_index-ops.html
+  preprocessor-config:
+    file: ./config/dem_preprocessor-config.yml
 volumes:
   db-data:
   redis-data:
-  cache-db:
   instance-data:
+  from-fepd:
+  report-data:
 networks:
   intnet:
diff --git a/docker-compose.emg.dev.yml b/docker-compose.emg.dev.yml
index 8e8c7d65bfbddd9875a8e4e9441ab53510c040d9..af436d2e83c8e6a65fe69ad5473896b4ec523bd1 100644
--- a/docker-compose.emg.dev.yml
+++ b/docker-compose.emg.dev.yml
@@ -9,8 +9,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   client:
     ports:
       - "80:80"
@@ -21,8 +19,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   renderer:
     ports:
       - "81:80"
@@ -31,8 +27,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   registrar:
     volumes:
       - type: bind
@@ -41,8 +35,6 @@ services:
       - type: bind
         source: ./core/
         target: /core/
-    logging:
-      driver: "fluentd"
   cache:
     ports:
       - "83:80"
@@ -53,8 +45,6 @@ services:
     configs:
       - source: mapcache-dev
         target: /mapcache-template.xml
-    logging:
-      driver: "fluentd"
   preprocessor:
     volumes:
       - type: tmpfs
@@ -65,8 +55,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
 networks:
   extnet:
     name: emg-extnet
diff --git a/docker-compose.emg.yml b/docker-compose.emg.yml
index 807f1e48fa2fe19dbf0ec94e649653e81a215abb..4fcd5168c7b88c936527fe5412ec83d8e6fd1add 100644
--- a/docker-compose.emg.yml
+++ b/docker-compose.emg.yml
@@ -14,6 +14,11 @@ services:
         constraints: [node.role == manager]
     networks:
       - intnet
+    command: ["postgres", "-c", "max_connections=300"]
+    sysctls:
+      net.ipv4.tcp_keepalive_time: 600
+      net.ipv4.tcp_keepalive_probes: 5
+      net.ipv4.tcp_keepalive_intvl: 10
   redis:
     image: redis
     volumes:
@@ -28,9 +33,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
       - type: volume
         source: instance-data
         target: /var/www/pvs
@@ -60,9 +62,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
     env_file:
       - env/emg.env
       - env/emg_db.env
@@ -83,9 +82,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
     env_file:
       - env/emg.env
       - env/emg_obs.env
@@ -100,16 +96,28 @@ services:
       - intnet
     command:
       ["/run-seeder.sh"]
+  ingestor:
+    image: registry.gitlab.eox.at/esa/prism/vs/pvs_ingestor:latest
+    env_file:
+      - env/emg_redis.env
+    environment:
+      INSTANCE_ID: "prism-view-server_ingestor"
+    deploy:
+      replicas: 1
+    networks:
+      - intnet
   preprocessor:
     image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:latest
     env_file:
       - env/emg.env
       - env/emg_obs.env
       - env/emg_redis.env
-      - env/emg_preprocessor.env
     environment:
       INSTANCE_ID: "prism-view-server_preprocessor"
       WAIT_SERVICES: "redis:6379"
+    configs:
+      - source: preprocessor-config
+        target: /config.yaml
     deploy:
       replicas: 1
     networks:
@@ -124,6 +132,9 @@ services:
       - type: volume
         source: instance-data
         target: /var/www/pvs
+      - type: volume
+        source: report-data
+        target: /mnt/reports/
     env_file:
       - env/emg.env
       - env/emg_db.env
@@ -138,6 +149,7 @@ services:
       INIT_SCRIPTS: "/configure.sh /init-db.sh /initialized.sh"
       STARTUP_SCRIPTS: "/wait-initialized.sh"
       WAIT_SERVICES: "redis:6379 database:5432"
+      REPORTING_DIR: '/mnt/reports/'
     configs:
       - source: init-db
         target: /init-db.sh
@@ -151,7 +163,26 @@ services:
     image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:latest
     deploy:
       replicas: 1
+  sftp:
+    image: atmoz/sftp:latest
+    volumes:
+      - type: volume
+        source: report-data
+        target: /home/eox/data/to/panda
+      - type: volume
+        source: from-fepd
+        target: /home/eox/data/from/fepd
+    configs:
+      - source: sftp-users
+        target: /etc/sftp/users.conf
+
+    ports:
+        - "2222:22"
+    deploy:
+      replicas: 1
 configs:
+  sftp-users:
+    file: ./config/emg_sftp_users.conf
   init-db:
     file: ./config/emg_init-db.sh
   mapcache-dev:
@@ -162,10 +193,13 @@ configs:
     file: ./config/emg_index-dev.html
   client-ops:
     file: ./config/emg_index-ops.html
+  preprocessor-config:
+    file: ./config/emg_preprocessor-config.yml
 volumes:
   db-data:
   redis-data:
-  cache-db:
   instance-data:
+  from-fepd:
+  report-data:
 networks:
   intnet:
diff --git a/docker-compose.logging.dev.yml b/docker-compose.logging.dev.yml
index 6ce3f23db24f96836bfef5b3f17bc2af06d83931..d749cb97edd584b85c8bbe46b708d06e2653ee1f 100644
--- a/docker-compose.logging.dev.yml
+++ b/docker-compose.logging.dev.yml
@@ -11,6 +11,24 @@ services:
       resources:
         limits:
           memory: 500M
+  database:
+    logging:
+      driver: "fluentd"
+  client:
+    logging:
+      driver: "fluentd"
+  renderer:
+    logging:
+      driver: "fluentd"
+  registrar:
+    logging:
+      driver: "fluentd"
+  cache:
+    logging:
+      driver: "fluentd"
+  preprocessor:
+    logging:
+      driver: "fluentd"
   kibana:
     ports:
       - "5601:5601"
diff --git a/docker-compose.vhr18.dev.yml b/docker-compose.vhr18.dev.yml
index 1a576b4abd0748a1d93811616801719a3568df97..e7c46c3f2348a66e5fa40df62c5c86dc5557d3de 100644
--- a/docker-compose.vhr18.dev.yml
+++ b/docker-compose.vhr18.dev.yml
@@ -9,8 +9,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   client:
     ports:
       - "80:80"
@@ -21,8 +19,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   renderer:
     ports:
       - "81:80"
@@ -31,8 +27,6 @@ services:
       - type: bind
         source: ./data/
         target: /data/
-    logging:
-      driver: "fluentd"
   registrar:
     volumes:
       - type: bind
@@ -41,8 +35,6 @@ services:
       - type: bind
         source: ./core/
         target: /core/
-    logging:
-      driver: "fluentd"
   cache:
     ports:
       - "83:80"
@@ -53,8 +45,6 @@ services:
     configs:
       - source: mapcache-dev
         target: /mapcache-template.xml
-    logging:
-      driver: "fluentd"
   preprocessor:
     volumes:
       - type: tmpfs
@@ -62,8 +52,6 @@ services:
       - type: bind
         source: ./preprocessor/
         target: /preprocessor/
-    logging:
-      driver: "fluentd"
 networks:
   extnet:
     name: vhr18-extnet
diff --git a/docker-compose.vhr18.yml b/docker-compose.vhr18.yml
index 870e34271a010f8c0689fd2e3afd9d491a0dca41..dbaa05bf63427e90deaf1fa82f1f81488214bb5b 100644
--- a/docker-compose.vhr18.yml
+++ b/docker-compose.vhr18.yml
@@ -14,6 +14,11 @@ services:
         constraints: [node.role == manager]
     networks:
       - intnet
+    command: ["postgres", "-c", "max_connections=300"]
+    sysctls:
+      net.ipv4.tcp_keepalive_time: 600
+      net.ipv4.tcp_keepalive_probes: 5
+      net.ipv4.tcp_keepalive_intvl: 10
   redis:
     image: redis
     volumes:
@@ -28,9 +33,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
       - type: volume
         source: instance-data
         target: /var/www/pvs
@@ -60,9 +62,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
     env_file:
       - env/vhr18.env
       - env/vhr18_db.env
@@ -86,9 +85,6 @@ services:
     volumes:
       - type: tmpfs
         target: /tmp
-      - type: volume
-        source: cache-db
-        target: /cache-db
     env_file:
       - env/vhr18.env
       - env/vhr18_obs.env
@@ -103,16 +99,28 @@ services:
       - intnet
     command:
       ["/run-seeder.sh"]
+  ingestor:
+    image: registry.gitlab.eox.at/esa/prism/vs/pvs_ingestor:latest
+    env_file:
+      - env/vhr18_redis.env
+    environment:
+      INSTANCE_ID: "prism-view-server_ingestor"
+    deploy:
+      replicas: 1
+    networks:
+      - intnet
   preprocessor:
     image: registry.gitlab.eox.at/esa/prism/vs/pvs_preprocessor:latest
     env_file:
       - env/vhr18.env
       - env/vhr18_obs.env
       - env/vhr18_redis.env
-      - env/vhr18_preprocessor.env
     environment:
       INSTANCE_ID: "prism-view-server_preprocessor"
       WAIT_SERVICES: "redis:6379"
+    configs:
+      - source: preprocessor-config
+        target: /config.yaml
     deploy:
       replicas: 1
     networks:
@@ -127,6 +135,9 @@ services:
       - type: volume
         source: instance-data
         target: /var/www/pvs
+      - type: volume
+        source: report-data
+        target: /mnt/reports/
     env_file:
       - env/vhr18.env
       - env/vhr18_db.env
@@ -141,6 +152,7 @@ services:
       INIT_SCRIPTS: "/configure.sh /init-db.sh /initialized.sh"
       STARTUP_SCRIPTS: "/wait-initialized.sh"
       WAIT_SERVICES: "redis:6379 database:5432"
+      REPORTING_DIR: '/mnt/reports/'
     configs:
       - source: init-db
         target: /init-db.sh
@@ -154,7 +166,30 @@ services:
     image: registry.gitlab.eox.at/esa/prism/vs/pvs_client:latest
     deploy:
       replicas: 1
+  sftp:
+    image: atmoz/sftp:latest
+    volumes:
+      - type: volume
+        source: report-data
+        target: /home/eox/data/to/panda
+      - type: volume
+        source: from-fepd
+        target: /home/eox/data/from/fepd
+    configs:
+      - source: sftp-users
+        target: /etc/sftp/users.conf
+    deploy:
+      replicas: 1
+
+    ports:
+        - "2222:22"
 configs:
+  sftp-users:
+    file: ./config/vhr18_sftp_users.conf
   init-db:
     file: ./config/vhr18_init-db.sh
   mapcache-dev:
@@ -165,10 +200,13 @@ configs:
     file: ./config/vhr18_index-dev.html
   client-ops:
     file: ./config/vhr18_index-ops.html
+  preprocessor-config:
+    file: ./config/vhr18_preprocessor-config.yml
 volumes:
   db-data:
   redis-data:
-  cache-db:
   instance-data:
+  from-fepd:
+  report-data:
 networks:
   intnet:
diff --git a/documentation/operator-guide/configuration.rst b/documentation/operator-guide/configuration.rst
index 327832e46be3b71202293e4df20d59354518231b..578bd680b47cbe2e61596c6357ca9edbf89c2866 100644
--- a/documentation/operator-guide/configuration.rst
+++ b/documentation/operator-guide/configuration.rst
@@ -117,8 +117,6 @@ The following ``.env`` files are typically used:
   django admin user to be used with the admin GUI.
 * ``<stack-name>_obs.env``: This contains access parameters for the object
   storage(s).
-* ``<stack-name>_preprocessor.env``: Preprocessor related environment
-  variables
 * ``<stack-name>_redis.env``: Redis access credentials and queue names
 
 
@@ -173,6 +171,7 @@ retrieve the original product files:
 * ``OS_REGION_NAME_DOWNLOAD``
 * ``OS_AUTH_URL_DOWNLOAD``
 * ``ST_AUTH_VERSION_DOWNLOAD``
+* ``OS_USER_DOMAIN_NAME_DOWNLOAD``
 
 VS Environment Variables
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -193,6 +192,8 @@ parts.
   to.
 * ``DJANGO_USER``, ``DJANGO_MAIL``, ``DJANGO_PASSWORD`` -
   The Django admin user account credentials to use the Admin GUI.
+* ``REPORTING_DIR`` -
+  This sets the directory to write the reports of the registered products to.
 
 .. note::
   These variables are used during the initial stack setup. When these
@@ -240,18 +241,256 @@ such a configuration file is defined and the used in a service:
 
 The following configuration files are used throughout the VS:
 
-* ``<stack-name>_init-db.sh``: This shell script file's purpose is to set up
-  the EOxServer instance used by both the renderer and registrar.
-* ``<stack-name>_index-dev.html``/``<stack-name>_index-ops.html``: The
-  clients main HTML page, containing various client settings. The ``dev`` one
-  is used for development only, whereas the ``ops`` one is used for operational
-  deployment.
-* ``<stack-name>_mapcache-dev.xml``/``<stack-name>_mapcache-ops.xml``: The
-  configuration file for MapCache, the software powering the cache service.
-  Similarly to the client configuration files, the ``dev`` and ``ops`` files
-  used for development and operational usage respectively. Further
-  documentation can be found at `the official site
-  <https://mapserver.org/mapcache/config.html>`_.
+``<stack-name>_init-db.sh``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This shell script file's purpose is to set up the EOxServer instance used by
+both the renderer and registrar.
+
+``<stack-name>_index-dev.html``/``<stack-name>_index-ops.html``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The client's main HTML page, containing various client settings. The ``dev``
+one is used for development only, whereas the ``ops`` one is used for
+operational deployment.
+
+``<stack-name>_mapcache-dev.xml``/``<stack-name>_mapcache-ops.xml``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The configuration file for MapCache, the software powering the cache service.
+As with the client configuration files, the ``dev`` and ``ops`` files are used
+for development and operational usage, respectively. Further documentation can
+be found at `the official site <https://mapserver.org/mapcache/config.html>`_.
+
+``<stack-name>_preprocessor-config.yaml``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The configuration for the preprocessing service, describing how the files to
+be ingested are processed.
+
+The files use YAML and are structured in the following fashion; a minimal
+example is given at the end of this section:
+
+source/target
+
+  Here, the source file storage and the target file storage are configured.
+  Each can either be a local directory or an OpenStack Swift object storage.
+
+workdir
+
+  The workdir can be configured to determine where the intermediate files are
+  placed. This can be convenient for debugging and development.
+
+keep_temp
+
+  This boolean decides whether the temporary directory for the preprocessing
+  will be cleaned up after processing has finished. This is also convenient
+  for development.
+
+metadata_glob
+
+  This file glob is used to determine the main metadata file to extract the
+  product type from. This file will be searched for in the downloaded package.
+
+glob_case
+
+  Whether all globs are used in a case-sensitive way.
+
+type_extractor
+
+  This setting configures how the product type is extracted from the previously
+  extracted metadata. In the ``xpath`` setting, one or more XPath expressions
+  can be supplied to fetch the product type. Each XPath is tried until one
+  produces a result. These results can then be mapped using the ``map``
+  dictionary.
+
+level_extractor
+
+  This section works very similarly to the ``type_extractor``, but for the
+  product level. The product level is currently not used.
+
+preprocessing
+
+  This is the actual preprocessing configuration. It is split into defaults
+  and product-type-specific settings. The defaults are applied where no
+  setting is supplied for a specific type. The product type is the one
+  extracted earlier.
+
+  defaults
+
+    This section allows configuring any of the available steps. Each step
+    configuration can be overridden in a specific product type configuration.
+
+    The available steps are as follows:
+
+    custom_preprocessor
+
+      A custom Python function to be called.
+
+      path
+
+        The Python module path to the function to call.
+
+      args
+
+        A list of arguments to pass to the function.
+
+      kwargs
+
+        A dictionary of keyword arguments to pass to the function.
+
+    subdatasets
+
+      What subdatasets to extract and how to name them.
+
+      data_file_glob
+
+        A file glob pattern to select files to extract from.
+
+      subdataset_types
+
+        Mapping of subdataset identifier to output filename postfix for
+        subdatasets to be extracted for each data file.
+
+    georeference
+
+      How the extracted files shall be georeferenced.
+
+      type
+
+        The type of georeferencing to apply. One of ``gcp``, ``rpc``,
+        ``corner``, ``world``.
+
+      options
+
+        Additional options for the georeferencing. Depends on the type of
+        georeferencing.
+
+        order
+
+          The polynomial order to use for GCP related georeferencing.
+
+        projection
+
+          The projection to use for ungeoreferenced images.
+
+        rpc_file_template
+
+          The file glob template to use to find the RPC file. Template
+          parameters are {filename}, {fileroot}, and {extension}.
+
+        warp_options
+
+          Warp options. See
+          https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for
+          details.
+
+        corner_names
+
+          The metadata field name including the corner names. A tuple of four:
+          bottom-left, bottom-right, top-left, and top-right.
+
+        orbit_direction_name
+
+          The metadata field name containing the orbit direction.
+
+        force_north_up
+
+          TODO
+
+        tps
+
+          Whether to use TPS transformation instead of GCP polynomials.
+
+    calc
+
+      Calculate derived data using formulas.
+
+      formulas
+
+        A list of formulas to use to calculate derived data. Each has the
+        following fields:
+
+        inputs
+
+          A map of characters in the range A-Z to the respective inputs. Each
+          has the following properties:
+
+          glob
+
+            The input file glob.
+
+          band
+
+            The input file band index (1-based).
+
+        data_type
+
+          The GDAL data type name for the output.
+
+        formula
+
+          The formula to apply. See
+          https://gdal.org/programs/gdal_calc.html#cmdoption-calc for details.
+
+        output_postfix
+
+          The postfix to append to the filename of the created file.
+
+        nodata_value
+
+          The nodata value to be used.
+
+    stack_bands
+
+      Concatenate bands and arrange them in a single file.
+
+      group_by
+
+        A regex to group the input datasets, if they consist of multiple files.
+        The first regex group is used for the grouping.
+
+      sort_by
+
+        A regex to select a portion of the filename to be used for sorting. The
+        first regex group is used.
+
+      order
+
+        The order of the items extracted via ``sort_by``. When the value
+        extracted by ``sort_by`` is missing, that file will be dropped.
+
+    output
+
+      Final adjustments to generate an output file. Add overviews, reproject to
+      a common projection, etc.
+
+      options
+
+        Options to be passed to ``gdal.Warp``. See
+        https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details.
+
+  types
+
+    This mapping of product type identifiers to step configurations allows
+    defining specific step settings, even overriding the values from the
+    defaults.
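+
+The following minimal sketch illustrates this structure. All globs, XPath
+expressions, product type names, and option values are placeholders and have
+to be adapted to the actual products and deployment; the ``source``/``target``
+storage definitions are omitted for brevity.
+
+.. code-block:: yaml
+
+    # source/target: local directory or Swift object storage definitions
+    workdir: /tmp/preprocess
+    keep_temp: false
+    metadata_glob: "*METADATA*.xml"
+    glob_case: false
+    type_extractor:
+      xpath:
+        - "//Metadata/ProductType/text()"
+      map:
+        OPTICAL_2A: OPTICAL
+    level_extractor: {}   # currently not used
+    preprocessing:
+      defaults:
+        georeference:
+          type: gcp
+          options:
+            order: 1
+            tps: false
+        output:
+          options:
+            dstSRS: "EPSG:4326"
+      types:
+        OPTICAL:
+          subdatasets:
+            data_file_glob: "*.tif"
+            subdataset_types:
+              "1": "_B01"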
 
 The next section :ref:`management` describes how an operator interacts with a
 deployed VS stack.
diff --git a/documentation/operator-guide/ingestion.rst b/documentation/operator-guide/ingestion.rst
index dc1b31aaab837ab4514ec8949c3047d389a7cb34..16be9b8cf531ba9df398debe962f7c313d9b7c27 100644
--- a/documentation/operator-guide/ingestion.rst
+++ b/documentation/operator-guide/ingestion.rst
@@ -146,9 +146,9 @@ registrar can be accomplished.
 Preprocessing
 ~~~~~~~~~~~~~
 
-In this section all command examples are assumed to be run from within a running
-preprocessor container. To open a shell on a preprocessor, the following
-command can be used.
+In this section all command examples are assumed to be run from within a
+running preprocessor container. To open a shell on a preprocessor, the
+following command can be used.
 
 .. code-block:: bash
 
diff --git a/documentation/operator-guide/setup.rst b/documentation/operator-guide/setup.rst
index fbafa38bda800a7f858170a7776c605f87862885..bb91533c06ab1e442c2ed932fd03f589be0d94d2 100644
--- a/documentation/operator-guide/setup.rst
+++ b/documentation/operator-guide/setup.rst
@@ -116,6 +116,25 @@ Now the relevant images can be pulled:
 
 .. # TODO: ingestor image?
 
+
+Logging
+-------
+
+For production, the services in the compose files use the default logging
+driver. Therefore, we configure the default logging driver of the Docker
+daemon to be ``fluentd`` by creating the file ``/etc/docker/daemon.json`` with
+the following content:
+
+.. code-block:: json
+
+    {
+        "log-driver": "fluentd"
+    }
+
+For development, we don't want to redirect all of the docker logging output,
+so the respective compose files for dev configure the logging driver for each
+container.
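+
+In the development compose files such a per-service override can look like the
+following sketch, where the service name, the tag, and the fluentd address are
+placeholders:
+
+.. code-block:: yaml
+
+    services:
+      renderer:
+        logging:
+          driver: fluentd
+          options:
+            fluentd-address: "localhost:24224"
+            tag: renderer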
+
+
 Stack Deployment
 ----------------
 
diff --git a/env/dem.env b/env/dem.env
index 687b7c830b448883a8e3a146645cf804f67ecf6a..a1d307f435679da7324a0ec1115bb99e98523345 100644
--- a/env/dem.env
+++ b/env/dem.env
@@ -3,3 +3,4 @@ UPLOAD_CONTAINER=dem-data
 
 GDAL_DISABLE_READDIR_ON_OPEN=TRUE
 CPL_VSIL_CURL_ALLOWED_EXTENSIONS=.TIF,.tif,.xml
+SERVICE_URL=dem.pass.copernicus.eu
diff --git a/env/dem_preprocessor.env b/env/dem_preprocessor.env
deleted file mode 100644
index 9b650c648b40c6e947507f1dff7fb83af90d974a..0000000000000000000000000000000000000000
--- a/env/dem_preprocessor.env
+++ /dev/null
@@ -1,5 +0,0 @@
-SPLIT_PARTS_CHECK=False
-DATA_FILE_SIZE_LIMIT=100000
-FILENAME_PART_SKIP=auxraster,preview,support,annotation,auxfiles
-PREPROCESSOR_REPLACE=TRUE
-FORCE_NO_DATA_VALUE=0
\ No newline at end of file
diff --git a/env/dem_redis.env b/env/dem_redis.env
index 45dcfdf80c5dbf4b1dcdfccaba2671ee2b57d7ef..3eff4afd5a700d498d26fadb791632909d8b5f30 100644
--- a/env/dem_redis.env
+++ b/env/dem_redis.env
@@ -4,6 +4,7 @@ REDIS_PORT=6379
 REDIS_QUEUE_KEY=seed_queue
 
 REDIS_PREPROCESS_QUEUE_KEY=preprocess_queue
+REDIS_PREPROCESS_MD_QUEUE_KEY=preprocess-md_queue
 REDIS_REGISTER_QUEUE_KEY=register_queue
 REDIS_REGISTERED_SET_KEY=registered_set
 REDIS_SET_KEY=registered_set
diff --git a/env/emg.env b/env/emg.env
index d056fa2885245bb91d5d250f4fed083dcc54eafe..f3af09603b2a3b96912d5fe0c0ebcb128aadc92e 100644
--- a/env/emg.env
+++ b/env/emg.env
@@ -3,3 +3,4 @@ UPLOAD_CONTAINER=emg-data
 
 GDAL_DISABLE_READDIR_ON_OPEN=TRUE
 CPL_VSIL_CURL_ALLOWED_EXTENSIONS=.TIF,.tif,.xml
+SERVICE_URL=emg.pass.copernicus.eu
diff --git a/env/emg_preprocessor.env b/env/emg_preprocessor.env
deleted file mode 100644
index c25407773d5186db820c33ce89bd2d35ab47fce7..0000000000000000000000000000000000000000
--- a/env/emg_preprocessor.env
+++ /dev/null
@@ -1,2 +0,0 @@
-SPLIT_PARTS_CHECK=False
-ENFORCE_FOUR_BANDS=True
diff --git a/env/emg_redis.env b/env/emg_redis.env
index 45dcfdf80c5dbf4b1dcdfccaba2671ee2b57d7ef..3eff4afd5a700d498d26fadb791632909d8b5f30 100644
--- a/env/emg_redis.env
+++ b/env/emg_redis.env
@@ -4,6 +4,7 @@ REDIS_PORT=6379
 REDIS_QUEUE_KEY=seed_queue
 
 REDIS_PREPROCESS_QUEUE_KEY=preprocess_queue
+REDIS_PREPROCESS_MD_QUEUE_KEY=preprocess-md_queue
 REDIS_REGISTER_QUEUE_KEY=register_queue
 REDIS_REGISTERED_SET_KEY=registered_set
 REDIS_SET_KEY=registered_set
diff --git a/env/vhr18.env b/env/vhr18.env
index 33c93caf76fbc1d6cfc995e939bf40a66cfce66d..12eacf3ff0adc299d6f800b2cd58414e7c904aae 100644
--- a/env/vhr18.env
+++ b/env/vhr18.env
@@ -3,3 +3,4 @@ UPLOAD_CONTAINER=vhr18-data
 
 GDAL_DISABLE_READDIR_ON_OPEN=TRUE
 CPL_VSIL_CURL_ALLOWED_EXTENSIONS=.TIF,.tif,.xml
+SERVICE_URL=vhr18.pass.copernicus.eu
diff --git a/env/vhr18_preprocessor.env b/env/vhr18_preprocessor.env
deleted file mode 100644
index c25407773d5186db820c33ce89bd2d35ab47fce7..0000000000000000000000000000000000000000
--- a/env/vhr18_preprocessor.env
+++ /dev/null
@@ -1,2 +0,0 @@
-SPLIT_PARTS_CHECK=False
-ENFORCE_FOUR_BANDS=True
diff --git a/env/vhr18_redis.env b/env/vhr18_redis.env
index 45dcfdf80c5dbf4b1dcdfccaba2671ee2b57d7ef..3eff4afd5a700d498d26fadb791632909d8b5f30 100644
--- a/env/vhr18_redis.env
+++ b/env/vhr18_redis.env
@@ -4,6 +4,7 @@ REDIS_PORT=6379
 REDIS_QUEUE_KEY=seed_queue
 
 REDIS_PREPROCESS_QUEUE_KEY=preprocess_queue
+REDIS_PREPROCESS_MD_QUEUE_KEY=preprocess-md_queue
 REDIS_REGISTER_QUEUE_KEY=register_queue
 REDIS_REGISTERED_SET_KEY=registered_set
 REDIS_SET_KEY=registered_set
diff --git a/env_setup.sh b/env_setup.sh
deleted file mode 100644
index 9f2bb95bc7ffe5d2cb9e26b7676bf2c464ba304d..0000000000000000000000000000000000000000
--- a/env_setup.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/sh
-cat $vhr18_db > ./env/vhr18_db.env
-cat $vhr18_django > ./env/vhr18_django.env
-cat $vhr18_obs > ./env/vhr18_obs.env
-
-cat $emg_db > ./env/emg_db.env
-cat $emg_django > ./env/emg_django.env
-cat $emg_obs > ./env/emg_obs.env
-
-
-set -o allexport
-
-source ./env/emg_db.env
-source ./env/vhr18_db.env
-
-set +o allexport
-
-
-sed -i -e 's/emg-data/pvs_testing/g' ./env/emg.env
-sed -i -e 's/vhr18-data/pvs_testing/g' ./env/vhr18.env
-
-sed -i -e 's/emg-cache/pvs_testing/g' ./env/emg_obs.env
-sed -i -e 's/vhr18-cache/pvs_testing/g' ./env/vhr18_obs.env
diff --git a/ingestor/.dockerignore b/ingestor/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..c72a38af9c876136701d901b31f2c9e058641b14
--- /dev/null
+++ b/ingestor/.dockerignore
@@ -0,0 +1,5 @@
+tests
+lib
+bin
+__pycache__
+.pytest_cache
diff --git a/ingestor/.gitignore b/ingestor/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..758d3e612a2674122446b61b101f78a23b6a9bd8
--- /dev/null
+++ b/ingestor/.gitignore
@@ -0,0 +1,139 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# venv stuff
+bin
+include
+pyvenv.cfg
diff --git a/ingestor/Dockerfile b/ingestor/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..6490ddff5c31c8f9eb2e9dc2bbfa452e0088434f
--- /dev/null
+++ b/ingestor/Dockerfile
@@ -0,0 +1,61 @@
+#------------------------------------------------------------------------------
+#
+# Project: prism view server
+# Authors: Fabian Schindler <fabian.schindler@eox.at>
+#
+#------------------------------------------------------------------------------
+# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies of this Software or works derived from this Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#-----------------------------------------------------------------------------
+
+FROM ubuntu:18.04
+
+MAINTAINER EOX
+LABEL name="prism view server cache" \
+      vendor="EOX IT Services GmbH <https://eox.at>" \
+      license="MIT Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>" \
+      type="prism view server ingestor" \
+      version="0.0.1-dev"
+
+USER root
+ADD install.sh requirements.txt \
+    /
+RUN /install.sh
+RUN mkdir /ingestor
+COPY app.py config.py filedaemon.py /
+COPY ingestor/ /ingestor
+
+ENV COLLECTION_ID= \
+    INSTANCE_ID="prism-view-server_ingestor" \
+    RENDERER_HOST= \
+    COLLECTION= \
+    REDIS_HOST= \
+    REDIS_PORT="6379" \
+    REDIS_PREPROCESS_MD_QUEUE_KEY="preprocess-md_queue" \
+    INOTIFY_WATCH_DIR="/mnt/data" \
+    ST_AUTH_VERSION=3 \
+    OS_AUTH_URL= \
+    OS_USERNAME= \
+    OS_PASSWORD= \
+    OS_TENANT_NAME= \
+    OS_TENANT_ID= \
+    OS_REGION_NAME=
+
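+# Default command: serve the HTTP ingestion endpoint with gunicorn. The image
+# also ships filedaemon.py, which instead watches INOTIFY_WATCH_DIR for new
+# browse report files.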
+CMD ["gunicorn3", "-c", "config.py", "app"]
diff --git a/ingestor/Readme.md b/ingestor/Readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..01d9c75b00a38a20190a7f59c900af337c530b03
--- /dev/null
+++ b/ingestor/Readme.md
@@ -0,0 +1,16 @@
+# Ingestor
+
+## Setup
+
+```bash
+python3 -m venv .
+source bin/activate
+pip install -r requirements.txt
+pip install pytest
+```
+
+## Testing
+
+```bash
+pytest ingestor/
+```
diff --git a/ingestor/app.py b/ingestor/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fde0df409198fd618ff77f3fd3752a4841bb0d3
--- /dev/null
+++ b/ingestor/app.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+#------------------------------------------------------------------------------
+#
+# Project: prism view server
+# Authors: Fabian Schindler <fabian.schindler@eox.at>
+#
+#------------------------------------------------------------------------------
+# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies of this Software or works derived from this Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#-----------------------------------------------------------------------------
+
+import os
+import logging
+import logging.config
+import json
+
+from flask import Flask, request, Response
+import redis
+
+from ingestor.browse_report import parse_browse_report
+from ingestor.util import converter
+
+
+application = Flask(__name__)
+
+logger = logging.getLogger(__name__)
+
+logging.config.dictConfig({
+    'version': 1,
+    'formatters': {
+        'simple': {
+            'format': '%(levelname)s: %(message)s',
+        },
+        'verbose': {
+            'format': '[%(asctime)s][%(module)s] %(levelname)s: %(message)s',
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            'class': 'logging.StreamHandler',
+            'formatter': 'verbose',
+        }
+    },
+    'loggers': {
+        '': {
+            'handlers': ['console'],
+            'level': 'DEBUG',
+            'propagate': False,
+        }
+    }
+})
+
+client = redis.Redis(
+    host=os.environ['REDIS_HOST'],
+    port=int(os.environ.get('REDIS_PORT', '6379')),
+    charset="utf-8",
+    decode_responses=True,
+)
+
+queue_name = os.environ['REDIS_PREPROCESS_MD_QUEUE_KEY']
+
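+# Accept a browse report or ingest browse instruction via HTTP POST, parse it,
+# and push the result onto the preprocess metadata queue in Redis.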
+@application.route('/', methods=['POST'])
+def ingest():
+    try:
+        request.get_data()
+        browse_report = parse_browse_report(request.data)
+        logger.debug(browse_report)
+        client.lpush(queue_name, json.dumps(
+            browse_report, default=converter
+        ))
+        return Response(status=202)
+
+    except Exception as e:
+        return Response(str(e), status=400)
diff --git a/ingestor/config.py b/ingestor/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..069ada04df74d97773a3a1c935d3f3384b1ed4f6
--- /dev/null
+++ b/ingestor/config.py
@@ -0,0 +1 @@
+bind = ['0.0.0.0:8000']
diff --git a/ingestor/filedaemon.py b/ingestor/filedaemon.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b3bef423a489d6f004295382117cea3305a5fd7
--- /dev/null
+++ b/ingestor/filedaemon.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+#------------------------------------------------------------------------------
+#
+# Project: prism view server
+# Authors: Fabian Schindler <fabian.schindler@eox.at>
+#
+#------------------------------------------------------------------------------
+# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies of this Software or works derived from this Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#-----------------------------------------------------------------------------
+
+import os
+import logging
+import logging.config
+import json
+
+import pyinotify
+import redis
+
+from ingestor.browse_report import parse_browse_report
+from ingestor.util import converter
+
+
+logger = logging.getLogger(__name__)
+
+logging.config.dictConfig({
+    'version': 1,
+    'formatters': {
+        'simple': {
+            'format': '%(levelname)s: %(message)s',
+        },
+        'verbose': {
+            'format': '[%(asctime)s][%(module)s] %(levelname)s: %(message)s',
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            'class': 'logging.StreamHandler',
+            'formatter': 'verbose',
+        }
+    },
+    'loggers': {
+        '': {
+            'handlers': ['console'],
+            'level': 'DEBUG',
+            'propagate': False,
+        }
+    }
+})
+
+queue_name = os.environ['REDIS_PREPROCESS_MD_QUEUE_KEY']
+watch_dir = os.environ['INOTIFY_WATCH_DIR']
+
+client = redis.Redis(
+    host=os.environ['REDIS_HOST'],
+    port=int(os.environ.get('REDIS_PORT', '6379')),
+    charset='utf-8',
+    decode_responses=True,
+)
+
+
+watchmanager = pyinotify.WatchManager()
+
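+# Handle newly created files in the watched directory: parse each file as a
+# browse report and push the result onto the preprocess metadata queue.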
+class EventHandler(pyinotify.ProcessEvent):
+    def process_IN_CREATE(self, event):
+        logger.info(f'Parsing browse file: {event.pathname}')
+        try:
+            with open(event.pathname) as f:
+                browse_report = parse_browse_report(f)
+            logger.debug(browse_report)
+            client.lpush(queue_name, json.dumps(
+                browse_report, default=converter
+            ))
+        except Exception as e:
+            logger.exception(e)
+
+handler = EventHandler()
+notifier = pyinotify.Notifier(watchmanager, handler)
+
+wdd = watchmanager.add_watch(watch_dir, pyinotify.IN_CREATE, rec=True)
+
+notifier.loop()
diff --git a/ingestor/ingestor/__init__.py b/ingestor/ingestor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ingestor/ingestor/browse_report.py b/ingestor/ingestor/browse_report.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3669bd77719f32609d4a8b6c344b1f30f3ed675
--- /dev/null
+++ b/ingestor/ingestor/browse_report.py
@@ -0,0 +1,161 @@
+#------------------------------------------------------------------------------
+#
+# Project: prism view server
+# Authors: Fabian Schindler <fabian.schindler@eox.at>
+#
+#------------------------------------------------------------------------------
+# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies of this Software or works derived from this Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#-----------------------------------------------------------------------------
+
+import io
+
+from lxml import etree
+import dateutil.parser
+
+from .util import pairwise
+
+
+class BrowseReportParserError(ValueError):
+    pass
+
+NS_REP = 'http://ngeo.eo.esa.int/ngEO/browseReport/1.0'
+NS_REP_OLD = 'http://ngeo.eo.esa.int/schema/browseReport'
+NS_BSI = 'http://ngeo.eo.esa.int/schema/browse/ingestion'
+
+
+nsmap = {
+    'rep': NS_REP,
+    'bsi': NS_BSI
+}
+
+
+def rep(tag):
+    return f'{{{NS_REP}}}{tag}'
+
+def rep_old(tag):
+    return f'{{{NS_REP_OLD}}}{tag}'
+
+def bsi(tag):
+    return f'{{{NS_BSI}}}{tag}'
+
+
+ALLOWED_ROOT_TAGS = {rep('browseReport'), rep_old('browseReport'), bsi('ingestBrowse')}
+
+
+def parse_browse_report(input_file):
+    """
+        :returns: dict with the report metadata and the list of parsed browses
+    """
+    if isinstance(input_file, bytes):
+        input_file = io.BytesIO(input_file)
+
+    try:
+        tree = etree.parse(input_file)
+    except etree.XMLSyntaxError as e:
+        raise BrowseReportParserError('Failed to parse XML.') from e
+    root = tree.getroot()
+
+    if root.tag not in ALLOWED_ROOT_TAGS:
+        raise BrowseReportParserError(
+            'Document is not a browse report or an ingest browse instruction.'
+        )
+
+    if root.tag == rep_old('browseReport'):
+        used_rep = rep_old
+    else:
+        used_rep = rep
+
+    return {
+        'responsible_org_name': root.findtext(used_rep('responsibleOrgName')),
+        'date_time': dateutil.parser.parse(root.findtext(used_rep('dateTime'))),
+        'browse_type': root.findtext(used_rep('browseType')),
+        'browses': [
+            parse_browse(elem, used_rep)
+            for elem in root.iterfind(used_rep('browse'))
+        ],
+    }
+
+
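+# Parse a single browse element. The resulting 'type' and geometry fields
+# depend on which child element (rectifiedBrowse, footprint, modelInGeotiff
+# or regularGrid) is present.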
+def parse_browse(elem, used_rep):
+    browse = {
+        'type': '',
+        'browse_identifier': elem.findtext(used_rep('browseIdentifier')),
+        'filename': elem.findtext(used_rep('fileName')),
+        'image_type': elem.findtext(used_rep('imageType')),
+        'reference_system_identifier': elem.findtext(
+            used_rep('referenceSystemIdentifier')
+        ),
+        'start_time': dateutil.parser.parse(elem.findtext(used_rep('startTime'))),
+        'end_time': dateutil.parser.parse(elem.findtext(used_rep('endTime'))),
+    }
+
+    rectified_elem = elem.find(used_rep('rectifiedBrowse'))
+    footprint_elem = elem.find(used_rep('footprint'))
+    geotiff_elem = elem.find(used_rep('modelInGeotiff'))
+    regular_grid_browse = elem.find(used_rep('regularGrid'))
+
+    if rectified_elem is not None:
+        browse['type'] = 'rectified_browse'
+        browse['rectified'] = {
+            'coord_list': [
+                (float(x), float(y))
+                for x, y in pairwise(
+                    rectified_elem.findtext(used_rep('coordList')).split()
+                )
+            ],
+        }
+
+    elif footprint_elem is not None:
+        browse['type'] = 'footprint_browse'
+        browse['footprint'] = {
+            'col_row_list': [
+                (int(x), int(y))
+                for x, y in pairwise(
+                    footprint_elem.findtext(used_rep('colRowList')).split()
+                )
+            ],
+            'coord_list': [
+                (float(x), float(y))
+                for x, y in pairwise(
+                    footprint_elem.findtext(used_rep('coordList')).split()
+                )
+            ],
+        }
+
+    elif geotiff_elem is not None:
+        browse['type'] = 'model_in_geotiff_browse'
+
+    elif regular_grid_browse is not None:
+        browse['type'] = 'regular_grid_browse'
+        browse['regular_grid'] = {
+            'col_node_number': int(regular_grid_browse.findtext(used_rep('colNodeNumber'))),
+            'row_node_number': int(regular_grid_browse.findtext(used_rep('rowNodeNumber'))),
+            'col_step': float(regular_grid_browse.findtext(used_rep('colStep'))),
+            'row_step': float(regular_grid_browse.findtext(used_rep('rowStep'))),
+            'coord_lists': [
+                [
+                    (float(x), float(y))
+                    for x, y in pairwise(elem.text.split())
+                ] for elem in regular_grid_browse.iterfind(used_rep('coordList'))
+            ]
+        }
+
+    return browse
diff --git a/ingestor/ingestor/test_browse_report.py b/ingestor/ingestor/test_browse_report.py
new file mode 100644
index 0000000000000000000000000000000000000000..d30cbbcaed92c4cda348858f0220ba57191dff4e
--- /dev/null
+++ b/ingestor/ingestor/test_browse_report.py
@@ -0,0 +1,165 @@
+#------------------------------------------------------------------------------
+#
+# Project: prism view server
+# Authors: Fabian Schindler <fabian.schindler@eox.at>
+#
+#------------------------------------------------------------------------------
+# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies of this Software or works derived from this Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#-----------------------------------------------------------------------------
+
+
+from os.path import dirname, join
+from datetime import datetime
+from dateutil import tz
+
+from ingestor.browse_report import parse_browse_report
+
+
+TEST_DATA_DIR = join(dirname(dirname(__file__)), 'tests/data')
+
+
+def test_parse_footprint_browse():
+    with open(join(TEST_DATA_DIR, 'footprint_browse.xml')) as f:
+        browse_report = parse_browse_report(f)
+
+    assert browse_report == {
+        'responsible_org_name': 'Generated by Eoli 2 ngEO Converter V1.2.0',
+        'date_time': datetime(2013, 9, 25, 14, 54, 38, 0, tz.UTC),
+        'browse_type': 'SAR',
+        'browses': [{
+            'type': 'footprint_browse',
+            'browse_identifier': 'ERS-2-11040113373745-1507.SAR_IM0_0P.BP',
+            'filename': 'ERS-2-11040113373745-1507.SAR_IM0_0P.BP.jpg',
+            'image_type': 'Jpeg',
+            'reference_system_identifier': 'EPSG:4326',
+            'footprint': {
+                'col_row_list': [
+                    (0, 0),
+                    (500, 0),
+                    (500, 250),
+                    (0, 250),
+                    (0, 0),
+                ],
+                'coord_list': [
+                    (83.66, 42.31),
+                    (84.53, 42.42),
+                    (84.48, 51.28),
+                    (83.61, 50.32),
+                    (83.66, 42.31),
+                ]
+            },
+            'start_time': datetime(2011, 4, 1, 13, 37, 37, 0, tz.UTC),
+            'end_time': datetime(2011, 4, 1, 13, 37, 52, 0, tz.UTC),
+        }]
+    }
+
+
+def test_parse_model_in_geotiff_browse():
+    with open(join(TEST_DATA_DIR, 'model_in_geotiff_browse.xml')) as f:
+        browse_report = parse_browse_report(f)
+
+    assert browse_report == {
+        'responsible_org_name': 'DMI',
+        'date_time': datetime(2012, 7, 13, 11, 54, 26, 0, tz.UTC),
+        'browse_type': 'SAR',
+        'browses': [{
+            'type': 'model_in_geotiff_browse',
+            'browse_identifier': 'ID_DODWH_MG2_CORE_09DM010001_1',
+            'filename': 'ID_DEIMOS01-v2_DE0028bfp_L3R.tif',
+            'image_type': 'TIFF',
+            'reference_system_identifier': 'EPSG:4326',
+            'start_time': datetime(2011, 2, 1, 11, 48, 1, 0, tz.UTC),
+            'end_time': datetime(2011, 2, 1, 11, 48, 27, 0, tz.UTC),
+        }]
+    }
+
+def test_parse_rectified_browse():
+    with open(join(TEST_DATA_DIR, 'rectified_browse.xml')) as f:
+        browse_report = parse_browse_report(f)
+
+    assert browse_report == {
+        'responsible_org_name': 'SLAP 03.03',
+        'date_time': datetime(2014, 7, 24, 11, 58, 24, 0, tz.UTC),
+        'browse_type': 'NGEO-LIGHT',
+        'browses': [{
+            'type': 'rectified_browse',
+            'browse_identifier': 'LS05_RFUI_TM__GTC_1P_19910928T071939_19910928T072007_040292_0172_0031_B10D',
+            'filename': 'http://landsat-ds.eo.esa.int/metadata/LandsatTMCloudFreeCoverage/1991/09/28/LS05_RFUI_TM__GTC_1P_19910928T071939_19910928T072007_040292_0172_0031_B10D.BP.PNG',
+            'image_type': 'PNG',
+            'reference_system_identifier': 'EPSG:4326',
+            'rectified': {
+                'coord_list': [
+                    (40.8395, 40.1005),
+                    (42.6645, 42.7907),
+                ]
+            },
+            'start_time': datetime(1991, 9, 28, 7, 19, 39, 0, tz.UTC),
+            'end_time': datetime(1991, 9, 28, 7, 20, 7, 0, tz.UTC),
+        }]
+    }
+
+
+def test_parse_regular_grid_browse():
+    with open(join(TEST_DATA_DIR, 'regular_grid_browse.xml')) as f:
+        browse_report = parse_browse_report(f)
+
+    assert browse_report == {
+        'responsible_org_name': 'Sentinel 1 PDGS',
+        'date_time': datetime(2012, 11, 8, 17, 25, 49, tzinfo=tz.tzoffset(None, 3600)),
+        'browse_type': 'SAR',
+        'browses': [{
+            'type': 'regular_grid_browse',
+            'browse_identifier': 'a20120101T043724405923',
+            'filename': 'quick-look.png',
+            'image_type': 'PNG',
+            'reference_system_identifier': 'EPSG:4326',
+            'regular_grid': {
+                'col_node_number': 6,
+                'row_node_number': 20,
+                'col_step': 78.8,
+                'row_step': 29.157894737,
+                'coord_lists': [
+                    [(16.80678325439503, -156.5765611873593), (16.92753830505998, -156.6004684133847), (17.04829841199142, -156.6243747867503), (17.16905739541897, -156.6482792679816), (17.28981280532396, -156.6721814810342), (17.31890450297918, -156.6779396385797)],
+                    [(16.81392675836918, -156.5375764998714), (16.93467625534994, -156.561457873508), (17.05543081391954, -156.5853381897874), (17.17618425492683, -156.609216410023), (17.2969341243137, -156.6330921572468), (17.32602449992995, -156.6388438384338)],
+                    [(16.82106287728486, -156.498588896317), (16.94180676422443, -156.522444395707), (17.06255571800818, -156.5462986330286), (17.1833035600995, -156.5701505703374), (17.30404783238212, -156.593999829775), (17.33313687211055, -156.599745030351)],
+                    [(16.82819160727274, -156.4595983781882), (16.94892982780319, -156.4834279815255), (17.06967312036884, -156.5072561180554), (17.19041530703756, -156.5310817505579), (17.31115392562071, -156.5549045002936), (17.34024161561499, -156.5606432159938)],
+                    [(16.83531294446828, -156.420604946974), (16.95604544221135, -156.4444086325018), (17.07678301711796, -156.4682106464455), (17.19751949184693, -156.4920099523122), (17.31825240012606, -156.5158061704734), (17.34733872654218, -156.5215383970218)],
+                    [(16.84242688501015, -156.381608604169), (16.96315360357771, -156.4053863501777), (17.08388540437526, -156.4291622197829), (17.20461611063721, -156.4529351772322), (17.32534325199815, -156.4767048419914), (17.35442820099433, -156.4824305751009)],
+                    [(16.84953342503939, -156.3426093512784), (16.97025430803387, -156.3663611361029), (17.09098027826299, -156.3901108396617), (17.21170515952094, -156.4138574269578), (17.33242647733964, -156.437600516534), (17.36151003507623, -156.4433197519077)],
+                    [(16.85663256069959, -156.3036071898168), (16.97734755171454, -156.327332991834), (17.09806763490601, -156.3510565076854), (17.21878663461358, -156.3747767031367), (17.33950207225592, -156.3984931957972), (17.36858422489525, -156.4042059291287)],
+                    [(16.86372428813775, -156.2646021213034), (16.98443333075823, -156.2883019189305), (17.10514747043269, -156.311999225462), (17.22586053203457, -156.3356930074195), (17.34657003285606, -156.3593828814815), (17.37565076656231, -156.3650891084553)],
+                    [(16.87080860350595, -156.225594147253), (16.99151164130902, -156.2492679189448), (17.11221978097664, -156.2729389945953), (17.23292684790889, -156.2966063414503), (17.35363035525441, -156.3202695752825), (17.38270965619353, -156.3259692915751)],
+                    [(16.87424257900799, -156.2066700838774), (16.994942683509, -156.2303312224714), (17.11564789218319, -156.2539895656031), (17.23635203068512, -156.2776440808492), (17.35705261014584, -156.3012943835597), (17.38613121105771, -156.3069909399195)],
+                    [(16.88495498268113, -156.1475694884959), (17.00564584155052, -156.1711911438214), (17.12634181169133, -156.1948096932411), (17.24703671955967, -156.2184241052198), (17.36772806994446, -156.2420339939122), (17.39680446385366, -156.2477206758413)],
+                    [(16.89201703884214, -156.1085528066608), (17.01270172358132, -156.1321483716202), (17.13339152417947, -156.1557406258028), (17.25408026763801, -156.1793285380803), (17.37476545451566, -156.2029117219735), (17.40384037416505, -156.2085918802064)],
+                    [(16.89907166764901, -156.0695332249886), (17.01975012180706, -156.0931026781663), (17.14043369632741, -156.1166686157758), (17.26111621878105, -156.1402300068893), (17.38179518545178, -156.1637864645731), (17.41086861701329, -156.1694600947586)],
+                    [(16.90611886532681, -156.0305107447066), (17.02679103244686, -156.054054064714), (17.14746832434206, -156.0775936644758), (17.26814456918872, -156.1011285129948), (17.38881725894098, -156.124658223118), (17.41788918858779, -156.1303253208992)],
+                    [(16.91315862812868, -155.991485366909), (17.03382445174815, -156.0150025323825), (17.15449540445841, -156.0385157730854), (17.27516531508931, -156.0620240576098), (17.39583167119955, -156.0855269988818), (17.42490208510597, -156.0911875598966)],
+                    [(16.92019095234127, -155.9524570925252), (17.04085037599229, -155.9759480821245), (17.161514932945, -155.9994349426222), (17.28217845274488, -156.0229166417812), (17.40283841847734, -156.046392792973), (17.43190730281881, -156.0520468128543)],
+                    [(16.92721583429081, -155.9134259222858), (17.04786880150054, -155.9368907146913), (17.16852690610996, -155.9603511739047), (17.28918397845739, -155.9838062663549), (17.40983749706388, -156.0072556063005), (17.43890483801674, -156.0129030806771)],
+                    [(16.93423327034943, -155.8743918566859), (17.05487972464038, -155.8978304305973), (17.17553132030729, -155.9212644675158), (17.29618188857495, -155.9446929319396), (17.41682890329453, -155.9681154395373), (17.44589468703602, -155.973756364034)],
+                    [(16.94103455650875, -155.8365177408673), (17.06167463789954, -155.8599308569188), (17.18231986439887, -155.8833392385752), (17.30296406819755, -155.9067418477961), (17.42360471850083, -155.9301383022012), (17.45266897820548, -155.9357728677357)],
+                ],
+            },
+            'start_time': datetime(2012, 1, 1, 4, 37, 24, 405923),
+            'end_time': datetime(2012, 1, 1, 4, 37, 32, 890783),
+        }]
+    }
diff --git a/ingestor/ingestor/util.py b/ingestor/ingestor/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d659c9ce70440242ae62d602c372e6e743c4658
--- /dev/null
+++ b/ingestor/ingestor/util.py
@@ -0,0 +1,39 @@
+#------------------------------------------------------------------------------
+#
+# Project: prism view server
+# Authors: Fabian Schindler <fabian.schindler@eox.at>
+#
+#------------------------------------------------------------------------------
+# Copyright (C) 2020 EOX IT Services GmbH <https://eox.at>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies of this Software or works derived from this Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#-----------------------------------------------------------------------------
+
+from datetime import datetime
+
+
+def pairwise(iterable):
+    "s -> (s0,s1), (s2,s3), (s4, s5), ..."
+    a = iter(iterable)
+    return zip(a, a)
+
+
+def converter(o):
+    # Serialize datetime objects as ISO 8601 strings; reject anything else
+    # that json.dumps cannot handle by itself.
+    if isinstance(o, datetime):
+        return o.isoformat()
+    raise TypeError(f'Object of type {type(o).__name__} is not JSON serializable')
diff --git a/ingestor/install.sh b/ingestor/install.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c221fe0880d737796250e53db92aa60df39438c2
--- /dev/null
+++ b/ingestor/install.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+echo "Running install.sh"
+
+apt update
+
+echo "Installing packages"
+DEBIAN_FRONTEND=noninteractive apt install -y python3-flask python3-lxml python3-dateutil gunicorn3 python3-redis python3-pyinotify
+
+
+# pip3 install -r /requirements.txt
+
+rm -rf /var/lib/apt/lists/*
diff --git a/ingestor/requirements.txt b/ingestor/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bc6bf8b47d6dedb12339879756582c3e51ae2440
--- /dev/null
+++ b/ingestor/requirements.txt
@@ -0,0 +1,5 @@
+flask
+lxml
+python-dateutil
+gunicorn
+redis
+pyinotify
diff --git a/ingestor/tests/data/footprint_browse.xml b/ingestor/tests/data/footprint_browse.xml
new file mode 100644
index 0000000000000000000000000000000000000000..8ab11ec5333b8b612e8da391d38d560e39997b27
--- /dev/null
+++ b/ingestor/tests/data/footprint_browse.xml
@@ -0,0 +1,18 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<rep:browseReport xmlns:rep="http://ngeo.eo.esa.int/ngEO/browseReport/1.0" version="1.3">
+  <rep:responsibleOrgName>Generated by Eoli 2 ngEO Converter V1.2.0</rep:responsibleOrgName>
+  <rep:dateTime>2013-09-25T14:54:38Z</rep:dateTime>
+  <rep:browseType>SAR</rep:browseType>
+  <rep:browse>
+    <rep:browseIdentifier>ERS-2-11040113373745-1507.SAR_IM0_0P.BP</rep:browseIdentifier>
+    <rep:fileName>ERS-2-11040113373745-1507.SAR_IM0_0P.BP.jpg</rep:fileName>
+    <rep:imageType>Jpeg</rep:imageType>
+    <rep:referenceSystemIdentifier>EPSG:4326</rep:referenceSystemIdentifier>
+    <rep:footprint nodeNumber="5">
+      <rep:colRowList>0 0 500 0 500 250 0 250 0 0</rep:colRowList>
+      <rep:coordList>83.66 42.31 84.53 42.42 84.48 51.28 83.61 50.32 83.66 42.31</rep:coordList>
+    </rep:footprint>
+    <rep:startTime>2011-04-01T13:37:37Z</rep:startTime>
+    <rep:endTime>2011-04-01T13:37:52Z</rep:endTime>
+  </rep:browse>
+</rep:browseReport>
diff --git a/ingestor/tests/data/model_in_geotiff_browse.xml b/ingestor/tests/data/model_in_geotiff_browse.xml
new file mode 100644
index 0000000000000000000000000000000000000000..624a01e9e142fb48200fd855440eace4a8b6858a
--- /dev/null
+++ b/ingestor/tests/data/model_in_geotiff_browse.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<bsi:ingestBrowse xmlns:bsi="http://ngeo.eo.esa.int/schema/browse/ingestion"
+  xmlns:rep="http://ngeo.eo.esa.int/ngEO/browseReport/1.0" version="1.3">
+  <rep:responsibleOrgName>DMI</rep:responsibleOrgName>
+  <rep:dateTime>2012-07-13T11:54:26Z</rep:dateTime>
+  <rep:browseType>SAR</rep:browseType>
+  <rep:browse xmlns:gsc="http://earth.esa.int/gsc"
+    xmlns:gml="http://www.opengis.net/gml"
+    xmlns:eop="http://earth.esa.int/eop"
+    xmlns:opt="http://earth.esa.int/opt">
+    <rep:browseIdentifier>ID_DODWH_MG2_CORE_09DM010001_1</rep:browseIdentifier>
+    <rep:fileName>ID_DEIMOS01-v2_DE0028bfp_L3R.tif</rep:fileName>
+    <rep:imageType>TIFF</rep:imageType>
+    <rep:referenceSystemIdentifier>EPSG:4326</rep:referenceSystemIdentifier>
+    <rep:modelInGeotiff>true</rep:modelInGeotiff>
+    <rep:startTime>2011-02-01T11:48:01Z</rep:startTime>
+    <rep:endTime>2011-02-01T11:48:27Z</rep:endTime>
+  </rep:browse>
+</bsi:ingestBrowse>
diff --git a/ingestor/tests/data/rectified_browse.xml b/ingestor/tests/data/rectified_browse.xml
new file mode 100644
index 0000000000000000000000000000000000000000..1daa44670f6c9808ec9c5b30a962b12a6c8aacf3
--- /dev/null
+++ b/ingestor/tests/data/rectified_browse.xml
@@ -0,0 +1,19 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<rep:browseReport version="1.1" xsi:schemaLocation="http://ngeo.eo.esa.int/schema/browseReport IF-ngEO-BrowseReport-1.1.xsd"
+  xmlns:rep="http://ngeo.eo.esa.int/schema/browseReport"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <rep:responsibleOrgName>SLAP 03.03</rep:responsibleOrgName>
+  <rep:dateTime>2014-07-24T11:58:24Z</rep:dateTime>
+  <rep:browseType>NGEO-LIGHT</rep:browseType>
+  <rep:browse>
+    <rep:browseIdentifier>LS05_RFUI_TM__GTC_1P_19910928T071939_19910928T072007_040292_0172_0031_B10D</rep:browseIdentifier>
+    <rep:fileName>http://landsat-ds.eo.esa.int/metadata/LandsatTMCloudFreeCoverage/1991/09/28/LS05_RFUI_TM__GTC_1P_19910928T071939_19910928T072007_040292_0172_0031_B10D.BP.PNG</rep:fileName>
+    <rep:imageType>PNG</rep:imageType>
+    <rep:referenceSystemIdentifier>EPSG:4326</rep:referenceSystemIdentifier>
+    <rep:rectifiedBrowse>
+      <rep:coordList>40.8395 40.1005 42.6645 42.7907</rep:coordList>
+    </rep:rectifiedBrowse>
+    <rep:startTime>1991-09-28T07:19:39Z</rep:startTime>
+    <rep:endTime>1991-09-28T07:20:07Z</rep:endTime>
+  </rep:browse>
+</rep:browseReport>
diff --git a/ingestor/tests/data/regular_grid_browse.xml b/ingestor/tests/data/regular_grid_browse.xml
new file mode 100644
index 0000000000000000000000000000000000000000..dd4b7d8dbe003ca6b27b2f097b0783f4387b91fa
--- /dev/null
+++ b/ingestor/tests/data/regular_grid_browse.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<bsi:ingestBrowse xmlns:bsi="http://ngeo.eo.esa.int/schema/browse/ingestion" xmlns:eop="http://earth.esa.int/eop" xmlns:gsc="http://earth.esa.int/gsc" xmlns:opt="http://earth.esa.int/opt" xmlns:rep="http://ngeo.eo.esa.int/ngEO/browseReport/1.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://ngeo.eo.esa.int/ngEO/browseReport/1.0 IF-ngEO-BrowseReport.xsd" version="1.3">
+  <rep:responsibleOrgName>Sentinel 1 PDGS</rep:responsibleOrgName>
+  <rep:dateTime>2012-11-08T17:25:49+01:00</rep:dateTime>
+  <rep:browseType>SAR</rep:browseType>
+  <rep:browse>
+    <rep:browseIdentifier>a20120101T043724405923</rep:browseIdentifier>
+    <rep:fileName>quick-look.png</rep:fileName>
+    <rep:imageType>PNG</rep:imageType>
+    <rep:referenceSystemIdentifier>EPSG:4326</rep:referenceSystemIdentifier>
+    <rep:regularGrid>
+      <rep:colNodeNumber>6</rep:colNodeNumber>
+      <rep:rowNodeNumber>20</rep:rowNodeNumber>
+      <rep:colStep>78.8</rep:colStep> <!-- 555x395px -->
+      <rep:rowStep>29.157894737</rep:rowStep>
+      <rep:coordList>1.680678325439503e+01 -1.565765611873593e+02 1.692753830505998e+01 -1.566004684133847e+02 1.704829841199142e+01 -1.566243747867503e+02 1.716905739541897e+01 -1.566482792679816e+02 1.728981280532396e+01 -1.566721814810342e+02 1.731890450297918e+01 -1.566779396385797e+02</rep:coordList>
+      <rep:coordList>1.681392675836918e+01 -1.565375764998714e+02 1.693467625534994e+01 -1.565614578735080e+02 1.705543081391954e+01 -1.565853381897874e+02 1.717618425492683e+01 -1.566092164100230e+02 1.729693412431370e+01 -1.566330921572468e+02 1.732602449992995e+01 -1.566388438384338e+02</rep:coordList>
+      <rep:coordList>1.682106287728486e+01 -1.564985888963170e+02 1.694180676422443e+01 -1.565224443957070e+02 1.706255571800818e+01 -1.565462986330286e+02 1.718330356009950e+01 -1.565701505703374e+02 1.730404783238212e+01 -1.565939998297750e+02 1.733313687211055e+01 -1.565997450303510e+02</rep:coordList>
+      <rep:coordList>1.682819160727274e+01 -1.564595983781882e+02 1.694892982780319e+01 -1.564834279815255e+02 1.706967312036884e+01 -1.565072561180554e+02 1.719041530703756e+01 -1.565310817505579e+02 1.731115392562071e+01 -1.565549045002936e+02 1.734024161561499e+01 -1.565606432159938e+02</rep:coordList>
+      <rep:coordList>1.683531294446828e+01 -1.564206049469740e+02 1.695604544221135e+01 -1.564444086325018e+02 1.707678301711796e+01 -1.564682106464455e+02 1.719751949184693e+01 -1.564920099523122e+02 1.731825240012606e+01 -1.565158061704734e+02 1.734733872654218e+01 -1.565215383970218e+02</rep:coordList>
+      <rep:coordList>1.684242688501015e+01 -1.563816086041690e+02 1.696315360357771e+01 -1.564053863501777e+02 1.708388540437526e+01 -1.564291622197829e+02 1.720461611063721e+01 -1.564529351772322e+02 1.732534325199815e+01 -1.564767048419914e+02 1.735442820099433e+01 -1.564824305751009e+02</rep:coordList>
+      <rep:coordList>1.684953342503939e+01 -1.563426093512784e+02 1.697025430803387e+01 -1.563663611361029e+02 1.709098027826299e+01 -1.563901108396617e+02 1.721170515952094e+01 -1.564138574269578e+02 1.733242647733964e+01 -1.564376005165340e+02 1.736151003507623e+01 -1.564433197519077e+02</rep:coordList>
+      <rep:coordList>1.685663256069959e+01 -1.563036071898168e+02 1.697734755171454e+01 -1.563273329918340e+02 1.709806763490601e+01 -1.563510565076854e+02 1.721878663461358e+01 -1.563747767031367e+02 1.733950207225592e+01 -1.563984931957972e+02 1.736858422489525e+01 -1.564042059291287e+02</rep:coordList>
+      <rep:coordList>1.686372428813775e+01 -1.562646021213034e+02 1.698443333075823e+01 -1.562883019189305e+02 1.710514747043269e+01 -1.563119992254620e+02 1.722586053203457e+01 -1.563356930074195e+02 1.734657003285606e+01 -1.563593828814815e+02 1.737565076656231e+01 -1.563650891084553e+02</rep:coordList>
+      <rep:coordList>1.687080860350595e+01 -1.562255941472530e+02 1.699151164130902e+01 -1.562492679189448e+02 1.711221978097664e+01 -1.562729389945953e+02 1.723292684790889e+01 -1.562966063414503e+02 1.735363035525441e+01 -1.563202695752825e+02 1.738270965619353e+01 -1.563259692915751e+02</rep:coordList>
+      <rep:coordList>1.687424257900799e+01 -1.562066700838774e+02 1.699494268350900e+01 -1.562303312224714e+02 1.711564789218319e+01 -1.562539895656031e+02 1.723635203068512e+01 -1.562776440808492e+02 1.735705261014584e+01 -1.563012943835597e+02 1.738613121105771e+01 -1.563069909399195e+02</rep:coordList>
+      <rep:coordList>1.688495498268113e+01 -1.561475694884959e+02 1.700564584155052e+01 -1.561711911438214e+02 1.712634181169133e+01 -1.561948096932411e+02 1.724703671955967e+01 -1.562184241052198e+02 1.736772806994446e+01 -1.562420339939122e+02 1.739680446385366e+01 -1.562477206758413e+02</rep:coordList>
+      <rep:coordList>1.689201703884214e+01 -1.561085528066608e+02 1.701270172358132e+01 -1.561321483716202e+02 1.713339152417947e+01 -1.561557406258028e+02 1.725408026763801e+01 -1.561793285380803e+02 1.737476545451566e+01 -1.562029117219735e+02 1.740384037416505e+01 -1.562085918802064e+02</rep:coordList>
+      <rep:coordList>1.689907166764901e+01 -1.560695332249886e+02 1.701975012180706e+01 -1.560931026781663e+02 1.714043369632741e+01 -1.561166686157758e+02 1.726111621878105e+01 -1.561402300068893e+02 1.738179518545178e+01 -1.561637864645731e+02 1.741086861701329e+01 -1.561694600947586e+02</rep:coordList>
+      <rep:coordList>1.690611886532681e+01 -1.560305107447066e+02 1.702679103244686e+01 -1.560540540647140e+02 1.714746832434206e+01 -1.560775936644758e+02 1.726814456918872e+01 -1.561011285129948e+02 1.738881725894098e+01 -1.561246582231180e+02 1.741788918858779e+01 -1.561303253208992e+02</rep:coordList>
+      <rep:coordList>1.691315862812868e+01 -1.559914853669090e+02 1.703382445174815e+01 -1.560150025323825e+02 1.715449540445841e+01 -1.560385157730854e+02 1.727516531508931e+01 -1.560620240576098e+02 1.739583167119955e+01 -1.560855269988818e+02 1.742490208510597e+01 -1.560911875598966e+02</rep:coordList>
+      <rep:coordList>1.692019095234127e+01 -1.559524570925252e+02 1.704085037599229e+01 -1.559759480821245e+02 1.716151493294500e+01 -1.559994349426222e+02 1.728217845274488e+01 -1.560229166417812e+02 1.740283841847734e+01 -1.560463927929730e+02 1.743190730281881e+01 -1.560520468128543e+02</rep:coordList>
+      <rep:coordList>1.692721583429081e+01 -1.559134259222858e+02 1.704786880150054e+01 -1.559368907146913e+02 1.716852690610996e+01 -1.559603511739047e+02 1.728918397845739e+01 -1.559838062663549e+02 1.740983749706388e+01 -1.560072556063005e+02 1.743890483801674e+01 -1.560129030806771e+02</rep:coordList>
+      <rep:coordList>1.693423327034943e+01 -1.558743918566859e+02 1.705487972464038e+01 -1.558978304305973e+02 1.717553132030729e+01 -1.559212644675158e+02 1.729618188857495e+01 -1.559446929319396e+02 1.741682890329453e+01 -1.559681154395373e+02 1.744589468703602e+01 -1.559737563640340e+02</rep:coordList>
+      <rep:coordList>1.694103455650875e+01 -1.558365177408673e+02 1.706167463789954e+01 -1.558599308569188e+02 1.718231986439887e+01 -1.558833392385752e+02 1.730296406819755e+01 -1.559067418477961e+02 1.742360471850083e+01 -1.559301383022012e+02 1.745266897820548e+01 -1.559357728677357e+02</rep:coordList>
+    </rep:regularGrid>
+    <rep:startTime>2012-01-01T04:37:24.405923</rep:startTime>
+    <rep:endTime>2012-01-01T04:37:32.890783</rep:endTime>
+  </rep:browse>
+</bsi:ingestBrowse>
diff --git a/preprocessor/Dockerfile b/preprocessor/Dockerfile
index 9294c83268a2d60ed15c497dbba838523dce7b8b..1d3cb90f96f4273318e6ef21d24ab061decf677a 100644
--- a/preprocessor/Dockerfile
+++ b/preprocessor/Dockerfile
@@ -25,7 +25,8 @@
 # IN THE SOFTWARE.
 #-----------------------------------------------------------------------------
 
-FROM osgeo/gdal:ubuntu-small-latest
+FROM osgeo/gdal:ubuntu-full-3.1.2
+
 MAINTAINER EOX
 LABEL name="prism view server preprocessor" \
       vendor="EOX IT Services GmbH <https://eox.at>" \
@@ -33,11 +34,14 @@ LABEL name="prism view server preprocessor" \
       type="prism view server preprocessor" \
       version="0.0.1-dev"
 
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
 USER root
 
 RUN apt update && \
     apt install -y \
-        python3-redis python3-keystoneclient python3-swiftclient wait-for-it && \
+        python3-redis python3-keystoneclient python3-swiftclient python3-click python3-setuptools python3-jsonschema wait-for-it && \
     apt autoremove -y && \
     apt clean && \
     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
@@ -46,32 +50,40 @@ ENV INSTANCE_ID="prism-data-access-server_preprocessor" \
     COLLECTION= \
     UPLOAD_CONTAINER= \
     ST_AUTH_VERSION=3 \
-    OS_AUTH_URL="https://auth.cloud.ovh.net/v3/" \
+    OS_AUTH_URL= \
     OS_USERNAME= \
     OS_PASSWORD= \
     OS_TENANT_NAME= \
     OS_TENANT_ID= \
     OS_REGION_NAME= \
+    OS_USER_DOMAIN_NAME= \
     OS_AUTH_URL_DOWNLOAD= \
     ST_AUTH_VERSION_DOWNLOAD= \
     OS_USERNAME_DOWNLOAD= \
     OS_PASSWORD_DOWNLOAD= \
     OS_TENANT_NAME_DOWNLOAD= \
     OS_REGION_NAME_DOWNLOAD= \
+    OS_USER_DOMAIN_NAME_DOWNLOAD= \
     REDIS_HOST= \
     REDIS_PORT= \
     REDIS_PREPROCESS_QUEUE_KEY= \
-    REDIS_REGISTER_QUEUE_KEY=
+    REDIS_PREPROCESS_MD_QUEUE_KEY= \
+    REDIS_REGISTER_QUEUE_KEY= \
+    PREPROCESSOR_DEBUG= 
 
 ADD run-preprocessor.sh \
-    preprocessor.py \
-    get_min_max.py \
-    transform_chain.py \
     entrypoint.sh \
+    setup.py \
     /
 
+RUN chmod +x /run-preprocessor.sh
+
+COPY preprocessor /preprocessor
+
+RUN cd / && \
+    python3 setup.py install
+
 RUN chmod -v +x \
-    /run-preprocessor.sh \
     /entrypoint.sh
 
 ENTRYPOINT ["/entrypoint.sh"]
diff --git a/preprocessor/get_min_max.py b/preprocessor/get_min_max.py
deleted file mode 100644
index a9ef3aa181b53eaf753f27cb6da99104d4722e67..0000000000000000000000000000000000000000
--- a/preprocessor/get_min_max.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-# -----------------------------------------------------------------------------
-#
-# Project: get_min_max.py
-# Authors: Stephan Meissl <stephan.meissl@eox.at>
-#
-# -----------------------------------------------------------------------------
-# Copyright (c) 2019 EOX IT Services GmbH
-#
-# Python script to retrieve min and max values of items.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies of this Software or works derived from this Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-# -----------------------------------------------------------------------------
-
-
-import sys
-import os
-import argparse
-import textwrap
-import logging
-import traceback
-import subprocess
-import re
-
-from swiftclient.service import SwiftError, SwiftService
-
-
-logger = logging.getLogger(__name__)
-
-
-def setup_logging(verbosity):
-    # start logging setup
-    # get command line level
-    verbosity = verbosity
-    if verbosity == 0:
-        level = logging.CRITICAL
-    elif verbosity == 1:
-        level = logging.ERROR
-    elif verbosity == 2:
-        level = logging.WARNING
-    elif verbosity == 3:
-        level = logging.INFO
-    else:
-        level = logging.DEBUG
-    logger.setLevel(level)
-    sh = logging.StreamHandler()
-    sh.setLevel(level)
-    formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
-    sh.setFormatter(formatter)
-    logger.addHandler(sh)
-    # finished logging setup
-
-
-def get_min_max(collection):
-    logger.info("Starting")
-
-    try:
-        with SwiftService() as swift, open("min_max_out", "a+") as outfile:
-            p = subprocess.run(
-                ["swift", "auth"], capture_output=True,
-                timeout=600, check=True
-            )
-            vars = p.stdout.decode("utf-8")
-            os.environ["SWIFT_STORAGE_URL"] = re.findall(
-                r"OS_STORAGE_URL=(.*)\n", vars
-            )[0]
-            os.environ["SWIFT_AUTH_TOKEN"] = re.findall(
-                r"OS_AUTH_TOKEN=(.*)\n", vars
-            )[0]
-            outfile.write("container,product_type,min/max\n")
-            try:
-                list_gen = swift.list()
-                for page in list_gen:
-                    if page["success"]:
-                        for item in page["listing"]:
-                            list_gen2 = swift.list(container=item["name"])
-                            for page2 in list_gen2:
-                                if page2["success"]:
-                                    for item2 in page2["listing"]:
-                                        if item2["name"].endswith(".TIF") or \
-                                                item2["name"].endswith(".tif"):
-                                            gdalout = subprocess.run([
-                                                "gdalinfo", "-mm",
-                                                "/vsiswift/%s/%s" %
-                                                (item["name"], item2["name"])],
-                                                capture_output=True,
-                                                timeout=600, check=True
-                                            ).stdout.decode("utf-8")
-                                            minmax = re.findall(
-                                                r"Computed Min/Max=(.*)\n",
-                                                gdalout
-                                            )
-                                            outfile.write(
-                                                "%s,%s,%s\n" %
-                                                (item["name"],
-                                                 item2["name"].split("/")[1],
-                                                 minmax)
-                                            )
-                                else:
-                                    logger.error(
-                                        "No product found in container '%s'."
-                                        % item["name"]
-                                    )
-                                    return(1)
-                    else:
-                        logger.error("No container found.")
-                        return(1)
-
-            except SwiftError as e:
-                logger.debug(traceback.format_exc())
-                logger.error("%s: %s\n" % (type(e).__name__, str(e)))
-                return(1)
-
-    except Exception as e:
-        logger.debug(traceback.format_exc())
-        logger.error("%s: %s\n" % (type(e).__name__, str(e)))
-        return(1)
-
-    logger.info("Successfully finished")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.description = textwrap.dedent("""\
-    Get min/max of products.
-    """)
-
-    parser.add_argument(
-        "-v", "--verbosity", type=int, default=3, choices=[0, 1, 2, 3, 4],
-        help=(
-            "Set verbosity of log output "
-            "(4=DEBUG, 3=INFO, 2=WARNING, 1=ERROR, 0=CRITICAL). (default: 3)"
-        )
-    )
-
-    arg_values = parser.parse_args()
-
-    setup_logging(arg_values.verbosity)
-
-    collection = os.environ.get('Collection')
-    if collection is None:
-        logger.critical("Collection environment variable not set.")
-        sys.exit(1)
-
-    get_min_max(
-        collection,
-    )
diff --git a/preprocessor/gsc_generator.py b/preprocessor/gsc_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a863d2f6fa8c8aa34a8908fca7aa10cb313fa79
--- /dev/null
+++ b/preprocessor/gsc_generator.py
@@ -0,0 +1,147 @@
+from textwrap import dedent
+
+from osgeo import gdal
+
+
+def positions_to_poslist(positions, projection):
+    # TODO: maybe reproject if not lat, lon
+    return ' '.join(
+        ' '.join(str(value) for value in pair)
+        for pair in positions
+    )
+
+
+def positions_from_corners(low, high):
+    minx, miny = low
+    maxx, maxy = high
+
+    return [
+        (minx, miny),
+        (maxx, miny),
+        (maxx, maxy),
+        (minx, maxy),
+        (minx, miny),  # repeat the first corner to close the ring
+    ]
+
+
+def get_footprint_from_browse(data_file, browse):
+    btype = browse['browse_type']
+
+    if btype == 'rectified_browse':
+        low, high = browse['rectified']['coord_list']
+        positions = positions_from_corners(low, high)
+
+    elif btype == 'footprint_browse':
+        positions = browse['footprint']
+
+    elif btype == 'model_in_geotiff_browse':
+        ds = gdal.Open(data_file)
+        gt = ds.GetGeoTransform()
+        width, height = ds.RasterXSize, ds.RasterYSize
+
+        low = (gt[0], gt[3] + gt[5] * height)
+        high = (gt[0] + gt[1] * width, gt[3])
+
+        positions = positions_from_corners(low, high)
+
+    elif btype == 'regular_grid_browse':
+        raise NotImplementedError('Regular grid browses are not supported')
+
+    else:
+        raise ValueError('Unsupported browse type %r' % btype)
+
+    return positions_to_poslist(
+        positions,
+        browse['reference_system_identifier'],
+    )
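+
+# Usage sketch (values are illustrative, not taken from a real browse report):
+# a 'footprint_browse' carries its positions directly, so the data file is not
+# accessed in that case.
+#
+#   browse = {
+#       'browse_type': 'footprint_browse',
+#       'footprint': [('16.82', '-156.49'), ('16.94', '-156.52'), ('16.82', '-156.49')],
+#       'reference_system_identifier': 'EPSG:4326',
+#   }
+#   pos_list = get_footprint_from_browse('browse.tif', browse)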
+
+
+def generate_gsc_metadata(metadata):
+    return dedent("""\
+        <?xml version='1.0' encoding='UTF-8'?>
+        <gsc:report xmlns:sar="http://earth.esa.int/sar"
+            xmlns:gml="http://www.opengis.net/gml"
+            xmlns:eop="http://earth.esa.int/eop"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+            xmlns:opt="http://earth.esa.int/opt"
+            xmlns:gsc="http://earth.esa.int/gsc"
+            xmlns:atm="http://earth.esa.int/atm"
+            xmlns:xlink="http://www.w3.org/1999/xlink" version="2.0">
+            <gsc:responsibleOrgName>EOX</gsc:responsibleOrgName>
+            <gsc:reportType>CIRCULATION</gsc:reportType>
+            <gsc:dateTime>{now_time}</gsc:dateTime>
+            <gsc:orderReference></gsc:orderReference>
+            <gsc:opt_metadata version="1.2.1">
+                <gml:metaDataProperty>
+                <gsc:EarthObservationMetaData>
+                    <eop:identifier>{identifier}</eop:identifier>
+                    <!--<eop:parentIdentifier>D2_MG2b_FRTX_004a/other/FRSC_0059_001/SpotImage</eop:parentIdentifier>-->
+                    <eop:acquisitionType>NOMINAL</eop:acquisitionType>
+                    <eop:productType>{product_type}</eop:productType>
+                    <eop:status>ARCHIVED</eop:status>
+                    <eop:archivedIn>
+                        <eop:ArchivingInformation>
+                            <eop:archivingCenter>CDS</eop:archivingCenter>
+                            <eop:archivingDate>{archiving_time}</eop:archivingDate>
+                        </eop:ArchivingInformation>
+                    </eop:archivedIn>
+                    <gsc:deliveryInfo>
+                        <gsc:deliveryDateTime>{delivery_time}</gsc:deliveryDateTime>
+                        <gsc:deliveryMethod>ELECTRONIC</gsc:deliveryMethod>
+                    </gsc:deliveryInfo>
+                </gsc:EarthObservationMetaData>
+                </gml:metaDataProperty>
+                <gml:validTime>
+                    <gml:TimePeriod>
+                        <gml:beginPosition>{begin_time}</gml:beginPosition>
+                        <gml:endPosition>{end_time}</gml:endPosition>
+                    </gml:TimePeriod>
+                </gml:validTime>
+                <gml:using>
+                    <!--<eop:EarthObservationEquipment>
+                        <eop:platform>
+                            <eop:Platform>
+                                <eop:shortName>PH1A</eop:shortName>
+                                <eop:serialIdentifier>1A</eop:serialIdentifier>
+                            </eop:Platform>
+                        </eop:platform>
+                        <eop:instrument>
+                            <eop:Instrument>
+                                <eop:shortName>HR</eop:shortName>
+                            </eop:Instrument>
+                        </eop:instrument>
+                        <eop:sensor>
+                            <eop:Sensor>
+                                <eop:sensorType>OPTICAL</eop:sensorType>
+                                <eop:operationalMode>FUS</eop:operationalMode>
+                                <eop:resolution uom="m">0.5</eop:resolution>
+                            </eop:Sensor>
+                        </eop:sensor>
+                        <eop:acquisitionParameters>
+                            <opt:Acquisition>
+                                <eop:orbitNumber>118</eop:orbitNumber>
+                                <eop:orbitDirection>DESCENDING</eop:orbitDirection>
+                                <eop:acrossTrackIncidenceAngle uom="deg">-4.070247073869651</eop:acrossTrackIncidenceAngle>
+                                <eop:alongTrackIncidenceAngle uom="deg">2.304231907410827</eop:alongTrackIncidenceAngle>
+                                <opt:illuminationAzimuthAngle uom="deg">164.3516878667332</opt:illuminationAzimuthAngle>
+                            </opt:Acquisition>
+                        </eop:acquisitionParameters>
+                    </eop:EarthObservationEquipment>-->
+                </gml:using>
+                <gml:target>
+                    <eop:Footprint>
+                        <gml:multiExtentOf>
+                            <gml:MultiSurface srsName="EPSG:4326">
+                                <gml:surfaceMembers>
+                                    <gml:Polygon>
+                                        <gml:exterior>
+                                            <gml:LinearRing>
+                                                <gml:posList>{footprint}</gml:posList>
+                                            </gml:LinearRing>
+                                        </gml:exterior>
+                                    </gml:Polygon>
+                                </gml:surfaceMembers>
+                            </gml:MultiSurface>
+                        </gml:multiExtentOf>
+                    </eop:Footprint>
+                </gml:target>
+                <gml:resultOf/>
+            </gsc:opt_metadata>
+        </gsc:report>""".format(**metadata))
\ No newline at end of file
diff --git a/preprocessor/preprocessor.py b/preprocessor/preprocessor.py
deleted file mode 100644
index a0d059797a0ab7e4b3663b4e423c3f34b4edef9c..0000000000000000000000000000000000000000
--- a/preprocessor/preprocessor.py
+++ /dev/null
@@ -1,417 +0,0 @@
-#!/usr/bin/env python
-# -----------------------------------------------------------------------------
-#
-# Project: preprocessor.py
-# Authors: Stephan Meissl <stephan.meissl@eox.at>
-#
-# -----------------------------------------------------------------------------
-# Copyright (c) 2019 EOX IT Services GmbH
-#
-# Python script to preprocess product data.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies of this Software or works derived from this Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-# -----------------------------------------------------------------------------
-
-
-import sys
-import os
-import argparse
-import textwrap
-import logging
-import traceback
-import redis
-import tempfile
-import tarfile
-import re
-import subprocess
-
-from swiftclient.multithreading import OutputManager
-from swiftclient.service import SwiftError, SwiftService, SwiftUploadObject
-
-import transform_chain
-
-SPLIT_PARTS_CHECK = os.environ.get('SPLIT_PARTS_CHECK')
-ENFORCE_FOUR_BANDS = os.environ.get('ENFORCE_FOUR_BANDS')
-
-FILESIZE_LIMIT = 4 * (1024 ** 3)  # swift 5GB limit for filesize (non-compressed), here less to have margin
-swift_upload_options = {
-    'use_slo': True
-}
-
-logger = logging.getLogger("preprocessor")
-
-
-def setup_logging(verbosity):
-    # start logging setup
-    # get command line level
-    verbosity = verbosity
-    if verbosity == 0:
-        level = logging.CRITICAL
-    elif verbosity == 1:
-        level = logging.ERROR
-    elif verbosity == 2:
-        level = logging.WARNING
-    elif verbosity == 3:
-        level = logging.INFO
-    else:
-        level = logging.DEBUG
-    logger.setLevel(level)
-    sh = logging.StreamHandler()
-    sh.setLevel(level)
-    formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
-    sh.setFormatter(formatter)
-    logger.addHandler(sh)
-    # finished logging setup
-
-
-def preprocessor(
-    collection, tar_object_path, upload_container, replace=False,
-    client=None, register_queue_key=None
-):
-    logger.info("Starting preprocessing of '%s'." % (tar_object_path))
-
-    try:
-        container = tar_object_path.split("/")[1]
-        package = "/".join(tar_object_path.split("/")[2:])
-
-        with SwiftService() as swift, OutputManager(), \
-                tempfile.TemporaryDirectory() as tmpdirname:
-            if not replace:
-                try:
-                    list_parts_gen = swift.list(
-                        container=upload_container, options={"prefix": tar_object_path},
-                    )
-                    for page in list_parts_gen:
-                        if page["success"]:
-                            logger.critical(
-                                "Aborting, package '%s' already exists at "
-                                "target container '%s'." % (package, container)
-                            )
-                            return(1)
-                except SwiftError as e:
-                    logger.debug(traceback.format_exc())
-                    logger.error("%s: %s\n" % (type(e).__name__, str(e)))
-                    return(1)
-
-            tmpfilename = os.path.join(tmpdirname, "tmp.tar")
-
-            options = {
-                "os_username": os.environ.get('OS_USERNAME_DOWNLOAD'),
-                "os_password": os.environ.get('OS_PASSWORD_DOWNLOAD'),
-                "os_tenant_name": os.environ.get('OS_TENANT_NAME_DOWNLOAD'),
-                "os_tenant_id": os.environ.get('OS_TENANT_ID_DOWNLOAD'),
-                "os_region_name": os.environ.get('OS_REGION_NAME_DOWNLOAD'),
-                "os_auth_url": os.environ.get('OS_AUTH_URL_DOWNLOAD'),
-                "auth_version": os.environ.get('ST_AUTH_VERSION_DOWNLOAD'),
-            }
-            with SwiftService(options=options) as swift_down:
-                for down_res in swift_down.download(
-                    container=container,
-                    objects=[package, ],
-                    options={"out_file": tmpfilename},
-                ):
-                    if down_res["success"]:
-                        logger.debug(
-                            "'%s' downloaded" % down_res["object"]
-                        )
-                    else:
-                        logger.error(
-                            "'%s' download failed" % down_res["object"]
-                        )
-                        return(1)
-
-            tf = tarfile.open(tmpfilename, mode="r")
-
-            data_files_ti = [
-                m for m in tf.getmembers() if
-                m is not None and re.search(r"IMG.+\.(TIF|JP2)", m.name, re.IGNORECASE)
-            ]
-            metadata_file_ti = next(
-                m for m in tf.getmembers() if m is not None and re.search(r"GSC.+\.xml", m.name, re.IGNORECASE)
-            )
-            world_files_ti = [
-                m for m in tf.getmembers() if m is not None and
-                re.search(r"RPC.+\.xml", m.name, re.IGNORECASE)
-            ]
-            # add J2W files only if more than one files are present
-            # that signalizes that file was split into multiple or has panchromatic
-            if len(data_files_ti) > 1:
-                world_files_ti += [
-                    m for m in tf.getmembers() if m is not None and
-                    re.search(r".+\.J2W", m.name, re.IGNORECASE)
-                ]
-            data_files = [
-                member.name
-                for member in data_files_ti
-            ]
-            metadata_file = metadata_file_ti.name
-            members = data_files_ti + [metadata_file_ti] + world_files_ti
-
-            if not data_files or not metadata_file:
-                logger.error(
-                    "Aborting, not all needed files found in package."
-                )
-                return(1)
-
-            tf.extractall(path=tmpdirname, members=members)
-
-            # cleanup after use to save space
-            tf.close()
-            os.remove(tmpfilename)
-
-            source_name_first = os.path.join(tmpdirname, data_files[0])
-
-            # if there is more than one file, make a VRT to mosaic them
-            if len(data_files) > 1:
-                logger.debug("More files found, creating a VRT")
-                source_name_vrt = os.path.join(tmpdirname, 'tmp.vrt')
-                # open all datasets one by one and create an array of open datasets
-                dataset_array = [transform_chain.open_gdal_dataset(os.path.join(tmpdirname, data_file)) for data_file in data_files]
-                if ENFORCE_FOUR_BANDS:
-                    # remove and close datasets with different number of bands than expected
-                    dataset_array = list(filter(None, [transform_chain.validate_band_count(dataset, 4) for dataset in dataset_array]))
-                    if len(dataset_array) == 0:
-                        logger.error(
-                            "Aborting, wrong number of bands for all datasets %s" % ",".join(data_files)
-                        )
-                        return(1)
-                # try to fix geotransform for ortho images one by one before making a vrt, which fails otherwise
-                dataset_array = [transform_chain.correct_geo_transform(dataset_entity) for dataset_entity in dataset_array]
-                # create a vrt out of them
-                dataset = transform_chain.create_vrt_dataset(dataset_array, source_name_vrt)
-                # during creating of a vrt, reference to RPC is lost
-                # if there was rpc, set it to the vrt
-                dataset = transform_chain.set_rpc_metadata(dataset_array[0], dataset)
-                dataset_array = None
-            else:
-                # open file using gdal
-                dataset = transform_chain.open_gdal_dataset(source_name_first)
-            # close datasets with different number of bands than expected
-            if ENFORCE_FOUR_BANDS:
-                dataset = transform_chain.validate_band_count(dataset, 4)
-                if dataset is None:
-                    logger.error(
-                        "Aborting, wrong number of bands for dataset %s" % data_files[0]
-                    )
-                    return(1)
-            # change RPC to geotransform if present
-            dataset = transform_chain.apply_rpc(dataset)
-
-            # perform transformation correction if necessary
-            dataset = transform_chain.correct_geo_transform(dataset)
-
-            # save file with given options - should use ENV
-            creation_options = ["BLOCKSIZE=512", "COMPRESS=DEFLATE", "LEVEL=6", "NUM_THREADS=8",
-                                "BIGTIFF=IF_SAFER", "OVERVIEWS=AUTO", "RESAMPLING=CUBIC"]
-
-            split_parts = transform_chain.split_check(dataset, FILESIZE_LIMIT) if SPLIT_PARTS_CHECK == True else 1
-
-            output_file_list = transform_chain.write_gdal_dataset_split(dataset, "COG", "%s.tif" % os.path.splitext(
-                source_name_first)[0], creation_options, split_parts)
-            dataset = None
-            objects = []
-            # create vrt if file was split
-            if len(output_file_list) > 1:
-                logger.debug("Creating .vrt of previously split files.")
-                vrt_name = "%s.vrt" % os.path.splitext(source_name_first)[0]
-                subprocess.run(
-                    ['gdalbuildvrt', '-quiet', os.path.basename(vrt_name)] + [
-                        os.path.basename(data_file) for data_file in output_file_list],
-                    timeout=600, check=True, cwd=os.path.dirname(vrt_name)
-                )  # use cwd to create relative paths in vrt
-                # add vrt to files to be uploaded
-                objects.append(
-                    SwiftUploadObject(
-                        vrt_name,
-                        object_name=os.path.join(
-                            container, package, os.path.basename(vrt_name))
-                    )
-                )
-
-            # add image files to files to be uploaded
-            for data_file in output_file_list:
-                # check if 5GB swift upload limit is exceeded by any of files, if yes, use segmentation
-                size = os.stat(data_file).st_size
-                if (size > 1024 * 1024 * 1024 * 5):
-                    swift_upload_options["segment_size"] = 2 * 1024 * 1024 * 1024  # 2gb segments
-
-                dest_object_name = os.path.join(
-                    container, package, os.path.basename(data_file)
-                )
-                objects.append(
-                    SwiftUploadObject(
-                        data_file,
-                        object_name=dest_object_name
-                    )
-                )
-
-            # add metadata to files to be uploaded after data files
-            objects.append(
-                SwiftUploadObject(
-                    os.path.join(tmpdirname, metadata_file),
-                    object_name=os.path.join(container, package, metadata_file)
-                )
-            )
-
-            # upload files
-            for upload in swift.upload(
-                container=upload_container,
-                objects=objects,
-                options=swift_upload_options
-            ):
-                if upload["success"]:
-                    if "object" in upload:
-                        logger.info(
-                            "'%s' successfully uploaded." % upload["object"]
-                        )
-                    elif "for_object" in upload:
-                        logger.debug(
-                            "Successfully uploaded '%s' segment '%s'."
-                            % (upload["for_object"], upload["segment_index"])
-                        )
-                else:
-                    logger.error(
-                        "'%s' upload failed" % upload["error"]
-                    )
-                    return(1)
-
-            if client is not None:
-                logger.debug(
-                    "Storing paths in redis queue '%s" % register_queue_key
-                )
-                client.lpush(
-                    register_queue_key, "%s" % tar_object_path
-                )
-
-    except Exception as e:
-        logger.debug(traceback.format_exc())
-        logger.error("%s: %s\n" % (type(e).__name__, str(e)))
-        return(1)
-
-    logger.info(
-        "Successfully finished preprocessing of '%s'." % (tar_object_path)
-    )
-
-
-def preprocessor_redis_wrapper(
-    collection, upload_container, replace=False, host="localhost", port=6379,
-    preprocess_queue_key="preprocess_queue",
-    register_queue_key="register_queue"
-):
-    client = redis.Redis(
-        host=host, port=port, charset="utf-8", decode_responses=True
-    )
-    while True:
-        logger.debug("waiting for redis queue '%s'..." % preprocess_queue_key)
-        value = client.brpop(preprocess_queue_key)
-        preprocessor(
-            collection,
-            value[1],
-            upload_container,
-            replace=replace,
-            client=client,
-            register_queue_key=register_queue_key
-        )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.description = textwrap.dedent("""\
-    Preprocess product data.
-    """)
-
-    parser.add_argument(
-        "--mode", default="standard", choices=["standard", "redis"],
-        help=(
-            "The mode to run the preprocessor. Either one-off (standard) or "
-            "reading from a redis queue."
-        )
-    )
-    parser.add_argument(
-        "--tar-object-path", default=None,
-        help=(
-            "Path to object holding tar archive file of product."
-        )
-    )
-    parser.add_argument(
-        "--upload-container", default=None,
-        help=(
-            "The name of the swift container where the result is uploaded."
-        )
-    )
-    parser.add_argument(
-        "--replace", action="store_true",
-        help=(
-            "Replace existing products instead of skipping the preprocessing."
-        )
-    )
-    parser.add_argument(
-        "--redis-preprocess-queue-key", default="preprocess_queue"
-    )
-    parser.add_argument(
-        "--redis-register-queue-key", default="register_queue"
-    )
-    parser.add_argument(
-        "--redis-host", default="localhost"
-    )
-    parser.add_argument(
-        "--redis-port", type=int, default=6379
-    )
-
-    parser.add_argument(
-        "-v", "--verbosity", type=int, default=3, choices=[0, 1, 2, 3, 4],
-        help=(
-            "Set verbosity of log output "
-            "(4=DEBUG, 3=INFO, 2=WARNING, 1=ERROR, 0=CRITICAL). (default: 3)"
-        )
-    )
-
-    arg_values = parser.parse_args()
-
-    setup_logging(arg_values.verbosity)
-
-    collection = os.environ.get('COLLECTION')
-    if collection is None:
-        logger.critical("Collection environment variable not set.")
-        sys.exit(1)
-
-    upload_container = arg_values.upload_container
-    if upload_container is None:
-        upload_container = os.environ.get('UPLOAD_CONTAINER')
-        if upload_container is None:
-            logger.critical("UPLOAD_CONTAINER environment variable not set.")
-            sys.exit(1)
-
-    if arg_values.mode == "standard":
-        preprocessor(
-            collection,
-            arg_values.tar_object_path,
-            upload_container,
-            replace=arg_values.replace,
-        )
-    else:
-        preprocessor_redis_wrapper(
-            collection,
-            upload_container,
-            replace=arg_values.replace,
-            host=arg_values.redis_host,
-            port=arg_values.redis_port,
-            preprocess_queue_key=arg_values.redis_preprocess_queue_key,
-            register_queue_key=arg_values.redis_register_queue_key,
-        )
diff --git a/preprocessor/preprocessor/__init__.py b/preprocessor/preprocessor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/preprocessor/preprocessor/archive.py b/preprocessor/preprocessor/archive.py
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb973939d2c6799cc387b8759bc43f64372ad
--- /dev/null
+++ b/preprocessor/preprocessor/archive.py
@@ -0,0 +1,130 @@
+from os import PathLike
+import os.path
+import io
+from typing import List, Union, BinaryIO
+import tarfile
+import zipfile
+import logging
+from fnmatch import translate
+import re
+
+logger = logging.getLogger(__name__)
+
+ARCHIVE_EXTENSIONS = ['ZIP', 'zip', 'TAR', 'tar', 'TAR.BZ2', 'tar.bz2', 'TAR.GZ', 'tar.gz']
+
+def filter_filenames(filenames: List[PathLike], glob: str, case: bool=False) -> List[PathLike]:
+    regex = translate(glob)
+    if case:
+        reobj = re.compile(regex)
+    else:
+        reobj = re.compile(regex, re.IGNORECASE)
+    return [
+        filename
+        for filename in filenames
+        if reobj.match(filename)
+    ]
+
+
+def is_tarfile(archive_file: Union[PathLike, BinaryIO]) -> bool:
+    """ Helper to detect whether a path or a file object is
+        referencing a valid TAR file.
+    """
+    try:
+        return tarfile.is_tarfile(archive_file)
+    except TypeError:
+        pass
+
+    try:
+        tarfile.open(fileobj=archive_file)
+        return True
+    except (TypeError, tarfile.ReadError):
+        return False
+
+def open_tarfile(archive_file: Union[PathLike, BinaryIO]) -> tarfile.TarFile:
+    """ Open a TAR file from either a path or a file object.
+    """
+    # file-like objects must be passed via the 'fileobj' keyword; checking
+    # against io.IOBase also covers streams such as io.BytesIO
+    if isinstance(archive_file, io.IOBase):
+        return tarfile.open(fileobj=archive_file)
+    return tarfile.open(archive_file)
+
+
+def unpack_files(archive_path: Union[PathLike, BinaryIO], target_dir: PathLike, glob=None, case=None, filenames=None, recursive=False) -> List[PathLike]:
+    """ Unpacks the contents of the specified ZIP or TAR archive to the
+        given target directory. Optionally, only a given list of filenames
+        will be extracted.
+        When a glob is passed, all filenames (either given or from the archive)
+        will be filtered and only the matching files will be extracted.
+    """
+    iszip = False
+    istar = False
+
+    # open the archive and extract a list of filenames
+    if is_tarfile(archive_path):
+        archive = open_tarfile(archive_path)
+        all_filenames = archive.getnames()
+        filenames = filenames or all_filenames
+        istar = True
+    elif zipfile.is_zipfile(archive_path):
+        archive = zipfile.ZipFile(archive_path)
+        all_filenames = archive.namelist()
+        filenames = filenames or all_filenames
+        iszip = True
+    else:
+        raise Exception('Cannot open archive %s' % archive_path)
+
+    # filter the filenames when a glob is passed
+    if glob:
+        filenames = filter_filenames(filenames, glob, case)
+
+    extracted_filenames = []
+
+    # extract the files to the target directory
+    if istar:
+        members = [
+            member
+            for member in archive.getmembers()
+            if member.name in filenames
+        ]
+        archive.extractall(target_dir, members)
+        extracted_filenames.extend([
+            os.path.join(target_dir, member.name)
+            for member in members
+        ])
+
+    elif iszip:
+        archive.extractall(target_dir, filenames)
+        extracted_filenames.extend([
+            os.path.join(target_dir, filename)
+            for filename in filenames
+        ])
+
+    # go into the sub-archives to extract files
+    if recursive:
+        for extension in ARCHIVE_EXTENSIONS:
+            sub_archives = filter_filenames(all_filenames, '*.%s' % extension)
+            for sub_archive in sub_archives:
+                sub_archive_filename = os.path.join(
+                    os.path.dirname(archive_path),
+                    os.path.basename(sub_archive),
+                )
+                if istar:
+                    archive.extract(
+                        archive.getmember(sub_archive)
+                    )
+                    os.rename(sub_archive, sub_archive_filename)
+                if iszip:
+                    archive.extract(sub_archive)
+                    os.rename(sub_archive, sub_archive_filename)
+
+                sub_filenames = unpack_files(
+                    sub_archive_filename,
+                    os.path.join(target_dir, sub_archive),
+                    glob,
+                    case,
+                    filenames,
+                    recursive,
+                )
+                extracted_filenames.extend(sub_filenames)
+
+    # return a list of files extracted
+    return extracted_filenames
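+
+
+# Usage sketch (paths and globs are hypothetical): extract all GSC metadata
+# files from a downloaded product package, descending into nested archives.
+#
+#   extracted = unpack_files(
+#       '/tmp/download/package.tar.gz', '/tmp/unpacked',
+#       glob='GSC*.xml', recursive=True,
+#   )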
diff --git a/preprocessor/preprocessor/cli.py b/preprocessor/preprocessor/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..9836ffdc5f02928766a66ba7d043c2f5c0b07763
--- /dev/null
+++ b/preprocessor/preprocessor/cli.py
@@ -0,0 +1,91 @@
+from os.path import join, dirname
+import logging.config
+import json
+
+import click
+import yaml
+import jsonschema
+
+from .preprocess import preprocess_file, preprocess_browse
+from .daemon import run_daemon
+from .config import load_config
+
+
+def setup_logging(debug=False):
+    logging.config.dictConfig({
+        'version': 1,
+        'disable_existing_loggers': False,
+        'formatters': {
+            'brief': {
+                'format': '%(levelname)s %(name)s: %(message)s'
+            }
+        },
+        'handlers': {
+            'console': {
+                'class': 'logging.StreamHandler',
+                'level': 'DEBUG' if debug else 'INFO',
+                'formatter': 'brief',
+            }
+        },
+        'root': {
+            'handlers': ['console'],
+            'level': 'DEBUG' if debug else 'INFO',
+        }
+    })
+
+
+def validate_config(config):
+    with open(join(dirname(__file__), 'config-schema.yaml')) as f:
+        schema = yaml.safe_load(f)
+
+    jsonschema.validate(config, schema)
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.command(help='Run the preprocess daemon, attaching to a Redis queue')
+@click.option('--config-file', type=click.File('r'))
+@click.option('--use-dir', type=str) # TODO: check dir
+@click.option('--validate/--no-validate', default=False)
+@click.option('--host', type=str)
+@click.option('--port', type=int)
+@click.option('--listen-queue', type=str)
+@click.option('--listen-md-queue', type=str)
+@click.option('--write-queue', type=str)
+@click.option('--debug/--no-debug', default=False)
+def daemon(config_file=None, use_dir=None, validate=False, host=None, port=None, listen_queue=None, listen_md_queue=None, write_queue=None, debug=False):
+    setup_logging(debug)
+    config = load_config(config_file)
+    if validate:
+        validate_config(config)
+    run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue)
+
+
+@cli.command(help='Run a single, one-off preprocessing')
+@click.argument('file_path', type=str)
+@click.option('--config-file', type=click.File('r'))
+@click.option('--use-dir', type=str) # TODO: check dir
+@click.option('--validate/--no-validate', default=False)
+@click.option('--browse-report/--no-browse-report', default=False)
+@click.option('--debug/--no-debug', default=False)
+def preprocess(file_path, config_file=None, use_dir=None, validate=False, browse_report=False, debug=False):
+    setup_logging(debug)
+    config = load_config(config_file)
+    if validate:
+        validate_config(config)
+
+    if browse_report:
+        with open(file_path) as f:
+            browse_report_data = json.load(f)
+
+        browse_type = browse_report_data['browse_type']
+        for browse in browse_report_data['browses']:
+            preprocess_browse(config, browse_type, browse_report_data, browse, use_dir)
+    else:
+        preprocess_file(config, file_path, use_dir)
+
+if __name__ == '__main__':
+    cli()
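+
+# Example invocations when running the module directly (queue names, hostnames
+# and file paths below are placeholders):
+#
+#   python3 -m preprocessor.cli daemon --config-file config.yaml --validate \
+#       --host redis --port 6379 \
+#       --listen-queue preprocess_queue \
+#       --listen-md-queue preprocess_md_queue \
+#       --write-queue register_queue
+#
+#   python3 -m preprocessor.cli preprocess --config-file config.yaml \
+#       /tmp/download/package.tar.gz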
diff --git a/preprocessor/preprocessor/config-schema.yaml b/preprocessor/preprocessor/config-schema.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93f3f9b4f972556cf362bdc5eb24a163772d6fe9
--- /dev/null
+++ b/preprocessor/preprocessor/config-schema.yaml
@@ -0,0 +1,227 @@
+$id: https://example.com/preprocessor-config.schema.json
+$schema: http://json-schema.org/draft-07/schema#
+type: object
+properties:
+  source:
+    description: File source description. Either a local file system or an object storage.
+    type: object
+    properties:
+      type:
+        description: The type of the file source.
+        type: string
+        enum: [local, swift]
+      kwargs:
+        description: Extra arguments. Use depends on actual implementation.
+        type: object
+      # required: ['type']
+  target:
+    description: File target description. Either a local file system or an object storage.
+    type: object
+    properties:
+      type:
+        description: The type of the file target.
+        type: string
+        enum: [local, swift]
+      kwargs:
+        description: Extra arguments. Use depends on actual implementation.
+        type: object
+      # required: [type]
+      replace:
+        description: If set to true, already existing files on the target are replaced; otherwise preprocessing is skipped when the output already exists.
+        type: boolean
+        default: false
+  workdir:
+    description: The local directory, where intermediary files are to be stored.
+    type: string
+  keep_temp:
+    description: Whether to keep temporary files for each step. DEPRECATED.
+    type: boolean
+  metadata_glob:
+    description: A file glob to select metadata files from the downloaded archive.
+    type: string
+  glob_case:
+    description: Whether all file globs use case-sensitive matching.
+    type: boolean
+  type_extractor:
+    description: How the product type is to be extracted from the metadata file.
+    type: object
+    properties:
+      xpath:
+        description: Either a single XPath or multiple XPaths to the product type in the metadata file. Each is tried in turn until the type can be extracted.
+        oneOf:
+          - type: string
+          - type: array
+            items:
+              type: string
+      map:
+        description: A simple mapping of the extracted type value to an identifier for later usage. This is useful when a preprocessing chain can be re-used for multiple product types.
+        type: object
+    required: [xpath]
+  level_extractor:
+    description: How the product level is extracted. Currently unused.
+    type: object
+    # TODO
+  preprocessing:
+    description: The actual preprocessing definition.
+    type: object
+    properties:
+      defaults:
+        description: The default step settings to be applied.
+        $ref: '#/definitions/steps'
+      types:
+        description: Product type specific step config.
+        type: object
+        additionalProperties:
+          description: A mapping of product type -> steps configuration
+          $ref: '#/definitions/steps'
+    required: [types]
+
+  browse_type_mapping:
+    description: Mapping of browse types to product types. Default is direct mapping.
+    type: object
+required:
+  - source
+  - target
+  - workdir
+  - keep_temp
+  - metadata_glob
+  - type_extractor
+  - level_extractor
+  - preprocessing
+definitions:
+  steps:
+    custom_preprocessor:
+      description: Definition of a custom preprocessor step
+      type: object
+      properties:
+        path:
+          description: "The python dotted path to the function to invoke. e.g: 'path.to.module.function'"
+          type: string
+        args:
+          description: The list of arguments to pass to that function
+          type: array
+        kwargs:
+          description: The map of keyword arguments to pass to that function.
+          type: object
+    subdatasets:
+      description: The definition of the subdataset extraction step.
+      type: object
+      properties:
+        data_file_glob:
+          description: The data file selector.
+          type: string
+        subdataset_types:
+          description: Mapping of subdataset identifier to output filename postfix for subdatasets to be extracted for each data file.
+          type: object
+          patternProperties:
+            ".*":
+              type: string
+    georeference:
+      description: The definition of a georeferencing step.
+      type: object
+      properties:
+        type:
+          description: The type of georeferencing to apply.
+          type: string
+          enum: [gcp, rpc, corner, world] # TODO: more
+        options:
+          description: Additional options for the georeferencing. Depends on the type of georeferencing.
+          type: object
+          properties:
+            order:
+              description: The polynomial order to use for GCP reprojection.
+              type: number
+            projection:
+              description: The projection to use for ungeoreferenced images.
+              type: string
+            rpc_file_template:
+              description: The file glob template to use to find the RPC file. Template parameters are {filename}, {fileroot}, and {extension}.
+              type: string
+            warp_options:
+              description: "Warp options. See https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details"
+            corner_names:
+              description: "The metadata field name including the corner names. Tuple of four: bottom-left, bottom-right, top-left and top-right"
+              type: array
+              items:
+                type: string
+            orbit_direction_name:
+              description: The metadata field name containing the orbit direction
+              type: string
+            force_north_up:
+              description: Whether to force a north-up orientation of the georeferenced output image.
+              type: boolean
+            tps:
+              description: Whether to use TPS transformation instead of GCP polynomials.
+              type: boolean
+
+      required: [type]
+    calc:
+      description: Definition of a calculation step.
+      type: object
+      properties:
+        formulas:
+          description: A list of formulas to calculate
+          type: array
+          items:
+            type: object
+            properties:
+              inputs:
+                description: Input definition of this formula
+                type: object
+                patternProperties:
+                  "[A-Z]":
+                    type: object
+                    properties:
+                      glob:
+                        description: The input file glob to find the input file.
+                        type: string
+                      band:
+                        description: The band number of the input file. Defaults to 1.
+                        type: integer
+              data_type:
+                description: The output data type for the calculated file. (GDAL notation)
+                type: string
+              formula:
+                description: The formula to calculate. See gdal_calc.py for details.
+                type: string
+              output_postfix:
+                description: The filename postfix to append to the output filename. By default an enumeration is used.
+                type: string
+              nodata_value:
+                description: Use this nodata value in the calculation.
+                type: number
+    stack_bands:
+      description: Definition of a stack bands step.
+      type: object
+      properties:
+        group_by:
+          description: A regex to group the input datasets when they consist of multiple files. The first regex group is used for the grouping.
+          type: string
+        sort_by:
+          description: A regex to select a portion of the filename to be used for sorting. The first regex group is used.
+          type: string
+        order:
+          description: The order of the extracted item used in 'sort_by'.
+          type: array
+          items:
+            type: string
+    output:
+      description: Definition of an output step.
+      type: object
+      properties:
+        options:
+          description: "Options to be passed to `gdal.Warp`. See https://gdal.org/python/osgeo.gdal-module.html#WarpOptions for details"
+          type: object
+    custom_postprocessor:
+      description: Definition of a custom postprocessor step
+      type: object
+      properties:
+        path:
+          description: "The python dotted path to the function to invoke. e.g: 'path.to.module.function'"
+          type: string
+        args:
+          description: The list of arguments to pass to that function
+          type: array
+        kwargs:
+          description: The map of keyword arguments to pass to that function.
+          type: object
diff --git a/preprocessor/preprocessor/config.py b/preprocessor/preprocessor/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..77534e9411ce9e598d059a53460d40565408870d
--- /dev/null
+++ b/preprocessor/preprocessor/config.py
@@ -0,0 +1,39 @@
+import os
+from typing import TextIO
+import re
+
+import yaml
+
+
+ENV_PATTERN = re.compile(r'.*?\${(\w+)}.*?')
+
+def constructor_env_variables(loader, node):
+    """
+    Extracts the environment variable from the node's value
+    :param yaml.Loader loader: the yaml loader
+    :param node: the current node in the yaml
+    :return: the parsed string that contains the value of the environment
+    variable
+    """
+    value = loader.construct_scalar(node)
+    match = ENV_PATTERN.findall(value)  # to find all env variables in line
+    if match:
+        full_value = value
+        for g in match:
+            full_value = full_value.replace(
+                f'${{{g}}}', os.environ.get(g, g)
+            )
+        return full_value
+    return value
+
+
+def load_config(input_file: TextIO):
+    tag = '!env'
+    loader = yaml.SafeLoader
+
+    # the tag will be used to mark where to start searching for the pattern
+    # e.g. somekey: !env somestring${MYENVVAR}blah blah blah
+    loader.add_implicit_resolver(tag, ENV_PATTERN, None)
+    loader.add_constructor(tag, constructor_env_variables)
+
+    return yaml.load(input_file, Loader=loader)
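+
+
+# Example of the '!env' tag in a configuration file (keys shown are
+# illustrative): the value is expanded from the container environment when
+# the file is loaded, e.g. with OS_PASSWORD set:
+#
+#   target:
+#     type: swift
+#     kwargs:
+#       password: !env '${OS_PASSWORD}'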
diff --git a/preprocessor/preprocessor/daemon.py b/preprocessor/preprocessor/daemon.py
new file mode 100644
index 0000000000000000000000000000000000000000..e44a3c9079f341b59fcf3384be2bd97d7f811ca1
--- /dev/null
+++ b/preprocessor/preprocessor/daemon.py
@@ -0,0 +1,37 @@
+import redis
+import logging
+import json
+
+from .preprocess import preprocess_file, preprocess_browse
+
+
+logger = logging.getLogger(__name__)
+
+
+def run_daemon(config, host, port, listen_queue, listen_md_queue, write_queue):
+    """ Run the preprocessing daemon, listening on a redis queue
+        for files to be preprocessed. After preprocessing, the paths of the
+        preprocessed files are pushed to the output queue.
+    """
+    # initialize the queue client
+    client = redis.Redis(
+        host=host, port=port, charset="utf-8", decode_responses=True
+    )
+    logger.debug("waiting for redis queue '%s'..." % listen_queue)
+    while True:
+        # fetch an item from the queue to be preprocessed
+        queue, value = client.brpop([listen_queue, listen_md_queue])
+        file_paths = []
+        # start the preprocessing on that file
+        if queue == listen_queue:
+            filename, file_path = preprocess_file(config, value)
+            file_paths.append(file_path)
+        elif queue == listen_md_queue:
+            browse_report_data = json.loads(value)
+            browse_type = browse_report_data['browse_type']
+            for browse in browse_report_data['browses']:
+                filename, file_path = preprocess_browse(config, browse_type, browse_report_data, browse)
+                file_paths.append(file_path)
+        # TODO: convert to string, list, ....
+        for item in file_paths:
+            client.lpush(write_queue, item)
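+
+
+# A producer (e.g. the ingestor) is expected to LPUSH plain object paths to
+# the listen queue and JSON-encoded browse reports to the metadata queue;
+# for illustration (queue name and path are placeholders):
+#
+#   redis-cli lpush preprocess_queue "<container>/<package>.tar.gz"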
diff --git a/preprocessor/preprocessor/metadata.py b/preprocessor/preprocessor/metadata.py
new file mode 100644
index 0000000000000000000000000000000000000000..083d585da796e6c75e2087548b94fe25fadc4205
--- /dev/null
+++ b/preprocessor/preprocessor/metadata.py
@@ -0,0 +1,47 @@
+import logging
+
+from lxml import etree
+
+logger = logging.getLogger(__name__)
+
+
+def evaluate_xpath(root, xpath):
+    """
+    """
+    result = root.xpath(xpath, namespaces=root.nsmap)
+    logger.debug('XPath %s evaluated to %s', xpath, result)
+    if result:
+        if isinstance(result, list):
+            return result[0]
+        return result
+    return None
+
+
+def extract_product_type_and_level(metadata_files, config):
+    """
+    """
+    product_type = None
+    product_level = None
+    for metadata_file in metadata_files:
+        with open(metadata_file) as f:
+            tree = etree.parse(f)
+            root = tree.getroot()
+
+        if not product_type:
+            xpaths = config['type_extractor']['xpath']
+            xpaths = [xpaths] if isinstance(xpaths, str) else xpaths
+            for xpath in xpaths:
+                product_type = evaluate_xpath(root, xpath)
+                if product_type:
+                    break
+
+        if not product_level:
+            xpaths = config['level_extractor']['xpath']
+            if xpaths:
+                xpaths = [xpaths] if isinstance(xpaths, str) else xpaths
+                for xpath in xpaths:
+                    product_level = evaluate_xpath(root, xpath)
+                    if product_level:
+                        break
+
+        if product_type and product_level:
+            break
+
+    return product_type, product_level
\ No newline at end of file
diff --git a/preprocessor/preprocessor/preprocess.py b/preprocessor/preprocessor/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..626099a5ab25ec001f907644fb1e856276bde68a
--- /dev/null
+++ b/preprocessor/preprocessor/preprocess.py
@@ -0,0 +1,337 @@
+import os
+import os.path
+import itertools
+import importlib
+import logging
+import shutil
+from typing import List
+from pprint import pformat
+from urllib.parse import urlparse
+
+from .transfer import get_downloader, get_uploader
+from .archive import unpack_files
+from .metadata import extract_product_type_and_level
+from .steps import (
+    georeference_step, extract_subdataset_step, calc_step, stack_bands_step, output_step
+)
+from .steps.browse_report import browse_georeference
+from .util import workdir, Timer
+
+logging.basicConfig()
+
+logger = logging.getLogger(__name__)
+
+# -----------------------------------------------------------------------------
+
+
+def copy_files(source, target, move=False):
+    for item in os.listdir(source):
+        src_path = os.path.join(source, item)
+        dst_path = os.path.join(target, item)
+        if move:
+            shutil.move(src_path, dst_path)
+        else:
+            if os.path.isdir(src_path):
+                shutil.copytree(
+                    src_path,
+                    dst_path
+                )
+            else:
+                shutil.copy(src_path, dst_path)
+
+
+def custom_preprocessor(source_dir, target_dir, path, args=None, kwargs=None):
+    """ Preprocessing step for a custom preprocessing.
+    """
+    module_name, _, func_name = path.rpartition('.')
+    func = getattr(importlib.import_module(module_name), func_name)
+    func(source_dir, target_dir, *(args or []), **(kwargs or {}))
+
+
+def custom_postprocessor(source_dir, target_dir, path, args=None, kwargs=None):
+    """ Preprocessing step for a custom preprocessing.
+    """
+    module_name, _, func_name = path.rpartition('.')
+    func = getattr(importlib.import_module(module_name), func_name)
+    func(source_dir, target_dir, *(args or []), **(kwargs or {}))
+
+
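+# maps configuration step names to the functions implementing them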
+STEP_FUNCTIONS = {
+    'custom_preprocessor': custom_preprocessor,
+    'subdatasets': extract_subdataset_step,
+    'georeference': georeference_step,
+    'calc': calc_step,
+    'stack_bands': stack_bands_step,
+    'output': output_step,
+    'custom_postprocessor': custom_postprocessor,
+}
+
+
+def flatten(l):
+    return [item for sublist in l for item in sublist]
+
+
+# -----------------------------------------------------------------------------
+
+
+def preprocess_internal(preprocess_config, previous_step='unpack'):
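+    """ Run the configured preprocessing steps in sequence, each step reading
+        from the previous step's directory and writing into its own directory.
+    """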
+    force_refresh = False
+    # make processing steps
+    for step in ['custom_preprocessor', 'subdatasets', 'georeference', 'calc', 'stack_bands', 'output', 'custom_postprocessor']:
+        step_config = preprocess_config.get(step)
+        if not step_config:
+            logger.debug('Skipping step %s as it is not configured.' % step)
+            continue
+
+        # run the step if it was not already run
+        if not os.path.isdir(step) or force_refresh:
+            if os.path.isdir(step):
+                logger.info('Forcing refresh of existing directory %s' % step)
+                shutil.rmtree(step)
+
+            logger.info('Running preprocessing step %s' % step)
+            os.mkdir(step)
+            preprocessor = STEP_FUNCTIONS[step]
+
+            with Timer() as step_timer:
+                preprocessor(previous_step, step, **step_config)
+
+            logger.info(
+                'Finished preprocessing step %s after %.3f seconds.'
+                % (step, step_timer.elapsed)
+            )
+            force_refresh = True
+
+        else:
+            logger.info('%s dir already exists, skipping step...' % step)
+
+        previous_step = step
+
+    if not os.path.isdir('upload') or force_refresh:
+        try:
+            os.mkdir('upload')
+        except FileExistsError:
+            logger.debug('Upload folder already exists.')
+
+        # copy or move files from previous step directory to upload directory
+        copy_files(previous_step, 'upload', move=preprocess_config.get('move_files', False))
+
+
+def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=None):
+    """ Runs the preprocessing of a single file.
+    """
+    with workdir(config, use_dir) as dirname, Timer() as preprocess_timer:
+        logger.info('Preprocessing %s in %s' % (file_path, dirname))
+        target_config = config['target']
+        # check if target.replace is configured and if not, check storage if files there
+        if not target_config['replace']:
+            uploader = get_uploader(
+                target_config['type'], target_config.get('args'), target_config.get('kwargs')
+            )
+            if uploader.product_exists(file_path):
+                raise Exception('Target.replace configuration is not set to true and objects already exist in target %s.' % file_path)
+            else:
+                logger.debug('Product does not yet exist on target')
+        # check if we can reuse a previous download
+        if not os.path.isdir('download'):
+            os.mkdir('download')
+            logger.info('Downloading %s to %s...' % (file_path, dirname))
+            # get the Downloader for the configured source archive to download the given source file
+            source_config = config['source']
+            downloader = get_downloader(
+                source_config['type'], source_config.get('args'), source_config.get('kwargs')
+            )
+
+            with Timer() as download_timer:
+                source_archive_path = downloader.download(file_path, 'download')
+
+            logger.info(
+                'Downloaded file %s in %.3f seconds'
+                % (file_path, download_timer.elapsed)
+            )
+
+        else:
+            source_archive_path = os.path.join('download', os.path.basename(file_path))
+            logger.info('Download dir already exists, skipping...')
+
+        # fetch the metadata XML file from the downloaded archive
+        metadata_files = unpack_files(source_archive_path, 'extra', glob=config['metadata_glob'], case=config.get('glob_case', False))
+
+        # open the XML to retrieve the product type and level
+        product_type, product_level = extract_product_type_and_level(metadata_files, config)
+        logger.info('Detected product_type/product_level %s/%s' % (product_type, product_level))
+
+        # get a concrete configuration for the type, filled with the defaults
+        default_config = dict(config['preprocessing'].get('defaults', {}))
+        type_based_config = dict(config['preprocessing']['types'].get(product_type, {}))
+        default_config.update(type_based_config)
+        preprocess_config = default_config
+        logger.debug('Using preprocessing config %s' % pformat(preprocess_config))
+
+        if not os.path.isdir('unpack'):
+            os.mkdir('unpack')
+            logger.info('Unpacking original files...')
+            # select and unpack files according to configuration
+
+            with Timer() as unpack_timer:
+                data_files = flatten([
+                    unpack_files(
+                        source_archive_path,
+                        'unpack',
+                        glob=glob,
+                        case=config.get('glob_case', False),
+                        recursive=preprocess_config.get('nested', False),
+                    )
+                    for glob in preprocess_config['data_file_globs']
+                ])
+                metadata_files = flatten([
+                    unpack_files(
+                        source_archive_path,
+                        'unpack',
+                        glob=glob,
+                        case=config.get('glob_case', False),
+                        recursive=preprocess_config.get('nested', False),
+                    )
+                    for glob in preprocess_config.get('additional_file_globs', [])
+                ])
+
+            logger.info(
+                'Unpacked files: %s in %.3f seconds'
+                % (', '.join(metadata_files + data_files), unpack_timer.elapsed)
+            )
+        else:
+            logger.info('Unpack dir already exists, skipping...')
+
+        # actually perform the preprocessing from the downloaded file
+        preprocess_internal(preprocess_config, 'unpack')
+
+        # get an uploader for the finalized images
+        uploader = get_uploader(
+            target_config['type'], target_config.get('args'), target_config.get('kwargs')
+        )
+        if len(os.listdir('upload')) == 0:
+            # abort here, so that not only the metadata file is uploaded
+            raise Exception('No data files to upload, aborting.')
+
+        paths_for_upload = ['upload', 'extra']
+        upload_filenames = []
+        for path_to_upload in paths_for_upload:
+            upload_filenames.extend([
+                os.path.join(dirpath, filename)
+                for dirpath, _, filenames in os.walk(path_to_upload)
+                for filename in filenames
+            ])
+
+        # send all files in the upload directory to the target storage
+        logger.info(
+            'Starting upload of %d files to %s'
+            % (len(upload_filenames), file_path)
+        )
+        with Timer() as upload_timer:
+            uploader.upload(upload_filenames, file_path)
+
+        logger.info(
+            'Finished uploading after %.3f seconds.'
+            % (upload_timer.elapsed)
+        )
+
+        logger.info(
+            'Finished preprocessing of %s after %.3f seconds.'
+            % (file_path, preprocess_timer.elapsed)
+        )
+
+        return upload_filenames, file_path
+
+
+def preprocess_browse(config: dict, browse_type: str, browse_report: dict, browse: dict, use_dir: os.PathLike=None):
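+    """ Runs the preprocessing of a single browse image described in a browse report.
+    """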
+    with workdir(config, use_dir) as dirname, Timer() as preprocess_timer:
+        filename = browse['filename']
+        logger.info('Preprocessing browse "%s" in %s' % (filename, dirname))
+
+        parsed = urlparse(filename)
+
+        if not parsed.scheme:
+            # check if we can reuse a previous download
+            if not os.path.isdir('download'):
+                os.mkdir('download')
+                logger.info('Downloading %s to %s...' % (filename, dirname))
+                # get the Downloader for the configured source archive to download the given source file
+                source_config = config['source']
+                downloader = get_downloader(
+                    source_config['type'], source_config.get('args'), source_config.get('kwargs')
+                )
+
+                with Timer() as download_timer:
+                    source_filename_path = downloader.download(filename, 'download')
+
+                logger.info(
+                    'Downloaded file %s in %.3f seconds'
+                    % (filename, download_timer.elapsed)
+                )
+
+            else:
+                source_filename_path = os.path.join('download', os.path.basename(filename))
+                logger.info('Download dir already exists, skipping...')
+
+        elif parsed.scheme in ('http', 'https'):
+            # TODO: check if allowed and download from there
+            raise NotImplementedError
+
+        if not os.path.isdir('unpack'):
+            os.mkdir('unpack')
+        if not os.path.isdir('extra'):
+            os.mkdir('extra')
+
+        logger.info('Applying browse georeference to browse %s' % filename)
+        browse_georeference('download', 'unpack', 'extra', browse_report, browse)
+
+        # fetch the product type from the browse_type
+        product_type = config.get('browse_type_mapping', {}).get(browse_type, browse_type)
+        logger.info('Detected product_type %s' % (product_type))
+
+        # get a concrete configuration for the type, filled with the defaults
+        default_config = dict(config['preprocessing'].get('defaults', {}))
+        type_based_config = dict(config['preprocessing']['types'].get(product_type, {}))
+        default_config.update(type_based_config)
+        preprocess_config = default_config
+
+        logger.debug('Using preprocessing config %s' % pformat(preprocess_config))
+        preprocess_internal(preprocess_config)
+
+        # get an uploader for the finalized images
+        target_config = config['target']
+        uploader = get_uploader(
+            target_config['type'], target_config.get('args'), target_config.get('kwargs')
+        )
+        paths_for_upload = ['upload', 'extra']
+        upload_filenames = []
+        for path_to_upload in paths_for_upload:
+            upload_filenames.extend([
+                os.path.join(dirpath, filename)
+                for dirpath, _, filenames in os.walk(path_to_upload)
+                for filename in filenames
+            ])
+
+        file_path = browse['browse_identifier'] or upload_filenames[0]
+
+        # send all files in the upload directory to the target storage
+        logger.info(
+            'Starting upload of %d files to %s'
+            % (len(upload_filenames), file_path)
+        )
+        with Timer() as upload_timer:
+            uploader.upload(upload_filenames, file_path)
+
+        logger.info(
+            'Finished uploading after %.3f seconds.'
+            % (upload_timer.elapsed)
+        )
+
+        logger.info(
+            'Finished preprocessing of browse "%s" after %.3f seconds.'
+            % (filename, preprocess_timer.elapsed)
+        )
+
+        return upload_filenames, file_path
diff --git a/preprocessor/preprocessor/steps/__init__.py b/preprocessor/preprocessor/steps/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..82fc04493807bdd29b6d268dd2d0296fa9111ddd
--- /dev/null
+++ b/preprocessor/preprocessor/steps/__init__.py
@@ -0,0 +1,14 @@
+from .georeference import georeference_step
+from .output import output_step
+from .stack import stack_bands_step
+from .subdataset import extract_subdataset_step
+from .calc import calc_step
+
+
+__all__ = [
+    'georeference_step',
+    'output_step',
+    'stack_bands_step',
+    'extract_subdataset_step',
+    'calc_step',
+]
diff --git a/preprocessor/preprocessor/steps/browse_report.py b/preprocessor/preprocessor/steps/browse_report.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd377b6cc3dde156079952c1288fd1e7da01df86
--- /dev/null
+++ b/preprocessor/preprocessor/steps/browse_report.py
@@ -0,0 +1,210 @@
+import os
+from glob import glob
+from os.path import join, basename
+from textwrap import dedent
+
+from ..util import replace_ext, pairwise, gdal, osr
+
+
+def browse_georeference(source_dir: os.PathLike, target_dir_data: os.PathLike, target_dir_meta: os.PathLike, browse_report: dict, browse: dict):
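+    """ Georeference all files in the source directory according to the browse
+        report and generate a GSC metadata file for each of them into the
+        metadata target directory.
+    """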
+    for filename in glob(join(source_dir, '*')):
+        target_filename_data = join(target_dir_data, replace_ext(basename(filename), '.tif'))
+        target_filename_meta = join(target_dir_meta, replace_ext(basename(filename), '.xml'))
+        apply_browse_report_georeference(filename, target_filename_data, browse)
+
+        gsc_xml = generate_gsc(filename, target_filename_meta, browse_report, browse)
+        # generate_gsc only renders the GSC document, so write it out here
+        with open(target_filename_meta, 'w') as f:
+            f.write(gsc_xml)
+
+
+def apply_browse_report_georeference(input_filename: os.PathLike, target_filename: os.PathLike, browse: dict):
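+    """ Copy the input file to a GeoTIFF and attach the georeference information
+        (geotransform or GCPs) encoded in the browse structure.
+    """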
+    ds = gdal.GetDriverByName('GTiff').CreateCopy(target_filename, gdal.Open(input_filename))
+    type_ = browse['type']
+
+    if type_ == 'rectified_browse':
+        size_x, size_y = ds.RasterXSize, ds.RasterYSize
+        low, high = browse['rectified']['coord_list']
+
+        minx, miny = low
+        maxx, maxy = high
+
+        ds.SetGeoTransform([
+            minx, (maxx - minx) / size_x, 0,
+            maxy, 0, (miny - maxy) / size_y,
+        ])
+
+    elif type_ == 'footprint_browse':
+        col_rows = browse['footprint']['col_row_list']
+        coords = browse['footprint']['coord_list']
+
+        if col_rows[0] == col_rows[-1] and coords[0] == coords[-1]:
+            col_rows = col_rows[:-1]
+            coords = coords[:-1]
+
+        gcps = [
+            gdal.GCP(coord[1], coord[0], 0, col_row[0], col_row[1])
+            for col_row, coord in zip(col_rows, coords)
+        ]
+
+        sr = osr.SpatialReference()
+        sr.ImportFromEPSG(4326)
+        ds.SetGCPs(gcps, sr)
+
+    elif type_ == 'model_in_geotiff_browse':
+        # nothing to do in this case
+        pass
+
+    elif type_ == 'regular_grid_browse':
+        col_node_number = browse['regular_grid']['col_node_number']
+        row_node_number = browse['regular_grid']['row_node_number']
+        col_step = browse['regular_grid']['col_step']
+        row_step = browse['regular_grid']['row_step']
+
+        coord_lists = browse['regular_grid']['coord_lists']
+
+        # materialize the generators, as they are iterated multiple times below
+        range_x = list(frange(0.0, row_node_number * row_step, row_step))
+        range_y = list(frange(0.0, col_node_number * col_step, col_step))
+        pixels = [(x, y) for x in range_x for y in range_y]
+
+        # flatten the coordinate lists into a single list of grid node coordinates
+        coords = [
+            coord
+            for coord_list in coord_lists
+            for coord in coord_list
+        ]
+
+        # assumes the flattened grid coordinates correspond to the pixel positions in order
+        gcps = [
+            gdal.GCP(coord[1], coord[0], 0, pixel[0], pixel[1])
+            for pixel, coord in zip(pixels, coords)
+        ]
+
+        sr = osr.SpatialReference()
+        sr.ImportFromEPSG(4326)
+        ds.SetGCPs(gcps, sr)
+
+    else:
+        raise Exception('Unsupported browse type %s' % type_)
+
+    del ds
+
+
+# copied from: https://pynative.com/python-range-for-float-numbers/
+def frange(start, stop=None, step=None):
+    # if the stop or step argument is None, default to start=0.0 and step=1.0
+    start = float(start)
+    if stop is None:
+        stop = start + 0.0
+        start = 0.0
+    if step is None:
+        step = 1.0
+
+    count = 0
+    while True:
+        temp = float(start + count * step)
+        if step > 0 and temp >= stop:
+            break
+        elif step < 0 and temp <= stop:
+            break
+        yield temp
+
+        count += 1
+
+
+def generate_gsc(input_filename: os.PathLike, target_filename: os.PathLike, browse_report: dict, browse: dict):
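+    """ Render a GSC report XML document for the given browse, embedding its
+        footprint as a GML polygon, and return it as a string.
+    """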
+    footprint = ''
+
+    type_ = browse['type']
+    if type_ == 'rectified_browse':
+        low, high = browse['rectified']['coord_list']
+        footprint = ' '.join(str(v) for v in [
+            low[0], low[1],
+            high[0], low[1],
+            high[0], high[1],
+            low[0], high[1],
+            low[0], low[1],
+        ])
+
+    elif type_ == 'footprint_browse':
+        footprint = ' '.join([
+            f'{y} {x}'
+            for y, x in browse['footprint']['coord_list']
+        ])
+
+    elif type_ == 'model_in_geotiff_browse':
+        ds = gdal.Open(input_filename)
+        gt = ds.GetGeoTransform()
+
+        low = (gt[3], gt[0])
+        high = (gt[3] + ds.RasterYSize * gt[5], gt[0] + ds.RasterXSize * gt[1])
+
+        footprint = ' '.join(str(v) for v in [
+            low[0], low[1],
+            high[0], low[1],
+            high[0], high[1],
+            low[0], high[1],
+            low[0], low[1],
+        ])
+
+    elif type_ == 'regular_grid_browse':
+        coord_lists = browse['regular_grid']['coord_lists']
+        # trace the outer boundary of the grid: first line, last node of the
+        # intermediate lines, last line reversed, then first nodes back to the start
+        coords = coord_lists[0] + [
+            coord_list[-1]
+            for coord_list in coord_lists[1:-1]
+        ] + list(reversed(coord_lists[-1])) + [
+            coord_list[0]
+            for coord_list in coord_lists[-1::-1]
+        ]
+
+        footprint = ' '.join([
+            f'{y} {x}'
+            for y, x in coords
+        ])
+
+    return dedent(f"""\
+        <?xml version='1.0' encoding='UTF-8'?>
+        <gsc:report
+            xmlns:sar="http://earth.esa.int/sar"
+            xmlns:gml="http://www.opengis.net/gml"
+            xmlns:eop="http://earth.esa.int/eop"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+            xmlns:opt="http://earth.esa.int/opt"
+            xmlns:gsc="http://earth.esa.int/gsc"
+            xmlns:atm="http://earth.esa.int/atm"
+            xmlns:xlink="http://www.w3.org/1999/xlink" version="2.0">
+          <gsc:responsibleOrgName>{browse_report['responsible_org_name']}</gsc:responsibleOrgName>
+          <gsc:reportType>BROWSE</gsc:reportType>
+          <gsc:dateTime>{browse_report['date_time']}</gsc:dateTime>
+          <gsc:orderReference></gsc:orderReference>
+          <gsc:opt_metadata version="1.2.1">
+            <gml:metaDataProperty>
+              <gsc:EarthObservationMetaData>
+                <eop:identifier>{browse['browse_identifier']}</eop:identifier>
+              </gsc:EarthObservationMetaData>
+              </gml:metaDataProperty>
+              <gml:validTime>
+              <gml:TimePeriod>
+                <gml:beginPosition>{browse['start_time']}</gml:beginPosition>
+                <gml:endPosition>{browse['end_time']}</gml:endPosition>
+              </gml:TimePeriod>
+              </gml:validTime>
+              <gml:using>
+                <eop:EarthObservationEquipment>
+                </eop:EarthObservationEquipment>
+              </gml:using>
+              <gml:target>
+                <eop:Footprint>
+                  <gml:multiExtentOf>
+                    <gml:MultiSurface srsName="EPSG:4326">
+                      <gml:surfaceMembers>
+                        <gml:Polygon>
+                          <gml:exterior>
+                            <gml:LinearRing>
+                              <gml:posList>{footprint}</gml:posList>
+                            </gml:LinearRing>
+                          </gml:exterior>
+                        </gml:Polygon>
+                      </gml:surfaceMembers>
+                    </gml:MultiSurface>
+                  </gml:multiExtentOf>
+                </eop:Footprint>
+              </gml:target>
+            <gml:resultOf/>
+          </gsc:opt_metadata>
+        </gsc:report>
+    """)
diff --git a/preprocessor/preprocessor/steps/calc.py b/preprocessor/preprocessor/steps/calc.py
new file mode 100644
index 0000000000000000000000000000000000000000..493c6c5c9dcc8333549ad12e6dd3938e5deba371
--- /dev/null
+++ b/preprocessor/preprocessor/steps/calc.py
@@ -0,0 +1,56 @@
+import os
+from os.path import basename, dirname, join, isfile
+import subprocess
+from typing import List
+from glob import glob
+import shutil
+import logging
+
+from ..util import replace_ext
+
+
+logger = logging.getLogger(__name__)
+
+
+def calc_step(source_dir: os.PathLike, target_dir: os.PathLike, formulas: List[dict]):
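+    """ Apply the configured gdal_calc.py formulas and copy the remaining files
+        of the previous step over unchanged.
+    """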
+    for i, item in enumerate(formulas):
+        # get first filename as a base
+        filename = glob(join(source_dir, list(item['inputs'].values())[0]['glob']))[0]
+        target_filename = join(
+            target_dir,
+            replace_ext(basename(filename), item.get('output_postfix', '_proc%d' % i) + '.tif', False)
+        )
+
+        if isfile(target_filename):
+            logger.warning('Calc output filename %s already exists' % target_filename)
+
+        calc_formula(source_dir, item['inputs'], target_filename, item['formula'], item.get('data_type', 'Float32'), item.get('nodata_value', None))
+
+    # copy over all original files from the last step as well
+    for filename in glob('%s/*' % source_dir):
+        target_filename = join(target_dir, basename(filename))
+        if isfile(target_filename):
+            logger.warning('Calc output filename %s already exists' % target_filename)
+        shutil.copy(filename, target_filename)
+
+
+def calc_formula(source_dir: os.PathLike, inputs: List[dict], target_filename: os.PathLike, formula: str, data_type: str="Float32", nodata_value: float=None):
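+    """ Build and run a gdal_calc.py command for a single output file.
+    """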
+    cmd = [
+        "gdal_calc.py",
+        "--calc=%s" % formula,
+        "--outfile=%s" % target_filename,
+        "--type", data_type,
+    ]
+
+    for name in inputs:
+        # select first
+        filename = glob(join(source_dir, inputs[name]['glob']))[0]
+        cmd.extend([
+            "-%s" % name, filename,
+            "--%s_band=%d" % (name, inputs[name].get('band', 1)),
+        ])
+
+    if nodata_value is not None:
+        cmd.append("--NoDataValue=%f" % nodata_value)
+
+    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
diff --git a/preprocessor/preprocessor/steps/georeference.py b/preprocessor/preprocessor/steps/georeference.py
new file mode 100644
index 0000000000000000000000000000000000000000..129074815f84b8f10720d367d09606a155677251
--- /dev/null
+++ b/preprocessor/preprocessor/steps/georeference.py
@@ -0,0 +1,207 @@
+import os
+from os.path import join, basename, splitext
+import logging
+from glob import glob
+import shutil
+from typing import List, Tuple
+
+from ..util import gdal, osr, replace_ext
+
+
+logger = logging.getLogger(__name__)
+
+
+def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, type: str, **options: dict):
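+    """ Georeference all files of the previous step using the configured method
+        (gcp, rpc, world or corners).
+    """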
+    type_name = type.lower()
+    if type_name == 'gcp':
+        georef_func = gcp_georef
+    elif type_name == 'rpc':
+        georef_func = rpc_georef
+    elif type_name == 'world':
+        georef_func = world_georef
+    elif type_name == 'corners':
+        georef_func = corner_georef
+    else:
+        raise Exception('Invalid georeference type %s' % type_name)
+    for filename in [path for path in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(path)]:
+        target_filename = join(target_dir, basename(filename))
+        georef_func(filename, target_filename, **options)
+
+
+def gcp_georef(input_filename: os.PathLike, target_filename: os.PathLike, order: int=1, projection: str='EPSG:4326',
+               tps: bool=False):
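+    """ Georeference a file using its embedded GCPs, either by deriving a
+        geotransform directly or by warping.
+    """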
+    succeeded = False
+
+    # simple case: get the geotransform from some GCPs
+    try:
+        ds = gdal.Open(input_filename, gdal.GA_Update)
+    except RuntimeError:
+        logger.warning('Cannot open file %s with GDAL' % input_filename)
+        return
+    if ds.GetGCPCount() <= 4:
+        try:
+            gcps = ds.GetGCPs()
+            gt = gdal.GCPsToGeoTransform(gcps)
+            ds.SetGeoTransform(gt)
+        except Exception:
+            del ds
+            logger.warning("Failed to get geotransform via GCPsToGeoTransform")
+        else:
+            del ds
+            shutil.move(input_filename, target_filename)
+            succeeded = True
+
+    # otherwise warp
+    if not succeeded:
+        logger.info("Applying GCP transform by warping")
+
+        if tps:
+            options = {
+                'tps': tps
+            }
+        else:
+            options = {
+                'polynomialOrder': order
+            }
+
+        gdal.Warp(
+            target_filename,
+            input_filename,
+            dstSRS=projection,
+            **options,
+        )
+
+def rpc_georef(input_filename: os.PathLike, target_filename: os.PathLike, rpc_file_template: str='{fileroot}.RPC', warp_options: dict=None):
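+    """ Georeference a file by warping it using its RPC metadata, optionally
+        renaming a sidecar RPC file so that GDAL picks it up.
+    """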
+    fileroot, extension = splitext(input_filename)
+    rpc_file_glob = rpc_file_template.format(
+        filename=input_filename, fileroot=fileroot,
+        extension=extension,
+    )
+    rpc_filename = None
+    try:
+        rpc_filename = glob(rpc_file_glob, recursive=True)[0]
+    except IndexError:
+        logger.warning('No RPC filename found with glob %s' % rpc_file_glob)
+    # rename RPC filename to be compatible with GDAL
+    if rpc_filename:
+        shutil.move(rpc_filename, replace_ext(input_filename, '.rpc'))
+
+    gdal.Warp(
+        target_filename,
+        input_filename,
+        rpc=True,
+        **(warp_options or {})
+    )
+
+
+def corner_georef(input_filename: os.PathLike, target_filename: os.PathLike, corner_names: List[str]=None,
+                  orbit_direction_name: str=None, force_north_up: bool=False, gcp_srid: int=4326, warp: bool=False):
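+    """ Georeference a file from the corner coordinates stored in its metadata,
+        either by warping or by setting a geotransform derived from GCPs.
+    """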
+    corner_names = corner_names or ["bottom_left", "bottom_right", "top_left", "top_right"]
+    ds = gdal.Open(input_filename, gdal.GA_Update)
+
+    orbit_direction = ds.GetMetadata()[orbit_direction_name].lower()
+    metadata = ds.GetRasterBand(1).GetMetadata()
+
+    bl, br, tl, tr = [
+        [float(num) for num in metadata[corner_name].split()]
+        for corner_name in corner_names
+    ]
+
+    gcps = gcps_from_borders(
+        (ds.RasterXSize, ds.RasterYSize),
+        (bl, br, tl, tr),
+        orbit_direction,
+        force_north_up
+    )
+
+    sr = osr.SpatialReference()
+    sr.ImportFromEPSG(gcp_srid)
+
+    ds.SetGCPs(gcps, sr.ExportToWkt())
+
+    if warp:
+        gdal.Warp(
+            target_filename,
+            ds,
+        )
+        del ds
+    else:
+        ds.SetGeoTransform(gdal.GCPsToGeoTransform(ds.GetGCPs()))
+        driver = ds.GetDriver()
+        del ds
+        driver.Rename(target_filename, input_filename)
+
+
+def world_georef():
+    # TODO: implement
+    pass
+
+
+def gcps_from_borders(size: Tuple[float, float], coords: List[Tuple[float, float]], orbit_direction: str, force_north_up: bool=False):
+    x_size, y_size = size
+    # coords are (lat, lon) pairs in the order: bottom-left, bottom-right, top-left, top-right
+    gcps = []
+    if force_north_up and len(coords) == 4:
+        # compute gcps assuming north-up, east-right image no matter, what is claimed by metadata
+        sorted_by_lats = sorted(coords, key=lambda x: x[0], reverse=True)
+        # compare longitudes
+        if sorted_by_lats[0][1] > sorted_by_lats[1][1]:
+            #                                                            /\
+            #  1                                                         \ \
+            # top - left corner has lower latitude from two northernmost  \/
+            if orbit_direction != "descending":
+                top_left = sorted_by_lats[1]
+                top_right = sorted_by_lats[0]
+                bottom_left = sorted_by_lats[3]
+                bottom_right = sorted_by_lats[2]
+            else:
+                top_left = sorted_by_lats[3]
+                top_right = sorted_by_lats[2]
+                bottom_left = sorted_by_lats[1]
+                bottom_right = sorted_by_lats[0]
+        else:
+            #                                                                /\
+            #   2                                                           / /
+            #  top - left corner has higher latitude from two northernmost  \/
+            if orbit_direction != "descending":
+                top_left = sorted_by_lats[0]
+                top_right = sorted_by_lats[1]
+                bottom_left = sorted_by_lats[2]
+                bottom_right = sorted_by_lats[3]
+            else:
+                top_left = sorted_by_lats[2]
+                top_right = sorted_by_lats[3]
+                bottom_left = sorted_by_lats[0]
+                bottom_right = sorted_by_lats[1]
+        gcps.append(gdal.GCP(bottom_left[1], bottom_left[0], 0, 0.5, 0.5))
+        gcps.append(gdal.GCP(bottom_right[1], bottom_right[0], 0, x_size - 0.5, 0.5))
+        gcps.append(gdal.GCP(top_left[1], top_left[0], 0, 0.5, y_size - 0.5))
+        gcps.append(gdal.GCP(top_right[1], top_right[0], 0, x_size - 0.5, y_size - 0.5))
+
+    else:
+        bl, br, tl, tr = coords
+
+        x_left = x_size - 0.5
+        x_right = 0.5
+
+        y_bottom = 0.5
+        y_top = y_size - 0.5
+
+        if orbit_direction == 'descending':
+            x_left, x_right = x_right, x_left
+            y_bottom, y_top = y_top, y_bottom
+
+        gcps.extend([
+            gdal.GCP(bl[1], bl[0], 0, x_left, y_bottom),
+            gdal.GCP(br[1], br[0], 0, x_right, y_bottom),
+            gdal.GCP(tl[1], tl[0], 0, x_left, y_top),
+            gdal.GCP(tr[1], tr[0], 0, x_right, y_top),
+        ])
+
+    return gcps
diff --git a/preprocessor/preprocessor/steps/output.py b/preprocessor/preprocessor/steps/output.py
new file mode 100644
index 0000000000000000000000000000000000000000..d90c53435419f999964899080a7fa7fb6f277ef3
--- /dev/null
+++ b/preprocessor/preprocessor/steps/output.py
@@ -0,0 +1,40 @@
+import os
+from os.path import join, basename
+from uuid import uuid4
+from glob import glob
+
+from ..util import replace_ext, gdal
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def output_step(source_dir: os.PathLike, target_dir: os.PathLike, options: dict=None):
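+    """ Warp all files of the previous step into the configured output format
+        and merge them into a single file when more than one was produced.
+    """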
+    # find out the driver to get the extension
+    options = options if options is not None else {}
+    frmt = options.get('format', 'GTiff')
+    driver = gdal.GetDriverByName(frmt)
+    if not driver:
+        raise ValueError('Unsupported driver %s' % frmt)
+    extension = driver.GetMetadata().get('DMD_EXTENSIONS', 'tif').split(' ')[0]
+    # warp each individual file
+    warped_files = []
+    for filename in [path for path in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(path)]:
+        target_filename = join(target_dir, replace_ext(basename(filename), extension))
+        logger.debug('Warping file %s' % filename)
+        gdal.Warp(target_filename, filename, options=gdal.WarpOptions(
+            **options
+        ))
+        warped_files.append(target_filename)
+
+    if len(warped_files) > 1:
+        tmp_filename = join(target_dir, '%s.%s' % (uuid4().hex, extension))
+        logger.debug('Warping files %s' % warped_files)
+        gdal.Warp(tmp_filename, warped_files, options=gdal.WarpOptions(
+            **options
+        ))
+
+        # delete old files and rename the combined file to the first filename
+        for filename in warped_files:
+            os.unlink(filename)
+        os.rename(tmp_filename, warped_files[0])
diff --git a/preprocessor/preprocessor/steps/stack.py b/preprocessor/preprocessor/steps/stack.py
new file mode 100644
index 0000000000000000000000000000000000000000..809f3ce7e574e31e3788cb3e15cab5fd1c489186
--- /dev/null
+++ b/preprocessor/preprocessor/steps/stack.py
@@ -0,0 +1,52 @@
+import os
+from os.path import basename, join, splitext
+from itertools import groupby
+import re
+from glob import glob
+from typing import List
+
+from ..util import replace_ext, gdal
+
+
+def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, group_by: str=None, sort_by: str=None, order: List[str]=None):
+    """ Stack bands of the individual images
+    """
+    filenames = [path for path in glob(join(source_dir, '**'), recursive=True) if not os.path.isdir(path)]
+    # check if we have a group_by regex. If yes, use the first
+    # re-group to group by.
+    # Fallback is basename of file as groupname
+    if group_by:
+        re_group_by = re.compile(group_by)
+        # itertools.groupby only groups consecutive items, so sort by the group key first
+        group_key = lambda v: re_group_by.match(v).group(1)
+        groups = {
+            k: list(v)
+            for k, v in groupby(sorted(filenames, key=group_key), key=group_key)
+        }
+    else:
+        groups = {basename(filenames[0]): filenames}
+
+    for groupname, group in groups.items():
+        # check if a sort_by is specified. if yes, use the sort_by regex group
+        # and optionally a ordered list to order the filenames
+        if sort_by:
+            re_sort_by = re.compile(sort_by)
+
+            if order:
+                group = [
+                    v for v in group
+                    if re_sort_by.match(v)
+                    and re_sort_by.match(v).group(1) in order
+                ]
+
+                group = sorted(
+                    group,
+                    key=lambda v: order.index(re_sort_by.match(v).group(1))
+                )
+            else:
+                group = sorted(
+                    group,
+                    key=lambda v: re_sort_by.match(v).group(1)
+                )
+
+        # build a VRT to stack bands for each group
+        vrt_filename = replace_ext(join(target_dir, groupname), '.vrt')
+        gdal.BuildVRT(vrt_filename, group, separate=True)
diff --git a/preprocessor/preprocessor/steps/subdataset.py b/preprocessor/preprocessor/steps/subdataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..a284de94bae96d40a0156b766f512d2c3946834d
--- /dev/null
+++ b/preprocessor/preprocessor/steps/subdataset.py
@@ -0,0 +1,36 @@
+import os
+from os.path import join, splitext, basename, dirname
+from glob import glob
+from typing import Dict
+
+from ..util import replace_ext, gdal
+
+
+def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, data_file_glob: str, subdataset_types: Dict[str, str]=None):
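+    """ Extract the configured subdatasets of all matched data files into
+        separate GeoTIFF files.
+    """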
+    datafiles = glob(join(source_dir, data_file_glob))
+    if not datafiles:
+        raise Exception('No datafiles were matched by the provided glob')
+
+    for filename in datafiles:
+        extract_subdatasets(
+            filename,
+            target_dir,
+            subdataset_types
+        )
+
+
+def extract_subdatasets(source_filename: os.PathLike, target_dir: os.PathLike, subdataset_types: Dict[str, str]=None):
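+    """ Translate each matching subdataset of a single file to a GeoTIFF in the
+        target directory.
+    """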
+    ds = gdal.Open(source_filename)
+
+    sub_datasets = []
+    for locator, _ in ds.GetSubDatasets():
+        _, _, sd_type = locator.split(':')
+        if subdataset_types is None or sd_type in subdataset_types:
+            # fall back to the raw subdataset type as suffix when no mapping is configured
+            sub_datasets.append((
+                locator,
+                subdataset_types[sd_type] if subdataset_types else sd_type
+            ))
+
+    if not sub_datasets:
+        raise Exception('No subdatasets were matched by the provided types')
+
+    for locator, suffix in sub_datasets:
+        target_filename = join(target_dir, basename(replace_ext(source_filename, '%s.tif' % suffix)))
+        gdal.Translate(target_filename, locator, format='GTiff')
diff --git a/preprocessor/preprocessor/transfer/__init__.py b/preprocessor/preprocessor/transfer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..824127913807029b545af0f77c69bc4109c9fbd8
--- /dev/null
+++ b/preprocessor/preprocessor/transfer/__init__.py
@@ -0,0 +1 @@
+from .util import get_downloader, get_uploader
\ No newline at end of file
diff --git a/preprocessor/preprocessor/transfer/abc.py b/preprocessor/preprocessor/transfer/abc.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cc9818dcd1393b73f2c60a1fe78ec0cce98aad5
--- /dev/null
+++ b/preprocessor/preprocessor/transfer/abc.py
@@ -0,0 +1,36 @@
+from os import PathLike
+from abc import ABC, abstractmethod
+from typing import List, Optional, Union
+
+# copied from: https://github.com/python/cpython/blob/3.8/Lib/_collections_abc.py
+def _check_methods(C, *methods):
+    mro = C.__mro__
+    for method in methods:
+        for B in mro:
+            if method in B.__dict__:
+                if B.__dict__[method] is None:
+                    return NotImplemented
+                break
+        else:
+            return NotImplemented
+    return True
+
+
+class Downloader(ABC):
+    """ ABC for file downloaders. Implementing classes shall download files
+        from a given storage location to be preprocessed.
+    """
+
+    @abstractmethod
+    def download(self, remote_path: PathLike, local_path: PathLike) -> PathLike:
+        pass
+
+
+class Uploader(ABC):
+    @abstractmethod
+    def upload(self, local_path: Union[PathLike, List[PathLike]], remote_dir: PathLike) -> List[PathLike]:
+        pass
+
+    @abstractmethod
+    def product_exists(self, remote_dir: PathLike) -> bool:
+        pass
diff --git a/preprocessor/preprocessor/transfer/local.py b/preprocessor/preprocessor/transfer/local.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1450f7d6b18d2e0a98232023d5bda5cb6db667a
--- /dev/null
+++ b/preprocessor/preprocessor/transfer/local.py
@@ -0,0 +1,48 @@
+import os
+import os.path
+import shutil
+from typing import List, Union
+import logging
+
+logger = logging.getLogger(__name__)
+
+class Base:
+    def __init__(self, storage_path: os.PathLike):
+        self.storage_path = storage_path
+
+
+class Downloader(Base):
+    """ Downloader for OpenStack swift object storages
+    """
+    def download(self, remote_path: os.PathLike, local_dir: os.PathLike) -> os.PathLike:
+        os.path.join(self.storage_path, remote_path)
+
+        return shutil.copy2(os.path.join(self.storage_path, remote_path), local_dir)
+
+
+class Uploader(Base):
+    """ Uploader for OpenStack swift object storages
+    """
+    def upload(self, local_path: Union[os.PathLike, List[os.PathLike]], remote_dir: os.PathLike) -> List[os.PathLike]:
+        paths = local_path if isinstance(local_path, list) else [local_path]
+        remote_paths = [
+            os.path.join(
+                self.storage_path,
+                remote_dir,
+                os.path.basename(path)
+            )
+            for path in paths
+        ]
+
+        for local_path, remote_path in zip(paths, remote_paths):
+            os.makedirs(os.path.dirname(remote_path), exist_ok=True)
+            shutil.copy2(local_path, remote_path)
+
+        return remote_paths
+
+    def product_exists(self, remote_dir: os.PathLike) -> bool:
+        remote_path = os.path.join(self.storage_path, remote_dir)
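+        # a product counts as existing when any directory below it holds at least two files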
+        for r, d, f in os.walk(remote_path):
+            if len(f) >= 2:
+                return True
+        return False
diff --git a/preprocessor/preprocessor/transfer/swift.py b/preprocessor/preprocessor/transfer/swift.py
new file mode 100644
index 0000000000000000000000000000000000000000..a58f7fb146530860a1b7af61961b38ea6b6849dc
--- /dev/null
+++ b/preprocessor/preprocessor/transfer/swift.py
@@ -0,0 +1,135 @@
+import os
+import os.path
+from typing import List, Union
+import logging
+
+from swiftclient.multithreading import OutputManager
+from swiftclient.service import SwiftError, SwiftService, SwiftUploadObject
+
+logging.getLogger("requests").setLevel(logging.WARNING)
+logging.getLogger("swiftclient").setLevel(logging.WARNING)
+logger = logging.getLogger(__name__)
+
+class Base:
+    def __init__(self, username=None, password=None, tenant_name=None,
+                 tenant_id=None, region_name=None, user_domain_id=None,
+                 user_domain_name=None, auth_url=None, auth_version=None,
+                 container=None):
+        self.username = username
+        self.password = password
+        self.tenant_name = tenant_name
+        self.tenant_id = tenant_id
+        self.region_name = region_name
+        self.user_domain_id = user_domain_id
+        self.user_domain_name = user_domain_name
+        self.auth_url = auth_url
+        self.auth_version = auth_version  # TODO: assume 3
+        self.container = container
+
+    def get_service(self):
+        return SwiftService(options={
+            "os_username": self.username,
+            "os_password": self.password,
+            "os_tenant_name": self.tenant_name,
+            "os_tenant_id": self.tenant_id,
+            "os_region_name": self.region_name,
+            "os_auth_url": self.auth_url,
+            "auth_version": self.auth_version,
+            "os_user_domain_id": self.user_domain_id,
+            "os_user_domain_name": self.user_domain_name,
+        })
+
+    def validate_container(self, remote_dir):
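+        """ Return the (container, path) pair to use: either the configured
+            container, or the first segment of the given path.
+        """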
+        if self.container:
+            # container was specified, use it
+            return self.container, remote_dir
+        # container needs to be extracted from path
+        # paths needs to be updated
+        return remote_dir.partition('/')[0], remote_dir.partition('/')[2]
+
+
+class Downloader(Base):
+    """ Downloader for OpenStack swift object storages
+    """
+    def download(self, remote_path: os.PathLike, local_path: os.PathLike) -> os.PathLike:
+        container, remote_path = self.validate_container(remote_path)
+        target_filename = os.path.join(local_path, os.path.basename(remote_path))
+        with self.get_service() as swift:
+            results = swift.download(
+                container,
+                [remote_path],
+                options={
+                    'out_file': target_filename
+                }
+            )
+
+            for result in results:
+                if result["success"]:
+                    return target_filename
+                else:
+                    raise Exception('Failed to download %s' % remote_path)
+
+
+class Uploader(Base):
+    """ Uploader for OpenStack swift object storages
+    """
+    def upload(self, local_path: Union[os.PathLike, List[os.PathLike]], remote_dir: os.PathLike) -> List[os.PathLike]:
+        paths = local_path if isinstance(local_path, list) else [local_path]
+        container, remote_dir = self.validate_container(remote_dir)
+        remote_paths = [
+            os.path.join(
+                remote_dir,
+                os.path.basename(path)
+            )
+            for path in paths
+        ]
+        objects = [
+            SwiftUploadObject(
+                path,
+                object_name=remote_path
+            )
+            for path, remote_path in zip(paths, remote_paths)
+        ]
+
+        max_size = max([os.stat(path).st_size for path in paths])
+        options = {}
+
+        # use segment options if file is bigger than 5GB
+        if (max_size > 1024 * 1024 * 1024 * 5):
+            options['segment_size'] = 2 * 1024 * 1024 * 1024
+            options['use_slo'] = True
+
+        with self.get_service() as swift:
+            # use container or first part of path
+            results = swift.upload(container=container, objects=objects, options=options)
+
+            for result in results:
+                if result["success"]:
+                    if "object" in result:
+                        logger.info(
+                            "'%s' successfully uploaded." % result["object"]
+                        )
+                    elif "for_object" in result:
+                        logger.debug(
+                            "Successfully uploaded '%s' segment '%s'."
+                            % (result["for_object"], result["segment_index"])
+                        )
+                else:
+                    logger.error(
+                        "'%s' upload failed" % result["error"]
+                    )
+                    raise Exception('Failed to upload %s' % result["error"])
+
+        return remote_paths
+
+    def product_exists(self, remote_dir: os.PathLike) -> bool:
+        with self.get_service() as swift:
+            container, remote_dir = self.validate_container(remote_dir)
+            list_parts_gen = swift.list(
+                container=container, options={"prefix": remote_dir},
+            )
+            for page in list_parts_gen:
+                if page["success"] and len(page["listing"]) >= 2:
+                    # at least two files present -> pass validation
+                    return True
+            # no listing page contained enough objects
+            return False
diff --git a/preprocessor/preprocessor/transfer/util.py b/preprocessor/preprocessor/transfer/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc14bbf5695e7a06d49a9a1bc5ffddc7090d51d6
--- /dev/null
+++ b/preprocessor/preprocessor/transfer/util.py
@@ -0,0 +1,22 @@
+from . import swift
+from . import local
+
+from .abc import Downloader, Uploader
+
+
+def get_downloader(type_name, args, kwargs) -> Downloader:
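+    """ Instantiate the Downloader implementation for the configured source type.
+    """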
+    if type_name == 'swift':
+        return swift.Downloader(*args or [], **kwargs or {})
+    elif type_name == 'local':
+        return local.Downloader(*args or [], **kwargs or {})
+
+    raise Exception('Downloader type %s is not supported' % type_name)
+
+
+def get_uploader(type_name, args, kwargs) -> Uploader:
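+    """ Instantiate the Uploader implementation for the configured target type.
+    """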
+    if type_name == 'swift':
+        return swift.Uploader(*args or [], **kwargs or {})
+    elif type_name == 'local':
+        return local.Uploader(*args or [], **kwargs or {})
+
+    raise Exception('Uploader type %s is not supported' % type_name)
diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..249176c5643c3f58d5fedbfed94a478992679dee
--- /dev/null
+++ b/preprocessor/preprocessor/util.py
@@ -0,0 +1,68 @@
+import os
+from os.path import splitext
+from contextlib import contextmanager
+from tempfile import TemporaryDirectory, mkdtemp
+from time import time
+
+try:
+    from osgeo import gdal
+except ImportError:
+    import gdal
+
+gdal.UseExceptions()
+
+try:
+    from osgeo import osr
+except ImportError:
+    import osr
+
+osr.UseExceptions()
+
+
+def replace_ext(filename: os.PathLike, new_ext: str, force_dot: bool=True) -> os.PathLike:
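+    """ Return the filename with its extension replaced by new_ext, prepending a
+        dot unless one is already present or force_dot is False.
+    """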
+    return splitext(filename)[0] + ('' if new_ext.startswith('.') or not force_dot else '.') + new_ext
+
+
+@contextmanager
+def workdir(config: dict, use_dir: os.PathLike=None):
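+    """ Context manager changing into a working directory: the given directory,
+        a kept temporary directory when 'keep_temp' is configured, or a
+        temporary directory that is cleaned up on exit.
+    """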
+    prefix = config.get('prefix', 'preprocess_')
+    workdir = config.get('workdir')
+    if use_dir:
+        os.chdir(use_dir)
+        yield use_dir
+    elif config.get('keep_temp'):
+        dirname = mkdtemp(prefix=prefix, dir=workdir)
+        os.chdir(dirname)
+        yield dirname
+    else:
+        with TemporaryDirectory(prefix=prefix, dir=workdir) as dirname:
+            os.chdir(dirname)
+            yield dirname
+
+
+def pairwise(col):
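+    """ Iterate over a collection in non-overlapping pairs.
+    """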
+    iterator = iter(col)
+    while True:
+        try:
+            yield (next(iterator), next(iterator))
+        except StopIteration:
+            break
+
+
+class Timer:
+    """ Helper timer class to allow logging of timing values
+    """
+    def __init__(self):
+        self.start = None
+        self.end = None
+
+    def __enter__(self):
+        self.start = time()
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.end = time()
+
+    @property
+    def elapsed(self):
+        return (self.end if self.end is not None else time()) - self.start
diff --git a/preprocessor/run-preprocessor.sh b/preprocessor/run-preprocessor.sh
index 7ae857f3b9a6beb9a8c5197115df75da76380ab3..70a8aee6572806b30e158b706fc65fbd95ad427c 100644
--- a/preprocessor/run-preprocessor.sh
+++ b/preprocessor/run-preprocessor.sh
@@ -1,5 +1,16 @@
 #!/bin/sh
 
 echo "Running preprocessor"
+debug="--no-debug"
+if test "$PREPROCESSOR_DEBUG" = true; then
+    debug="--debug"
+fi
 
-python3 /preprocessor.py --mode redis --redis-host ${REDIS_HOST} --redis-port ${REDIS_PORT} --redis-preprocess-queue-key ${REDIS_PREPROCESS_QUEUE_KEY} --redis-register-queue-key ${REDIS_REGISTER_QUEUE_KEY}
+preprocessor daemon \
+    --config-file /config.yaml \
+    --host ${REDIS_HOST} \
+    --port ${REDIS_PORT} \
+    --listen-queue ${REDIS_PREPROCESS_QUEUE_KEY} \
+    --listen-md-queue ${REDIS_PREPROCESS_MD_QUEUE_KEY} \
+    --write-queue ${REDIS_REGISTER_QUEUE_KEY} \
+    ${debug}
\ No newline at end of file
diff --git a/preprocessor/setup.py b/preprocessor/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b90e5064267302085796c218846f79fe551945c
--- /dev/null
+++ b/preprocessor/setup.py
@@ -0,0 +1,28 @@
+from setuptools import setup, find_packages
+
+# with open("README.md", "r") as fh:
+#     long_description = fh.read()
+long_description = ""
+
+setup(
+    name="preprocessor", # Replace with your own username
+    version="0.0.1",
+    author="",
+    author_email="",
+    description="preprocessor for PVS",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://gitlab.eox.at/esa/prism/vs/-/tree/master/preprocessor",
+    packages=find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.6',
+    entry_points={
+        "console_scripts": [
+            "preprocessor = preprocessor.cli:cli",
+        ],
+    }
+)
diff --git a/preprocessor/transform_chain.py b/preprocessor/transform_chain.py
deleted file mode 100644
index 9c3977b1993c434c0362978c56f13c91014342fe..0000000000000000000000000000000000000000
--- a/preprocessor/transform_chain.py
+++ /dev/null
@@ -1,184 +0,0 @@
-from osgeo import gdal, osr
-from math import ceil, floor
-import logging
-
-gdal.SetConfigOption("GDAL_CACHEMAX", "1000")
-logger = logging.getLogger("preprocessor")
-
-GDT_SIZES = {
-    # geotiff data types and their respective sizes in bits
-    "Byte": 8,
-    "Int16": 16,
-    "UInt16": 16,
-    "CInt16": 16,
-    "Int32": 32,
-    "UInt32": 32,
-    "CInt32": 32,
-    "Float32": 32,
-    "CFloat32": 32,
-    "Float64": 64,
-    "CFloat64": 64
-}
-
-
-def open_gdal_dataset(input_file):
-    logger.debug("Opening file using GDAL: %s" % input_file)
-    return gdal.Open(input_file)
-
-
-def apply_rpc(src_ds):
-    # if RPC metadata on source is present, apply it as geotransform using warp through temporary vrt
-    rpc = src_ds.GetMetadata("RPC")
-    if rpc:
-        logger.debug("Applying RPC metadata.")
-        dst_ds = gdal.Warp("", src_ds, dstSRS="EPSG:4326", format="VRT", rpc=True, multithread=True, warpMemoryLimit=1024*1024*1024, resampleAlg=gdal.GRA_NearestNeighbour)
-        return dst_ds
-    return src_ds
-
-
-def set_rpc_metadata(src_ds, dst_ds):
-    # sets RPC metadata from one source dataset to destination dataset if present
-    rpc = src_ds.GetMetadata('RPC')
-    if rpc:
-        dst_ds.SetMetadata(rpc, 'RPC')
-    return dst_ds
-
-
-def write_gdal_dataset(src_ds, driver_name, output_file_name, creation_options=[]):
-    # writes dataset to an output file using a given driver and array of creation options (-CO)
-    # returns filelist as an array (for later reference)
-    driver_instance = gdal.GetDriverByName(driver_name)
-    dst_ds = driver_instance.CreateCopy(output_file_name, src_ds, strict=0, options=creation_options)
-    dst_ds = None   # write to disk
-    return [output_file_name]
-
-
-def write_gdal_dataset_split(src_ds, driver_name, output_file_name, creation_options=[], split_parts=1):
-    # writes dataset to an output file using a given driver and array of creation options (-CO)
-    # returns a list of created files
-    if driver_name == "COG":
-        # COG driver does not have Create method, need to create in memory raster first and then CreateCopy
-        driver_instance = gdal.GetDriverByName("MEM")
-        creation_options_valid = []  # creation options applied only to result driver
-    else:
-        driver_instance = gdal.GetDriverByName(driver_name)
-        creation_options_valid = creation_options
-    logger.info("Writing file to disk. %s" % ("Splitting into %s stripes." % split_parts if split_parts > 1 else ""))
-    if split_parts > 1:
-        # get image properties
-        dst_filenames = []
-        y_coord = 0
-        cols = src_ds.RasterXSize
-        rows = src_ds.RasterYSize
-        bands = src_ds.RasterCount
-        data_type = src_ds.GetRasterBand(1).DataType
-        geo_transform = src_ds.GetGeoTransform()
-        projection = osr.SpatialReference()
-        projection.ImportFromWkt(src_ds.GetProjectionRef())
-        if not geo_transform:
-            # provide some default when not set, which will not make sense
-            # but at least split images will not be stacked on top of each other
-            geo_transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]
-        y_origin = geo_transform[3]  # uly
-        pixelHeight = geo_transform[5]  # yres
-        size = floor(rows / split_parts)  # height of one strip
-        for i in range(split_parts):
-            # add underscore and padded index to between filename and extension if there is one
-            output_file_name_split = output_file_name.rsplit(".", 1)
-            output_file_name_crop = "%s_%04d.%s" % (output_file_name_split[0], i, output_file_name_split[1] if len(output_file_name_split) > 1 else "")
-            logger.debug("Creating in memory dataset stripe %04d." % i)
-            if i < split_parts - 1:
-                # all stripes except the last one
-                # create a new dataset
-                dst_ds = driver_instance.Create(output_file_name_crop, xsize=cols, ysize=size, bands=bands, eType=data_type, options=creation_options_valid)
-                # fill bands with data
-                for band in range(1, bands + 1):
-                    data = src_ds.GetRasterBand(band).ReadAsArray(0, y_coord, cols, size)  # xoff, yoff, xsize, ysize
-                    dst_ds.GetRasterBand(band).WriteArray(data)
-                    no_data_value = src_ds.GetRasterBand(band).GetNoDataValue()
-                    if no_data_value:
-                        dst_ds.GetRasterBand(band).SetNoDataValue(no_data_value)
-                # move y_start for next iteration
-                y_coord += size
-            else:
-                # for last stripe ysize will be larger by remainder after division by split_parts
-                dst_ds = driver_instance.Create(output_file_name_crop, xsize=cols, ysize=size + (rows % split_parts), bands=bands, eType=data_type, options=creation_options_valid)
-                for band in range(1, bands + 1):
-                    data = src_ds.GetRasterBand(band).ReadAsArray(0, y_coord, cols, size + rows % split_parts)  # xoff, yoff, xsize, ysize
-                    dst_ds.GetRasterBand(band).WriteArray(data)
-                    no_data_value = src_ds.GetRasterBand(band).GetNoDataValue()
-                    if no_data_value:
-                        dst_ds.GetRasterBand(band).SetNoDataValue(no_data_value)
-            # set new geotransform
-            new_y = y_origin + (i * size * pixelHeight)
-            dst_ds.SetGeoTransform((geo_transform[0], geo_transform[1], geo_transform[2], new_y, geo_transform[4], geo_transform[5]))
-            dst_ds.SetProjection(projection.ExportToWkt())
-            logger.debug("Writing stripe file %04d." % i)
-            if driver_name == "COG":
-                cog_driver = gdal.GetDriverByName("COG")
-                if cog_driver:
-                    output_ds = cog_driver.CreateCopy(output_file_name_crop, dst_ds, options=creation_options)
-                    output_ds = None  # write to disk
-                else:
-                    raise Exception("COG driver was not found. Please upgrade GDAL to version >= 3.1")
-            else:
-                dst_ds = None  # write to disk
-            dst_filenames.append(output_file_name_crop)
-    else:
-        # do not split, just create output
-        dst_filenames = write_gdal_dataset(src_ds, driver_name, output_file_name, creation_options)
-    logger.info("Finished writing %s files to disk." % split_parts)
-    return dst_filenames
-
-
-def correct_geo_transform(src_dst):
-    # input - gdal dataset
-    # sets new geotransform if necessary by creating control points of a raster with switched height and width - by Petr
-    # returns - gdal dataset
-    ulx, xres, xskew, uly, yskew, yres = src_dst.GetGeoTransform()
-    # test geotransform if necessary to shift
-    if xres == 0.0 and yres == 0.0:
-        logger.debug("Malformed geotransform xres,yres=0 detected, correcting.")
-        # malformed image, compute xres and yres switched in geotransform
-        lrx = ulx + (src_dst.RasterXSize * xskew)
-        lry = uly + (src_dst.RasterYSize * yskew)
-        # [ulx, lrx, lry, uly] - bounds = lon_min, lon_max, lat_min, lat_max
-        fp = [[0, src_dst.RasterXSize, src_dst.RasterXSize, 0], [0, 0, src_dst.RasterYSize, src_dst.RasterYSize]]
-        tp = [[ulx, lrx, lrx, ulx], [lry, lry, uly, uly]]
-        pix = list(zip(fp[0], fp[1]))
-        coor = list(zip(tp[0], tp[1]))
-        # compute the gdal.GCP parameters
-        gcps = []
-        for index, txt in enumerate(pix):
-            gcps.append(gdal.GCP())
-            gcps[index].GCPPixel = pix[index][0]
-            gcps[index].GCPLine = src_dst.RasterYSize - int(pix[index][1])
-            gcps[index].GCPX = coor[index][0]
-            gcps[index].GCPY = coor[index][1]
-        # get correct geotransform from gcps
-        geotransform_new = gdal.GCPsToGeoTransform(gcps)
-        # overwrite geotransform with new
-        src_dst.SetGeoTransform(geotransform_new)
-    return src_dst
-
-
-def split_check(dst, limit=1024 ** 3):
-    # returns number of parts to which split the resulting image to get each part within limit
-    # assuming non-compressed resulting image
-    parts = 1
-    size = 0
-    if dst is not None:
-        for i in range(1, dst.RasterCount + 1):
-            size += dst.RasterXSize * dst.RasterYSize * GDT_SIZES[gdal.GetDataTypeName(dst.GetRasterBand(i).DataType)] / 8
-        parts = ceil(size / limit)
-    return parts
-
-
-def create_vrt_dataset(src_dst_array, dst_ds_name):
-    return gdal.BuildVRT(dst_ds_name, src_dst_array)
-
-
-def validate_band_count(src_ds, count=4):
-    if src_ds.RasterCount == count:
-        return src_ds
-    return None
diff --git a/registrar_test.py b/registrar_test.py
deleted file mode 100644
index 7d2df6332b55589a3d6ad7f80b91937599ff35dd..0000000000000000000000000000000000000000
--- a/registrar_test.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import psycopg2
-import os
-import csv
-
-
-with open('./env/emg_db.env', 'r') as f:
-    env = dict(
-        line.split('=', 1)
-        for line in f
-    )
-database= env['DB_NAME'].replace('\n','')
-port = env['DB_PORT'].replace('\n','')
-host = env['DB_HOST'].replace('\n','')
-database_password= env['DB_PW'].replace('\n','')
-database_user = env['DB_USER'].replace('\n','')
-
-
-def connect_to_db(eo_id):
-    global db_name, coverage_id
-    connection= None
-    try:
-        connection = psycopg2.connect(dbname=database, user=database_user, password=database_password, host='docker', port=port)
-        cursor = connection.cursor()
-        db_name = connection.get_dsn_parameters()["dbname"]
-        postgreSQL_select_Query = "SELECT identifier FROM coverages_eoobject WHERE identifier = '%s';" % eo_id
-        cursor.execute(postgreSQL_select_Query)
-        coverage_id = cursor.fetchone()[0]
-
-    except (Exception, psycopg2.Error) as error :
-        print ("Error while connecting to PostgreSQL", error)
-    finally:
-        #closing database connection.
-        if connection:
-            cursor.close()
-            connection.close()
-            print("PostgreSQL connection is closed")
-                
-
-
-def test_db_name(name):
-    with open(name, newline='') as csvfile:
-        spamreader = csv.reader(csvfile)
-        for row in spamreader:
-            identifier = row[0].split('/')[4]
-            connect_to_db(identifier)
-            assert coverage_id == identifier
-            assert db_name == database
-
diff --git a/registrar_test.sh b/registrar_test.sh
deleted file mode 100755
index d394a2d1de2fe73b2157adab0136e08c2d2c7708..0000000000000000000000000000000000000000
--- a/registrar_test.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-product_list_file=$1
-docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /var/www/pvs/dev/pvs_instance/manage.py storage create pvs_testing pvs_testing --type swift --storage-auth auth-cloud-ovh
-IFS=","
-while read  product; do
-    docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /registrar.py --objects-prefix $product <<<$product
-    
-done < $product_list_file
-
-pytest -s registrar_test.py --name $product_list_file
diff --git a/testing/docker-stack-wait.sh b/testing/docker-stack-wait.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c7f00a3199b8562cd7759e4f481709a306a9768a
--- /dev/null
+++ b/testing/docker-stack-wait.sh
@@ -0,0 +1,148 @@
+#!/bin/sh
+
+# By: Brandon Mitchell <public@bmitch.net>
+# License: MIT
+# Source repo: https://github.com/sudo-bmitch/docker-stack-wait
+
+set -e
+trap "{ exit 1; }" TERM INT
+opt_h=0
+opt_r=0
+opt_s=5
+opt_t=3600
+start_epoc=$(date +%s)
+
+usage() {
+  echo "$(basename $0) [opts] stack_name"
+  echo "  -f filter: only wait for services matching filter, may be passed multiple"
+  echo "             times, see docker stack services for the filter syntax"
+  echo "  -h:        this help message"
+  echo "  -n name:   only wait for specific service names, overrides any filters,"
+  echo "             may be passed multiple times, do not include the stack name prefix"
+  echo "  -r:        treat a rollback as successful"
+  echo "  -s sec:    frequency to poll service state (default $opt_s sec)"
+  echo "  -t sec:    timeout to stop waiting"
+  [ "$opt_h" = "1" ] && exit 0 || exit 1
+}
+check_timeout() {
+  # timeout when a timeout is defined and we will exceed the timeout after the
+  # next sleep completes
+  if [ "$opt_t" -gt 0 ]; then
+    cur_epoc=$(date +%s)
+    cutoff_epoc=$(expr ${start_epoc} + $opt_t - $opt_s)
+    if [ "$cur_epoc" -gt "$cutoff_epoc" ]; then
+      echo "Error: Timeout exceeded"
+      exit 1
+    fi
+  fi
+}
+get_service_ids() {
+  if [ -n "$opt_n" ]; then
+    service_list=""
+    for name in $opt_n; do
+      service_list="${service_list:+${service_list} }${stack_name}_${name}"
+    done
+    docker service inspect --format '{{.ID}}' ${service_list}
+  else
+    docker stack services ${opt_f} -q "${stack_name}"
+  fi
+}
+service_state() {
+  # output the state when it changes from the last state for the service
+  service=$1
+  # strip any invalid chars from service name for caching state
+  service_safe=$(echo "$service" | sed 's/[^A-Za-z0-9_]/_/g')
+  state=$2
+  if eval [ \"\$cache_${service_safe}\" != \"\$state\" ]; then
+    echo "Service $service state: $state"
+    eval cache_${service_safe}=\"\$state\"
+  fi
+}
+
+while getopts 'f:hn:rs:t:' opt; do
+  case $opt in
+    f) opt_f="${opt_f:+${opt_f} }-f $OPTARG";;
+    h) opt_h=1;;
+    n) opt_n="${opt_n:+${opt_n} } $OPTARG";;
+    r) opt_r=1;;
+    s) opt_s="$OPTARG";;
+    t) opt_t="$OPTARG";;
+  esac
+done
+shift $(expr $OPTIND - 1)
+
+if [ $# -ne 1 -o "$opt_h" = "1" -o "$opt_s" -le "0" ]; then
+  usage
+fi
+
+stack_name=$1
+
+# 0 = running, 1 = success, 2 = error
+stack_done=0
+while [ "$stack_done" != "1" ]; do
+  stack_done=1
+  # run get_service_ids outside of the for loop to catch errors
+  service_ids=$(get_service_ids)
+  for service_id in ${service_ids}; do
+    service_done=1
+    service=$(docker service inspect --format '{{.Spec.Name}}' "$service_id")
+
+    # hardcode a "new" state when UpdateStatus is not defined
+    state=$(docker service inspect -f '{{if .UpdateStatus}}{{.UpdateStatus.State}}{{else}}new{{end}}' "$service_id")
+
+    # check for failed update states
+    case "$state" in
+      paused|rollback_paused)
+        service_done=2
+        ;;
+      rollback_*)
+        if [ "$opt_r" = "0" ]; then
+          service_done=2
+        fi
+        ;;
+    esac
+
+    # identify/report current state
+    if [ "$service_done" != "2" ]; then
+      replicas=$(docker service ls --format '{{.Replicas}}' --filter "id=$service_id" | cut -d' ' -f1)
+      current=$(echo "$replicas" | cut -d/ -f1)
+      target=$(echo "$replicas" | cut -d/ -f2)
+      if [ "$current" != "$target" ]; then
+        # actively replicating service
+        service_done=0
+        state="replicating $replicas"
+      fi
+    fi
+    service_state "$service" "$state"
+
+    # check for states that indicate an update is done
+    if [ "$service_done" = "1" ]; then
+      case "$state" in
+        new|completed|rollback_completed)
+          service_done=1
+          ;;
+        *)
+          # any other state is unknown, not necessarily finished
+          service_done=0
+          ;;
+      esac
+    fi
+
+    # update stack done state
+    if [ "$service_done" = "2" ]; then
+      # error condition
+      stack_done=2
+    elif [ "$service_done" = "0" -a "$stack_done" = "1" ]; then
+      # only go to an updating state if not in an error state
+      stack_done=0
+    fi
+  done
+  if [ "$stack_done" = "2" ]; then
+    echo "Error: This deployment will not complete"
+    exit 1
+  fi
+  if [ "$stack_done" != "1" ]; then
+    check_timeout
+    sleep "${opt_s}"
+  fi
+done
diff --git a/gitlab_test.sh b/testing/gitlab_test.sh
old mode 100644
new mode 100755
similarity index 50%
rename from gitlab_test.sh
rename to testing/gitlab_test.sh
index 1b0972c92f180d9cf6090c5d457169b5408899ab..51a14fa137fa6b6a4160a91e1444fc8cef52a898
--- a/gitlab_test.sh
+++ b/testing/gitlab_test.sh
@@ -1,15 +1,41 @@
 #!/bin/sh
-chmod +x env_setup.sh wait_for_container.sh
-./env_setup.sh
+
+# fetch the secrets and write them to their corresponding env files
+cat $vhr18_db > ../env/vhr18_db.env
+cat $vhr18_django > ../env/vhr18_django.env
+cat $vhr18_obs > ../env/vhr18_obs.env
+
+cat $emg_db > ../env/emg_db.env
+cat $emg_django > ../env/emg_django.env
+cat $emg_obs > ../env/emg_obs.env
+
+
+# use `pvs_testing` bucket instead
+
+sed -i -e 's/emg-data/pvs_testing/g' ../env/emg.env
+sed -i -e 's/vhr18-data/pvs_testing/g' ../env/vhr18.env
+
+sed -i -e 's/emg-cache/pvs_testing/g' ../env/emg_obs.env
+sed -i -e 's/vhr18-cache/pvs_testing/g' ../env/vhr18_obs.env
+
+# source the env file
+set -o allexport
+source ../env/emg.env
+set +o allexport
+
 mkdir data
 docker swarm init
 docker network create -d overlay emg-extnet
-docker stack deploy -c docker-compose.emg.yml -c docker-compose.emg.dev.yml -c docker-compose.logging.yml emg-pvs
-apk update && apk add bash postgresql-dev gcc python3-dev musl-dev py-pip gdal
+docker stack deploy -c ../docker-compose.emg.yml -c ../docker-compose.emg.dev.yml emg-pvs
+apk update && apk add bash postgresql-dev gcc python3-dev musl-dev py-pip gdal libffi-dev openssl-dev make
 pip3 install -r requirements.txt
-./wait_for_container.sh
+
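+# wait until the renderer, registrar and preprocessor services of the stack have converged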
+./docker-stack-wait.sh -n renderer -n registrar -n preprocessor emg-pvs
+
+docker service ls
+
 bash ./registrar_test.sh product_list.csv
 
 # docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /var/www/pvs/dev/pvs_instance/manage.py storage create pvs_testing pvs_testing --type swift --storage-auth auth-cloud-ovh
 # docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /core/registrar.py --objects-prefix "OA/PH1B/0.1/b9/urn:eop:PHR:MULTISPECTRAL_0.5m:DS_PHR1B_201608070959189_FR1_PX_E012N32_0719_00974_4148/0000/PH1B_PHR_FUS_1A_20160807T095918_20160807T095920_TOU_1234_4148.DIMA.tar"
-# pytest -s registrar_test.py --name OA/PH1B/0.1/b9/urn:eop:PHR:MULTISPECTRAL_0.5m:DS_PHR1B_201608070959189_FR1_PX_E012N32_0719_00974_4148/0000/PH1B_PHR_FUS_1A_20160807T095918_20160807T095920_TOU_1234_4148.DIMA.tar
\ No newline at end of file
+# pytest -s registrar_test.py --name OA/PH1B/0.1/b9/urn:eop:PHR:MULTISPECTRAL_0.5m:DS_PHR1B_201608070959189_FR1_PX_E012N32_0719_00974_4148/0000/PH1B_PHR_FUS_1A_20160807T095918_20160807T095920_TOU_1234_4148.DIMA.tar
diff --git a/product_list.csv b/testing/product_list.csv
similarity index 100%
rename from product_list.csv
rename to testing/product_list.csv
diff --git a/testing/registrar_test.py b/testing/registrar_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..54bc3108fc0164c0e62b700c2e62b887650f7435
--- /dev/null
+++ b/testing/registrar_test.py
@@ -0,0 +1,82 @@
+import os
+import csv
+
+import pytest
+import psycopg2
+import paramiko
+from dotenv import load_dotenv
+from xml.etree import ElementTree
+
+
+@pytest.fixture(scope="session")
+def connection():
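+    # session-scoped PostgreSQL connection built from the credentials in ../env/emg_db.env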
+    load_dotenv(dotenv_path='../env/emg_db.env')
+
+    connect_args = dict(
+        dbname=os.environ['DB_NAME'],
+        user=os.environ['DB_USER'],
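+        # DB_PW is re-wrapped in literal quotes; this assumes the value in emg_db.env
+        # is stored with surrounding quotes that python-dotenv strips on load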
+        password=f"\"{os.environ['DB_PW']}\"",
+        host='docker',
+        port=os.environ['DB_PORT'],
+    )
+
+    with psycopg2.connect(**connect_args) as connection:
+        yield connection
+
+
+@pytest.fixture
+def identifiers():
+    with open('./product_list.csv') as f:
+        yield csv.reader(f)
+
+
+@pytest.fixture
+def sftp_connection():
+
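+    # open an SFTP session against the stack's sftp service exposed on port 2222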
+    transport = paramiko.Transport(('docker', 2222))
+    transport.connect(username='eox', password='password')
+    with paramiko.SFTPClient.from_transport(transport) as sftp:
+        yield sftp
+
+def query_eo_object(connection, eo_id):
+    query = f"SELECT identifier FROM coverages_eoobject WHERE identifier = '{eo_id}';"
+    with connection.cursor() as cursor:
+        cursor.execute(query)
+        return cursor.fetchone()[0]
+
+
+def test_db_name(connection, identifiers):
+    for row in identifiers:
+        identifier = row[0].split('/')[4]
+        assert query_eo_object(connection, identifier) == identifier
+
+
+def compare_links(sftp, product_xml, product):
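+    # fetch the product's report XML from the SFTP reporting folder and compare
+    # its WMS/WCS GetCapabilities links against the expected values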
+    report = sftp.file("data/to/panda/%s" % product_xml)
+    xml_file = report.read()
+    root = ElementTree.fromstring(xml_file.decode('utf-8').strip())
+
+    urls = root.findall('{http://www.telespazio.com/CSCDA/CDD/PDAS}URL')
+    wms_link = urls[0].find('{http://www.telespazio.com/CSCDA/CDD/PDAS}URL').text
+    wcs_link = urls[1].find('{http://www.telespazio.com/CSCDA/CDD/PDAS}URL').text
+
+    wms_capabilities = 'emg.pass.copernicus.eu/ows?service=wms&request=GetCapabilities&amp&cql=identifier='
+    wcs_capabilities = 'emg.pass.copernicus.eu/ows?service=wcs&request=GetCapabilities&amp&cql=identifier='
+    expected_wms_link = '%s"%s"' % (wms_capabilities, product)
+    expected_wcs_link = '%s"%s"' % (wcs_capabilities, product)
+    assert expected_wms_link.replace('&amp&', '&') == wms_link
+
+    assert expected_wcs_link.replace('&amp&', '&') == wcs_link
+
+def test_reporting(sftp_connection, identifiers):
+    report_list = sftp_connection.listdir('data/to/panda/')
+    assert len(report_list) > 0
+    # materialize the identifiers first: the csv.reader yielded by the fixture is a
+    # one-shot iterator and would be exhausted after the first report file
+    identifier_list = [row[0].split('/')[4] for row in identifiers]
+    for item in report_list:
+        for identifier in identifier_list:
+            if identifier in item:
+                compare_links(sftp_connection, item, identifier)
\ No newline at end of file
diff --git a/testing/registrar_test.sh b/testing/registrar_test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..3c26bc87dea352ee81cea9b046f0ff2602de11f6
--- /dev/null
+++ b/testing/registrar_test.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+product_list_file=$1
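+# create the pvs_testing swift storage inside the running registrar instance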
+docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /var/www/pvs/dev/pvs_instance/manage.py storage create pvs_testing pvs_testing --type swift --storage-auth auth-cloud-ovh
+IFS=","
+
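+# register every product listed in the given CSV file via the registrar container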
+while read product; do
+    docker exec -i $(docker ps -qf "name=emg-pvs_registrar") \
+        python3 /registrar.py \
+            --objects-prefix $product \
+            --service-url $SERVICE_URL \
+            --reporting-dir "/mnt/reports" \
+            <<<$product
+
+done < "$product_list_file"
+
+pytest #-s registrar_test.py --name $product_list_file
diff --git a/requirements.txt b/testing/requirements.txt
similarity index 72%
rename from requirements.txt
rename to testing/requirements.txt
index 56ba698664ddb78df20e57a7c6159d1da82feba9..b6db4b77a42837295c7f2c82dfad95859ea34b56 100644
--- a/requirements.txt
+++ b/testing/requirements.txt
@@ -1,4 +1,6 @@
 pytest
 psycopg2
+python-dotenv
+paramiko
 # python-swiftclient
 # python-keystoneclient
\ No newline at end of file
diff --git a/wait_for_container.sh b/wait_for_container.sh
deleted file mode 100755
index 8a6b5f97f19a839f79fc748e98fdc6c48c0bef61..0000000000000000000000000000000000000000
--- a/wait_for_container.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-while [ -z $(docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /var/www/pvs/dev/pvs_instance/manage.py id list) ]; do
-  >&2 echo "Collection is not created yet - sleeping"
-    sleep 20
-  done
-
-while [ -z $(docker exec -i $(docker ps -qf "name=emg-pvs_registrar") python3 /var/www/pvs/dev/pvs_instance/manage.py id list) ]; do
-  >&2 echo "Collection is not created yet - sleeping"
-    sleep 20
-  done