import os
from os.path import join, splitext, basename, dirname
from glob import glob
from typing import Dict, Optional

from osgeo import gdal

from ..util import replace_ext


gdal.UseExceptions()


def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, data_file_glob: str, subdataset_types: Optional[Dict[str, str]] = None):
    """Extract the subdatasets of every data file matched in ``source_dir``.

    Args:
        source_dir: directory that ``data_file_glob`` is joined onto.
        target_dir: directory the extracted GeoTIFF files are written to.
        data_file_glob: glob pattern selecting the data files to process.
        subdataset_types: mapping of subdataset type name to the filename
            suffix used for the extracted file. ``None`` selects every
            subdataset (see ``extract_subdatasets``).

    Raises:
        Exception: if the glob does not match any file.
    """
    datafiles = glob(join(source_dir, data_file_glob))
    if not datafiles:
        raise Exception('No datafiles were matched by the provided glob')

    for filename in datafiles:
        extract_subdatasets(filename, target_dir, subdataset_types)


def _default_suffix(sd_type: str) -> str:
    """Build a filename-safe suffix from a subdataset type name."""
    # Subdataset names may contain path separators or other characters that
    # are not safe in a filename, so keep alphanumerics only.
    return '_' + ''.join(c if c.isalnum() else '_' for c in sd_type).strip('_')


def extract_subdatasets(source_filename: os.PathLike, target_dir: os.PathLike, subdataset_types: Optional[Dict[str, str]] = None):
    """Write the selected subdatasets of one file as GeoTIFFs in ``target_dir``.

    Each output file is named after the basename of ``source_filename``,
    with the extension swapped via ``replace_ext`` for ``<suffix>.tif``.

    Args:
        source_filename: path of the container file to open with GDAL.
        target_dir: directory the GeoTIFF files are written to.
        subdataset_types: mapping of subdataset type name (the last
            ``:``-separated component of the GDAL subdataset locator) to the
            filename suffix for that subdataset. When ``None``, every
            subdataset is extracted and the suffix is derived from the
            subdataset type name.

    Raises:
        Exception: if no subdataset is selected.
    """
    ds = gdal.Open(source_filename)
    # GetSubDatasets() yields (locator, description) pairs; only the locator
    # is needed.
    locators = [locator for locator, _ in ds.GetSubDatasets()]
    # Drop the reference so GDAL can release the container dataset before
    # the individual subdatasets are opened by Translate.
    del ds

    sub_datasets = []
    for locator in locators:
        # The type is the last component of the locator. rsplit keeps this
        # working when the embedded path itself contains ':' (for example a
        # drive letter), where an exact three-way split would raise.
        # NOTE(review): for drivers whose locators carry extra trailing
        # components, confirm the last component is the intended key.
        sd_type = locator.rsplit(':', 1)[-1]
        if subdataset_types is None:
            # No mapping was given: take everything, with a derived suffix.
            sub_datasets.append((locator, _default_suffix(sd_type)))
        elif sd_type in subdataset_types:
            sub_datasets.append((locator, subdataset_types[sd_type]))

    if not sub_datasets:
        raise Exception('No subdatasets were matched by the provided types')

    for locator, suffix in sub_datasets:
        target_filename = join(
            target_dir,
            basename(replace_ext(source_filename, f'{suffix}.tif')),
        )
        gdal.Translate(target_filename, locator, format='GTiff')