import re
from os.path import join

from .xml import read_xml, parse_metadata_schema, Parameter
from .context import Context
from .exceptions import RegistrationError

# NOTE(review): ``parse_datetime`` is referenced in
# ``Sentinel2RegistrationScheme.MTD_TL_SCHEMA`` below but is not imported in
# this module as shown; it must be brought into scope for the module to
# import. The providing module is not visible from here -- TODO confirm.

# Captures the three-character band code and the resolution suffix from the
# end of an image file path.
_BAND_RE = re.compile(r'.*([A-Z0-9]{3})_([0-9]{2}m)$')

# Captures the mask type and the three-character band code from a
# ``MSK_<TYPE>_<BAND>`` mask file name.
_MASK_TYPE_RE = re.compile(r'.*/MSK_([A-Z]*)_([A-Z0-9]{3}).[a-z0-9]+$')


def _first_group(pattern, value, description):
    """Return the first capture group of *pattern* matched against *value*.

    Args:
        pattern: a compiled regular expression with at least one group.
        value: the string to match from its start.
        description: short human readable name of *value*, used in the
            error message only.

    Raises:
        RegistrationError: if *value* does not match *pattern*.
    """
    match = pattern.match(value)
    if match is None:
        raise RegistrationError(
            f'Unexpected {description} {value!r} in the product metadata'
        )
    return match.group(1)


class RegistrationScheme:
    """Base class of all registration schemes.

    A scheme is bound to a ``source`` and a ``path`` and produces a
    ``Context`` via :meth:`get_context`, which subclasses must implement.
    """

    def __init__(self, source, path):
        self.source = source
        self.path = path

    def get_context(self):
        """Build the registration context; must be overridden."""
        raise NotImplementedError


class Sentinel2RegistrationScheme(RegistrationScheme):
    """Registration scheme reading its metadata from ``MTD_TL.xml``."""

    # Maps metadata field names to the XPath used to extract them. The second
    # and third ``Parameter`` arguments are passed through as-is; presumably
    # "multiple values" flag and value parser -- TODO confirm against
    # ``.xml.Parameter``.
    # NOTE(review): 'mask_file_paths' is rooted at ``n1:Level-2A_Tile_ID``
    # while all other entries are rooted at ``n1:Level-2A_User_Product``;
    # confirm that both can be resolved against the same document.
    MTD_TL_SCHEMA = {
        'begin_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_START_TIME/text()', False, parse_datetime),
        'end_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_STOP_TIME/text()', False, parse_datetime),
        'identifier': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_URI/text()'),
        'level': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PROCESSING_LEVEL/text()'),
        'type': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/PRODUCT_TYPE/text()'),
        'generation_time': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/GENERATION_TIME/text()', False, parse_datetime),
        'cloud_cover': Parameter('/n1:Level-2A_User_Product/n1:Quality_Indicators_Info/Cloud_Coverage_Assessment'),
        'image_file_paths': Parameter('/n1:Level-2A_User_Product/n1:General_Info/Product_Info/Product_Organisation/Granule_List/Granule/IMAGE_FILE/text()', True),
        'mask_file_paths': Parameter('/n1:Level-2A_Tile_ID/n1:Quality_Indicators_Info/Pixel_Level_QI/MASK_FILENAME', True),
    }

    # Namespace prefix mapping used when evaluating the XPaths above.
    S2_NAMESPACES = {
        'n1': "https://psd-14.sentinel2.eo.esa.int/PSD/User_Product_Level-2A.xsd"
    }

    def get_context(self):
        """Read ``MTD_TL.xml`` below ``self.path`` and build a ``Context``.

        Returns:
            A ``Context`` holding the product identifier, the raster files
            keyed by band code, the mask files keyed by mask type, the
            metadata file path and the begin/end/generation times and cloud
            cover taken from the metadata.

        Raises:
            RegistrationError: if an image or mask file path listed in the
                metadata does not follow the expected naming pattern.
        """
        metadata_file = join(self.path, 'MTD_TL.xml')
        mtd_tree = read_xml(self.source, metadata_file)

        # get MTD metadata
        metadata = parse_metadata_schema(
            mtd_tree, self.MTD_TL_SCHEMA, self.S2_NAMESPACES
        )

        # Image paths are listed without extension and relative to the
        # product path, hence the join and the appended '.jp2'.
        raster_files = {
            _first_group(_BAND_RE, image_file_path, 'image file path'):
                f'{join(self.path, image_file_path)}.jp2'
            for image_file_path in metadata['image_file_paths']
        }

        # Keyed by mask type only: several masks of the same type (one per
        # band) collapse to the last one listed.
        mask_files = {
            _first_group(_MASK_TYPE_RE, mask_file_path, 'mask file path'):
                mask_file_path
            for mask_file_path in metadata['mask_file_paths']
        }

        return Context(
            identifier=metadata['identifier'],
            raster_files=raster_files,
            mask_files=mask_files,
            metadata_files=[metadata_file],
            metadata={
                'begin_time': metadata['begin_time'],
                'end_time': metadata['end_time'],
                'generation_time': metadata['generation_time'],
                'cloud_cover': metadata['cloud_cover'],
            }
        )


class GSCRegistrationScheme(RegistrationScheme):
    """Placeholder scheme; inherits the unimplemented ``get_context``."""
    pass


# Scheme classes by the 'type' name used in the configuration.
REGISTRATION_SCHEMES = {
    'gsc': GSCRegistrationScheme,
    'sentinel-2': Sentinel2RegistrationScheme,
}


def get_scheme(config: dict, path: str) -> RegistrationScheme:
    """Instantiate the first configured scheme applicable to *path*.

    The entries of ``config['schemes']`` are tried in order. An entry with a
    non-empty 'filter' is selected when that regular expression matches the
    start of *path*; an entry without a 'filter' (missing or empty) always
    matches.

    Args:
        config: configuration mapping with a 'schemes' list. Each entry has
            a 'type' (a key of ``REGISTRATION_SCHEMES``) and optionally
            'filter', 'args' and 'kwargs'.
        path: the path to find a scheme for.

    Returns:
        The selected scheme class instantiated with the entry's 'args' and
        'kwargs'.

    Raises:
        RegistrationError: if no configured scheme applies to *path*.
    """
    cfg_schemes = config['schemes']

    for cfg_scheme in cfg_schemes:
        # ``.get`` so that an entry without a 'filter' key is a catch-all
        # instead of raising ``KeyError``.
        scheme_filter = cfg_scheme.get('filter')
        if scheme_filter:
            if re.match(scheme_filter, path):
                break
        else:
            break
    else:
        # no source found
        raise RegistrationError(f'Could not find a suitable scheme for the path {path}')

    return REGISTRATION_SCHEMES[cfg_scheme['type']](
        *cfg_scheme.get('args', []),
        **cfg_scheme.get('kwargs', {}),
    )