EOX GitLab Instance

Skip to content
Snippets Groups Projects
Commit 7ff95207 authored by Lubomir Dolezal's avatar Lubomir Dolezal
Browse files

removing duplicates from filelists preprocessor

parent b6b60ae4
No related branches found
No related tags found
2 merge requests!55Production release 1.2.0,!54Shib configs update
......@@ -5,7 +5,7 @@ from glob import glob
import shutil
from typing import List, Tuple
from ..util import gdal, osr, replace_ext
from ..util import gdal, osr, replace_ext, get_all_data_files
logger = logging.getLogger(__name__)
......@@ -31,10 +31,7 @@ def georeference_step(source_dir: os.PathLike, target_dir: os.PathLike, preproce
else:
raise Exception('Invalid georeference type %s' % type_name)
try:
filenames = []
for dataglob in preprocessor_config.get('data_file_globs', '*'):
for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not os.path.isdir(path)]:
filenames.append(p)
filenames = get_all_data_files(source_dir, preprocessor_config)
for filename in filenames:
target_filename = join(target_dir, basename(filename))
georef_func(filename, target_filename, **opts_dict)
......
import os
from os.path import join, basename
from uuid import uuid4
from glob import glob
from ..util import replace_ext, gdal
from ..util import replace_ext, gdal, get_all_data_files
import logging
logger = logging.getLogger(__name__)
......@@ -19,10 +18,7 @@ def output_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_c
extension = driver.GetMetadata().get('DMD_EXTENSIONS', 'tif').split(' ')[0]
# warp each individual file
warped_files = []
filenames = []
for dataglob in preprocessor_config.get('data_file_globs', '*'):
for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not os.path.isdir(path)]:
filenames.append(p)
filenames = get_all_data_files(source_dir, preprocessor_config)
for filename in filenames:
target_filename = join(target_dir, replace_ext(basename(filename), extension))
logger.debug('Warping file %s' % filename)
......
import os
from os.path import basename, join, splitext
from os.path import basename, join
from itertools import groupby
import re
from glob import glob
from typing import List
from ..util import replace_ext, gdal
from ..util import replace_ext, gdal, get_all_data_files
def stack_bands_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, group_by: str=None, sort_by: str=None, order: List[str]=None):
""" Stack bands of the individual images
"""
filenames = []
for dataglob in preprocessor_config.get('data_file_globs', '*'):
for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not os.path.isdir(path)]:
filenames.append(p)
filenames = get_all_data_files(source_dir, preprocessor_config)
# check if we have a group_by regex. If yes, use the first
# re-group to group by.
# Fallback is basename of file as groupname
......
import os
from os.path import join, splitext, basename, dirname, isdir
from glob import glob
from os.path import join, basename
from typing import Dict
from ..util import replace_ext, gdal
from ..util import replace_ext, gdal, get_all_data_files
def extract_subdataset_step(source_dir: os.PathLike, target_dir: os.PathLike, preprocessor_config: dict, subdataset_types: Dict[str, str]=None):
filenames = []
for dataglob in preprocessor_config.get('data_file_globs', '*'):
for p in [path for path in glob(join(source_dir, '**', dataglob), recursive=True) if not isdir(path)]:
filenames.append(p)
filenames = get_all_data_files(source_dir, preprocessor_config)
if len(filenames) == 0:
raise Exception('No datafiles were matched by the provided glob')
for filename in datafiles:
for filename in filenames:
extract_subdatasets(
filename,
target_dir,
......
import os
from os.path import splitext
from os.path import splitext, join
from contextlib import contextmanager
from tempfile import TemporaryDirectory, mkdtemp
from time import time
from glob import glob
try:
from osgeo import gdal
......@@ -86,3 +87,14 @@ def get_size_in_bytes(file_path, unit):
""" Get size of file at given path in bytes"""
size = os.path.getsize(file_path)
return convert_unit(size, unit)
def get_all_data_files(source_dir, preprocessor_config):
    """ Return all unique data file paths found under `source_dir`.

    Glob patterns are read from the 'data_file_globs' key of
    `preprocessor_config` (default: match every file) and applied
    recursively ('**'); directories are excluded. Duplicates caused by
    overlapping globs are removed while preserving first-occurrence
    order.
    """
    file_paths = []
    # Default must be a LIST of globs: a bare '*' string would be
    # iterated character by character and only worked by accident
    # because '*' happens to be a single character.
    for dataglob in preprocessor_config.get('data_file_globs', ['*']):
        file_paths.extend(
            path
            for path in glob(join(source_dir, '**', dataglob), recursive=True)
            if not os.path.isdir(path)
        )
    # get only unique files to compensate for possibly bad glob yielding
    # doubles; dict preserves insertion order (Python 3.7+), keeping order
    return list(dict.fromkeys(file_paths))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment