EOX GitLab Instance
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
V
VS
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container Registry
Model registry
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ESA
PRISM
VS
Commits
cd17f411
Commit
cd17f411
authored
4 years ago
by
Fabian Schindler
Browse files
Options
Downloads
Patches
Plain Diff
Adding timer class, cleanup
parent
3303a85f
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
preprocessor/preprocessor/preprocess.py
+89
-37
89 additions, 37 deletions
preprocessor/preprocessor/preprocess.py
with
89 additions
and
37 deletions
preprocessor/preprocessor/preprocess.py
+
89
−
37
View file @
cd17f411
...
...
@@ -8,6 +8,7 @@ import shutil
from
typing
import
List
from
contextlib
import
contextmanager
from
pprint
import
pformat
from
time
import
time
from
.transfer
import
get_downloader
,
get_uploader
from
.archive
import
unpack_files
...
...
@@ -45,15 +46,6 @@ def custom_preprocessor(source_dir, target_dir, path, args=None, kwargs=None):
func
(
source_dir
,
target_dir
,
*
(
args
or
[]),
**
(
kwargs
or
{}))
def georeference(source_dir: os.PathLike, target_dir: os.PathLike,
                 type: str, **options: dict):
    """Georeference the files in ``source_dir`` to create georeferenced
    image files in ``target_dir``.

    The ``type`` argument selects which georeferencing method shall be
    applied; expected values are 'GCP', 'RPC', 'world', ...

    NOTE(review): ``type`` shadows the builtin of the same name; kept
    unchanged for interface compatibility with existing callers.
    """
    # TODO: implement
    pass
def
custom_postprocessor
(
source_dir
,
target_dir
,
path
,
args
=
None
,
kwargs
=
None
):
"""
Preprocessing step for a custom preprocessing.
"""
...
...
@@ -77,6 +69,25 @@ def flatten(l):
return
[
item
for
sublist
in
l
for
item
in
sublist
]
class Timer:
    """Context-manager stopwatch used to log elapsed wall-clock time.

    Usage::

        with Timer() as timer:
            do_work()
        logger.info('took %f seconds' % timer.elapsed)

    While the ``with`` block is still running, ``elapsed`` reports the
    time since entry; after exit it reports the total duration.
    """

    def __init__(self):
        # Timestamps captured on context entry/exit; None until then.
        self.start = None
        self.end = None

    def __enter__(self):
        self.start = time()
        return self

    def __exit__(self, *args, **kwargs):
        self.end = time()

    @property
    def elapsed(self):
        # Fall back to "now" while the timer is still running.
        stop = time() if self.end is None else self.end
        return stop - self.start
# -----------------------------------------------------------------------------
@contextmanager
...
...
@@ -86,8 +97,7 @@ def workdir(config, use_dir: os.PathLike=None):
if
use_dir
:
os
.
chdir
(
use_dir
)
yield
use_dir
if
config
.
get
(
'
keep_temp
'
):
elif
config
.
get
(
'
keep_temp
'
):
dirname
=
mkdtemp
(
prefix
=
prefix
,
dir
=
workdir
)
os
.
chdir
(
dirname
)
yield
dirname
...
...
@@ -100,7 +110,7 @@ def workdir(config, use_dir: os.PathLike=None):
def
preprocess_file
(
config
:
dict
,
file_path
:
os
.
PathLike
,
use_dir
:
os
.
PathLike
=
None
):
"""
Runs the preprocessing of a single file.
"""
with
workdir
(
config
,
use_dir
)
as
dirname
:
with
workdir
(
config
,
use_dir
)
as
dirname
,
Timer
()
as
preprocess_timer
:
logger
.
info
(
'
Preprocessing %s in %s
'
%
(
file_path
,
dirname
))
# check if we can reuse a previous download
...
...
@@ -112,13 +122,21 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N
downloader
=
get_downloader
(
source_config
[
'
type
'
],
source_config
.
get
(
'
args
'
),
source_config
.
get
(
'
kwargs
'
)
)
source_archive_path
=
downloader
.
download
(
file_path
,
'
download
'
)
with
Timer
()
as
download_timer
:
source_archive_path
=
downloader
.
download
(
file_path
,
'
download
'
)
logger
.
info
(
'
Downloaded file %s in %f seconds
'
%
(
file_path
,
download_timer
.
elapsed
)
)
else
:
source_archive_path
=
os
.
path
.
join
(
'
download
'
,
os
.
path
.
basename
(
file_path
))
logger
.
info
(
'
Download dir already exists, skipping...
'
)
# fetch the metadata XML file from the downloaded archive
metadata_files
=
unpack_files
(
source_archive_path
,
'
unpack
'
,
glob
=
config
[
'
metadata_glob
'
])
metadata_files
=
unpack_files
(
source_archive_path
,
'
extra
'
,
glob
=
config
[
'
metadata_glob
'
])
# open the XML to retrieve the product type and level
product_type
,
product_level
=
extract_product_type_and_level
(
metadata_files
,
config
)
...
...
@@ -135,30 +153,37 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N
os
.
mkdir
(
'
unpack
'
)
logger
.
info
(
'
Unpacking original files...
'
)
# select and unpack files according to configuration
data_files
=
flatten
([
unpack_files
(
source_archive_path
,
'
unpack
'
,
glob
=
glob
,
recursive
=
preprocess_config
.
get
(
'
nested
'
,
False
),
)
for
glob
in
preprocess_config
[
'
data_file_globs
'
]
])
metadata_files
=
flatten
([
unpack_files
(
source_archive_path
,
'
unpack
'
,
glob
=
glob
,
recursive
=
preprocess_config
.
get
(
'
nested
'
,
False
),
)
for
glob
in
preprocess_config
[
'
additional_file_globs
'
]
])
logger
.
info
(
'
Unpacked files: %s
'
%
'
,
'
.
join
(
metadata_files
,
data_files
))
with
Timer
()
as
unpack_timer
:
data_files
=
flatten
([
unpack_files
(
source_archive_path
,
'
unpack
'
,
glob
=
glob
,
recursive
=
preprocess_config
.
get
(
'
nested
'
,
False
),
)
for
glob
in
preprocess_config
[
'
data_file_globs
'
]
])
metadata_files
=
flatten
([
unpack_files
(
source_archive_path
,
'
unpack
'
,
glob
=
glob
,
recursive
=
preprocess_config
.
get
(
'
nested
'
,
False
),
)
for
glob
in
preprocess_config
.
get
(
'
additional_file_globs
'
,
[])
])
logger
.
info
(
'
Unpacked files: %s in %f seconds
'
%
(
'
,
'
.
join
(
metadata_files
+
data_files
),
unpack_timer
.
elapsed
)
)
else
:
logger
.
info
(
'
Unpack dir already exists, skipping...
'
)
previous_step
=
'
unpack
'
force_refresh
=
False
# make processing steps
for
step
in
[
'
custom_preprocessor
'
,
'
subdatasets
'
,
'
georeference
'
,
'
calc
'
,
'
stack_bands
'
,
'
output
'
,
'
custom_postprocessor
'
]:
step_config
=
preprocess_config
.
get
(
step
)
...
...
@@ -167,14 +192,26 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N
continue
# run the step if it was not already run
if
not
os
.
path
.
isdir
(
step
):
if
not
os
.
path
.
isdir
(
step
)
or
force_refresh
:
if
os
.
path
.
isdir
(
step
):
logger
.
info
(
'
Forcing refresh of existing directory %s
'
%
step
)
shutil
.
rmtree
(
step
)
logger
.
info
(
'
Running preprocessing step %s
'
%
step
)
os
.
mkdir
(
step
)
preprpocessor
=
STEP_FUNCTIONS
[
step
]
preprpocessor
(
previous_step
,
step
,
**
step_config
)
with
Timer
()
as
step_timer
:
preprpocessor
(
previous_step
,
step
,
**
step_config
)
logger
.
info
(
'
Finished preprocessing step %s after %f seconds.
'
%
(
step
,
step_timer
.
elapsed
)
)
else
:
logger
.
info
(
'
%s dir already exists, skipping...
'
%
step
)
logger
.
info
(
'
%s dir already exists, skipping step...
'
%
step
)
force_refresh
=
True
previous_step
=
step
...
...
@@ -197,4 +234,19 @@ def preprocess_file(config: dict, file_path: os.PathLike, use_dir: os.PathLike=N
]
# send all files in the upload directory to the target storage
uploader
.
upload
(
upload_filenames
,
file_path
)
logger
.
info
(
'
Starting uploading of %d files to %s
'
%
(
len
(
upload_filenames
),
file_path
)
)
with
Timer
()
as
upload_timer
:
uploader
.
upload
(
upload_filenames
,
file_path
)
logger
.
info
(
'
Finished uploading after %f seconds.
'
%
(
upload_timer
.
elapsed
)
)
logger
.
info
(
'
Finished preprocessing of %s after %f seconds.
'
%
(
file_path
,
preprocess_timer
.
elapsed
)
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment