EOX GitLab Instance
Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
View Server 2
harvester
Commits
26b364eb
Commit
26b364eb
authored
Dec 07, 2021
by
Fabian Schindler
Browse files
Fixing argument parsing and handling absolute paths
parent
a70884cc
Pipeline
#19462
passed with stages
in 2 minutes and 6 seconds
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
harvester/source/s3.py
View file @
26b364eb
...
@@ -114,23 +114,31 @@ class S3Source(Source):
...
@@ -114,23 +114,31 @@ class S3Source(Source):
return
item
return
item
class
S3CatalogSource
(
S3Base
,
Source
):
class
S3CatalogSource
(
S3Base
):
type
=
"S3Catalog"
type
=
"S3Catalog"
def
__init__
(
self
,
root_href
:
str
,
**
kwargs
):
def
__init__
(
self
,
parameters
:
dict
,
**
kwargs
):
super
().
__init__
(
**
kwargs
)
self
.
root_href
=
parameters
.
pop
(
"root_href"
)
self
.
root_href
=
root_href
self
.
default_catalog_name
=
parameters
.
pop
(
"default_catalog_name"
,
None
)
super
().
__init__
(
**
parameters
)
def
harvest
(
self
)
->
Iterator
[
dict
]:
def
harvest
(
self
)
->
Iterator
[
dict
]:
logger
.
info
(
"Starting S3 Catalog harvesting"
)
logger
.
info
(
"Starting S3 Catalog harvesting"
)
parsed
=
urlparse
(
self
.
root_href
)
parsed
=
urlparse
(
self
.
root_href
)
yield
from
self
.
harvest_catalog
(
parsed
.
netloc
,
parsed
.
path
)
path
=
parsed
.
path
if
path
.
startswith
(
"/"
):
path
=
parsed
.
path
[
1
:]
if
path
.
endswith
(
"/"
)
and
self
.
default_catalog_name
:
path
=
join
(
path
,
self
.
default_catalog_name
)
yield
from
self
.
harvest_catalog
(
parsed
.
netloc
,
path
)
def
fetch_json
(
self
,
bucket
:
str
,
key
:
str
)
->
dict
:
def
fetch_json
(
self
,
bucket
:
str
,
key
:
str
)
->
dict
:
"""
"""
Loads the given object identifier by bucket and key and loads it as
Loads the given object identifier by bucket and key and loads it as
JSON.
JSON.
"""
"""
if
key
.
startswith
(
"/"
):
key
=
key
[
1
:]
response
=
self
.
client
.
get_object
(
Bucket
=
bucket
,
Key
=
key
)
response
=
self
.
client
.
get_object
(
Bucket
=
bucket
,
Key
=
key
)
return
json
.
load
(
response
[
"Body"
])
return
json
.
load
(
response
[
"Body"
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment