EOX GitLab Instance

Commit 26b364eb authored by Fabian Schindler
Browse files

Fixing argument parsing and handling absolute paths

parent a70884cc
Pipeline #19462 passed with stages
in 2 minutes and 6 seconds
...@@ -114,23 +114,31 @@ class S3Source(Source): ...@@ -114,23 +114,31 @@ class S3Source(Source):
return item return item
class S3CatalogSource(S3Base, Source): class S3CatalogSource(S3Base):
type = "S3Catalog" type = "S3Catalog"
def __init__(self, root_href: str, **kwargs): def __init__(self, parameters: dict, **kwargs):
super().__init__(**kwargs) self.root_href = parameters.pop("root_href")
self.root_href = root_href self.default_catalog_name = parameters.pop("default_catalog_name", None)
def harvest(self) -> Iterator[dict]: def harvest(self) -> Iterator[dict]:
logger.info("Starting S3 Catalog harvesting") logger.info("Starting S3 Catalog harvesting")
parsed = urlparse(self.root_href) parsed = urlparse(self.root_href)
yield from self.harvest_catalog(parsed.netloc, parsed.path) path = parsed.path
if path.startswith("/"):
path = parsed.path[1:]
if path.endswith("/") and self.default_catalog_name:
path = join(path, self.default_catalog_name)
yield from self.harvest_catalog(parsed.netloc, path)
def fetch_json(self, bucket: str, key: str) -> dict: def fetch_json(self, bucket: str, key: str) -> dict:
""" """
Loads the given object identifier by bucket and key and loads it as Loads the given object identifier by bucket and key and loads it as
""" """
if key.startswith("/"):
key = key[1:]
response = self.client.get_object(Bucket=bucket, Key=key) response = self.client.get_object(Bucket=bucket, Key=key)
return json.load(response["Body"]) return json.load(response["Body"])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment