EOX GitLab Instance

Commit 26b364eb authored by Fabian Schindler
Browse files

Fixing argument parsing and handling absolute paths

parent a70884cc
Pipeline #19462 passed with stages in 2 minutes and 6 seconds
@@ -114,23 +114,31 @@ class S3Source(Source):
         return item
 
 
-class S3CatalogSource(S3Base, Source):
+class S3CatalogSource(S3Base):
     type = "S3Catalog"
 
-    def __init__(self, root_href: str, **kwargs):
-        super().__init__(**kwargs)
-        self.root_href = root_href
+    def __init__(self, parameters: dict, **kwargs):
+        self.root_href = parameters.pop("root_href")
+        self.default_catalog_name = parameters.pop("default_catalog_name", None)
+        super().__init__(**parameters)
 
     def harvest(self) -> Iterator[dict]:
         logger.info("Starting S3 Catalog harvesting")
         parsed = urlparse(self.root_href)
-        yield from self.harvest_catalog(parsed.netloc, parsed.path)
+        path = parsed.path
+        if path.startswith("/"):
+            path = parsed.path[1:]
+        if path.endswith("/") and self.default_catalog_name:
+            path = join(path, self.default_catalog_name)
+        yield from self.harvest_catalog(parsed.netloc, path)
 
     def fetch_json(self, bucket: str, key: str) -> dict:
         """
         Loads the given object identifier by bucket and key and loads it as
         JSON.
         """
+        if key.startswith("/"):
+            key = key[1:]
         response = self.client.get_object(Bucket=bucket, Key=key)
         return json.load(response["Body"])
 
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment