EOX GitLab Instance

Commit 26b364eb authored by Fabian Schindler
Browse files

Fixing argument parsing and handling absolute paths

parent a70884cc
Pipeline #19462 passed with stages
in 2 minutes and 6 seconds
......@@ -114,23 +114,31 @@ class S3Source(Source):
return item
class S3CatalogSource(S3Base, Source):
class S3CatalogSource(S3Base):
type = "S3Catalog"
def __init__(self, root_href: str, **kwargs):
self.root_href = root_href
def __init__(self, parameters: dict, **kwargs):
self.root_href = parameters.pop("root_href")
self.default_catalog_name = parameters.pop("default_catalog_name", None)
def harvest(self) -> Iterator[dict]:
logger.info("Starting S3 Catalog harvesting")
parsed = urlparse(self.root_href)
yield from self.harvest_catalog(parsed.netloc, parsed.path)
path = parsed.path
if path.startswith("/"):
path = parsed.path[1:]
if path.endswith("/") and self.default_catalog_name:
path = join(path, self.default_catalog_name)
yield from self.harvest_catalog(parsed.netloc, path)
def fetch_json(self, bucket: str, key: str) -> dict:
Loads the given object identifier by bucket and key and loads it as
if key.startswith("/"):
key = key[1:]
response = self.client.get_object(Bucket=bucket, Key=key)
return json.load(response["Body"])
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment