EOX GitLab Instance

Commit 6f2e908d authored by Fabian Schindler
Browse files

Using urljoin to simplify

parent 103012c9
Pipeline #19430 failed with stage
in 38 seconds
import logging
import json
from os.path import join
from typing import Iterator
from urllib.parse import urlparse, urljoin
from urllib.parse import urljoin
from ..abc import FileScheme, Source
......@@ -18,15 +17,6 @@ class STACCatalogScheme(FileScheme):
def _read_json(self, path):
    """
    Open ``path`` through ``self.source`` and return its parsed JSON
    content.
    """
    handle = self.source.open(path)
    return json.load(handle)
def _join_href(self, path: str, href: str) -> str:
    """
    Resolve a link ``href`` against ``path``, the location of the document
    the link was found in.

    When ``href`` is fully qualified (e.g. an S3 URL), its own location is
    used instead of ``path``. When ``href`` is relative, it is resolved
    relative to ``path``: the last segment of ``path`` is dropped and
    ``href`` is joined onto the remainder.

    :param path: location of the document containing the link
    :param href: link target, either fully qualified or relative
    :returns: the resolved location as a string
    """
    # ``urljoin`` needs both a base and a url. The earlier code passed it a
    # single ``ParseResult``, which raises ``TypeError`` and could never
    # have produced the ``str`` this method promises.
    # NOTE(review): ``urljoin`` only resolves relative references for
    # schemes it treats as hierarchical; ``s3`` does not appear to be one
    # of them, so confirm ``path`` is a plain key rather than an
    # ``s3://`` URL.
    return urljoin(path, href)
def harvest(self) -> Iterator[dict]:
    """
    Yield everything ``harvest_catalog`` produces when started at
    ``self.root_path``.
    """
    from_root = self.harvest_catalog(self.root_path)
    yield from from_root
......@@ -39,9 +29,9 @@ class STACCatalogScheme(FileScheme):
catalog = self._read_json(path)
for link in catalog["links"]:
if link["rel"] == "item":
item_href = self._join_href(path, link["href"])
item_href = urljoin(path, link["href"])
logger.info(f"Harvested item {item_href}")
yield self._read_json(item_href)
elif link["rel"] == "child":
catalog_href = self._join_href(path, link["href"])
catalog_href = urljoin(path, link["href"])
yield from self.harvest_catalog(catalog_href)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment