Use urljoin to simplify joining of catalog link hrefs

import logging
import json
from os.path import join
from typing import Iterator
from urllib.parse import urlparse, urljoin
from urllib.parse import urljoin
from ..abc import FileScheme, Source
......@@ -18,15 +17,6 @@ class STACCatalogScheme(FileScheme):
def _read_json(self, path):
return json.load(self.source.open(path))
def _join_href(self, path: str, href: str) -> str:
Joins the given href with a previous bucket/key. When we have a fully
qualified S3 URL, the included bucket/key pair is returned.
If href is a relative path, it is joined with the previous key.
parsed = urlparse(href)
return urljoin(parsed._replace(path=join(path, parsed.path)))
def harvest(self) -> Iterator[dict]:
    """
    Yield everything produced by ``harvest_catalog`` when started at
    ``self.root_path``.
    """
    root_results = self.harvest_catalog(self.root_path)
    yield from root_results
......@@ -39,9 +29,9 @@ class STACCatalogScheme(FileScheme):
catalog = self._read_json(path)
for link in catalog["links"]:
if link["rel"] == "item":
item_href = self._join_href(path, link["href"])
item_href = urljoin(path, link["href"])
logger.info(f"Harvested item {item_href}")
yield self._read_json(item_href)
elif link["rel"] == "child":
catalog_href = self._join_href(path, link["href"])
catalog_href = urljoin(path, link["href"])
yield from self.harvest_catalog(catalog_href)
