EOX GitLab Instance

Commit d9166b51 authored by Bernhard Mallinger's avatar Bernhard Mallinger
Browse files

Added some general review comments

parent 2af63104
Pipeline #18474 passed with stage
in 30 seconds
......@@ -27,6 +27,9 @@ def run_daemon(config: dict, host: str, port: str, listen_queue: str):
client = init_client(host, port)
logger.debug("waiting for redis queue '%s'", listen_queue)
# REVIEW: if you'd really want to test this, you could mock brpop or main and
# replace it with a function which throws something after a few calls, but
# i'd say it's fine to have this small piece of startup code without tests
while True:
# fetch an item from the queue to be harvested
_, value = client.brpop(listen_queue)
......
......@@ -18,6 +18,8 @@ class Endpoint(ABC):
self.query = Query(**query)
self.filter = json_parse(filter)
# REVIEW: is this the only important method that's ever being called this class?
# if so, having a class might not even be necessary
@abstractmethod
def harvest(self) -> list:
# All endpoints should extend function to do following
......@@ -30,6 +32,11 @@ class Endpoint(ABC):
@classmethod
def from_config(cls, endpoint_config: dict) -> "Endpoint":
# REVIEW: iirc using __subclasses__ can be dangerous because it only contains
# known subclasses, i.e. classes which have already been imported. so if
# do a refactoring and move a class somewhere else, it could be missing here.
# an alternative to this could be an explicit list of subclasses in the global scope
# somewhere
subclass_map = {subclass.type: subclass for subclass in cls.__subclasses__()}
endpoint_type = endpoint_config.pop("type", None)
......
......@@ -13,6 +13,9 @@ from .Endpoint import Endpoint
logger = logging.getLogger(__name__)
# REVIEW: using frozen=True can prevent accidental modification and
# can make it easier to reason about code because it guarantees that
# nothing happens with the data
@dataclass
class SearchPage:
index: int
......@@ -28,6 +31,7 @@ class OpenSearchFormat(ABC):
subclass_map = {
subclass.mimetype: subclass for subclass in cls.__subclasses__()
}
# REVIEW: is it a valid use case that type_ is None here?
type_ = config.pop("type", None)
subclass = subclass_map[type_]
......@@ -84,6 +88,7 @@ class AtomFormat(OpenSearchFormat):
class OpenSearchEndpoint(Endpoint):
# REVIEW: this should probably be `mimetype = ...`
type = "OpenSearch"
NS = {
......
......@@ -9,6 +9,7 @@ from .exceptions import HarvestError
logger = logging.getLogger(__name__)
# REVIEW: this could be a dataclass
class Harvester:
def __init__(
self,
......@@ -51,6 +52,12 @@ class Harvester:
def main(config: dict, value: str, client: Redis):
logger.info("running harvesting for %s", value)
# REVIEW: does the `harvester` object only live in this function?
# if so, it might not be necessary to have a class for it, it could just
# parse `value` as necessary and then build the endpoint.
# there is this general approach where you write code as directly as possible,
# and then introduce abstractions when you detect patterns in this code.
# Initialize harvester
try:
harvester = Harvester.from_json(value)
......
......@@ -5,6 +5,19 @@ import pytest
from harvester.query import Query
# REVIEW: i think this is a nice way of testing this.
# my personal approach would be to structure this a bit differently,
# e.g. in one test called `test_query_initialization_accepts_time_formats`
# where the assert in the end is only for `time_begin` and `time_end`
# i wouldn't write a test case for the rest because those are not important
# behaviors of the system (it's not important for the whole system that e.g.
# there's a property called `collection` on the query object).
# however i would write tests that call `main` with certain configs.
# the main purpose of this whole service seems to be turning harvester configs
# into `encoded`, so there could be a function doing just this.
# `main` would then just call this function and push the result to a redis queue.
# the tests however could then inspect the result.
@pytest.mark.parametrize("time", [("20200101"), (datetime.datetime(2020, 1, 1))])
def test_initialization_of_query(time):
"""Test initialization of query"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment