From 9d36c491d7719b8a9cf1449a5b5a92153995fe17 Mon Sep 17 00:00:00 2001 From: Fabian Schindler <fabian.schindler.strauss@gmail.com> Date: Tue, 25 Aug 2020 16:54:07 +0200 Subject: [PATCH] Adding pycache files to gitignore --- .gitignore | 2 ++ preprocessor/preprocessor/archive.py | 22 +++++++++++++++------- preprocessor/preprocessor/util.py | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 3af0ccb6..47fb2ace 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /data +*.pyc +**/__pycache__ \ No newline at end of file diff --git a/preprocessor/preprocessor/archive.py b/preprocessor/preprocessor/archive.py index 893dc950..7dedcd5f 100644 --- a/preprocessor/preprocessor/archive.py +++ b/preprocessor/preprocessor/archive.py @@ -1,5 +1,6 @@ from os import PathLike import os.path +import io from typing import List, Union, BinaryIO import tarfile import zipfile @@ -35,12 +36,12 @@ def is_tarfile(archive_file: Union[PathLike, BinaryIO]) -> bool: def open_tarfile(archive_file: Union[PathLike, BinaryIO]) -> tarfile.TarFile: """ Open a TAR file from either a path or a file object. """ - if isinstance(archive_file, BinaryIO): + if isinstance(archive_file, (BinaryIO, io.BufferedReader)): return tarfile.open(fileobj=archive_file) return tarfile.open(archive_file) -def unpack_files(archive_path: Union[PathLike, BinaryIO] , target_dir: PathLike, glob=None, filenames=None, recursive=False) -> List[PathLike]: +def unpack_files(archive_path: Union[PathLike, BinaryIO], target_dir: PathLike, glob=None, filenames=None, recursive=False) -> List[PathLike]: """ Unpacks the contents of the specified ZIP or TAR archive to the given target directory. Optionally, only a given list of filenames will be extracted. @@ -49,6 +50,7 @@ def unpack_files(archive_path: Union[PathLike, BinaryIO] , target_dir: PathLike, """ iszip = False istar = False + # open the archive and extract a list of filenames if is_tarfile(archive_path): archive = open_tarfile(archive_path) @@ -94,21 +96,27 @@ def unpack_files(archive_path: Union[PathLike, BinaryIO] , target_dir: PathLike, for extension in ARCHIVE_EXTENSIONS: sub_archives = filter_filenames(all_filenames, '*.%s' % extension) for sub_archive in sub_archives: + sub_archive_filename = os.path.join( + os.path.dirname(archive_path), + os.path.basename(sub_archive), + ) if istar: - sub_archive_file = archive.extractfile( + archive.extract( archive.getmember(sub_archive) ) + os.rename(sub_archive, sub_archive_filename) if iszip: - sub_archive_file = archive.open(sub_archive) + archive.extract(sub_archive) + os.rename(sub_archive, sub_archive_filename) sub_filenames = unpack_files( - sub_archive_file, + sub_archive_filename, os.path.join(target_dir, sub_archive), glob, filenames, - recursive + recursive, ) extracted_filenames.extend(sub_filenames) # return a list of files extracted - return extracted_filenames \ No newline at end of file + return extracted_filenames diff --git a/preprocessor/preprocessor/util.py b/preprocessor/preprocessor/util.py index ebc6a6ed..9d75850e 100644 --- a/preprocessor/preprocessor/util.py +++ b/preprocessor/preprocessor/util.py @@ -2,5 +2,5 @@ import os from os.path import splitext -def replace_ext(filename: os.PathLike, new_ext: str) -> os.PathLike: - return splitext(filename)[0] + ('' if new_ext.startswith('.') else '.') + new_ext +def replace_ext(filename: os.PathLike, new_ext: str, force_dot: bool=True) -> os.PathLike: + return splitext(filename)[0] + ('' if new_ext.startswith('.') or not force_dot else '.') + new_ext -- GitLab