calculate_md5(
    fpath: str | PathLike, chunk_size: int = 1024 * 1024
) -> str

Calculate the MD5 checksum of a file.


  • fpath (str | PathLike) –

    Path to the file.

  • chunk_size (int, default: 1024 * 1024 ) –

    Chunk size for reading the file. Defaults to 1024*1024.


  • str

    MD5 checksum of the file.

Source code in src/nplinker/
def calculate_md5(fpath: str | PathLike, chunk_size: int = 1024 * 1024) -> str:
    """Calculate the MD5 checksum of a file.

    The file is read in chunks so that large files never have to be held in
    memory all at once.

    Args:
        fpath: Path to the file.
        chunk_size: Chunk size for reading the file. Defaults to 1024*1024.

    Returns:
        MD5 checksum of the file.
    """
    # `usedforsecurity` only exists on Python >= 3.9; the digest here is used
    # for integrity checks, not for security purposes.
    if sys.version_info >= (3, 9):
        md5 = hashlib.md5(usedforsecurity=False)
    else:
        md5 = hashlib.md5()
    with open(fpath, "rb") as f:
        # `iter(callable, sentinel)` keeps reading until `read` returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()



check_disk_space(func)

A decorator to check available disk space.

If the available disk space is less than 50GB, a warning is issued.


  • UserWarning

    If the available disk space is less than 50GB.

Source code in src/nplinker/
def check_disk_space(func):
    """A decorator to check available disk space.

    Before every call of the decorated function, the free space on the root
    filesystem ("/") is checked. If it is less than 50GB, a warning is issued;
    the decorated function is called either way.

    Args:
        func: The function to decorate.

    Returns:
        A wrapper that performs the disk space check and then calls `func`
        with the given arguments, returning its result.

    Warnings:
        UserWarning: If the available disk space is less than 50GB.
    """
    # Imported locally so this block does not rely on a module-level import.
    import functools

    @functools.wraps(func)  # keep the decorated function's name and docstring
    def wrapper_check_disk_space(*args, **kwargs):
        # Only the `free` field of the (total, used, free) tuple is needed.
        _, _, free = shutil.disk_usage("/")
        free_gb = free // (2**30)  # bytes -> whole GiB
        if free_gb < 50:
            warning_message = f"Available disk space is {free_gb}GB. Is it enough for your project?"
            warnings.warn(warning_message, UserWarning)
        return func(*args, **kwargs)

    return wrapper_check_disk_space


check_md5(fpath: str | PathLike, md5: str) -> bool

Verify the MD5 checksum of a file.


  • fpath (str | PathLike) –

    Path to the file.

  • md5 (str) –

    MD5 checksum to verify.


  • bool

    True if the MD5 checksum matches, False otherwise.

Source code in src/nplinker/
def check_md5(fpath: str | PathLike, md5: str) -> bool:
    """Verify the MD5 checksum of a file.

    Args:
        fpath: Path to the file.
        md5: MD5 checksum to verify.

    Returns:
        True if the MD5 checksum matches, False otherwise.
    """
    return md5 == calculate_md5(fpath)


download_and_extract_archive(
    url: str,
    download_root: str | PathLike,
    extract_root: str | Path | None = None,
    filename: str | None = None,
    md5: str | None = None,
    remove_finished: bool = False,
) -> None

Download an archive file and then extract it.

This method is a wrapper of download_url and extract_archive functions.


  • url (str) –

    URL to download file from

  • download_root (str | PathLike) –

    Path to the directory to place downloaded file in. If it doesn't exist, it will be created.

  • extract_root (str | Path | None, default: None ) –

    Path to the directory the file will be extracted to. The given directory will be created if not exist. If omitted, the download_root is used.

  • filename (str | None, default: None ) –

    Name to save the downloaded file under. If None, use the basename of the URL

  • md5 (str | None, default: None ) –

    MD5 checksum of the download. If None, do not check

  • remove_finished (bool, default: False ) –

    If True, remove the downloaded file after the extraction. Defaults to False.

Source code in src/nplinker/
def download_and_extract_archive(
    url: str,
    download_root: str | PathLike,
    extract_root: str | Path | None = None,
    filename: str | None = None,
    md5: str | None = None,
    remove_finished: bool = False,
) -> None:
    """Download an archive file and then extract it.

    This method is a wrapper of [`download_url`][nplinker.utils.download_url] and
    [`extract_archive`][nplinker.utils.extract_archive] functions.

    Args:
        url: URL to download file from
        download_root: Path to the directory to place downloaded
            file in. If it doesn't exist, it will be created.
        extract_root: Path to the directory the file
            will be extracted to. The given directory will be created if not exist.
            If omitted, the `download_root` is used.
        filename: Name to save the downloaded file under.
            If None, use the basename of the URL
        md5: MD5 checksum of the download. If None, do not check
        remove_finished: If `True`, remove the downloaded file
             after the extraction. Defaults to False.
    """
    download_root = Path(download_root)
    if extract_root is None:
        extract_root = download_root
    else:
        extract_root = Path(extract_root)
    if not filename:
        # Fall back to the last path component of the URL.
        filename = Path(url).name

    download_url(url, download_root, filename, md5)

    archive = download_root / filename
    extract_archive(archive, extract_root, remove_finished=remove_finished)


download_url(
    url: str,
    root: str | PathLike,
    filename: str | None = None,
    md5: str | None = None,
    http_method: str = "GET",
    allow_http_redirect: bool = True,
) -> None

Download a file from a url and place it in root.


  • url (str) –

    URL to download file from

  • root (str | PathLike) –

    Directory to place downloaded file in. If it doesn't exist, it will be created.

  • filename (str | None, default: None ) –

    Name to save the file under. If None, use the basename of the URL.

  • md5 (str | None, default: None ) –

    MD5 checksum of the download. If None, do not check.

  • http_method (str, default: 'GET' ) –

    HTTP request method, e.g. "GET", "POST". Defaults to "GET".

  • allow_http_redirect (bool, default: True ) –

    If true, enable following redirects for all HTTP ("http:") methods.

Source code in src/nplinker/
def download_url(
    url: str,
    root: str | PathLike,
    filename: str | None = None,
    md5: str | None = None,
    http_method: str = "GET",
    allow_http_redirect: bool = True,
) -> None:
    """Download a file from a url and place it in root.

    Args:
        url: URL to download file from
        root: Directory to place downloaded file in. If it doesn't exist, it will be created.
        filename: Name to save the file under. If None, use the
            basename of the URL.
        md5: MD5 checksum of the download. If None, do not check.
        http_method: HTTP request method, e.g. "GET", "POST".
            Defaults to "GET".
        allow_http_redirect: If true, enable following redirects for all HTTP ("http:") methods.

    Raises:
        RuntimeError: If the HTTP response is not successful, or if `md5` is
            given and the downloaded file does not match it.
    """
    # Imported locally so this block does not rely on a module-level logger.
    import logging

    logger = logging.getLogger(__name__)

    root = transform_to_full_path(root)
    # create the download directory if not exist
    root.mkdir(exist_ok=True)
    if not filename:
        filename = Path(url).name
    fpath = root / filename

    # check if file is already present locally
    if fpath.is_file() and md5 is not None and check_md5(fpath, md5):
        logger.info("Using downloaded and verified file: " + str(fpath))
        return

    # download the file
    logger.info(f"Downloading {filename} to {root}")
    with open(fpath, "wb") as fh:
        # NOTE(review): the streaming call was lost in the listing; `httpx.stream` is
        # inferred from `follow_redirects`, `is_success` and `iter_bytes` — confirm.
        with httpx.stream(http_method, url, follow_redirects=allow_http_redirect) as response:
            if not response.is_success:
                raise RuntimeError(
                    f"Failed to download url {url} with status code {response.status_code}"
                )
            # A missing Content-Length header yields a total of 0.
            total = int(response.headers.get("Content-Length", 0))

            # NOTE(review): the progress-bar column arguments were lost in the listing;
            # the default columns are used here — confirm against the original.
            with Progress() as progress:
                task = progress.add_task(f"[hot_pink]Downloading {fpath.name}", total=total)
                for chunk in response.iter_bytes():
                    fh.write(chunk)
                    progress.update(task, advance=len(chunk))

    # check integrity of downloaded file
    if md5 is not None and not check_md5(fpath, md5):
        raise RuntimeError("MD5 validation failed.")


extract_archive(
    from_path: str | PathLike,
    extract_root: str | PathLike | None = None,
    members: list | None = None,
    remove_finished: bool = False,
) -> str

Extract an archive.

The archive type and a possible compression is automatically detected from the file name.

If the file is compressed but not an archive, the call is dispatched to _decompress function.


  • from_path (str | PathLike) –

    Path to the file to be extracted.

  • extract_root (str | PathLike | None, default: None ) –

    Path to the directory the file will be extracted to. The given directory will be created if not exist. If omitted, the directory of the archive file is used.

  • members (list | None, default: None ) –

    Optional selection of members to extract. If not specified, all members are extracted. Members must be a subset of the list returned by zipfile.ZipFile.namelist() (or a list of strings) for a zip file, or by tarfile.TarFile.getmembers() for a tar file.

  • remove_finished (bool, default: False ) –

    If True, remove the file after the extraction.


  • str

    Path to the directory the file was extracted to.

Source code in src/nplinker/
def extract_archive(
    from_path: str | PathLike,
    extract_root: str | PathLike | None = None,
    members: list | None = None,
    remove_finished: bool = False,
) -> str:
    """Extract an archive.

    The archive type and a possible compression is automatically detected from
    the file name.

    If the file is compressed but not an archive, the call is dispatched to `_decompress` function.

    Args:
        from_path: Path to the file to be extracted.
        extract_root: Path to the directory the file will be extracted to.
            The given directory will be created if not exist.
            If omitted, the directory of the archive file is used.
        members: Optional selection of members to extract. If not specified,
            all members are extracted.
            Members must be a subset of the list returned by
            - `zipfile.ZipFile.namelist()` or a list of strings for zip file
            - `tarfile.TarFile.getmembers()` for tar file
        remove_finished: If `True`, remove the file after the extraction.

    Returns:
        Path to the directory the file was extracted to.
    """
    # Imported locally so this block does not rely on a module-level logger.
    import logging

    logger = logging.getLogger(__name__)

    from_path = Path(from_path)

    if extract_root is None:
        extract_root = from_path.parent
    else:
        extract_root = Path(extract_root)

    # create the extract directory if not exist
    extract_root.mkdir(exist_ok=True)

    logger.info(f"Extracting {from_path} to {extract_root}")
    suffix, archive_type, compression = _detect_file_type(from_path)
    if not archive_type:
        # Compressed single file rather than an archive: the output name is the
        # input name with the detected suffix removed.
        # NOTE(review): the argument list was partly lost in the listing; the first
        # and last arguments are reconstructed — confirm against `_decompress`.
        return _decompress(
            from_path,
            extract_root / from_path.name.replace(suffix, ""),
            remove_finished=remove_finished,
        )

    extractor = _ARCHIVE_EXTRACTORS[archive_type]

    extractor(str(from_path), str(extract_root), members, compression)
    if remove_finished:
        from_path.unlink()

    return str(extract_root)


is_file_format(
    file: str | PathLike, format: str = "tsv"
) -> bool

Check if the file is in the given format.


  • file (str | PathLike) –

    Path to the file to check.

  • format (str, default: 'tsv' ) –

    The format to check for, either "tsv" or "csv".


  • bool

    True if the file is in the given format, False otherwise.

Source code in src/nplinker/
def is_file_format(file: str | PathLike, format: str = "tsv") -> bool:
    """Check if the file is in the given format.

    The whole file is read with the `csv` module using the delimiter of the
    requested format; the file is considered valid if parsing raises no
    `csv.Error`.

    Args:
        file: Path to the file to check.
        format: The format to check for, either "tsv" or "csv".

    Returns:
        True if the file is in the given format, False otherwise.

    Raises:
        ValueError: If `format` is neither "tsv" nor "csv".
    """
    try:
        with open(file, "rt") as f:
            if format == "tsv":
                reader = csv.reader(f, delimiter="\t")
            elif format == "csv":
                reader = csv.reader(f, delimiter=",")
            else:
                raise ValueError(f"Unknown format '{format}'.")
            # Consume every row so that any parsing error is triggered.
            for _ in reader:
                pass
        return True
    except csv.Error:
        return False


list_dirs(
    root: str | PathLike, keep_parent: bool = True
) -> list[str]

List all directories at a given root.


  • root (str | PathLike) –

    Path to directory whose folders need to be listed

  • keep_parent (bool, default: True ) –

    If true, prepends the path to each result, otherwise only returns the name of the directories found

Source code in src/nplinker/
def list_dirs(root: str | PathLike, keep_parent: bool = True) -> list[str]:
    """List all directories at a given root.

    Only the immediate children of `root` are considered.

    Args:
        root: Path to directory whose folders need to be listed
        keep_parent: If true, prepends the path to each result, otherwise
            only returns the name of the directories found

    Returns:
        The directories found, as strings.
    """
    root = transform_to_full_path(root)
    directories = [str(p) for p in root.iterdir() if p.is_dir()]
    if not keep_parent:
        directories = [os.path.basename(d) for d in directories]
    return directories


list_files(
    root: str | PathLike,
    prefix: str | tuple[str, ...] = "",
    suffix: str | tuple[str, ...] = "",
    keep_parent: bool = True,
) -> list[str]

List all files at a given root.


  • root (str | PathLike) –

    Path to directory whose files need to be listed

  • prefix (str | tuple[str, ...], default: '' ) –

    Prefix of the file names to match, Defaults to empty string '""'.

  • suffix (str | tuple[str, ...], default: '' ) –

    Suffix of the files to match, e.g. ".png" or (".jpg", ".png"). Defaults to empty string '""'.

  • keep_parent (bool, default: True ) –

    If true, prepends the parent path to each result, otherwise only returns the name of the files found. Defaults to True.

Source code in src/nplinker/
def list_files(
    root: str | PathLike,
    prefix: str | tuple[str, ...] = "",
    suffix: str | tuple[str, ...] = "",
    keep_parent: bool = True,
) -> list[str]:
    """List all files at a given root.

    Only the immediate children of `root` are considered.

    Args:
        root: Path to directory whose files need to be listed
        prefix: Prefix of the file names to match,
            Defaults to empty string '""'.
        suffix: Suffix of the files to match, e.g. ".png" or
            (".jpg", ".png").
            Defaults to empty string '""'.
        keep_parent: If true, prepends the parent path to each
            result, otherwise only returns the name of the files found.
            Defaults to True.

    Returns:
        The matching files, as strings.
    """
    root = Path(root)
    # `startswith`/`endswith` accept a str or a tuple of str; the empty-string
    # defaults match every file name.
    files = [
        str(p)
        for p in root.iterdir()
        if p.is_file() and p.name.startswith(prefix) and p.name.endswith(suffix)
    ]

    if not keep_parent:
        files = [os.path.basename(f) for f in files]

    return files


transform_to_full_path(p: str | PathLike) -> Path

Transform a path to a full path.

The path is expanded (i.e. the ~ will be replaced with actual path) and converted to an absolute path (i.e. . or .. will be replaced with actual path).

  • p (str | PathLike) –

    The path to transform.



  • Path

    The transformed full path.

Source code in src/nplinker/
def transform_to_full_path(p: str | PathLike) -> Path:
    """Transform a path to a full path.

    The path is expanded (i.e. the `~` will be replaced with actual path) and converted to an
    absolute path (i.e. `.` or `..` will be replaced with actual path).

    Args:
        p: The path to transform.

    Returns:
        The transformed full path.
    """
    # Multiple calls to `Path` are used to ensure static typing compatibility.
    p = Path(p).expanduser()
    p = Path(p).resolve()
    return Path(p)