Skip to content

Data Models

nplinker.strain

Strain

Strain(id: str)

Class to model the mapping between strain id and its aliases.

It's recommended to use NCBI taxonomy strain id or name as the primary id.

Attributes:

  • id (str) –

    The representative id of the strain.

  • names (set[str]) –

    A set of names associated with the strain.

  • aliases (set[str]) –

    A set of aliases associated with the strain.

Parameters:

  • id (str) –

    the representative id of the strain.

Source code in src/nplinker/strain/strain.py
def __init__(self, id: str) -> None:
    """Create a strain identified by `id`, with no aliases yet.

    Args:
        id: the representative id of the strain.
    """
    # the alias set starts out empty
    self._aliases: set[str] = set()
    self.id: str = id

id instance-attribute

id: str = id

names property

names: set[str]

Get the set of strain names including id and aliases.

Returns:

  • set[str]

    A set of names associated with the strain.

aliases property

aliases: set[str]

Get the set of known aliases.

Returns:

  • set[str]

    A set of aliases associated with the strain.

__repr__

__repr__() -> str
Source code in src/nplinker/strain/strain.py
def __repr__(self) -> str:
    """Return the same text that `str()` produces for this object."""
    return f"{self!s}"

__str__

__str__() -> str
Source code in src/nplinker/strain/strain.py
def __str__(self) -> str:
    """Return a short summary: the strain id and how many aliases it has."""
    alias_count = len(self._aliases)
    return f"Strain({self.id}) [{alias_count} aliases]"

__eq__

__eq__(other) -> bool
Source code in src/nplinker/strain/strain.py
def __eq__(self, other) -> bool:
    """Compare two strains by id only; aliases are not taken into account.

    Returns `NotImplemented` when `other` is not a `Strain`.
    """
    if not isinstance(other, Strain):
        return NotImplemented
    return self.id == other.id

__hash__

__hash__() -> int

Hash function for Strain.

Note that Strain is a mutable container, so we hash only on the id; this keeps the hash value from changing when self._aliases is updated.

Source code in src/nplinker/strain/strain.py
def __hash__(self) -> int:
    """Return a hash derived from the strain id alone.

    The alias set is deliberately left out: it can grow after the object
    has been created, and the hash value must not change when it does.
    """
    representative_id = self.id
    return hash(representative_id)

__contains__

__contains__(alias: str) -> bool
Source code in src/nplinker/strain/strain.py
def __contains__(self, alias: str) -> bool:
    """Tell whether `alias` is one of the stored aliases.

    Only the alias set is consulted.

    Raises:
        TypeError: If `alias` is not a string.
    """
    if isinstance(alias, str):
        return alias in self._aliases
    raise TypeError(f"Expected str, got {type(alias)}")

add_alias

add_alias(alias: str) -> None

Add an alias for the strain.

Parameters:

  • alias (str) –

    The alias to add for the strain.

Source code in src/nplinker/strain/strain.py
def add_alias(self, alias: str) -> None:
    """Register one more alias for the strain.

    An empty string is never stored; a warning is logged instead.

    Args:
        alias: The alias to add for the strain.

    Raises:
        TypeError: If `alias` is not a string.
    """
    if not isinstance(alias, str):
        raise TypeError(f"Expected str, got {type(alias)}")
    if alias:
        self._aliases.add(alias)
        return
    logger.warning("Refusing to add an empty-string alias to strain {%s}", self)

StrainCollection

StrainCollection()

A collection of Strain objects.

Source code in src/nplinker/strain/strain_collection.py
def __init__(self) -> None:
    """Create an empty collection."""
    # index from a strain name to the strains registered under that name
    self._strain_dict_name: dict[str, list[Strain]] = {}
    # kept as a list because the scoring part needs the order of strains
    self._strains: list[Strain] = []

__repr__

__repr__() -> str
Source code in src/nplinker/strain/strain_collection.py
def __repr__(self) -> str:
    """Return the same text that `str()` produces for this collection."""
    return f"{self!s}"

__str__

__str__() -> str
Source code in src/nplinker/strain/strain_collection.py
def __str__(self) -> str:
    """Return a summary of the collection.

    The strain ids are listed only when there are at most 20 strains;
    larger collections show the count alone.
    """
    size = len(self)
    summary = f"StrainCollection(n={size})"
    if size <= 20:
        ids = ",".join(member.id for member in self._strains)
        summary = f"{summary} [{ids}]"
    return summary

__len__

__len__() -> int
Source code in src/nplinker/strain/strain_collection.py
def __len__(self) -> int:
    """Return the number of strains held in the collection."""
    strain_count = len(self._strains)
    return strain_count

__eq__

__eq__(other) -> bool
Source code in src/nplinker/strain/strain_collection.py
def __eq__(self, other) -> bool:
    """Compare two collections.

    They are equal when both the ordered strain list and the name index
    are equal. Returns `NotImplemented` when `other` is not a
    `StrainCollection`.
    """
    if not isinstance(other, StrainCollection):
        return NotImplemented
    if self._strains == other._strains:
        return self._strain_dict_name == other._strain_dict_name
    return False

__add__

__add__(other) -> StrainCollection
Source code in src/nplinker/strain/strain_collection.py
def __add__(self, other) -> StrainCollection:
    """Build a new collection holding the strains of both operands.

    The strains of `self` are added first, then those of `other`. The
    `Strain` objects are passed to `add` as they are, not copied. Returns
    `NotImplemented` when `other` is not a `StrainCollection`.
    """
    if not isinstance(other, StrainCollection):
        return NotImplemented
    merged = StrainCollection()
    for source in (self._strains, other._strains):
        for strain in source:
            merged.add(strain)
    return merged

__contains__

__contains__(item: Strain) -> bool

Check if the strain collection contains the given Strain object.

Source code in src/nplinker/strain/strain_collection.py
def __contains__(self, item: Strain) -> bool:
    """Check if the strain collection contains the given Strain object.

    The check looks up `item.id` among the keys of the name index.

    Raises:
        TypeError: If `item` is not a `Strain`.
    """
    if not isinstance(item, Strain):
        raise TypeError(f"Expected Strain, got {type(item)}")
    return item.id in self._strain_dict_name

__iter__

__iter__() -> Iterator[Strain]
Source code in src/nplinker/strain/strain_collection.py
def __iter__(self) -> Iterator[Strain]:
    """Iterate over the strains in the order they are stored."""
    ordered_strains = self._strains
    return iter(ordered_strains)

add

add(strain: Strain) -> None

Add strain to the collection.

If the strain already exists, merge the aliases.

Parameters:

  • strain (Strain) –

    The strain to add.

Source code in src/nplinker/strain/strain_collection.py
def add(self, strain: Strain) -> None:
    """Add strain to the collection.

    If a strain with the same id already exists, the aliases of `strain`
    are merged into the existing object instead of storing a second one.

    Args:
        strain: The strain to add.
    """
    # Several strains can be registered under one name (an alias of one
    # strain may be the id of another), so select the entry whose id matches
    # instead of assuming it is the first one in the list.
    existing = next(
        (s for s in self._strain_dict_name.get(strain.id, ()) if s.id == strain.id),
        None,
    )
    if existing is None:
        self._strains.append(strain)
        for name in strain.names:
            self._strain_dict_name.setdefault(name, []).append(strain)
        return

    # only one strain object per id: merge the unseen aliases into it;
    # take a snapshot first because `existing` is mutated inside the loop
    new_aliases = [alias for alias in strain.aliases if alias not in existing.aliases]
    for alias in new_aliases:
        existing.add_alias(alias)
        registered = self._strain_dict_name.setdefault(alias, [])
        # an alias equal to the strain's own id is already indexed
        if existing not in registered:
            registered.append(existing)

remove

remove(strain: Strain) -> None

Remove a strain from the collection.

It removes the given strain object from the collection by strain id. If the strain id is not found, raise ValueError.

Parameters:

  • strain (Strain) –

    The strain to remove.

Raises:

  • ValueError

    If the strain is not found in the collection.

Source code in src/nplinker/strain/strain_collection.py
def remove(self, strain: Strain) -> None:
    """Remove a strain from the collection.

    It removes the given strain object from the collection by strain id,
    together with the index entries for all of its names.
    If the strain id is not found, raise `ValueError`.

    Args:
        strain: The strain to remove.

    Raises:
        ValueError: If the strain is not found in the collection.
    """
    # Several strains can be registered under one name (an alias of one
    # strain may be the id of another), so select the entry whose id matches
    # instead of assuming it is the first one in the list.
    strain_ref = next(
        (s for s in self._strain_dict_name.get(strain.id, ()) if s.id == strain.id),
        None,
    )
    if strain_ref is None:
        raise ValueError(f"Strain {strain} not found in the strain collection.")

    self._strains.remove(strain_ref)
    # walk the names of the stored object: it may have more aliases than `strain`
    for name in strain_ref.names:
        remaining = [s for s in self._strain_dict_name.get(name, ()) if s.id != strain.id]
        if remaining:
            self._strain_dict_name[name] = remaining
        else:
            self._strain_dict_name.pop(name, None)

filter

filter(strain_set: set[Strain])

Remove all strains that are not in strain_set from the strain collection.

Parameters:

  • strain_set (set[Strain]) –

    Set of strains to keep.

Source code in src/nplinker/strain/strain_collection.py
def filter(self, strain_set: set[Strain]):
    """Keep only the strains that are members of `strain_set`.

    Args:
        strain_set: Set of strains to keep.
    """
    # work out what to drop before removing anything, so the strain list is
    # never modified while it is being iterated
    discarded = [member for member in self._strains if member not in strain_set]
    for member in discarded:
        self.remove(member)

intersection

intersection(other: StrainCollection) -> StrainCollection

Get the intersection of two strain collections.

Parameters:

  • other (StrainCollection) –

    The other strain collection to compare.

Returns:

  • StrainCollection

    StrainCollection object containing the strains that are in both collections.

Source code in src/nplinker/strain/strain_collection.py
def intersection(self, other: StrainCollection) -> StrainCollection:
    """Get the intersection of two strain collections.

    Strains are taken from this collection, in its order, and kept when
    `strain in other` holds.

    Args:
        other: The other strain collection to compare.

    Returns:
        StrainCollection object containing the strains that are in both collections.
    """
    common = StrainCollection()
    shared = (strain for strain in self if strain in other)
    for strain in shared:
        common.add(strain)
    return common

has_name

has_name(name: str) -> bool

Check if the strain collection contains the given strain name (id or alias).

Parameters:

  • name (str) –

    Strain name (id or alias) to check.

Returns:

  • bool

    True if the strain name is in the collection, False otherwise.

Source code in src/nplinker/strain/strain_collection.py
def has_name(self, name: str) -> bool:
    """Tell whether any strain is registered under `name` (id or alias).

    Args:
        name: Strain name (id or alias) to check.

    Returns:
        True if the strain name is in the collection, False otherwise.
    """
    known_names = self._strain_dict_name
    return name in known_names

lookup

lookup(name: str) -> list[Strain]

Lookup a strain by name (id or alias).

Parameters:

  • name (str) –

    Strain name (id or alias) to lookup.

Returns:

  • list[Strain]

    List of Strain objects with the given name.

Raises:

  • ValueError

    If the strain name is not found.

Source code in src/nplinker/strain/strain_collection.py
def lookup(self, name: str) -> list[Strain]:
    """Find the strains registered under a name (id or alias).

    The list handed back is the one stored in the name index, not a copy.

    Args:
        name: Strain name (id or alias) to lookup.

    Returns:
        List of Strain objects with the given name.

    Raises:
        ValueError: If the strain name is not found.
    """
    # index values are always lists, so None can only mean "no such name"
    matches = self._strain_dict_name.get(name)
    if matches is None:
        raise ValueError(f"Strain {name} not found in the strain collection.")
    return matches

read_json staticmethod

read_json(file: str | PathLike) -> StrainCollection

Read a strain mappings JSON file and return a StrainCollection object.

Parameters:

  • file (str | PathLike) –

    Path to the strain mappings JSON file.

Returns:

  • StrainCollection

    StrainCollection object.

Source code in src/nplinker/strain/strain_collection.py
@staticmethod
def read_json(file: str | PathLike) -> StrainCollection:
    """Read a strain mappings JSON file and return a `StrainCollection` object.

    The file is decoded as UTF-8 and its content is validated against
    `STRAIN_MAPPINGS_SCHEMA` before any strain is created. Errors raised
    while opening, parsing or validating the file are not caught here.

    Args:
        file: Path to the strain mappings JSON file.

    Returns:
        `StrainCollection` object.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding
    with open(file, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # validate json data
    validate(instance=json_data, schema=STRAIN_MAPPINGS_SCHEMA)

    strain_collection = StrainCollection()
    for data in json_data["strain_mappings"]:
        strain = Strain(data["strain_id"])
        for alias in data["strain_alias"]:
            strain.add_alias(alias)
        # `add` merges aliases when the same strain id occurs more than once
        strain_collection.add(strain)
    return strain_collection

to_json

to_json(file: str | PathLike | None = None) -> str | None

Convert the StrainCollection object to a JSON string.

Parameters:

  • file (str | PathLike | None, default: None ) –

    Path to output JSON file. If None, return the JSON string instead.

Returns:

  • str | None

    If input file is None, return the JSON string. Otherwise, write the JSON string to the given file and return None.

Source code in src/nplinker/strain/strain_collection.py
def to_json(self, file: str | PathLike | None = None) -> str | None:
    """Serialise the `StrainCollection` object as strain mappings JSON.

    The data is validated against `STRAIN_MAPPINGS_SCHEMA` before it is
    returned or written.

    Args:
        file: Path to output JSON file. If None, return the JSON string instead.

    Returns:
        If input `file` is None, return the JSON string. Otherwise, write the JSON string to the given
        file and return None.
    """
    mappings = []
    for strain in self:
        mappings.append({"strain_id": strain.id, "strain_alias": list(strain.aliases)})
    payload = {"strain_mappings": mappings, "version": "1.0"}

    # validate json data
    validate(instance=payload, schema=STRAIN_MAPPINGS_SCHEMA)

    if file is None:
        return json.dumps(payload)
    with open(file, "w") as f:
        json.dump(payload, f)
    return None