Data Models

nplinker.strain ¶

Strain ¶

Strain(id: str)

Class to model the mapping between strain id and its aliases.

It's recommended to use NCBI taxonomy strain id or name as the primary id.

Attributes:

id (str) –

The representative id of the strain.
names (set[str]) –

A set of names associated with the strain.
aliases (set[str]) –

A set of aliases associated with the strain.

Parameters:

id (str) –

the representative id of the strain.

Source code in src/nplinker/strain/strain.py

def __init__(self, id: str) -> None:
    """Create a strain that starts out without any alias.

    Args:
        id: the representative id of the strain.
    """
    # Aliases are stored privately; the set is empty until aliases are added.
    self._aliases: set[str] = set()
    self.id: str = id

id `instance-attribute` ¶

id: str = id

names `property` ¶

names: set[str]

Get the set of strain names including id and aliases.

Returns:

set[str] –

A set of names associated with the strain.

aliases `property` ¶

aliases: set[str]

Get the set of known aliases.

Returns:

set[str] –

A set of aliases associated with the strain.

repr ¶

__repr__() -> str

Source code in src/nplinker/strain/strain.py

def __repr__(self) -> str:
    """Return the same text that `str()` produces for this object."""
    text = str(self)
    return text

str ¶

__str__() -> str

Source code in src/nplinker/strain/strain.py

def __str__(self) -> str:
    """Return a short summary: the strain id and the number of aliases."""
    alias_count = len(self._aliases)
    return f"Strain({self.id}) [{alias_count} aliases]"

eq ¶

__eq__(other) -> bool

Source code in src/nplinker/strain/strain.py

def __eq__(self, other) -> bool:
    """Compare two strains by id; aliases play no role in equality.

    Returns `NotImplemented` when `other` is not a `Strain`, so that Python
    can fall back to the other operand's comparison.
    """
    if not isinstance(other, Strain):
        return NotImplemented
    return self.id == other.id

hash ¶

__hash__() -> int

Hash function for Strain.

Note that Strain is a mutable container, so we hash on the id only, which keeps the hash value from changing when self._aliases is updated.

Source code in src/nplinker/strain/strain.py

def __hash__(self) -> int:
    """Return a hash derived from the strain id alone.

    The alias set is deliberately left out: `Strain` is a mutable container,
    and hashing on the id only keeps the hash value stable when
    `self._aliases` is updated.
    """
    strain_id = self.id
    return hash(strain_id)

contains ¶

__contains__(alias: str) -> bool

Source code in src/nplinker/strain/strain.py

def __contains__(self, alias: str) -> bool:
    """Tell whether `alias` is one of the strain's aliases.

    Only the alias set is consulted.

    Raises:
        TypeError: If `alias` is not a string.
    """
    if isinstance(alias, str):
        return alias in self._aliases
    raise TypeError(f"Expected str, got {type(alias)}")

add_alias ¶

add_alias(alias: str) -> None

Add an alias for the strain.

Parameters:

alias (str) –

The alias to add for the strain.

Source code in src/nplinker/strain/strain.py

def add_alias(self, alias: str) -> None:
    """Add an alias for the strain.

    An empty string is never stored; a warning is logged instead.

    Args:
        alias: The alias to add for the strain.

    Raises:
        TypeError: If `alias` is not a string.
    """
    if not isinstance(alias, str):
        raise TypeError(f"Expected str, got {type(alias)}")
    if not alias:
        # `%s` is substituted by the logging module; wrapping it in braces
        # would print the braces literally around the strain.
        logger.warning("Refusing to add an empty-string alias to strain %s", self)
        return
    self._aliases.add(alias)

StrainCollection ¶

StrainCollection()

A collection of Strain objects.

Source code in src/nplinker/strain/strain_collection.py

def __init__(self) -> None:
    """Create an empty collection."""
    # Index from a name to the list of strains stored under that name.
    self._strain_dict_name: dict[str, list[Strain]] = {}
    # A list rather than a set: the scoring part needs the order of strains.
    self._strains: list[Strain] = []

repr ¶

__repr__() -> str

Source code in src/nplinker/strain/strain_collection.py

def __repr__(self) -> str:
    """Return the same text that `str()` produces for this collection."""
    text = str(self)
    return text

str ¶

__str__() -> str

Source code in src/nplinker/strain/strain_collection.py

def __str__(self) -> str:
    """Summarise the collection: its size, plus the strain ids when it is small.

    The ids are listed only when the collection holds at most 20 strains.
    """
    size = len(self)
    summary = f"StrainCollection(n={size})"
    if size > 20:
        return summary

    id_list = ",".join(s.id for s in self._strains)
    return f"{summary} [{id_list}]"

len ¶

__len__() -> int

Source code in src/nplinker/strain/strain_collection.py

def __len__(self) -> int:
    """Return the number of strains held in the collection."""
    strains = self._strains
    return len(strains)

eq ¶

__eq__(other) -> bool

Source code in src/nplinker/strain/strain_collection.py

def __eq__(self, other) -> bool:
    """Compare two collections by their strain list and their name index.

    Returns `NotImplemented` when `other` is not a `StrainCollection`.
    """
    if not isinstance(other, StrainCollection):
        return NotImplemented
    # The strain lists are compared first; the index is only compared when
    # the lists already match.
    same_strains = self._strains == other._strains
    return same_strains and self._strain_dict_name == other._strain_dict_name

add ¶

__add__(other) -> StrainCollection

Source code in src/nplinker/strain/strain_collection.py

def __add__(self, other) -> StrainCollection:
    """Return a new collection holding the strains of both operands.

    Strains of `self` are added first, then those of `other`; each one goes
    through `add`. Returns `NotImplemented` when `other` is not a
    `StrainCollection`.
    """
    if not isinstance(other, StrainCollection):
        return NotImplemented

    merged = StrainCollection()
    for source in (self, other):
        for strain in source._strains:
            merged.add(strain)
    return merged

contains ¶

__contains__(item: Strain) -> bool

Check if the strain collection contains the given Strain object.

Source code in src/nplinker/strain/strain_collection.py

def __contains__(self, item: Strain) -> bool:
    """Check if the strain collection contains the given Strain object.

    The check looks up `item.id` among the keys of the name index.

    Raises:
        TypeError: If `item` is not a `Strain`.
    """
    # NOTE(review): `add` stores strains under every entry of `strain.names`,
    # so these keys appear to include aliases as well as ids; an id that equals
    # another strain's alias would then be reported as contained — confirm
    # this is intended.
    if not isinstance(item, Strain):
        raise TypeError(f"Expected Strain, got {type(item)}")
    return item.id in self._strain_dict_name

iter ¶

__iter__() -> Iterator[Strain]

Source code in src/nplinker/strain/strain_collection.py

def __iter__(self) -> Iterator[Strain]:
    """Iterate over the strains in the order in which they are stored."""
    strains = self._strains
    return iter(strains)

add ¶

add(strain: Strain) -> None

Add strain to the collection.

If the strain already exists, merge the aliases.

Parameters:

strain (Strain) –

The strain to add.

Source code in src/nplinker/strain/strain_collection.py

def add(self, strain: Strain) -> None:
    """Add strain to the collection.

    If a strain with the same id already exists, the aliases of `strain` that
    the stored strain does not have yet are merged into the stored strain and
    indexed. Otherwise `strain` itself is stored and indexed under all of its
    names.

    Args:
        strain: The strain to add.
    """
    # The list stored under a name holds every strain carrying that name, so
    # the entry for `strain.id` may also contain strains that merely use this
    # id as an alias. Select the one whose own id matches instead of blindly
    # taking the first element.
    existing = next(
        (s for s in self._strain_dict_name.get(strain.id, ()) if s.id == strain.id),
        None,
    )

    if existing is None:
        self._strains.append(strain)
        for name in strain.names:
            self._strain_dict_name.setdefault(name, []).append(strain)
        return

    # Snapshot the new aliases first: `strain` and `existing` may be the same
    # object, and `existing` is mutated inside the loop.
    new_aliases = [alias for alias in strain.aliases if alias not in existing.aliases]
    for alias in new_aliases:
        existing.add_alias(alias)
        bucket = self._strain_dict_name.setdefault(alias, [])
        # An alias equal to the strain's own id is already indexed; do not
        # list the same strain twice under one name.
        if existing not in bucket:
            bucket.append(existing)

remove ¶

remove(strain: Strain) -> None

Remove a strain from the collection.

It removes the given strain object from the collection by strain id. If the strain id is not found, raise ValueError.

Parameters:

strain (Strain) –

The strain to remove.

Raises:

ValueError –

If the strain is not found in the collection.

Source code in src/nplinker/strain/strain_collection.py

def remove(self, strain: Strain) -> None:
    """Remove a strain from the collection.

    It removes the given strain object from the collection by strain id.
    If the strain id is not found, raise `ValueError`.

    The names that are un-indexed are those of the strain stored in the
    collection, not those of the `strain` argument.

    Args:
        strain: The strain to remove.

    Raises:
        ValueError: If the strain is not found in the collection.
    """
    # The list stored under `strain.id` may also hold strains that only use
    # this id as an alias, so pick the entry whose own id matches rather than
    # the first element.
    stored = next(
        (s for s in self._strain_dict_name.get(strain.id, ()) if s.id == strain.id),
        None,
    )
    if stored is None:
        raise ValueError(f"Strain {strain} not found in the strain collection.")

    self._strains.remove(stored)
    for name in stored.names:
        bucket = self._strain_dict_name.get(name)
        if bucket is None:
            continue
        # Other strains sharing this name must stay indexed.
        remaining = [s for s in bucket if s.id != stored.id]
        if remaining:
            self._strain_dict_name[name] = remaining
        else:
            del self._strain_dict_name[name]

filter ¶

filter(strain_set: set[Strain])

Remove all strains that are not in strain_set from the strain collection.

Parameters:

strain_set (set[Strain]) –

Set of strains to keep.

Source code in src/nplinker/strain/strain_collection.py

def filter(self, strain_set: set[Strain]):
    """Keep only the strains that are members of `strain_set`.

    Every other strain is removed from the collection through `remove`.

    Args:
        strain_set: Set of strains to keep.
    """
    # Decide what to drop before touching the list, so the list is never
    # modified while it is being iterated.
    unwanted = [candidate for candidate in self._strains if candidate not in strain_set]
    for candidate in unwanted:
        self.remove(candidate)

intersection ¶

intersection(other: StrainCollection) -> StrainCollection

Get the intersection of two strain collections.

Parameters:

other (StrainCollection) –

The other strain collection to compare.

Returns:

StrainCollection –

StrainCollection object containing the strains that are in both collections.

Source code in src/nplinker/strain/strain_collection.py

def intersection(self, other: StrainCollection) -> StrainCollection:
    """Build a new collection from the strains shared with `other`.

    Strains are taken from this collection, in iteration order, whenever
    `strain in other` holds.

    Args:
        other: The other strain collection to compare.

    Returns:
        StrainCollection object containing the strains that are in both collections.
    """
    shared = StrainCollection()
    for candidate in self:
        if candidate not in other:
            continue
        shared.add(candidate)
    return shared

has_name ¶

has_name(name: str) -> bool

Check if the strain collection contains the given strain name (id or alias).

Parameters:

name (str) –

Strain name (id or alias) to check.

Returns:

bool –

True if the strain name is in the collection, False otherwise.

Source code in src/nplinker/strain/strain_collection.py

def has_name(self, name: str) -> bool:
    """Tell whether `name` (a strain id or an alias) is known to the collection.

    Args:
        name: Strain name (id or alias) to check.

    Returns:
        True if the strain name is in the collection, False otherwise.
    """
    name_index = self._strain_dict_name
    return name in name_index

lookup ¶

lookup(name: str) -> list[Strain]

Lookup a strain by name (id or alias).

Parameters:

name (str) –

Strain name (id or alias) to lookup.

Returns:

list[Strain] –

List of Strain objects with the given name.

Raises:

ValueError –

If the strain name is not found.

Source code in src/nplinker/strain/strain_collection.py

def lookup(self, name: str) -> list[Strain]:
    """Find the strains registered under a name (id or alias).

    The list held by the internal name index is returned as is, not a copy.

    Args:
        name: Strain name (id or alias) to lookup.

    Returns:
        List of Strain objects with the given name.

    Raises:
        ValueError: If the strain name is not found.
    """
    # Index values are always lists, so None can only mean "no such key".
    matches = self._strain_dict_name.get(name)
    if matches is None:
        raise ValueError(f"Strain {name} not found in the strain collection.")
    return matches

read_json `staticmethod` ¶

read_json(file: str | PathLike) -> StrainCollection

Read a strain mappings JSON file and return a StrainCollection object.

Parameters:

file (str | PathLike) –

Path to the strain mappings JSON file.

Returns:

StrainCollection –

StrainCollection object.

Source code in src/nplinker/strain/strain_collection.py

@staticmethod
def read_json(file: str | PathLike) -> StrainCollection:
    """Read a strain mappings JSON file and return a `StrainCollection` object.

    The loaded data is validated against `STRAIN_MAPPINGS_SCHEMA` before any
    strain is created. One strain is built per entry of `strain_mappings`,
    from its `strain_id` and `strain_alias` values.

    Args:
        file: Path to the strain mappings JSON file.

    Returns:
        `StrainCollection` object.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's
    # locale-dependent default encoding.
    with open(file, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # validate json data
    validate(instance=json_data, schema=STRAIN_MAPPINGS_SCHEMA)

    strain_collection = StrainCollection()
    for data in json_data["strain_mappings"]:
        strain = Strain(data["strain_id"])
        for alias in data["strain_alias"]:
            strain.add_alias(alias)
        strain_collection.add(strain)
    return strain_collection

to_json ¶

to_json(file: str | PathLike | None = None) -> str | None

Convert the StrainCollection object to a JSON string.

Parameters:

file (str | PathLike | None, default: None ) –

Path to output JSON file. If None, return the JSON string instead.

Returns:

str | None –

If input file is None, return the JSON string. Otherwise, write the JSON string to the given file and return None.

Source code in src/nplinker/strain/strain_collection.py

def to_json(self, file: str | PathLike | None = None) -> str | None:
    """Convert the `StrainCollection` object to a JSON string.

    The data is validated against `STRAIN_MAPPINGS_SCHEMA` before it is
    returned or written. Aliases are emitted in sorted order so that the
    output does not depend on set iteration order.

    Args:
        file: Path to output JSON file. If None, return the JSON string instead.

    Returns:
        The JSON string if `file` is None; otherwise None, after the JSON string
            has been written to the given file.
    """
    data_list = [
        {"strain_id": strain.id, "strain_alias": sorted(strain.aliases)} for strain in self
    ]
    json_data = {"strain_mappings": data_list, "version": "1.0"}

    # validate json data
    validate(instance=json_data, schema=STRAIN_MAPPINGS_SCHEMA)

    if file is None:
        return json.dumps(json_data)

    # Write as UTF-8 explicitly instead of relying on the platform's
    # locale-dependent default encoding.
    with open(file, "w", encoding="utf-8") as f:
        json.dump(json_data, f)
    return None

Data Models

nplinker.strain ¶

Strain ¶

id instance-attribute ¶

names property ¶

aliases property ¶

__repr__ ¶

__str__ ¶

__eq__ ¶

__hash__ ¶

__contains__ ¶

add_alias ¶

StrainCollection ¶

__repr__ ¶

__str__ ¶

__len__ ¶

__eq__ ¶

__add__ ¶

__contains__ ¶

__iter__ ¶

add ¶

remove ¶

filter ¶

intersection ¶

has_name ¶

lookup ¶

read_json staticmethod ¶

to_json ¶

id `instance-attribute` ¶

names `property` ¶

aliases `property` ¶

repr ¶

str ¶

eq ¶

hash ¶

contains ¶

repr ¶

str ¶

len ¶

eq ¶

add ¶

contains ¶

iter ¶

read_json `staticmethod` ¶