Skip to content

GNPS

nplinker.metabolomics.gnps

GNPSFormat

Bases: Enum

Enum class for GNPS formats or workflows.

Concept

GNPS data

The name of the enum is a short name for the workflow, and the value of the enum is the workflow name used on the GNPS website.

SNETS class-attribute instance-attribute

SNETS = 'METABOLOMICS-SNETS'

SNETSV2 class-attribute instance-attribute

SNETSV2 = 'METABOLOMICS-SNETS-V2'

FBMN class-attribute instance-attribute

FBMN = 'FEATURE-BASED-MOLECULAR-NETWORKING'

GNPS2CN class-attribute instance-attribute

GNPS2CN = 'classical_networking_workflow'

GNPS2FBMN class-attribute instance-attribute

GNPS2FBMN = 'feature_based_molecular_networking_workflow'

Unknown class-attribute instance-attribute

Unknown = 'Unknown-GNPS-Workflow'

GNPSDownloader

GNPSDownloader(
    task_id: str,
    download_root: str | PathLike,
    gnps_version: str = "1",
)

Download GNPS zip archive for the given task id.

Concept

GNPS data

Note that only GNPS workflows listed in the GNPSFormat enum are supported.

Attributes:

Parameters:

  • task_id (str) –

    GNPS task id, identifying the data to be downloaded.

  • download_root (str | PathLike) –

    Path where to store the downloaded archive.

  • gnps_version (str, default: '1' ) –

    Version of GNPS platform that has been used to run the task. Available values are "1" and "2". Choose "1" if the platform https://gnps.ucsd.edu/ has been used; or "2" for the platform https://gnps2.org/.

Raises:

  • ValueError

    If the given task id does not correspond to a supported GNPS workflow.

  • ValueError

    If the given GNPS version is not valid.

Examples:

Download GNPS1 job

>>> GNPSDownloader("c22f44b14a3d450eb836d607cb9521bb", "~/downloads", "1")

Download GNPS2 job

>>> GNPSDownloader("2014f321d72542afb5216c932e0d5079", "~/downloads", "2")
Source code in src/nplinker/metabolomics/gnps/gnps_downloader.py
def __init__(
    self, task_id: str, download_root: str | PathLike, gnps_version: str = "1"
) -> None:
    """Initialize the GNPSDownloader.

    For GNPS1 tasks the workflow type is looked up from the task id and used to
    build the archive file name (``<workflow>-<task_id>.zip``). For GNPS2 tasks
    no workflow lookup is performed here and the archive is named
    ``<task_id>.tar``.

    Args:
        task_id: GNPS task id, identifying the data to be downloaded.
        download_root: Path where to store the downloaded archive.
        gnps_version: Version of GNPS platform that has been used to run the task.
            Available values are "1" and "2". Choose "1" if the platform https://gnps.ucsd.edu/
            has been used; or "2" for the platform https://gnps2.org/.

    Raises:
        ValueError: If `gnps_version` is "1" and the given task id does not
            correspond to a supported GNPS workflow.
        ValueError: If the given GNPS version is not valid.

    Examples:
        Download GNPS1 job
        >>> GNPSDownloader("c22f44b14a3d450eb836d607cb9521bb", "~/downloads", "1")

        Download GNPS2 job
        >>> GNPSDownloader("2014f321d72542afb5216c932e0d5079", "~/downloads", "2")
    """
    if gnps_version == "1":
        gnps_format = gnps_format_from_gnps1_task_id(task_id)
        if gnps_format == GNPSFormat.Unknown:
            # Adjacent string literals are joined with no separator, so each
            # fragment must carry its own trailing space.
            raise ValueError(
                f"Unknown workflow type for GNPS task '{task_id}'. "
                "Supported GNPS workflows are described in the GNPSFormat enum, "
                "such as 'METABOLOMICS-SNETS', 'METABOLOMICS-SNETS-V2' "
                "and 'FEATURE-BASED-MOLECULAR-NETWORKING'."
            )
        # NOTE: `_gnps_format` is only set on this (GNPS1) branch.
        self._gnps_format = gnps_format
        self._file_name = gnps_format.value + "-" + task_id + ".zip"
    elif gnps_version == "2":
        self._file_name = task_id + ".tar"
    else:
        raise ValueError(
            f"Invalid GNPS version '{gnps_version}'. Supported versions are '1' and '2'."
        )

    self._task_id = task_id
    self._download_root: Path = Path(download_root)
    self._gnps_version = gnps_version

GNPS_DATA_DOWNLOAD_URL class-attribute instance-attribute

GNPS_DATA_DOWNLOAD_URL: str = (
    "https://gnps.ucsd.edu/ProteoSAFe/DownloadResult?task={}&view=download_clustered_spectra"
)

GNPS_DATA_DOWNLOAD_URL_FBMN class-attribute instance-attribute

GNPS_DATA_DOWNLOAD_URL_FBMN: str = (
    "https://gnps.ucsd.edu/ProteoSAFe/DownloadResult?task={}&view=download_cytoscape_data"
)

GNPS2_DATA_DOWNLOAD_URL class-attribute instance-attribute

GNPS2_DATA_DOWNLOAD_URL: str = (
    "https://gnps2.org/taskzip?task={}"
)

download

download() -> Self

Download GNPS data.

Source code in src/nplinker/metabolomics/gnps/gnps_downloader.py
def download(self) -> Self:
    """Download the GNPS archive into the download root.

    GNPS1 archives are requested with HTTP POST and GNPS2 archives with HTTP
    GET. For any other version value nothing is downloaded.

    Returns:
        This downloader instance, so that calls can be chained.
    """
    http_method_by_version = {"1": "POST", "2": "GET"}
    http_method = http_method_by_version.get(self._gnps_version)
    if http_method is not None:
        download_url(
            self.get_url(),
            self._download_root,
            filename=self._file_name,
            http_method=http_method,
        )
    return self

get_download_file

get_download_file() -> str

Get the path to the downloaded file.

Returns:

  • str

    Download path as string

Source code in src/nplinker/metabolomics/gnps/gnps_downloader.py
def get_download_file(self) -> str:
    """Build the full path of the archive inside the download root.

    Returns:
        The download root joined with the archive file name, as a string.
    """
    archive_path = Path(self._download_root).joinpath(self._file_name)
    return str(archive_path)

get_task_id

get_task_id() -> str

Get the GNPS task id.

Returns:

  • str

    Task id as string.

Source code in src/nplinker/metabolomics/gnps/gnps_downloader.py
def get_task_id(self) -> str:
    """Return the GNPS task id this downloader was created with.

    Returns:
        The task id, as a string.
    """
    task_id = self._task_id
    return task_id

get_url

get_url() -> str

Get the download URL.

Returns:

  • str

    URL pointing to the GNPS data to be downloaded.

Source code in src/nplinker/metabolomics/gnps/gnps_downloader.py
def get_url(self) -> str:
    """Build the download URL for the configured task.

    GNPS1 tasks use the FBMN URL template when the workflow is FBMN and the
    generic GNPS1 template otherwise; GNPS2 tasks use the GNPS2 template.

    Returns:
        URL pointing to the GNPS data to be downloaded.

    Raises:
        ValueError: If the stored GNPS version is neither "1" nor "2".
    """
    version = self._gnps_version

    # Guard clause: reject unsupported versions up front.
    if version != "1" and version != "2":
        raise ValueError(
            f"Invalid GNPS version '{self._gnps_version}'. Supported versions are '1' and '2'."
        )

    if version == "2":
        return GNPSDownloader.GNPS2_DATA_DOWNLOAD_URL.format(self._task_id)

    # GNPS1: the FBMN workflow has its own download view.
    if self._gnps_format == GNPSFormat.FBMN:
        url_template = GNPSDownloader.GNPS_DATA_DOWNLOAD_URL_FBMN
    else:
        url_template = GNPSDownloader.GNPS_DATA_DOWNLOAD_URL
    return url_template.format(self._task_id)

GNPSExtractor

GNPSExtractor(
    file: str | PathLike, extract_dir: str | PathLike
)

Extract files from a GNPS molecular networking archive.

Concept

GNPS data

Four files are extracted and renamed to the following names:

  • file_mappings(.tsv/.csv)
  • spectra.mgf
  • molecular_families.tsv
  • annotations.tsv

The files to be extracted are selected based on the GNPS workflow type, as described below (in the order of the files above):

  1. METABOLOMICS-SNETS
    • clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.tsv
    • METABOLOMICS-SNETS*.mgf
    • networkedges_selfloop/*.pairsinfo
    • result_specnets_DB/*.tsv
  2. METABOLOMICS-SNETS-V2
    • clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.clustersummary
    • METABOLOMICS-SNETS-V2*.mgf
    • networkedges_selfloop/*.selfloop
    • result_specnets_DB/*.tsv
  3. FEATURE-BASED-MOLECULAR-NETWORKING
    • quantification_table/*.csv
    • spectra/*.mgf
    • networkedges_selfloop/*.selfloop
    • DB_result/*.tsv
  4. GNPS2 classical_networking_workflow
    • nf_output/clustering/featuretable_reformatted_presence.csv
    • nf_output/clustering/specs_ms.mgf
    • nf_output/networking/filtered_pairs.tsv
    • nf_output/library/merged_results_with_gnps.tsv
  5. GNPS2 feature_based_molecular_networking_workflow
    • nf_output/clustering/featuretable_reformated.csv
    • nf_output/clustering/specs_ms.mgf
    • nf_output/networking/filtered_pairs.tsv
    • nf_output/library/merged_results_with_gnps.tsv

Attributes:

Parameters:

  • file (str | PathLike) –

    The path to the GNPS archive file.

  • extract_dir (str | PathLike) –

    Path to the directory where the files are extracted.

Raises:

  • ValueError

    If the given file is an invalid GNPS archive.

Examples:

>>> gnps_extractor = GNPSExtractor("path/to/gnps_archive.zip", "path/to/extract_dir")
>>> gnps_extractor.gnps_format
<GNPSFormat.SNETS: 'METABOLOMICS-SNETS'>
>>> gnps_extractor.extract_dir
'path/to/extract_dir'
Source code in src/nplinker/metabolomics/gnps/gnps_extractor.py
def __init__(self, file: str | PathLike, extract_dir: str | PathLike):
    """Initialize the GNPSExtractor and extract the archive right away.

    The workflow type is detected from the archive first; extraction is
    triggered at the end of initialization.

    Args:
        file: The path to the GNPS archive file.
        extract_dir: Path to the directory where the files are extracted.

    Raises:
        ValueError: If the given file is an invalid GNPS archive.

    Examples:
        >>> gnps_extractor = GNPSExtractor("path/to/gnps_archive.zip", "path/to/extract_dir")
        >>> gnps_extractor.gnps_format
        <GNPSFormat.SNETS: 'METABOLOMICS-SNETS'>
        >>> gnps_extractor.extract_dir
        'path/to/extract_dir'
    """
    detected_format = gnps_format_from_archive(file)
    if detected_format == GNPSFormat.Unknown:
        raise ValueError(f"Unknown workflow type for GNPS archive '{file}'.")

    self._gnps_format = detected_format
    self._file = Path(file)
    self._extract_path = Path(extract_dir)

    # Output names for the extracted files; the order of the entries matters.
    self._target_files = [
        "file_mappings",
        "spectra.mgf",
        "molecular_families.tsv",
        "annotations.tsv",
    ]

    self._extract()

gnps_format property

gnps_format: GNPSFormat

Get the GNPS workflow type.

Returns:

extract_dir property

extract_dir: str

Get the path where to extract the files to.

Returns:

  • str

    Path where to extract files as string.

GNPSSpectrumLoader

GNPSSpectrumLoader(file: str | PathLike)

Bases: SpectrumLoaderBase

Load mass spectra from the given GNPS MGF file.

Concept

GNPS data

The spectra (MGF) file is from the GNPS output archive, as described below for each GNPS workflow type:

  1. METABOLOMICS-SNETS
    • METABOLOMICS-SNETS*.mgf
  2. METABOLOMICS-SNETS-V2
    • METABOLOMICS-SNETS-V2*.mgf
  3. FEATURE-BASED-MOLECULAR-NETWORKING
    • spectra/*.mgf
  4. GNPS2 classical_networking_workflow
    • nf_output/clustering/specs_ms.mgf
  5. GNPS2 feature_based_molecular_networking_workflow
    • nf_output/clustering/specs_ms.mgf

Parameters:

  • file (str | PathLike) –

    Path to the MGF file.

Raises:

  • ValueError

    Raises ValueError if the file is not valid.

Examples:

>>> loader = GNPSSpectrumLoader("gnps_spectra.mgf")
>>> print(loader.spectra[0])
Source code in src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py
def __init__(self, file: str | PathLike) -> None:
    """Initialize the GNPSSpectrumLoader and load the spectra.

    The file path is stored as a string; the file is validated and then
    loaded as part of initialization.

    Args:
        file: Path to the MGF file.

    Raises:
        ValueError: Raises ValueError if the file is not valid.

    Examples:
        >>> loader = GNPSSpectrumLoader("gnps_spectra.mgf")
        >>> print(loader.spectra[0])
    """
    self._spectra: list[Spectrum] = []
    self._file = str(file)

    # Order matters: validation runs before loading.
    self._validate()
    self._load()

spectra property

spectra: list[Spectrum]

Get the list of Spectrum objects.

Returns:

  • list[Spectrum]

    The loaded spectra as a list of Spectrum objects.

GNPSMolecularFamilyLoader

GNPSMolecularFamilyLoader(file: str | PathLike)

Bases: MolecularFamilyLoaderBase

Load molecular families from GNPS data.

Concept

GNPS data

The molecular family file is from GNPS output archive, as described below for each GNPS workflow type:

  1. METABOLOMICS-SNETS
    • networkedges_selfloop/*.pairsinfo
  2. METABOLOMICS-SNETS-V2
    • networkedges_selfloop/*.selfloop
  3. FEATURE-BASED-MOLECULAR-NETWORKING
    • networkedges_selfloop/*.selfloop
  4. GNPS2 classical_networking_workflow
    • nf_output/networking/filtered_pairs.tsv
  5. GNPS2 feature_based_molecular_networking_workflow
    • nf_output/networking/filtered_pairs.tsv

The ComponentIndex column in the GNPS molecular family file is treated as family id.

However, molecular families that have only one member (i.e. one spectrum), called singleton molecular families, all share the same value of -1 in the ComponentIndex column. To make the family id unique, the spectrum id with the prefix singleton- is used as the family id of singleton molecular families.

Parameters:

  • file (str | PathLike) –

    Path to the GNPS molecular family file.

Raises:

  • ValueError

    Raises ValueError if the file is not valid.

Examples:

>>> loader = GNPSMolecularFamilyLoader("gnps_molecular_families.tsv")
>>> mfs = loader.get_mfs()
>>> print(mfs)
[<MolecularFamily 1>, <MolecularFamily 2>, ...]
>>> print(mfs[0].spectra_ids)
{'1', '3', '7', ...}
Source code in src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py
def __init__(self, file: str | PathLike) -> None:
    """Initialize the GNPSMolecularFamilyLoader.

    The file is validated and then loaded as part of initialization; the
    loaded molecular families are retrieved with `get_mfs`.

    Args:
        file: Path to the GNPS molecular family file.

    Raises:
        ValueError: Raises ValueError if the file is not valid.

    Examples:
        >>> loader = GNPSMolecularFamilyLoader("gnps_molecular_families.tsv")
        >>> mfs = loader.get_mfs()
        >>> print(mfs)
        [<MolecularFamily 1>, <MolecularFamily 2>, ...]
        >>> print(mfs[0].spectra_ids)
        {'1', '3', '7', ...}
    """
    self._mfs: list[MolecularFamily] = []
    self._file = file

    # Order matters: validation runs before loading.
    self._validate()
    self._load()

get_mfs

get_mfs(
    keep_singleton: bool = False,
) -> list[MolecularFamily]

Get MolecularFamily objects.

Parameters:

  • keep_singleton (bool, default: False ) –

    True to keep singleton molecular families. A singleton molecular family is a molecular family that contains only one spectrum.

Returns:

  • list[MolecularFamily]

    A list of MolecularFamily objects with their spectra ids.

Source code in src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py
def get_mfs(self, keep_singleton: bool = False) -> list[MolecularFamily]:
    """Return the loaded molecular families, optionally without singletons.

    Args:
        keep_singleton: True to keep singleton molecular families. A
            singleton molecular family is a molecular family that contains
            only one spectrum.

    Returns:
        A list of MolecularFamily objects with their spectra ids. When
        `keep_singleton` is True this is the loader's internal list itself;
        otherwise it is a new, filtered list.
    """
    if keep_singleton:
        return self._mfs
    return [family for family in self._mfs if not family.is_singleton()]

GNPSAnnotationLoader

GNPSAnnotationLoader(file: str | PathLike)

Bases: AnnotationLoaderBase

Load annotations from GNPS output file.

Concept

GNPS data

The annotation file is a .tsv file from GNPS output archive, as described below for each GNPS workflow type:

  1. METABOLOMICS-SNETS
    • result_specnets_DB/*.tsv
  2. METABOLOMICS-SNETS-V2
    • result_specnets_DB/*.tsv
  3. FEATURE-BASED-MOLECULAR-NETWORKING
    • DB_result/*.tsv
  4. GNPS2 classical_networking_workflow
    • nf_output/library/merged_results_with_gnps.tsv
  5. GNPS2 feature_based_molecular_networking_workflow
    • nf_output/library/merged_results_with_gnps.tsv

Parameters:

  • file (str | PathLike) –

    The GNPS annotation file.

Examples:

>>> loader = GNPSAnnotationLoader("gnps_annotations.tsv")
>>> print(loader.annotations["100"])
{'#Scan#': '100',
'Adduct': 'M+H',
'CAS_Number': 'N/A',
'Charge': '1',
'Compound_Name': 'MLS002153841-01!Iobenguane sulfate',
'Compound_Source': 'NIH Pharmacologically Active Library',
'Data_Collector': 'VP/LMS',
'ExactMass': '274.992',
'INCHI': 'N/A',
'INCHI_AUX': 'N/A',
'Instrument': 'qTof',
'IonMode': 'Positive',
'Ion_Source': 'LC-ESI',
'LibMZ': '276.003',
'LibraryName': 'lib-00014.mgf',
'LibraryQualityString': 'Gold',
'Library_Class': '1',
'MQScore': '0.704152',
'MZErrorPPM': '405416',
'MassDiff': '111.896',
'Organism': 'GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE',
'PI': 'Dorrestein',
'Precursor_MZ': '276.003',
'Pubmed_ID': 'N/A',
'RT_Query': '795.979',
'SharedPeaks': '7',
'Smiles': 'NC(=N)NCc1cccc(I)c1.OS(=O)(=O)O',
'SpecCharge': '1',
'SpecMZ': '164.107',
'SpectrumFile': 'spectra/specs_ms.pklbin',
'SpectrumID': 'CCMSLIB00000086167',
'TIC_Query': '986.997',
'UpdateWorkflowName': 'UPDATE-SINGLE-ANNOTATED-GOLD',
'tags': ' ',
'png_url': 'https://metabolomics-usi.gnps2.org/png/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167',
'json_url': 'https://metabolomics-usi.gnps2.org/json/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167',
'svg_url': 'https://metabolomics-usi.gnps2.org/svg/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167',
'spectrum_url': 'https://metabolomics-usi.gnps2.org/spectrum/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167'}
Source code in src/nplinker/metabolomics/gnps/gnps_annotation_loader.py
def __init__(self, file: str | PathLike) -> None:
    """Initialize the GNPSAnnotationLoader and load the annotations.

    The file is validated and then loaded as part of initialization.

    Args:
        file: The GNPS annotation file.

    Examples:
        >>> loader = GNPSAnnotationLoader("gnps_annotations.tsv")
        >>> print(loader.annotations["100"])
        {'#Scan#': '100',
        'Adduct': 'M+H',
        'CAS_Number': 'N/A',
        'Charge': '1',
        'Compound_Name': 'MLS002153841-01!Iobenguane sulfate',
        'Compound_Source': 'NIH Pharmacologically Active Library',
        'Data_Collector': 'VP/LMS',
        'ExactMass': '274.992',
        'INCHI': 'N/A',
        'INCHI_AUX': 'N/A',
        'Instrument': 'qTof',
        'IonMode': 'Positive',
        'Ion_Source': 'LC-ESI',
        'LibMZ': '276.003',
        'LibraryName': 'lib-00014.mgf',
        'LibraryQualityString': 'Gold',
        'Library_Class': '1',
        'MQScore': '0.704152',
        'MZErrorPPM': '405416',
        'MassDiff': '111.896',
        'Organism': 'GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE',
        'PI': 'Dorrestein',
        'Precursor_MZ': '276.003',
        'Pubmed_ID': 'N/A',
        'RT_Query': '795.979',
        'SharedPeaks': '7',
        'Smiles': 'NC(=N)NCc1cccc(I)c1.OS(=O)(=O)O',
        'SpecCharge': '1',
        'SpecMZ': '164.107',
        'SpectrumFile': 'spectra/specs_ms.pklbin',
        'SpectrumID': 'CCMSLIB00000086167',
        'TIC_Query': '986.997',
        'UpdateWorkflowName': 'UPDATE-SINGLE-ANNOTATED-GOLD',
        'tags': ' ',
        'png_url': 'https://metabolomics-usi.gnps2.org/png/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167',
        'json_url': 'https://metabolomics-usi.gnps2.org/json/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167',
        'svg_url': 'https://metabolomics-usi.gnps2.org/svg/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167',
        'spectrum_url': 'https://metabolomics-usi.gnps2.org/spectrum/?usi1=mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00000086167'}
    """
    self._annotations: dict[str, dict] = {}
    self._file = Path(file)

    # Order matters: validation runs before loading.
    self._validate()
    self._load()

annotations property

annotations: dict[str, dict]

Get annotations.

Returns:

  • dict[str, dict]

    Keys are spectrum ids ("#Scan#" in annotation file) and values are the annotations dict for each spectrum.

GNPSFileMappingLoader

GNPSFileMappingLoader(file: str | PathLike)

Bases: FileMappingLoaderBase

Class to load file mappings from GNPS output file.

Concept

GNPS data

File mappings refer to the mapping from a spectrum id to the files in which this spectrum occurs.

The file mappings file is from GNPS output archive, as described below for each GNPS workflow type:

  1. METABOLOMICS-SNETS
    • clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.tsv
  2. METABOLOMICS-SNETS-V2
    • clusterinfosummarygroup_attributes_withIDs_withcomponentID/*.clustersummary (.tsv file)
  3. FEATURE-BASED-MOLECULAR-NETWORKING
    • quantification_table/*.csv
  4. GNPS2 classical_networking_workflow
    • nf_output/clustering/featuretable_reformatted_presence.csv
  5. GNPS2 feature_based_molecular_networking_workflow
    • nf_output/clustering/featuretable_reformated.csv

The tsv files from different workflows have different headers, while the .csv files from different workflows have consistent headers.

Parameters:

  • file (str | PathLike) –

    Path to the GNPS file mappings file.

Raises:

  • ValueError

    Raises ValueError if the file is not valid.

Examples:

>>> loader = GNPSFileMappingLoader("gnps_file_mappings.tsv")
>>> print(loader.mappings["1"])
['26c.mzXML']
>>> print(loader.mapping_reversed["26c.mzXML"])
{'1', '3', '7', ...}
Source code in src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py
def __init__(self, file: str | PathLike) -> None:
    """Initialize the GNPSFileMappingLoader.

    The GNPS workflow type is detected from the file first; the file is then
    validated and loaded as part of initialization.

    Args:
        file: Path to the GNPS file mappings file.

    Raises:
        ValueError: If the workflow type of the file cannot be determined.
        ValueError: Raises ValueError if the file is not valid.

    Examples:
        >>> loader = GNPSFileMappingLoader("gnps_file_mappings.tsv")
        >>> print(loader.mappings["1"])
        ['26c.mzXML']
        >>> print(loader.mapping_reversed["26c.mzXML"])
        {'1', '3', '7', ...}
    """
    self._gnps_format = self._detect_gnps_format(file)
    if self._gnps_format is GNPSFormat.Unknown:
        # Name the offending file so the error is actionable.
        raise ValueError(f"Unknown workflow type for GNPS file mappings file '{file}'.")

    self._file = Path(file)
    self._mapping: dict[str, list[str]] = {}

    # Order matters: validation runs before loading.
    self._validate()
    self._load()

mappings property

mappings: dict[str, list[str]]

Return mapping from spectrum id to files in which this spectrum occurs.

Returns:

  • dict[str, list[str]]

    Mapping from spectrum id to names of all files in which this spectrum occurs.

mapping_reversed property

mapping_reversed: dict[str, set[str]]

Return mapping from file name to all spectra that occur in this file.

Returns:

  • dict[str, set[str]]

    Mapping from file name to all spectra ids that occur in this file.

gnps_format_from_archive

gnps_format_from_archive(
    file: str | PathLike,
) -> GNPSFormat

Detect GNPS format or workflow from GNPS archive file.

GNPS archive files can be in two formats: GNPS1 (.zip) and GNPS2 (.tar).

For GNPS1 data, the detection of workflow format is based on the filename of the zip archive and the names of the files contained in the zip archive.

For GNPS2 data, the workflow format is taken from the submission_parameters.yaml file in the tar archive, which has a key workflowname.

Parameters:

  • file (str | PathLike) –

    Path to the GNPS archive file.

Returns:

  • GNPSFormat

    The format identified in the GNPS archive file.

Examples:

>>> gnps_format_from_archive("ProteoSAFe-METABOLOMICS-SNETS-c22f44b1-download_clustered_spectra.zip")
<GNPSFormat.SNETS: 'METABOLOMICS-SNETS'>
>>> gnps_format_from_archive("ProteoSAFe-METABOLOMICS-SNETS-V2-189e8bf1-download_clustered_spectra.zip")
<GNPSFormat.SNETSV2: 'METABOLOMICS-SNETS-V2'>
>>> gnps_format_from_archive("ProteoSAFe-FEATURE-BASED-MOLECULAR-NETWORKING-672d0a53-download_cytoscape_data.zip")
<GNPSFormat.FBMN: 'FEATURE-BASED-MOLECULAR-NETWORKING'>
>>> gnps_format_from_archive("206a7b40b7ed41c1ae6b4fbd2def3636.tar")
<GNPSFormat.GNPS2CN: 'classical_networking_workflow'>
>>> gnps_format_from_archive("2014f321d72542afb5216c932e0d5079.tar")
<GNPSFormat.GNPS2FBMN: 'feature_based_molecular_networking_workflow'>
Source code in src/nplinker/metabolomics/gnps/gnps_format.py
def gnps_format_from_archive(file: str | PathLike) -> GNPSFormat:
    """Detect GNPS format or workflow from GNPS archive file.

    The archive type is chosen from the file suffix: `.zip` is handled as
    GNPS1 data and `.tar` as GNPS2 data. Any other suffix yields
    `GNPSFormat.Unknown`.

    For GNPS1 data, the detection of workflow format is based on the filename of the zip archive and
    the names of the files contained in the zip archive.

    For GNPS2 data, the workflow format is taken from the `submission_parameters.yaml` file in the
    tar archive, which has a key `workflowname`.

    Args:
        file: Path to the GNPS archive file.

    Returns:
        The format identified in the GNPS archive file.

    Examples:
        >>> gnps_format_from_archive("ProteoSAFe-METABOLOMICS-SNETS-c22f44b1-download_clustered_spectra.zip")
        <GNPSFormat.SNETS: 'METABOLOMICS-SNETS'>
        >>> gnps_format_from_archive("ProteoSAFe-METABOLOMICS-SNETS-V2-189e8bf1-download_clustered_spectra.zip")
        <GNPSFormat.SNETSV2: 'METABOLOMICS-SNETS-V2'>
        >>> gnps_format_from_archive("ProteoSAFe-FEATURE-BASED-MOLECULAR-NETWORKING-672d0a53-download_cytoscape_data.zip")
        <GNPSFormat.FBMN: 'FEATURE-BASED-MOLECULAR-NETWORKING'>
        >>> gnps_format_from_archive("206a7b40b7ed41c1ae6b4fbd2def3636.tar")
        <GNPSFormat.GNPS2CN: 'classical_networking_workflow'>
        >>> gnps_format_from_archive("2014f321d72542afb5216c932e0d5079.tar")
        <GNPSFormat.GNPS2FBMN: 'feature_based_molecular_networking_workflow'>
    """
    archive_path = Path(file)

    # Map each supported archive suffix to its format detector.
    detectors = {
        ".zip": _gnps_format_from_archive_gnps1,
        ".tar": _gnps_format_from_archive_gnps2,
    }
    detector = detectors.get(archive_path.suffix)
    if detector is None:
        return GNPSFormat.Unknown
    return detector(archive_path)