Skip to content

camlhmp.engines.blast

Below are the functions available in the camlhmp.engines.blast module.

camlhmp.engines.blast.run_blast(engine, subject, query, min_pident, min_coverage)

Query sequences against an input subject using a specified BLAST+ algorithm.

Parameters:

Name Type Description Default
engine str

The BLAST engine to use

required
subject str

The subject database (input)

required
query str

The query file (targets)

required
min_pident float

The minimum percent identity to count a hit

required
min_coverage int

The minimum percent coverage to count a hit

required

Returns:

Name Type Description
list list

A three-item list: the first column of each BLAST hit row, the parsed BLAST results (one dict per hit row), and stderr

Examples:

>>> from camlhmp.engines.blast import run_blast
>>> hits, blast_stdout, blast_stderr = run_blast(
        framework["engine"]["tool"], input_path, targets_path, min_pident, min_coverage
    )
Source code in camlhmp/engines/blast.py
def run_blast(engine: str, subject: str, query: str, min_pident: float, min_coverage: int) -> list:
    """
    Query sequences against an input subject using a specified BLAST+ algorithm.

    The subject is streamed into BLAST on stdin (`-subject -`) using `zcat` when
    its name ends in `.gz` and `cat` otherwise.

    Args:
        engine (str): The BLAST engine to use
        subject (str): The subject database (input)
        query (str): The query file (targets)
        min_pident (float): The minimum percent identity to count a hit; a falsy
            value disables the filter, and it is never applied for `tblastn`
        min_coverage (int): The minimum percent coverage to count a hit; a falsy
            value disables the filter

    Returns:
        list: A three-item list `[target_hits, results, stderr]`:
            - target_hits: the first column of every hit row, in output order
            - results: one dict per hit row keyed by `BLASTN_COLS`; when there are
              no hits, a single dict with every column set to `"NO_HITS"`
            - stderr: the stderr captured from the executed command

    Examples:
        >>> from camlhmp.engines.blast import run_blast
        >>> target_hits, results, blast_stderr = run_blast(
                framework["engine"]["tool"], input_path, targets_path, min_pident, min_coverage
            )
    """
    # Imported locally so the block stays self-contained
    import shlex

    outfmt = " ".join(BLASTN_COLS)
    subject_path = str(subject)
    cat_type = "zcat" if subject_path.endswith(".gz") else "cat"
    qcov_hsp_perc = f"-qcov_hsp_perc {min_coverage}" if min_coverage else ""
    perc_identity = f"-perc_identity {min_pident}" if min_pident and engine != "tblastn" else ""

    # Quote the file paths so spaces or shell metacharacters in them cannot break
    # (or be interpreted by) the shell pipeline; plain paths are left unchanged.
    stdout, stderr = execute(
        f"{cat_type} {shlex.quote(subject_path)} | {engine} -query {shlex.quote(str(query))} "
        f"-subject - -outfmt '6 {outfmt}' {qcov_hsp_perc} {perc_identity}",
        capture=True,
    )

    # Convert the tab-delimited BLAST output to a list of dicts
    results = []
    target_hits = []
    for line in stdout.split("\n"):
        if not line:
            # Skip blank lines (e.g. the trailing newline of the output)
            continue
        cols = line.split("\t")
        results.append(dict(zip(BLASTN_COLS, cols)))
        target_hits.append(cols[0])

    if not results:
        # Emit a placeholder row so callers always receive at least one record
        results.append(dict(zip(BLASTN_COLS, ["NO_HITS"] * len(BLASTN_COLS))))

    return [target_hits, results, stderr]

camlhmp.engines.blast.run_blastn(subject, query, min_pident, min_coverage)

An alias for run_blast which uses blastn.

Parameters:

Name Type Description Default
subject str

The subject database (input)

required
query str

The query file (targets)

required
min_pident float

The minimum percent identity to count a hit

required
min_coverage int

The minimum percent coverage to count a hit

required

Returns:

Name Type Description
list list

A three-item list: the first column of each BLAST hit row, the parsed BLAST results (one dict per hit row), and stderr

Examples:

>>> from camlhmp.engines.blast import run_blastn
>>> hits, blast_stdout, blast_stderr = run_blastn(
        input_path, targets_path, min_pident, min_coverage
    )
Source code in camlhmp/engines/blast.py
def run_blastn(subject: str, query: str, min_pident: float, min_coverage: int) -> list:
    """
    Run `run_blast` with the engine fixed to `blastn`.

    Args:
        subject (str): The subject database (input)
        query (str): The query file (targets)
        min_pident (float): The minimum percent identity to count a hit
        min_coverage (int): The minimum percent coverage to count a hit

    Returns:
        list: Whatever `run_blast` returns, unchanged: the first column of each
            hit row, the parsed BLAST results (one dict per hit row), and stderr

    Examples:
        >>> from camlhmp.engines.blast import run_blastn
        >>> target_hits, results, blast_stderr = run_blastn(
                input_path, targets_path, min_pident, min_coverage
            )
    """
    return run_blast(
        engine="blastn",
        subject=subject,
        query=query,
        min_pident=min_pident,
        min_coverage=min_coverage,
    )

camlhmp.engines.blast.run_tblastn(subject, query, min_pident, min_coverage)

An alias for run_blast which uses tblastn.

Parameters:

Name Type Description Default
subject str

The subject database (input)

required
query str

The query file (targets)

required
min_pident float

The minimum percent identity to count a hit

required
min_coverage int

The minimum percent coverage to count a hit

required

Returns:

Name Type Description
list list

A three-item list: the first column of each BLAST hit row, the parsed BLAST results (one dict per hit row), and stderr

Examples:

>>> from camlhmp.engines.blast import run_tblastn
>>> hits, blast_stdout, blast_stderr = run_tblastn(
        input_path, targets_path, min_pident, min_coverage
    )
Source code in camlhmp/engines/blast.py
def run_tblastn(subject: str, query: str, min_pident: float, min_coverage: int) -> list:
    """
    Run `run_blast` with the engine fixed to `tblastn`.

    Args:
        subject (str): The subject database (input)
        query (str): The query file (targets)
        min_pident (float): The minimum percent identity to count a hit
        min_coverage (int): The minimum percent coverage to count a hit

    Returns:
        list: Whatever `run_blast` returns, unchanged: the first column of each
            hit row, the parsed BLAST results (one dict per hit row), and stderr

    Examples:
        >>> from camlhmp.engines.blast import run_tblastn
        >>> target_hits, results, blast_stderr = run_tblastn(
                input_path, targets_path, min_pident, min_coverage
            )
    """
    return run_blast(
        engine="tblastn",
        subject=subject,
        query=query,
        min_pident=min_pident,
        min_coverage=min_coverage,
    )