Source code for src.core.analyzer.annotation_adapter

"""This module defines interfaces and implementations
for variant annotation adapters, specifically using SnpEff.

It provides a protocol interface for annotation adapters
and a concrete implementation that uses SnpEff
to annotate variant data in VCF files.

Classes:
    - IAnnotationAdapter:
        Protocol interface for annotation adapters.
    - SnpEffAnnotationAdapter:
        Implements variant annotation using SnpEff.

Main Features:
    - Annotates VCF files with variant effect predictions.
    - Generates summary reports in HTML and CSV formats.
    - Supports integration with command execution frameworks.
"""

# region Imports
import os

from os import PathLike
from typing import Protocol, Union, AnyStr

from src.configurator import Configurator

from src.core.base import LoggerMixin
from src.core.base import CommandExecutor
from src.core.base import execute
from src.core.base import insert_processing_infix

from src.core.sample_data_container import SampleDataContainer
# endregion


[docs] class IAnnotationAdapter(Protocol): """Interface for annotation adapters that perform variant annotation on sequencing data. """
[docs] def annotate( self, sample: SampleDataContainer, reference_ident: str, executor: Union[CommandExecutor, callable] ) -> PathLike[AnyStr]: """Performs annotation on the given sample's variant data. Args: sample (SampleDataContainer): The sample containing variant data (e.g., VCF file). reference_ident (str): Identifier for the reference genome or annotation database. executor (callable or CommandExecutor): Function or object to execute system commands. Returns: PathLike: Path to the annotated variant file (e.g., annotated VCF). """
[docs] class SnpEffAnnotationAdapter(LoggerMixin, IAnnotationAdapter): """Implementation of the IAnnotationAdapter interface using SnpEff for variant annotation. """ def __init__(self, configurator: Configurator): """Initializes the SnpEffAnnotationAdapter with configuration settings. Args: configurator (Configurator): The configuration object with paths and parameters. """ self.configurator = configurator super().__init__(logger=self.configurator.logger)
[docs] def annotate( self, sample: SampleDataContainer, reference_ident: str, executor: Union[CommandExecutor, callable] ) -> PathLike[AnyStr]: """Annotates variants in the sample's VCF file using SnpEff. Args: sample (SampleDataContainer): The sample with VCF data to annotate. reference_ident (str): The reference genome or database identifier. executor (callable): Function or object to execute system commands. Returns: PathLike: Path to the annotated VCF file. """ annotated_vcf = insert_processing_infix('.ann', sample.vcf_filepath) html_stats_path = os.path.join( sample.processing_logpath, f"{sample.sid}_snpEff_summary.html") csv_stats_path = os.path.join( sample.processing_logpath, f"{sample.sid}_snpEff_summary.csv") cmd = ' '.join([ self.configurator.config['java'], '-jar', self.configurator.config['snpeff'], reference_ident, '-stats', html_stats_path, '-csvStats', csv_stats_path, sample.vcf_filepath, '>', annotated_vcf]) execute(executor, cmd) return insert_processing_infix('.ann', sample.vcf_filepath)