Source code for src.utils.report_aggregator.annotation_data_container

"""Module containing the AnnotationDataContainer class. This class
encapsulates data for a single annotation entry from an annotation section
(ANN), crucial for analyzing variant impact and location. It inherits
from IReportDataContainer.
"""

from dataclasses import dataclass

from src.utils.report_aggregator.i_report_data_container import \
    IReportDataContainer


@dataclass(slots=True)
class AnnotationDataContainer(IReportDataContainer):
    """Hold one entry of the annotation section (ANN) of a variant.

    An ANN entry is divided into 16 fields, listed below. The attributes
    of this class are declared in the same order as these fields
    (presumably a one-to-one mapping -- confirm against the code that
    builds the container).

    ANN fields:
        Allele
        Annotation
        Annotation_Impact
        Gene_Name
        Gene_ID
        Feature_Type
        Feature_ID
        Transcript_BioType
        Rank
        HGVS.c
        HGVS.p
        cDNA.pos / cDNA.length
        CDS.pos / CDS.length
        AA.pos / AA.length
        Distance
        ERRORS / WARNINGS / INFO

    Distance to feature:
        All items in this field are optional, so the field could be empty.

        * Up/Downstream: Distance to first / last codon
        * Intergenic: Distance to the closest gene
        * Distance to the closest Intron boundary in exon
          (+/- up/downstream). If same, use positive number.
        * Distance to the closest exon boundary in Intron
          (+/- up/downstream)
        * Distance to first base in MOTIF
        * Distance to first base in miRNA
        * Distance to exon-intron boundary in splice_site or splice_region
        * ChipSeq peak: Distance to summit (or peak center)
        * Histone mark / Histone state: Distance to summit (or peak center)
    """

    allele: str
    annotation: str
    annotation_impact: str
    gene_name: str
    gene_id: str
    mutation_type: str
    mutation_id: str
    transcript_biotype: str
    exon: str
    hgvs_cds: str
    hgvs_protein: str
    c_dna: str
    cds: str
    aminoacid: str
    distance: str
    info: str

    def to_dict(self) -> dict[str, str]:
        """Return the reported subset of the annotation as a dictionary.

        Keys are human-readable column labels and values are the
        corresponding attribute values. The attributes ``allele``,
        ``gene_name``, ``gene_id``, ``exon``, ``hgvs_cds`` and
        ``hgvs_protein`` are left out of the result.

        Returns:
            Mapping of column label to attribute value, in report order.
        """
        return {
            "Annotation": self.annotation,
            "Impact": self.annotation_impact,
            "Mutation type": self.mutation_type,
            "Mutation ID": self.mutation_id,
            "Transcript type": self.transcript_biotype,
            "C_DNA": self.c_dna,
            "CDS": self.cds,
            "Aminoacid": self.aminoacid,
            "Distance": self.distance,
            "Info": self.info,
        }
# Predicted loss of function (LOF) effects for this variant.
# The section has the fields:
#   Gene_Name
#   Gene_ID
#   Number_of_transcripts_in_gene
#   Percent_of_transcripts_affected
#
# Predicted nonsense mediated decay (NMD) effects for this variant.
# The section has the fields:
#   Gene_Name
#   Gene_ID
#   Number_of_transcripts_in_gene
#   Percent_of_transcripts_affected