# Source code for src.core.analyzer.bqsr_performer
"""This module contains the BQSRPerformer class, which manages
Base Quality Score Recalibration (BQSR) using GATK's BaseRecalibrator
and ApplyBQSR tools.
It performs the following key steps:
1. Generates a recalibration table with BaseRecalibrator. \
2. Applies the recalibration to produce a recalibrated BAM file \
with ApplyBQSR.
The process enhances variant calling accuracy by adjusting
quality scores based on known sites and covariates,
improving downstream analyses.
Classes:
- BQSRPerformer:
Executes BQSR by running GATK commands, managing logs, \
and handling input/output files.
Main Features:
- Constructs command-line strings for GATK tools.
- Executes commands with logging and error handling.
- Handles input sample data and target regions.
- Renames output files post-processing.
"""
# region Imports
import os
import sys
from os import PathLike
from typing import Union, AnyStr
from src.configurator import Configurator
from src.core.base import LoggerMixin
from src.core.base import CommandExecutor
from src.core.base import insert_processing_infix
from src.core.base import execute
from src.core.sample_data_container import SampleDataContainer
from src.core.analyzer.i_data_preparator import IDataPreparator
# endregion
# [docs]
class BQSRPerformer(LoggerMixin, IDataPreparator):
    """Handles Base Quality Score Recalibration (BQSR) using GATK's tools.

    Performs two main steps:
        1. Generates a recalibration table with BaseRecalibrator.
        2. Applies the recalibration with ApplyBQSR
           to produce a corrected BAM file.

    This process improves the accuracy of variant calling
    by adjusting quality scores based on known sites and covariates.
    """

    def __init__(
        self,
        configurator: Configurator
    ):
        """Initializes the BQSRPerformer with its configuration.

        Args:
            configurator (Configurator):
                Supplies the logger and the ``config`` mapping from which
                the 'gatk', 'reference' and 'annotation-database'
                entries are read.
        """
        super().__init__(logger=configurator.logger)
        self.configurator = configurator

    def perform(
        self,
        sample: SampleDataContainer,
        executor: Union[CommandExecutor, callable]
    ) -> PathLike[AnyStr]:
        """Executes BQSR using GATK's BaseRecalibrator and ApplyBQSR.

        BaseRecalibrator writes ``<sample.sid>.table`` into
        ``sample.processing_path``; ApplyBQSR then reads that table and
        writes the recalibrated BAM. Each tool's stdout and stderr are
        redirected to its own log file in ``sample.processing_logpath``.

        Args:
            sample (SampleDataContainer):
                The sample data to process. Its ``sid``, ``bam_filepath``,
                ``processing_path`` and ``processing_logpath`` are used.
            executor (Union[CommandExecutor, callable]):
                Function or object to run commands; it is handed to
                ``execute`` together with the command string.

        Returns:
            PathLike[AnyStr]:
                Path to the recalibrated BAM file.

        Raises:
            SystemExit:
                With ``os.EX_SOFTWARE`` when an ``OSError`` (including
                ``FileNotFoundError`` / ``PermissionError``) or a
                ``SystemError`` is raised while running the commands.
                Any other exception propagates unchanged.
        """
        config = self.configurator.config
        gatk = config['gatk']
        # Computed outside the f-strings: a multi-line expression inside a
        # single-quoted f-string only parses on Python 3.12+.
        gatk_name = os.path.basename(gatk)

        base_recal_logpath = os.path.abspath(os.path.join(
            sample.processing_logpath,
            f"{gatk_name}-BaseRecalibrator.log"))
        apply_bqsr_logpath = os.path.abspath(os.path.join(
            sample.processing_logpath,
            f"{gatk_name}-ApplyBQSR.log"))
        recalibration_table_path = os.path.abspath(os.path.join(
            sample.processing_path, f"{sample.sid}.table"))

        base_recal_cmd_str = ' '.join([
            gatk, 'BaseRecalibrator',
            '--input', sample.bam_filepath,
            '--output', recalibration_table_path,
            '--reference', config['reference'],
            # TODO: support a list of known-sites resources.
            # NOTE: restricting the run with one '--intervals' option per
            # entry of sample.target_regions is currently disabled.
            '--known-sites', config['annotation-database'],
            # stderr (re)creates the log file, stdout is appended to it.
            '2>', base_recal_logpath,
            '>>', base_recal_logpath])

        try:
            self.logger.info("Executing BaseRecalibrator command")
            self.logger.debug("Command: %s", base_recal_cmd_str)
            execute(executor, base_recal_cmd_str)
            self.logger.info(
                "BaseRecalibrator completed successfully. "
                "See the log at '%s'",
                base_recal_logpath
            )

            recalibrated_outpath = insert_processing_infix(
                '.recalibrated', sample.bam_filepath)
            # Built explicitly instead of text-substituting the
            # BaseRecalibrator command, so that paths which happen to
            # contain 'BaseRecalibrator' or the known-sites path are not
            # rewritten by accident.
            apply_bqsr_cmd_str = ' '.join([
                gatk, 'ApplyBQSR',
                '--input', sample.bam_filepath,
                '--output', recalibrated_outpath,
                '--reference', config['reference'],
                '--bqsr-recal-file', recalibration_table_path,
                '2>', apply_bqsr_logpath,
                '>>', apply_bqsr_logpath])

            self.logger.info("Executing ApplyBQSR command")
            self.logger.debug("Command: %s", apply_bqsr_cmd_str)
            execute(executor, apply_bqsr_cmd_str)
            # ApplyBQSR has already written 'recalibrated_outpath'. The
            # input BAM must NOT be renamed onto that path: doing so would
            # replace the recalibrated result with the unrecalibrated
            # input. The input BAM is therefore left in place.
            self.logger.info(
                "ApplyBQSR completed successfully. See the log at '%s'",
                apply_bqsr_logpath
            )
            return recalibrated_outpath
        except (OSError, SystemError) as e:
            # IOError, FileNotFoundError and PermissionError are all
            # OSError (sub)classes, so OSError alone covers them.
            # tb_lineno is the line in this method where the exception
            # passed through; tb_frame.f_lineno would instead report the
            # line currently executing, i.e. this handler.
            self.logger.critical(
                "Error '%s' occurred at line '%s' during BQSR",
                repr(e),
                e.__traceback__.tb_lineno
            )
            sys.exit(os.EX_SOFTWARE)