Source code for src.core.analyzer.sequence_aligner
"""This module defines the SequenceAligner class,
responsible for mapping sequencing reads to a reference genome
using an aligner such as BWA-MEM2.
It handles the construction and execution of alignment commands,
logging the process, and managing output files.
Classes:
- SequenceAligner:
Performs read alignment to a reference genome,
logs the process, and returns the path
to the aligned reads file.
Main Features:
- Constructs command-line instructions for BWA-MEM2.
- Ensures log directories exist.
- Handles sample information and reference genome input.
- Manages output paths for alignment results.
- Implements error handling with logging.
"""
# region Imports
import os
from os import PathLike
from typing import Union, AnyStr
from src.core.base import LoggerMixin
from src.core.base import CommandExecutor
from src.core.base import execute
from src.core.analyzer.i_data_preparator import IDataPreparator
from src.core.sample_data_container import SampleDataContainer
# endregion
[docs]
class SequenceAligner(LoggerMixin, IDataPreparator):
    """Map sequencing reads to a reference genome with BWA-MEM2.

    The executable is taken from ``configurator.config['bwa-mem2']`` and is
    invoked in ``mem`` mode. The command's stderr is redirected to a log
    file placed under the sample's ``processing_logpath``.
    """

    def __init__(self, configurator):
        """Initialize the SequenceAligner with a configurator instance.

        Args:
            configurator:
                Configuration object. This class reads its ``config``
                mapping, ``args.threads`` and ``logger``.
        """
        super().__init__(logger=configurator.logger)
        self.configurator = configurator

    def perform(
        self,
        sample: SampleDataContainer,
        reference_source: PathLike[AnyStr],
        executor: Union[CommandExecutor, callable]
    ) -> PathLike[AnyStr]:
        """Map the sample's reads to the reference genome.

        This is the stage at which, for each read, it is determined
        where a similar sequence is located in the reference genome,
        and their alignment is performed relative to each other.

        Args:
            sample (SampleDataContainer):
                Container with the sample's sequencing data. The
                attributes read here are ``sid``, ``r1_source``,
                ``r2_source``, ``processing_path`` and
                ``processing_logpath``.
            reference_source (PathLike[AnyStr]):
                Path to the reference genome file to which reads
                will be aligned.
            executor (Union[CommandExecutor, callable]):
                External callable or wrapper object that is handed to
                ``execute`` together with the command string.

        Returns:
            PathLike[AnyStr]:
                Absolute path of the SAM file
                (``<processing_path>/<sid>.sam``).

        Raises:
            Exception:
                Any error raised while building or executing the
                command is logged at critical level and re-raised.
        """
        aligner_bin = self.configurator.config['bwa-mem2']
        # Log file is named after the executable: "<tool>-mem.log".
        log_name = os.path.basename(os.path.splitext(aligner_bin)[0])
        aligning_logpath = os.path.abspath(
            os.path.join(sample.processing_logpath, log_name + '-mem.log'))
        # exist_ok avoids the check-then-create race of an explicit
        # os.path.exists() test.
        os.makedirs(os.path.dirname(aligning_logpath), exist_ok=True)
        try:
            aligning_outpath = os.path.abspath(
                os.path.join(sample.processing_path, sample.sid + '.sam'))
            # os.fspath lets pathlib-style objects pass through str.join.
            cmd_parts = [
                aligner_bin, 'mem',
                os.fspath(reference_source),
                os.fspath(sample.r1_source),
            ]
            # Second mate file is added only for paired-end samples.
            # (The previous condition was inverted and dropped it.)
            if sample.r2_source:
                cmd_parts.append(os.fspath(sample.r2_source))
            cmd_parts.extend([
                '-o', aligning_outpath,
                '-t', str(self.configurator.args.threads),
                # A shell removes the redirection wherever it appears,
                # so the trailing '-M' is still passed to the aligner.
                '2>', aligning_logpath,
                '-M',
            ])
            reads_mapping_cmd = ' '.join(cmd_parts)
            self.configurator.logger.info(
                "Starting to map sample '%s' reads to reference '%s'",
                sample.sid,
                self.configurator.config['reference'])
            self.configurator.logger.debug(
                "Command: %s",
                reads_mapping_cmd)
            execute(executor, reads_mapping_cmd)
            self.configurator.logger.info(
                "Alignment completed successfully. See the log at '%s'",
                aligning_logpath)
            return aligning_outpath
        except Exception as e:
            self.configurator.logger.critical(
                "A fatal error '%s' occurred at '%s'",
                repr(e),
                e.__traceback__.tb_frame)
            # Bare raise re-raises the active exception unchanged.
            raise
[docs]
class BWAAligner(LoggerMixin, IDataPreparator):
    """Map sequencing reads to a reference genome with BWA.

    The executable is taken from ``configurator.config['bwa']`` and is
    invoked in ``mem`` mode. The command's stderr is redirected to a log
    file placed under the sample's ``processing_logpath``.
    """

    def __init__(self, configurator):
        """Initialize the BWAAligner with a configurator instance.

        Args:
            configurator:
                Configuration object. This class reads its ``config``
                mapping, ``args.threads`` and ``logger``.
        """
        super().__init__(logger=configurator.logger)
        self.configurator = configurator

    def perform(
        self,
        sample: SampleDataContainer,
        reference_source: PathLike[AnyStr],
        executor: Union[CommandExecutor, callable]
    ) -> PathLike[AnyStr]:
        """Map the sample's reads to the reference genome.

        This is the stage at which, for each read, it is determined
        where a similar sequence is located in the reference genome,
        and their alignment is performed relative to each other.

        Args:
            sample (SampleDataContainer):
                Container with the sample's sequencing data. The
                attributes read here are ``sid``, ``r1_source``,
                ``r2_source``, ``processing_path`` and
                ``processing_logpath``.
            reference_source (PathLike[AnyStr]):
                Path to the reference genome file to which reads
                will be aligned.
            executor (Union[CommandExecutor, callable]):
                External callable or wrapper object that is handed to
                ``execute`` together with the command string.

        Returns:
            PathLike[AnyStr]:
                Absolute path of the SAM file
                (``<processing_path>/<sid>.sam``).

        Raises:
            Exception:
                Any error raised while building or executing the
                command is logged at critical level and re-raised.
        """
        aligner_bin = self.configurator.config['bwa']
        # Log file is named after the executable: "<tool>-mem.log".
        log_name = os.path.basename(os.path.splitext(aligner_bin)[0])
        aligning_logpath = os.path.abspath(
            os.path.join(sample.processing_logpath, log_name + '-mem.log'))
        # exist_ok avoids the check-then-create race of an explicit
        # os.path.exists() test.
        os.makedirs(os.path.dirname(aligning_logpath), exist_ok=True)
        try:
            aligning_outpath = os.path.abspath(
                os.path.join(sample.processing_path, sample.sid + '.sam'))
            # os.fspath lets pathlib-style objects pass through str.join.
            cmd_parts = [
                aligner_bin, 'mem',
                os.fspath(reference_source),
                os.fspath(sample.r1_source),
            ]
            # Second mate file is added only for paired-end samples.
            # (The previous condition was inverted and dropped it.)
            if sample.r2_source:
                cmd_parts.append(os.fspath(sample.r2_source))
            cmd_parts.extend([
                '-o', aligning_outpath,
                '-t', str(self.configurator.args.threads),
                # A shell removes the redirection wherever it appears,
                # so the trailing '-M' is still passed to the aligner.
                '2>', aligning_logpath,
                '-M',
            ])
            reads_mapping_cmd = ' '.join(cmd_parts)
            self.configurator.logger.info(
                "Starting to map sample '%s' reads to reference '%s'",
                sample.sid,
                self.configurator.config['reference'])
            self.configurator.logger.debug(
                "Command: %s",
                reads_mapping_cmd)
            execute(executor, reads_mapping_cmd)
            self.configurator.logger.info(
                "Alignment completed successfully. See the log at '%s'",
                aligning_logpath)
            return aligning_outpath
        except Exception as e:
            self.configurator.logger.critical(
                "A fatal error '%s' occurred at '%s'",
                repr(e),
                e.__traceback__.tb_frame)
            # Bare raise re-raises the active exception unchanged.
            raise