Source code for src.core.analyzer.primer_cutter
"""This module contains classes and functions for preparing
sequencing data by executing primer trimming operations.
Classes:
- CutPrimers:
Handles execution of external primer cutting
scripts on sequencing samples.
- PTrimmer:
Performs primer sequence trimming from paired-end reads.
- PrimerCutter:
Factory class for creating instances of primer-related
data preparators based on specified cutter type.
Purpose:
This module facilitates data preprocessing steps essential for sequencing
analysis pipelines, such as trimming primer sequences and cutting primers
based on external scripts, while maintaining detailed logs of operations.
"""
# region Imports
import os
import logging
from os import PathLike
from typing import Optional, Union, AnyStr
from src.configurator import Configurator
from src.core.base import LoggerMixin
from src.core.base import CommandExecutor
from src.core.base import execute
from src.core.base import insert_processing_infix
from src.core.sample_data_container import SampleDataContainer
from src.core.analyzer.i_data_preparator import IDataPreparator
# endregion
[docs]
class CutPrimers(LoggerMixin, IDataPreparator):
    """Data preparator that runs the external cutPrimers script on a sample.

    The command line is built from the ``python``, ``cutprimers``,
    ``primer15``, ``primer13``, ``primer25`` and ``primer23`` entries of
    ``configurator.config`` plus ``configurator.args.threads`` and is handed
    to the executor supplied by the caller.
    """

    def __init__(self, configurator: Configurator):
        """Keep the configurator and initialise the mixin with its logger.

        Args:
            configurator (Configurator):
                Provides the ``config`` mapping, ``args`` and ``logger``
                read by :meth:`perform`.
        """
        super().__init__(logger=configurator.logger)
        self.configurator = configurator

    @staticmethod
    def _output_path(sample: SampleDataContainer, source, infix: str):
        """Return ``source``'s file name inside the sample's processing
        directory, passed through ``insert_processing_infix`` with ``infix``.
        """
        return insert_processing_infix(
            infix,
            os.path.join(sample.processing_path, os.path.basename(source)))

    def perform(
        self,
        sample: SampleDataContainer,
        executor: Union[CommandExecutor, callable]
    ) -> tuple[PathLike[AnyStr], PathLike[AnyStr]]:
        """Run cutPrimers on the sample's R1/R2 source files.

        Ensures the log file ``cutPrimers.log`` exists under
        ``sample.processing_logpath``, derives the ``.trimmed`` and
        ``.untrimmed`` output paths inside ``sample.processing_path``,
        builds the command line and runs it through ``execute``.

        Args:
            sample (SampleDataContainer):
                Supplies ``r1_source``, ``r2_source``, ``processing_path``
                and ``processing_logpath``.
            executor (Union[CommandExecutor, callable]):
                Executor object or function passed to ``execute`` together
                with the command string.

        Returns:
            Tuple[PathLike[AnyStr], PathLike[AnyStr]]:
                Paths to the trimmed R1 and R2 files.
        """
        primer_cutter_logpath = os.path.join(
            sample.processing_logpath, 'cutPrimers.log')
        # exist_ok=True removes the check-then-create race; abspath keeps
        # the directory name non-empty when the log path has no directory
        # component (same approach as PTrimmer).
        os.makedirs(
            os.path.abspath(os.path.dirname(primer_cutter_logpath)),
            exist_ok=True)
        # Append mode creates the file when missing and never truncates an
        # existing one, so no separate existence check is needed.
        with open(primer_cutter_logpath, 'a', encoding='utf-8'):
            pass

        tr1 = self._output_path(sample, sample.r1_source, '.trimmed')
        tr2 = self._output_path(sample, sample.r2_source, '.trimmed')
        utr1 = self._output_path(sample, sample.r1_source, '.untrimmed')
        utr2 = self._output_path(sample, sample.r2_source, '.untrimmed')

        config = self.configurator.config
        # NOTE(review): arguments are joined with plain spaces and are not
        # shell-quoted, so paths containing whitespace would be split -
        # confirm how `execute` interprets the string before adding quoting.
        cmd = ' '.join([
            config['python'],
            config['cutprimers'],
            '-r1', sample.r1_source,
            '-tr1', tr1,
            '-utr1', utr1,
            '-r2', sample.r2_source,
            '-tr2', tr2,
            '-utr2', utr2,
            '-pr15', config['primer15'],
            '-pr13', config['primer13'],
            '-pr25', config['primer25'],
            '-pr23', config['primer23'],
            '-stat', primer_cutter_logpath,
            '-t', str(self.configurator.args.threads)])

        # Disabled BAM-input variant of the command (never executed):
        # cmd_bam = ' '.join([
        #     self.configurator.config['python'],
        #     self.configurator.config['cutprimers'],
        #     '-bam', sample.bam_filepath,
        #     '--coordinates-file', sample.bam_filepath+'.coords',
        #     '-outbam', insert_processing_infix(
        #         '.trimmed', sample.bam_filepath),
        #     '-outbam2', insert_processing_infix(
        #         '.untrimmed', sample.bam_filepath),
        #     '-pr15', self.configurator.config['primer15'],
        #     '-pr13', self.configurator.config['primer13'],
        #     '-pr25', self.configurator.config['primer25'],
        #     '-pr23', self.configurator.config['primer23'],
        #     '-stat', primer_cutter_logpath,
        #     '-t', str(self.configurator.args.threads)])

        self.configurator.logger.info("Executing cutPrimers command")
        self.configurator.logger.debug("Command: %s", cmd)
        execute(executor, cmd)
        self.configurator.logger.info(
            "cutPrimers completed successfully. See the log at '%s'",
            primer_cutter_logpath)
        return tr1, tr2
[docs]
class PTrimmer(LoggerMixin, IDataPreparator):
    """Data preparator that runs the external pTrimmer tool on paired reads.

    The executable and amplicon file come from the ``ptrimmer`` and
    ``ampfile`` entries of ``configurator.config``.
    """

    def __init__(self, configurator: Configurator):
        """Keep the configurator and initialise the mixin with its logger.

        Args:
            configurator (Configurator):
                Provides the ``config`` mapping and ``logger`` read by
                :meth:`perform`.
        """
        super().__init__(logger=configurator.logger)
        self.configurator = configurator

    def perform(
        self,
        sample: SampleDataContainer,
        executor: Union[CommandExecutor, callable]
    ) -> tuple[PathLike[AnyStr], PathLike[AnyStr]]:
        """Run pTrimmer on the sample's R1/R2 source files.

        Ensures ``pTrimmer.log`` exists under ``sample.processing_logpath``,
        derives the ``.trimmed`` output paths inside
        ``sample.processing_path``, builds the command line (output and
        errors redirected to the log file) and runs it through ``execute``.

        Args:
            sample (SampleDataContainer):
                Supplies ``r1_source``, ``r2_source``, ``processing_path``
                and ``processing_logpath``.
            executor (CommandExecutor or callable):
                Executor object or function passed to ``execute`` together
                with the command string.

        Returns:
            Tuple of paths to the trimmed R1 and R2 files.
        """
        primer_cutter_logpath = os.path.join(
            sample.processing_logpath, 'pTrimmer.log')
        # exist_ok=True removes the check-then-create race on the log
        # directory.
        os.makedirs(
            os.path.abspath(os.path.dirname(primer_cutter_logpath)),
            exist_ok=True)
        # Append mode creates the file when missing and never truncates an
        # existing one, so no separate existence check is needed.
        with open(primer_cutter_logpath, 'a', encoding='utf-8'):
            pass

        r1_trimmed = os.path.join(
            sample.processing_path, insert_processing_infix(
                '.trimmed', os.path.basename(sample.r1_source)))
        r2_trimmed = os.path.join(
            sample.processing_path, insert_processing_infix(
                '.trimmed', os.path.basename(sample.r2_source)))
        summary_path = os.path.join(
            sample.processing_logpath, 'pTrimmer.summary')

        # NOTE(review): '--gzip' follows the redirection tokens. Assuming
        # the executor runs this string through a POSIX shell, it is still
        # passed to pTrimmer as an argument - confirm against `execute`.
        cmd = ' '.join([
            self.configurator.config['ptrimmer'],
            '--seqtype', 'pair',
            '--ampfile', self.configurator.config['ampfile'],
            '--read1', sample.r1_source,
            '--trim1', r1_trimmed,
            '--read2', sample.r2_source,
            '--trim2', r2_trimmed,
            '--summary', summary_path,
            '--mismatch', str(1),
            '--kmer', str(4),
            '>', primer_cutter_logpath,
            '2>&1',
            '--gzip'])

        self.configurator.logger.info("Executing pTrimmer command")
        self.configurator.logger.debug("Command: %s", cmd)
        execute(executor, cmd)
        # Lazy %-style arguments, matching the logging calls in CutPrimers;
        # the rendered message is unchanged.
        self.configurator.logger.info(
            "pTrimmer completed successfully. See the log at '%s'",
            primer_cutter_logpath)
        return r1_trimmed, r2_trimmed
[docs]
class PrimerCutter(LoggerMixin):
"""Factory class for creating primer-related data preparator instances.
Provides a method to instantiate specific primer cutter
classes based on name.
"""
def __init__(
self,
configurator: Configurator,
logger: Optional[logging.Logger] = None
):
super().__init__(logger=logger or configurator.logger)
self.configurator = configurator
[docs]
@staticmethod
def create_primer_cutter(
configurator: Configurator,
cutter_name: Optional[str] = 'cutprimers'
) -> IDataPreparator:
"""Factory method to instantiate a primer cutter object
based on the cutter_name.
Args:
configurator (Configurator):
Configuration object with parameters and logger.
cutter_name (str):
Name of the cutter type ('cutprimers' or 'ptrimmer').
Returns:
IDataPreparator instance corresponding to the cutter.
"""
match cutter_name.lower():
case 'cutprimers':
return CutPrimers(configurator)
case 'ptrimmer':
return PTrimmer(configurator)
case _:
raise NotImplementedError(
"There is no any cutter with name '%s'" % cutter_name)