"""This module defines the BclToFastqAdapter class, \
which is a demultiplexer adapter for converting BCL files to FASTQ \
format using the Illumina bcl2fastq tool.
It inherits from LoggerMixin and IDemultiplexorAdapter, \
providing logging capabilities and adhering to the \
demultiplexer adapter interface.
The adapter handles configuration validation, \
command construction, \
and execution of the bcl2fastq command.
"""
import os
import logging
from os import PathLike
from typing import Optional, AnyStr
from src.core.base import LoggerMixin
from src.core.base import execute
from src.core.configurator.configuration_error import ConfigurationError
from .i_demultiplexor_adapter import IDemultiplexorAdapter
class BclToFastqAdapter(LoggerMixin, IDemultiplexorAdapter):
    """Adapter that converts BCL files to FASTQ with the bcl2fastq tool.

    The adapter validates the supplied configuration, translates it into
    a list of command-line arguments and hands the resulting command
    string to a caller function for execution.

    Attributes:
        config (dict[str, str]):
            Configuration parameters used for demultiplexing.
        cmd_caller (callable):
            Function used to execute system commands.
        logger (logging.Logger):
            Logger instance for logging messages
            (inherited from LoggerMixin).
    """

    def __init__(
        self,
        config: dict[str, str],
        cmd_caller: Optional[callable] = os.system,
        logger: logging.Logger = None
    ):
        """Validate the arguments and store them on the instance.

        Args:
            config: Configuration dictionary; it must pass
                ``_check_config``.
            cmd_caller: Callable used to run the command.
                Defaults to ``os.system``.
            logger: Optional logger used only to report an invalid
                ``cmd_caller``.

        Raises:
            TypeError: If ``cmd_caller`` is not callable.
            ConfigurationError: If ``config`` fails validation.
        """
        super().__init__()

        # NOTE(review): 'logger' is not forwarded to LoggerMixin here;
        # configuration errors below are reported through 'self.logger'.
        # Confirm this asymmetry is intended.
        if not callable(cmd_caller):
            msg = f"'cmd_caller' should be callable, got {type(cmd_caller)}"
            if logger is not None:
                logger.error(msg)
            raise TypeError(msg)
        self.cmd_caller = cmd_caller

        is_config_valid, msg = self._check_config(config)
        if not is_config_valid:
            self.logger.critical(msg)
            raise ConfigurationError(msg)
        self.config = config

    @staticmethod
    def _check_config(config: dict[str, str],) -> tuple[bool, str]:
        """Validate the provided configuration dictionary.

        Args:
            config: The configuration dictionary to validate.

        Returns:
            A ``(flag, message)`` tuple. ``flag`` is True when the
            configuration is valid. ``message`` is ``"OK"`` on success,
            otherwise it describes the wrong type or lists the missing
            keys.
        """
        if not isinstance(config, dict):
            return (
                False,
                f"Expected 'config' to be a dict, got {type(config)}")

        required_keys = [
            'demultiplexor',
            'input-dir',
            'output-dir',
            'sample-sheet',
            'runfolder-dir']
        missing_keys = [k for k in required_keys if k not in config]
        if missing_keys:
            return (
                False, f"Missing required configuration keys: {missing_keys}")
        return True, "OK"

    def _add_param(
        self,
        arguments: list,
        arg_name: str,
        config_key=None,
        is_flag: bool = False
    ):
        """Append one command-line option to ``arguments`` if configured.

        Args:
            arguments: The list of command-line arguments that is
                extended in place.
            arg_name: The argument name, e.g. '--min-log-level'.
            config_key: The key to look up in the configuration.
                When omitted, ``arg_name`` without its leading dashes
                is used.
            is_flag: If True, only ``arg_name`` is appended (no value),
                and only when the configured value is the string
                "True" or the boolean True.

        Note:
            A key that is absent from the configuration is skipped
            silently, for flags as well as for valued options.
        """
        key = config_key or arg_name.lstrip('-')
        # Use .get() for both branches: flags are optional, so a missing
        # key must mean "not set" rather than raise KeyError.
        value = self.config.get(key)

        if is_flag:
            if value is True or value == "True":
                arguments.append(arg_name)
        elif value is not None:
            arguments.extend([arg_name, str(value)])

    def demultiplex(self) -> None:
        """Build the demultiplexing command and execute it.

        The command starts with the configured 'demultiplexor' value,
        followed by every option found in the configuration. Each
        argument is printed, then the space-joined command string is
        passed, together with ``self.cmd_caller``, to ``execute``.

        Note:
            Assumes 'self.config' has already been validated by
            ``__init__``.
        """
        cmd_args = [self.config['demultiplexor']]

        # Options that take a value. Only those present in the
        # configuration are emitted.
        required = [
            'runfolder-dir',
            'input-dir',
            'output-dir',
            'sample-sheet',
            'tiles',
            'use-bases-mask'
        ]
        for arg in required:
            self._add_param(cmd_args, arg_name=f"--{arg}", config_key=arg)

        # NOTE(review): only the keys of this mapping are used. The
        # values are never injected into the command, so an option
        # missing from the configuration is simply omitted. Confirm
        # whether the values were meant to be applied as fallbacks.
        defaults = {
            'min-log-level': 'INFO',
            'loading-threads': 4,
            'processing-threads': 4,
            'writing-threads': 4,
            'minimum-trimmed-read-length': 35,
            'mask-short-adapter-reads': 22,
            'adapter-stringency': 0.9,
            'fastq-compression-level': 4,
            'barcode-mismatches': 1
        }
        for key in defaults:
            self._add_param(cmd_args, arg_name=f"--{key}", config_key=key)

        # Value-less switches, emitted only when enabled in the config.
        for flag in [
            'ignore-missing-bcls',
            'ignore-missing-filter',
            'ignore-missing-positions',
            'ignore-missing-controls',
            'write-fastq-reverse-complement',
            'with-failed-reads',
            'create-fastq-for-index-reads',
            'find-adapters-with-sliding-window',
            'no-bgzf-compression',
            'no-lane-splitting'
        ]:
            self._add_param(
                cmd_args,
                arg_name=f"--{flag}",
                config_key=flag,
                is_flag=True)

        for other_optional in [
            'intensities-dir',
            'stats-dir',
            'interop-dir',
            'reports-dir'
        ]:
            self._add_param(
                cmd_args,
                arg_name=f"--{other_optional}",
                config_key=other_optional)

        for arg in cmd_args:
            print(arg)

        # NOTE(review): arguments are joined with single spaces and are
        # not quoted, so values containing whitespace or shell
        # metacharacters reach the caller verbatim.
        execute(self.cmd_caller, ' '.join(cmd_args))