Source code for src.utils.util

"""This module provides utility functions for handling
    configuration-based data generation and other helper operations.

    Currently, it includes:
        - reg_tuple_generator:
            Generates a tuple containing a region identifier
            and the corresponding mpileup file path
            based on a given configuration and chromosome interval.

    Dependencies:
        - src.configurator.Configurator:
            A configuration handler that provides configuration data.

    Usage:
        Import functions from this module to facilitate region
        and file path generation based on configured settings.
"""

# region Imports
import os
import logging
import tempfile

from os import PathLike
from typing import AnyStr, Optional

from src.configurator import Configurator
# endregion


[docs] def reg_tuple_generator( configurator: Configurator, chr_interval: str ) -> tuple[str, str]: """Generate a tuple (region, mpileup_filepath) based on the configuration. """ regions_section = configurator.parse_configuration( base_config_filepath=configurator.args.configFilepath, target_section='Regions') if str(chr_interval).lower() in regions_section: return ( regions_section[ str(chr_interval).lower() ].replace('chr', '').strip(), f"mpileup{chr_interval[3:5]}" ) else: return None
[docs] def depth_filter( filepath: PathLike[AnyStr], depth: int = 10, logger: Optional[logging.Logger] = None ) -> None: """Filters lines in a mpileup file based on a depth value in the fourth field. Reads the specified file line by line, and writes only those lines where the integer value in the fourth field (index 3) is greater than or equal to the specified 'depth' threshold. The original file is atomically replaced with the filtered content. Args: filepath (PathLike[AnyStr]): Path to the input file to be filtered. depth (int, optional): Minimum depth value to retain lines. Defaults to 10. logger (Optional[logging.Logger], optional): Logger instance for warnings and critical messages. If None, messages are printed to standard output. Raises: FileNotFoundError: If the input file does not exist. PermissionError: If there are insufficient permissions to read/write the file. SystemError, IOError, OSError: For other I/O related errors. Example: depth_filter('data.txt', depth=15) """ try: with open( file=filepath, mode='r', encoding='utf-8' ) as fd, tempfile.NamedTemporaryFile( mode='w', delete=False, encoding='utf-8', dir=os.path.dirname(filepath) ) as temp_fd: for line in fd: fields = line.strip().split() if len(fields) < 4: continue try: depth_value = int(fields[3]) if depth_value >= depth: temp_fd.write(line) except (ValueError, IndexError) as e: msg = f"An error '{repr(e)}' occurred at line " \ f"'{e.__traceback__.tb_frame.f_lineno}'. " \ f"Skip the line '{line}'" if logger: logger.warning(msg) else: print(msg) continue os.replace(temp_fd.name, filepath) except ( FileNotFoundError, PermissionError, SystemError, IOError, OSError ) as e: msg = \ f"A critical error '{repr(e)}' occurred " \ f"at line {e.__traceback__.tb_frame.f_lineno}" if logger: logger.critical(msg) else: print(msg) raise e