Source code for sequana_pipetools.options

#
#  This file is part of Sequana software
#
#  Copyright (c) 2016-2021 - Sequana Dev Team (https://sequana.readthedocs.io)
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  Website:       https://github.com/sequana/sequana
#  Documentation: http://sequana.readthedocs.io
#  Contributors:  https://github.com/sequana/sequana/graphs/contributors
##############################################################################
import argparse
import inspect
import os
import shutil
import sys

from sequana_pipetools.snaketools import Pipeline

from .misc import print_version

__all__ = [
    "ClickGeneralOptions",
    "ClickInputOptions",
    "ClickFeatureCountsOptions",
    "ClickKrakenOptions",
    "ClickSlurmOptions",
    "ClickSnakemakeOptions",
    "ClickTrimmingOptions",
    "init_click",
    "include_options_from",
    "OptionEatAll",
]

import rich_click as click

from sequana_pipetools.info import sequana_epilog, sequana_prolog

click.rich_click.USE_MARKDOWN = True
click.rich_click.SHOW_METAVARS_COLUMN = False
click.rich_click.APPEND_METAVARS_HELP = True
click.rich_click.STYLE_ERRORS_SUGGESTION = "magenta italic"
click.rich_click.SHOW_ARGUMENTS = True
click.rich_click.FOOTER_TEXT = sequana_epilog


[docs] def init_click(NAME, groups={}): """This function populates click variables and groups NAME is added to the rich_context so that ClickXXOptions classes may reuse it. It also sets the HEADER_TEXT and initiate a OPTION_GROUPS to be used by rich_click. In a sequana pipeline, you can use this code:: CTX = init_click(NAME, groups={ "Pipeline Specific": [ "--method-example"], } ) @click.command(context_settings=CONTEXT_SETTINGS) @include_options_from(ClickSnakemakeOptions, working_directory=NAME) @click.options("--method-example") def main(**kwargs): pass """ click.rich_click.HEADER_TEXT = sequana_prolog.format(name=NAME) click.rich_click.OPTION_GROUPS[f"sequana_{NAME}"] = [] click.rich_context.RichContext.NAME = NAME for name, options in groups.items(): click.rich_click.OPTION_GROUPS[f"sequana_{NAME}"].append({"name": name, "options": options}) # a common context for the help return dict(help_option_names=["-h", "--help"])
# A decorator to include common set of options # This decorator also populates the OPTION GROUPS # dynamically
[docs] def include_options_from(cls, *args, **kwargs): def decorator(f): caller_module = inspect.getmodule(f) if caller_module and "NAME" in caller_module.__dict__: NAME = caller_module.__dict__["NAME"] else: # pragma: no cover print("You must define NAME as your pipeline name in the module main.py ") sys.exit(1) # add options dynamically to the main click command for option in cls(*args, **kwargs).options: option(f) # add groups dynamically to the OPTION_GROUPS # NAME = kwargs.get("caller", None) click.rich_click.OPTION_GROUPS[f"sequana_{NAME}"].insert(0, cls.metadata) return f return decorator
# This is a recipe from https://stackoverflow.com/questions/48391777/nargs-equivalent-for-options-in-click # to allow command line such as # sequana_multitax --databases 1 2 3
[docs] class OptionEatAll(click.Option): def __init__(self, *args, **kwargs): self.save_other_options = kwargs.pop("save_other_options", True) nargs = kwargs.pop("nargs", -1) assert nargs == -1, "nargs, if set, must be -1 not {}".format(nargs) super(OptionEatAll, self).__init__(*args, **kwargs) self._previous_parser_process = None self._eat_all_parser = None
[docs] def add_to_parser(self, parser, ctx): def parser_process(value, state): # method to hook to the parser.process done = False value = [value] if self.save_other_options: # grab everything up to the next option while state.rargs and not done: for prefix in self._eat_all_parser.prefixes: if state.rargs[0].startswith(prefix): done = True if not done: value.append(state.rargs.pop(0)) else: # grab everything remaining value += state.rargs state.rargs[:] = [] value = tuple(value) # call the actual process self._previous_parser_process(value, state) retval = super(OptionEatAll, self).add_to_parser(parser, ctx) for name in self.opts: our_parser = parser._long_opt.get(name) or parser._short_opt.get(name) if our_parser: self._eat_all_parser = our_parser self._previous_parser_process = our_parser.process our_parser.process = parser_process break return retval
[docs] class ClickGeneralOptions: group_name = "General" metadata = { "name": group_name, "options": ["--deps", "--from-project", "--help", "--level", "--version"], } def __init__(self, caller=None): self.options = [ click.option( "--deps", is_flag=True, callback=self.deps_callback, help="Show the known dependencies of the pipeline" ), click.option( "--from-project", "from_project", type=click.Path(), callback=self.from_project_callback, help="""You can initiate a new analysis run from an existing project. In theory, sequana project have a hidden .sequana directory, which can be used as input. The name of the run directory itself should suffice (if .sequana is found inside). From there, the config file and the pipeline files are copied in your new working directory""", ), click.option( "--level", "level", default="INFO", type=click.Choice(["INFO", "DEBUG", "WARNING", "ERROR", "CRITICAL"]), help="logging level in INFO, DEBUG, WARNING, ERROR, CRITICAL", ), click.option( "-v", "--version", is_flag=True, callback=self.version_callback, help="Print the version and exit" ), ]
[docs] @staticmethod def version_callback(ctx, param, value): if not value: return print_version(ctx.NAME) ctx.exit(0)
[docs] @staticmethod def from_project_callback(ctx, param, value): if not value: return else: # When --from-project is called, all value of arguments are are replaced by the ones # found in the config file. Therefore, users may ommit all arguments. However, some # may be compulsary, so we need to reset all 'required' arguments to False for option in ctx.command.params: option.required = False return value
[docs] @staticmethod def deps_callback(ctx, param, value): if not value: return module = Pipeline(ctx.NAME) with open(str(module.requirements), "r") as fin: data = fin.read() data = data.split() data = "\n".join(sorted(data)) click.echo(f"Those software will be required for the pipeline to work correctly:\n\n{data}\n") ctx.exit(0)
def guess_scheduler(): """Guesses whether we are on a SLURM cluster or not. If not, we assume a local run is expected. """ if shutil.which("sbatch") and shutil.which("srun"): # pragma: no cover return "slurm" else: return "local"
[docs] class ClickSnakemakeOptions: group_name = "Snakemake" metadata = { "name": group_name, "options": [ "--apptainer-prefix", "--apptainer-args", "--force", "--jobs", "--use-apptainer", "--working-directory", ], } def __init__(self, working_directory="analysis", caller=None): self.workdir = working_directory _default_jobs = 40 if guess_scheduler() == "slurm" else 4 self.options = [ click.option( "--apptainer-prefix", "apptainer_prefix", default=None, show_default=True, type=click.Path(), help="""If set, pipelines will download apptainer files in this directory otherwise they will be downloaded in the working directory of the pipeline .""", ), click.option( "--apptainer-args", "apptainer_args", default="", show_default=True, help="""provide any arguments accepted by apptainer. By default, we set -B $HOME:$HOME """, ), click.option( "--force", "force", is_flag=True, default=False, help="""If the working directory exists, proceed anyway.""", ), click.option( "--jobs", "jobs", default=_default_jobs, show_default=True, help="""Number of jobs to run at the same time (default 4 on a local computer, 40 on a SLURM scheduler). This is the --jobs options of Snakemake""", ), click.option( "--use-apptainer", "use_apptainer", is_flag=True, default=False, help="""If set, pipelines will download apptainer files for all external tools.""", ), click.option( "--working-directory", "workdir", default=self.workdir, show_default=True, help="""where to save the pipeline and its configuration file and where the analyse can be run""", ), ]
[docs] class ClickInputOptions: group_name = "Data" metadata = {"name": group_name, "options": ["--input-directory", "--input-pattern", "--input-readtag"]} def __init__(self, input_directory=".", input_pattern="*fastq.gz", add_input_readtag=True, caller=None): self.input_directory = input_directory self.input_pattern = input_pattern self.add_input_readtag = add_input_readtag self.options = [ click.option( "--input-directory", "input_directory", default=self.input_directory, type=click.Path(exists=True, file_okay=False), # required=True, show_default=True, help="""Where to find the input files""", ), click.option( "--input-pattern", "input_pattern", default=self.input_pattern, type=click.STRING, show_default=True, help=f"pattern for the input files ({input_pattern})", ), ] if self.add_input_readtag: self.options.append( click.option( "--input-readtag", "input_readtag", default="_R[12]_", show_default=True, type=click.STRING, help="""pattern for the paired/single end FastQ. If your files are tagged with _R1_ or _R2_, please set this value to '_R[12]_'. If your files are tagged with _1 and _2, you must change this readtag accordingly to '_[12]'.""", ) )
[docs] class ClickKrakenOptions: group_name = "Kraken" metadata = { "name": group_name, "options": [ "--kraken-databases", "--skip-kraken", ], } def __init__(self, caller=None): self.options = [ click.option( "--kraken-databases", "kraken_databases", type=click.STRING, nargs="+", help="""Path to a valid set of Kraken database(s). If you do not have any, please see https://sequana.readthedocs.io or use sequana_taxonomy --download option. You may use several, in which case, an iterative taxonomy is performed as explained in online sequana documentation""", ), click.option( "--skip-kraken", is_flag=True, default=False, show_default=True, help="""If provided, kraken taxonomy is performed. A database must be provided (see below). """, ), ]
[docs] class ClickTrimmingOptions: group_name = "Trimming" metadata = { "name": group_name, "options": [ "--software-choice", "--trimming-minimum-length", "--trimming-adapter-read1", "--trimming-adapter-read2", "--disable-trimming", "--trimming-cutadapt-mode", "--trimming-cutadapt-options", "--trimming-quality", ], } def __init__(self, software=["cutadapt", "atropos", "fastp"], caller=None): """This section is dedicated to reads trimming and filtering and adapter trimming. We currently provide supports for Cutadapt/Atropos and FastP tools. This section uniformizes the options for such tools """ self.software = software self.software_default = "fastp" if "fastp" in software else software[0] def quality(x): x = int(x) if x < 0 and x != -1: click.BadParameter("quality must be positive") return x self.options = [ click.option( "--software-choice", "trimming_software_choice", default=self.software_default, show_default=True, type=click.Choice(self.software), help="""additional options understood by cutadapt""", ), click.option("--disable-trimming", is_flag=True, default=False, help="If provided, disable trimming "), click.option( "--trimming-adapter-read1", "trimming_adapter_read1", default="", show_default=True, help="""fastp auto-detects adapters. You may specify the adapter sequence specificically for fastp or cutadapt/atropos with option for read1""", ), click.option( "--trimming-adapter-read2", "trimming_adapter_read2", default="", show_default=True, help="""fastp auto-detects adapters. You may specify the adapter sequence specificically for fastp or cutadapt/atropos with option for read1""", ), click.option( "--trimming-minimum-length", default=20, show_default=True, help="""minimum number of bases required; read discarded otherwise. For cutadapt, default is 20 and for fastp, 15. We set it to 20.""", ), click.option( "--trimming-quality", "trimming_quality", default=-1, show_default=True, type=quality, help="""Trimming quality parameter depends on the algorithm used by the software behind the scene and may vary greatly; consequently, we do not provide a default value. Cutadapt uses 30 by default, fastp uses 15 by default. If unset, the rnaseq pipeline set the default to 30 for cutadapt and 15 for fastp. """, ), click.option( # Cutadapt specific "--trimming-cutadapt-mode", "trimming_cutadapt_mode", default="b", show_default=True, type=click.Choice(["g", "a", "b"]), help="""Mode used to remove adapters. g for 5', a for 3', b for both 5'/3' as defined in cutadapt documentation""", ), click.option( "--trimming-cutadapt-options", "trimming_cutadapt_options", default=" -O 6 --trim-n", show_default=True, help="""additional options understood by cutadapt. Here, we trim the Ns; -O 6 is the minimum overlap length between read and adapter for an adapter to be found""", ), ]
[docs] class ClickFeatureCountsOptions: group_name = "Feature Counts" metadata = { "name": group_name, "options": [ "--feature-counts-strandness", "--feature-counts-attribute", "--feature-counts-extra-attributes", "--feature-counts-feature-type", "--feature-counts-options", ], } def __init__(self, feature_type="gene", attribute="ID", options=None, strandness=None, caller=None): self.feature_type = feature_type self.attribute = attribute self.options = options self.strandness = strandness self.options = [ click.option( "--feature-counts-strandness", default=self.strandness, help="""0 for unstranded, 1 for stranded and 2 for reversely stranded. If you do not know, let the pipeline guess for you.""", ), click.option( "--feature-counts-attribute", default=self.attribute, help="""the GFF attribute to use as identifier. If you do not know, look at the GFF file or use 'sequana summary YOURFILE.gff' command to get information about attributes and features contained in your annotation file.""", ), click.option( "--feature-counts-extra-attributes", default=None, help="""any extra attribute to add in final feature counts files""", ), click.option( "--feature-counts-feature-type", default=self.feature_type, help="""the GFF feature type (e.g., gene, exon, mRNA, etc). If you do not know, look at the GFF file or use 'sequana summary YOURFILE.gff'. Would you need to perform an analysis on several features, you can either build your own custom GFF file (see Please see https://github.com/sequana/rnaseq/wiki) or provide several entries separated by commas""", ), click.option( "--feature-counts-options", default=self.options, help="""Any extra options for feature counts. Note that the -s option (strandness), the -g option (attribute name) and -t options (genetic type) have their own options. If you use still use one of the -s/-g/-t, it will replace the --feature-counts-strandness, --feature-counts-attribute and -feature-counts-feature options respectively""", ), ]
[docs] class ClickSlurmOptions: group_name = "Slurm" metadata = { "name": group_name, "options": ["--profile", "--slurm-queue", "--slurm-memory"], } def __init__(self, memory="4G", queue="common", profile=None, caller=None): self.memory = memory self.queue = queue self.profile = guess_scheduler() self.options = [ click.option( "--profile", "profile", default=self.profile, show_default=True, type=click.Choice(["local", "slurm"]), help="Create cluster (HPC) profile directory. By default, it uses local profile", ), click.option( "--slurm-memory", "slurm_memory", default=self.memory, show_default=True, help="""Specify the memory required by default. (default 4G; stands for 4 Gbytes)""", ), click.option( "--slurm-queue", "slurm_queue", default=self.queue, show_default=True, help="SLURM queue to be used (biomics)", ), ]