nf-core · giorgiagandolfi · Jul 21, 2025 · Jul 22, 2025 · Jul 22, 2025 · Jul 30, 2025
diff --git a/modules/nf-core/viber/environment.yml b/modules/nf-core/viber/environment.yml
@@ -0,0 +1,9 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::r-cnaqc=1.1.2"
+  - "bioconda::r-viber=1.0.0"
+  - "conda-forge::r-cli=3.6.5"
+  - "conda-forge::r-dplyr=1.1.4"
+  - "conda-forge::r-ggplot2=3.5.2"
diff --git a/modules/nf-core/viber/main.nf b/modules/nf-core/viber/main.nf
@@ -0,0 +1,67 @@
+process VIBER {
+    tag "$meta.id"
+    label "process_single"
+    label "error_ignore"
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/r-cnaqc_r-viber:014077a3164189d5':
+        'community.wave.seqera.io/library/r-cnaqc_r-viber:2314592f7d2f9abe'}"
+
+    input:
+        tuple val(meta), path(rds_join), val(tumour_samples) //rds from either JOIN_CNAQC or JOIN_FIT, should be always grouped
+
+    output:
+        tuple val(meta), path("*_viber_best_st_fit.rds"), emit: viber_rds
+        tuple val(meta), path("*_viber_best_st_heuristic_fit.rds"), emit: viber_heuristic_rds
+        tuple val(meta), path("*_${plot1}"), emit: viber_plots_rds
+        tuple val(meta), path("*_${plot2}"), emit: viber_heuristic_plots_rds
+        tuple val(meta), path("*_REPORT_plots_viber.rds"), emit: viber_report_rds
+        tuple val(meta), path("*_REPORT_plots_viber.pdf"), emit: viber_report_pdf
+        tuple val(meta), path("*_REPORT_plots_viber.png"), emit: viber_report_png
+        path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+
+    script:
+    def n_samples = tumour_samples.size()
+    if (n_samples==1) {
+        plot1 = "viber_best_st_mixing_plots.rds"
+        plot2 = "viber_best_st_heuristic_mixing_plots.rds"
+    } else {
+        plot1 = "viber_best_st_fit_plots.rds"
+        plot2 = "viber_best_st_heuristic_fit_plots.rds"
+    }
+
+    template "main_script.R"
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def n_samples = tumour_samples.size()
+    if (n_samples==1) {
+        plot1 = "viber_best_st_mixing_plots.rds"
+        plot2 = "viber_best_st_heuristic_mixing_plots.rds"
+    } else {
+        plot1 = "viber_best_st_fit_plots.rds"
+        plot2 = "viber_best_st_heuristic_fit_plots.rds"
+    }
+
+    """
+    touch ${prefix}_viber_best_st_fit.rds
+    touch ${prefix}_viber_best_st_heuristic_fit.rds
+    touch ${prefix}_${plot1}
+    touch ${prefix}_${plot2}
+    touch ${prefix}_REPORT_plots_viber.rds
+    touch ${prefix}_REPORT_plots_viber.pdf
+    touch ${prefix}_REPORT_plots_viber.png
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        VIBER: \$(Rscript -e 'library(VIBER); sessionInfo()\$otherPkgs\$VIBER\$Version')
+        cli: \$(Rscript -e 'library(cli); sessionInfo()\$otherPkgs\$cli\$Version')
+        dplyr: \$(Rscript -e 'library(dplyr); sessionInfo()\$otherPkgs\$dplyr\$Version')
+        ggplot2: \$(Rscript -e 'library(ggplot2); sessionInfo()\$otherPkgs\$ggplot2\$Version')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/viber/meta.yml b/modules/nf-core/viber/meta.yml
@@ -0,0 +1,126 @@
+name: viber
+description: Multisample subclonal deconvolution of cancer genome sequencing data.
+keywords:
+  - subclonal deconvolution
+  - genomics
+  - cancer evolution
+tools:
+  - "viber":
+      description: |
+        VIBER is a package that implements a variational Bayesian model to fit multi-variate Binomial mixtures.
+        The statistical model is semi-parametric and fit by a variational mean-field approximation to the model posterior.
+        The components are Binomial distributions which can model count data;
+        these can be used to model sequencing counts in the context of cancer, for instance.
+        The package implements methods to fit and visualize clustering results.
+      homepage: "https://caravagnalab.github.io/VIBER/"
+      documentation: "https://caravagnalab.github.io/VIBER/"
+      tool_dev_url: "https://github.com/caravagnalab/VIBER/"
+      doi: "10.1038/s41588-020-0675-5"
+      licence: ["GPL v3-or-later"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1']`
+    - rds_join:
+        type: file
+        description: Either a .rds object of class mCNAqc or a .csv mutations table
+        pattern: "*.{rds,csv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3752" # csv
+    - tumour_samples:
+        type: list
+        description: List of tumour sample identifiers for a specific patient
+output:
+  viber_rds:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_viber_best_st_fit.rds":
+          type: file
+          description: Best viber fit as an .rds object
+          pattern: "*_viber_best_st_fit.rds"
+          ontologies: []
+  viber_heuristic_rds:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_viber_best_st_heuristic_fit.rds":
+          type: file
+          description: Cluster-filtering heuristics viber fit as an .rds object
+          pattern: "_viber_best_st_heuristic_fit.rds"
+          ontologies: []
+  viber_plots_rds:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_${plot1}":
+          type: file
+          description: mixining proportion plot or best fit plot
+          pattern: "*_plots.rds"
+          ontologies: []
+  viber_heuristic_plots_rds:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_${plot2}":
+          type: file
+          description: mixining proportion plot or best fit plot heuristics
+          pattern: "*viber_best_st_heuristic*.rds"
+          ontologies: []
+  viber_report_rds:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_REPORT_plots_viber.rds":
+          type: file
+          description: final report plots as a .pdf file
+          pattern: "*_REPORT_plots_viber.rds"
+          ontologies: []
+  viber_report_pdf:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_REPORT_plots_viber.pdf":
+          type: file
+          description: final report plots as a .pdf file
+          pattern: "*_REPORT_plots_viber.pdf"
+          ontologies: []
+  viber_report_png:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_REPORT_plots_viber.png":
+          type: file
+          description: final report plots as a .png file
+          pattern: "*_REPORT_plots_viber.png"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@giorgiagandolfi"
+maintainers:
+  - "@giorgiagandolfi"
diff --git a/modules/nf-core/viber/templates/main_script.R b/modules/nf-core/viber/templates/main_script.R
@@ -0,0 +1,175 @@
+#!/usr/bin/env Rscript
+
+parse_args = function(x) {
+    x = gsub("\\\\[","",x)
+    x = gsub("\\\\]","",x)
+    # giving errors when we have lists like c(xxx, xxx) since it will separate it
+    # args_list = unlist(strsplit(x, ', ')[[1]])
+    args_list = unlist(strsplit(x, ", (?=[^)]*(?:\\\\(|\$))", perl=TRUE))
+    # args_vals = lapply(args_list, function(x) strsplit(x, split=":")[[1]])
+    args_vals = lapply(args_list, function(x) {
+        x_splt = strsplit(x, split=":")[[1]]
+        c(x_splt[1],  paste(x_splt[2:length(x_splt)], collapse=":"))
+    })
+
+    # Ensure the option vectors are length 2 (key/ value) to catch empty ones
+    args_vals = lapply(args_vals, function(z){ length(z) = 2; z})
+
+    parsed_args = structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1]))
+    parsed_args[! is.na(parsed_args)]
+}
+
+opt = list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
+)
+args_opt = parse_args('$task.ext.args')
+for ( ao in names(args_opt)) opt[[ao]] = args_opt[[ao]]
+print(opt)
+
+# Script #####
+
+library(VIBER)
+library(dplyr)
+library(tidyverse)
+library(ggplot2)
+library(CNAqc)
+
+#patientID = "$meta.patient"
+samples = substr("$tumour_samples", 2, nchar("$tumour_samples")-1)
+samples = strsplit(samples, ", ")[[1]]
+print("$meta.patient")
+print("$tumour_samples")
+print("$rds_join")
+print(samples)
+
+if ( grepl(".rds\$", tolower("$rds_join")) ) {
+    input_obj = readRDS("$rds_join")
+    if (class(input_obj) == "m_cnaqc") {
+        shared = input_obj %>% get_sample(sample=samples, which_obj="shared")
+        joint_table = lapply(names(shared),
+                        function(sample_name)
+                        shared[[sample_name]] %>%
+                            # keep only mutations on the diploid karyotype
+                            CNAqc::subset_by_segment_karyotype("1:1") %>%
+                            CNAqc::Mutations() %>%
+                            dplyr::mutate(sample_id=sample_name)
+                        ) %>% dplyr::bind_rows()
+        } else {
+          cli::cli_alert_warning("Object of class {class(input_obj)} not supported.")
+          return()
+        }
+} else {
+  joint_table = read.csv("$rds_join")
+}
+
+print("Subset joint done")
+
+## TODO : add drivers to `input_tab`
+
+## Read input joint table
+input_tab = joint_table %>%
+dplyr::mutate(VAF=replace(VAF, VAF==0, 1e-7))
+
+## Convert the input table into longer format
+reads_data = input_tab %>%
+dplyr::select(chr, from, ref, alt, NV, DP, VAF, sample_id) %>%
+tidyr::pivot_wider(names_from="sample_id",
+                    values_from=c("NV","DP","VAF"), names_sep=".")
+
+## Extract DP (depth)
+dp = reads_data %>%
+# dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module
+dplyr::select(dplyr::starts_with("DP")) %>%
+dplyr::mutate(dplyr::across(.cols=dplyr::everything(),
+                            .fns=function(x) replace(x, is.na(x), 0))) %>%
+dplyr::rename_all(function(x) stringr::str_remove_all(x,"DP."))
+
+## Extract NV (alt_counts)
+nv = reads_data %>%
+# dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module
+dplyr::select(dplyr::starts_with("NV")) %>%
+dplyr::mutate(dplyr::across(.cols=dplyr::everything(),
+                            .fns=function(x) replace(x, is.na(x), 0))) %>%
+dplyr::rename_all(function(x) stringr::str_remove_all(x,"NV."))
+
+# Standard fit
+viber_K = as.integer(opt[["K"]])
+
+
+message("Starting standard fit")
+st_fit = VIBER::variational_fit(nv, dp,
+                                K=viber_K,
+                                data=reads_data,
+                                description="$meta.patient"
+                                )
+message("End standard fit")
+best_fit = best_fit_heuristic = st_fit
+
+# If all clusters are removed -> keep the origianl best fit
+tryCatch(expr = {
+    # Apply the heuristic
+    best_fit_heuristic = VIBER::choose_clusters(st_fit,
+        binomial_cutoff=as.numeric(opt[["binomial_cutoff"]]),
+        dimensions_cutoff=as.integer(opt[["dimensions_cutoff"]]),
+        pi_cutoff=as.numeric(opt[["pi_cutoff"]]),
+        re_assign=as.logical(opt[["re_assign"]])
+        )
+    }, error = function(e) {
+        print(e)
+        best_fit_heuristic <<- st_fit
+    } )
+
+# Save fits
+saveRDS(best_fit, file=paste0(opt[["prefix"]], "_viber_best_st_fit.rds"))
+saveRDS(best_fit_heuristic, file = paste0(opt[["prefix"]], "_viber_best_st_heuristic_fit.rds"))
+
+# Save plots
+n_samples = ncol(best_fit[["x"]]) - 1
+if (n_samples >1) { #mutlisample mode on
+print("multisample mode on")
+plot_fit = plot(best_fit)
+plot_fit_heuristic = plot(best_fit_heuristic)
+
+saveRDS(plot_fit, file=paste0(opt[["prefix"]], "_viber_best_st_fit_plots.rds"))
+saveRDS(plot_fit_heuristic, file=paste0(opt[["prefix"]], "_viber_best_st_heuristic_fit_plots.rds"))
+} else {
+plot_fit_mixing = plot_mixing_proportions(best_fit)
+plot_fit_mixing_heuristic = plot_mixing_proportions(best_fit_heuristic)
+
+saveRDS(plot_fit_mixing, file=paste0(opt[["prefix"]], "_viber_best_st_mixing_plots.rds"))
+saveRDS(plot_fit_mixing_heuristic, file=paste0(opt[["prefix"]], "_viber_best_st_heuristic_mixing_plots.rds"))
+}
+
+# Save report plot
+n_samples = ncol(best_fit[["x"]]) - 1
+marginals = multivariate = ggplot()
+
+try(expr = {marginals <<- VIBER::plot_1D(best_fit)} )
+#try(expr = {multivariate = plot(best_fit) %>% patchwork::wrap_plots()} )
+#top_p = patchwork::wrap_plots(marginals, multivariate, design=ifelse(n_samples>2, "ABB", "AAB"))
+
+try(expr = {multivariate = plot(best_fit)})
+try(expr = {multivariate = ggpubr::ggarrange(plotlist = multivariate)})
+top_p = ggpubr::ggarrange(plotlist = list(marginals, multivariate), widths=ifelse(n_samples>2, c(1,2), c(2,1)))
+
+mix_p = VIBER::plot_mixing_proportions(best_fit)
+binom = VIBER::plot_peaks(best_fit)
+elbo = VIBER::plot_ELBO(best_fit)
+#bottom_p = patchwork::wrap_plots(mix_p, binom, elbo, design="ABBBC")
+bottom_p = ggpubr::ggarrange(plotlist = list(mix_p, binom, elbo), widths = c(1,2,1), nrow = 1)
+
+#report_fig = patchwork::wrap_plots(top_p, bottom_p, design=ifelse(n_samples>2, "AAAB", "AAB"))
+report_fig = ggpubr::ggarrange(top_p, bottom_p, nrow = 2, heights = ifelse(n_samples>2, c(3,1), c(2,1)))
+saveRDS(report_fig, file=paste0(opt[["prefix"]], "_REPORT_plots_viber.rds"))
+ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_viber.pdf"), height=210, width=210, units="mm", dpi = 200)
+ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_viber.png"), height=210, width=210, units="mm", dpi = 200)
+
+
+# version export
+f = file("versions.yml","w")
+cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version
+viber_version = sessionInfo()\$otherPkgs\$VIBER\$Version
+writeLines(paste0('"', "$task.process", '"', ":"), f)
+writeLines(paste("    CNAqc:", cnaqc_version), f)
+writeLines(paste("    VIBER:", viber_version), f)
+close(f)