-
Notifications
You must be signed in to change notification settings - Fork 888
add viber module #8797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
add viber module #8797
Changes from all commits
a386f7e
9940e01
a48d567
2995964
05d5ef2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- "bioconda::r-cnaqc=1.1.2" | ||
- "bioconda::r-viber=1.0.0" | ||
- "conda-forge::r-cli=3.6.5" | ||
- "conda-forge::r-dplyr=1.1.4" | ||
- "conda-forge::r-ggplot2=3.5.2" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
process VIBER { | ||
tag "$meta.id" | ||
label "process_single" | ||
label "error_ignore" | ||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'oras://community.wave.seqera.io/library/r-cnaqc_r-viber:014077a3164189d5': | ||
'community.wave.seqera.io/library/r-cnaqc_r-viber:2314592f7d2f9abe'}" | ||
|
||
input: | ||
tuple val(meta), path(rds_join), val(tumour_samples) //rds from either JOIN_CNAQC or JOIN_FIT, should be always grouped | ||
|
||
output: | ||
tuple val(meta), path("*_viber_best_st_fit.rds"), emit: viber_rds | ||
tuple val(meta), path("*_viber_best_st_heuristic_fit.rds"), emit: viber_heuristic_rds | ||
tuple val(meta), path("*_${plot1}"), emit: viber_plots_rds | ||
tuple val(meta), path("*_${plot2}"), emit: viber_heuristic_plots_rds | ||
tuple val(meta), path("*_REPORT_plots_viber.rds"), emit: viber_report_rds | ||
tuple val(meta), path("*_REPORT_plots_viber.pdf"), emit: viber_report_pdf | ||
tuple val(meta), path("*_REPORT_plots_viber.png"), emit: viber_report_png | ||
path "versions.yml", emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
|
||
script: | ||
def n_samples = tumour_samples.size() | ||
if (n_samples==1) { | ||
plot1 = "viber_best_st_mixing_plots.rds" | ||
plot2 = "viber_best_st_heuristic_mixing_plots.rds" | ||
} else { | ||
plot1 = "viber_best_st_fit_plots.rds" | ||
plot2 = "viber_best_st_heuristic_fit_plots.rds" | ||
} | ||
|
||
template "main_script.R" | ||
|
||
stub: | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
def n_samples = tumour_samples.size() | ||
if (n_samples==1) { | ||
plot1 = "viber_best_st_mixing_plots.rds" | ||
plot2 = "viber_best_st_heuristic_mixing_plots.rds" | ||
} else { | ||
plot1 = "viber_best_st_fit_plots.rds" | ||
plot2 = "viber_best_st_heuristic_fit_plots.rds" | ||
} | ||
|
||
""" | ||
touch ${prefix}_viber_best_st_fit.rds | ||
touch ${prefix}_viber_best_st_heuristic_fit.rds | ||
touch ${prefix}_${plot1} | ||
touch ${prefix}_${plot2} | ||
touch ${prefix}_REPORT_plots_viber.rds | ||
touch ${prefix}_REPORT_plots_viber.pdf | ||
touch ${prefix}_REPORT_plots_viber.png | ||
|
||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
VIBER: \$(Rscript -e 'library(VIBER); sessionInfo()\$otherPkgs\$VIBER\$Version') | ||
cli: \$(Rscript -e 'library(cli); sessionInfo()\$otherPkgs\$cli\$Version') | ||
dplyr: \$(Rscript -e 'library(dplyr); sessionInfo()\$otherPkgs\$dplyr\$Version') | ||
ggplot2: \$(Rscript -e 'library(ggplot2); sessionInfo()\$otherPkgs\$ggplot2\$Version') | ||
END_VERSIONS | ||
""" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
name: viber | ||
description: Multisample subclonal deconvolution of cancer genome sequencing data. | ||
keywords: | ||
- subclonal deconvolution | ||
- genomics | ||
- cancer evolution | ||
tools: | ||
- "viber": | ||
description: | | ||
VIBER is a package that implements a variational Bayesian model to fit multi-variate Binomial mixtures. | ||
The statistical model is semi-parametric and fit by a variational mean-field approximation to the model posterior. | ||
The components are Binomial distributions which can model count data; | ||
these can be used to model sequencing counts in the context of cancer, for instance. | ||
The package implements methods to fit and visualize clustering results. | ||
homepage: "https://caravagnalab.github.io/VIBER/" | ||
documentation: "https://caravagnalab.github.io/VIBER/" | ||
tool_dev_url: "https://github.com/caravagnalab/VIBER/" | ||
doi: "10.1038/s41588-020-0675-5" | ||
licence: ["GPL v3-or-later"] | ||
identifier: "" | ||
|
||
input: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1']` | ||
- rds_join: | ||
type: file | ||
description: Either a .rds object of class mCNAqc or a .csv mutations table | ||
pattern: "*.{rds,csv}" | ||
ontologies: | ||
- edam: "http://edamontology.org/format_3752" # csv | ||
- tumour_samples: | ||
type: list | ||
description: List of tumour sample identifiers for a specific patient | ||
output: | ||
viber_rds: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_viber_best_st_fit.rds": | ||
type: file | ||
description: Best viber fit as an .rds object | ||
pattern: "*_viber_best_st_fit.rds" | ||
ontologies: [] | ||
viber_heuristic_rds: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_viber_best_st_heuristic_fit.rds": | ||
type: file | ||
description: Cluster-filtering heuristics viber fit as an .rds object | ||
pattern: "_viber_best_st_heuristic_fit.rds" | ||
ontologies: [] | ||
viber_plots_rds: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_${plot1}": | ||
type: file | ||
description: mixining proportion plot or best fit plot | ||
pattern: "*_plots.rds" | ||
ontologies: [] | ||
viber_heuristic_plots_rds: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_${plot2}": | ||
type: file | ||
description: mixining proportion plot or best fit plot heuristics | ||
pattern: "*viber_best_st_heuristic*.rds" | ||
ontologies: [] | ||
viber_report_rds: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_REPORT_plots_viber.rds": | ||
type: file | ||
description: final report plots as a .pdf file | ||
pattern: "*_REPORT_plots_viber.rds" | ||
ontologies: [] | ||
viber_report_pdf: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_REPORT_plots_viber.pdf": | ||
type: file | ||
description: final report plots as a .pdf file | ||
pattern: "*_REPORT_plots_viber.pdf" | ||
ontologies: [] | ||
viber_report_png: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'sample1' ]` | ||
- "*_REPORT_plots_viber.png": | ||
type: file | ||
description: final report plots as a .png file | ||
pattern: "*_REPORT_plots_viber.png" | ||
ontologies: [] | ||
versions: | ||
- versions.yml: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
|
||
ontologies: | ||
- edam: http://edamontology.org/format_3750 # YAML | ||
authors: | ||
- "@giorgiagandolfi" | ||
maintainers: | ||
- "@giorgiagandolfi" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
#!/usr/bin/env Rscript | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer that the script be called something more informtive like Calling it main_script.R is too generic to be really useful. |
||
|
||
parse_args = function(x) { | ||
x = gsub("\\\\[","",x) | ||
x = gsub("\\\\]","",x) | ||
# giving errors when we have lists like c(xxx, xxx) since it will separate it | ||
# args_list = unlist(strsplit(x, ', ')[[1]]) | ||
args_list = unlist(strsplit(x, ", (?=[^)]*(?:\\\\(|\$))", perl=TRUE)) | ||
# args_vals = lapply(args_list, function(x) strsplit(x, split=":")[[1]]) | ||
args_vals = lapply(args_list, function(x) { | ||
x_splt = strsplit(x, split=":")[[1]] | ||
c(x_splt[1], paste(x_splt[2:length(x_splt)], collapse=":")) | ||
}) | ||
|
||
# Ensure the option vectors are length 2 (key/ value) to catch empty ones | ||
args_vals = lapply(args_vals, function(z){ length(z) = 2; z}) | ||
|
||
parsed_args = structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1])) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line seems an odd bit of code. Can you clarify what it does and why it is needed? |
||
parsed_args[! is.na(parsed_args)] | ||
} | ||
|
||
opt = list( | ||
prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix') | ||
) | ||
args_opt = parse_args('$task.ext.args') | ||
for ( ao in names(args_opt)) opt[[ao]] = args_opt[[ao]] | ||
print(opt) | ||
|
||
# Script ##### | ||
|
||
library(VIBER) | ||
library(dplyr) | ||
library(tidyverse) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Your code calls tidyverse, but your conda library doesn't. If you actually need all of tidyverse, I think you need to add it to the conda environment yml. |
||
library(ggplot2) | ||
library(CNAqc) | ||
|
||
#patientID = "$meta.patient" | ||
samples = substr("$tumour_samples", 2, nchar("$tumour_samples")-1) | ||
samples = strsplit(samples, ", ")[[1]] | ||
print("$meta.patient") | ||
print("$tumour_samples") | ||
print("$rds_join") | ||
print(samples) | ||
|
||
if ( grepl(".rds\$", tolower("$rds_join")) ) { | ||
input_obj = readRDS("$rds_join") | ||
if (class(input_obj) == "m_cnaqc") { | ||
shared = input_obj %>% get_sample(sample=samples, which_obj="shared") | ||
joint_table = lapply(names(shared), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find the mixture of lapply and dplyr grating, and would prefer using purrr:: and dplyr::, but that's a personal taste. |
||
function(sample_name) | ||
shared[[sample_name]] %>% | ||
# keep only mutations on the diploid karyotype | ||
CNAqc::subset_by_segment_karyotype("1:1") %>% | ||
CNAqc::Mutations() %>% | ||
dplyr::mutate(sample_id=sample_name) | ||
) %>% dplyr::bind_rows() | ||
} else { | ||
cli::cli_alert_warning("Object of class {class(input_obj)} not supported.") | ||
return() | ||
} | ||
} else { | ||
joint_table = read.csv("$rds_join") | ||
} | ||
|
||
print("Subset joint done") | ||
|
||
## TODO : add drivers to `input_tab` | ||
|
||
## Read input joint table | ||
input_tab = joint_table %>% | ||
dplyr::mutate(VAF=replace(VAF, VAF==0, 1e-7)) | ||
|
||
## Convert the input table into longer format | ||
reads_data = input_tab %>% | ||
dplyr::select(chr, from, ref, alt, NV, DP, VAF, sample_id) %>% | ||
tidyr::pivot_wider(names_from="sample_id", | ||
values_from=c("NV","DP","VAF"), names_sep=".") | ||
|
||
## Extract DP (depth) | ||
dp = reads_data %>% | ||
# dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If these commented lines are not necessary, please just remove them. |
||
dplyr::select(dplyr::starts_with("DP")) %>% | ||
dplyr::mutate(dplyr::across(.cols=dplyr::everything(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't line 75 imply there is only one DP column? Then why eveything? Also, since the same code repeats, why not just use tidyr::replace_na() for all the values you want to replace in one go? |
||
.fns=function(x) replace(x, is.na(x), 0))) %>% | ||
dplyr::rename_all(function(x) stringr::str_remove_all(x,"DP.")) | ||
|
||
## Extract NV (alt_counts) | ||
nv = reads_data %>% | ||
# dplyr::filter(mutation_id %in% non_tail) %>% ## this step should be managed before by other module | ||
dplyr::select(dplyr::starts_with("NV")) %>% | ||
dplyr::mutate(dplyr::across(.cols=dplyr::everything(), | ||
.fns=function(x) replace(x, is.na(x), 0))) %>% | ||
dplyr::rename_all(function(x) stringr::str_remove_all(x,"NV.")) | ||
|
||
# Standard fit | ||
viber_K = as.integer(opt[["K"]]) | ||
|
||
|
||
message("Starting standard fit") | ||
st_fit = VIBER::variational_fit(nv, dp, | ||
K=viber_K, | ||
data=reads_data, | ||
description="$meta.patient" | ||
) | ||
message("End standard fit") | ||
best_fit = best_fit_heuristic = st_fit | ||
|
||
# If all clusters are removed -> keep the origianl best fit | ||
tryCatch(expr = { | ||
# Apply the heuristic | ||
best_fit_heuristic = VIBER::choose_clusters(st_fit, | ||
binomial_cutoff=as.numeric(opt[["binomial_cutoff"]]), | ||
dimensions_cutoff=as.integer(opt[["dimensions_cutoff"]]), | ||
pi_cutoff=as.numeric(opt[["pi_cutoff"]]), | ||
re_assign=as.logical(opt[["re_assign"]]) | ||
) | ||
}, error = function(e) { | ||
print(e) | ||
best_fit_heuristic <<- st_fit | ||
} ) | ||
|
||
# Save fits | ||
saveRDS(best_fit, file=paste0(opt[["prefix"]], "_viber_best_st_fit.rds")) | ||
saveRDS(best_fit_heuristic, file = paste0(opt[["prefix"]], "_viber_best_st_heuristic_fit.rds")) | ||
|
||
# Save plots | ||
n_samples = ncol(best_fit[["x"]]) - 1 | ||
if (n_samples >1) { #mutlisample mode on | ||
print("multisample mode on") | ||
plot_fit = plot(best_fit) | ||
plot_fit_heuristic = plot(best_fit_heuristic) | ||
|
||
saveRDS(plot_fit, file=paste0(opt[["prefix"]], "_viber_best_st_fit_plots.rds")) | ||
saveRDS(plot_fit_heuristic, file=paste0(opt[["prefix"]], "_viber_best_st_heuristic_fit_plots.rds")) | ||
} else { | ||
plot_fit_mixing = plot_mixing_proportions(best_fit) | ||
plot_fit_mixing_heuristic = plot_mixing_proportions(best_fit_heuristic) | ||
|
||
saveRDS(plot_fit_mixing, file=paste0(opt[["prefix"]], "_viber_best_st_mixing_plots.rds")) | ||
saveRDS(plot_fit_mixing_heuristic, file=paste0(opt[["prefix"]], "_viber_best_st_heuristic_mixing_plots.rds")) | ||
} | ||
|
||
# Save report plot | ||
n_samples = ncol(best_fit[["x"]]) - 1 | ||
marginals = multivariate = ggplot() | ||
|
||
try(expr = {marginals <<- VIBER::plot_1D(best_fit)} ) | ||
#try(expr = {multivariate = plot(best_fit) %>% patchwork::wrap_plots()} ) | ||
#top_p = patchwork::wrap_plots(marginals, multivariate, design=ifelse(n_samples>2, "ABB", "AAB")) | ||
|
||
try(expr = {multivariate = plot(best_fit)}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's no catch, so is the point only to continue if this fails? Or is the error standard? Do you need encapsulated in try? If not, just have the inside code and let it fail. |
||
try(expr = {multivariate = ggpubr::ggarrange(plotlist = multivariate)}) | ||
top_p = ggpubr::ggarrange(plotlist = list(marginals, multivariate), widths=ifelse(n_samples>2, c(1,2), c(2,1))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please try running a code fromatter, because these kind of lines are hard to read. RStudio has a good code formatter. |
||
|
||
mix_p = VIBER::plot_mixing_proportions(best_fit) | ||
binom = VIBER::plot_peaks(best_fit) | ||
elbo = VIBER::plot_ELBO(best_fit) | ||
#bottom_p = patchwork::wrap_plots(mix_p, binom, elbo, design="ABBBC") | ||
bottom_p = ggpubr::ggarrange(plotlist = list(mix_p, binom, elbo), widths = c(1,2,1), nrow = 1) | ||
|
||
#report_fig = patchwork::wrap_plots(top_p, bottom_p, design=ifelse(n_samples>2, "AAAB", "AAB")) | ||
report_fig = ggpubr::ggarrange(top_p, bottom_p, nrow = 2, heights = ifelse(n_samples>2, c(3,1), c(2,1))) | ||
saveRDS(report_fig, file=paste0(opt[["prefix"]], "_REPORT_plots_viber.rds")) | ||
ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_viber.pdf"), height=210, width=210, units="mm", dpi = 200) | ||
ggplot2::ggsave(plot=report_fig, filename=paste0(opt[["prefix"]], "_REPORT_plots_viber.png"), height=210, width=210, units="mm", dpi = 200) | ||
|
||
|
||
# version export | ||
f = file("versions.yml","w") | ||
cnaqc_version = sessionInfo()\$otherPkgs\$CNAqc\$Version | ||
viber_version = sessionInfo()\$otherPkgs\$VIBER\$Version | ||
writeLines(paste0('"', "$task.process", '"', ":"), f) | ||
writeLines(paste(" CNAqc:", cnaqc_version), f) | ||
writeLines(paste(" VIBER:", viber_version), f) | ||
close(f) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find this a bit weird that there are two options for outptut, but neither is actually optional.
Could you just use the common pattern as the defintion, like plot2 would be something like "heuristic.rds"
Why do you call them plots when they are rds (not plots)?
Don't plot1 and plo2 overlap with viber_rds and viber_heuristic_rds if you have more than one sample? If these are obligatory and always outputted, just make the other version optional.